diff --git a/README.md b/README.md
index 440fff2..04d37c6 100644
--- a/README.md
+++ b/README.md
@@ -185,4 +185,4 @@ TODO
## :bookmark_tabs: Postscript
-[Resource Sharing & Tool Recommendations](https://github.com/heibaiying/BigData-Notes/blob/master/notes/资料分享与工具推荐.md)
+[Resource Sharing & Development Tool Recommendations](https://github.com/heibaiying/BigData-Notes/blob/master/notes/资料分享与工具推荐.md)
\ No newline at end of file
diff --git a/code/spark/spark-core/file/emp.json b/code/spark/spark-core/file/emp.json
deleted file mode 100644
index 03af1f5..0000000
--- a/code/spark/spark-core/file/emp.json
+++ /dev/null
@@ -1,14 +0,0 @@
-{"EMPNO": 7369,"ENAME": "SMITH","JOB": "CLERK","MGR": 7902,"HIREDATE": "1980-12-17 00:00:00","SAL": 800.00,"COMM": null,"DEPTNO": 20}
-{"EMPNO": 7499,"ENAME": "ALLEN","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-02-20 00:00:00","SAL": 1600.00,"COMM": 300.00,"DEPTNO": 30}
-{"EMPNO": 7521,"ENAME": "WARD","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-02-22 00:00:00","SAL": 1250.00,"COMM": 500.00,"DEPTNO": 30}
-{"EMPNO": 7566,"ENAME": "JONES","JOB": "MANAGER","MGR": 7839,"HIREDATE": "1981-04-02 00:00:00","SAL": 2975.00,"COMM": null,"DEPTNO": 20}
-{"EMPNO": 7654,"ENAME": "MARTIN","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-09-28 00:00:00","SAL": 1250.00,"COMM": 1400.00,"DEPTNO": 30}
-{"EMPNO": 7698,"ENAME": "BLAKE","JOB": "MANAGER","MGR": 7839,"HIREDATE": "1981-05-01 00:00:00","SAL": 2850.00,"COMM": null,"DEPTNO": 30}
-{"EMPNO": 7782,"ENAME": "CLARK","JOB": "MANAGER","MGR": 7839,"HIREDATE": "1981-06-09 00:00:00","SAL": 2450.00,"COMM": null,"DEPTNO": 10}
-{"EMPNO": 7788,"ENAME": "SCOTT","JOB": "ANALYST","MGR": 7566,"HIREDATE": "1987-04-19 00:00:00","SAL": 1500.00,"COMM": null,"DEPTNO": 20}
-{"EMPNO": 7839,"ENAME": "KING","JOB": "PRESIDENT","MGR": null,"HIREDATE": "1981-11-17 00:00:00","SAL": 5000.00,"COMM": null,"DEPTNO": 10}
-{"EMPNO": 7844,"ENAME": "TURNER","JOB": "SALESMAN","MGR": 7698,"HIREDATE": "1981-09-08 00:00:00","SAL": 1500.00,"COMM": 0.00,"DEPTNO": 30}
-{"EMPNO": 7876,"ENAME": "ADAMS","JOB": "CLERK","MGR": 7788,"HIREDATE": "1987-05-23 00:00:00","SAL": 1100.00,"COMM": null,"DEPTNO": 20}
-{"EMPNO": 7900,"ENAME": "JAMES","JOB": "CLERK","MGR": 7698,"HIREDATE": "1981-12-03 00:00:00","SAL": 950.00,"COMM": null,"DEPTNO": 30}
-{"EMPNO": 7902,"ENAME": "FORD","JOB": "ANALYST","MGR": 7566,"HIREDATE": "1981-12-03 00:00:00","SAL": 3000.00,"COMM": null,"DEPTNO": 20}
-{"EMPNO": 7934,"ENAME": "MILLER","JOB": "CLERK","MGR": 7782,"HIREDATE": "1982-01-23 00:00:00","SAL": 1300.00,"COMM": null,"DEPTNO": 10}
\ No newline at end of file
diff --git a/code/spark/spark-core/pom.xml b/code/spark/spark-core/pom.xml
deleted file mode 100644
index e8128f4..0000000
--- a/code/spark/spark-core/pom.xml
+++ /dev/null
@@ -1,59 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-
-    <groupId>com.heibaiying</groupId>
-    <artifactId>spark-core</artifactId>
-    <version>1.0</version>
-
-    <properties>
-        <scala.version>2.12</scala.version>
-        <spark.version>2.4.0</spark.version>
-    </properties>
-
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.apache.maven.plugins</groupId>
-                <artifactId>maven-compiler-plugin</artifactId>
-                <configuration>
-                    <source>8</source>
-                    <target>8</target>
-                </configuration>
-            </plugin>
-        </plugins>
-    </build>
-
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-core_${scala.version}</artifactId>
-            <version>${spark.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.spark</groupId>
-            <artifactId>spark-sql_${scala.version}</artifactId>
-            <version>${spark.version}</version>
-        </dependency>
-
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <version>4.12</version>
-        </dependency>
-
-        <dependency>
-            <groupId>com.thoughtworks.paranamer</groupId>
-            <artifactId>paranamer</artifactId>
-            <version>2.8</version>
-        </dependency>
-    </dependencies>
-
-</project>
\ No newline at end of file
diff --git a/code/spark/spark-core/src/main/java/rdd/java/TransformationTest.java b/code/spark/spark-core/src/main/java/rdd/java/TransformationTest.java
deleted file mode 100644
index 19a21bf..0000000
--- a/code/spark/spark-core/src/main/java/rdd/java/TransformationTest.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package rdd.java;
-
-import org.apache.spark.SparkConf;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
-
-import java.util.Arrays;
-import java.util.List;
-
-
-public class TransformationTest {
-
-
- private static JavaSparkContext sc = null;
-
-
- @Before
- public void prepare() {
- SparkConf conf = new SparkConf().setAppName("TransformationTest").setMaster("local[2]");
- sc = new JavaSparkContext(conf);
- }
-
- @Test
- public void map() {
-        List<Integer> list = Arrays.asList(3, 6, 9, 10, 12, 21);
-        /*
-         * Do not use a method reference such as System.out::println here, or the job fails with:
-         * org.apache.spark.SparkException: Task not serializable
-         * Caused by: java.io.NotSerializableException: java.io.PrintStream
-         * When an operator such as map or foreach references members of an enclosing class,
-         * Spark must serialize that class; if any referenced member is not serializable,
-         * the exception above is thrown.
-         */
- sc.parallelize(list).map(x -> x * 10).foreach(x -> System.out.println(x));
- }
-
-
- @After
- public void destroy() {
- sc.close();
- }
-
-}
diff --git a/code/spark/spark-core/src/main/java/rdd/scala/SparkSqlApp.scala b/code/spark/spark-core/src/main/java/rdd/scala/SparkSqlApp.scala
deleted file mode 100644
index 5bdfa93..0000000
--- a/code/spark/spark-core/src/main/java/rdd/scala/SparkSqlApp.scala
+++ /dev/null
@@ -1,27 +0,0 @@
-package rdd.scala
-
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.functions._
-
-
-object SparkSqlApp {
-
-  // Test entry point
- def main(args: Array[String]): Unit = {
-
- val spark = SparkSession.builder().appName("aggregations").master("local[2]").getOrCreate()
-
- val df = spark.read.json("/usr/file/json/emp.json")
-
- import spark.implicits._
-
- df.select($"ename").limit(5).show()
- df.sort("sal").limit(3).show()
-
- df.orderBy(desc("sal")).limit(3).show()
-
- df.select("deptno").distinct().show()
-
- df.orderBy(desc("deptno"), asc("sal")).show(2)
- }
-}
diff --git a/code/spark/spark-core/src/main/java/rdd/scala/TransformationTest.scala b/code/spark/spark-core/src/main/java/rdd/scala/TransformationTest.scala
deleted file mode 100644
index 1b3f03e..0000000
--- a/code/spark/spark-core/src/main/java/rdd/scala/TransformationTest.scala
+++ /dev/null
@@ -1,201 +0,0 @@
-package rdd.scala
-
-import org.apache.spark.{SparkConf, SparkContext}
-import org.junit.{After, Test}
-
-import scala.collection.mutable.ListBuffer
-
-class TransformationTest {
-
- val conf: SparkConf = new SparkConf().setAppName("TransformationTest").setMaster("local[2]")
- val sc = new SparkContext(conf)
-
-
- @Test
- def map(): Unit = {
- val list = List(1, 2, 3)
- sc.parallelize(list).map(_ * 10).foreach(println)
- }
-
-
- @Test
- def filter(): Unit = {
- val list = List(3, 6, 9, 10, 12, 21)
- sc.parallelize(list).filter(_ >= 10).foreach(println)
- }
-
-
- @Test
- def flatMap(): Unit = {
- val list = List(List(1, 2), List(3), List(), List(4, 5))
- sc.parallelize(list).flatMap(_.toList).map(_ * 10).foreach(println)
-
- val lines = List("spark flume spark",
- "hadoop flume hive")
- sc.parallelize(lines).flatMap(line => line.split(" ")).
- map(word => (word, 1)).reduceByKey(_ + _).foreach(println)
-
- }
-
-
- @Test
- def mapPartitions(): Unit = {
- val list = List(1, 2, 3, 4, 5, 6)
- sc.parallelize(list, 3).mapPartitions(iterator => {
- val buffer = new ListBuffer[Int]
- while (iterator.hasNext) {
- buffer.append(iterator.next() * 100)
- }
- buffer.toIterator
- }).foreach(println)
- }
-
-
- @Test
- def mapPartitionsWithIndex(): Unit = {
- val list = List(1, 2, 3, 4, 5, 6)
- sc.parallelize(list, 3).mapPartitionsWithIndex((index, iterator) => {
- val buffer = new ListBuffer[String]
- while (iterator.hasNext) {
-        buffer.append("partition " + index + ": " + iterator.next() * 100)
- }
- buffer.toIterator
- }).foreach(println)
- }
-
-
- @Test
- def sample(): Unit = {
- val list = List(1, 2, 3, 4, 5, 6)
- sc.parallelize(list).sample(withReplacement = false, fraction = 0.5).foreach(println)
- }
-
-
- @Test
- def union(): Unit = {
- val list1 = List(1, 2, 3)
- val list2 = List(4, 5, 6)
- sc.parallelize(list1).union(sc.parallelize(list2)).foreach(println)
- }
-
-
- @Test
- def intersection(): Unit = {
- val list1 = List(1, 2, 3, 4, 5)
- val list2 = List(4, 5, 6)
- sc.parallelize(list1).intersection(sc.parallelize(list2)).foreach(println)
- }
-
- @Test
- def distinct(): Unit = {
- val list = List(1, 2, 2, 4, 4)
- sc.parallelize(list).distinct().foreach(println)
- }
-
-
- @Test
- def groupByKey(): Unit = {
- val list = List(("hadoop", 2), ("spark", 3), ("spark", 5), ("storm", 6), ("hadoop", 2))
- sc.parallelize(list).groupByKey().map(x => (x._1, x._2.toList)).foreach(println)
- }
-
-
- @Test
- def reduceByKey(): Unit = {
- val list = List(("hadoop", 2), ("spark", 3), ("spark", 5), ("storm", 6), ("hadoop", 2))
- sc.parallelize(list).reduceByKey(_ + _).foreach(println)
- }
-
- @Test
- def aggregateByKey(): Unit = {
- val list = List(("hadoop", 3), ("hadoop", 2), ("spark", 4), ("spark", 3), ("storm", 6), ("storm", 8))
- sc.parallelize(list, numSlices = 6).aggregateByKey(zeroValue = 0, numPartitions = 5)(
- seqOp = math.max(_, _),
- combOp = _ + _
- ).getNumPartitions
- }
-
-
- @Test
- def sortBy(): Unit = {
- val list01 = List((100, "hadoop"), (90, "spark"), (120, "storm"))
- sc.parallelize(list01).sortByKey(ascending = false).foreach(println)
-
- val list02 = List(("hadoop", 100), ("spark", 90), ("storm", 120))
- sc.parallelize(list02).sortBy(x => x._2, ascending = false).foreach(println)
- }
-
-
- @Test
- def join(): Unit = {
- val list01 = List((1, "student01"), (2, "student02"), (3, "student03"))
- val list02 = List((1, "teacher01"), (2, "teacher02"), (3, "teacher03"))
- sc.parallelize(list01).join(sc.parallelize(list02)).foreach(println)
- }
-
-
- @Test
- def cogroup(): Unit = {
- val list01 = List((1, "a"), (1, "a"), (2, "b"), (3, "e"))
- val list02 = List((1, "A"), (2, "B"), (3, "E"))
- val list03 = List((1, "[ab]"), (2, "[bB]"), (3, "eE"), (3, "eE"))
- sc.parallelize(list01).cogroup(sc.parallelize(list02), sc.parallelize(list03)).foreach(println)
- }
-
-
- @Test
- def cartesian(): Unit = {
- val list1 = List("A", "B", "C")
- val list2 = List(1, 2, 3)
- sc.parallelize(list1).cartesian(sc.parallelize(list2)).foreach(println)
- }
-
-
- @Test
- def reduce(): Unit = {
- val list = List(1, 2, 3, 4, 5)
- sc.parallelize(list).reduce((x, y) => x + y)
- sc.parallelize(list).reduce(_ + _)
- }
-
-  // Extend Ordering[T] to implement a custom comparator
- class CustomOrdering extends Ordering[(Int, String)] {
- override def compare(x: (Int, String), y: (Int, String)): Int
- = if (x._2.length > y._2.length) 1 else -1
- }
-
- @Test
- def takeOrdered(): Unit = {
- val list = List((1, "hadoop"), (1, "storm"), (1, "azkaban"), (1, "hive"))
-    // Define the implicit default ordering (picked up by takeOrdered)
- implicit val implicitOrdering = new CustomOrdering
- sc.parallelize(list).takeOrdered(5)
- }
-
-
- @Test
- def countByKey(): Unit = {
- val list = List(("hadoop", 10), ("hadoop", 10), ("storm", 3), ("storm", 3), ("azkaban", 1))
- sc.parallelize(list).countByKey()
- }
-
- @Test
- def saveAsTextFile(): Unit = {
- val list = List(("hadoop", 10), ("hadoop", 10), ("storm", 3), ("storm", 3), ("azkaban", 1))
- sc.parallelize(list).saveAsTextFile("/usr/file/temp")
- }
-
- @Test
- def saveAsSequenceFile(): Unit = {
- val list = List(("hadoop", 10), ("hadoop", 10), ("storm", 3), ("storm", 3), ("azkaban", 1))
- sc.parallelize(list).saveAsSequenceFile("/usr/file/sequence")
- }
-
-
- @After
- def destroy(): Unit = {
- sc.stop()
- }
-
-
-}
\ No newline at end of file
diff --git a/code/spark/spark-core/src/main/java/rdd/scala/WordCount.scala b/code/spark/spark-core/src/main/java/rdd/scala/WordCount.scala
deleted file mode 100644
index a09daf6..0000000
--- a/code/spark/spark-core/src/main/java/rdd/scala/WordCount.scala
+++ /dev/null
@@ -1,14 +0,0 @@
-package rdd.scala
-
-import org.apache.spark.{SparkConf, SparkContext}
-
-
-object WordCount extends App {
-
- val conf = new SparkConf().setAppName("sparkBase").setMaster("local[2]")
- val sc = new SparkContext(conf)
- val rdd = sc.textFile("input/wc.txt").flatMap(_.split(",")).map((_, 1)).reduceByKey(_ + _)
- rdd.foreach(println)
- rdd.saveAsTextFile("output/")
-
-}
\ No newline at end of file
diff --git a/resources/Reformatting.java b/resources/Reformatting.java
deleted file mode 100644
index 9a70635..0000000
--- a/resources/Reformatting.java
+++ /dev/null
@@ -1,167 +0,0 @@
-import javafx.util.Pair;
-
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import static java.util.regex.Pattern.*;
-
-/**
- * @author : heibaiying
- * @description : Generates the navigation (TOC) and converts image links to GitHub URLs
- */
-public class Reformatting {
-
- /**
-     * GitHub username
- **/
- private static final String GITHUB_USERNAME = "heibaiying";
- /**
-     * Repository name
- **/
- private static final String PROJECT_NAME = "BigData-Notes";
-
- public static void main(String[] args) {
-
- if (args.length < 1) {
-            System.out.println("Please specify the file path");
- return;
- }
-
- String dir = "D:\\BigData-Notes\\notes\\Hbase协处理器.md";
-
- String preUrl = "https://github.com/" + GITHUB_USERNAME + "/" + PROJECT_NAME + "/blob/master/pictures/";
- String regex = "(!\\[(\\S*)]\\(\\S:\\\\" + PROJECT_NAME + "\\\\pictures\\\\(\\S*)\\)[^()]*?)";
-
-        List<String> filesList = getAllFile(dir, new ArrayList<>());
- for (String filePath : filesList) {
-            // Read the file content
-            String content = getContent(filePath);
-            // Rewrite local image paths to GitHub URLs
-            String newContent = changeImageUrl(content, preUrl, regex);
-            // Collect all headings
-            List<Pair<String, String>> allTitle = getAllTitle(newContent);
-            // Generate the navigation (TOC)
-            String nav = genNav(allTitle);
-            // Write back, overwriting the original file
-            write(filePath, newContent, nav);
- }
-        System.out.println("Reformatting completed!");
- }
-
-
- private static String changeImageUrl(String content, String preUrl, String oldImageUrlRegex) {
-
-        // Centering markup supported by GitHub; "$3" back-references the image file name captured by the regex.
-        // NOTE: the original replacement string was stripped from this diff and is reconstructed here.
-        return content.replaceAll(oldImageUrlRegex,
-                String.format("<div align=\"center\"> <img src=\"%s$3\"/> </div>", preUrl));
-
- }
-
-    private static List<String> getAllFile(String dir, List<String> filesList) {
-        File file = new File(dir);
-        // If it is a single file, do not recurse
-        if (file.isFile() && file.getName().endsWith(".md")) {
-            filesList.add(file.getAbsolutePath());
-        }
-        // If it is a directory, walk every file under it
- File[] files = file.listFiles();
- if (files != null) {
- for (File f : files) {
- if (f.isDirectory() && !f.getName().startsWith(".")) {
- getAllFile(f.getAbsolutePath(), filesList);
- } else if (f.getName().endsWith(".md")) {
- filesList.add(f.getAbsolutePath());
- }
- }
- }
- return filesList;
- }
-
-
- private static void write(String filePath, String content, String nav) {
- try {
-            String newContent = "";
-            // NOTE: the "<nav>" literals below were stripped from this diff; they are reconstructed here.
-            if (content.contains("<nav>")) {
-                // If a TOC already exists, replace it
-                newContent = content.replaceAll("(?m)(<nav>[\\s\\S]*?</nav>)", nav);
-            } else {
-                StringBuilder stringBuilder = new StringBuilder(content);
-                // If there is no TOC yet, insert it between the document title and the first section heading
-                int index = content.indexOf("## ");
- stringBuilder.insert(index - 1, nav);
- newContent = stringBuilder.toString();
- }
-            // Write back, overwriting the file
- FileWriter fileWriter = new FileWriter(new File(filePath));
- fileWriter.write(newContent);
- fileWriter.flush();
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- }
-
-    private static String genNav(List<Pair<String, String>> flagAndTitles) {
-        StringBuilder builder = new StringBuilder();
-        // TOC header; the loop below is a reconstruction (the original body was lost) emitting one anchor link per heading
-        builder.append("<nav>\n");
-        for (Pair<String, String> pair : flagAndTitles) {
-            builder.append(genBlank(pair.getKey().length() - 2, 4))
-                    .append("<a href=\"#").append(pair.getValue().replaceAll("\\s", ""))
-                    .append("\">").append(pair.getValue()).append("</a><br/>\n");
-        }
-        builder.append("</nav>\n");
-        return builder.toString();
-    }
-
- private static String genBlank(int i, int scale) {
- StringBuilder builder = new StringBuilder();
- for (int j = 0; j < i; j++) {
- for (int k = 0; k < scale; k++) {
- builder.append(" ");
- }
- }
- return builder.toString();
- }
-
-    private static List<Pair<String, String>> getAllTitle(String content) {
-        List<Pair<String, String>> list = new ArrayList<>();
- Pattern pattern = compile("(?m)^(#{2,10})\\s?(.*)");
- Matcher matcher = pattern.matcher(content);
- while (matcher.find()) {
- String group2 = matcher.group(2);
-            if (!group2.contains("参考资料")) {  // skip the "参考资料" (References) heading
- list.add(new Pair<>(matcher.group(1), group2));
- }
- }
- return list;
- }
-
- private static String getContent(String filePath) {
- StringBuilder builder = new StringBuilder();
-
- try {
- FileReader reader = new FileReader(filePath);
- char[] chars = new char[1024 * 1024];
-
- int read;
- while ((read = reader.read(chars)) != -1) {
- builder.append(new String(chars, 0, read));
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return builder.toString();
- }
-
-}