diff --git a/README.md b/README.md index 217a6e7..68f68ca 100644 --- a/README.md +++ b/README.md @@ -70,10 +70,28 @@ ## 三、Spark +**Spark Core :** + 1. Spark简介 2. [Spark单机版本环境搭建](https://github.com/heibaiying/BigData-Notes/blob/master/notes/installation/Spark%E5%8D%95%E6%9C%BA%E7%89%88%E6%9C%AC%E7%8E%AF%E5%A2%83%E6%90%AD%E5%BB%BA.md) -3. RDD详解 -4. Spark Transformation 和 Action +3. 弹性式数据集RDD +4. Spark命令行的基本使用 +5. RDD常用算子之——Transformation +6. RDD常用算子之——Action +7. Spark广播变量与累加器 + +**Spark SQL :** + +1. DataFrame和DataSet +2. Spark SQL之常用SQL语句 +3. External Data Source + +**Spark Streaming :** + +1. Spark Streaming简介 +2. DStream常用函数 +3. Spark Streaming 整合 Flume +4. Spark Streaming 整合 Kafka ## 四、Flink diff --git a/code/spark/spark-base/output/wcResult/._SUCCESS.crc b/code/spark/spark-base/output/wcResult/._SUCCESS.crc deleted file mode 100644 index 3b7b044..0000000 Binary files a/code/spark/spark-base/output/wcResult/._SUCCESS.crc and /dev/null differ diff --git a/code/spark/spark-base/output/wcResult/.part-00000.crc b/code/spark/spark-base/output/wcResult/.part-00000.crc deleted file mode 100644 index c9619cf..0000000 Binary files a/code/spark/spark-base/output/wcResult/.part-00000.crc and /dev/null differ diff --git a/code/spark/spark-base/output/wcResult/.part-00001.crc b/code/spark/spark-base/output/wcResult/.part-00001.crc deleted file mode 100644 index 5c99fc2..0000000 Binary files a/code/spark/spark-base/output/wcResult/.part-00001.crc and /dev/null differ diff --git a/code/spark/spark-base/output/wcResult/_SUCCESS b/code/spark/spark-base/output/wcResult/_SUCCESS deleted file mode 100644 index e69de29..0000000 diff --git a/code/spark/spark-base/output/wcResult/part-00000 b/code/spark/spark-base/output/wcResult/part-00000 deleted file mode 100644 index 5c32392..0000000 --- a/code/spark/spark-base/output/wcResult/part-00000 +++ /dev/null @@ -1 +0,0 @@ -(mapreduce,1) diff --git a/code/spark/spark-base/output/wcResult/part-00001 
b/code/spark/spark-base/output/wcResult/part-00001 deleted file mode 100644 index 575646b..0000000 --- a/code/spark/spark-base/output/wcResult/part-00001 +++ /dev/null @@ -1,2 +0,0 @@ -(spark,2) -(hadoop,2) diff --git a/code/spark/spark-base/src/main/scala/com/heibaiying/spark/rdd/WordCount.scala b/code/spark/spark-base/src/main/scala/com/heibaiying/spark/rdd/WordCount.scala index bd418b7..8c7c3f8 100644 --- a/code/spark/spark-base/src/main/scala/com/heibaiying/spark/rdd/WordCount.scala +++ b/code/spark/spark-base/src/main/scala/com/heibaiying/spark/rdd/WordCount.scala @@ -3,14 +3,12 @@ package com.heibaiying.spark.rdd import org.apache.spark.{SparkConf, SparkContext} -object WordCount { +object WordCount extends App { - def main(args: Array[String]): Unit = { - val conf = new SparkConf().setAppName("sparkBase").setMaster("local[2]") - val sc = new SparkContext(conf) - val rdd = sc.textFile("input/wc.txt").flatMap(_.split(",")).map((_, 1)).reduceByKey(_ + _) - rdd.foreach(println) - rdd.saveAsTextFile("output/") - } + val conf = new SparkConf().setAppName("sparkBase").setMaster("local[2]") + val sc = new SparkContext(conf) + val rdd = sc.textFile("input/wc.txt").flatMap(_.split(",")).map((_, 1)).reduceByKey(_ + _) + rdd.foreach(println) + rdd.saveAsTextFile("output/") } \ No newline at end of file diff --git a/notes/Spark-RDD.md b/notes/Spark-RDD.md new file mode 100644 index 0000000..90d91eb --- /dev/null +++ b/notes/Spark-RDD.md @@ -0,0 +1,2 @@ +弹性式数据集RDD +