Merge branch 'master' of github.com:heibaiying/BigData-Notes
This commit is contained in: commit 2bf214d18b

27  code/spark/spark-core/README.md  Normal file
@@ -0,0 +1,27 @@
val list = List(3, 6, 9, 10, 12, 21)
val listRDD = sc.parallelize(list)
val intsRDD = listRDD.map(_ * 10)
intsRDD.foreach(println)

sc.parallelize(list).map(_ * 10).foreach(println)
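The two forms above are equivalent: the first names the intermediate RDDs, the second chains the calls. A sketch of the expected output on this sample list (order is not guaranteed, since foreach runs per partition):

// 30, 60, 90, 100, 120, 210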
sc.parallelize(list).filter(_ >= 10).foreach(println)
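filter keeps only the elements satisfying the predicate; on the same sample list the expected output, in some order, is:

// 10, 12, 21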
val list = List(List(1, 2), List(3), List(), List(4, 5))
sc.parallelize(list).flatMap(_.toList).map(_ * 10).foreach(println)
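flatMap flattens the nested lists (the empty List() simply disappears) before the map multiplies each element, so the expected output, in some order, is:

// 10, 20, 30, 40, 50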
val list = List(1, 2, 3, 4, 5)
sc.parallelize(list).reduce((x, y) => x + y)
sc.parallelize(list).reduce(_ + _)
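reduce is an action: it aggregates the elements and returns the result to the driver instead of producing a new RDD, which is why no foreach is needed here. Both calls are equivalent and, for this list, should evaluate to:

// 15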
val list = List(("hadoop", 2), ("spark", 3), ("spark", 5), ("storm", 6),("hadoop", 2))
|
||||
sc.parallelize(list).reduceByKey(_+_).foreach(println)
|
||||
|
||||
|
||||
|
||||
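reduceByKey merges the values of each key with the given function, so the two "hadoop" and the two "spark" entries collapse; the expected output, in some order, is:

// (hadoop,4)
// (spark,8)
// (storm,6)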
val list = List(("hadoop", 2), ("spark", 3), ("spark", 5), ("storm", 6),("hadoop", 2))
|
||||
sc.parallelize(list).groupByKey().map(x=>(x._1,x._2.toList)).foreach(println)
|
||||
|
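groupByKey only groups the values without combining them, so each key keeps a list of its raw values. Expected output (the order of the pairs, and of the values inside each list, may vary):

// (hadoop,List(2, 2))
// (spark,List(3, 5))
// (storm,List(6))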

55  code/spark/spark-core/pom.xml  Normal file
@@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.heibaiying</groupId>
|
||||
<artifactId>spark-core</artifactId>
|
||||
<version>1.0</version>
|
||||
|
||||
<properties>
|
||||
<scala.version>2.12.8</scala.version>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>8</source>
|
||||
<target>8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-core_2.12</artifactId>
|
||||
<version>2.4.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.scalatest</groupId>
|
||||
<artifactId>scalatest_2.12</artifactId>
|
||||
<version>3.0.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.12</version>
|
||||
</dependency>
|
||||
<!--Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 10582-->
|
||||
<dependency>
|
||||
<groupId>com.thoughtworks.paranamer</groupId>
|
||||
<artifactId>paranamer</artifactId>
|
||||
<version>2.8</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
</project>
|
@@ -0,0 +1,44 @@

package rdd.java;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;


public class TransformationTest {

    private static JavaSparkContext sc = null;

    @Before
    public void prepare() {
        SparkConf conf = new SparkConf().setAppName("TransformationTest").setMaster("local[2]");
        sc = new JavaSparkContext(conf);
    }

    @Test
    public void map() {
        List<Integer> list = Arrays.asList(3, 6, 9, 10, 12, 21);
        /*
         * Do not use the method-reference form System.out::println here, or the following exception is thrown:
         * org.apache.spark.SparkException: Task not serializable
         * Caused by: java.io.NotSerializableException: java.io.PrintStream
         * When an operator such as map or foreach references a member function or field of an enclosing class,
         * every member of that class must be serializable; if any member is not, the exception above is thrown.
         */
        sc.parallelize(list).map(x -> x * 10).foreach(x -> System.out.println(x));
    }

    @After
    public void destroy() {
        sc.close();
    }

}
@@ -0,0 +1,24 @@

package rdd.scala

import org.apache.spark.{SparkConf, SparkContext}
import org.junit.{After, Test}

class TransformationTest {

  val conf: SparkConf = new SparkConf().setAppName("TransformationTest").setMaster("local[2]")
  val sc = new SparkContext(conf)

  @Test
  def map(): Unit = {
    val list = List(3, 6, 9, 10, 12, 21)
    sc.parallelize(list).map(_ * 10).foreach(println)
  }

  @After
  def destroy(): Unit = {
    sc.stop()
  }

}
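The remaining transformations from the README can be exercised the same way. A minimal sketch of one more test method (my addition, not part of this commit), assuming it is placed inside the same TransformationTest class:

  // Hypothetical extra test mirroring the reduceByKey example from the README.
  @Test
  def reduceByKey(): Unit = {
    val list = List(("hadoop", 2), ("spark", 3), ("spark", 5), ("storm", 6), ("hadoop", 2))
    // Should print (hadoop,4), (spark,8) and (storm,6), in no particular order.
    sc.parallelize(list).reduceByKey(_ + _).foreach(println)
  }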
0  notes/Spark-Transformation和Action.md  Normal file