spark streaming basis

罗祥 2019-05-27 13:43:49 +08:00
parent 79228ff9ef
commit 5407561d44
7 changed files with 66 additions and 73 deletions

View File

@@ -8,5 +8,17 @@
     <artifactId>spark-streaming-basis</artifactId>
     <version>1.0</version>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.spark</groupId>
+            <artifactId>spark-streaming_2.12</artifactId>
+            <version>2.4.3</version>
+        </dependency>
+        <dependency>
+            <groupId>redis.clients</groupId>
+            <artifactId>jedis</artifactId>
+            <version>2.9.0</version>
+        </dependency>
+    </dependencies>
 </project>

View File

@@ -1,18 +0,0 @@
-package com.heibaiying
-
-import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}
-
-object JedisPoolUtils {
-
-  /* Create the Jedis connection pool */
-  val config = new JedisPoolConfig
-  config.setMaxTotal(30)
-  config.setMaxIdle(10)
-  val jedisPool = new JedisPool(config, "localhost", 6379)
-
-  def getConnection: Jedis = {
-    jedisPool.getResource
-  }
-}

View File

@@ -4,23 +4,23 @@
 import org.apache.spark.SparkConf
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 
 /**
-  *
+  * Word count
   */
 object NetworkWordCount {
 
   def main(args: Array[String]) {
+    /* Set the batch interval to 5 seconds */
     val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
-    /* Set the batch interval */
-    val ssc = new StreamingContext(sparkConf, Seconds(1))
+    val ssc = new StreamingContext(sparkConf, Seconds(5))
 
-    val lines = ssc.socketTextStream("hadoop001", 9999)
+    /* Create a text input stream and run the word count */
+    val lines = ssc.socketTextStream("192.168.200.229", 9999)
     lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).print()
 
     /* Start the streaming context */
     ssc.start()
     /* Wait for the computation to terminate */
     ssc.awaitTermination()
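
socketTextStream is a client, so something must already be listening on port 9999 and writing lines of text (commonly nc -lk 9999). As a rough sketch only, not part of this commit, here is a stand-in source in Scala; it assumes you point socketTextStream at ("localhost", 9999) for local testing:

import java.io.PrintWriter
import java.net.ServerSocket

object ToyLineServer extends App {
  val server = new ServerSocket(9999)           // listen where the streaming job will connect
  val socket = server.accept()                  // wait for NetworkWordCount to connect
  val out = new PrintWriter(socket.getOutputStream, true)
  while (true) {
    out.println("hello spark streaming hello")  // emit one line of words per second
    Thread.sleep(1000)
  }
}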

View File

@@ -1,43 +1,45 @@
 package com.heibaiying
 
+import com.heibaiying.utils.JedisPoolUtil
 import org.apache.spark.SparkConf
+import org.apache.spark.streaming.dstream.DStream
 import org.apache.spark.streaming.{Seconds, StreamingContext}
+import redis.clients.jedis.Jedis
 
 /**
   * Word count
   */
-object NetworkWordCountV2 {
+object NetworkWordCountToRedis {
 
   def main(args: Array[String]) {
 
-    /*
-     * When testing locally it is best to set the Hadoop user name explicitly; otherwise the
-     * local machine's user name is used by default, and creating directories on HDFS may
-     * then fail with a permission exception.
-     */
-    System.setProperty("HADOOP_USER_NAME", "root")
-
     /* Set the batch interval to 5 seconds */
     val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
     val ssc = new StreamingContext(sparkConf, Seconds(5))
 
-    /* A checkpoint directory must be set */
-    ssc.checkpoint("hdfs://192.168.200.229:8020/spark-streaming")
-
     /* Create a text input stream and run the word count */
     val lines = ssc.socketTextStream("192.168.200.229", 9999)
-    lines.flatMap(_.split(" ")).map(x => (x, 1))
-      .updateStateByKey((values: Seq[Int], state: Option[Int]) => {
-        val currentCount: Int = values.sum
-        val lastCount: Int = state.getOrElse(0)
-        Some(currentCount + lastCount)
-      })
-      .print()
+    val pairs: DStream[(String, Int)] = lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _)
+
+    pairs.foreachRDD { rdd =>
+      rdd.foreachPartition { partitionOfRecords =>
+        var jedis: Jedis = null
+        try {
+          jedis = JedisPoolUtil.getConnection
+          partitionOfRecords.foreach(record => jedis.hincrBy("wordCount", record._1, record._2))
+        } catch {
+          case ex: Exception =>
+            ex.printStackTrace()
+        } finally {
+          if (jedis != null) jedis.close()
+        }
+      }
+    }
 
     /* Start the streaming context */
     ssc.start()
     /* Wait for the computation to terminate */
     ssc.awaitTermination()
   }
 }
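
Because each partition writes with hincrBy, the per-word totals accumulate across batches in the Redis hash "wordCount". As a small sketch (assuming the same local Redis instance and the JedisPoolUtil class from this commit), the current totals can be read back like this:

import scala.collection.JavaConverters._
import com.heibaiying.utils.JedisPoolUtil

object PrintWordCounts extends App {
  val jedis = JedisPoolUtil.getConnection
  try {
    // hgetAll returns a java.util.Map[String, String]; convert it and print each word with its count
    jedis.hgetAll("wordCount").asScala.foreach { case (word, count) => println(s"$word -> $count") }
  } finally {
    jedis.close()
  }
}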

View File

@@ -4,20 +4,35 @@
 import org.apache.spark.SparkConf
 import org.apache.spark.streaming.{Seconds, StreamingContext}
 
 /**
-  * Word count
+  * Word count, upgraded version
   */
 object NetworkWordCountV2 {
 
   def main(args: Array[String]) {
 
+    /*
+     * When testing locally it is best to set the Hadoop user name explicitly; otherwise the
+     * local machine's user name is used by default, and creating directories on HDFS may
+     * then fail with a permission exception.
+     */
+    System.setProperty("HADOOP_USER_NAME", "root")
+
     /* Set the batch interval to 5 seconds */
     val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
     val ssc = new StreamingContext(sparkConf, Seconds(5))
 
+    /* A checkpoint directory must be set */
+    ssc.checkpoint("hdfs://192.168.200.229:8020/spark-streaming")
+
     /* Create a text input stream and run the word count */
     val lines = ssc.socketTextStream("192.168.200.229", 9999)
-    lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).print()
+    lines.flatMap(_.split(" ")).map(x => (x, 1))
+      .updateStateByKey((values: Seq[Int], state: Option[Int]) => {
+        val currentCount: Int = values.sum
+        val lastCount: Int = state.getOrElse(0)
+        Some(currentCount + lastCount)
+      })
+      .print()
 
     /* Start the streaming context */
     ssc.start()
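
updateStateByKey calls the update function once per key per batch, passing the key's new counts from the current batch together with its previous running total; maintaining that running state across batches is why a checkpoint directory is mandatory. A REPL-style sketch of the same lambda, pulled out to show the per-key behaviour:

val updateFn = (values: Seq[Int], state: Option[Int]) => Some(values.sum + state.getOrElse(0))

updateFn(Seq(1, 1, 1), None)   // first batch, three occurrences:      Some(3)
updateFn(Seq(1), Some(3))      // next batch, one more occurrence:     Some(4)
updateFn(Seq(), Some(4))       // batch with no new data for the key:  Some(4)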

View File

@@ -1,30 +1,30 @@
 package com.heibaiying.utils;
 
+import redis.clients.jedis.Jedis;
 import redis.clients.jedis.JedisPool;
 import redis.clients.jedis.JedisPoolConfig;
 
 public class JedisPoolUtil {
 
-    // Must be declared volatile to prevent instruction reordering
-    private static volatile JedisPool JedisPool = null;
-
-    private JedisPoolUtil() {
-        if (JedisPool != null) {
-            throw new RuntimeException("Reflective instantiation of the singleton is forbidden!");
-        }
-    }
-
-    public static JedisPool getConnect() {
-        if (JedisPool == null) {
+    /* Declared volatile to prevent instruction reordering */
+    private static volatile JedisPool jedisPool = null;
+    private static final String HOST = "localhost";
+    private static final int PORT = 6379;
+
+    /* Lazily initialized singleton via double-checked locking */
+    public static Jedis getConnection() {
+        if (jedisPool == null) {
             synchronized (JedisPoolUtil.class) {
-                if (JedisPool != null) {
+                if (jedisPool == null) {
                     JedisPoolConfig config = new JedisPoolConfig();
                     config.setMaxTotal(30);
                     config.setMaxIdle(10);
-                    JedisPool jedisPool = new JedisPool(config, "localhost", 6379);
+                    jedisPool = new JedisPool(config, HOST, PORT);
                 }
             }
         }
-        return JedisPool;
+        return jedisPool.getResource();
    }
 }
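
getConnection() now hands back a live Jedis borrowed from the pool, so every caller is responsible for closing it (as the foreachPartition block above does). As a rough sketch, not part of this commit, a small loan-pattern wrapper keeps call sites from leaking connections:

import redis.clients.jedis.Jedis
import com.heibaiying.utils.JedisPoolUtil

object RedisLoan {
  /* Borrow a Jedis from the pool, run the block, and always return the connection. */
  def withJedis[T](f: Jedis => T): T = {
    val jedis = JedisPoolUtil.getConnection
    try f(jedis) finally jedis.close()
  }
}

// Usage: RedisLoan.withJedis(_.hincrBy("wordCount", "spark", 1))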

View File

@@ -1,18 +0,0 @@
-package com.heibaiying.utils
-
-import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}
-
-object JedisPoolUtil {
-
-  /* Create the Jedis connection pool */
-  val config = new JedisPoolConfig
-  config.setMaxTotal(30)
-  config.setMaxIdle(10)
-  val jedisPool = new JedisPool(config, "localhost", 6379)
-
-  def getConnection: Jedis = {
-    jedisPool.getResource
-  }
-}