spark straming basis
This commit is contained in:
parent
79228ff9ef
commit
5407561d44
@ -8,5 +8,17 @@
|
|||||||
<artifactId>spark-streaming-basis</artifactId>
|
<artifactId>spark-streaming-basis</artifactId>
|
||||||
<version>1.0</version>
|
<version>1.0</version>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.spark</groupId>
|
||||||
|
<artifactId>spark-streaming_2.12</artifactId>
|
||||||
|
<version>2.4.3</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>redis.clients</groupId>
|
||||||
|
<artifactId>jedis</artifactId>
|
||||||
|
<version>2.9.0</version>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
@ -1,18 +0,0 @@
|
|||||||
package com.heibaiying
|
|
||||||
|
|
||||||
import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}
|
|
||||||
|
|
||||||
object JedisPoolUtils {
|
|
||||||
|
|
||||||
/*创建Jedis连接池*/
|
|
||||||
val config = new JedisPoolConfig
|
|
||||||
config.setMaxTotal(30)
|
|
||||||
config.setMaxIdle(10)
|
|
||||||
val jedisPool = new JedisPool(config, "localhost", 6379)
|
|
||||||
|
|
||||||
|
|
||||||
def getConnection: Jedis = {
|
|
||||||
jedisPool.getResource
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -4,23 +4,23 @@ import org.apache.spark.SparkConf
|
|||||||
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
* 词频统计
|
||||||
*/
|
*/
|
||||||
object NetworkWordCount {
|
object NetworkWordCount {
|
||||||
|
|
||||||
|
|
||||||
def main(args: Array[String]) {
|
def main(args: Array[String]) {
|
||||||
|
|
||||||
|
/*指定时间间隔为5s*/
|
||||||
val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
|
val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
|
||||||
/*指定时间间隔*/
|
val ssc = new StreamingContext(sparkConf, Seconds(5))
|
||||||
val ssc = new StreamingContext(sparkConf, Seconds(1))
|
|
||||||
|
|
||||||
val lines = ssc.socketTextStream("hadoop001", 9999)
|
/*创建文本输入流,并进行词频统计*/
|
||||||
|
val lines = ssc.socketTextStream("192.168.200.229", 9999)
|
||||||
lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).print()
|
lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).print()
|
||||||
|
|
||||||
/*启动服务*/
|
/*启动服务*/
|
||||||
ssc.start()
|
ssc.start()
|
||||||
|
|
||||||
/*等待服务结束*/
|
/*等待服务结束*/
|
||||||
ssc.awaitTermination()
|
ssc.awaitTermination()
|
||||||
|
|
||||||
|
@ -1,43 +1,45 @@
|
|||||||
package com.heibaiying
|
package com.heibaiying
|
||||||
|
|
||||||
|
import com.heibaiying.utils.JedisPoolUtil
|
||||||
import org.apache.spark.SparkConf
|
import org.apache.spark.SparkConf
|
||||||
|
import org.apache.spark.streaming.dstream.DStream
|
||||||
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
||||||
|
import redis.clients.jedis.Jedis
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 词频统计
|
* 词频统计
|
||||||
*/
|
*/
|
||||||
object NetworkWordCountV2 {
|
object NetworkWordCountToRedis {
|
||||||
|
|
||||||
|
|
||||||
def main(args: Array[String]) {
|
def main(args: Array[String]) {
|
||||||
|
|
||||||
/*
|
|
||||||
* 本地测试时最好指定hadoop用户名,否则会默认使用本地电脑的用户名,
|
|
||||||
* 此时在HDFS上创建目录时可能会抛出权限不足的异常
|
|
||||||
*/
|
|
||||||
System.setProperty("HADOOP_USER_NAME", "root")
|
|
||||||
|
|
||||||
/*指定时间间隔为5s*/
|
/*指定时间间隔为5s*/
|
||||||
val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
|
val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
|
||||||
val ssc = new StreamingContext(sparkConf, Seconds(5))
|
val ssc = new StreamingContext(sparkConf, Seconds(5))
|
||||||
|
|
||||||
/*必须要设置检查点*/
|
|
||||||
ssc.checkpoint("hdfs://192.168.200.229:8020/spark-streaming")
|
|
||||||
|
|
||||||
/*创建文本输入流,并进行词频统计*/
|
/*创建文本输入流,并进行词频统计*/
|
||||||
val lines = ssc.socketTextStream("192.168.200.229", 9999)
|
val lines = ssc.socketTextStream("192.168.200.229", 9999)
|
||||||
lines.flatMap(_.split(" ")).map(x => (x, 1))
|
val pairs: DStream[(String, Int)] = lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _)
|
||||||
.updateStateByKey((values: Seq[Int], state: Option[Int]) => {
|
|
||||||
val currentCount: Int = values.sum
|
pairs.foreachRDD { rdd =>
|
||||||
val lastCount: Int = state.getOrElse(0)
|
rdd.foreachPartition { partitionOfRecords =>
|
||||||
Some(currentCount + lastCount)
|
var jedis: Jedis = null
|
||||||
})
|
try {
|
||||||
.print()
|
jedis = JedisPoolUtil.getConnection
|
||||||
|
partitionOfRecords.foreach(record => jedis.hincrBy("wordCount", record._1, record._2))
|
||||||
|
} catch {
|
||||||
|
case ex: Exception =>
|
||||||
|
ex.printStackTrace()
|
||||||
|
} finally {
|
||||||
|
if (jedis != null) jedis.close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*启动服务*/
|
/*启动服务*/
|
||||||
ssc.start()
|
ssc.start()
|
||||||
/*等待服务结束*/
|
/*等待服务结束*/
|
||||||
ssc.awaitTermination()
|
ssc.awaitTermination()
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,20 +4,35 @@ import org.apache.spark.SparkConf
|
|||||||
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
import org.apache.spark.streaming.{Seconds, StreamingContext}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 词频统计
|
* 词频统计升级版
|
||||||
*/
|
*/
|
||||||
object NetworkWordCountV2 {
|
object NetworkWordCountV2 {
|
||||||
|
|
||||||
|
|
||||||
def main(args: Array[String]) {
|
def main(args: Array[String]) {
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 本地测试时最好指定hadoop用户名,否则会默认使用本地电脑的用户名,
|
||||||
|
* 此时在HDFS上创建目录时可能会抛出权限不足的异常
|
||||||
|
*/
|
||||||
|
System.setProperty("HADOOP_USER_NAME", "root")
|
||||||
|
|
||||||
/*指定时间间隔为5s*/
|
/*指定时间间隔为5s*/
|
||||||
val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
|
val sparkConf = new SparkConf().setAppName("NetworkWordCount").setMaster("local[2]")
|
||||||
val ssc = new StreamingContext(sparkConf, Seconds(5))
|
val ssc = new StreamingContext(sparkConf, Seconds(5))
|
||||||
|
|
||||||
|
/*必须要设置检查点*/
|
||||||
|
ssc.checkpoint("hdfs://192.168.200.229:8020/spark-streaming")
|
||||||
|
|
||||||
/*创建文本输入流,并进行词频统计*/
|
/*创建文本输入流,并进行词频统计*/
|
||||||
val lines = ssc.socketTextStream("192.168.200.229", 9999)
|
val lines = ssc.socketTextStream("192.168.200.229", 9999)
|
||||||
lines.flatMap(_.split(" ")).map(x => (x, 1)).reduceByKey(_ + _).print()
|
lines.flatMap(_.split(" ")).map(x => (x, 1))
|
||||||
|
.updateStateByKey((values: Seq[Int], state: Option[Int]) => {
|
||||||
|
val currentCount: Int = values.sum
|
||||||
|
val lastCount: Int = state.getOrElse(0)
|
||||||
|
Some(currentCount + lastCount)
|
||||||
|
})
|
||||||
|
.print()
|
||||||
|
|
||||||
/*启动服务*/
|
/*启动服务*/
|
||||||
ssc.start()
|
ssc.start()
|
||||||
|
@ -1,30 +1,30 @@
|
|||||||
package com.heibaiying.utils;
|
package com.heibaiying.utils;
|
||||||
|
|
||||||
|
import redis.clients.jedis.Jedis;
|
||||||
import redis.clients.jedis.JedisPool;
|
import redis.clients.jedis.JedisPool;
|
||||||
import redis.clients.jedis.JedisPoolConfig;
|
import redis.clients.jedis.JedisPoolConfig;
|
||||||
|
|
||||||
public class JedisPoolUtil {
|
public class JedisPoolUtil {
|
||||||
|
|
||||||
// 必须要声明为 volatile 防止指令重排序
|
/* 声明为volatile防止指令重排序 */
|
||||||
private static volatile JedisPool JedisPool = null;
|
private static volatile JedisPool jedisPool = null;
|
||||||
|
|
||||||
private JedisPoolUtil() {
|
private static final String HOST = "localhost";
|
||||||
if (JedisPool != null) {
|
private static final int PORT = 6379;
|
||||||
throw new RuntimeException("单例模式禁止反射调用!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static JedisPool getConnect() {
|
|
||||||
if (JedisPool == null) {
|
/* 双重检查锁实现懒汉式单例 */
|
||||||
|
public static Jedis getConnection() {
|
||||||
|
if (jedisPool == null) {
|
||||||
synchronized (JedisPoolUtil.class) {
|
synchronized (JedisPoolUtil.class) {
|
||||||
if (JedisPool != null) {
|
if (jedisPool == null) {
|
||||||
JedisPoolConfig config = new JedisPoolConfig();
|
JedisPoolConfig config = new JedisPoolConfig();
|
||||||
config.setMaxTotal(30);
|
config.setMaxTotal(30);
|
||||||
config.setMaxIdle(10);
|
config.setMaxIdle(10);
|
||||||
JedisPool jedisPool = new JedisPool(config, "localhost", 6379);
|
jedisPool = new JedisPool(config, HOST, PORT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return JedisPool;
|
return jedisPool.getResource();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,18 +0,0 @@
|
|||||||
package com.heibaiying.utils
|
|
||||||
|
|
||||||
import redis.clients.jedis.{Jedis, JedisPool, JedisPoolConfig}
|
|
||||||
|
|
||||||
object JedisPoolUtil {
|
|
||||||
|
|
||||||
/*创建Jedis连接池*/
|
|
||||||
val config = new JedisPoolConfig
|
|
||||||
config.setMaxTotal(30)
|
|
||||||
config.setMaxIdle(10)
|
|
||||||
val jedisPool = new JedisPool(config, "localhost", 6379)
|
|
||||||
|
|
||||||
|
|
||||||
def getConnection: Jedis = {
|
|
||||||
jedisPool.getResource
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
Loading…
x
Reference in New Issue
Block a user