Storm integration with other frameworks
code/Storm/storm-kafka-integration/pom.xml (Normal file, 94 additions)
@@ -0,0 +1,94 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.heibaiying</groupId>
    <artifactId>storm-kafka-integration</artifactId>
    <version>1.0</version>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <!-- Package with the maven-shade-plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <configuration>
                    <createDependencyReducedPom>true</createDependencyReducedPom>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <!-- Strip signature files so the shaded jar does not fail signature verification -->
                            <excludes>
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.sf</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.dsa</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                                <exclude>META-INF/*.rsa</exclude>
                                <exclude>META-INF/*.EC</exclude>
                                <exclude>META-INF/*.ec</exclude>
                                <exclude>META-INF/MSFTSIG.SF</exclude>
                                <exclude>META-INF/MSFTSIG.RSA</exclude>
                            </excludes>
                        </filter>
                    </filters>
                    <artifactSet>
                        <!-- storm-core is supplied by the cluster at runtime, so keep it out of the shaded jar -->
                        <excludes>
                            <exclude>org.apache.storm:storm-core</exclude>
                        </excludes>
                    </artifactSet>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

    <properties>
        <storm.version>1.2.2</storm.version>
        <kafka.version>2.2.0</kafka.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>${storm.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka-client</artifactId>
            <version>${storm.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
    </dependencies>

</project>
code/Storm/storm-kafka-integration/src/main/java/com/heibaiying/kafka/read/LogConsoleBolt.java (Normal file, 40 additions)
@@ -0,0 +1,40 @@
package com.heibaiying.kafka.read;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;

import java.util.Map;

/**
 * Prints the data received from Kafka.
 */
public class LogConsoleBolt extends BaseRichBolt {

    private OutputCollector collector;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    public void execute(Tuple input) {
        try {
            String value = input.getStringByField("value");
            System.out.println("received from kafka : " + value);
            // The tuple must be acked; otherwise the Kafka messages will be consumed again.
            collector.ack(input);
        } catch (Exception e) {
            e.printStackTrace();
            collector.fail(input);
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {

    }
}
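If this bolt were extended to emit tuples downstream instead of only printing them, the ack chain would have to be preserved by anchoring each emitted tuple to its input. A minimal sketch of that pattern, not part of this commit (the class and the output field name "line" are hypothetical):

package com.heibaiying.kafka.read;

import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
 * Hypothetical variant of LogConsoleBolt that forwards tuples downstream
 * while keeping the ack chain intact (a sketch, not part of this commit).
 */
public class ForwardingBolt extends BaseRichBolt {

    private OutputCollector collector;

    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    public void execute(Tuple input) {
        try {
            String value = input.getStringByField("value");
            // Anchoring the emitted tuple to the input ties downstream failures
            // back to the spout, so the Kafka record is replayed on failure.
            collector.emit(input, new Values(value));
            collector.ack(input);
        } catch (Exception e) {
            collector.fail(input);
        }
    }

    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("line"));
    }
}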
code/Storm/storm-kafka-integration/src/main/java/com/heibaiying/kafka/read/ReadingFromKafkaApp.java (Normal file, 61 additions)
@@ -0,0 +1,61 @@
package com.heibaiying.kafka.read;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.kafka.spout.KafkaSpoutRetryExponentialBackoff;
import org.apache.storm.kafka.spout.KafkaSpoutRetryExponentialBackoff.TimeInterval;
import org.apache.storm.kafka.spout.KafkaSpoutRetryService;
import org.apache.storm.topology.TopologyBuilder;

/**
 * Reads data from Kafka.
 */
public class ReadingFromKafkaApp {

    private static final String BOOTSTRAP_SERVERS = "hadoop001:9092";
    private static final String TOPIC_NAME = "storm-topic";

    public static void main(String[] args) {

        final TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("kafka_spout", new KafkaSpout<>(getKafkaSpoutConfig(BOOTSTRAP_SERVERS, TOPIC_NAME)), 1);
        builder.setBolt("bolt", new LogConsoleBolt()).shuffleGrouping("kafka_spout");

        // Passing "cluster" as an argument submits the topology to the cluster; otherwise it runs locally
        if (args.length > 0 && args[0].equals("cluster")) {
            try {
                StormSubmitter.submitTopology("ClusterReadingFromKafkaApp", new Config(), builder.createTopology());
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                e.printStackTrace();
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("LocalReadingFromKafkaApp",
                    new Config(), builder.createTopology());
        }
    }

    private static KafkaSpoutConfig<String, String> getKafkaSpoutConfig(String bootstrapServers, String topic) {
        return KafkaSpoutConfig.builder(bootstrapServers, topic)
                // Apart from the group ID, the settings below are optional. The group ID is mandatory;
                // without it an InvalidGroupIdException is thrown.
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "kafkaSpoutTestGroup")
                // Define the retry policy
                .setRetry(getRetryService())
                // Interval between periodic offset commits; the default is 15s
                .setOffsetCommitPeriodMs(10_000)
                .build();
    }

    // Retry policy: initial delay 500µs, backoff period 2ms, unlimited retries, delay capped at 10s
    private static KafkaSpoutRetryService getRetryService() {
        return new KafkaSpoutRetryExponentialBackoff(TimeInterval.microSeconds(500),
                TimeInterval.milliSeconds(2), Integer.MAX_VALUE, TimeInterval.seconds(10));
    }
}
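One more spout setting worth knowing about here is the first-poll offset strategy, which controls where the spout starts reading when the group has no committed offset. A sketch of a drop-in variant of the method above, assuming storm-kafka-client 1.2.x, where FirstPollOffsetStrategy is a nested enum of KafkaSpoutConfig:

    // Hypothetical variant of getKafkaSpoutConfig with an explicit start position
    private static KafkaSpoutConfig<String, String> getKafkaSpoutConfigFromEarliest(String bootstrapServers, String topic) {
        return KafkaSpoutConfig.builder(bootstrapServers, topic)
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "kafkaSpoutTestGroup")
                // Resume from the committed offset if one exists,
                // otherwise read the partition from the beginning.
                .setFirstPollOffsetStrategy(KafkaSpoutConfig.FirstPollOffsetStrategy.UNCOMMITTED_EARLIEST)
                .build();
    }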
code/Storm/storm-kafka-integration/src/main/java/com/heibaiying/kafka/write/DataSourceSpout.java (Normal file, 52 additions)
@@ -0,0 +1,52 @@
package com.heibaiying.kafka.write;

import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;

import java.util.*;

/**
 * Data source that generates word-frequency samples.
 */
public class DataSourceSpout extends BaseRichSpout {

    private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");

    private SpoutOutputCollector spoutOutputCollector;

    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        this.spoutOutputCollector = spoutOutputCollector;
    }

    @Override
    public void nextTuple() {
        // Simulate incoming data
        String lineData = productData();
        spoutOutputCollector.emit(new Values("key", lineData));
        Utils.sleep(1000);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("key", "message"));
    }

    /**
     * Generates mock data: joins a random-length prefix of the shuffled word list with tabs.
     */
    private String productData() {
        Collections.shuffle(list);
        Random random = new Random();
        int endIndex = random.nextInt(list.size()) + 1;
        return StringUtils.join(list.toArray(), "\t", 0, endIndex);
    }

}
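The field names declared by this spout are not arbitrary: FieldNameBasedTupleToKafkaMapper, used in WritingToKafkaApp below, reads the Kafka record key from the tuple field "key" and the record value from "message" by default, which is exactly what the spout declares. If the spout declared different field names, they would have to be passed to the mapper explicitly. A sketch (the field names "word" and "line" are hypothetical):

    // Maps tuple field "word" to the Kafka record key and "line" to the record value
    FieldNameBasedTupleToKafkaMapper<String, String> mapper =
            new FieldNameBasedTupleToKafkaMapper<>("word", "line");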
code/Storm/storm-kafka-integration/src/main/java/com/heibaiying/kafka/write/WritingToKafkaApp.java (Normal file, 67 additions)
@@ -0,0 +1,67 @@
package com.heibaiying.kafka.write;

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.kafka.bolt.KafkaBolt;
import org.apache.storm.kafka.bolt.mapper.FieldNameBasedTupleToKafkaMapper;
import org.apache.storm.kafka.bolt.selector.DefaultTopicSelector;
import org.apache.storm.topology.TopologyBuilder;

import java.util.Properties;

/**
 * Writes data to a specific Kafka topic.
 */
public class WritingToKafkaApp {

    private static final String BOOTSTRAP_SERVERS = "hadoop001:9092";
    private static final String TOPIC_NAME = "storm-topic";

    public static void main(String[] args) {

        TopologyBuilder builder = new TopologyBuilder();

        // Define the Kafka producer properties
        Properties props = new Properties();
        /*
         * The list of broker addresses. It does not need to contain every broker;
         * the producer looks up the remaining brokers from the ones given.
         * Providing at least two brokers is recommended for fault tolerance.
         */
        props.put("bootstrap.servers", BOOTSTRAP_SERVERS);
        /*
         * The acks parameter controls how many partition replicas must receive a message
         * before the producer considers the write successful.
         * acks=0   : the producer does not wait for any response from the server.
         * acks=1   : the producer gets a success response once the partition leader receives the message.
         * acks=all : the producer gets a success response only after all replicating nodes receive the message.
         */
        props.put("acks", "1");
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        KafkaBolt<String, String> bolt = new KafkaBolt<String, String>()
                .withProducerProperties(props)
                .withTopicSelector(new DefaultTopicSelector(TOPIC_NAME))
                .withTupleToKafkaMapper(new FieldNameBasedTupleToKafkaMapper<>());

        builder.setSpout("sourceSpout", new DataSourceSpout(), 1);
        builder.setBolt("kafkaBolt", bolt, 1).shuffleGrouping("sourceSpout");

        if (args.length > 0 && args[0].equals("cluster")) {
            try {
                StormSubmitter.submitTopology("ClusterWritingToKafkaApp", new Config(), builder.createTopology());
            } catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
                e.printStackTrace();
            }
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("LocalWritingToKafkaApp",
                    new Config(), builder.createTopology());
        }
    }
}
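A quick way to verify that the topology is actually producing to the topic is a standalone consumer built on the kafka-clients dependency already declared in the pom. A minimal sketch, assuming the broker address and topic name used above (the class name TopicVerifier is hypothetical):

package com.heibaiying.kafka.write;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

/**
 * Standalone consumer for checking what WritingToKafkaApp produced (a sketch, not part of this commit).
 */
public class TopicVerifier {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop001:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "topic-verifier");
        // Read the topic from the beginning when this group has no committed offset
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");

        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("storm-topic"));
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(1));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println(record.key() + " -> " + record.value());
                }
            }
        }
    }
}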