Storm整合其他框架
This commit is contained in:
110
code/Storm/storm-hdfs-integration/pom.xml
Normal file
110
code/Storm/storm-hdfs-integration/pom.xml
Normal file
@ -0,0 +1,110 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.heibaiying</groupId>
    <artifactId>storm-hdfs-integration</artifactId>
    <version>1.0</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <storm.version>1.2.2</storm.version>
    </properties>

    <!-- Cloudera repository: needed to resolve the CDH-flavoured Hadoop artifacts
         (2.6.0-cdh5.15.2) declared in the dependencies below -->
    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>


    <build>
        <plugins>
            <!-- Compile with Java 8 -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>

            <!-- Package with the shade plugin (fat jar for topology submission) -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <configuration>
                    <createDependencyReducedPom>true</createDependencyReducedPom>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <excludes>
                                <!-- storm-core is excluded from the shaded jar:
                                     it is already present on the Storm cluster -->
                                <exclude>org.apache.storm:storm-core</exclude>
                            </excludes>
                        </filter>
                    </filters>
                </configuration>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>


    <dependencies>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>${storm.version}</version>
        </dependency>
        <!-- Storm-HDFS integration dependency -->
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-hdfs</artifactId>
            <version>${storm.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-cdh5.15.2</version>
            <exclusions>
                <!-- Drop Hadoop's SLF4J binding to avoid a duplicate-binding
                     conflict with the binding shipped by storm-core -->
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0-cdh5.15.2</version>
            <exclusions>
                <!-- Same SLF4J-binding exclusion as hadoop-client above -->
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>


</project>
|
@ -0,0 +1,77 @@
|
||||
package com.heibaiying;
|
||||
|
||||
import com.heibaiying.component.DataSourceSpout;
|
||||
import org.apache.storm.Config;
|
||||
import org.apache.storm.LocalCluster;
|
||||
import org.apache.storm.StormSubmitter;
|
||||
import org.apache.storm.generated.AlreadyAliveException;
|
||||
import org.apache.storm.generated.AuthorizationException;
|
||||
import org.apache.storm.generated.InvalidTopologyException;
|
||||
import org.apache.storm.hdfs.bolt.HdfsBolt;
|
||||
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
|
||||
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
|
||||
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
|
||||
import org.apache.storm.hdfs.bolt.format.RecordFormat;
|
||||
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
|
||||
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
|
||||
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy.Units;
|
||||
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
|
||||
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
|
||||
import org.apache.storm.topology.TopologyBuilder;
|
||||
|
||||
/**
|
||||
* 进行词频统计 并将统计结果存储到HDFS中
|
||||
* <p>
|
||||
* hdfs://hadoopp001:8020 path
|
||||
*/
|
||||
public class WordCountToHdfsApp {
|
||||
|
||||
private static final String DATA_SOURCE_SPOUT = "dataSourceSpout";
|
||||
private static final String HDFS_BOLT = "hdfsBolt";
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
// 定义存储文本的分隔符
|
||||
RecordFormat format = new DelimitedRecordFormat()
|
||||
.withFieldDelimiter("|");
|
||||
|
||||
// 同步策略: 每100个tuples之后就会把数据从缓存刷新到HDFS中
|
||||
SyncPolicy syncPolicy = new CountSyncPolicy(100);
|
||||
|
||||
// 文件策略: 每个文件大小上限1M,超过限定时,创建新文件并继续写入
|
||||
FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1.0f, Units.MB);
|
||||
|
||||
// 定义完整路径
|
||||
FileNameFormat fileNameFormat = new DefaultFileNameFormat()
|
||||
.withPath("/storm-hdfs/");
|
||||
|
||||
// 定义HdfsBolt
|
||||
HdfsBolt hdfsBolt = new HdfsBolt()
|
||||
.withFsUrl("hdfs://hadoop001:8020")
|
||||
.withFileNameFormat(fileNameFormat)
|
||||
.withRecordFormat(format)
|
||||
.withRotationPolicy(rotationPolicy)
|
||||
.withSyncPolicy(syncPolicy);
|
||||
|
||||
|
||||
// 构建Topology
|
||||
TopologyBuilder builder = new TopologyBuilder();
|
||||
builder.setSpout(DATA_SOURCE_SPOUT, new DataSourceSpout());
|
||||
// save to HBase
|
||||
builder.setBolt(HDFS_BOLT, hdfsBolt, 1).shuffleGrouping(DATA_SOURCE_SPOUT);
|
||||
|
||||
|
||||
// 如果外部传参cluster则代表线上环境启动,否则代表本地启动
|
||||
if (args.length > 0 && args[0].equals("cluster")) {
|
||||
try {
|
||||
StormSubmitter.submitTopology("ClusterWordCountToHdfsApp", new Config(), builder.createTopology());
|
||||
} catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
} else {
|
||||
LocalCluster cluster = new LocalCluster();
|
||||
cluster.submitTopology("LocalWordCountToHdfsApp",
|
||||
new Config(), builder.createTopology());
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
package com.heibaiying.component;
|
||||
|
||||
import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
|
||||
import org.apache.storm.spout.SpoutOutputCollector;
|
||||
import org.apache.storm.task.TopologyContext;
|
||||
import org.apache.storm.topology.OutputFieldsDeclarer;
|
||||
import org.apache.storm.topology.base.BaseRichSpout;
|
||||
import org.apache.storm.tuple.Fields;
|
||||
import org.apache.storm.tuple.Values;
|
||||
import org.apache.storm.utils.Utils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* 产生词频样本的数据源
|
||||
*/
|
||||
public class DataSourceSpout extends BaseRichSpout {
|
||||
|
||||
private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");
|
||||
|
||||
private SpoutOutputCollector spoutOutputCollector;
|
||||
|
||||
@Override
|
||||
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
|
||||
this.spoutOutputCollector = spoutOutputCollector;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void nextTuple() {
|
||||
// 模拟产生数据
|
||||
String lineData = productData();
|
||||
spoutOutputCollector.emit(new Values(lineData));
|
||||
Utils.sleep(1000);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
|
||||
outputFieldsDeclarer.declare(new Fields("line"));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 模拟数据
|
||||
*/
|
||||
private String productData() {
|
||||
Collections.shuffle(list);
|
||||
Random random = new Random();
|
||||
int endIndex = random.nextInt(list.size()) % (list.size()) + 1;
|
||||
return StringUtils.join(list.toArray(), "\t", 0, endIndex);
|
||||
}
|
||||
|
||||
}
|
Reference in New Issue
Block a user