Storm integration with other frameworks

罗祥
2019-04-17 17:55:19 +08:00
parent a339956c1a
commit d4daa775cf
13 changed files with 517 additions and 14 deletions

storm-hdfs-integration/pom.xml

@@ -0,0 +1,110 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.heibaiying</groupId>
<artifactId>storm-hdfs-integration</artifactId>
<version>1.0</version>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<storm.version>1.2.2</storm.version>
</properties>
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<build>
<plugins>
<!--compile with Java 8-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
<!--use the shade plugin for packaging-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<configuration>
<createDependencyReducedPom>true</createDependencyReducedPom>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>org.apache.storm:storm-core</exclude>
</excludes>
</filter>
</filters>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer
implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-core</artifactId>
<version>${storm.version}</version>
</dependency>
<!--Storm-HDFS integration dependency-->
<dependency>
<groupId>org.apache.storm</groupId>
<artifactId>storm-hdfs</artifactId>
<version>${storm.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-cdh5.15.2</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0-cdh5.15.2</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>
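A note on the build: storm-core is excluded from the shaded jar because the Storm cluster provides it at runtime, and bundling it typically leads to duplicate classes and defaults.yaml conflicts at submission time. Running mvn clean package should therefore produce a shaded jar (presumably target/storm-hdfs-integration-1.0.jar, following Maven's artifactId-version naming) containing only the storm-hdfs and Hadoop client dependencies.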

src/main/java/com/heibaiying/WordCountToHdfsApp.java

@@ -0,0 +1,77 @@
package com.heibaiying;
import com.heibaiying.component.DataSourceSpout;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.hdfs.bolt.HdfsBolt;
import org.apache.storm.hdfs.bolt.format.DefaultFileNameFormat;
import org.apache.storm.hdfs.bolt.format.DelimitedRecordFormat;
import org.apache.storm.hdfs.bolt.format.FileNameFormat;
import org.apache.storm.hdfs.bolt.format.RecordFormat;
import org.apache.storm.hdfs.bolt.rotation.FileRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy;
import org.apache.storm.hdfs.bolt.rotation.FileSizeRotationPolicy.Units;
import org.apache.storm.hdfs.bolt.sync.CountSyncPolicy;
import org.apache.storm.hdfs.bolt.sync.SyncPolicy;
import org.apache.storm.topology.TopologyBuilder;
/**
 * Counts word frequencies and stores the results in HDFS.
 * <p>
 * Writes to hdfs://hadoop001:8020 under the path configured below.
 */
public class WordCountToHdfsApp {
private static final String DATA_SOURCE_SPOUT = "dataSourceSpout";
private static final String HDFS_BOLT = "hdfsBolt";
public static void main(String[] args) {
// Define the field delimiter for the stored text
RecordFormat format = new DelimitedRecordFormat()
.withFieldDelimiter("|");
// Sync policy: flush buffered data to HDFS after every 100 tuples
SyncPolicy syncPolicy = new CountSyncPolicy(100);
// Rotation policy: cap each file at 1 MB; when the limit is reached, create a new file and keep writing
FileRotationPolicy rotationPolicy = new FileSizeRotationPolicy(1.0f, Units.MB);
// Define the full output path
FileNameFormat fileNameFormat = new DefaultFileNameFormat()
.withPath("/storm-hdfs/");
// Define the HdfsBolt
HdfsBolt hdfsBolt = new HdfsBolt()
.withFsUrl("hdfs://hadoop001:8020")
.withFileNameFormat(fileNameFormat)
.withRecordFormat(format)
.withRotationPolicy(rotationPolicy)
.withSyncPolicy(syncPolicy);
// Build the topology
TopologyBuilder builder = new TopologyBuilder();
builder.setSpout(DATA_SOURCE_SPOUT, new DataSourceSpout());
// write to HDFS
builder.setBolt(HDFS_BOLT, hdfsBolt, 1).shuffleGrouping(DATA_SOURCE_SPOUT);
// If "cluster" is passed as an argument, submit to the remote cluster; otherwise run locally
if (args.length > 0 && args[0].equals("cluster")) {
try {
StormSubmitter.submitTopology("ClusterWordCountToHdfsApp", new Config(), builder.createTopology());
} catch (AlreadyAliveException | InvalidTopologyException | AuthorizationException e) {
e.printStackTrace();
}
} else {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("LocalWordCountToHdfsApp",
new Config(), builder.createTopology());
}
}
}
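Usage note (a sketch, with assumed file names): running the class with no arguments starts an in-process LocalCluster, while submitting to a real cluster uses the shaded jar built above, along the lines of storm jar target/storm-hdfs-integration-1.0.jar com.heibaiying.WordCountToHdfsApp cluster. The jar name is assumed from the POM's artifactId and version. Once the topology is running, rotated output files should appear under /storm-hdfs/ on hdfs://hadoop001:8020, e.g. via hdfs dfs -ls /storm-hdfs.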

src/main/java/com/heibaiying/component/DataSourceSpout.java

@@ -0,0 +1,53 @@
package com.heibaiying.component;
import org.apache.storm.shade.org.apache.commons.lang.StringUtils;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import java.util.*;
/**
 * Spout that produces sample word data for the frequency count
 */
public class DataSourceSpout extends BaseRichSpout {
private List<String> list = Arrays.asList("Spark", "Hadoop", "HBase", "Storm", "Flink", "Hive");
private SpoutOutputCollector spoutOutputCollector;
@Override
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
this.spoutOutputCollector = spoutOutputCollector;
}
@Override
public void nextTuple() {
// Generate mock data
String lineData = productData();
spoutOutputCollector.emit(new Values(lineData));
Utils.sleep(1000);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("line"));
}
/**
 * Generates a mock line of data
 */
private String productData() {
Collections.shuffle(list);
Random random = new Random();
int endIndex = random.nextInt(list.size()) + 1; // random prefix length in [1, list.size()]
return StringUtils.join(list.toArray(), "\t", 0, endIndex);
}
}
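For reference, productData() shuffles the word list and tab-joins a random-length prefix of it, so each emitted tuple carries a single "line" field such as Spark\tHadoop\tHBase. Because the tuple has only that one field, the "|" delimiter configured in DelimitedRecordFormat never appears between words in the HDFS output; each written line is just the tab-separated sample itself.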