diff --git a/.gitignore b/.gitignore index f5a2aab..4745931 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,7 @@ settings.xml target classes out +logs transaction-logs .flattened-pom.xml secrets.yml diff --git a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555310515/1027/worker.yaml b/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555310515/1027/worker.yaml deleted file mode 100644 index ecad21e..0000000 --- a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555310515/1027/worker.yaml +++ /dev/null @@ -1,4 +0,0 @@ -worker-id: 7b8e6dbf-1e3e-4368-8f0c-1a4936042ca7 -logs.users: [] -logs.groups: [] -topology.submitter.user: ciic diff --git a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555311430/1027/worker.yaml b/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555311430/1027/worker.yaml deleted file mode 100644 index a3d2249..0000000 --- a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555311430/1027/worker.yaml +++ /dev/null @@ -1,4 +0,0 @@ -worker-id: 931219fd-8b9a-4333-9fda-5d1df11a258c -logs.users: [] -logs.groups: [] -topology.submitter.user: ciic diff --git a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555311784/1024/worker.yaml b/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555311784/1024/worker.yaml deleted file mode 100644 index e627b4a..0000000 --- a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555311784/1024/worker.yaml +++ /dev/null @@ -1,4 +0,0 @@ -worker-id: 9cdf2e0f-b135-41c6-b3fd-1502afddf212 -logs.users: [] -logs.groups: [] -topology.submitter.user: ciic diff --git a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555313003/1027/worker.yaml b/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555313003/1027/worker.yaml 
deleted file mode 100644 index 677f718..0000000 --- a/code/Storm/storm-word-count/logs/workers-artifacts/LocalWordCountTopology-1-1555313003/1027/worker.yaml +++ /dev/null @@ -1,4 +0,0 @@ -worker-id: 9837751b-8320-4651-b325-3c64898b976d -logs.users: [] -logs.groups: [] -topology.submitter.user: ciic diff --git a/code/Storm/storm-word-count/pom.xml b/code/Storm/storm-word-count/pom.xml index 0a39154..500f130 100644 --- a/code/Storm/storm-word-count/pom.xml +++ b/code/Storm/storm-word-count/pom.xml @@ -17,6 +17,19 @@ 8 + + maven-assembly-plugin + + + src/main/resources/assembly.xml + + + + com.heibaiying.wordcount.ClusterWordCountApp + + + + @@ -27,6 +40,12 @@ storm-core 1.2.2 + + org.apache.commons + commons-lang3 + 3.8.1 + + \ No newline at end of file diff --git a/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/CountBolt.java b/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/CountBolt.java index 5fd3170..bbca8a6 100644 --- a/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/CountBolt.java +++ b/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/CountBolt.java @@ -28,7 +28,7 @@ public class CountBolt extends BaseRichBolt { count++; counts.put(word, count); // 输出 - System.out.print("当前实时统计结果:"); + System.out.print("Real-time analysis results : "); counts.forEach((key, value) -> System.out.print(key + ":" + value + "; ")); System.out.println(); } diff --git a/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/DataSourceSpout.java b/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/DataSourceSpout.java index 8ca0d78..91db595 100644 --- a/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/DataSourceSpout.java +++ b/code/Storm/storm-word-count/src/main/java/com/heibaiying/wordcount/component/DataSourceSpout.java @@ -1,6 +1,6 @@ package com.heibaiying.wordcount.component; -import 
org.apache.storm.shade.org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.storm.spout.SpoutOutputCollector; import org.apache.storm.task.TopologyContext; import org.apache.storm.topology.OutputFieldsDeclarer; diff --git a/code/Storm/storm-word-count/src/main/resources/assembly.xml b/code/Storm/storm-word-count/src/main/resources/assembly.xml new file mode 100644 index 0000000..3c38fe4 --- /dev/null +++ b/code/Storm/storm-word-count/src/main/resources/assembly.xml @@ -0,0 +1,25 @@ + + + jar-with-dependencies + + + + jar + + + false + + + / + true + true + runtime + + + org.apache.storm:storm-core + + + + \ No newline at end of file diff --git a/notes/Storm编程模型详解.md b/notes/Storm编程模型详解.md index 79b0c72..6764d86 100644 --- a/notes/Storm编程模型详解.md +++ b/notes/Storm编程模型详解.md @@ -11,6 +11,7 @@     4.2 BaseRichBolt抽象类
五、词频统计案例
六、提交到服务器集群运行
+七、通用打包方法
@@ -424,7 +425,7 @@ public class ClusterWordCountApp { 打包后上传到服务器任意位置,这里我打包后的名称为`storm-word-count-1.0.jar` ```shell -# mvn clean package -DskipTests=true +# mvn clean package -Dmaven.test.skip=true ``` #### 6.3 提交Topology @@ -466,3 +467,139 @@ storm kill ClusterWordCountApp -w 3 +## 七、通用打包方法 + +#### 1. mvn package的局限性 + +上面我们直接使用`mvn package`进行项目打包,这对于没有使用外部依赖包的项目是可行的。但如果项目中使用了第三方JAR包,就会出现问题,因为`package`打包后的JAR中是不含有依赖包的,如果此时你提交到服务器上运行,就会出现找不到第三方依赖的异常。 + +这时候可能大家会有疑惑,在我们的项目中不是使用了`storm-core`这个依赖吗?其实上面之所以我们能运行成功,是因为在Storm的集群环境中提供了这个JAR包,在安装目录的lib目录下: + +
+ +为了说明这个问题我在Maven中引入了一个第三方的JAR包,并修改产生数据的方法: + +```xml + + org.apache.commons + commons-lang3 + 3.8.1 + +``` + +`StringUtils.join()`这个方法在`commons.lang3`和`storm-core`中都有,原来的代码无需任何更改,只需要在`import`时指明使用`commons.lang3`。 + +```java +import org.apache.commons.lang3.StringUtils; + +private String productData() { + Collections.shuffle(list); + Random random = new Random(); + int endIndex = random.nextInt(list.size()) % (list.size()) + 1; + return StringUtils.join(list.toArray(), "\t", 0, endIndex); +} +``` + +此时直接使用`mvn clean package`打包上传到服务器运行,就会抛出下图异常。 + +其实官方文档里面并没有推荐使用这种打包方法,而是网上很多词频统计的Demo使用了。所以在此说明一下:这种打包方式并不适用于实际的开发,因为实际开发中通常都是需要第三方的JAR包的。 + +
+ + + +#### 2. 官方推荐的打包方法 + +>If you're using Maven, the [Maven Assembly Plugin](http://maven.apache.org/plugins/maven-assembly-plugin/) can do the packaging for you. Just add this to your pom.xml: +> +>```xml +> +> maven-assembly-plugin +> +> +> jar-with-dependencies +> +> +> +> com.path.to.main.Class +> +> +> +> +>``` +> +>Then run mvn assembly:assembly to get an appropriately packaged jar. Make sure you [exclude](http://maven.apache.org/plugins/maven-assembly-plugin/examples/single/including-and-excluding-artifacts.html) the Storm jars since the cluster already has Storm on the classpath. + +其实就是两点: + ++ 使用maven-assembly-plugin进行打包,因为maven-assembly-plugin会把所有的依赖一并打包到最后的JAR中; ++ 排除掉Storm集群环境中已经提供的Storm jars。 + +按照官方文档的说明,修改我们的POM文件,如下: + +```xml + + + + maven-assembly-plugin + + + src/main/resources/assembly.xml + + + + com.heibaiying.wordcount.ClusterWordCountApp + + + + + + +``` + +其中`assembly.xml`的文件内容如下: + +```xml + + + jar-with-dependencies + + + + jar + + + false + + + / + true + true + runtime + + + org.apache.storm:storm-core + + + + +``` + +打包命令为: + +```shell +# mvn clean assembly:assembly -Dmaven.test.skip=true +``` + +打包后会同时生成两个JAR包,其中后缀为`jar-with-dependencies`是含有第三方依赖的JAR包,通过压缩工具可以看到内部已经打入了依赖包。另外后缀是由`assembly.xml`中`<id>`标签指定的,你可以自定义修改。提交该JAR到集群环境即可。 + +
+ + + +## 参考资料 + +1. [Running Topologies on a Production Cluster](http://storm.apache.org/releases/2.0.0-SNAPSHOT/Running-topologies-on-a-production-cluster.html) +2. [Pre-defined Descriptor Files](http://maven.apache.org/plugins/maven-assembly-plugin/descriptor-refs.html) \ No newline at end of file diff --git a/pictures/storm-jar.png b/pictures/storm-jar.png new file mode 100644 index 0000000..d838151 Binary files /dev/null and b/pictures/storm-jar.png differ diff --git a/pictures/storm-lib.png b/pictures/storm-lib.png new file mode 100644 index 0000000..655ea57 Binary files /dev/null and b/pictures/storm-lib.png differ diff --git a/pictures/storm-package-error.png b/pictures/storm-package-error.png new file mode 100644 index 0000000..71d2bf6 Binary files /dev/null and b/pictures/storm-package-error.png differ diff --git a/pictures/storm-word-count-console.png b/pictures/storm-word-count-console.png index 597110c..3e81a00 100644 Binary files a/pictures/storm-word-count-console.png and b/pictures/storm-word-count-console.png differ