add Flink
This commit is contained in:
parent
de7e65c29a
commit
be4ec2130f
273
code/Flink/flink-basis/pom.xml
Normal file
273
code/Flink/flink-basis/pom.xml
Normal file
@ -0,0 +1,273 @@
|
|||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
or more contributor license agreements. See the NOTICE file
|
||||||
|
distributed with this work for additional information
|
||||||
|
regarding copyright ownership. The ASF licenses this file
|
||||||
|
to you under the Apache License, Version 2.0 (the
|
||||||
|
"License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing,
|
||||||
|
software distributed under the License is distributed on an
|
||||||
|
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
KIND, either express or implied. See the License for the
|
||||||
|
specific language governing permissions and limitations
|
||||||
|
under the License.
|
||||||
|
-->
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
<groupId>com.heibaiying</groupId>
|
||||||
|
<artifactId>flink-basis</artifactId>
|
||||||
|
<version>1.0</version>
|
||||||
|
<packaging>jar</packaging>
|
||||||
|
|
||||||
|
<name>Flink Quickstart Job</name>
|
||||||
|
<url>http://www.myorganization.org</url>
|
||||||
|
|
||||||
|
<repositories>
|
||||||
|
<repository>
|
||||||
|
<id>apache.snapshots</id>
|
||||||
|
<name>Apache Development Snapshot Repository</name>
|
||||||
|
<url>https://repository.apache.org/content/repositories/snapshots/</url>
|
||||||
|
<releases>
|
||||||
|
<enabled>false</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>true</enabled>
|
||||||
|
</snapshots>
|
||||||
|
</repository>
|
||||||
|
</repositories>
|
||||||
|
|
||||||
|
<properties>
|
||||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
<flink.version>1.9.0</flink.version>
|
||||||
|
<scala.binary.version>2.11</scala.binary.version>
|
||||||
|
<scala.version>2.11.12</scala.version>
|
||||||
|
</properties>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<!-- Apache Flink dependencies -->
|
||||||
|
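<!-- The upstream archetype marks these dependencies as provided so they are not packaged into the JAR file; no <scope>provided</scope> is set here, so they currently resolve in the default (compile) scope. -->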
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.flink</groupId>
|
||||||
|
<artifactId>flink-scala_${scala.binary.version}</artifactId>
|
||||||
|
<version>${flink.version}</version>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.flink</groupId>
|
||||||
|
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
|
||||||
|
<version>${flink.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Scala Library, provided by Flink as well. -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.scala-lang</groupId>
|
||||||
|
<artifactId>scala-library</artifactId>
|
||||||
|
<version>${scala.version}</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- Add connector dependencies here. They must be in the default scope (compile). -->
|
||||||
|
|
||||||
|
<!-- Example:
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.flink</groupId>
|
||||||
|
<artifactId>flink-connector-kafka-0.10_${scala.binary.version}</artifactId>
|
||||||
|
<version>${flink.version}</version>
|
||||||
|
</dependency>
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!-- Add logging framework, to produce console output when running in the IDE. -->
|
||||||
|
<!-- These dependencies are excluded from the application JAR by default. -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.slf4j</groupId>
|
||||||
|
<artifactId>slf4j-log4j12</artifactId>
|
||||||
|
<version>1.7.7</version>
|
||||||
|
<scope>runtime</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>log4j</groupId>
|
||||||
|
<artifactId>log4j</artifactId>
|
||||||
|
<version>1.2.17</version>
|
||||||
|
<scope>runtime</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
|
||||||
|
<!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-shade-plugin</artifactId>
|
||||||
|
<version>3.0.0</version>
|
||||||
|
<executions>
|
||||||
|
<!-- Run shade goal on package phase -->
|
||||||
|
<execution>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>shade</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<artifactSet>
|
||||||
|
<excludes>
|
||||||
|
<exclude>org.apache.flink:force-shading</exclude>
|
||||||
|
<exclude>com.google.code.findbugs:jsr305</exclude>
|
||||||
|
<exclude>org.slf4j:*</exclude>
|
||||||
|
<exclude>log4j:*</exclude>
|
||||||
|
</excludes>
|
||||||
|
</artifactSet>
|
||||||
|
<filters>
|
||||||
|
<filter>
|
||||||
|
<!-- Do not copy the signatures in the META-INF folder.
|
||||||
|
Otherwise, this might cause SecurityExceptions when using the JAR. -->
|
||||||
|
<artifact>*:*</artifact>
|
||||||
|
<excludes>
|
||||||
|
<exclude>META-INF/*.SF</exclude>
|
||||||
|
<exclude>META-INF/*.DSA</exclude>
|
||||||
|
<exclude>META-INF/*.RSA</exclude>
|
||||||
|
</excludes>
|
||||||
|
</filter>
|
||||||
|
</filters>
|
||||||
|
<transformers>
|
||||||
|
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||||
|
<mainClass>com.heibaiying.StreamingJob</mainClass>
|
||||||
|
</transformer>
|
||||||
|
</transformers>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
|
||||||
|
<!-- Java Compiler -->
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
|
<version>3.1</version>
|
||||||
|
<configuration>
|
||||||
|
<source>1.8</source>
|
||||||
|
<target>1.8</target>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
|
||||||
|
<!-- Scala Compiler -->
|
||||||
|
<plugin>
|
||||||
|
<groupId>net.alchim31.maven</groupId>
|
||||||
|
<artifactId>scala-maven-plugin</artifactId>
|
||||||
|
<version>3.2.2</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<goals>
|
||||||
|
<goal>compile</goal>
|
||||||
|
<goal>testCompile</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
|
||||||
|
<!-- Eclipse Scala Integration -->
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-eclipse-plugin</artifactId>
|
||||||
|
<version>2.8</version>
|
||||||
|
<configuration>
|
||||||
|
<downloadSources>true</downloadSources>
|
||||||
|
<projectnatures>
|
||||||
|
<projectnature>org.scala-ide.sdt.core.scalanature</projectnature>
|
||||||
|
<projectnature>org.eclipse.jdt.core.javanature</projectnature>
|
||||||
|
</projectnatures>
|
||||||
|
<buildcommands>
|
||||||
|
<buildcommand>org.scala-ide.sdt.core.scalabuilder</buildcommand>
|
||||||
|
</buildcommands>
|
||||||
|
<classpathContainers>
|
||||||
|
<classpathContainer>org.scala-ide.sdt.launching.SCALA_CONTAINER</classpathContainer>
|
||||||
|
<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer>
|
||||||
|
</classpathContainers>
|
||||||
|
<excludes>
|
||||||
|
<exclude>org.scala-lang:scala-library</exclude>
|
||||||
|
<exclude>org.scala-lang:scala-compiler</exclude>
|
||||||
|
</excludes>
|
||||||
|
<sourceIncludes>
|
||||||
|
<sourceInclude>**/*.scala</sourceInclude>
|
||||||
|
<sourceInclude>**/*.java</sourceInclude>
|
||||||
|
</sourceIncludes>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.codehaus.mojo</groupId>
|
||||||
|
<artifactId>build-helper-maven-plugin</artifactId>
|
||||||
|
<version>1.7</version>
|
||||||
|
<executions>
|
||||||
|
<!-- Add src/main/scala to eclipse build path -->
|
||||||
|
<execution>
|
||||||
|
<id>add-source</id>
|
||||||
|
<phase>generate-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/main/scala</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
<!-- Add src/test/scala to eclipse build path -->
|
||||||
|
<execution>
|
||||||
|
<id>add-test-source</id>
|
||||||
|
<phase>generate-test-sources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>add-test-source</goal>
|
||||||
|
</goals>
|
||||||
|
<configuration>
|
||||||
|
<sources>
|
||||||
|
<source>src/test/scala</source>
|
||||||
|
</sources>
|
||||||
|
</configuration>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
<!-- This profile helps to make things run out of the box in IntelliJ -->
|
||||||
|
<!-- It adds Flink's core classes to the runtime class path. -->
|
||||||
|
<!-- Otherwise they are missing in IntelliJ, because the dependency is 'provided' -->
|
||||||
|
<profiles>
|
||||||
|
<profile>
|
||||||
|
<id>add-dependencies-for-IDEA</id>
|
||||||
|
|
||||||
|
<activation>
|
||||||
|
<property>
|
||||||
|
<name>idea.version</name>
|
||||||
|
</property>
|
||||||
|
</activation>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.flink</groupId>
|
||||||
|
<artifactId>flink-scala_${scala.binary.version}</artifactId>
|
||||||
|
<version>${flink.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.flink</groupId>
|
||||||
|
<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
|
||||||
|
<version>${flink.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.scala-lang</groupId>
|
||||||
|
<artifactId>scala-library</artifactId>
|
||||||
|
<version>${scala.version}</version>
|
||||||
|
<scope>compile</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</profile>
|
||||||
|
</profiles>
|
||||||
|
|
||||||
|
</project>
|
23
code/Flink/flink-basis/src/main/resources/log4j.properties
Normal file
23
code/Flink/flink-basis/src/main/resources/log4j.properties
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
################################################################################
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
# or more contributor license agreements. See the NOTICE file
|
||||||
|
# distributed with this work for additional information
|
||||||
|
# regarding copyright ownership. The ASF licenses this file
|
||||||
|
# to you under the Apache License, Version 2.0 (the
|
||||||
|
# "License"); you may not use this file except in compliance
|
||||||
|
# with the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
log4j.rootLogger=INFO, console
|
||||||
|
|
||||||
|
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||||
|
log4j.appender.console.layout=org.apache.log4j.PatternLayout
|
||||||
|
log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
4
code/Flink/flink-basis/src/main/resources/wordcount.txt
Normal file
4
code/Flink/flink-basis/src/main/resources/wordcount.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
a,a,a,a,a
|
||||||
|
b,b,b
|
||||||
|
c,c
|
||||||
|
d,d
|
@ -0,0 +1,66 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.heibaiying
|
||||||
|
|
||||||
|
import org.apache.flink.api.scala._
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Skeleton for a Flink Batch Job.
|
||||||
|
*
|
||||||
|
* For a tutorial how to write a Flink batch application, check the
|
||||||
|
* tutorials and examples on the <a href="http://flink.apache.org/docs/stable/">Flink Website</a>.
|
||||||
|
*
|
||||||
|
* To package your application into a JAR file for execution,
|
||||||
|
* change the main class in the POM.xml file to this class (simply search for 'mainClass')
|
||||||
|
* and run 'mvn clean package' on the command line.
|
||||||
|
*/
|
||||||
|
object BatchJob {

  // Entry point of the batch skeleton: obtains the execution environment and
  // submits the job under a fixed display name.
  def main(args: Array[String]): Unit = {
    // obtain the batch execution environment
    val batchEnv = ExecutionEnvironment.getExecutionEnvironment

    /*
     * Build the execution plan for Flink here.
     *
     * Typical shape: read some data from the environment, e.g.
     *   batchEnv.readTextFile(textPath);
     * then transform the resulting DataSet[String] with operations such as
     *   .filter()
     *   .flatMap()
     *   .join()
     *   .group()
     * and many more.
     *
     * Programming guide:
     *   http://flink.apache.org/docs/latest/apis/batch/index.html
     * Examples:
     *   http://flink.apache.org/docs/latest/apis/batch/examples.html
     *
     * NOTE(review): no sink is defined in this skeleton; execute() presumably
     * rejects an empty plan - confirm before running it as-is.
     */

    // run the program
    batchEnv.execute("Flink Batch Scala API Skeleton")
  }
}
|
@ -0,0 +1,63 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.heibaiying
|
||||||
|
|
||||||
|
import org.apache.flink.streaming.api.scala._
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Skeleton for a Flink Streaming Job.
|
||||||
|
*
|
||||||
|
* For a tutorial how to write a Flink streaming application, check the
|
||||||
|
* tutorials and examples on the <a href="http://flink.apache.org/docs/stable/">Flink Website</a>.
|
||||||
|
*
|
||||||
|
* To package your application into a JAR file for execution, run
|
||||||
|
* 'mvn clean package' on the command line.
|
||||||
|
*
|
||||||
|
* If you change the name of the main class (with the public static void main(String[] args))
|
||||||
|
* method, change the respective entry in the POM.xml file (simply search for 'mainClass').
|
||||||
|
*/
|
||||||
|
object StreamingJob {

  // Entry point of the streaming skeleton: obtains the streaming execution
  // environment and submits the job under a fixed display name.
  def main(args: Array[String]): Unit = {
    // obtain the streaming execution environment
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment

    /*
     * Build the execution plan for Flink here.
     *
     * Typical shape: read some data from the environment, e.g.
     *   streamEnv.readTextFile(textPath);
     * then transform the resulting DataStream[String] with operations such as
     *   .filter()
     *   .flatMap()
     *   .join()
     *   .group()
     * and many more.
     *
     * Programming guide:
     *   http://flink.apache.org/docs/latest/apis/streaming/index.html
     *
     * NOTE(review): no operator or sink is defined in this skeleton; execute()
     * presumably rejects an empty topology - confirm before running it as-is.
     */

    // run the program
    streamEnv.execute("Flink Streaming Scala API Skeleton")
  }
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package com.heibaiying
|
||||||
|
|
||||||
|
import org.apache.flink.api.scala._
|
||||||
|
|
||||||
|
object WordCountBatch {

  /** Input used when no path is given on the command line (the previously hard-coded location). */
  private val DefaultInputPath =
    "D:\\BigData-Notes\\code\\Flink\\flink-basis\\src\\main\\resources\\wordcount.txt"

  /**
   * Counts the comma-separated words of a text file and prints the (word, count) pairs.
   *
   * @param args optional; args(0) is the path of the file to read. When it is
   *             absent, DefaultInputPath is used, which keeps the earlier behaviour.
   */
  def main(args: Array[String]): Unit = {
    // Prefer a caller-supplied path so the example is not tied to one machine's directory layout.
    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    val benv = ExecutionEnvironment.getExecutionEnvironment
    val text = benv.readTextFile(inputPath)

    // Lower-case each line, split it on commas, drop empty tokens, then sum a 1 per word.
    val counts = text
      .flatMap { _.toLowerCase.split(",") filter { _.nonEmpty } }
      .map { (_, 1) }
      .groupBy(0)
      .sum(1)

    counts.print()
  }
}
|
@ -0,0 +1,25 @@
|
|||||||
|
package com.heibaiying
|
||||||
|
|
||||||
|
import org.apache.flink.streaming.api.scala._
|
||||||
|
import org.apache.flink.streaming.api.windowing.time.Time
|
||||||
|
|
||||||
|
|
||||||
|
object WordCountStreaming {

  /** Socket endpoint used when host/port are not given on the command line (the previously hard-coded values). */
  private val DefaultHost = "192.168.200.229"
  private val DefaultPort = 9999

  /**
   * Reads newline-delimited text from a socket, splits each line on commas and
   * prints per-word counts over 5-second time windows.
   *
   * @param args optional; args(0) is the socket host and args(1) the socket port.
   *             Missing values fall back to DefaultHost / DefaultPort, which keeps
   *             the earlier behaviour. A non-numeric port raises NumberFormatException.
   */
  def main(args: Array[String]): Unit = {
    val host = if (args.length > 0) args(0) else DefaultHost
    val port = if (args.length > 1) args(1).toInt else DefaultPort

    val senv = StreamExecutionEnvironment.getExecutionEnvironment

    val text: DataStream[String] = senv.socketTextStream(host, port, '\n')

    // Drop empty tokens (as WordCountBatch does) so blank input is not counted as a "" word.
    val windowCounts = text
      .flatMap { line => line.split(",").filter(_.nonEmpty) }
      .map { w => WordWithCount(w, 1) }
      .keyBy("word")
      .timeWindow(Time.seconds(5))
      .sum("count")

    windowCounts.print().setParallelism(1)

    senv.execute("Streaming WordCount")
  }

  /** A word paired with its count; the field names are referenced by keyBy("word") and sum("count"). */
  case class WordWithCount(word: String, count: Long)

}
|
130
notes/Flink开发环境搭建.md
Normal file
130
notes/Flink开发环境搭建.md
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
# Flink 开发环境搭建
|
||||||
|
|
||||||
|
## 一、安装 Scala 插件
|
||||||
|
|
||||||
|
Flink 分别提供了基于 Java 语言和 Scala 语言的 API ,如果想要使用 Scala 语言来开发 Flink 程序,可以通过在 IDEA 中安装 Scala 插件来提供语法提示,代码高亮等功能。打开 IDEA , 依次点击 `File => settings => plugins` 打开插件安装页面,搜索 Scala 插件并进行安装,安装完成后,重启 IDEA 即可生效。
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## 二、Flink 项目初始化
|
||||||
|
|
||||||
|
### 2.1 官方项目初始化方式
|
||||||
|
|
||||||
|
Flink 官方支持使用 Maven 和 Gradle 两种构建工具来构建基于 Java 语言的 Flink 项目,支持使用 SBT 和 Maven 两种构建工具来构建基于 Scala 语言的 Flink 项目。 这里以 Maven 为例进行说明,因为其可以同时支持 Java 语言和 Scala 语言项目的构建。
|
||||||
|
|
||||||
|
需要注意的是 Flink 1.9 只支持 Maven 3.0.4 以上的版本,所以需要预先进行安装。安装完成后,可以通过以下两种方式来构建项目:
|
||||||
|
|
||||||
|
**1. 直接基于 Maven Archetype 构建**
|
||||||
|
|
||||||
|
直接使用下面的 maven 语句来进行构建,然后根据交互信息的提示,依次输入 groupId , artifactId 以及包名等信息后等待初始化的完成:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
$ mvn archetype:generate \
|
||||||
|
-DarchetypeGroupId=org.apache.flink \
|
||||||
|
-DarchetypeArtifactId=flink-quickstart-java \
|
||||||
|
-DarchetypeVersion=1.9.0
|
||||||
|
```
|
||||||
|
|
||||||
|
> 注:如果想要创建基于 Scala 语言的项目,只需要将 flink-quickstart-java 换成 flink-quickstart-scala 即可,后文亦同。
|
||||||
|
|
||||||
|
**2. 使用官方脚本快速构建**
|
||||||
|
|
||||||
|
为了更方便的初始化项目,官方提供了快速构建脚本,可以通过以下命令来直接进行调用:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ curl https://flink.apache.org/q/quickstart.sh | bash -s 1.9.0
|
||||||
|
```
|
||||||
|
|
||||||
|
该方式其实也是通过执行 maven archetype 命令来进行初始化,其脚本内容如下:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
PACKAGE=quickstart
|
||||||
|
|
||||||
|
mvn archetype:generate \
|
||||||
|
-DarchetypeGroupId=org.apache.flink \
|
||||||
|
-DarchetypeArtifactId=flink-quickstart-java \
|
||||||
|
-DarchetypeVersion=${1:-1.8.0} \
|
||||||
|
-DgroupId=org.myorg.quickstart \
|
||||||
|
-DartifactId=$PACKAGE \
|
||||||
|
-Dversion=0.1 \
|
||||||
|
-Dpackage=org.myorg.quickstart \
|
||||||
|
-DinteractiveMode=false
|
||||||
|
```
|
||||||
|
|
||||||
|
可以看到相比于第一种方式,该种方式只是直接指定好了 groupId ,artifactId ,version 等信息而已。
|
||||||
|
|
||||||
|
### 2.2 使用 IDEA 快速构建
|
||||||
|
|
||||||
|
如果你使用的开发工具是 IDEA ,可以直接在项目创建页面选择 Maven Flink Archetype 进行项目初始化:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
如果你的 IDEA 没有上述 Archetype, 可以通过点击右上角的 `ADD ARCHETYPE` ,来进行添加,依次填入所需信息,这些信息都可以从上述的 `archetype:generate ` 语句中获取。点击 `OK` 保存后,该 Archetype 就会一直存在于你的 IDEA 中,之后每次创建项目时,只需要直接选择该 Archetype 即可。
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
选中 Flink Archetype ,然后点击 `NEXT` 按钮,之后的所有步骤都和正常的 Maven 工程相同。创建完成后的项目结构如下:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
## 三、词频统计案例
|
||||||
|
|
||||||
|
### 3.1 案例代码
|
||||||
|
|
||||||
|
创建完成后,可以先书写一个简单的词频统计的案例来尝试运行 Flink 项目,这里以 Scala 语言为例,代码如下:
|
||||||
|
|
||||||
|
```scala
|
||||||
|
package com.heibaiying
|
||||||
|
|
||||||
|
import org.apache.flink.api.scala._
|
||||||
|
|
||||||
|
object WordCountBatch {
|
||||||
|
|
||||||
|
def main(args: Array[String]): Unit = {
|
||||||
|
val benv = ExecutionEnvironment.getExecutionEnvironment
|
||||||
|
val text = benv.readTextFile("D:\\wordcount.txt")
|
||||||
|
val counts = text.flatMap { _.toLowerCase.split(",") filter { _.nonEmpty } }.map { (_, 1) }.groupBy(0).sum(1)
|
||||||
|
counts.print()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
其中 `wordcount.txt` 中的内容如下:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
a,a,a,a,a
|
||||||
|
b,b,b
|
||||||
|
c,c
|
||||||
|
d,d
|
||||||
|
```
|
||||||
|
|
||||||
|
本机不需要安装其他任何的 Flink 环境,直接运行 Main 方法即可,结果如下:
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
### 3.2 常见异常
|
||||||
|
|
||||||
|
这里常见的一个启动异常如下所示。之所以出现这样的情况,是因为 Maven 提供的 Flink Archetype 默认是以生产环境为标准的:由于 Flink 的安装包中默认就有 Flink 相关的 JAR 包,所以在 Maven 中这些 JAR 都被标识为 `<scope>provided</scope>`,只需要去掉该标签即可。
|
||||||
|
|
||||||
|
```shell
|
||||||
|
Caused by: java.lang.ClassNotFoundException: org.apache.flink.api.common.typeinfo.TypeInformation
|
||||||
|
```
|
||||||
|
## 四、使用 Scala 命令行
|
||||||
|
|
||||||
|
https://flink.apache.org/downloads.html
|
||||||
|
|
||||||
|
start-scala-shell.sh
|
||||||
|
|
||||||
|
```shell
|
||||||
|
[root@hadoop001 bin]# ./start-scala-shell.sh
|
||||||
|
错误: 找不到或无法加载主类 org.apache.flink.api.scala.FlinkShell
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
BIN
pictures/flink-basis-project.png
Normal file
BIN
pictures/flink-basis-project.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 13 KiB |
BIN
pictures/flink-maven-new.png
Normal file
BIN
pictures/flink-maven-new.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 65 KiB |
BIN
pictures/flink-maven.png
Normal file
BIN
pictures/flink-maven.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 59 KiB |
BIN
pictures/flink-scala-shell.png
Normal file
BIN
pictures/flink-scala-shell.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 18 KiB |
BIN
pictures/flink-word-count.png
Normal file
BIN
pictures/flink-word-count.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
BIN
pictures/scala-plugin.png
Normal file
BIN
pictures/scala-plugin.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 47 KiB |
Loading…
x
Reference in New Issue
Block a user