add Flink
This commit is contained in:
		
							
								
								
									
										273
									
								
								code/Flink/flink-basis/pom.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										273
									
								
								code/Flink/flink-basis/pom.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,273 @@ | |||||||
|  | <!-- | ||||||
|  | Licensed to the Apache Software Foundation (ASF) under one | ||||||
|  | or more contributor license agreements.  See the NOTICE file | ||||||
|  | distributed with this work for additional information | ||||||
|  | regarding copyright ownership.  The ASF licenses this file | ||||||
|  | to you under the Apache License, Version 2.0 (the | ||||||
|  | "License"); you may not use this file except in compliance | ||||||
|  | with the License.  You may obtain a copy of the License at | ||||||
|  |  | ||||||
|  |   http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  | ||||||
|  | Unless required by applicable law or agreed to in writing, | ||||||
|  | software distributed under the License is distributed on an | ||||||
|  | "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||||||
|  | KIND, either express or implied.  See the License for the | ||||||
|  | specific language governing permissions and limitations | ||||||
|  | under the License. | ||||||
|  | --> | ||||||
|  | <project xmlns="http://maven.apache.org/POM/4.0.0" | ||||||
|  |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||||
|  |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||||||
|  | 	<modelVersion>4.0.0</modelVersion> | ||||||
|  |  | ||||||
|  | 	<groupId>com.heibaiying</groupId> | ||||||
|  | 	<artifactId>flink-basis</artifactId> | ||||||
|  | 	<version>1.0</version> | ||||||
|  | 	<packaging>jar</packaging> | ||||||
|  |  | ||||||
|  | 	<name>Flink Quickstart Job</name> | ||||||
|  | 	<url>http://www.myorganization.org</url> | ||||||
|  |  | ||||||
|  | 	<repositories> | ||||||
|  | 		<repository> | ||||||
|  | 			<id>apache.snapshots</id> | ||||||
|  | 			<name>Apache Development Snapshot Repository</name> | ||||||
|  | 			<url>https://repository.apache.org/content/repositories/snapshots/</url> | ||||||
|  | 			<releases> | ||||||
|  | 				<enabled>false</enabled> | ||||||
|  | 			</releases> | ||||||
|  | 			<snapshots> | ||||||
|  | 				<enabled>true</enabled> | ||||||
|  | 			</snapshots> | ||||||
|  | 		</repository> | ||||||
|  | 	</repositories> | ||||||
|  |  | ||||||
|  | 	<properties> | ||||||
|  | 		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||||||
|  | 		<flink.version>1.9.0</flink.version> | ||||||
|  | 		<scala.binary.version>2.11</scala.binary.version> | ||||||
|  | 		<scala.version>2.11.12</scala.version> | ||||||
|  | 	</properties> | ||||||
|  |  | ||||||
|  | 	<dependencies> | ||||||
|  | 		<!-- Apache Flink dependencies --> | ||||||
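|  | 		<!-- NOTE: these dependencies are declared without <scope>provided</scope> so the examples can be run directly from the IDE. Restore the provided scope before building a JAR for a cluster, where they should not be packaged into the JAR file. --> | ||||||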
|  | 		<dependency> | ||||||
|  | 			<groupId>org.apache.flink</groupId> | ||||||
|  | 			<artifactId>flink-scala_${scala.binary.version}</artifactId> | ||||||
|  | 			<version>${flink.version}</version> | ||||||
|  | 		</dependency> | ||||||
|  | 		<dependency> | ||||||
|  | 			<groupId>org.apache.flink</groupId> | ||||||
|  | 			<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId> | ||||||
|  | 			<version>${flink.version}</version> | ||||||
|  | 		</dependency> | ||||||
|  |  | ||||||
|  | 		<!-- Scala Library, provided by Flink as well. --> | ||||||
|  | 		<dependency> | ||||||
|  | 			<groupId>org.scala-lang</groupId> | ||||||
|  | 			<artifactId>scala-library</artifactId> | ||||||
|  | 			<version>${scala.version}</version> | ||||||
|  | 		</dependency> | ||||||
|  |  | ||||||
|  | 		<!-- Add connector dependencies here. They must be in the default scope (compile). --> | ||||||
|  |  | ||||||
|  | 		<!-- Example: | ||||||
|  |  | ||||||
|  | 		<dependency> | ||||||
|  | 			<groupId>org.apache.flink</groupId> | ||||||
|  | 			<artifactId>flink-connector-kafka-0.10_${scala.binary.version}</artifactId> | ||||||
|  | 			<version>${flink.version}</version> | ||||||
|  | 		</dependency> | ||||||
|  | 		--> | ||||||
|  |  | ||||||
|  | 		<!-- Add logging framework, to produce console output when running in the IDE. --> | ||||||
|  | 		<!-- These dependencies are excluded from the application JAR by default. --> | ||||||
|  | 		<dependency> | ||||||
|  | 			<groupId>org.slf4j</groupId> | ||||||
|  | 			<artifactId>slf4j-log4j12</artifactId> | ||||||
|  | 			<version>1.7.7</version> | ||||||
|  | 			<scope>runtime</scope> | ||||||
|  | 		</dependency> | ||||||
|  | 		<dependency> | ||||||
|  | 			<groupId>log4j</groupId> | ||||||
|  | 			<artifactId>log4j</artifactId> | ||||||
|  | 			<version>1.2.17</version> | ||||||
|  | 			<scope>runtime</scope> | ||||||
|  | 		</dependency> | ||||||
|  | 	</dependencies> | ||||||
|  |  | ||||||
|  | 	<build> | ||||||
|  | 		<plugins> | ||||||
|  | 			<!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. --> | ||||||
|  | 			<!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. --> | ||||||
|  | 			<plugin> | ||||||
|  | 				<groupId>org.apache.maven.plugins</groupId> | ||||||
|  | 				<artifactId>maven-shade-plugin</artifactId> | ||||||
|  | 				<version>3.0.0</version> | ||||||
|  | 				<executions> | ||||||
|  | 					<!-- Run shade goal on package phase --> | ||||||
|  | 					<execution> | ||||||
|  | 						<phase>package</phase> | ||||||
|  | 						<goals> | ||||||
|  | 							<goal>shade</goal> | ||||||
|  | 						</goals> | ||||||
|  | 						<configuration> | ||||||
|  | 							<artifactSet> | ||||||
|  | 								<excludes> | ||||||
|  | 									<exclude>org.apache.flink:force-shading</exclude> | ||||||
|  | 									<exclude>com.google.code.findbugs:jsr305</exclude> | ||||||
|  | 									<exclude>org.slf4j:*</exclude> | ||||||
|  | 									<exclude>log4j:*</exclude> | ||||||
|  | 								</excludes> | ||||||
|  | 							</artifactSet> | ||||||
|  | 							<filters> | ||||||
|  | 								<filter> | ||||||
|  | 									<!-- Do not copy the signatures in the META-INF folder. | ||||||
|  | 									Otherwise, this might cause SecurityExceptions when using the JAR. --> | ||||||
|  | 									<artifact>*:*</artifact> | ||||||
|  | 									<excludes> | ||||||
|  | 										<exclude>META-INF/*.SF</exclude> | ||||||
|  | 										<exclude>META-INF/*.DSA</exclude> | ||||||
|  | 										<exclude>META-INF/*.RSA</exclude> | ||||||
|  | 									</excludes> | ||||||
|  | 								</filter> | ||||||
|  | 							</filters> | ||||||
|  | 							<transformers> | ||||||
|  | 								<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> | ||||||
|  | 									<mainClass>com.heibaiying.StreamingJob</mainClass> | ||||||
|  | 								</transformer> | ||||||
|  | 							</transformers> | ||||||
|  | 						</configuration> | ||||||
|  | 					</execution> | ||||||
|  | 				</executions> | ||||||
|  | 			</plugin> | ||||||
|  |  | ||||||
|  | 			<!-- Java Compiler --> | ||||||
|  | 			<plugin> | ||||||
|  | 				<groupId>org.apache.maven.plugins</groupId> | ||||||
|  | 				<artifactId>maven-compiler-plugin</artifactId> | ||||||
|  | 				<version>3.1</version> | ||||||
|  | 				<configuration> | ||||||
|  | 					<source>1.8</source> | ||||||
|  | 					<target>1.8</target> | ||||||
|  | 				</configuration> | ||||||
|  | 			</plugin> | ||||||
|  |  | ||||||
|  | 			<!-- Scala Compiler --> | ||||||
|  | 			<plugin> | ||||||
|  | 				<groupId>net.alchim31.maven</groupId> | ||||||
|  | 				<artifactId>scala-maven-plugin</artifactId> | ||||||
|  | 				<version>3.2.2</version> | ||||||
|  | 				<executions> | ||||||
|  | 					<execution> | ||||||
|  | 						<goals> | ||||||
|  | 							<goal>compile</goal> | ||||||
|  | 							<goal>testCompile</goal> | ||||||
|  | 						</goals> | ||||||
|  | 					</execution> | ||||||
|  | 				</executions> | ||||||
|  | 			</plugin> | ||||||
|  |  | ||||||
|  | 			<!-- Eclipse Scala Integration --> | ||||||
|  | 			<plugin> | ||||||
|  | 				<groupId>org.apache.maven.plugins</groupId> | ||||||
|  | 				<artifactId>maven-eclipse-plugin</artifactId> | ||||||
|  | 				<version>2.8</version> | ||||||
|  | 				<configuration> | ||||||
|  | 					<downloadSources>true</downloadSources> | ||||||
|  | 					<projectnatures> | ||||||
|  | 						<projectnature>org.scala-ide.sdt.core.scalanature</projectnature> | ||||||
|  | 						<projectnature>org.eclipse.jdt.core.javanature</projectnature> | ||||||
|  | 					</projectnatures> | ||||||
|  | 					<buildcommands> | ||||||
|  | 						<buildcommand>org.scala-ide.sdt.core.scalabuilder</buildcommand> | ||||||
|  | 					</buildcommands> | ||||||
|  | 					<classpathContainers> | ||||||
|  | 						<classpathContainer>org.scala-ide.sdt.launching.SCALA_CONTAINER</classpathContainer> | ||||||
|  | 						<classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER</classpathContainer> | ||||||
|  | 					</classpathContainers> | ||||||
|  | 					<excludes> | ||||||
|  | 						<exclude>org.scala-lang:scala-library</exclude> | ||||||
|  | 						<exclude>org.scala-lang:scala-compiler</exclude> | ||||||
|  | 					</excludes> | ||||||
|  | 					<sourceIncludes> | ||||||
|  | 						<sourceInclude>**/*.scala</sourceInclude> | ||||||
|  | 						<sourceInclude>**/*.java</sourceInclude> | ||||||
|  | 					</sourceIncludes> | ||||||
|  | 				</configuration> | ||||||
|  | 			</plugin> | ||||||
|  | 			<plugin> | ||||||
|  | 				<groupId>org.codehaus.mojo</groupId> | ||||||
|  | 				<artifactId>build-helper-maven-plugin</artifactId> | ||||||
|  | 				<version>1.7</version> | ||||||
|  | 				<executions> | ||||||
|  | 					<!-- Add src/main/scala to eclipse build path --> | ||||||
|  | 					<execution> | ||||||
|  | 						<id>add-source</id> | ||||||
|  | 						<phase>generate-sources</phase> | ||||||
|  | 						<goals> | ||||||
|  | 							<goal>add-source</goal> | ||||||
|  | 						</goals> | ||||||
|  | 						<configuration> | ||||||
|  | 							<sources> | ||||||
|  | 								<source>src/main/scala</source> | ||||||
|  | 							</sources> | ||||||
|  | 						</configuration> | ||||||
|  | 					</execution> | ||||||
|  | 					<!-- Add src/test/scala to eclipse build path --> | ||||||
|  | 					<execution> | ||||||
|  | 						<id>add-test-source</id> | ||||||
|  | 						<phase>generate-test-sources</phase> | ||||||
|  | 						<goals> | ||||||
|  | 							<goal>add-test-source</goal> | ||||||
|  | 						</goals> | ||||||
|  | 						<configuration> | ||||||
|  | 							<sources> | ||||||
|  | 								<source>src/test/scala</source> | ||||||
|  | 							</sources> | ||||||
|  | 						</configuration> | ||||||
|  | 					</execution> | ||||||
|  | 				</executions> | ||||||
|  | 			</plugin> | ||||||
|  | 		</plugins> | ||||||
|  | 	</build> | ||||||
|  |  | ||||||
|  | 	<!-- This profile helps to make things run out of the box in IntelliJ --> | ||||||
|  | 	<!-- It adds Flink's core classes to the runtime class path. --> | ||||||
|  | 	<!-- Only needed when the Flink dependencies are declared as 'provided'; otherwise they would be missing in IntelliJ. --> | ||||||
|  | 	<profiles> | ||||||
|  | 		<profile> | ||||||
|  | 			<id>add-dependencies-for-IDEA</id> | ||||||
|  |  | ||||||
|  | 			<activation> | ||||||
|  | 				<property> | ||||||
|  | 					<name>idea.version</name> | ||||||
|  | 				</property> | ||||||
|  | 			</activation> | ||||||
|  |  | ||||||
|  | 			<dependencies> | ||||||
|  | 				<dependency> | ||||||
|  | 					<groupId>org.apache.flink</groupId> | ||||||
|  | 					<artifactId>flink-scala_${scala.binary.version}</artifactId> | ||||||
|  | 					<version>${flink.version}</version> | ||||||
|  | 					<scope>compile</scope> | ||||||
|  | 				</dependency> | ||||||
|  | 				<dependency> | ||||||
|  | 					<groupId>org.apache.flink</groupId> | ||||||
|  | 					<artifactId>flink-streaming-scala_${scala.binary.version}</artifactId> | ||||||
|  | 					<version>${flink.version}</version> | ||||||
|  | 					<scope>compile</scope> | ||||||
|  | 				</dependency> | ||||||
|  | 				<dependency> | ||||||
|  | 					<groupId>org.scala-lang</groupId> | ||||||
|  | 					<artifactId>scala-library</artifactId> | ||||||
|  | 					<version>${scala.version}</version> | ||||||
|  | 					<scope>compile</scope> | ||||||
|  | 				</dependency> | ||||||
|  | 			</dependencies> | ||||||
|  | 		</profile> | ||||||
|  | 	</profiles> | ||||||
|  |  | ||||||
|  | </project> | ||||||
							
								
								
									
										23
									
								
								code/Flink/flink-basis/src/main/resources/log4j.properties
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								code/Flink/flink-basis/src/main/resources/log4j.properties
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | |||||||
|  | ################################################################################ | ||||||
|  | #  Licensed to the Apache Software Foundation (ASF) under one | ||||||
|  | #  or more contributor license agreements.  See the NOTICE file | ||||||
|  | #  distributed with this work for additional information | ||||||
|  | #  regarding copyright ownership.  The ASF licenses this file | ||||||
|  | #  to you under the Apache License, Version 2.0 (the | ||||||
|  | #  "License"); you may not use this file except in compliance | ||||||
|  | #  with the License.  You may obtain a copy of the License at | ||||||
|  | # | ||||||
|  | #      http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  | # | ||||||
|  | #  Unless required by applicable law or agreed to in writing, software | ||||||
|  | #  distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | #  See the License for the specific language governing permissions and | ||||||
|  | # limitations under the License. | ||||||
|  | ################################################################################ | ||||||
|  |  | ||||||
|  | log4j.rootLogger=INFO, console | ||||||
|  |  | ||||||
|  | log4j.appender.console=org.apache.log4j.ConsoleAppender | ||||||
|  | log4j.appender.console.layout=org.apache.log4j.PatternLayout | ||||||
|  | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n | ||||||
							
								
								
									
										4
									
								
								code/Flink/flink-basis/src/main/resources/wordcount.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								code/Flink/flink-basis/src/main/resources/wordcount.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,4 @@ | |||||||
|  | a,a,a,a,a | ||||||
|  | b,b,b | ||||||
|  | c,c | ||||||
|  | d,d | ||||||
| @@ -0,0 +1,66 @@ | |||||||
|  | /* | ||||||
|  |  * Licensed to the Apache Software Foundation (ASF) under one | ||||||
|  |  * or more contributor license agreements.  See the NOTICE file | ||||||
|  |  * distributed with this work for additional information | ||||||
|  |  * regarding copyright ownership.  The ASF licenses this file | ||||||
|  |  * to you under the Apache License, Version 2.0 (the | ||||||
|  |  * "License"); you may not use this file except in compliance | ||||||
|  |  * with the License.  You may obtain a copy of the License at | ||||||
|  |  * | ||||||
|  |  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  * | ||||||
|  |  * Unless required by applicable law or agreed to in writing, software | ||||||
|  |  * distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  |  * See the License for the specific language governing permissions and | ||||||
|  |  * limitations under the License. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | package com.heibaiying | ||||||
|  |  | ||||||
|  | import org.apache.flink.api.scala._ | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Template entry point for a Flink batch program written against the Scala DataSet API. | ||||||
|  |  * | ||||||
|  |  * Tutorials and examples for batch applications are available on the | ||||||
|  |  * <a href="http://flink.apache.org/docs/stable/">Flink Website</a>. | ||||||
|  |  * | ||||||
|  |  * To ship this job as a JAR file, point the 'mainClass' entry of the POM.xml file | ||||||
|  |  * at this object and run 'mvn clean package' on the command line. | ||||||
|  |  */ | ||||||
|  | object BatchJob { | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Program entry point. | ||||||
|  |    * | ||||||
|  |    * @param args command-line arguments; not read by this skeleton | ||||||
|  |    */ | ||||||
|  |   def main(args: Array[String]): Unit = { | ||||||
|  |     // Obtain the batch execution environment. | ||||||
|  |     val batchEnv = ExecutionEnvironment.getExecutionEnvironment | ||||||
|  |  | ||||||
|  |     // Build the execution plan here: | ||||||
|  |     //   1. read some data from the environment, e.g. batchEnv.readTextFile(textPath) | ||||||
|  |     //   2. transform the resulting DataSet[String] with operations such as | ||||||
|  |     //      .filter(), .flatMap(), .join(), .group() and many more | ||||||
|  |     // | ||||||
|  |     // Programming guide: http://flink.apache.org/docs/latest/apis/batch/index.html | ||||||
|  |     // Examples:          http://flink.apache.org/docs/latest/apis/batch/examples.html | ||||||
|  |     // | ||||||
|  |     // NOTE(review): no source or sink is defined in this skeleton, so the call below | ||||||
|  |     // presumably has nothing to run until a plan is added - confirm. | ||||||
|  |  | ||||||
|  |     // Submit the plan under the given job name. | ||||||
|  |     batchEnv.execute("Flink Batch Scala API Skeleton") | ||||||
|  |   } | ||||||
|  | } | ||||||
| @@ -0,0 +1,63 @@ | |||||||
|  | /* | ||||||
|  |  * Licensed to the Apache Software Foundation (ASF) under one | ||||||
|  |  * or more contributor license agreements.  See the NOTICE file | ||||||
|  |  * distributed with this work for additional information | ||||||
|  |  * regarding copyright ownership.  The ASF licenses this file | ||||||
|  |  * to you under the Apache License, Version 2.0 (the | ||||||
|  |  * "License"); you may not use this file except in compliance | ||||||
|  |  * with the License.  You may obtain a copy of the License at | ||||||
|  |  * | ||||||
|  |  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  * | ||||||
|  |  * Unless required by applicable law or agreed to in writing, software | ||||||
|  |  * distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  |  * See the License for the specific language governing permissions and | ||||||
|  |  * limitations under the License. | ||||||
|  |  */ | ||||||
|  |  | ||||||
|  | package com.heibaiying | ||||||
|  |  | ||||||
|  | import org.apache.flink.streaming.api.scala._ | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Template entry point for a Flink streaming program written against the Scala DataStream API. | ||||||
|  |  * | ||||||
|  |  * Tutorials and examples for streaming applications are available on the | ||||||
|  |  * <a href="http://flink.apache.org/docs/stable/">Flink Website</a>. | ||||||
|  |  * | ||||||
|  |  * Run 'mvn clean package' on the command line to package the application into a JAR file. | ||||||
|  |  * | ||||||
|  |  * If this object (the one holding the main method) is renamed, update the matching | ||||||
|  |  * 'mainClass' entry in the POM.xml file as well. | ||||||
|  |  */ | ||||||
|  | object StreamingJob { | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Program entry point. | ||||||
|  |    * | ||||||
|  |    * @param args command-line arguments; not read by this skeleton | ||||||
|  |    */ | ||||||
|  |   def main(args: Array[String]): Unit = { | ||||||
|  |     // Obtain the streaming execution environment. | ||||||
|  |     val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment | ||||||
|  |  | ||||||
|  |     // Build the execution plan here: | ||||||
|  |     //   1. read some data from the environment, e.g. streamEnv.readTextFile(textPath) | ||||||
|  |     //   2. transform the resulting DataStream[String] with operations such as | ||||||
|  |     //      .filter(), .flatMap(), .join(), .group() and many more | ||||||
|  |     // | ||||||
|  |     // Programming guide: http://flink.apache.org/docs/latest/apis/streaming/index.html | ||||||
|  |     // | ||||||
|  |     // NOTE(review): no source or sink is defined in this skeleton, so the call below | ||||||
|  |     // presumably has nothing to run until a plan is added - confirm. | ||||||
|  |  | ||||||
|  |     // Submit the plan under the given job name. | ||||||
|  |     streamEnv.execute("Flink Streaming Scala API Skeleton") | ||||||
|  |   } | ||||||
|  | } | ||||||
| @@ -0,0 +1,13 @@ | |||||||
|  | package com.heibaiying | ||||||
|  |  | ||||||
|  | import org.apache.flink.api.scala._ | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Batch word count: reads a text file of comma-separated words and prints | ||||||
|  |  * how many times each (lower-cased) word occurs. | ||||||
|  |  * | ||||||
|  |  * Usage: WordCountBatch [inputPath] | ||||||
|  |  */ | ||||||
|  | object WordCountBatch { | ||||||
|  |  | ||||||
|  |   /** Input file used when no path is passed on the command line (the original hard-coded location). */ | ||||||
|  |   private val DefaultInputPath = | ||||||
|  |     "D:\\BigData-Notes\\code\\Flink\\flink-basis\\src\\main\\resources\\wordcount.txt" | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Program entry point. | ||||||
|  |    * | ||||||
|  |    * @param args optional; args(0) is the path of the file to count, falling back | ||||||
|  |    *             to [[DefaultInputPath]] when absent | ||||||
|  |    */ | ||||||
|  |   def main(args: Array[String]): Unit = { | ||||||
|  |     // Let callers on other machines supply their own file instead of editing the source. | ||||||
|  |     val inputPath = args.headOption.getOrElse(DefaultInputPath) | ||||||
|  |  | ||||||
|  |     val benv = ExecutionEnvironment.getExecutionEnvironment | ||||||
|  |     val text = benv.readTextFile(inputPath) | ||||||
|  |  | ||||||
|  |     val counts = text | ||||||
|  |       // Split each line on commas, dropping the empty tokens produced by e.g. "a,,b". | ||||||
|  |       .flatMap { _.toLowerCase.split(",").filter(_.nonEmpty) } | ||||||
|  |       // Pair every word with an initial count of 1. | ||||||
|  |       .map { (_, 1) } | ||||||
|  |       // Group by the word (tuple field 0) and add up the counts (tuple field 1). | ||||||
|  |       .groupBy(0) | ||||||
|  |       .sum(1) | ||||||
|  |  | ||||||
|  |     counts.print() | ||||||
|  |   } | ||||||
|  | } | ||||||
| @@ -0,0 +1,25 @@ | |||||||
|  | package com.heibaiying | ||||||
|  |  | ||||||
|  | import org.apache.flink.streaming.api.scala._ | ||||||
|  | import org.apache.flink.streaming.api.windowing.time.Time | ||||||
|  |  | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * Streaming word count: reads newline-delimited text from a socket, splits each | ||||||
|  |  * line on commas and prints per-word counts for 5-second time windows. | ||||||
|  |  * | ||||||
|  |  * Usage: WordCountStreaming [host] [port] | ||||||
|  |  */ | ||||||
|  | object WordCountStreaming { | ||||||
|  |  | ||||||
|  |   /** Socket host used when none is passed on the command line (the original hard-coded value). */ | ||||||
|  |   private val DefaultHost = "192.168.200.229" | ||||||
|  |  | ||||||
|  |   /** Socket port used when none is passed on the command line (the original hard-coded value). */ | ||||||
|  |   private val DefaultPort = 9999 | ||||||
|  |  | ||||||
|  |   /** | ||||||
|  |    * Program entry point. | ||||||
|  |    * | ||||||
|  |    * @param args optional; args(0) is the socket host and args(1) the socket port, | ||||||
|  |    *             falling back to [[DefaultHost]] and [[DefaultPort]] when absent | ||||||
|  |    * @throws NumberFormatException if args(1) is present but is not an integer | ||||||
|  |    */ | ||||||
|  |   def main(args: Array[String]): Unit = { | ||||||
|  |     // Let callers point the job at their own socket instead of editing the source. | ||||||
|  |     val host = if (args.length > 0) args(0) else DefaultHost | ||||||
|  |     val port = if (args.length > 1) args(1).toInt else DefaultPort | ||||||
|  |  | ||||||
|  |     val senv = StreamExecutionEnvironment.getExecutionEnvironment | ||||||
|  |  | ||||||
|  |     val text: DataStream[String] = senv.socketTextStream(host, port, '\n') | ||||||
|  |     val windowCounts = text | ||||||
|  |       // Drop empty tokens (blank lines, "a,,b") so they are not counted as the word "", | ||||||
|  |       // matching the filtering done in WordCountBatch. | ||||||
|  |       .flatMap { line => line.split(",").filter(_.nonEmpty) } | ||||||
|  |       .map { w => WordWithCount(w, 1) } | ||||||
|  |       // Key by the "word" field and sum the "count" field within each 5-second window. | ||||||
|  |       .keyBy("word") | ||||||
|  |       .timeWindow(Time.seconds(5)) | ||||||
|  |       .sum("count") | ||||||
|  |  | ||||||
|  |     // Print through a single parallel instance. | ||||||
|  |     windowCounts.print().setParallelism(1) | ||||||
|  |  | ||||||
|  |     senv.execute("Streaming WordCount") | ||||||
|  |  | ||||||
|  |   } | ||||||
|  |  | ||||||
|  |   /** A word together with the number of times it has been seen. */ | ||||||
|  |   case class WordWithCount(word: String, count: Long) | ||||||
|  |  | ||||||
|  | } | ||||||
							
								
								
									
										130
									
								
								notes/Flink开发环境搭建.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								notes/Flink开发环境搭建.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,130 @@ | |||||||
|  | # Flink 开发环境搭建 | ||||||
|  |  | ||||||
|  | ## 一、安装 Scala 插件 | ||||||
|  |  | ||||||
|  | Flink 分别提供了基于 Java 语言和 Scala 语言的 API ,如果想要使用 Scala 语言来开发 Flink 程序,可以通过在 IDEA 中安装 Scala 插件来提供语法提示,代码高亮等功能。打开 IDEA , 依次点击 `File => settings => plugins` 打开插件安装页面,搜索 Scala 插件并进行安装,安装完成后,重启 IDEA 即可生效。   | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## 二、Flink 项目初始化 | ||||||
|  |  | ||||||
|  | ### 2.1 官方项目初始化方式 | ||||||
|  |  | ||||||
|  | Flink 官方支持使用 Maven 和 Gradle 两种构建工具来构建基于 Java 语言的 Flink 项目,支持使用 SBT 和 Maven 两种构建工具来构建基于 Scala 语言的 Flink 项目。 这里以 Maven 为例进行说明,因为其可以同时支持 Java 语言和 Scala 语言项目的构建。 | ||||||
|  |  | ||||||
|  | 需要注意的是 Flink 1.9 只支持 Maven 3.0.4 以上的版本,所以需要预先进行安装。安装完成后,可以通过以下两种方式来构建项目: | ||||||
|  |  | ||||||
|  | **1. 直接基于 Maven Archetype 构建** | ||||||
|  |  | ||||||
|  | 直接使用下面的 maven 语句来进行构建,然后根据交互信息的提示,依次输入 groupId , artifactId 以及包名等信息后等待初始化的完成:  | ||||||
|  |  | ||||||
|  | ```bash | ||||||
|  | $ mvn archetype:generate                               \ | ||||||
|  |       -DarchetypeGroupId=org.apache.flink              \ | ||||||
|  |       -DarchetypeArtifactId=flink-quickstart-java      \ | ||||||
|  |       -DarchetypeVersion=1.9.0 | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | > 注:如果想要创建基于 Scala 语言的项目,只需要将 flink-quickstart-java 换成 flink-quickstart-scala 即可,后文亦同。 | ||||||
|  |  | ||||||
|  | **2. 使用官方脚本快速构建** | ||||||
|  |  | ||||||
|  | 为了更方便的初始化项目,官方提供了快速构建脚本,可以通过以下命令来直接进行调用: | ||||||
|  |  | ||||||
|  | ```shell | ||||||
|  | $ curl https://flink.apache.org/q/quickstart.sh | bash -s 1.9.0 | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | 该方式其实也是通过执行 maven archetype 命令来进行初始化,其脚本内容如下: | ||||||
|  |  | ||||||
|  | ```shell | ||||||
|  | PACKAGE=quickstart | ||||||
|  |  | ||||||
|  | mvn archetype:generate								\ | ||||||
|  |   -DarchetypeGroupId=org.apache.flink				\ | ||||||
|  |   -DarchetypeArtifactId=flink-quickstart-java		\ | ||||||
|  |   -DarchetypeVersion=${1:-1.8.0}							\ | ||||||
|  |   -DgroupId=org.myorg.quickstart					\ | ||||||
|  |   -DartifactId=$PACKAGE								\ | ||||||
|  |   -Dversion=0.1										\ | ||||||
|  |   -Dpackage=org.myorg.quickstart					\ | ||||||
|  |   -DinteractiveMode=false | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | 可以看到相比于第一种方式,该种方式只是直接指定好了 groupId ,artifactId ,version 等信息而已。 | ||||||
|  |  | ||||||
|  | ### 2.2 使用 IDEA 快速构建 | ||||||
|  |  | ||||||
|  | 如果你使用的开发工具是 IDEA ，可以直接在项目创建页面选择 Maven Flink Archetype 进行项目初始化： | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | 如果你的 IDEA 没有上述 Archetype，可以通过点击右上角的 `ADD ARCHETYPE` 来进行添加，依次填入所需信息，这些信息都可以从上述的 `archetype:generate` 语句中获取。点击 `OK` 保存后，该 Archetype 就会一直存在于你的 IDEA 中，之后每次创建项目时，只需要直接选择该 Archetype 即可。 | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | 选中 Flink Archetype ,然后点击 `NEXT` 按钮,之后的所有步骤都和正常的 Maven 工程相同。创建完成后的项目结构如下: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ## 三、词频统计案例 | ||||||
|  |  | ||||||
|  | ### 3.1 案例代码 | ||||||
|  |  | ||||||
|  | 创建完成后,可以先书写一个简单的词频统计的案例来尝试运行 Flink 项目,这里以 Scala 语言为例,代码如下: | ||||||
|  |  | ||||||
|  | ```scala | ||||||
|  | package com.heibaiying | ||||||
|  |  | ||||||
|  | import org.apache.flink.api.scala._ | ||||||
|  |  | ||||||
|  | object WordCountBatch { | ||||||
|  |  | ||||||
|  |   def main(args: Array[String]): Unit = { | ||||||
|  |     val benv = ExecutionEnvironment.getExecutionEnvironment | ||||||
|  |     val text = benv.readTextFile("D:\\wordcount.txt") | ||||||
|  |     val counts = text.flatMap { _.toLowerCase.split(",") filter { _.nonEmpty } }.map { (_, 1) }.groupBy(0).sum(1) | ||||||
|  |     counts.print() | ||||||
|  |   } | ||||||
|  | } | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | 其中 `wordcount.txt` 中的内容如下: | ||||||
|  |  | ||||||
|  | ```shell | ||||||
|  | a,a,a,a,a | ||||||
|  | b,b,b | ||||||
|  | c,c | ||||||
|  | d,d | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  | 本机不需要安装其他任何的 Flink 环境,直接运行 Main 方法即可,结果如下: | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | ### 3.2 常见异常 | ||||||
|  |  | ||||||
|  | 运行时常见的一个启动异常如下所示。之所以出现这样的情况，是因为 Maven 提供的 Flink Archetype 默认是以生产环境为标准的：Flink 的安装包中默认就有 Flink 相关的 JAR 包，所以在 Maven 中这些 JAR 都被标识为 `<scope>provided</scope>`，本地运行时只需要去掉该标签即可。 | ||||||
|  |  | ||||||
|  | ```shell | ||||||
|  | Caused by: java.lang.ClassNotFoundException: org.apache.flink.api.common.typeinfo.TypeInformation | ||||||
|  | ``` | ||||||
|  | ## 四、使用 Scala 命令行 | ||||||
|  |  | ||||||
|  |  https://flink.apache.org/downloads.html  | ||||||
|  |  | ||||||
|  | start-scala-shell.sh | ||||||
|  |  | ||||||
|  | ```shell | ||||||
|  | [root@hadoop001 bin]# ./start-scala-shell.sh | ||||||
|  | 错误: 找不到或无法加载主类 org.apache.flink.api.scala.FlinkShell | ||||||
|  | ``` | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
							
								
								
									
										
											BIN
										
									
								
								pictures/flink-basis-project.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								pictures/flink-basis-project.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 13 KiB | 
							
								
								
									
										
											BIN
										
									
								
								pictures/flink-maven-new.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								pictures/flink-maven-new.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 65 KiB | 
							
								
								
									
										
											BIN
										
									
								
								pictures/flink-maven.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								pictures/flink-maven.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 59 KiB | 
							
								
								
									
										
											BIN
										
									
								
								pictures/flink-scala-shell.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								pictures/flink-scala-shell.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 18 KiB | 
							
								
								
									
										
											BIN
										
									
								
								pictures/flink-word-count.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								pictures/flink-word-count.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 26 KiB | 
							
								
								
									
										
											BIN
										
									
								
								pictures/scala-plugin.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								pictures/scala-plugin.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 47 KiB | 
		Reference in New Issue
	
	Block a user