diff --git a/code/Flink/flink-basis-java/src/main/java/com/heibaiying/BatchJob.java b/code/Flink/flink-basis-java/src/main/java/com/heibaiying/BatchJob.java
deleted file mode 100644
index d95a4bb..0000000
--- a/code/Flink/flink-basis-java/src/main/java/com/heibaiying/BatchJob.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.heibaiying;
-
-import org.apache.flink.api.common.functions.FlatMapFunction;
-import org.apache.flink.api.common.functions.MapFunction;
-import org.apache.flink.api.common.functions.ReduceFunction;
-import org.apache.flink.api.java.ExecutionEnvironment;
-import org.apache.flink.api.java.operators.DataSource;
-import org.apache.flink.api.java.operators.FlatMapOperator;
-import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.streaming.api.datastream.DataStream;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.flink.util.Collector;
-
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Skeleton for a Flink Batch Job.
- *
- * For a tutorial how to write a Flink batch application, check the
- * tutorials and examples on the Flink Website.
- *
- * To package your application into a JAR file for execution,
- * change the main class in the POM.xml file to this class (simply search for 'mainClass')
- * and run 'mvn clean package' on the command line.
- */
-public class BatchJob {
-
- public static void main(String[] args) throws Exception {
-
- // set up the batch execution environment
- final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
-
- env.execute("Flink Batch Java API Skeleton");
- }
-}
diff --git a/code/Flink/flink-basis-java/src/main/java/com/heibaiying/StreamingJob.java b/code/Flink/flink-basis-java/src/main/java/com/heibaiying/StreamingJob.java
index 5d248f7..a4c688f 100644
--- a/code/Flink/flink-basis-java/src/main/java/com/heibaiying/StreamingJob.java
+++ b/code/Flink/flink-basis-java/src/main/java/com/heibaiying/StreamingJob.java
@@ -1,56 +1,18 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
package com.heibaiying;
-import org.apache.flink.api.common.functions.*;
import org.apache.flink.api.java.operators.DataSource;
-import org.apache.flink.api.java.tuple.Tuple;
-import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.api.java.tuple.Tuple3;
-import org.apache.flink.streaming.api.collector.selector.OutputSelector;
-import org.apache.flink.streaming.api.datastream.*;
+import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.apache.flink.streaming.api.functions.co.CoMapFunction;
-import org.apache.flink.util.Collector;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-/**
- * Skeleton for a Flink Streaming Job.
- *
- * For a tutorial how to write a Flink streaming application, check the
- * tutorials and examples on the Flink Website.
- *
- * To package your application into a JAR file for execution, run
- * 'mvn clean package' on the command line.
- *
- * If you change the name of the main class (with the public static void main(String[] args))
- * method, change the respective entry in the POM.xml file (simply search for 'mainClass').
- */
public class StreamingJob {
- public static void main(String[] args) throws Exception {
- // set up the streaming execution environment
- final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+    private static final String ROOT_PATH = "D:\\BigData-Notes\\code\\Flink\\flink-basis-java\\src\\main\\resources\\";
+
+    public static void main(String[] args) throws Exception {
+
+        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+        // read the file as a stream and write it back out as a single file
+        DataStreamSource<String> streamSource = env.readTextFile(ROOT_PATH + "log4j.properties");
+        streamSource.writeAsText(ROOT_PATH + "out").setParallelism(1);
         env.execute();
     }
 }
diff --git a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/BatchJob.scala b/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/BatchJob.scala
deleted file mode 100644
index 6652d98..0000000
--- a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/BatchJob.scala
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.heibaiying
-
-import org.apache.flink.api.scala._
-
-/**
- * Skeleton for a Flink Batch Job.
- *
- * For a tutorial how to write a Flink batch application, check the
- * tutorials and examples on the Flink Website.
- *
- * To package your application into a JAR file for execution,
- * change the main class in the POM.xml file to this class (simply search for 'mainClass')
- * and run 'mvn clean package' on the command line.
- */
-object BatchJob {
-
- def main(args: Array[String]) {
- // set up the batch execution environment
- val env = ExecutionEnvironment.getExecutionEnvironment
-
- /*
- * Here, you can start creating your execution plan for Flink.
- *
- * Start with getting some data from the environment, like
- * env.readTextFile(textPath);
- *
- * then, transform the resulting DataSet[String] using operations
- * like
- * .filter()
- * .flatMap()
- * .join()
- * .group()
- *
- * and many more.
- * Have a look at the programming guide:
- *
- * http://flink.apache.org/docs/latest/apis/batch/index.html
- *
- * and the examples
- *
- * http://flink.apache.org/docs/latest/apis/batch/examples.html
- *
- */
-
- // execute program
- env.execute("Flink Batch Scala API Skeleton")
- }
-}
diff --git a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/StreamingJob.scala b/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/StreamingJob.scala
deleted file mode 100644
index 2325a6c..0000000
--- a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/StreamingJob.scala
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.heibaiying
-
-import org.apache.flink.streaming.api.scala._
-
-/**
- * Skeleton for a Flink Streaming Job.
- *
- * For a tutorial how to write a Flink streaming application, check the
- * tutorials and examples on the Flink Website.
- *
- * To package your application into a JAR file for execution, run
- * 'mvn clean package' on the command line.
- *
- * If you change the name of the main class (with the public static void main(String[] args))
- * method, change the respective entry in the POM.xml file (simply search for 'mainClass').
- */
-object StreamingJob {
- def main(args: Array[String]) {
- // set up the streaming execution environment
- val env = StreamExecutionEnvironment.getExecutionEnvironment
-
- /*
- * Here, you can start creating your execution plan for Flink.
- *
- * Start with getting some data from the environment, like
- * env.readTextFile(textPath);
- *
- * then, transform the resulting DataStream[String] using operations
- * like
- * .filter()
- * .flatMap()
- * .join()
- * .group()
- *
- * and many more.
- * Have a look at the programming guide:
- *
- * http://flink.apache.org/docs/latest/apis/streaming/index.html
- *
- */
-
- // execute program
- env.execute("Flink Streaming Scala API Skeleton")
- }
-}
diff --git a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountBatch.scala b/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountBatch.scala
index ff52ae7..b7a51b3 100644
--- a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountBatch.scala
+++ b/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountBatch.scala
@@ -6,8 +6,12 @@ object WordCountBatch {
def main(args: Array[String]): Unit = {
val benv = ExecutionEnvironment.getExecutionEnvironment
- val text = benv.readTextFile("D:\\BigData-Notes\\code\\Flink\\flink-basis-scala\\src\\main\\resources\\wordcount.txt")
- val counts = text.flatMap { _.toLowerCase.split(",") filter { _.nonEmpty } }.map { (_, 1) }.groupBy(0).sum(1)
- counts.print()
+ val dataSet = benv.readTextFile("D:\\BigData-Notes\\code\\Flink\\flink-basis-scala\\src\\main\\resources\\wordcount.txt")
+    dataSet.flatMap { _.toLowerCase.split(",") }
+      .filter(_.nonEmpty)
+      .map { (_, 1) }
+      .groupBy(0)
+      .sum(1)
+      .print()
}
}
diff --git a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountStreaming.scala b/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountStreaming.scala
index daad9ce..6a71e5d 100644
--- a/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountStreaming.scala
+++ b/code/Flink/flink-basis-scala/src/main/scala/com/heibaiying/WordCountStreaming.scala
@@ -10,16 +10,14 @@ object WordCountStreaming {
val senv = StreamExecutionEnvironment.getExecutionEnvironment
- val text: DataStream[String] = senv.socketTextStream("192.168.200.229", 9999, '\n')
- val windowCounts = text.flatMap { w => w.split(",") }.map { w => WordWithCount(w, 1) }.keyBy("word")
- .timeWindow(Time.seconds(5)).sum("count")
-
- windowCounts.print().setParallelism(1)
-
+ val dataStream: DataStream[String] = senv.socketTextStream("192.168.0.229", 9999, '\n')
+ dataStream.flatMap { line => line.toLowerCase.split(",") }
+ .filter(_.nonEmpty)
+ .map { word => (word, 1) }
+ .keyBy(0)
+ .timeWindow(Time.seconds(3))
+ .sum(1)
+ .print()
senv.execute("Streaming WordCount")
-
}
-
- case class WordWithCount(word: String, count: Long)
-
}
diff --git a/code/Flink/flink-kafka-integration/pom.xml b/code/Flink/flink-kafka-integration/pom.xml
new file mode 100644
index 0000000..1b80867
--- /dev/null
+++ b/code/Flink/flink-kafka-integration/pom.xml
@@ -0,0 +1,242 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.heibaiying</groupId>
+    <artifactId>flink-kafka-integration</artifactId>
+    <version>1.0</version>
+    <packaging>jar</packaging>
+
+    <name>Flink Quickstart Job</name>
+    <url>http://www.myorganization.org</url>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <flink.version>1.9.0</flink.version>
+        <java.version>1.8</java.version>
+        <scala.binary.version>2.11</scala.binary.version>
+        <maven.compiler.source>${java.version}</maven.compiler.source>
+        <maven.compiler.target>${java.version}</maven.compiler.target>
+    </properties>
+
+    <repositories>
+        <repository>
+            <id>apache.snapshots</id>
+            <name>Apache Development Snapshot Repository</name>
+            <url>https://repository.apache.org/content/repositories/snapshots/</url>
+            <releases>
+                <enabled>false</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
+    </repositories>
+
+    <dependencies>
+        <!-- Apache Flink dependencies -->
+        <!-- These dependencies are provided, because they should not be packaged into the JAR file. -->
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-java</artifactId>
+            <version>${flink.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
+            <version>${flink.version}</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <!-- Add logging framework, to produce console output when running in the IDE. -->
+        <!-- These dependencies are excluded from the application JAR by default. -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+            <version>1.7.7</version>
+            <scope>runtime</scope>
+        </dependency>
+        <dependency>
+            <groupId>log4j</groupId>
+            <artifactId>log4j</artifactId>
+            <version>1.2.17</version>
+            <scope>runtime</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-connector-kafka_2.11</artifactId>
+            <version>1.9.0</version>
+        </dependency>
+        <dependency>
+            <groupId>mysql</groupId>
+            <artifactId>mysql-connector-java</artifactId>
+            <version>8.0.16</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- Java Compiler -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.1</version>
+                <configuration>
+                    <source>${java.version}</source>
+                    <target>${java.version}</target>
+                </configuration>
+            </plugin>
+
+            <!-- Use the shade plugin to build a fat jar containing all required dependencies. -->
+            <!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.0.0</version>
+                <executions>
+                    <!-- Run the shade goal on the package phase -->
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <artifactSet>
+                                <excludes>
+                                    <exclude>org.apache.flink:force-shading</exclude>
+                                    <exclude>com.google.code.findbugs:jsr305</exclude>
+                                    <exclude>org.slf4j:*</exclude>
+                                    <exclude>log4j:*</exclude>
+                                </excludes>
+                            </artifactSet>
+                            <filters>
+                                <filter>
+                                    <!-- Do not copy the signatures in the META-INF folder.
+                                    Otherwise this might cause SecurityExceptions when using the JAR. -->
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <transformers>
+                                <transformer
+                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                                    <mainClass>com.heibaiying.KafkaStreamingJob</mainClass>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+
+        <pluginManagement>
+            <plugins>
+                <!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
+                <plugin>
+                    <groupId>org.eclipse.m2e</groupId>
+                    <artifactId>lifecycle-mapping</artifactId>
+                    <version>1.0.0</version>
+                    <configuration>
+                        <lifecycleMappingMetadata>
+                            <pluginExecutions>
+                                <pluginExecution>
+                                    <pluginExecutionFilter>
+                                        <groupId>org.apache.maven.plugins</groupId>
+                                        <artifactId>maven-shade-plugin</artifactId>
+                                        <versionRange>[3.0.0,)</versionRange>
+                                        <goals>
+                                            <goal>shade</goal>
+                                        </goals>
+                                    </pluginExecutionFilter>
+                                    <action>
+                                        <ignore/>
+                                    </action>
+                                </pluginExecution>
+                                <pluginExecution>
+                                    <pluginExecutionFilter>
+                                        <groupId>org.apache.maven.plugins</groupId>
+                                        <artifactId>maven-compiler-plugin</artifactId>
+                                        <versionRange>[3.1,)</versionRange>
+                                        <goals>
+                                            <goal>testCompile</goal>
+                                            <goal>compile</goal>
+                                        </goals>
+                                    </pluginExecutionFilter>
+                                    <action>
+                                        <ignore/>
+                                    </action>
+                                </pluginExecution>
+                            </pluginExecutions>
+                        </lifecycleMappingMetadata>
+                    </configuration>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+    </build>
+
+    <!-- This profile helps to make things run out of the box in IntelliJ -->
+    <!-- It adds Flink's core classes to the runtime class path. -->
+    <!-- Otherwise they are missing in IntelliJ, because the dependency on Flink's core classes is 'provided'. -->
+    <profiles>
+        <profile>
+            <id>add-dependencies-for-IDEA</id>
+
+            <activation>
+                <property>
+                    <name>idea.version</name>
+                </property>
+            </activation>
+
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.flink</groupId>
+                    <artifactId>flink-java</artifactId>
+                    <version>${flink.version}</version>
+                    <scope>compile</scope>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.flink</groupId>
+                    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
+                    <version>${flink.version}</version>
+                    <scope>compile</scope>
+                </dependency>
+            </dependencies>
+        </profile>
+    </profiles>
+</project>
diff --git a/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/CustomSinkJob.java b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/CustomSinkJob.java
new file mode 100644
index 0000000..14861bc
--- /dev/null
+++ b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/CustomSinkJob.java
@@ -0,0 +1,23 @@
+package com.heibaiying;
+
+import com.heibaiying.bean.Employee;
+import com.heibaiying.sink.FlinkToMySQL;
+import org.apache.flink.streaming.api.datastream.DataStreamSource;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+
+import java.sql.Date;
+
+public class CustomSinkJob {
+
+    public static void main(String[] args) throws Exception {
+
+        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+        Date date = new Date(System.currentTimeMillis());
+        // build a bounded stream of sample employees and write them to MySQL
+        DataStreamSource<Employee> streamSource = env.fromElements(
+                new Employee("hei", 10, date),
+                new Employee("bai", 20, date),
+                new Employee("ying", 30, date));
+        streamSource.addSink(new FlinkToMySQL());
+        env.execute();
+    }
+}
diff --git a/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/KafkaStreamingJob.java b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/KafkaStreamingJob.java
new file mode 100644
index 0000000..d26da5f
--- /dev/null
+++ b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/KafkaStreamingJob.java
@@ -0,0 +1,43 @@
+package com.heibaiying;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.serialization.SimpleStringSchema;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
+import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
+import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
+import org.apache.kafka.clients.producer.ProducerRecord;
+
+import javax.annotation.Nullable;
+import java.util.Properties;
+
+public class KafkaStreamingJob {
+
+    public static void main(String[] args) throws Exception {
+
+        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+
+        // 1. Kafka connection properties
+        Properties properties = new Properties();
+        properties.setProperty("bootstrap.servers", "192.168.200.229:9092");
+
+        // 2. Consume data from the input Kafka topic
+        DataStream<String> stream = env
+                .addSource(new FlinkKafkaConsumer<>("flink-stream-in-topic", new SimpleStringSchema(), properties));
+
+        // 3. Define how each result is converted into a Kafka ProducerRecord
+        KafkaSerializationSchema<String> kafkaSerializationSchema = new KafkaSerializationSchema<String>() {
+            @Override
+            public ProducerRecord<byte[], byte[]> serialize(String element, @Nullable Long timestamp) {
+                return new ProducerRecord<>("flink-stream-out-topic", element.getBytes());
+            }
+        };
+
+        // 4. Define the Flink Kafka producer
+        FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>("flink-stream-out-topic",
+                kafkaSerializationSchema, properties, FlinkKafkaProducer.Semantic.AT_LEAST_ONCE, 5);
+
+        // 5. Double each input element and write it back out to Kafka
+        stream.map((MapFunction<String, String>) value -> value + value).addSink(kafkaProducer);
+        env.execute("Flink Streaming");
+    }
+}
diff --git a/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/bean/Employee.java b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/bean/Employee.java
new file mode 100644
index 0000000..24c2179
--- /dev/null
+++ b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/bean/Employee.java
@@ -0,0 +1,42 @@
+package com.heibaiying.bean;
+
+import java.sql.Date;
+
+public class Employee {
+
+ private String name;
+ private int age;
+ private Date birthday;
+
+    // Flink POJO types require a public no-arg constructor
+    public Employee() {}
+
+ public Employee(String name, int age, Date birthday) {
+ this.name = name;
+ this.age = age;
+ this.birthday = birthday;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public void setName(String name) {
+ this.name = name;
+ }
+
+ public int getAge() {
+ return age;
+ }
+
+ public void setAge(int age) {
+ this.age = age;
+ }
+
+ public Date getBirthday() {
+ return birthday;
+ }
+
+ public void setBirthday(Date birthday) {
+ this.birthday = birthday;
+ }
+}
diff --git a/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/sink/FlinkToMySQL.java b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/sink/FlinkToMySQL.java
new file mode 100644
index 0000000..aa3e064
--- /dev/null
+++ b/code/Flink/flink-kafka-integration/src/main/java/com/heibaiying/sink/FlinkToMySQL.java
@@ -0,0 +1,43 @@
+package com.heibaiying.sink;
+
+import com.heibaiying.bean.Employee;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.PreparedStatement;
+
+public class FlinkToMySQL extends RichSinkFunction<Employee> {
+
+ private PreparedStatement stmt;
+ private Connection conn;
+
+ @Override
+ public void open(Configuration parameters) throws Exception {
+ Class.forName("com.mysql.cj.jdbc.Driver");
+ conn = DriverManager.getConnection("jdbc:mysql://192.168.200.229:3306/employees?characterEncoding=UTF-8&serverTimezone=UTC&useSSL=false", "root", "123456");
+ String sql = "insert into emp(name, age, birthday) values(?, ?, ?)";
+ stmt = conn.prepareStatement(sql);
+ }
+
+ @Override
+ public void invoke(Employee value, Context context) throws Exception {
+ stmt.setString(1, value.getName());
+ stmt.setInt(2, value.getAge());
+ stmt.setDate(3, value.getBirthday());
+ stmt.executeUpdate();
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ if (stmt != null) {
+ stmt.close();
+ }
+ if (conn != null) {
+ conn.close();
+ }
+ }
+
+}
diff --git a/code/Flink/flink-kafka-integration/src/main/resources/log4j.properties b/code/Flink/flink-kafka-integration/src/main/resources/log4j.properties
new file mode 100644
index 0000000..da32ea0
--- /dev/null
+++ b/code/Flink/flink-kafka-integration/src/main/resources/log4j.properties
@@ -0,0 +1,23 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+log4j.rootLogger=INFO, console
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
diff --git a/code/Flink/flink-time-watermark/pom.xml b/code/Flink/flink-time-watermark/pom.xml
new file mode 100644
index 0000000..2dd15f1
--- /dev/null
+++ b/code/Flink/flink-time-watermark/pom.xml
@@ -0,0 +1,232 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.heibaiying</groupId>
+    <artifactId>flink-time-watermark</artifactId>
+    <version>1.0</version>
+    <packaging>jar</packaging>
+
+    <name>Flink Quickstart Job</name>
+    <url>http://www.myorganization.org</url>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <flink.version>1.9.0</flink.version>
+        <java.version>1.8</java.version>
+        <scala.binary.version>2.11</scala.binary.version>
+        <maven.compiler.source>${java.version}</maven.compiler.source>
+        <maven.compiler.target>${java.version}</maven.compiler.target>
+    </properties>
+
+    <repositories>
+        <repository>
+            <id>apache.snapshots</id>
+            <name>Apache Development Snapshot Repository</name>
+            <url>https://repository.apache.org/content/repositories/snapshots/</url>
+            <releases>
+                <enabled>false</enabled>
+            </releases>
+            <snapshots>
+                <enabled>true</enabled>
+            </snapshots>
+        </repository>
+    </repositories>
+
+    <dependencies>
+        <!-- Apache Flink dependencies -->
+        <!-- These dependencies are provided, because they should not be packaged into the JAR file. -->
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-java</artifactId>
+            <version>${flink.version}</version>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.flink</groupId>
+            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
+            <version>${flink.version}</version>
+            <scope>provided</scope>
+        </dependency>
+
+        <!-- Add logging framework, to produce console output when running in the IDE. -->
+        <!-- These dependencies are excluded from the application JAR by default. -->
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-log4j12</artifactId>
+            <version>1.7.7</version>
+            <scope>runtime</scope>
+        </dependency>
+        <dependency>
+            <groupId>log4j</groupId>
+            <artifactId>log4j</artifactId>
+            <version>1.2.17</version>
+            <scope>runtime</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <!-- Java Compiler -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.1</version>
+                <configuration>
+                    <source>${java.version}</source>
+                    <target>${java.version}</target>
+                </configuration>
+            </plugin>
+
+            <!-- Use the shade plugin to build a fat jar containing all required dependencies. -->
+            <!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.0.0</version>
+                <executions>
+                    <!-- Run the shade goal on the package phase -->
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <artifactSet>
+                                <excludes>
+                                    <exclude>org.apache.flink:force-shading</exclude>
+                                    <exclude>com.google.code.findbugs:jsr305</exclude>
+                                    <exclude>org.slf4j:*</exclude>
+                                    <exclude>log4j:*</exclude>
+                                </excludes>
+                            </artifactSet>
+                            <filters>
+                                <filter>
+                                    <!-- Do not copy the signatures in the META-INF folder.
+                                    Otherwise this might cause SecurityExceptions when using the JAR. -->
+                                    <artifact>*:*</artifact>
+                                    <excludes>
+                                        <exclude>META-INF/*.SF</exclude>
+                                        <exclude>META-INF/*.DSA</exclude>
+                                        <exclude>META-INF/*.RSA</exclude>
+                                    </excludes>
+                                </filter>
+                            </filters>
+                            <transformers>
+                                <transformer
+                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                                    <mainClass>com.heibaiying.SampleJob</mainClass>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+
+        <pluginManagement>
+            <plugins>
+                <!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
+                <plugin>
+                    <groupId>org.eclipse.m2e</groupId>
+                    <artifactId>lifecycle-mapping</artifactId>
+                    <version>1.0.0</version>
+                    <configuration>
+                        <lifecycleMappingMetadata>
+                            <pluginExecutions>
+                                <pluginExecution>
+                                    <pluginExecutionFilter>
+                                        <groupId>org.apache.maven.plugins</groupId>
+                                        <artifactId>maven-shade-plugin</artifactId>
+                                        <versionRange>[3.0.0,)</versionRange>
+                                        <goals>
+                                            <goal>shade</goal>
+                                        </goals>
+                                    </pluginExecutionFilter>
+                                    <action>
+                                        <ignore/>
+                                    </action>
+                                </pluginExecution>
+                                <pluginExecution>
+                                    <pluginExecutionFilter>
+                                        <groupId>org.apache.maven.plugins</groupId>
+                                        <artifactId>maven-compiler-plugin</artifactId>
+                                        <versionRange>[3.1,)</versionRange>
+                                        <goals>
+                                            <goal>testCompile</goal>
+                                            <goal>compile</goal>
+                                        </goals>
+                                    </pluginExecutionFilter>
+                                    <action>
+                                        <ignore/>
+                                    </action>
+                                </pluginExecution>
+                            </pluginExecutions>
+                        </lifecycleMappingMetadata>
+                    </configuration>
+                </plugin>
+            </plugins>
+        </pluginManagement>
+    </build>
+
+    <!-- This profile helps to make things run out of the box in IntelliJ -->
+    <!-- It adds Flink's core classes to the runtime class path. -->
+    <!-- Otherwise they are missing in IntelliJ, because the dependency on Flink's core classes is 'provided'. -->
+    <profiles>
+        <profile>
+            <id>add-dependencies-for-IDEA</id>
+
+            <activation>
+                <property>
+                    <name>idea.version</name>
+                </property>
+            </activation>
+
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.flink</groupId>
+                    <artifactId>flink-java</artifactId>
+                    <version>${flink.version}</version>
+                    <scope>compile</scope>
+                </dependency>
+                <dependency>
+                    <groupId>org.apache.flink</groupId>
+                    <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
+                    <version>${flink.version}</version>
+                    <scope>compile</scope>
+                </dependency>
+            </dependencies>
+        </profile>
+    </profiles>
+</project>
diff --git a/code/Flink/flink-time-watermark/src/main/java/com/heibaiying/SampleJob.java b/code/Flink/flink-time-watermark/src/main/java/com/heibaiying/SampleJob.java
new file mode 100644
index 0000000..733b05d
--- /dev/null
+++ b/code/Flink/flink-time-watermark/src/main/java/com/heibaiying/SampleJob.java
@@ -0,0 +1,27 @@
+package com.heibaiying;
+
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.streaming.api.datastream.DataStreamSource;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.windowing.time.Time;
+import org.apache.flink.util.Collector;
+
+public class SampleJob {
+
+ public static void main(String[] args) throws Exception {
+
+        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+        DataStreamSource<String> streamSource = env.socketTextStream("192.168.200.229", 9999, "\n", 3);
+        streamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
+            @Override
+            public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception {
+                // split each line on tabs and emit (word, 1) pairs
+                String[] words = value.split("\t");
+                for (String word : words) {
+                    out.collect(new Tuple2<>(word, 1L));
+                }
+            }
+        }).keyBy(0).timeWindow(Time.seconds(3)).sum(1).print();
+ env.execute("Flink Streaming");
+ }
+}
diff --git a/code/Flink/flink-time-watermark/src/main/resources/log4j.properties b/code/Flink/flink-time-watermark/src/main/resources/log4j.properties
new file mode 100644
index 0000000..da32ea0
--- /dev/null
+++ b/code/Flink/flink-time-watermark/src/main/resources/log4j.properties
@@ -0,0 +1,23 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+log4j.rootLogger=INFO, console
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
diff --git a/notes/Flink_Data_Sink.md b/notes/Flink_Data_Sink.md
new file mode 100644
index 0000000..939ce8a
--- /dev/null
+++ b/notes/Flink_Data_Sink.md
@@ -0,0 +1,119 @@
+# Flink Sink
+
+## 1. Data Sinks
+
+When processing data with Flink, data flows in through a Data Source, is transformed by a series of Transformations, and the final results are emitted through a Sink; Flink Data Sinks define where a data stream is ultimately written. Flink provides several simple Sink APIs for day-to-day development, described below.
+
+### 1.1 writeAsText
+
+`writeAsText` writes the results as text, in parallel, to files under the given directory. Besides the required path parameter, the method accepts an optional second parameter that sets the write mode, with two possible values:
+
++ **WriteMode.NO_OVERWRITE**: write only if no file exists yet at the given path;
++ **WriteMode.OVERWRITE**: write regardless of whether files exist at the given path, overwriting any existing files.
+
+Example:
+
+```java
+streamSource.writeAsText("D:\\out", FileSystem.WriteMode.OVERWRITE);
+```
+
+The call above writes to multiple files in parallel; to collect all output into a single file, set the parallelism to 1:
+
+```java
+streamSource.writeAsText("D:\\out", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
+```
+
+### 1.2 writeAsCsv
+
+`writeAsCsv` writes the results to the given directory as CSV files. Besides the required path, the method also accepts the write mode, the row delimiter, and the field delimiter as three additional parameters; its signature is:
+
+```java
+writeAsCsv(String path, WriteMode writeMode, String rowDelimiter, String fieldDelimiter)
+```
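+
+A minimal usage sketch; note that `writeAsCsv` only supports streams of tuples, and the directory and delimiters below are illustrative:
+
+```java
+// assumes tupleStream is a DataStream of Tuple2<String, Integer>
+tupleStream.writeAsCsv("D:\\out-csv", FileSystem.WriteMode.OVERWRITE, "\n", ",");
+```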
+
+### 1.3 print / printToErr
+
+`print` / `printToErr` are the most commonly used sinks during testing; they print the results to the console via standard output or standard error.
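+
+For example:
+
+```java
+streamSource.print();       // writes to standard output
+streamSource.printToErr();  // writes to standard error
+```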
+
+### 1.4 writeUsingOutputFormat
+
+Writes the results using a custom output format. `writeAsText` and `writeAsCsv`, introduced above, both delegate to this method internally; their source is:
+
+```java
+public DataStreamSink<T> writeAsText(String path, WriteMode writeMode) {
+    TextOutputFormat<T> tof = new TextOutputFormat<>(new Path(path));
+    tof.setWriteMode(writeMode);
+    return writeUsingOutputFormat(tof);
+}
+```
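+
+Calling it directly with a custom `OutputFormat` follows the same pattern; a short sketch reusing the built-in TextOutputFormat (the path is illustrative):
+
+```java
+TextOutputFormat<String> format = new TextOutputFormat<>(new Path("D:\\out"));
+format.setWriteMode(FileSystem.WriteMode.OVERWRITE);
+streamSource.writeUsingOutputFormat(format);
+```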
+
+### 1.5 writeToSocket
+
+`writeToSocket` writes the results to a socket using the given serialization schema. Example:
+
+```java
+streamSource.writeToSocket("192.168.0.226", 9999, new SimpleStringSchema());
+```
+
+## 2. Streaming Connectors
+
+Beyond the APIs above, Flink also ships with a series of built-in connectors for writing results to common storage systems and message brokers:
+
+- Apache Kafka (source/sink)
+- Apache Cassandra (sink)
+- Amazon Kinesis Streams (source/sink)
+- Elasticsearch (sink)
+- Hadoop FileSystem (sink)
+- RabbitMQ (source/sink)
+- Apache NiFi (source/sink)
+- Google PubSub (source/sink)
+
+In addition to the built-in connectors, Flink can be extended with connectors from Apache Bahir. Apache Bahir provides extensions for distributed analytics systems such as Spark and Flink; the Flink Sink connectors it currently supports are:
+
+- Apache ActiveMQ (source/sink)
+- Apache Flume (sink)
+- Redis (sink)
+- Akka (sink)
+
+Building on the Kafka Source integration covered in the Data Sources chapter, the following steps integrate a Kafka Sink as well.
+
+## 3. Integrating a Kafka Sink
+
+### 3.1 addSink
+
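+Flink attaches a sink to a stream with the `addSink` method. The wiring below is condensed from KafkaStreamingJob.java in this commit; `stream` and `properties` are the consumer stream and the Kafka properties defined earlier in that job:
+
+```java
+// define how each result is converted into a ProducerRecord for the output topic
+KafkaSerializationSchema<String> schema = new KafkaSerializationSchema<String>() {
+    @Override
+    public ProducerRecord<byte[], byte[]> serialize(String element, @Nullable Long timestamp) {
+        return new ProducerRecord<>("flink-stream-out-topic", element.getBytes());
+    }
+};
+
+// AT_LEAST_ONCE semantics with a producer pool of size 5
+FlinkKafkaProducer<String> kafkaProducer = new FlinkKafkaProducer<>("flink-stream-out-topic",
+        schema, properties, FlinkKafkaProducer.Semantic.AT_LEAST_ONCE, 5);
+
+// double each input element and write it back out to Kafka
+stream.map((MapFunction<String, String>) value -> value + value).addSink(kafkaProducer);
+```
+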
+### 3.2 Creating the Output Topic
+
+```shell
+# create the output topic used for testing
+bin/kafka-topics.sh --create \
+ --bootstrap-server hadoop001:9092 \
+ --replication-factor 1 \
+ --partitions 1 \
+ --topic flink-stream-out-topic
+
+# list all topics
+bin/kafka-topics.sh --list --bootstrap-server hadoop001:9092
+```
+
+### 3.3 Starting a Console Consumer
+
+```shell
+bin/kafka-console-consumer.sh --bootstrap-server hadoop001:9092 --topic flink-stream-out-topic
+```
+
+### 3.4 Test Results
+
+
+
+## 4. Custom Sink
+
+### 4.1 Importing Dependencies
+
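+The custom sink in the next section writes to MySQL, so the JDBC driver needs to be on the classpath. This commit's pom.xml declares it as:
+
+```xml
+<dependency>
+    <groupId>mysql</groupId>
+    <artifactId>mysql-connector-java</artifactId>
+    <version>8.0.16</version>
+</dependency>
+```
+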
+### 4.2 Implementing the Custom Sink
+
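+A custom sink extends RichSinkFunction: the connection is opened once in `open`, each element is written in `invoke`, and resources are released in `close`. Condensed from FlinkToMySQL.java in this commit:
+
+```java
+public class FlinkToMySQL extends RichSinkFunction<Employee> {
+
+    private PreparedStatement stmt;
+    private Connection conn;
+
+    @Override
+    public void open(Configuration parameters) throws Exception {
+        conn = DriverManager.getConnection("jdbc:mysql://192.168.200.229:3306/employees", "root", "123456");
+        stmt = conn.prepareStatement("insert into emp(name, age, birthday) values(?, ?, ?)");
+    }
+
+    @Override
+    public void invoke(Employee value, Context context) throws Exception {
+        stmt.setString(1, value.getName());
+        stmt.setInt(2, value.getAge());
+        stmt.setDate(3, value.getBirthday());
+        stmt.executeUpdate();
+    }
+
+    @Override
+    public void close() throws Exception {
+        super.close();
+        if (stmt != null) stmt.close();
+        if (conn != null) conn.close();
+    }
+}
+```
+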
+### 4.3 Test Results
+
+
+
+
+
diff --git a/notes/Flink_Data_Source.md b/notes/Flink_Data_Source.md
new file mode 100644
index 0000000..7686cad
--- /dev/null
+++ b/notes/Flink_Data_Source.md
@@ -0,0 +1,280 @@
+# Flink Data Source
+
+
+## 1. Built-in Data Sources
+
+A Flink Data Source defines where a Flink program's data comes from. Flink ships with a variety of built-in methods that help developers build input streams quickly, described below:
+
+### 1.1 File-based
+
+**1. readTextFile(path)**: reads a text file according to the TextInputFormat and returns its content as strings. Example:
+
+```java
+env.readTextFile(filePath).print();
+```
+
+**2. readFile(fileInputFormat, path)**: reads a file using the given input format.
+
+**3. readFile(inputFormat, filePath, watchType, interval, typeInformation)**: periodically reads a file using the given input format. The parameters are:
+
++ **inputFormat**: the input format of the data stream;
++ **filePath**: the file path, either on the local filesystem or on HDFS;
++ **watchType**: the read mode. It has two possible values, `FileProcessingMode.PROCESS_ONCE` and `FileProcessingMode.PROCESS_CONTINUOUSLY`: the former reads the data at the given path once and then exits, while the latter scans the path periodically to pick up new data. Note that if watchType is set to `PROCESS_CONTINUOUSLY`, then whenever a file is modified all of its contents (both old and new) are reprocessed, which breaks Flink's *exactly-once* semantics;
++ **interval**: the scan interval;
++ **typeInformation**: the type of the elements in the input stream.
+
+Example:
+
+```java
+final String filePath = "D:\\log4j.properties";
+final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+env.readFile(new TextInputFormat(new Path(filePath)),
+ filePath,
+ FileProcessingMode.PROCESS_ONCE,
+ 1,
+ BasicTypeInfo.STRING_TYPE_INFO).print();
+env.execute();
+```
+
+### 1.2 Collection-based
+
+**1. fromCollection(Collection)**: builds a stream from a collection; all elements must be of the same type. Example:
+
+```java
+env.fromCollection(Arrays.asList(1,2,3,4,5)).print();
+```
+
+**2. fromElements(T ...)**: builds a stream from the given elements; all elements must be of the same type. Example:
+
+```java
+env.fromElements(1,2,3,4,5).print();
+```
+**3. generateSequence(from, to)**: builds a stream from the given range of numbers. Example:
+
+```java
+env.generateSequence(0,100);
+```
+
+**4. fromCollection(Iterator, Class)**: builds a stream from an iterator. The first parameter defines the iterator; the second defines the type of the emitted elements. Example:
+
+```java
+env.fromCollection(new CustomIterator(), BasicTypeInfo.INT_TYPE_INFO).print();
+```
+
+Here CustomIterator is a custom iterator that produces the numbers from 1 to 100; its source is shown below. Note that besides the Iterator interface, a custom iterator must also implement the Serializable interface, otherwise a serialization exception is thrown:
+
+```java
+import java.io.Serializable;
+import java.util.Iterator;
+
+public class CustomIterator implements Iterator<Integer>, Serializable {
+
+    private Integer i = 0;
+
+    @Override
+    public boolean hasNext() {
+        return i < 100;
+    }
+
+    @Override
+    public Integer next() {
+        i++;
+        return i;
+    }
+}
+```
+
+**5. fromParallelCollection(SplittableIterator, Class)**: takes two parameters; the second defines the type of the emitted elements, while the first, SplittableIterator, is an abstract base class for iterators whose values can be split across multiple disjoint iterators.
+
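+A short example using `NumberSequenceIterator`, a `SplittableIterator` over a numeric range that ships with Flink:
+
+```java
+env.fromParallelCollection(new NumberSequenceIterator(1, 100), BasicTypeInfo.LONG_TYPE_INFO).print();
+```
+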
+### 1.3 Socket-based
+
+Flink provides the socketTextStream method for building socket-based data streams; it takes four main parameters:
+
+- **hostname**: the host name;
+- **port**: the port; a value of 0 means the port is assigned automatically;
+- **delimiter**: the delimiter separating individual records;
+- **maxRetry**: the maximum retry interval, in seconds, while the socket is temporarily down; 0 disables retries, and a negative value retries indefinitely. Example:
+
+```java
+env.socketTextStream("192.168.0.229", 9999, "\n", 3).print();
+```
+
+
+
+## 2. Custom Data Sources
+
+### 2.1 SourceFunction
+
+Besides the built-in sources, a custom data source can be added with the `addSource` method. A custom source must implement the SourceFunction interface; the example below produces the numbers in [0 , 1000):
+
+```java
+final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
+
+env.addSource(new SourceFunction<Long>() {
+
+    private long count = 0L;
+    private volatile boolean isRunning = true;
+
+    public void run(SourceContext<Long> ctx) {
+        while (isRunning && count < 1000) {
+            // emit the current value via collect
+            ctx.collect(count);
+            count++;
+        }
+    }
+
+    public void cancel() {
+        isRunning = false;
+    }
+
+}).print();
+env.execute();
+```
+
+### 2.2 ParallelSourceFunction 和 RichParallelSourceFunction
+
+A data source implemented via SourceFunction, as above, is not parallel: calling `setParallelism(n)` on the resulting DataStream is unsupported and throws the following exception:
+
+```shell
+Exception in thread "main" java.lang.IllegalArgumentException: Source: 1 is not a parallel source
+```
+
+To implement a parallel input stream, implement the ParallelSourceFunction or RichParallelSourceFunction interface instead; their relationship to SourceFunction is shown in the figure below.
+
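+ParallelSourceFunction declares the same two methods as SourceFunction, so a parallel version of the source above is a mechanical change; a minimal sketch:
+
+```java
+env.addSource(new ParallelSourceFunction<Long>() {
+
+    private long count = 0L;
+    private volatile boolean isRunning = true;
+
+    public void run(SourceContext<Long> ctx) {
+        // each parallel instance of the source runs this loop independently
+        while (isRunning && count < 1000) {
+            ctx.collect(count);
+            count++;
+        }
+    }
+
+    public void cancel() {
+        isRunning = false;
+    }
+
+}).setParallelism(2).print();
+```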
+