flink datasource

罗祥
2019-10-31 10:08:37 +08:00
parent 649bf7f78a
commit ecd8805ed5
31 changed files with 1245 additions and 264 deletions

View File

@@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.heibaiying

import org.apache.flink.api.scala._

/**
* Skeleton for a Flink Batch Job.
*
* For a tutorial on how to write a Flink batch application, check the
* tutorials and examples on the <a href="http://flink.apache.org/docs/stable/">Flink Website</a>.
*
* To package your application into a JAR file for execution,
* change the main class in the POM.xml file to this class (simply search for 'mainClass')
* and run 'mvn clean package' on the command line.
*/
object BatchJob {
def main(args: Array[String]) {
// set up the batch execution environment
val env = ExecutionEnvironment.getExecutionEnvironment
/*
* Here, you can start creating your execution plan for Flink.
*
* Start with getting some data from the environment, like
* env.readTextFile(textPath);
*
* then, transform the resulting DataSet[String] using operations
* like
* .filter()
* .flatMap()
* .join()
* .group()
*
* and many more.
* Have a look at the programming guide:
*
* http://flink.apache.org/docs/latest/apis/batch/index.html
*
* and the examples
*
* http://flink.apache.org/docs/latest/apis/batch/examples.html
*
*/
// execute program
env.execute("Flink Batch Scala API Skeleton")
}
}

View File

@@ -1,63 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.heibaiying

import org.apache.flink.streaming.api.scala._

/**
* Skeleton for a Flink Streaming Job.
*
* For a tutorial on how to write a Flink streaming application, check the
* tutorials and examples on the <a href="http://flink.apache.org/docs/stable/">Flink Website</a>.
*
* To package your application into a JAR file for execution, run
* 'mvn clean package' on the command line.
*
* If you change the name of the main class, change the respective
* entry in the POM.xml file (simply search for 'mainClass').
*/
object StreamingJob {
def main(args: Array[String]) {
// set up the streaming execution environment
val env = StreamExecutionEnvironment.getExecutionEnvironment
/*
* Here, you can start creating your execution plan for Flink.
*
* Start with getting some data from the environment, like
* env.readTextFile(textPath);
*
* then, transform the resulting DataStream[String] using operations
* like
* .filter()
* .flatMap()
* .join()
* .group()
*
* and many more.
* Have a look at the programming guide:
*
* http://flink.apache.org/docs/latest/apis/streaming/index.html
*
*/
// execute program
env.execute("Flink Streaming Scala API Skeleton")
}
}

View File

@@ -6,8 +6,12 @@ object WordCountBatch {
   def main(args: Array[String]): Unit = {
     val benv = ExecutionEnvironment.getExecutionEnvironment
-    val text = benv.readTextFile("D:\\BigData-Notes\\code\\Flink\\flink-basis-scala\\src\\main\\resources\\wordcount.txt")
-    val counts = text.flatMap { _.toLowerCase.split(",") filter { _.nonEmpty } }.map { (_, 1) }.groupBy(0).sum(1)
-    counts.print()
+    val dataSet = benv.readTextFile("D:\\BigData-Notes\\code\\Flink\\flink-basis-scala\\src\\main\\resources\\wordcount.txt")
+    dataSet.flatMap { _.toLowerCase.split(",") }
+      .filter(_.nonEmpty)
+      .map { (_, 1) }
+      .groupBy(0)
+      .sum(1)
+      .print()
   }
 }
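A note on the rewritten batch job: in the DataSet API, print() is itself a sink that triggers execution, which is why the new pipeline has no explicit benv.execute() call. Below is a minimal, self-contained sketch of the same pipeline; the object name and the in-memory input (standing in for the machine-specific Windows path) are illustrative, not part of the commit.

import org.apache.flink.api.scala._

// Hypothetical, self-contained variant of the WordCountBatch pipeline above.
object WordCountBatchSketch {
  def main(args: Array[String]): Unit = {
    val benv = ExecutionEnvironment.getExecutionEnvironment
    // In-memory input standing in for the hard-coded wordcount.txt path.
    benv.fromElements("flink,spark,flink", "storm,spark")
      .flatMap { _.toLowerCase.split(",") }
      .filter(_.nonEmpty)
      .map { (_, 1) }
      .groupBy(0) // group by the word (tuple position 0)
      .sum(1)     // sum the counts (tuple position 1)
      .print()    // print() is a sink and triggers execution
  }
}

Running it prints (flink,2), (spark,2), (storm,1) in some order.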

View File

@@ -10,16 +10,14 @@ object WordCountStreaming {
     val senv = StreamExecutionEnvironment.getExecutionEnvironment
-    val text: DataStream[String] = senv.socketTextStream("192.168.200.229", 9999, '\n')
-    val windowCounts = text.flatMap { w => w.split(",") }.map { w => WordWithCount(w, 1) }.keyBy("word")
-      .timeWindow(Time.seconds(5)).sum("count")
-    windowCounts.print().setParallelism(1)
+    val dataStream: DataStream[String] = senv.socketTextStream("192.168.0.229", 9999, '\n')
+    dataStream.flatMap { line => line.toLowerCase.split(",") }
+      .filter(_.nonEmpty)
+      .map { word => (word, 1) }
+      .keyBy(0)
+      .timeWindow(Time.seconds(3))
+      .sum(1)
+      .print()
     senv.execute("Streaming WordCount")
   }
   case class WordWithCount(word: String, count: Long)
 }
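To try the new streaming job, a text source must be listening on the configured host and port before the job is submitted, e.g. nc -lk 9999; typing comma-separated words then yields one count per word every three seconds. A minimal sketch of the same pipeline follows, assuming the Flink 1.9-era Scala API used above; the object name and the argument handling are illustrative, and the host defaults to localhost instead of the hard-coded LAN address.

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

// Hypothetical, self-contained variant of the WordCountStreaming pipeline above.
object WordCountStreamingSketch {
  def main(args: Array[String]): Unit = {
    val host = if (args.nonEmpty) args(0) else "localhost" // instead of a fixed IP
    val senv = StreamExecutionEnvironment.getExecutionEnvironment
    senv.socketTextStream(host, 9999, '\n')
      .flatMap { _.toLowerCase.split(",") }
      .filter(_.nonEmpty)
      .map { (_, 1) }
      .keyBy(0)                    // key by the word (tuple position 0)
      .timeWindow(Time.seconds(3)) // 3-second tumbling processing-time window
      .sum(1)
      .print()
    senv.execute("Streaming WordCount sketch") // streaming jobs need an explicit execute()
  }
}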