flink datasource

罗祥
2019-10-31 10:08:37 +08:00
parent 649bf7f78a
commit ecd8805ed5
31 changed files with 1245 additions and 264 deletions

View File

@@ -1,66 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.heibaiying

import org.apache.flink.api.scala._

/**
* Skeleton for a Flink Batch Job.
*
* For a tutorial on how to write a Flink batch application, check the
* tutorials and examples on the <a href="http://flink.apache.org/docs/stable/">Flink Website</a>.
*
* To package your application into a JAR file for execution,
* change the main class in the POM.xml file to this class (simply search for 'mainClass')
* and run 'mvn clean package' on the command line.
*/
object BatchJob {
def main(args: Array[String]) {
// set up the batch execution environment
val env = ExecutionEnvironment.getExecutionEnvironment
/*
* Here, you can start creating your execution plan for Flink.
*
* Start with getting some data from the environment, like
* env.readTextFile(textPath);
*
* then, transform the resulting DataSet[String] using operations
* like
* .filter()
* .flatMap()
* .join()
* .group()
*
* and many more.
* Have a look at the programming guide:
*
* http://flink.apache.org/docs/latest/apis/batch/index.html
*
* and the examples
*
* http://flink.apache.org/docs/latest/apis/batch/examples.html
*
*/
// execute program
env.execute("Flink Batch Scala API Skeleton")
}
}

View File

@@ -1,63 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.heibaiying

import org.apache.flink.streaming.api.scala._

/**
* Skeleton for a Flink Streaming Job.
*
* For a tutorial on how to write a Flink streaming application, check the
* tutorials and examples on the <a href="http://flink.apache.org/docs/stable/">Flink Website</a>.
*
* To package your application into a JAR file for execution, run
* 'mvn clean package' on the command line.
*
* If you change the name of the main class, change the respective
* entry in the POM.xml file (simply search for 'mainClass').
*/
object StreamingJob {
def main(args: Array[String]) {
// set up the streaming execution environment
val env = StreamExecutionEnvironment.getExecutionEnvironment
/*
* Here, you can start creating your execution plan for Flink.
*
* Start with getting some data from the environment, like
* env.readTextFile(textPath);
*
* then, transform the resulting DataStream[String] using operations
* like
* .filter()
* .flatMap()
* .join()
* .group()
*
* and many more.
* Have a look at the programming guide:
*
* http://flink.apache.org/docs/latest/apis/streaming/index.html
*
*/
// execute program
env.execute("Flink Streaming Scala API Skeleton")
}
}

View File

@@ -6,8 +6,12 @@ object WordCountBatch {
   def main(args: Array[String]): Unit = {
     val benv = ExecutionEnvironment.getExecutionEnvironment
-    val text = benv.readTextFile("D:\\BigData-Notes\\code\\Flink\\flink-basis-scala\\src\\main\\resources\\wordcount.txt")
-    val counts = text.flatMap { _.toLowerCase.split(",") filter { _.nonEmpty } }.map { (_, 1) }.groupBy(0).sum(1)
-    counts.print()
+    val dataSet = benv.readTextFile("D:\\BigData-Notes\\code\\Flink\\flink-basis-scala\\src\\main\\resources\\wordcount.txt")
+    dataSet.flatMap { _.toLowerCase.split(",") }
+      .filter(_.nonEmpty)
+      .map { (_, 1) }
+      .groupBy(0)
+      .sum(1)
+      .print()
   }
 }
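A note on the rewritten batch job: in the DataSet API, print() is itself a sink that triggers execution, which is why the new pipeline has no explicit benv.execute() call. Below is a minimal, self-contained sketch of the same pipeline; the object name and the in-memory input (standing in for the machine-specific Windows path) are illustrative, not part of the commit.

import org.apache.flink.api.scala._

// Hypothetical, self-contained variant of the WordCountBatch pipeline above.
object WordCountBatchSketch {
  def main(args: Array[String]): Unit = {
    val benv = ExecutionEnvironment.getExecutionEnvironment
    // In-memory input standing in for the hard-coded wordcount.txt path.
    benv.fromElements("flink,spark,flink", "storm,spark")
      .flatMap { _.toLowerCase.split(",") }
      .filter(_.nonEmpty)
      .map { (_, 1) }
      .groupBy(0) // group by the word (tuple position 0)
      .sum(1)     // sum the counts (tuple position 1)
      .print()    // print() is a sink and triggers execution
  }
}

Running it prints (flink,2), (spark,2), (storm,1) in some order.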

View File

@@ -10,16 +10,14 @@ object WordCountStreaming {
     val senv = StreamExecutionEnvironment.getExecutionEnvironment
-    val text: DataStream[String] = senv.socketTextStream("192.168.200.229", 9999, '\n')
-    val windowCounts = text.flatMap { w => w.split(",") }.map { w => WordWithCount(w, 1) }.keyBy("word")
-      .timeWindow(Time.seconds(5)).sum("count")
-    windowCounts.print().setParallelism(1)
+    val dataStream: DataStream[String] = senv.socketTextStream("192.168.0.229", 9999, '\n')
+    dataStream.flatMap { line => line.toLowerCase.split(",") }
+      .filter(_.nonEmpty)
+      .map { word => (word, 1) }
+      .keyBy(0)
+      .timeWindow(Time.seconds(3))
+      .sum(1)
+      .print()
     senv.execute("Streaming WordCount")
   }
   case class WordWithCount(word: String, count: Long)
 }
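To try the new streaming job, a text source must be listening on the configured host and port before the job is submitted, e.g. nc -lk 9999; typing comma-separated words then yields one count per word every three seconds. A minimal sketch of the same pipeline follows, assuming the Flink 1.9-era Scala API used above; the object name and the argument handling are illustrative, and the host defaults to localhost instead of the hard-coded LAN address.

import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

// Hypothetical, self-contained variant of the WordCountStreaming pipeline above.
object WordCountStreamingSketch {
  def main(args: Array[String]): Unit = {
    val host = if (args.nonEmpty) args(0) else "localhost" // instead of a fixed IP
    val senv = StreamExecutionEnvironment.getExecutionEnvironment
    senv.socketTextStream(host, 9999, '\n')
      .flatMap { _.toLowerCase.split(",") }
      .filter(_.nonEmpty)
      .map { (_, 1) }
      .keyBy(0)                    // key by the word (tuple position 0)
      .timeWindow(Time.seconds(3)) // 3-second tumbling processing-time window
      .sum(1)
      .print()
    senv.execute("Streaming WordCount sketch") // streaming jobs need an explicit execute()
  }
}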