add flink datasources and sinks
This commit is contained in:
		| @@ -144,7 +144,7 @@ under the License. | ||||
| 							</filters> | ||||
| 							<transformers> | ||||
| 								<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> | ||||
| 									<mainClass>com.heibaiying.SampleJob</mainClass> | ||||
| 									<mainClass>com.heibaiying.WaterMarkJob</mainClass> | ||||
| 								</transformer> | ||||
| 							</transformers> | ||||
| 						</configuration> | ||||
|   | ||||
| @@ -0,0 +1,52 @@ | ||||
| package com.heibaiying; | ||||
|  | ||||
| import org.apache.flink.api.common.functions.MapFunction; | ||||
| import org.apache.flink.api.java.tuple.Tuple3; | ||||
| import org.apache.flink.streaming.api.TimeCharacteristic; | ||||
| import org.apache.flink.streaming.api.datastream.DataStreamSource; | ||||
| import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; | ||||
| import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; | ||||
| import org.apache.flink.streaming.api.watermark.Watermark; | ||||
| import org.apache.flink.streaming.api.windowing.time.Time; | ||||
|  | ||||
| public class PeriodicWatermarksJob { | ||||
|  | ||||
|     public static void main(String[] args) throws Exception { | ||||
|  | ||||
|         final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); | ||||
|         // 设置并行度为1 | ||||
|         env.setParallelism(1); | ||||
|         // 设置以事件时间为基准 | ||||
|         env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); | ||||
|         DataStreamSource<String> streamSource = env.socketTextStream("192.168.200.229", 8888, "\n", 3); | ||||
|         streamSource.map(new MapFunction<String, Tuple3<Long, String, Long>>() { | ||||
|             @Override | ||||
|             public Tuple3<Long, String, Long> map(String value) throws Exception { | ||||
|                 String[] split = value.split(","); | ||||
|                 return new Tuple3<>(Long.valueOf(split[0]), split[1], 1L); | ||||
|             } | ||||
|         }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGenerator()) | ||||
|                 .keyBy(1).timeWindow(Time.seconds(3)).sum(2).print(); | ||||
|         env.execute(); | ||||
|  | ||||
|     } | ||||
| } | ||||
|  | ||||
|  | ||||
| class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks<Tuple3<Long, String, Long>> { | ||||
|  | ||||
|     private final long maxOutOfOrderness = 3000L; | ||||
|     private long currentMaxTimestamp = 0L; | ||||
|  | ||||
|     @Override | ||||
|     public long extractTimestamp(Tuple3<Long, String, Long> element, long previousElementTimestamp) { | ||||
|         long timestamp = element.f0; | ||||
|         currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp); | ||||
|         return timestamp; | ||||
|     } | ||||
|  | ||||
|     @Override | ||||
|     public Watermark getCurrentWatermark() { | ||||
|         return new Watermark(currentMaxTimestamp - maxOutOfOrderness); | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,46 @@ | ||||
| package com.heibaiying; | ||||
|  | ||||
| import org.apache.flink.api.common.functions.MapFunction; | ||||
| import org.apache.flink.api.java.tuple.Tuple3; | ||||
| import org.apache.flink.streaming.api.TimeCharacteristic; | ||||
| import org.apache.flink.streaming.api.datastream.DataStreamSource; | ||||
| import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; | ||||
| import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; | ||||
| import org.apache.flink.streaming.api.watermark.Watermark; | ||||
| import org.apache.flink.streaming.api.windowing.time.Time; | ||||
|  | ||||
| public class PunctuatedWatermarksJob { | ||||
|  | ||||
|     public static void main(String[] args) throws Exception { | ||||
|  | ||||
|         final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); | ||||
|         env.setParallelism(1); | ||||
|         // 设置以事件时间为基准 | ||||
|         env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); | ||||
|         DataStreamSource<String> streamSource = env.socketTextStream("192.168.200.229", 8888, "\n", 3); | ||||
|         streamSource.map(new MapFunction<String, Tuple3<Long, String, Long>>() { | ||||
|             @Override | ||||
|             public Tuple3<Long, String, Long> map(String value) throws Exception { | ||||
|                 String[] split = value.split(","); | ||||
|                 return new Tuple3<>(Long.valueOf(split[0]), split[1], 1L); | ||||
|             } | ||||
|         }).assignTimestampsAndWatermarks(new PunctuatedAssigner()) | ||||
|                 .keyBy(1).timeWindow(Time.seconds(3)).sum(2).print(); | ||||
|         env.execute(); | ||||
|  | ||||
|     } | ||||
| } | ||||
|  | ||||
| class PunctuatedAssigner implements AssignerWithPunctuatedWatermarks<Tuple3<Long, String, Long>> { | ||||
|  | ||||
|     @Override | ||||
|     public long extractTimestamp(Tuple3<Long, String, Long> element, long previousElementTimestamp) { | ||||
|         return element.f0; | ||||
|     } | ||||
|  | ||||
|     @Override | ||||
|     public Watermark checkAndGetNextWatermark(Tuple3<Long, String, Long> lastElement, long extractedTimestamp) { | ||||
|         return new Watermark(extractedTimestamp); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -1,27 +0,0 @@ | ||||
| package com.heibaiying; | ||||
|  | ||||
| import org.apache.flink.api.common.functions.FlatMapFunction; | ||||
| import org.apache.flink.api.java.tuple.Tuple2; | ||||
| import org.apache.flink.streaming.api.datastream.DataStreamSource; | ||||
| import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; | ||||
| import org.apache.flink.streaming.api.windowing.time.Time; | ||||
| import org.apache.flink.util.Collector; | ||||
|  | ||||
| public class SampleJob { | ||||
|  | ||||
|     public static void main(String[] args) throws Exception { | ||||
|  | ||||
|         final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); | ||||
|         DataStreamSource<String> streamSource = env.socketTextStream("192.168.200.229", 9999, "\n", 3); | ||||
|         streamSource.flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() { | ||||
|             @Override | ||||
|             public void flatMap(String value, Collector<Tuple2<String, Long>> out) throws Exception { | ||||
|                 String[] words = value.split("\t"); | ||||
|                 for (String word : words) { | ||||
|                     out.collect(new Tuple2<>(word, 1L)); | ||||
|                 } | ||||
|             } | ||||
|         }).keyBy(0).timeWindow(Time.seconds(3)).sum(1).print(); | ||||
|         env.execute("Flink Streaming"); | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,18 @@ | ||||
| 1572501901000,hadoop | ||||
| 1572501902000,hadoop | ||||
| 1572501903000,hadoop | ||||
| 1572501904000,flink | ||||
| 1572501905000,spark | ||||
| 1572501906000,spark ->  (1572501901000,hadoop,2) | ||||
| 1572501907000,hive | ||||
| 1572501908000,hive | ||||
| 1572501909000,hive  ->  (1572501903000,hadoop,1) | ||||
|                         (1572501905000,spark,1) | ||||
|                         (1572501904000,flink,1) | ||||
| 1572501910000,spark | ||||
| 1572501911000,storm | ||||
| 1572501912000,storm ->  (1572501906000,spark,1) | ||||
|                         (1572501907000,hive,2) | ||||
| 1572501915000,yarn  ->  (1572501911000,storm,1) | ||||
|                         (1572501909000,hive,1) | ||||
|                         (1572501910000,spark,1) | ||||
		Reference in New Issue
	
	Block a user