addflink
This commit is contained in:
@ -20,7 +20,7 @@ public class KafkaStreamingJob {
|
||||
|
||||
// 1.指定Kafka的相关配置属性
|
||||
Properties properties = new Properties();
|
||||
properties.setProperty("bootstrap.servers", "192.168.200.0:9092");
|
||||
properties.setProperty("bootstrap.servers", "192.168.0.229:9092");
|
||||
|
||||
// 2.接收Kafka上的数据
|
||||
DataStream<String> stream = env
|
||||
|
@ -1,232 +0,0 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.heibaiying</groupId>
|
||||
<artifactId>flink-time-watermark</artifactId>
|
||||
<version>1.0</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>Flink Quickstart Job</name>
|
||||
<url>http://www.myorganization.org</url>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<flink.version>1.9.0</flink.version>
|
||||
<java.version>1.8</java.version>
|
||||
<scala.binary.version>2.11</scala.binary.version>
|
||||
<maven.compiler.source>${java.version}</maven.compiler.source>
|
||||
<maven.compiler.target>${java.version}</maven.compiler.target>
|
||||
</properties>
|
||||
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>apache.snapshots</id>
|
||||
<name>Apache Development Snapshot Repository</name>
|
||||
<url>https://repository.apache.org/content/repositories/snapshots/</url>
|
||||
<releases>
|
||||
<enabled>false</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>true</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<dependencies>
|
||||
<!-- Apache Flink dependencies -->
|
||||
<!-- These dependencies are provided, because they should not be packaged into the JAR file. -->
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-java</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Add connector dependencies here. They must be in the default scope (compile). -->
|
||||
|
||||
<!-- Example:
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-connector-kafka-0.10_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
</dependency>
|
||||
-->
|
||||
|
||||
<!-- Add logging framework, to produce console output when running in the IDE. -->
|
||||
<!-- These dependencies are excluded from the application JAR by default. -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<version>1.7.7</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>1.2.17</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
|
||||
<!-- Java Compiler -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.1</version>
|
||||
<configuration>
|
||||
<source>${java.version}</source>
|
||||
<target>${java.version}</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
|
||||
<!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.0.0</version>
|
||||
<executions>
|
||||
<!-- Run shade goal on package phase -->
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<artifactSet>
|
||||
<excludes>
|
||||
<exclude>org.apache.flink:force-shading</exclude>
|
||||
<exclude>com.google.code.findbugs:jsr305</exclude>
|
||||
<exclude>org.slf4j:*</exclude>
|
||||
<exclude>log4j:*</exclude>
|
||||
</excludes>
|
||||
</artifactSet>
|
||||
<filters>
|
||||
<filter>
|
||||
<!-- Do not copy the signatures in the META-INF folder.
|
||||
Otherwise, this might cause SecurityExceptions when using the JAR. -->
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>com.heibaiying.WaterMarkJob</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
|
||||
<!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
|
||||
<plugin>
|
||||
<groupId>org.eclipse.m2e</groupId>
|
||||
<artifactId>lifecycle-mapping</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<configuration>
|
||||
<lifecycleMappingMetadata>
|
||||
<pluginExecutions>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<versionRange>[3.0.0,)</versionRange>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore/>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<versionRange>[3.1,)</versionRange>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore/>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
</pluginExecutions>
|
||||
</lifecycleMappingMetadata>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
|
||||
<!-- This profile helps to make things run out of the box in IntelliJ -->
|
||||
<!-- It adds Flink's core classes to the runtime class path. -->
|
||||
<!-- Otherwise they are missing in IntelliJ, because the dependency is 'provided' -->
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>add-dependencies-for-IDEA</id>
|
||||
|
||||
<activation>
|
||||
<property>
|
||||
<name>idea.version</name>
|
||||
</property>
|
||||
</activation>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-java</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</profile>
|
||||
</profiles>
|
||||
|
||||
</project>
|
@ -1,52 +0,0 @@
|
||||
package com.heibaiying;
|
||||
|
||||
import org.apache.flink.api.common.functions.MapFunction;
|
||||
import org.apache.flink.api.java.tuple.Tuple3;
|
||||
import org.apache.flink.streaming.api.TimeCharacteristic;
|
||||
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
|
||||
import org.apache.flink.streaming.api.watermark.Watermark;
|
||||
import org.apache.flink.streaming.api.windowing.time.Time;
|
||||
|
||||
public class PeriodicWatermarksJob {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||
// 设置并行度为1
|
||||
env.setParallelism(1);
|
||||
// 设置以事件时间为基准
|
||||
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
|
||||
DataStreamSource<String> streamSource = env.socketTextStream("192.168.200.229", 8888, "\n", 3);
|
||||
streamSource.map(new MapFunction<String, Tuple3<Long, String, Long>>() {
|
||||
@Override
|
||||
public Tuple3<Long, String, Long> map(String value) throws Exception {
|
||||
String[] split = value.split(",");
|
||||
return new Tuple3<>(Long.valueOf(split[0]), split[1], 1L);
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessGenerator())
|
||||
.keyBy(1).timeWindow(Time.seconds(3)).sum(2).print();
|
||||
env.execute();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class BoundedOutOfOrdernessGenerator implements AssignerWithPeriodicWatermarks<Tuple3<Long, String, Long>> {
|
||||
|
||||
private final long maxOutOfOrderness = 3000L;
|
||||
private long currentMaxTimestamp = 0L;
|
||||
|
||||
@Override
|
||||
public long extractTimestamp(Tuple3<Long, String, Long> element, long previousElementTimestamp) {
|
||||
long timestamp = element.f0;
|
||||
currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Watermark getCurrentWatermark() {
|
||||
return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
|
||||
}
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
package com.heibaiying;
|
||||
|
||||
import org.apache.flink.api.common.functions.MapFunction;
|
||||
import org.apache.flink.api.java.tuple.Tuple3;
|
||||
import org.apache.flink.streaming.api.TimeCharacteristic;
|
||||
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
|
||||
import org.apache.flink.streaming.api.watermark.Watermark;
|
||||
import org.apache.flink.streaming.api.windowing.time.Time;
|
||||
|
||||
public class PunctuatedWatermarksJob {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
|
||||
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||
env.setParallelism(1);
|
||||
// 设置以事件时间为基准
|
||||
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
|
||||
DataStreamSource<String> streamSource = env.socketTextStream("192.168.200.229", 8888, "\n", 3);
|
||||
streamSource.map(new MapFunction<String, Tuple3<Long, String, Long>>() {
|
||||
@Override
|
||||
public Tuple3<Long, String, Long> map(String value) throws Exception {
|
||||
String[] split = value.split(",");
|
||||
return new Tuple3<>(Long.valueOf(split[0]), split[1], 1L);
|
||||
}
|
||||
}).assignTimestampsAndWatermarks(new PunctuatedAssigner())
|
||||
.keyBy(1).timeWindow(Time.seconds(3)).sum(2).print();
|
||||
env.execute();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
class PunctuatedAssigner implements AssignerWithPunctuatedWatermarks<Tuple3<Long, String, Long>> {
|
||||
|
||||
@Override
|
||||
public long extractTimestamp(Tuple3<Long, String, Long> element, long previousElementTimestamp) {
|
||||
return element.f0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Watermark checkAndGetNextWatermark(Tuple3<Long, String, Long> lastElement, long extractedTimestamp) {
|
||||
return new Watermark(extractedTimestamp);
|
||||
}
|
||||
}
|
||||
|
@ -1,23 +0,0 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
log4j.rootLogger=INFO, console
|
||||
|
||||
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.console.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
@ -1,18 +0,0 @@
|
||||
1572501901000,hadoop
|
||||
1572501902000,hadoop
|
||||
1572501903000,hadoop
|
||||
1572501904000,flink
|
||||
1572501905000,spark
|
||||
1572501906000,spark -> (1572501901000,hadoop,2)
|
||||
1572501907000,hive
|
||||
1572501908000,hive
|
||||
1572501909000,hive -> (1572501903000,hadoop,1)
|
||||
(1572501905000,spark ,1)
|
||||
(1572501904000,flink,1)
|
||||
1572501910000,spark
|
||||
1572501911000,storm
|
||||
1572501912000,storm -> (1572501906000,spark,1)
|
||||
(1572501907000,hive,2)
|
||||
1572501915000,yarn -> (1572501911000,storm,1)
|
||||
(1572501909000,hive,1)
|
||||
(1572501910000,spark,1)
|
Reference in New Issue
Block a user