flink状态管理
This commit is contained in:
237
code/Flink/flink-state-management/pom.xml
Normal file
237
code/Flink/flink-state-management/pom.xml
Normal file
@ -0,0 +1,237 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>com.heibaiying</groupId>
|
||||
<artifactId>flink-state-management</artifactId>
|
||||
<version>1.0</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>Flink Quickstart Job</name>
|
||||
<url>http://www.myorganization.org</url>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<flink.version>1.9.0</flink.version>
|
||||
<java.version>1.8</java.version>
|
||||
<scala.binary.version>2.11</scala.binary.version>
|
||||
<maven.compiler.source>${java.version}</maven.compiler.source>
|
||||
<maven.compiler.target>${java.version}</maven.compiler.target>
|
||||
</properties>
|
||||
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>apache.snapshots</id>
|
||||
<name>Apache Development Snapshot Repository</name>
|
||||
<url>https://repository.apache.org/content/repositories/snapshots/</url>
|
||||
<releases>
|
||||
<enabled>false</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>true</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<dependencies>
|
||||
<!-- Apache Flink dependencies -->
|
||||
<!-- These dependencies are provided, because they should not be packaged into the JAR file. -->
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-java</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>provided</scope>
|
||||
</dependency>
|
||||
|
||||
<!-- Add connector dependencies here. They must be in the default scope (compile). -->
|
||||
|
||||
<!-- Example:
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-connector-kafka-0.10_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
</dependency>
|
||||
-->
|
||||
|
||||
<!-- Add logging framework, to produce console output when running in the IDE. -->
|
||||
<!-- These dependencies are excluded from the application JAR by default. -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<version>1.7.7</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>1.2.17</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
<!-- RocksDB state backend. Versions reference the shared properties
     (${scala.binary.version} = 2.11, ${flink.version} = 1.9.0) so this
     dependency stays in sync with the other Flink artifacts above
     instead of hard-coding "2.11" and "1.9.0". Resolved coordinates
     are unchanged. -->
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
    <version>${flink.version}</version>
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
|
||||
<!-- Java Compiler -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.1</version>
|
||||
<configuration>
|
||||
<source>${java.version}</source>
|
||||
<target>${java.version}</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- We use the maven-shade plugin to create a fat jar that contains all necessary dependencies. -->
|
||||
<!-- Change the value of <mainClass>...</mainClass> if your program entry point changes. -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.0.0</version>
|
||||
<executions>
|
||||
<!-- Run shade goal on package phase -->
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<artifactSet>
|
||||
<excludes>
|
||||
<exclude>org.apache.flink:force-shading</exclude>
|
||||
<exclude>com.google.code.findbugs:jsr305</exclude>
|
||||
<exclude>org.slf4j:*</exclude>
|
||||
<exclude>log4j:*</exclude>
|
||||
</excludes>
|
||||
</artifactSet>
|
||||
<filters>
|
||||
<filter>
|
||||
<!-- Do not copy the signatures in the META-INF folder.
|
||||
Otherwise, this might cause SecurityExceptions when using the JAR. -->
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/*.SF</exclude>
|
||||
<exclude>META-INF/*.DSA</exclude>
|
||||
<exclude>META-INF/*.RSA</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<mainClass>com.heibaiying.keyedstate.KeyedStateJob</mainClass>
|
||||
</transformer>
|
||||
</transformers>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
|
||||
<!-- This improves the out-of-the-box experience in Eclipse by resolving some warnings. -->
|
||||
<plugin>
|
||||
<groupId>org.eclipse.m2e</groupId>
|
||||
<artifactId>lifecycle-mapping</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<configuration>
|
||||
<lifecycleMappingMetadata>
|
||||
<pluginExecutions>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<versionRange>[3.0.0,)</versionRange>
|
||||
<goals>
|
||||
<goal>shade</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore/>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<versionRange>[3.1,)</versionRange>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore/>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
</pluginExecutions>
|
||||
</lifecycleMappingMetadata>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
|
||||
<!-- This profile helps to make things run out of the box in IntelliJ -->
|
||||
<!-- Its adds Flink's core classes to the runtime class path. -->
|
||||
<!-- Otherwise they are missing in IntelliJ, because the dependency is 'provided' -->
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>add-dependencies-for-IDEA</id>
|
||||
|
||||
<activation>
|
||||
<property>
|
||||
<name>idea.version</name>
|
||||
</property>
|
||||
</activation>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-java</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.flink</groupId>
|
||||
<artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
|
||||
<version>${flink.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</profile>
|
||||
</profiles>
|
||||
|
||||
</project>
|
@ -0,0 +1,24 @@
|
||||
package com.heibaiying.keyedstate;
|
||||
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
|
||||
|
||||
public class KeyedStateJob {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||
DataStreamSource<Tuple2<String, Long>> tuple2DataStreamSource = env.fromElements(
|
||||
Tuple2.of("a", 50L), Tuple2.of("a", 80L), Tuple2.of("a", 400L),
|
||||
Tuple2.of("a", 100L), Tuple2.of("a", 200L), Tuple2.of("a", 200L),
|
||||
Tuple2.of("b", 100L), Tuple2.of("b", 200L), Tuple2.of("b", 200L),
|
||||
Tuple2.of("b", 500L), Tuple2.of("b", 600L), Tuple2.of("b", 700L));
|
||||
tuple2DataStreamSource
|
||||
.keyBy(0)
|
||||
.flatMap(new ThresholdWarning(100L, 3))
|
||||
.printToErr();
|
||||
env.execute("Managed Keyed State");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
package com.heibaiying.keyedstate;
|
||||
|
||||
import org.apache.flink.api.common.functions.RichFlatMapFunction;
|
||||
import org.apache.flink.api.common.state.ListState;
|
||||
import org.apache.flink.api.common.state.ListStateDescriptor;
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
|
||||
import org.apache.flink.util.Collector;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ThresholdWarning extends RichFlatMapFunction<Tuple2<String, Long>, Tuple2<String, List<Long>>> {
|
||||
|
||||
// 通过ListState来存储非正常数据的状态
|
||||
private transient ListState<Long> abnormalData;
|
||||
// 需要监控阈值
|
||||
private Long threshold;
|
||||
// 达到阈值多少次后触发报警
|
||||
private Integer numberOfTimes;
|
||||
|
||||
ThresholdWarning(Long threshold, Integer numberOfTimes) {
|
||||
this.threshold = threshold;
|
||||
this.numberOfTimes = numberOfTimes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(Configuration parameters) {
|
||||
// 通过状态名称(句柄)获取状态实例,如果不存在则会自动创建
|
||||
abnormalData = getRuntimeContext().getListState(new ListStateDescriptor<>("abnormalData", Long.class));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flatMap(Tuple2<String, Long> value, Collector<Tuple2<String, List<Long>>> out) throws Exception {
|
||||
Long inputValue = value.f1;
|
||||
// 如果输入值超过阈值,则记录该次不正常的数据信息
|
||||
if (inputValue >= threshold) {
|
||||
abnormalData.add(inputValue);
|
||||
}
|
||||
ArrayList<Long> list = Lists.newArrayList(abnormalData.get().iterator());
|
||||
// 如果不正常的数据出现达到一定次数,则输出报警信息
|
||||
if (list.size() >= numberOfTimes) {
|
||||
out.collect(Tuple2.of(value.f0 + " 超过指定阈值 ", list));
|
||||
// 报警信息输出后,清空暂存的状态
|
||||
abnormalData.clear();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
package com.heibaiying.keyedstate;
|
||||
|
||||
import org.apache.flink.api.common.functions.RichFlatMapFunction;
|
||||
import org.apache.flink.api.common.state.ListState;
|
||||
import org.apache.flink.api.common.state.ListStateDescriptor;
|
||||
import org.apache.flink.api.common.state.StateTtlConfig;
|
||||
import org.apache.flink.api.common.time.Time;
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
import org.apache.flink.configuration.Configuration;
|
||||
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
|
||||
import org.apache.flink.util.Collector;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ThresholdWarningWithTTL extends RichFlatMapFunction<Tuple2<String, Long>, Tuple2<String, List<Long>>> {
|
||||
|
||||
private transient ListState<Long> abnormalData;
|
||||
private Long threshold;
|
||||
private Integer numberOfTimes;
|
||||
|
||||
ThresholdWarningWithTTL(Long threshold, Integer numberOfTimes) {
|
||||
this.threshold = threshold;
|
||||
this.numberOfTimes = numberOfTimes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void open(Configuration parameters) {
|
||||
StateTtlConfig ttlConfig = StateTtlConfig
|
||||
// 设置有效期为 10 秒
|
||||
.newBuilder(Time.seconds(10))
|
||||
// 设置有效期更新规则,这里设置为当创建和写入时,都重置其有效期到规定的10秒
|
||||
.setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
|
||||
// 设置只要值过期就不可见,另外一个可选值是 ReturnExpiredIfNotCleanedUp,代表即使值过期了,但如果还没有被删除,就是可见的
|
||||
.setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
|
||||
.build();
|
||||
ListStateDescriptor<Long> descriptor = new ListStateDescriptor<>("abnormalData", Long.class);
|
||||
descriptor.enableTimeToLive(ttlConfig);
|
||||
this.abnormalData = getRuntimeContext().getListState(descriptor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flatMap(Tuple2<String, Long> value, Collector<Tuple2<String, List<Long>>> out) throws Exception {
|
||||
Long inputValue = value.f1;
|
||||
if (inputValue >= threshold) {
|
||||
abnormalData.add(inputValue);
|
||||
}
|
||||
ArrayList<Long> list = Lists.newArrayList(abnormalData.get().iterator());
|
||||
if (list.size() >= numberOfTimes) {
|
||||
out.collect(Tuple2.of(value.f0 + " 超过指定阈值 ", list));
|
||||
abnormalData.clear();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
package com.heibaiying.operatorstate;
|
||||
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
import org.apache.flink.streaming.api.datastream.DataStreamSource;
|
||||
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
|
||||
|
||||
|
||||
public class OperatorStateJob {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
|
||||
// 开启检查点机制
|
||||
env.enableCheckpointing(1000);
|
||||
// 设置并行度为1
|
||||
DataStreamSource<Tuple2<String, Long>> tuple2DataStreamSource = env.setParallelism(1).fromElements(
|
||||
Tuple2.of("a", 50L), Tuple2.of("a", 80L), Tuple2.of("a", 400L),
|
||||
Tuple2.of("a", 100L), Tuple2.of("a", 200L), Tuple2.of("a", 200L),
|
||||
Tuple2.of("b", 100L), Tuple2.of("b", 200L), Tuple2.of("b", 200L),
|
||||
Tuple2.of("b", 500L), Tuple2.of("b", 600L), Tuple2.of("b", 700L));
|
||||
tuple2DataStreamSource
|
||||
.flatMap(new ThresholdWarning(100L, 3))
|
||||
.printToErr();
|
||||
env.execute("Managed Keyed State");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,72 @@
|
||||
package com.heibaiying.operatorstate;
|
||||
|
||||
import org.apache.flink.api.common.functions.RichFlatMapFunction;
|
||||
import org.apache.flink.api.common.state.ListState;
|
||||
import org.apache.flink.api.common.state.ListStateDescriptor;
|
||||
import org.apache.flink.api.common.typeinfo.TypeHint;
|
||||
import org.apache.flink.api.common.typeinfo.TypeInformation;
|
||||
import org.apache.flink.api.java.tuple.Tuple2;
|
||||
import org.apache.flink.runtime.state.FunctionInitializationContext;
|
||||
import org.apache.flink.runtime.state.FunctionSnapshotContext;
|
||||
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
|
||||
import org.apache.flink.util.Collector;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ThresholdWarning extends RichFlatMapFunction<Tuple2<String, Long>, Tuple2<String, List<Tuple2<String, Long>>>> implements CheckpointedFunction {
|
||||
|
||||
// 非正常数据
|
||||
private List<Tuple2<String, Long>> bufferedData;
|
||||
// checkPointedState
|
||||
private transient ListState<Tuple2<String, Long>> checkPointedState;
|
||||
// 需要监控的阈值
|
||||
private Long threshold;
|
||||
// 次数
|
||||
private Integer numberOfTimes;
|
||||
|
||||
ThresholdWarning(Long threshold, Integer numberOfTimes) {
|
||||
this.threshold = threshold;
|
||||
this.numberOfTimes = numberOfTimes;
|
||||
this.bufferedData = new ArrayList<>();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initializeState(FunctionInitializationContext context) throws Exception {
|
||||
// 注意这里获取的是OperatorStateStore
|
||||
checkPointedState = context.getOperatorStateStore().getListState(new ListStateDescriptor<>("abnormalData",
|
||||
TypeInformation.of(new TypeHint<Tuple2<String, Long>>() {
|
||||
})));
|
||||
// 如果发生重启,则需要从快照中将状态进行恢复
|
||||
if (context.isRestored()) {
|
||||
for (Tuple2<String, Long> element : checkPointedState.get()) {
|
||||
bufferedData.add(element);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void flatMap(Tuple2<String, Long> value, Collector<Tuple2<String, List<Tuple2<String, Long>>>> out) {
|
||||
Long inputValue = value.f1;
|
||||
// 超过阈值则进行记录
|
||||
if (inputValue >= threshold) {
|
||||
bufferedData.add(value);
|
||||
}
|
||||
// 超过指定次数则输出报警信息
|
||||
if (bufferedData.size() >= numberOfTimes) {
|
||||
// 顺便输出状态实例的hashcode
|
||||
out.collect(Tuple2.of(checkPointedState.hashCode() + "阈值警报!", bufferedData));
|
||||
bufferedData.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void snapshotState(FunctionSnapshotContext context) throws Exception {
|
||||
// 在进行快照时,将数据存储到checkPointedState
|
||||
checkPointedState.clear();
|
||||
for (Tuple2<String, Long> element : bufferedData) {
|
||||
checkPointedState.add(element);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,23 @@
|
||||
################################################################################
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
################################################################################
|
||||
|
||||
log4j.rootLogger=INFO, console
|
||||
|
||||
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.console.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
Reference in New Issue
Block a user