mirror of https://github.com/didi/KnowStreaming.git (synced 2026-01-02 02:02:13 +08:00)
Add km module kafka
streams/.gitignore (vendored, Normal file, 1 line)
@@ -0,0 +1 @@
/bin/
@@ -0,0 +1,45 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.pageview;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.TimestampExtractor;

/**
 * A timestamp extractor implementation that tries to extract event time from
 * the "timestamp" field in the Json formatted message.
 */
public class JsonTimestampExtractor implements TimestampExtractor {

    @Override
    public long extract(final ConsumerRecord<Object, Object> record, final long partitionTime) {
        if (record.value() instanceof PageViewTypedDemo.PageView) {
            return ((PageViewTypedDemo.PageView) record.value()).timestamp;
        }

        if (record.value() instanceof PageViewTypedDemo.UserProfile) {
            return ((PageViewTypedDemo.UserProfile) record.value()).timestamp;
        }

        if (record.value() instanceof JsonNode) {
            return ((JsonNode) record.value()).get("timestamp").longValue();
        }

        throw new IllegalArgumentException("JsonTimestampExtractor cannot recognize the record value " + record.value());
    }
}
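A minimal sketch of how a custom extractor like this is typically registered with a Streams application via the default timestamp extractor config; the class name and application id below are illustrative only, and PageViewTypedDemo further down wires it up the same way:

import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;

// illustrative sketch, assumed to live in the same package as JsonTimestampExtractor
public class TimestampExtractorConfigSketch {
    static Properties exampleConfig() {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "pageview-demo");     // illustrative id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // every record's event time is now taken from its JSON "timestamp" field
        props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
        return props;
    }
}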
@@ -0,0 +1,249 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.pageview;

import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.time.Duration;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.TimeWindows;

import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

/**
 * Demonstrates how to perform a join between a KStream and a KTable, i.e. an example of a stateful computation,
 * using specific data types (here: JSON POJO; but can also be Avro specific bindings, etc.) for serdes
 * in Kafka Streams.
 *
 * In this example, we join a stream of pageviews (aka clickstreams) that reads from a topic named "streams-pageview-input"
 * with a user profile table that reads from a topic named "streams-userprofile-input", where the data format
 * is a JSON string representing a record in the stream or table, to compute the number of pageviews per user region.
 *
 * Before running this example you must create the input topics and the output topic (e.g. via
 * bin/kafka-topics --create ...), and write some data to the input topics (e.g. via
 * bin/kafka-console-producer). Otherwise you won't see any data arriving in the output topic.
 *
 * The inputs for this example are:
 * - Topic: streams-pageview-input
 *   Key Format: (String) USER_ID
 *   Value Format: (JSON) {"_t": "pv", "user": (String USER_ID), "page": (String PAGE_ID), "timestamp": (long ms TIMESTAMP)}
 *
 * - Topic: streams-userprofile-input
 *   Key Format: (String) USER_ID
 *   Value Format: (JSON) {"_t": "up", "region": (String REGION), "timestamp": (long ms TIMESTAMP)}
 *
 * To observe the results, read the output topic (e.g., via bin/kafka-console-consumer)
 * - Topic: streams-pageviewstats-typed-output
 *   Key Format: (JSON) {"_t": "wpvbr", "windowStart": (long ms WINDOW_TIMESTAMP), "region": (String REGION)}
 *   Value Format: (JSON) {"_t": "rc", "count": (long REGION_COUNT), "region": (String REGION)}
 *
 * Note, the "_t" field is necessary to help Jackson identify the correct class for deserialization in the
 * generic {@link JSONSerde}. If you instead specify a specific serde per class, you won't need the extra "_t" field.
 */
@SuppressWarnings({"WeakerAccess", "unused"})
public class PageViewTypedDemo {

    /**
     * A serde for any class that implements {@link JSONSerdeCompatible}. Note that the classes also need to
     * be registered in the {@code @JsonSubTypes} annotation on {@link JSONSerdeCompatible}.
     *
     * @param <T> The concrete type of the class that gets de/serialized
     */
    public static class JSONSerde<T extends JSONSerdeCompatible> implements Serializer<T>, Deserializer<T>, Serde<T> {
        private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

        @Override
        public void configure(final Map<String, ?> configs, final boolean isKey) {}

        @SuppressWarnings("unchecked")
        @Override
        public T deserialize(final String topic, final byte[] data) {
            if (data == null) {
                return null;
            }

            try {
                return (T) OBJECT_MAPPER.readValue(data, JSONSerdeCompatible.class);
            } catch (final IOException e) {
                throw new SerializationException(e);
            }
        }

        @Override
        public byte[] serialize(final String topic, final T data) {
            if (data == null) {
                return null;
            }

            try {
                return OBJECT_MAPPER.writeValueAsBytes(data);
            } catch (final Exception e) {
                throw new SerializationException("Error serializing JSON message", e);
            }
        }

        @Override
        public void close() {}

        @Override
        public Serializer<T> serializer() {
            return this;
        }

        @Override
        public Deserializer<T> deserializer() {
            return this;
        }
    }

    /**
     * An interface for registering types that can be de/serialized with {@link JSONSerde}.
     */
    @SuppressWarnings("DefaultAnnotationParam") // being explicit for the example
    @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "_t")
    @JsonSubTypes({
        @JsonSubTypes.Type(value = PageView.class, name = "pv"),
        @JsonSubTypes.Type(value = UserProfile.class, name = "up"),
        @JsonSubTypes.Type(value = PageViewByRegion.class, name = "pvbr"),
        @JsonSubTypes.Type(value = WindowedPageViewByRegion.class, name = "wpvbr"),
        @JsonSubTypes.Type(value = RegionCount.class, name = "rc")
    })
    public interface JSONSerdeCompatible {

    }

    // POJO classes
    static public class PageView implements JSONSerdeCompatible {
        public String user;
        public String page;
        public Long timestamp;
    }

    static public class UserProfile implements JSONSerdeCompatible {
        public String region;
        public Long timestamp;
    }

    static public class PageViewByRegion implements JSONSerdeCompatible {
        public String user;
        public String page;
        public String region;
    }

    static public class WindowedPageViewByRegion implements JSONSerdeCompatible {
        public long windowStart;
        public String region;
    }

    static public class RegionCount implements JSONSerdeCompatible {
        public long count;
        public String region;
    }

    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-typed");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, JSONSerde.class);
        props.put(StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS, JSONSerde.class);
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, JSONSerde.class);
        props.put(StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS, JSONSerde.class);
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
        props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);

        // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        final StreamsBuilder builder = new StreamsBuilder();

        final KStream<String, PageView> views = builder.stream("streams-pageview-input", Consumed.with(Serdes.String(), new JSONSerde<>()));

        final KTable<String, UserProfile> users = builder.table("streams-userprofile-input", Consumed.with(Serdes.String(), new JSONSerde<>()));

        final KStream<WindowedPageViewByRegion, RegionCount> regionCount = views
            .leftJoin(users, (view, profile) -> {
                final PageViewByRegion viewByRegion = new PageViewByRegion();
                viewByRegion.user = view.user;
                viewByRegion.page = view.page;

                if (profile != null) {
                    viewByRegion.region = profile.region;
                } else {
                    viewByRegion.region = "UNKNOWN";
                }
                return viewByRegion;
            })
            .map((user, viewRegion) -> new KeyValue<>(viewRegion.region, viewRegion))
            .groupByKey(Grouped.with(Serdes.String(), new JSONSerde<>()))
            .windowedBy(TimeWindows.of(Duration.ofDays(7)).advanceBy(Duration.ofSeconds(1)))
            .count()
            .toStream()
            .map((key, value) -> {
                final WindowedPageViewByRegion wViewByRegion = new WindowedPageViewByRegion();
                wViewByRegion.windowStart = key.window().start();
                wViewByRegion.region = key.key();

                final RegionCount rCount = new RegionCount();
                rCount.region = key.key();
                rCount.count = value;

                return new KeyValue<>(wViewByRegion, rCount);
            });

        // write to the result topic
        regionCount.to("streams-pageviewstats-typed-output");

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        final CountDownLatch latch = new CountDownLatch(1);

        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread("streams-pipe-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();
            latch.await();
        } catch (final Throwable e) {
            e.printStackTrace();
            System.exit(1);
        }
        System.exit(0);
    }
}
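The input formats documented in the javadoc above can be seeded with any JSON-producing client. A minimal, illustrative producer sketch follows; the class name, user/page/region values and timestamps are made up for the example, while the broker address and topic names come from the javadoc:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

// illustrative sketch only: seeds the two input topics with records in the documented "_t"-tagged format
public class PageViewSampleDataSketch {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            // one profile for user "alice" ...
            producer.send(new ProducerRecord<>("streams-userprofile-input", "alice",
                "{\"_t\": \"up\", \"region\": \"europe\", \"timestamp\": 1000}"));
            // ... and two page views attributed to her
            producer.send(new ProducerRecord<>("streams-pageview-input", "alice",
                "{\"_t\": \"pv\", \"user\": \"alice\", \"page\": \"index.html\", \"timestamp\": 2000}"));
            producer.send(new ProducerRecord<>("streams-pageview-input", "alice",
                "{\"_t\": \"pv\", \"user\": \"alice\", \"page\": \"news.html\", \"timestamp\": 3000}"));
            producer.flush();
        }
    }
}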
@@ -0,0 +1,117 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.pageview;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.time.Duration;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.connect.json.JsonDeserializer;
import org.apache.kafka.connect.json.JsonSerializer;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.TimeWindows;

import java.util.Properties;

/**
 * Demonstrates how to perform a join between a KStream and a KTable, i.e. an example of a stateful computation,
 * using general data types (here: JSON; but can also be Avro generic bindings, etc.) for serdes
 * in Kafka Streams.
 *
 * In this example, we join a stream of pageviews (aka clickstreams) that reads from a topic named "streams-pageview-input"
 * with a user profile table that reads from a topic named "streams-userprofile-input", where the data format
 * is a JSON string representing a record in the stream or table, to compute the number of pageviews per user region.
 *
 * Before running this example you must create the input topics and the output topic (e.g. via
 * bin/kafka-topics.sh --create ...), and write some data to the input topics (e.g. via
 * bin/kafka-console-producer.sh). Otherwise you won't see any data arriving in the output topic.
 */
public class PageViewUntypedDemo {

    public static void main(final String[] args) throws Exception {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-untyped");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);

        // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        final StreamsBuilder builder = new StreamsBuilder();

        final Serializer<JsonNode> jsonSerializer = new JsonSerializer();
        final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer();
        final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer);

        final Consumed<String, JsonNode> consumed = Consumed.with(Serdes.String(), jsonSerde);
        final KStream<String, JsonNode> views = builder.stream("streams-pageview-input", consumed);

        final KTable<String, JsonNode> users = builder.table("streams-userprofile-input", consumed);

        final KTable<String, String> userRegions = users.mapValues(record -> record.get("region").textValue());

        final KStream<JsonNode, JsonNode> regionCount = views
            .leftJoin(userRegions, (view, region) -> {
                final ObjectNode jNode = JsonNodeFactory.instance.objectNode();
                return (JsonNode) jNode.put("user", view.get("user").textValue())
                    .put("page", view.get("page").textValue())
                    .put("region", region == null ? "UNKNOWN" : region);

            })
            .map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").textValue(), viewRegion))
            .groupByKey(Grouped.with(Serdes.String(), jsonSerde))
            .windowedBy(TimeWindows.of(Duration.ofDays(7)).advanceBy(Duration.ofSeconds(1)))
            .count()
            .toStream()
            .map((key, value) -> {
                final ObjectNode keyNode = JsonNodeFactory.instance.objectNode();
                keyNode.put("window-start", key.window().start())
                    .put("region", key.key());

                final ObjectNode valueNode = JsonNodeFactory.instance.objectNode();
                valueNode.put("count", value);

                return new KeyValue<>((JsonNode) keyNode, (JsonNode) valueNode);
            });

        // write to the result topic
        regionCount.to("streams-pageviewstats-untyped-output", Produced.with(jsonSerde, jsonSerde));

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        streams.start();

        // usually the stream application would be running forever,
        // in this example we just let it run for some time and stop since the input data is finite.
        Thread.sleep(5000L);

        streams.close();
    }
}
@@ -0,0 +1,75 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.pipe;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;

import java.util.Properties;
import java.util.concurrent.CountDownLatch;

/**
 * Demonstrates, using the high-level KStream DSL, how to read data from a source (input) topic and how to
 * write data to a sink (output) topic.
 *
 * In this example, we implement a simple "pipe" program that reads from a source topic "streams-plaintext-input"
 * and writes the data as-is (i.e. unmodified) into a sink topic "streams-pipe-output".
 *
 * Before running this example you must create the input topic and the output topic (e.g. via
 * bin/kafka-topics.sh --create ...), and write some data to the input topic (e.g. via
 * bin/kafka-console-producer.sh). Otherwise you won't see any data arriving in the output topic.
 */
public class PipeDemo {

    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        final StreamsBuilder builder = new StreamsBuilder();

        builder.stream("streams-plaintext-input").to("streams-pipe-output");

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        final CountDownLatch latch = new CountDownLatch(1);

        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread("streams-pipe-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();
            latch.await();
        } catch (final Throwable e) {
            System.exit(1);
        }
        System.exit(0);
    }
}
@@ -0,0 +1,127 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.temperature;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.kstream.WindowedSerdes;

import java.time.Duration;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

/**
 * Demonstrates, using the high-level KStream DSL, how to implement an IoT demo application
 * which ingests temperature values, computes the maximum value in the latest TEMPERATURE_WINDOW_SIZE seconds (which
 * is 5 seconds) and sends a new message if it exceeds the TEMPERATURE_THRESHOLD (which is 20).
 *
 * In this example, the input stream reads from a topic named "iot-temperature", where the values of messages
 * represent temperature values; using a TEMPERATURE_WINDOW_SIZE seconds "tumbling" window, the maximum value is processed and
 * sent to a topic named "iot-temperature-max" if it exceeds the TEMPERATURE_THRESHOLD.
 *
 * Before running this example you must create the input topic for temperature values in the following way:
 *
 * bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic iot-temperature
 *
 * and at the same time the output topic for filtered values:
 *
 * bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic iot-temperature-max
 *
 * After that, a console consumer can be started in order to read filtered values from the "iot-temperature-max" topic:
 *
 * bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic iot-temperature-max --from-beginning
 *
 * On the other side, a console producer can be used for sending temperature values (which need to be integers)
 * to "iot-temperature", typing them on the console:
 *
 * bin/kafka-console-producer.sh --broker-list localhost:9092 --topic iot-temperature
 * > 10
 * > 15
 * > 22
 */
public class TemperatureDemo {

    // threshold used for filtering max temperature values
    private static final int TEMPERATURE_THRESHOLD = 20;
    // window size within which the filtering is applied
    private static final int TEMPERATURE_WINDOW_SIZE = 5;

    public static void main(final String[] args) {

        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-temperature");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);

        final StreamsBuilder builder = new StreamsBuilder();

        final KStream<String, String> source = builder.stream("iot-temperature");

        final KStream<Windowed<String>, String> max = source
            // temperature values are sent without a key (null), so in order
            // to group and reduce them, a key is needed ("temp" has been chosen)
            .selectKey((key, value) -> "temp")
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofSeconds(TEMPERATURE_WINDOW_SIZE)))
            .reduce((value1, value2) -> {
                if (Integer.parseInt(value1) > Integer.parseInt(value2)) {
                    return value1;
                } else {
                    return value2;
                }
            })
            .toStream()
            .filter((key, value) -> Integer.parseInt(value) > TEMPERATURE_THRESHOLD);

        final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(String.class);

        // need to override key serde to Windowed<String> type
        max.to("iot-temperature-max", Produced.with(windowedSerde, Serdes.String()));

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        final CountDownLatch latch = new CountDownLatch(1);

        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread("streams-temperature-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();
            latch.await();
        } catch (final Throwable e) {
            System.exit(1);
        }
        System.exit(0);
    }
}
@@ -0,0 +1,102 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.wordcount;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;

import java.util.Arrays;
import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

/**
 * Demonstrates, using the high-level KStream DSL, how to implement the WordCount program
 * that computes a simple word occurrence histogram from an input text.
 * <p>
 * In this example, the input stream reads from a topic named "streams-plaintext-input", where the values of messages
 * represent lines of text; and the histogram output is written to topic "streams-wordcount-output" where each record
 * is an updated count of a single word.
 * <p>
 * Before running this example you must create the input topic and the output topic (e.g. via
 * {@code bin/kafka-topics.sh --create ...}), and write some data to the input topic (e.g. via
 * {@code bin/kafka-console-producer.sh}). Otherwise you won't see any data arriving in the output topic.
 */
public final class WordCountDemo {

    public static final String INPUT_TOPIC = "streams-plaintext-input";
    public static final String OUTPUT_TOPIC = "streams-wordcount-output";

    static Properties getStreamsConfig() {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());

        // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
        // Note: To re-run the demo, you need to use the offset reset tool:
        // https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        return props;
    }

    static void createWordCountStream(final StreamsBuilder builder) {
        final KStream<String, String> source = builder.stream(INPUT_TOPIC);

        final KTable<String, Long> counts = source
            .flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split(" ")))
            .groupBy((key, value) -> value)
            .count();

        // need to override value serde to Long type
        counts.toStream().to(OUTPUT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()));
    }

    public static void main(final String[] args) {
        final Properties props = getStreamsConfig();

        final StreamsBuilder builder = new StreamsBuilder();
        createWordCountStream(builder);
        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        final CountDownLatch latch = new CountDownLatch(1);

        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread("streams-wordcount-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();
            latch.await();
        } catch (final Throwable e) {
            System.exit(1);
        }
        System.exit(0);
    }
}
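Because the counts are written with Serdes.Long(), reading "streams-wordcount-output" needs a long deserializer for the values, otherwise the counts appear garbled. A minimal consumer sketch for inspecting the output; the class name, group id and loop bound are illustrative only:

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;

// illustrative sketch only: reads the word counts produced by WordCountDemo
public class WordCountOutputReaderSketch {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "wordcount-output-reader"); // illustrative group id
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, LongDeserializer.class);

        try (KafkaConsumer<String, Long> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Collections.singletonList("streams-wordcount-output"));
            // poll a few times and print whatever has been produced so far
            for (int i = 0; i < 10; i++) {
                for (final ConsumerRecord<String, Long> record : consumer.poll(Duration.ofSeconds(1))) {
                    // each record is the latest count for one word
                    System.out.println(record.key() + " -> " + record.value());
                }
            }
        }
    }
}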
@@ -0,0 +1,150 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.wordcount;

import java.time.Duration;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.Stores;

import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

/**
 * Demonstrates, using the low-level Processor APIs, how to implement the WordCount program
 * that computes a simple word occurrence histogram from an input text.
 * <p>
 * <strong>Note: This is simplified code that only works correctly for single partition input topics.
 * Check out {@link WordCountDemo} for a generic example.</strong>
 * <p>
 * In this example, the input stream reads from a topic named "streams-plaintext-input", where the values of messages
 * represent lines of text; and the histogram output is written to topic "streams-wordcount-processor-output" where each record
 * is an updated count of a single word.
 * <p>
 * Before running this example you must create the input topic and the output topic (e.g. via
 * {@code bin/kafka-topics.sh --create ...}), and write some data to the input topic (e.g. via
 * {@code bin/kafka-console-producer.sh}). Otherwise you won't see any data arriving in the output topic.
 */
public final class WordCountProcessorDemo {

    static class MyProcessorSupplier implements ProcessorSupplier<String, String> {

        @Override
        public Processor<String, String> get() {
            return new Processor<String, String>() {
                private ProcessorContext context;
                private KeyValueStore<String, Integer> kvStore;

                @Override
                @SuppressWarnings("unchecked")
                public void init(final ProcessorContext context) {
                    this.context = context;
                    this.context.schedule(Duration.ofSeconds(1), PunctuationType.STREAM_TIME, timestamp -> {
                        try (final KeyValueIterator<String, Integer> iter = kvStore.all()) {
                            System.out.println("----------- " + timestamp + " ----------- ");

                            while (iter.hasNext()) {
                                final KeyValue<String, Integer> entry = iter.next();

                                System.out.println("[" + entry.key + ", " + entry.value + "]");

                                context.forward(entry.key, entry.value.toString());
                            }
                        }
                    });
                    this.kvStore = (KeyValueStore<String, Integer>) context.getStateStore("Counts");
                }

                @Override
                public void process(final String dummy, final String line) {
                    final String[] words = line.toLowerCase(Locale.getDefault()).split(" ");

                    for (final String word : words) {
                        final Integer oldValue = this.kvStore.get(word);

                        if (oldValue == null) {
                            this.kvStore.put(word, 1);
                        } else {
                            this.kvStore.put(word, oldValue + 1);
                        }
                    }

                    context.commit();
                }

                @Override
                public void close() {}
            };
        }
    }

    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount-processor");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        final Topology builder = new Topology();

        builder.addSource("Source", "streams-plaintext-input");

        builder.addProcessor("Process", new MyProcessorSupplier(), "Source");
        builder.addStateStore(Stores.keyValueStoreBuilder(
            Stores.inMemoryKeyValueStore("Counts"),
            Serdes.String(),
            Serdes.Integer()),
            "Process");

        builder.addSink("Sink", "streams-wordcount-processor-output", "Process");

        final KafkaStreams streams = new KafkaStreams(builder, props);
        final CountDownLatch latch = new CountDownLatch(1);

        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread("streams-wordcount-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();
            latch.await();
        } catch (final Throwable e) {
            System.exit(1);
        }
        System.exit(0);
    }
}
@@ -0,0 +1,187 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.docs;

import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.Stores;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.time.Duration;
import java.time.Instant;
import java.util.Properties;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;

/**
 * This is a code sample in docs/streams/developer-guide/testing.html
 */
public class DeveloperGuideTesting {
    private TopologyTestDriver testDriver;
    private TestInputTopic<String, Long> inputTopic;
    private TestOutputTopic<String, Long> outputTopic;
    private KeyValueStore<String, Long> store;

    private Serde<String> stringSerde = new Serdes.StringSerde();
    private Serde<Long> longSerde = new Serdes.LongSerde();

    @Before
    public void setup() {
        final Topology topology = new Topology();
        topology.addSource("sourceProcessor", "input-topic");
        topology.addProcessor("aggregator", new CustomMaxAggregatorSupplier(), "sourceProcessor");
        topology.addStateStore(
            Stores.keyValueStoreBuilder(
                Stores.inMemoryKeyValueStore("aggStore"),
                Serdes.String(),
                Serdes.Long()).withLoggingDisabled(), // need to disable logging to allow store pre-populating
            "aggregator");
        topology.addSink("sinkProcessor", "result-topic", "aggregator");

        // setup test driver
        final Properties props = new Properties();
        props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "maxAggregation");
        props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");
        props.setProperty(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
        props.setProperty(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());
        testDriver = new TopologyTestDriver(topology, props);

        // setup test topics
        inputTopic = testDriver.createInputTopic("input-topic", stringSerde.serializer(), longSerde.serializer());
        outputTopic = testDriver.createOutputTopic("result-topic", stringSerde.deserializer(), longSerde.deserializer());

        // pre-populate store
        store = testDriver.getKeyValueStore("aggStore");
        store.put("a", 21L);
    }

    @After
    public void tearDown() {
        testDriver.close();
    }

    @Test
    public void shouldFlushStoreForFirstInput() {
        inputTopic.pipeInput("a", 1L);
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
        assertThat(outputTopic.isEmpty(), is(true));
    }

    @Test
    public void shouldNotUpdateStoreForSmallerValue() {
        inputTopic.pipeInput("a", 1L);
        assertThat(store.get("a"), equalTo(21L));
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
        assertThat(outputTopic.isEmpty(), is(true));
    }

    @Test
    public void shouldUpdateStoreForLargerValue() {
        inputTopic.pipeInput("a", 42L);
        assertThat(store.get("a"), equalTo(42L));
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 42L)));
        assertThat(outputTopic.isEmpty(), is(true));
    }

    @Test
    public void shouldUpdateStoreForNewKey() {
        inputTopic.pipeInput("b", 21L);
        assertThat(store.get("b"), equalTo(21L));
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("b", 21L)));
        assertThat(outputTopic.isEmpty(), is(true));
    }

    @Test
    public void shouldPunctuateIfEvenTimeAdvances() {
        final Instant recordTime = Instant.now();
        inputTopic.pipeInput("a", 1L, recordTime);
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));

        inputTopic.pipeInput("a", 1L, recordTime);
        assertThat(outputTopic.isEmpty(), is(true));

        inputTopic.pipeInput("a", 1L, recordTime.plusSeconds(10L));
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
        assertThat(outputTopic.isEmpty(), is(true));
    }

    @Test
    public void shouldPunctuateIfWallClockTimeAdvances() {
        testDriver.advanceWallClockTime(Duration.ofSeconds(60));
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
        assertThat(outputTopic.isEmpty(), is(true));
    }

    public class CustomMaxAggregatorSupplier implements ProcessorSupplier<String, Long> {
        @Override
        public Processor<String, Long> get() {
            return new CustomMaxAggregator();
        }
    }

    public class CustomMaxAggregator implements Processor<String, Long> {
        ProcessorContext context;
        private KeyValueStore<String, Long> store;

        @SuppressWarnings("unchecked")
        @Override
        public void init(final ProcessorContext context) {
            this.context = context;
            context.schedule(Duration.ofSeconds(60), PunctuationType.WALL_CLOCK_TIME, time -> flushStore());
            context.schedule(Duration.ofSeconds(10), PunctuationType.STREAM_TIME, time -> flushStore());
            store = (KeyValueStore<String, Long>) context.getStateStore("aggStore");
        }

        @Override
        public void process(final String key, final Long value) {
            final Long oldValue = store.get(key);
            if (oldValue == null || value > oldValue) {
                store.put(key, value);
            }
        }

        private void flushStore() {
            final KeyValueIterator<String, Long> it = store.all();
            while (it.hasNext()) {
                final KeyValue<String, Long> next = it.next();
                context.forward(next.key, next.value);
            }
        }

        @Override
        public void close() {}
    }
}
@@ -0,0 +1,112 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.wordcount;

import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;

/**
 * Unit test of {@link WordCountDemo} stream using TopologyTestDriver.
 */
public class WordCountDemoTest {

    private TopologyTestDriver testDriver;
    private TestInputTopic<String, String> inputTopic;
    private TestOutputTopic<String, Long> outputTopic;

    @Before
    public void setup() {
        final StreamsBuilder builder = new StreamsBuilder();
        //Create Actual Stream Processing pipeline
        WordCountDemo.createWordCountStream(builder);
        testDriver = new TopologyTestDriver(builder.build(), WordCountDemo.getStreamsConfig());
        inputTopic = testDriver.createInputTopic(WordCountDemo.INPUT_TOPIC, new StringSerializer(), new StringSerializer());
        outputTopic = testDriver.createOutputTopic(WordCountDemo.OUTPUT_TOPIC, new StringDeserializer(), new LongDeserializer());
    }

    @After
    public void tearDown() {
        try {
            testDriver.close();
        } catch (final RuntimeException e) {
            // https://issues.apache.org/jira/browse/KAFKA-6647 causes exception when executed in Windows, ignoring it
            // Logged stacktrace cannot be avoided
            System.out.println("Ignoring exception, test failing in Windows due this exception:" + e.getLocalizedMessage());
        }
    }

    /**
     * Simple test validating count of one word
     */
    @Test
    public void testOneWord() {
        //Feed word "Hello" to inputTopic and no kafka key, timestamp is irrelevant in this case
        inputTopic.pipeInput("Hello");
        //Read and validate output to match word as key and count as value
        assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("hello", 1L)));
        //No more output in topic
        assertThat(outputTopic.isEmpty(), is(true));
    }

    /**
     * Test Word count of sentence list.
     */
    @Test
    public void testCountListOfWords() {
        final List<String> inputValues = Arrays.asList(
            "Apache Kafka Streams Example",
            "Using Kafka Streams Test Utils",
            "Reading and Writing Kafka Topic"
        );
        final Map<String, Long> expectedWordCounts = new HashMap<>();
        expectedWordCounts.put("apache", 1L);
        expectedWordCounts.put("kafka", 3L);
        expectedWordCounts.put("streams", 2L);
        expectedWordCounts.put("example", 1L);
        expectedWordCounts.put("using", 1L);
        expectedWordCounts.put("test", 1L);
        expectedWordCounts.put("utils", 1L);
        expectedWordCounts.put("reading", 1L);
        expectedWordCounts.put("and", 1L);
        expectedWordCounts.put("writing", 1L);
        expectedWordCounts.put("topic", 1L);

        inputTopic.pipeValueList(inputValues);
        final Map<String, Long> actualWordCounts = outputTopic.readKeyValuesToMap();
        assertThat(actualWordCounts, equalTo(expectedWordCounts));
    }

}
@@ -0,0 +1,70 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.examples.wordcount;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.processor.MockProcessorContext;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.Stores;
import org.junit.Test;

import java.util.Iterator;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Demonstrate the use of {@link MockProcessorContext} for testing the {@link Processor} in the {@link WordCountProcessorDemo}.
 */
public class WordCountProcessorTest {
    @Test
    public void test() {
        final MockProcessorContext context = new MockProcessorContext();

        // Create, initialize, and register the state store.
        final KeyValueStore<String, Integer> store =
            Stores.keyValueStoreBuilder(Stores.inMemoryKeyValueStore("Counts"), Serdes.String(), Serdes.Integer())
                .withLoggingDisabled() // Changelog is not supported by MockProcessorContext.
                .build();
        store.init(context, store);
        context.register(store, null);

        // Create and initialize the processor under test
        final Processor<String, String> processor = new WordCountProcessorDemo.MyProcessorSupplier().get();
        processor.init(context);

        // send a record to the processor
        processor.process("key", "alpha beta gamma alpha");

        // note that the processor commits, but does not forward, during process()
        assertTrue(context.committed());
        assertTrue(context.forwarded().isEmpty());

        // now, we trigger the punctuator, which iterates over the state store and forwards the contents.
        context.scheduledPunctuators().get(0).getPunctuator().punctuate(0L);

        // finally, we can verify the output.
        final Iterator<MockProcessorContext.CapturedForward> capturedForwards = context.forwarded().iterator();
        assertEquals(new KeyValue<>("alpha", "2"), capturedForwards.next().keyValue());
        assertEquals(new KeyValue<>("beta", "1"), capturedForwards.next().keyValue());
        assertEquals(new KeyValue<>("gamma", "1"), capturedForwards.next().keyValue());
        assertFalse(capturedForwards.hasNext());
    }
}
streams/quickstart/java/pom.xml (Normal file, 36 lines)
@@ -0,0 +1,36 @@
<!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements. See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <parent>
        <groupId>org.apache.kafka</groupId>
        <artifactId>streams-quickstart</artifactId>
        <version>2.5.2-SNAPSHOT</version>
        <relativePath>..</relativePath>
    </parent>

    <artifactId>streams-quickstart-java</artifactId>
    <packaging>maven-archetype</packaging>

</project>
@@ -0,0 +1,34 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<archetype-descriptor
|
||||
xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
|
||||
name="streams-quickstart-java">
|
||||
<fileSets>
|
||||
<fileSet filtered="true" packaged="true" encoding="UTF-8">
|
||||
<directory>src/main/java</directory>
|
||||
<includes>
|
||||
<include>**/*.java</include>
|
||||
</includes>
|
||||
</fileSet>
|
||||
<fileSet encoding="UTF-8">
|
||||
<directory>src/main/resources</directory>
|
||||
</fileSet>
|
||||
</fileSets>
|
||||
</archetype-descriptor>
|
||||
@@ -0,0 +1,136 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>${groupId}</groupId>
|
||||
<artifactId>${artifactId}</artifactId>
|
||||
<version>${version}</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>Kafka Streams Quickstart :: Java</name>
|
||||
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
<kafka.version>2.5.2-SNAPSHOT</kafka.version>
|
||||
<slf4j.version>1.7.7</slf4j.version>
|
||||
<log4j.version>1.2.17</log4j.version>
|
||||
</properties>
|
||||
|
||||
<repositories>
|
||||
<repository>
|
||||
<id>apache.snapshots</id>
|
||||
<name>Apache Development Snapshot Repository</name>
|
||||
<url>https://repository.apache.org/content/repositories/snapshots/</url>
|
||||
<releases>
|
||||
<enabled>false</enabled>
|
||||
</releases>
|
||||
<snapshots>
|
||||
<enabled>true</enabled>
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<!--
|
||||
Execute "mvn clean package -Pbuild-jar"
|
||||
to build a jar file out of this project!
|
||||
-->
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.1</version>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.8</source>
|
||||
<target>1.8</target>
|
||||
<compilerId>jdt</compilerId>
|
||||
</configuration>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.eclipse.tycho</groupId>
|
||||
<artifactId>tycho-compiler-jdt</artifactId>
|
||||
<version>0.21.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.eclipse.m2e</groupId>
|
||||
<artifactId>lifecycle-mapping</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<configuration>
|
||||
<lifecycleMappingMetadata>
|
||||
<pluginExecutions>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<versionRange>[2.4,)</versionRange>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore/>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<versionRange>[3.1,)</versionRange>
|
||||
<goals>
|
||||
<goal>testCompile</goal>
|
||||
<goal>compile</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore/>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
</pluginExecutions>
|
||||
</lifecycleMappingMetadata>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<!-- Apache Kafka dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.apache.kafka</groupId>
|
||||
<artifactId>kafka-streams</artifactId>
|
||||
<version>${kafka.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ${package};
|
||||
|
||||
import org.apache.kafka.common.serialization.Serdes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.StreamsBuilder;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.kstream.ValueMapper;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
/**
|
||||
* In this example, we implement a simple LineSplit program using the high-level Streams DSL
|
||||
* that reads from a source topic "streams-plaintext-input", where the values of messages represent lines of text;
|
||||
* the code split each text line in string into words and then write back into a sink topic "streams-linesplit-output" where
|
||||
* each record represents a single word.
|
||||
*/
|
||||
public class LineSplit {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Properties props = new Properties();
|
||||
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-linesplit");
|
||||
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
|
||||
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
|
||||
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
|
||||
|
||||
final StreamsBuilder builder = new StreamsBuilder();
|
||||
|
||||
builder.<String, String>stream("streams-plaintext-input")
|
||||
.flatMapValues(value -> Arrays.asList(value.split("\\W+")))
|
||||
.to("streams-linesplit-output");
|
||||
|
||||
final Topology topology = builder.build();
|
||||
final KafkaStreams streams = new KafkaStreams(topology, props);
|
||||
final CountDownLatch latch = new CountDownLatch(1);
|
||||
|
||||
// attach shutdown handler to catch control-c
|
||||
Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
|
||||
@Override
|
||||
public void run() {
|
||||
streams.close();
|
||||
latch.countDown();
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
streams.start();
|
||||
latch.await();
|
||||
} catch (Throwable e) {
|
||||
System.exit(1);
|
||||
}
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ${package};
|
||||
|
||||
import org.apache.kafka.common.serialization.Serdes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.StreamsBuilder;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
/**
|
||||
* In this example, we implement a simple LineSplit program using the high-level Streams DSL
|
||||
* that reads from a source topic "streams-plaintext-input", where the values of messages represent lines of text,
|
||||
* and writes the messages as-is into a sink topic "streams-pipe-output".
|
||||
*/
|
||||
public class Pipe {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Properties props = new Properties();
|
||||
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
|
||||
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
|
||||
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
|
||||
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
|
||||
|
||||
final StreamsBuilder builder = new StreamsBuilder();
|
||||
|
||||
builder.stream("streams-plaintext-input").to("streams-pipe-output");
|
||||
|
||||
final Topology topology = builder.build();
|
||||
final KafkaStreams streams = new KafkaStreams(topology, props);
|
||||
final CountDownLatch latch = new CountDownLatch(1);
|
||||
|
||||
// attach shutdown handler to catch control-c
|
||||
Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
|
||||
@Override
|
||||
public void run() {
|
||||
streams.close();
|
||||
latch.countDown();
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
streams.start();
|
||||
latch.await();
|
||||
} catch (Throwable e) {
|
||||
System.exit(1);
|
||||
}
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package ${package};
|
||||
|
||||
import org.apache.kafka.common.serialization.Serdes;
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.StreamsBuilder;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.kstream.KeyValueMapper;
|
||||
import org.apache.kafka.streams.kstream.Materialized;
|
||||
import org.apache.kafka.streams.kstream.Produced;
|
||||
import org.apache.kafka.streams.kstream.ValueMapper;
|
||||
import org.apache.kafka.streams.state.KeyValueStore;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Locale;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
|
||||
/**
|
||||
* In this example, we implement a simple WordCount program using the high-level Streams DSL
|
||||
* that reads from a source topic "streams-plaintext-input", where the values of messages represent lines of text,
|
||||
* split each text line into words and then compute the word occurence histogram, write the continuous updated histogram
|
||||
* into a topic "streams-wordcount-output" where each record is an updated count of a single word.
|
||||
*/
|
||||
public class WordCount {
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Properties props = new Properties();
|
||||
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
|
||||
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
|
||||
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
|
||||
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
|
||||
|
||||
final StreamsBuilder builder = new StreamsBuilder();
|
||||
|
||||
builder.<String, String>stream("streams-plaintext-input")
|
||||
.flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+")))
|
||||
.groupBy((key, value) -> value)
|
||||
.count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"))
|
||||
.toStream()
|
||||
.to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
|
||||
|
||||
final Topology topology = builder.build();
|
||||
final KafkaStreams streams = new KafkaStreams(topology, props);
|
||||
final CountDownLatch latch = new CountDownLatch(1);
|
||||
|
||||
// attach shutdown handler to catch control-c
|
||||
Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
|
||||
@Override
|
||||
public void run() {
|
||||
streams.close();
|
||||
latch.countDown();
|
||||
}
|
||||
});
|
||||
|
||||
try {
|
||||
streams.start();
|
||||
latch.await();
|
||||
} catch (Throwable e) {
|
||||
System.exit(1);
|
||||
}
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
log4j.rootLogger=INFO, console
|
||||
|
||||
log4j.appender.console=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.console.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n
|
||||
@@ -0,0 +1,18 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
groupId=org.apache.kafka.archtypetest
|
||||
version=0.1
|
||||
artifactId=basic
|
||||
package=org.apache.kafka.archetypetest
|
||||
@@ -0,0 +1 @@
|
||||
compile
|
||||
121
streams/quickstart/pom.xml
Normal file
121
streams/quickstart/pom.xml
Normal file
@@ -0,0 +1,121 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.apache.kafka</groupId>
|
||||
<artifactId>streams-quickstart</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<version>2.5.2-SNAPSHOT</version>
|
||||
|
||||
<name>Kafka Streams :: Quickstart</name>
|
||||
|
||||
<parent>
|
||||
<groupId>org.apache</groupId>
|
||||
<artifactId>apache</artifactId>
|
||||
<version>18</version>
|
||||
</parent>
|
||||
|
||||
<modules>
|
||||
<module>java</module>
|
||||
</modules>
|
||||
<build>
|
||||
<extensions>
|
||||
<extension>
|
||||
<groupId>org.apache.maven.archetype</groupId>
|
||||
<artifactId>archetype-packaging</artifactId>
|
||||
<version>2.2</version>
|
||||
</extension>
|
||||
</extensions>
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-archetype-plugin</artifactId>
|
||||
<version>2.2</version>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-archetype-plugin</artifactId>
|
||||
<version>2.2</version>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<!-- deactivate the shade plugin for the quickstart archetypes -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-shade-plugin</artifactId>
|
||||
<version>3.1.0</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase/>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<groupId>com.github.siom79.japicmp</groupId>
|
||||
<artifactId>japicmp-maven-plugin</artifactId>
|
||||
<version>0.11.0</version>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- use alternative delimiter for filtering resources -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<configuration>
|
||||
<useDefaultDelimiters>false</useDefaultDelimiters>
|
||||
<delimiters>
|
||||
<delimiter>@</delimiter>
|
||||
</delimiters>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-gpg-plugin</artifactId>
|
||||
<version>1.6</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>sign-artifacts</id>
|
||||
<phase>verify</phase>
|
||||
<goals>
|
||||
<goal>sign</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<keyname>${gpg.keyname}</keyname>
|
||||
<passphraseServerId>${gpg.keyname}</passphraseServerId>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
<filtering>true</filtering>
|
||||
</resource>
|
||||
</resources>
|
||||
</build>
|
||||
</project>
|
||||
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import org.apache.kafka.clients.admin.Admin;
|
||||
import org.apache.kafka.clients.admin.AdminClient;
|
||||
import org.apache.kafka.clients.consumer.Consumer;
|
||||
import org.apache.kafka.clients.producer.Producer;
|
||||
import org.apache.kafka.streams.kstream.GlobalKTable;
|
||||
import org.apache.kafka.streams.processor.StateStore;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* {@code KafkaClientSupplier} can be used to provide custom Kafka clients to a {@link KafkaStreams} instance.
|
||||
*
|
||||
* @see KafkaStreams#KafkaStreams(Topology, java.util.Properties, KafkaClientSupplier)
|
||||
*/
|
||||
public interface KafkaClientSupplier {
|
||||
/**
|
||||
* Create an {@link AdminClient} which is used for internal topic management.
|
||||
*
|
||||
* @param config Supplied by the {@link java.util.Properties} given to the {@link KafkaStreams}
|
||||
* @return an instance of {@link AdminClient}
|
||||
* @deprecated Not called by Kafka Streams, which now uses {@link #getAdmin} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
default AdminClient getAdminClient(final Map<String, Object> config) {
|
||||
throw new UnsupportedOperationException("Direct use of this method is deprecated. " +
|
||||
"Implementations of KafkaClientSupplier should implement the getAdmin() method instead. " +
|
||||
"The method will be removed in a future release.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an {@link Admin} which is used for internal topic management.
|
||||
*
|
||||
* @param config Supplied by the {@link java.util.Properties} given to the {@link KafkaStreams}
|
||||
* @return an instance of {@link Admin}
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
default Admin getAdmin(final Map<String, Object> config) {
|
||||
return getAdminClient(config);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link Producer} which is used to write records to sink topics.
|
||||
*
|
||||
* @param config {@link StreamsConfig#getProducerConfigs(String) producer config} which is supplied by the
|
||||
* {@link java.util.Properties} given to the {@link KafkaStreams} instance
|
||||
* @return an instance of Kafka producer
|
||||
*/
|
||||
Producer<byte[], byte[]> getProducer(final Map<String, Object> config);
|
||||
|
||||
/**
|
||||
* Create a {@link Consumer} which is used to read records of source topics.
|
||||
*
|
||||
* @param config {@link StreamsConfig#getMainConsumerConfigs(String, String, int) consumer config} which is
|
||||
* supplied by the {@link java.util.Properties} given to the {@link KafkaStreams} instance
|
||||
* @return an instance of Kafka consumer
|
||||
*/
|
||||
Consumer<byte[], byte[]> getConsumer(final Map<String, Object> config);
|
||||
|
||||
/**
|
||||
* Create a {@link Consumer} which is used to read records to restore {@link StateStore}s.
|
||||
*
|
||||
* @param config {@link StreamsConfig#getRestoreConsumerConfigs(String) restore consumer config} which is supplied
|
||||
* by the {@link java.util.Properties} given to the {@link KafkaStreams}
|
||||
* @return an instance of Kafka consumer
|
||||
*/
|
||||
Consumer<byte[], byte[]> getRestoreConsumer(final Map<String, Object> config);
|
||||
|
||||
/**
|
||||
* Create a {@link Consumer} which is used to consume records for {@link GlobalKTable}.
|
||||
*
|
||||
* @param config {@link StreamsConfig#getGlobalConsumerConfigs(String) global consumer config} which is supplied
|
||||
* by the {@link java.util.Properties} given to the {@link KafkaStreams}
|
||||
* @return an instance of Kafka consumer
|
||||
*/
|
||||
Consumer<byte[], byte[]> getGlobalConsumer(final Map<String, Object> config);
|
||||
}
|
||||
1289
streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
Normal file
1289
streams/src/main/java/org/apache/kafka/streams/KafkaStreams.java
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import org.apache.kafka.streams.state.HostInfo;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Represents all the metadata related to a key, where a particular key resides in a {@link KafkaStreams} application.
|
||||
* It contains the active {@link HostInfo} and a set of standby {@link HostInfo}s, denoting the instances where the key resides.
|
||||
* It also contains the partition number where the key belongs, which could be useful when used in conjunction with other APIs.
|
||||
* e.g: Relating with lags for that store partition.
|
||||
* NOTE: This is a point in time view. It may change as rebalances happen.
|
||||
*/
|
||||
public class KeyQueryMetadata {
|
||||
/**
|
||||
* Sentinel to indicate that the KeyQueryMetadata is currently unavailable. This can occur during rebalance
|
||||
* operations.
|
||||
*/
|
||||
public static final KeyQueryMetadata NOT_AVAILABLE = new KeyQueryMetadata(new HostInfo("unavailable", -1),
|
||||
Collections.emptySet(),
|
||||
-1);
|
||||
|
||||
private final HostInfo activeHost;
|
||||
|
||||
private final Set<HostInfo> standbyHosts;
|
||||
|
||||
private final int partition;
|
||||
|
||||
public KeyQueryMetadata(final HostInfo activeHost, final Set<HostInfo> standbyHosts, final int partition) {
|
||||
this.activeHost = activeHost;
|
||||
this.standbyHosts = standbyHosts;
|
||||
this.partition = partition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Active streams instance for given key
|
||||
*
|
||||
* @return active instance's {@link HostInfo}
|
||||
*/
|
||||
public HostInfo getActiveHost() {
|
||||
return activeHost;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Streams instances that host the key as standbys
|
||||
*
|
||||
* @return set of standby {@link HostInfo} or a empty set, if no standbys are configured
|
||||
*/
|
||||
public Set<HostInfo> getStandbyHosts() {
|
||||
return standbyHosts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Store partition corresponding to the key.
|
||||
*
|
||||
* @return store partition number
|
||||
*/
|
||||
public int getPartition() {
|
||||
return partition;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (!(obj instanceof KeyQueryMetadata)) {
|
||||
return false;
|
||||
}
|
||||
final KeyQueryMetadata keyQueryMetadata = (KeyQueryMetadata) obj;
|
||||
return Objects.equals(keyQueryMetadata.activeHost, activeHost)
|
||||
&& Objects.equals(keyQueryMetadata.standbyHosts, standbyHosts)
|
||||
&& Objects.equals(keyQueryMetadata.partition, partition);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "KeyQueryMetadata {" +
|
||||
"activeHost=" + activeHost +
|
||||
", standbyHosts=" + standbyHosts +
|
||||
", partition=" + partition +
|
||||
'}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(activeHost, standbyHosts, partition);
|
||||
}
|
||||
}
|
||||
83
streams/src/main/java/org/apache/kafka/streams/KeyValue.java
Normal file
83
streams/src/main/java/org/apache/kafka/streams/KeyValue.java
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* A key-value pair defined for a single Kafka Streams record.
|
||||
* If the record comes directly from a Kafka topic then its key/value are defined as the message key/value.
|
||||
*
|
||||
* @param <K> Key type
|
||||
* @param <V> Value type
|
||||
*/
|
||||
public class KeyValue<K, V> {
|
||||
|
||||
/** The key of the key-value pair. */
|
||||
public final K key;
|
||||
/** The value of the key-value pair. */
|
||||
public final V value;
|
||||
|
||||
/**
|
||||
* Create a new key-value pair.
|
||||
*
|
||||
* @param key the key
|
||||
* @param value the value
|
||||
*/
|
||||
public KeyValue(final K key, final V value) {
|
||||
this.key = key;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new key-value pair.
|
||||
*
|
||||
* @param key the key
|
||||
* @param value the value
|
||||
* @param <K> the type of the key
|
||||
* @param <V> the type of the value
|
||||
* @return a new key-value pair
|
||||
*/
|
||||
public static <K, V> KeyValue<K, V> pair(final K key, final V value) {
|
||||
return new KeyValue<>(key, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "KeyValue(" + key + ", " + value + ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (this == obj) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!(obj instanceof KeyValue)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final KeyValue other = (KeyValue) obj;
|
||||
return Objects.equals(key, other.key) && Objects.equals(value, other.value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(key, value);
|
||||
}
|
||||
|
||||
}
|
||||
91
streams/src/main/java/org/apache/kafka/streams/LagInfo.java
Normal file
91
streams/src/main/java/org/apache/kafka/streams/LagInfo.java
Normal file
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Encapsulates information about lag, at a store partition replica (active or standby). This information is constantly changing as the
|
||||
* tasks process records and thus, they should be treated as simply instantaenous measure of lag.
|
||||
*/
|
||||
public class LagInfo {
|
||||
|
||||
private final long currentOffsetPosition;
|
||||
|
||||
private final long endOffsetPosition;
|
||||
|
||||
private final long offsetLag;
|
||||
|
||||
LagInfo(final long currentOffsetPosition, final long endOffsetPosition) {
|
||||
this.currentOffsetPosition = currentOffsetPosition;
|
||||
this.endOffsetPosition = endOffsetPosition;
|
||||
this.offsetLag = Math.max(0, endOffsetPosition - currentOffsetPosition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current maximum offset on the store partition's changelog topic, that has been successfully written into
|
||||
* the store partition's state store.
|
||||
*
|
||||
* @return current consume offset for standby/restoring store partitions & simply endoffset for active store partition replicas
|
||||
*/
|
||||
public long currentOffsetPosition() {
|
||||
return this.currentOffsetPosition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the end offset position for this store partition's changelog topic on the Kafka brokers.
|
||||
*
|
||||
* @return last offset written to the changelog topic partition
|
||||
*/
|
||||
public long endOffsetPosition() {
|
||||
return this.endOffsetPosition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the measured lag between current and end offset positions, for this store partition replica
|
||||
*
|
||||
* @return lag as measured by message offsets
|
||||
*/
|
||||
public long offsetLag() {
|
||||
return this.offsetLag;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (!(obj instanceof LagInfo)) {
|
||||
return false;
|
||||
}
|
||||
final LagInfo other = (LagInfo) obj;
|
||||
return currentOffsetPosition == other.currentOffsetPosition
|
||||
&& endOffsetPosition == other.endOffsetPosition
|
||||
&& this.offsetLag == other.offsetLag;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(currentOffsetPosition, endOffsetPosition, offsetLag);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "LagInfo {" +
|
||||
" currentOffsetPosition=" + currentOffsetPosition +
|
||||
", endOffsetPosition=" + endOffsetPosition +
|
||||
", offsetLag=" + offsetLag +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,129 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import org.apache.kafka.streams.state.QueryableStoreType;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* {@code StoreQueryParameters} allows you to pass a variety of parameters when fetching a store for interactive query.
|
||||
*/
|
||||
public class StoreQueryParameters<T> {
|
||||
|
||||
private Integer partition;
|
||||
private boolean staleStores;
|
||||
private final String storeName;
|
||||
private final QueryableStoreType<T> queryableStoreType;
|
||||
|
||||
private StoreQueryParameters(final String storeName, final QueryableStoreType<T> queryableStoreType, final Integer partition, final boolean staleStores) {
|
||||
this.storeName = storeName;
|
||||
this.queryableStoreType = queryableStoreType;
|
||||
this.partition = partition;
|
||||
this.staleStores = staleStores;
|
||||
}
|
||||
|
||||
public static <T> StoreQueryParameters<T> fromNameAndType(final String storeName,
|
||||
final QueryableStoreType<T> queryableStoreType) {
|
||||
return new StoreQueryParameters<T>(storeName, queryableStoreType, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a specific partition that should be queried exclusively.
|
||||
*
|
||||
* @param partition The specific integer partition to be fetched from the stores list by using {@link StoreQueryParameters}.
|
||||
*
|
||||
* @return StoreQueryParameters a new {@code StoreQueryParameters} instance configured with the specified partition
|
||||
*/
|
||||
public StoreQueryParameters<T> withPartition(final Integer partition) {
|
||||
return new StoreQueryParameters<T>(storeName, queryableStoreType, partition, staleStores);
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable querying of stale state stores, i.e., allow to query active tasks during restore as well as standby tasks.
|
||||
*
|
||||
* @return StoreQueryParameters a new {@code StoreQueryParameters} instance configured with serving from stale stores enabled
|
||||
*/
|
||||
public StoreQueryParameters<T> enableStaleStores() {
|
||||
return new StoreQueryParameters<T>(storeName, queryableStoreType, partition, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the name of the state store that should be queried.
|
||||
*
|
||||
* @return String state store name
|
||||
*/
|
||||
public String storeName() {
|
||||
return storeName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the queryable store type for which key is queried by the user.
|
||||
*
|
||||
* @return QueryableStoreType type of queryable store
|
||||
*/
|
||||
public QueryableStoreType<T> queryableStoreType() {
|
||||
return queryableStoreType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the store partition that will be queried.
|
||||
* If the method returns {@code null}, it would mean that no specific partition has been requested,
|
||||
* so all the local partitions for the store will be queried.
|
||||
*
|
||||
* @return Integer partition
|
||||
*/
|
||||
public Integer partition() {
|
||||
return partition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the flag staleStores. If {@code true}, include standbys and recovering stores along with running stores.
|
||||
*
|
||||
* @return boolean staleStores
|
||||
*/
|
||||
public boolean staleStoresEnabled() {
|
||||
return staleStores;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (!(obj instanceof StoreQueryParameters)) {
|
||||
return false;
|
||||
}
|
||||
final StoreQueryParameters storeQueryParameters = (StoreQueryParameters) obj;
|
||||
return Objects.equals(storeQueryParameters.partition, partition)
|
||||
&& Objects.equals(storeQueryParameters.staleStores, staleStores)
|
||||
&& Objects.equals(storeQueryParameters.storeName, storeName)
|
||||
&& Objects.equals(storeQueryParameters.queryableStoreType, queryableStoreType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "StoreQueryParameters {" +
|
||||
"partition=" + partition +
|
||||
", staleStores=" + staleStores +
|
||||
", storeName=" + storeName +
|
||||
", queryableStoreType=" + queryableStoreType +
|
||||
'}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(partition, staleStores, storeName, queryableStoreType);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,565 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.errors.TopologyException;
|
||||
import org.apache.kafka.streams.kstream.Consumed;
|
||||
import org.apache.kafka.streams.kstream.GlobalKTable;
|
||||
import org.apache.kafka.streams.kstream.KGroupedStream;
|
||||
import org.apache.kafka.streams.kstream.KGroupedTable;
|
||||
import org.apache.kafka.streams.kstream.KStream;
|
||||
import org.apache.kafka.streams.kstream.KTable;
|
||||
import org.apache.kafka.streams.kstream.Materialized;
|
||||
import org.apache.kafka.streams.kstream.Transformer;
|
||||
import org.apache.kafka.streams.kstream.ValueTransformer;
|
||||
import org.apache.kafka.streams.kstream.internals.ConsumedInternal;
|
||||
import org.apache.kafka.streams.kstream.internals.InternalStreamsBuilder;
|
||||
import org.apache.kafka.streams.kstream.internals.MaterializedInternal;
|
||||
import org.apache.kafka.streams.processor.Processor;
|
||||
import org.apache.kafka.streams.processor.ProcessorSupplier;
|
||||
import org.apache.kafka.streams.processor.StateStore;
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
|
||||
import org.apache.kafka.streams.processor.internals.ProcessorNode;
|
||||
import org.apache.kafka.streams.processor.internals.SourceNode;
|
||||
import org.apache.kafka.streams.state.KeyValueStore;
|
||||
import org.apache.kafka.streams.state.StoreBuilder;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Objects;
|
||||
import java.util.Properties;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* {@code StreamsBuilder} provide the high-level Kafka Streams DSL to specify a Kafka Streams topology.
|
||||
*
|
||||
* @see Topology
|
||||
* @see KStream
|
||||
* @see KTable
|
||||
* @see GlobalKTable
|
||||
*/
|
||||
public class StreamsBuilder {
|
||||
|
||||
/** The actual topology that is constructed by this StreamsBuilder. */
|
||||
private final Topology topology = new Topology();
|
||||
|
||||
/** The topology's internal builder. */
|
||||
final InternalTopologyBuilder internalTopologyBuilder = topology.internalTopologyBuilder;
|
||||
|
||||
private final InternalStreamsBuilder internalStreamsBuilder = new InternalStreamsBuilder(internalTopologyBuilder);
|
||||
|
||||
/**
|
||||
* Create a {@link KStream} from the specified topic.
|
||||
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
|
||||
* deserializers as specified in the {@link StreamsConfig config} are used.
|
||||
* <p>
|
||||
* If multiple topics are specified there is no ordering guarantee for records from different topics.
|
||||
* <p>
|
||||
* Note that the specified input topic must be partitioned by key.
|
||||
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
|
||||
* (like aggregation or join) is applied to the returned {@link KStream}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @return a {@link KStream} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> KStream<K, V> stream(final String topic) {
|
||||
return stream(Collections.singleton(topic));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link KStream} from the specified topic.
|
||||
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
|
||||
* are defined by the options in {@link Consumed} are used.
|
||||
* <p>
|
||||
* Note that the specified input topic must be partitioned by key.
|
||||
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
|
||||
* (like aggregation or join) is applied to the returned {@link KStream}.
|
||||
*
|
||||
* @param topic the topic names; cannot be {@code null}
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters
|
||||
* @return a {@link KStream} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> KStream<K, V> stream(final String topic,
|
||||
final Consumed<K, V> consumed) {
|
||||
return stream(Collections.singleton(topic), consumed);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link KStream} from the specified topics.
|
||||
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
|
||||
* deserializers as specified in the {@link StreamsConfig config} are used.
|
||||
* <p>
|
||||
* If multiple topics are specified there is no ordering guarantee for records from different topics.
|
||||
* <p>
|
||||
* Note that the specified input topics must be partitioned by key.
|
||||
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
|
||||
* (like aggregation or join) is applied to the returned {@link KStream}.
|
||||
*
|
||||
* @param topics the topic names; must contain at least one topic name
|
||||
* @return a {@link KStream} for the specified topics
|
||||
*/
|
||||
public synchronized <K, V> KStream<K, V> stream(final Collection<String> topics) {
|
||||
return stream(topics, Consumed.with(null, null, null, null));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link KStream} from the specified topics.
|
||||
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
|
||||
* are defined by the options in {@link Consumed} are used.
|
||||
* <p>
|
||||
* If multiple topics are specified there is no ordering guarantee for records from different topics.
|
||||
* <p>
|
||||
* Note that the specified input topics must be partitioned by key.
|
||||
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
|
||||
* (like aggregation or join) is applied to the returned {@link KStream}.
|
||||
*
|
||||
* @param topics the topic names; must contain at least one topic name
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters
|
||||
* @return a {@link KStream} for the specified topics
|
||||
*/
|
||||
public synchronized <K, V> KStream<K, V> stream(final Collection<String> topics,
|
||||
final Consumed<K, V> consumed) {
|
||||
Objects.requireNonNull(topics, "topics can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
return internalStreamsBuilder.stream(topics, new ConsumedInternal<>(consumed));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a {@link KStream} from the specified topic pattern.
|
||||
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
|
||||
* deserializers as specified in the {@link StreamsConfig config} are used.
|
||||
* <p>
|
||||
* If multiple topics are matched by the specified pattern, the created {@link KStream} will read data from all of
|
||||
* them and there is no ordering guarantee between records from different topics. This also means that the work
|
||||
* will not be parallelized for multiple topics, and the number of tasks will scale with the maximum partition
|
||||
* count of any matching topic rather than the total number of partitions across all topics.
|
||||
* <p>
|
||||
* Note that the specified input topics must be partitioned by key.
|
||||
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
|
||||
* (like aggregation or join) is applied to the returned {@link KStream}.
|
||||
*
|
||||
* @param topicPattern the pattern to match for topic names
|
||||
* @return a {@link KStream} for topics matching the regex pattern.
|
||||
*/
|
||||
public synchronized <K, V> KStream<K, V> stream(final Pattern topicPattern) {
|
||||
return stream(topicPattern, Consumed.with(null, null));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link KStream} from the specified topic pattern.
|
||||
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
|
||||
* are defined by the options in {@link Consumed} are used.
|
||||
* <p>
|
||||
* If multiple topics are matched by the specified pattern, the created {@link KStream} will read data from all of
|
||||
* them and there is no ordering guarantee between records from different topics. This also means that the work
|
||||
* will not be parallelized for multiple topics, and the number of tasks will scale with the maximum partition
|
||||
* count of any matching topic rather than the total number of partitions across all topics.
|
||||
* <p>
|
||||
* Note that the specified input topics must be partitioned by key.
|
||||
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
|
||||
* (like aggregation or join) is applied to the returned {@link KStream}.
|
||||
*
|
||||
* @param topicPattern the pattern to match for topic names
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters
|
||||
* @return a {@link KStream} for topics matching the regex pattern.
|
||||
*/
|
||||
public synchronized <K, V> KStream<K, V> stream(final Pattern topicPattern,
|
||||
final Consumed<K, V> consumed) {
|
||||
Objects.requireNonNull(topicPattern, "topicPattern can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
return internalStreamsBuilder.stream(topicPattern, new ConsumedInternal<>(consumed));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link KTable} for the specified topic.
|
||||
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
|
||||
* are defined by the options in {@link Consumed} are used.
|
||||
* Input {@link KeyValue records} with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* Note that the specified input topic must be partitioned by key.
|
||||
* If this is not the case the returned {@link KTable} will be corrupted.
|
||||
* <p>
|
||||
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} using the given
|
||||
* {@code Materialized} instance.
|
||||
* An internal changelog topic is created by default. Because the source topic can
|
||||
* be used for recovery, you can avoid creating the changelog topic by setting
|
||||
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
|
||||
* <p>
|
||||
* You should only specify serdes in the {@link Consumed} instance as these will also be used to overwrite the
|
||||
* serdes in {@link Materialized}, i.e.,
|
||||
* <pre> {@code
|
||||
* streamBuilder.table(topic, Consumed.with(Serde.String(), Serde.String()), Materialized.<String, String, KeyValueStore<Bytes, byte[]>as(storeName))
|
||||
* }
|
||||
* </pre>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ...
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long valueForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters; cannot be {@code null}
|
||||
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
|
||||
* @return a {@link KTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> KTable<K, V> table(final String topic,
|
||||
final Consumed<K, V> consumed,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
|
||||
Objects.requireNonNull(topic, "topic can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
Objects.requireNonNull(materialized, "materialized can't be null");
|
||||
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
|
||||
materialized.withKeySerde(consumedInternal.keySerde()).withValueSerde(consumedInternal.valueSerde());
|
||||
|
||||
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
|
||||
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
|
||||
|
||||
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
|
||||
}
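// A usage sketch (illustrative only): materialize the topic as a queryable store and read a key
// back once the application is running. The topic name "users", the store name "users-store",
// and the String serdes are assumptions for the example.
//
//   KTable<String, String> users = builder.table(
//       "users",
//       Consumed.with(Serdes.String(), Serdes.String()),
//       Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as("users-store"));
//
//   // after streams.start():
//   ReadOnlyKeyValueStore<String, String> store =
//       streams.store(StoreQueryParameters.fromNameAndType("users-store", QueryableStoreTypes.keyValueStore()));
//   String value = store.get("some-user-id");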
|
||||
|
||||
/**
|
||||
* Create a {@link KTable} for the specified topic.
|
||||
* The default {@code "auto.offset.reset"} strategy and default key and value deserializers as specified in the
|
||||
* {@link StreamsConfig config} are used.
|
||||
* Input {@link KeyValue records} with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* Note that the specified input topics must be partitioned by key.
|
||||
* If this is not the case the returned {@link KTable} will be corrupted.
|
||||
* <p>
|
||||
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
|
||||
* An internal changelog topic is created by default. Because the source topic can
|
||||
* be used for recovery, you can avoid creating the changelog topic by setting
|
||||
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @return a {@link KTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> KTable<K, V> table(final String topic) {
|
||||
return table(topic, new ConsumedInternal<>());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link KTable} for the specified topic.
|
||||
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
|
||||
* are defined by the options in {@link Consumed} are used.
|
||||
* Input {@link KeyValue records} with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* Note that the specified input topics must be partitioned by key.
|
||||
* If this is not the case the returned {@link KTable} will be corrupted.
|
||||
* <p>
|
||||
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
|
||||
* An internal changelog topic is created by default. Because the source topic can
|
||||
* be used for recovery, you can avoid creating the changelog topic by setting
|
||||
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters; cannot be {@code null}
|
||||
* @return a {@link KTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> KTable<K, V> table(final String topic,
|
||||
final Consumed<K, V> consumed) {
|
||||
Objects.requireNonNull(topic, "topic can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
|
||||
|
||||
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
|
||||
new MaterializedInternal<>(
|
||||
Materialized.with(consumedInternal.keySerde(), consumedInternal.valueSerde()),
|
||||
internalStreamsBuilder,
|
||||
topic + "-");
|
||||
|
||||
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link KTable} for the specified topic.
|
||||
* The default {@code "auto.offset.reset"} strategy as specified in the {@link StreamsConfig config} are used.
|
||||
* Key and value deserializers as defined by the options in {@link Materialized} are used.
|
||||
* Input {@link KeyValue records} with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* Note that the specified input topics must be partitioned by key.
|
||||
* If this is not the case the returned {@link KTable} will be corrupted.
|
||||
* <p>
|
||||
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} using the {@link Materialized} instance.
|
||||
* An internal changelog topic is created by default. Because the source topic can
|
||||
* be used for recovery, you can avoid creating the changelog topic by setting
|
||||
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
|
||||
* @return a {@link KTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> KTable<K, V> table(final String topic,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
|
||||
Objects.requireNonNull(topic, "topic can't be null");
|
||||
Objects.requireNonNull(materialized, "materialized can't be null");
|
||||
|
||||
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
|
||||
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
|
||||
|
||||
final ConsumedInternal<K, V> consumedInternal =
|
||||
new ConsumedInternal<>(Consumed.with(materializedInternal.keySerde(), materializedInternal.valueSerde()));
|
||||
|
||||
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
|
||||
}
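// A usage sketch (illustrative only): when no Consumed is given, the serdes are carried by the
// Materialized instance. The topic name, store name, and serdes are assumptions.
//
//   KTable<String, Long> counts = builder.table(
//       "counts",
//       Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store")
//           .withKeySerde(Serdes.String())
//           .withValueSerde(Serdes.Long()));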
|
||||
|
||||
/**
|
||||
* Create a {@link GlobalKTable} for the specified topic.
|
||||
* Input {@link KeyValue records} with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
|
||||
* No internal changelog topic is created since the original input topic can be used for recovery (cf.
|
||||
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
|
||||
* <p>
|
||||
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
|
||||
* regardless of the specified value in {@link StreamsConfig} or {@link Consumed}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters
|
||||
* @return a {@link GlobalKTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
|
||||
final Consumed<K, V> consumed) {
|
||||
Objects.requireNonNull(topic, "topic can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
|
||||
|
||||
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
|
||||
new MaterializedInternal<>(
|
||||
Materialized.with(consumedInternal.keySerde(), consumedInternal.valueSerde()),
|
||||
internalStreamsBuilder, topic + "-");
|
||||
|
||||
return internalStreamsBuilder.globalTable(topic, consumedInternal, materializedInternal);
|
||||
}
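// A usage sketch (illustrative only): a GlobalKTable reads all partitions of the topic on every
// instance, which makes it suitable for broadcast/lookup data. Topic name and serdes are
// assumptions for the example.
//
//   GlobalKTable<String, String> userProfiles =
//       builder.globalTable("user-profiles", Consumed.with(Serdes.String(), Serdes.String()));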
|
||||
|
||||
/**
|
||||
* Create a {@link GlobalKTable} for the specified topic.
|
||||
* The default key and value deserializers as specified in the {@link StreamsConfig config} are used.
|
||||
* Input {@link KeyValue records} with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
|
||||
* No internal changelog topic is created since the original input topic can be used for recovery (cf.
|
||||
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
|
||||
* <p>
|
||||
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
|
||||
* regardless of the specified value in {@link StreamsConfig}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @return a {@link GlobalKTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic) {
|
||||
return globalTable(topic, Consumed.with(null, null));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link GlobalKTable} for the specified topic.
|
||||
*
|
||||
* Input {@link KeyValue} pairs with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} configured with
|
||||
* the provided instance of {@link Materialized}.
|
||||
* However, no internal changelog topic is created since the original input topic can be used for recovery (cf.
|
||||
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
|
||||
* <p>
|
||||
* You should only specify serdes in the {@link Consumed} instance as these will also be used to overwrite the
|
||||
* serdes in {@link Materialized}, i.e.,
|
||||
* <pre> {@code
|
||||
* streamBuilder.globalTable(topic, Consumed.with(Serdes.String(), Serdes.String()), Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(storeName))
|
||||
* }
|
||||
* </pre>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ...
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long valueForKey = localStore.get(key);
|
||||
* }</pre>
|
||||
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
|
||||
* regardless of the specified value in {@link StreamsConfig} or {@link Consumed}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters; can't be {@code null}
|
||||
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
|
||||
* @return a {@link GlobalKTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
|
||||
final Consumed<K, V> consumed,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
|
||||
Objects.requireNonNull(topic, "topic can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
Objects.requireNonNull(materialized, "materialized can't be null");
|
||||
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
|
||||
// always use the serdes from consumed
|
||||
materialized.withKeySerde(consumedInternal.keySerde()).withValueSerde(consumedInternal.valueSerde());
|
||||
|
||||
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
|
||||
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
|
||||
|
||||
return internalStreamsBuilder.globalTable(topic, consumedInternal, materializedInternal);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link GlobalKTable} for the specified topic.
|
||||
*
|
||||
* Input {@link KeyValue} pairs with {@code null} key will be dropped.
|
||||
* <p>
|
||||
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} configured with
|
||||
* the provided instance of {@link Materialized}.
|
||||
* However, no internal changelog topic is created since the original input topic can be used for recovery (cf.
|
||||
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ...
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long valueForKey = localStore.get(key);
|
||||
* }</pre>
|
||||
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
|
||||
* regardless of the specified value in {@link StreamsConfig}.
|
||||
*
|
||||
* @param topic the topic name; cannot be {@code null}
|
||||
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
|
||||
* @return a {@link GlobalKTable} for the specified topic
|
||||
*/
|
||||
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
|
||||
Objects.requireNonNull(topic, "topic can't be null");
|
||||
Objects.requireNonNull(materialized, "materialized can't be null");
|
||||
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
|
||||
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
|
||||
|
||||
return internalStreamsBuilder.globalTable(topic,
|
||||
new ConsumedInternal<>(Consumed.with(materializedInternal.keySerde(),
|
||||
materializedInternal.valueSerde())),
|
||||
materializedInternal);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds a state store to the underlying {@link Topology}.
|
||||
* <p>
|
||||
* It is required to connect state stores to {@link Processor Processors}, {@link Transformer Transformers},
|
||||
* or {@link ValueTransformer ValueTransformers} before they can be used.
|
||||
*
|
||||
* @param builder the builder used to obtain this {@link StateStore} instance
|
||||
* @return itself
|
||||
* @throws TopologyException if state store supplier is already added
|
||||
*/
|
||||
public synchronized StreamsBuilder addStateStore(final StoreBuilder builder) {
|
||||
Objects.requireNonNull(builder, "builder can't be null");
|
||||
internalStreamsBuilder.addStateStore(builder);
|
||||
return this;
|
||||
}
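// A usage sketch (illustrative only): register a store and later connect it to a processor or
// transformer by name. The store name "my-store" and the MyTransformer class are assumptions.
//
//   StoreBuilder<KeyValueStore<String, Long>> storeBuilder =
//       Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("my-store"),
//                                   Serdes.String(), Serdes.Long());
//   builder.addStateStore(storeBuilder);
//   // the store can then be attached to a stateful operation by name, e.g.:
//   //   stream.transform(MyTransformer::new, "my-store");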
|
||||
|
||||
/**
|
||||
* @deprecated use {@link #addGlobalStore(StoreBuilder, String, Consumed, ProcessorSupplier)} instead
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
@Deprecated
|
||||
public synchronized StreamsBuilder addGlobalStore(final StoreBuilder storeBuilder,
|
||||
final String topic,
|
||||
final String sourceName,
|
||||
final Consumed consumed,
|
||||
final String processorName,
|
||||
final ProcessorSupplier stateUpdateSupplier) {
|
||||
Objects.requireNonNull(storeBuilder, "storeBuilder can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
internalStreamsBuilder.addGlobalStore(storeBuilder,
|
||||
sourceName,
|
||||
topic,
|
||||
new ConsumedInternal<>(consumed),
|
||||
processorName,
|
||||
stateUpdateSupplier);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a global {@link StateStore} to the topology.
|
||||
* The {@link StateStore} sources its data from all partitions of the provided input topic.
|
||||
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
|
||||
* <p>
|
||||
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
|
||||
* of the input topic.
|
||||
* <p>
|
||||
* The provided {@link ProcessorSupplier} will be used to create an {@link ProcessorNode} that will receive all
|
||||
* records forwarded from the {@link SourceNode}. NOTE: you should not use the {@code Processor} to insert transformed records into
|
||||
* the global state store. This store uses the source topic as changelog and during restore will insert records directly
|
||||
* from the source.
|
||||
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
* <p>
|
||||
* It is not required to connect a global store to {@link Processor Processors}, {@link Transformer Transformers},
|
||||
* or {@link ValueTransformer ValueTransformers}; those have read-only access to all global stores by default.
|
||||
*
|
||||
* @param storeBuilder user defined {@link StoreBuilder}; can't be {@code null}
|
||||
* @param topic the topic to source the data from
|
||||
* @param consumed the instance of {@link Consumed} used to define optional parameters; can't be {@code null}
|
||||
* @param stateUpdateSupplier the instance of {@link ProcessorSupplier}
|
||||
* @return itself
|
||||
* @throws TopologyException if the processor or state store is already registered
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public synchronized StreamsBuilder addGlobalStore(final StoreBuilder storeBuilder,
|
||||
final String topic,
|
||||
final Consumed consumed,
|
||||
final ProcessorSupplier stateUpdateSupplier) {
|
||||
Objects.requireNonNull(storeBuilder, "storeBuilder can't be null");
|
||||
Objects.requireNonNull(consumed, "consumed can't be null");
|
||||
internalStreamsBuilder.addGlobalStore(storeBuilder,
|
||||
topic,
|
||||
new ConsumedInternal<>(consumed),
|
||||
stateUpdateSupplier);
|
||||
return this;
|
||||
}
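// A usage sketch (illustrative only): back a global store with a topic and keep it up-to-date
// with a processor that writes incoming records into the store. The topic/store names and the
// GlobalStoreUpdater class are assumptions; logging is disabled because the source topic itself
// serves as the changelog.
//
//   builder.addGlobalStore(
//       Stores.keyValueStoreBuilder(Stores.inMemoryKeyValueStore("config-store"),
//                                   Serdes.String(), Serdes.String()).withLoggingDisabled(),
//       "config-topic",
//       Consumed.with(Serdes.String(), Serdes.String()),
//       GlobalStoreUpdater::new);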
|
||||
|
||||
/**
|
||||
* Returns the {@link Topology} that represents the specified processing logic.
|
||||
* Note that using this method means no optimizations are performed.
|
||||
*
|
||||
* @return the {@link Topology} that represents the specified processing logic
|
||||
*/
|
||||
public synchronized Topology build() {
|
||||
return build(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link Topology} that represents the specified processing logic and accepts
|
||||
* a {@link Properties} instance used to indicate whether to optimize topology or not.
|
||||
*
|
||||
* @param props the {@link Properties} used for building possibly optimized topology
|
||||
* @return the {@link Topology} that represents the specified processing logic
|
||||
*/
|
||||
public synchronized Topology build(final Properties props) {
|
||||
internalStreamsBuilder.buildAndOptimizeTopology(props);
|
||||
return topology;
|
||||
}
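// A usage sketch (illustrative only): passing the application's properties enables the optimizer
// when "topology.optimization" is set to "all". The constant names below are assumed from the
// Streams version this file targets.
//
//   Properties props = new Properties();
//   props.put(StreamsConfig.TOPOLOGY_OPTIMIZATION, StreamsConfig.OPTIMIZE);
//   Topology topology = builder.build(props);
//   KafkaStreams streams = new KafkaStreams(topology, props);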
|
||||
}
|
||||
1310
streams/src/main/java/org/apache/kafka/streams/StreamsConfig.java
Normal file
File diff suppressed because it is too large
@@ -0,0 +1,225 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import org.apache.kafka.common.Metric;
|
||||
import org.apache.kafka.common.MetricName;
|
||||
import org.apache.kafka.common.metrics.Sensor;
|
||||
import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The Kafka Streams metrics interface for adding metric sensors and collecting metric values.
|
||||
*/
|
||||
public interface StreamsMetrics {
|
||||
|
||||
/**
|
||||
* Get read-only handle on global metrics registry.
|
||||
*
|
||||
* @return Map of all metrics.
|
||||
*/
|
||||
Map<MetricName, ? extends Metric> metrics();
|
||||
|
||||
/**
|
||||
* Add a latency, rate and total sensor for a specific operation, which will include the following metrics:
|
||||
* <ol>
|
||||
* <li>average latency</li>
|
||||
* <li>max latency</li>
|
||||
* <li>invocation rate (num.operations / seconds)</li>
|
||||
* <li>total invocation count</li>
|
||||
* </ol>
|
||||
* Whenever a user records this sensor via {@link Sensor#record(double)} etc, it will be counted as one invocation
|
||||
* of the operation, and hence the rate / count metrics will be updated accordingly; and the recorded latency value
|
||||
* will be used to update the average / max latency as well.
|
||||
*
|
||||
* Note that you can add more metrics to this sensor after you created it, which can then be updated upon
|
||||
* {@link Sensor#record(double)} calls.
|
||||
*
|
||||
* The added sensor and its metrics can be removed with {@link #removeSensor(Sensor) removeSensor()}.
|
||||
*
|
||||
* @param scopeName name of the scope, which will be used as part of the metric type, e.g.: "stream-[scope]-metrics".
|
||||
* @param entityName name of the entity, which will be used as part of the metric tags, e.g.: "[scope]-id" = "[entity]".
|
||||
* @param operationName name of the operation, which will be used as the name of the metric, e.g.: "[operation]-latency-avg".
|
||||
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
|
||||
* @param tags additional tags of the sensor
|
||||
* @return The added sensor.
|
||||
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
|
||||
* @see #removeSensor(Sensor)
|
||||
* @see #addSensor(String, RecordingLevel, Sensor...)
|
||||
*/
|
||||
Sensor addLatencyRateTotalSensor(final String scopeName,
|
||||
final String entityName,
|
||||
final String operationName,
|
||||
final Sensor.RecordingLevel recordingLevel,
|
||||
final String... tags);
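// A usage sketch (illustrative only), e.g. from inside a Processor where a StreamsMetrics
// instance is available via the processor context. The scope/entity/operation names and the
// latency conversion are assumptions for the example.
//
//   Sensor sensor = metrics.addLatencyRateTotalSensor(
//       "my-scope", "my-entity", "my-operation", Sensor.RecordingLevel.INFO);
//   long start = System.nanoTime();
//   // ... do the work being measured ...
//   sensor.record((System.nanoTime() - start) / 1_000_000.0); // latency in ms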
|
||||
|
||||
/**
|
||||
* Add a rate and a total sensor for a specific operation, which will include the following metrics:
|
||||
* <ol>
|
||||
* <li>invocation rate (num.operations / time unit)</li>
|
||||
* <li>total invocation count</li>
|
||||
* </ol>
|
||||
* Whenever a user records this sensor via {@link Sensor#record(double)} etc,
|
||||
* it will be counted as one invocation of the operation, and hence the rate / count metrics will be updated accordingly.
|
||||
*
|
||||
* Note that you can add more metrics to this sensor after you created it, which can then be updated upon
|
||||
* {@link Sensor#record(double)} calls.
|
||||
*
|
||||
* The added sensor and its metrics can be removed with {@link #removeSensor(Sensor) removeSensor()}.
|
||||
*
|
||||
* @param scopeName name of the scope, which will be used as part of the metrics type, e.g.: "stream-[scope]-metrics".
|
||||
* @param entityName name of the entity, which will be used as part of the metric tags, e.g.: "[scope]-id" = "[entity]".
|
||||
* @param operationName name of the operation, which will be used as the name of the metric, e.g.: "[operation]-total".
|
||||
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
|
||||
* @param tags additional tags of the sensor
|
||||
* @return The added sensor.
|
||||
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
|
||||
* @see #removeSensor(Sensor)
|
||||
* @see #addSensor(String, RecordingLevel, Sensor...)
|
||||
*/
|
||||
Sensor addRateTotalSensor(final String scopeName,
|
||||
final String entityName,
|
||||
final String operationName,
|
||||
final Sensor.RecordingLevel recordingLevel,
|
||||
final String... tags);
|
||||
|
||||
/**
|
||||
* Add a latency and throughput sensor for a specific operation, which will include the following sensors:
|
||||
* <ol>
|
||||
* <li>average latency</li>
|
||||
* <li>max latency</li>
|
||||
* <li>throughput (num.operations / time unit)</li>
|
||||
* </ol>
|
||||
* Also create a parent sensor with the same metrics that aggregates all entities with the same operation under the
|
||||
* same scope if it has not been created.
|
||||
*
|
||||
* @param scopeName name of the scope, could be the type of the state store, etc.
|
||||
* @param entityName name of the entity, could be the name of the state store instance, etc.
|
||||
* @param operationName name of the operation, could be get / put / delete / etc.
|
||||
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
|
||||
* @param tags additional tags of the sensor
|
||||
* @return The added sensor.
|
||||
* @deprecated since 2.5. Use {@link #addLatencyRateTotalSensor(String, String, String, Sensor.RecordingLevel, String...) addLatencyRateTotalSensor()}
|
||||
* instead.
|
||||
*/
|
||||
@Deprecated
|
||||
Sensor addLatencyAndThroughputSensor(final String scopeName,
|
||||
final String entityName,
|
||||
final String operationName,
|
||||
final Sensor.RecordingLevel recordingLevel,
|
||||
final String... tags);
|
||||
|
||||
/**
|
||||
* Record the given latency value of the sensor.
|
||||
* If the passed sensor includes throughput metrics, e.g., when created by the
|
||||
* {@link #addLatencyAndThroughputSensor(String, String, String, Sensor.RecordingLevel, String...)} method, then the
|
||||
* throughput metrics will also be recorded from this event.
|
||||
*
|
||||
* @param sensor sensor whose latency we are recording.
|
||||
* @param startNs start of measurement time in nanoseconds.
|
||||
* @param endNs end of measurement time in nanoseconds.
|
||||
* @deprecated since 2.5. Use {@link Sensor#record(double) Sensor#record()} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
void recordLatency(final Sensor sensor,
|
||||
final long startNs,
|
||||
final long endNs);
|
||||
|
||||
/**
|
||||
* Add a throughput sensor for a specific operation:
|
||||
* <ol>
|
||||
* <li>throughput (num.operations / time unit)</li>
|
||||
* </ol>
|
||||
* Also create a parent sensor with the same metrics that aggregates all entities with the same operation under the
|
||||
* same scope if it has not been created.
|
||||
* This sensor is a strict subset of the sensors created by
|
||||
* {@link #addLatencyAndThroughputSensor(String, String, String, Sensor.RecordingLevel, String...)}.
|
||||
*
|
||||
* @param scopeName name of the scope, could be the type of the state store, etc.
|
||||
* @param entityName name of the entity, could be the name of the state store instance, etc.
|
||||
* @param operationName name of the operation, could be get / put / delete / etc.
|
||||
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
|
||||
* @param tags additional tags of the sensor
|
||||
* @return The added sensor.
|
||||
* @deprecated since 2.5. Use {@link #addRateTotalSensor(String, String, String, Sensor.RecordingLevel, String...)
|
||||
* addRateTotalSensor()} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
Sensor addThroughputSensor(final String scopeName,
|
||||
final String entityName,
|
||||
final String operationName,
|
||||
final Sensor.RecordingLevel recordingLevel,
|
||||
final String... tags);
|
||||
|
||||
/**
|
||||
* Record the throughput value of a sensor.
|
||||
*
|
||||
* @param sensor the sensor whose throughput we are recording
|
||||
* @param value throughput value
|
||||
* @deprecated since 2.5. Use {@link Sensor#record() Sensor#record()} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
void recordThroughput(final Sensor sensor,
|
||||
final long value);
|
||||
|
||||
|
||||
/**
|
||||
* Generic method to create a sensor.
|
||||
* Note that for most cases it is advisable to use
|
||||
* {@link #addRateTotalSensor(String, String, String, RecordingLevel, String...) addRateTotalSensor()}
|
||||
* or {@link #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...) addLatencyRateTotalSensor()}
|
||||
* to ensure metric name well-formedness and conformity with the rest of the Kafka Streams code base.
|
||||
* However, if the above two methods are not sufficient, this method can also be used.
|
||||
*
|
||||
* @param name name of the sensor.
|
||||
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor
|
||||
* @return The added sensor.
|
||||
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
|
||||
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
|
||||
* @see #removeSensor(Sensor)
|
||||
*/
|
||||
Sensor addSensor(final String name,
|
||||
final Sensor.RecordingLevel recordingLevel);
|
||||
|
||||
/**
|
||||
* Generic method to create a sensor with parent sensors.
|
||||
* Note that for most cases it is advisable to use
|
||||
* {@link #addRateTotalSensor(String, String, String, RecordingLevel, String...) addRateTotalSensor()}
|
||||
* or {@link #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...) addLatencyRateTotalSensor()}
|
||||
* to ensure metric name well-formedness and conformity with the rest of the Kafka Streams code base.
|
||||
* However, if the above two methods are not sufficient, this method can also be used.
|
||||
*
|
||||
* @param name name of the sensor
|
||||
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor
|
||||
* @return The added sensor.
|
||||
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
|
||||
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
|
||||
* @see #removeSensor(Sensor)
|
||||
*/
|
||||
Sensor addSensor(final String name,
|
||||
final Sensor.RecordingLevel recordingLevel,
|
||||
final Sensor... parents);
|
||||
|
||||
/**
|
||||
* Remove a sensor.
|
||||
* @param sensor sensor to be removed
|
||||
*/
|
||||
void removeSensor(final Sensor sensor);
|
||||
}
|
||||
|
||||
|
||||
772
streams/src/main/java/org/apache/kafka/streams/Topology.java
Normal file
@@ -0,0 +1,772 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import org.apache.kafka.common.serialization.Deserializer;
|
||||
import org.apache.kafka.common.serialization.Serializer;
|
||||
import org.apache.kafka.streams.errors.TopologyException;
|
||||
import org.apache.kafka.streams.kstream.KStream;
|
||||
import org.apache.kafka.streams.kstream.KTable;
|
||||
import org.apache.kafka.streams.processor.Processor;
|
||||
import org.apache.kafka.streams.processor.ProcessorSupplier;
|
||||
import org.apache.kafka.streams.processor.StateStore;
|
||||
import org.apache.kafka.streams.processor.StreamPartitioner;
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
import org.apache.kafka.streams.processor.TopicNameExtractor;
|
||||
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
|
||||
import org.apache.kafka.streams.processor.internals.ProcessorNode;
|
||||
import org.apache.kafka.streams.processor.internals.ProcessorTopology;
|
||||
import org.apache.kafka.streams.processor.internals.SinkNode;
|
||||
import org.apache.kafka.streams.processor.internals.SourceNode;
|
||||
import org.apache.kafka.streams.state.StoreBuilder;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* A logical representation of a {@link ProcessorTopology}.
|
||||
* A topology is an acyclic graph of sources, processors, and sinks.
|
||||
* A {@link SourceNode source} is a node in the graph that consumes one or more Kafka topics and forwards them to its
|
||||
* successor nodes.
|
||||
* A {@link Processor processor} is a node in the graph that receives input records from upstream nodes, processes the
* records, and optionally forwards new records to one or all of its downstream nodes.
|
||||
* Finally, a {@link SinkNode sink} is a node in the graph that receives records from upstream nodes and writes them to
|
||||
* a Kafka topic.
|
||||
* A {@code Topology} allows you to construct an acyclic graph of these nodes and then pass it into a new
* {@link KafkaStreams} instance that will {@link KafkaStreams#start() begin consuming, processing, and producing
* records}.
|
||||
*/
|
||||
public class Topology {
|
||||
|
||||
final InternalTopologyBuilder internalTopologyBuilder = new InternalTopologyBuilder();
|
||||
|
||||
/**
|
||||
* Sets the {@code auto.offset.reset} configuration when
|
||||
* {@link #addSource(AutoOffsetReset, String, String...) adding a source processor} or when creating {@link KStream}
|
||||
* or {@link KTable} via {@link StreamsBuilder}.
|
||||
*/
|
||||
public enum AutoOffsetReset {
|
||||
EARLIEST, LATEST
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
*
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topics the name of one or more Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final String name,
|
||||
final String... topics) {
|
||||
internalTopologyBuilder.addSource(null, name, null, null, null, topics);
|
||||
return this;
|
||||
}
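// A usage sketch (illustrative only) of wiring a source into a Processor API topology. The node
// and topic names and the MyProcessor class are assumptions for the example.
//
//   Topology topology = new Topology();
//   topology.addSource("Source", "input-topic")
//           .addProcessor("Process", MyProcessor::new, "Source")
//           .addSink("Sink", "output-topic", "Process");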
|
||||
|
||||
/**
|
||||
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
*
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final String name,
|
||||
final Pattern topicPattern) {
|
||||
internalTopologyBuilder.addSource(null, name, null, null, null, topicPattern);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy to use for this source if no committed offsets found; acceptable values earliest or latest
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topics the name of one or more Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final String name,
|
||||
final String... topics) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, null, null, null, topics);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy value for this source if no committed offsets found; acceptable values earliest or latest.
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final String name,
|
||||
final Pattern topicPattern) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, null, null, null, topicPattern);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
*
|
||||
* @param timestampExtractor the stateless timestamp extractor used for this source,
|
||||
* if not specified the default extractor defined in the configs will be used
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topics the name of one or more Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final TimestampExtractor timestampExtractor,
|
||||
final String name,
|
||||
final String... topics) {
|
||||
internalTopologyBuilder.addSource(null, name, timestampExtractor, null, null, topics);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
*
|
||||
* @param timestampExtractor the stateless timestamp extractor used for this source,
|
||||
* if not specified the default extractor defined in the configs will be used
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final TimestampExtractor timestampExtractor,
|
||||
final String name,
|
||||
final Pattern topicPattern) {
|
||||
internalTopologyBuilder.addSource(null, name, timestampExtractor, null, null, topicPattern);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy to use for this source if no committed offsets found;
|
||||
* acceptable values earliest or latest
|
||||
* @param timestampExtractor the stateless timestamp extractor used for this source,
|
||||
* if not specified the default extractor defined in the configs will be used
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topics the name of one or more Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final TimestampExtractor timestampExtractor,
|
||||
final String name,
|
||||
final String... topics) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, null, null, topics);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes from topics matching the given pattern and forward the records to child processor
|
||||
* and/or sink nodes.
|
||||
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy value for this source if no committed offsets found;
|
||||
* acceptable values earliest or latest.
|
||||
* @param timestampExtractor the stateless timestamp extractor used for this source,
|
||||
* if not specified the default extractor defined in the configs will be used
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final TimestampExtractor timestampExtractor,
|
||||
final String name,
|
||||
final Pattern topicPattern) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, null, null, topicPattern);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the specified key and value deserializers.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
*
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
|
||||
* @param keyDeserializer key deserializer used to read this source, if not specified the default
|
||||
* key deserializer defined in the configs will be used
|
||||
* @param valueDeserializer value deserializer used to read this source,
|
||||
* if not specified the default value deserializer defined in the configs will be used
|
||||
* @param topics the name of one or more Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
public synchronized Topology addSource(final String name,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final String... topics) {
|
||||
internalTopologyBuilder.addSource(null, name, null, keyDeserializer, valueDeserializer, topics);
|
||||
return this;
|
||||
}
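// A usage sketch (illustrative only): overriding the default deserializers for one source. The
// node and topic names are assumptions for the example.
//
//   topology.addSource("Orders",
//                      new StringDeserializer(),
//                      new LongDeserializer(),
//                      "orders");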
|
||||
|
||||
/**
|
||||
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
|
||||
* and/or sink nodes.
|
||||
* The source will use the specified key and value deserializers.
|
||||
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
|
||||
* topics that share the same key-value data format.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
*
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
|
||||
* @param keyDeserializer key deserializer used to read this source, if not specified the default
|
||||
* key deserializer defined in the configs will be used
|
||||
* @param valueDeserializer value deserializer used to read this source,
|
||||
* if not specified the default value deserializer defined in the configs will be used
|
||||
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by name
|
||||
*/
|
||||
public synchronized Topology addSource(final String name,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final Pattern topicPattern) {
|
||||
internalTopologyBuilder.addSource(null, name, null, keyDeserializer, valueDeserializer, topicPattern);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes the named topics and forwards the records to child processor
* and/or sink nodes.
|
||||
* The source will use the specified key and value deserializers.
|
||||
* The provided de-/serializers will be used for all the specified topics, so care should be taken when specifying
|
||||
* topics that share the same key-value data format.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
|
||||
* acceptable values are earliest or latest
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
|
||||
* @param keyDeserializer key deserializer used to read this source, if not specified the default
|
||||
* key deserializer defined in the configs will be used
|
||||
* @param valueDeserializer value deserializer used to read this source,
|
||||
* if not specified the default value deserializer defined in the configs will be used
|
||||
* @param topics the name of one or more Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by name
|
||||
*/
|
||||
@SuppressWarnings("overloads")
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final String name,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final String... topics) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, null, keyDeserializer, valueDeserializer, topics);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
|
||||
* and/or sink nodes.
|
||||
* The source will use the specified key and value deserializers.
|
||||
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
|
||||
* topics that share the same key-value data format.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
|
||||
* acceptable values are earliest or latest
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
|
||||
* @param keyDeserializer key deserializer used to read this source, if not specified the default
|
||||
* key deserializer defined in the configs will be used
|
||||
* @param valueDeserializer value deserializer used to read this source,
|
||||
* if not specified the default value deserializer defined in the configs will be used
|
||||
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by name
|
||||
*/
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final String name,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final Pattern topicPattern) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, null, keyDeserializer, valueDeserializer, topicPattern);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
|
||||
* The source will use the specified key and value deserializers.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
|
||||
* acceptable values are earliest or latest.
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param timestampExtractor the stateless timestamp extractor used for this source,
|
||||
* if not specified the default extractor defined in the configs will be used
|
||||
* @param keyDeserializer key deserializer used to read this source, if not specified the default
|
||||
* key deserializer defined in the configs will be used
|
||||
* @param valueDeserializer value deserializer used to read this source,
|
||||
* if not specified the default value deserializer defined in the configs will be used
|
||||
* @param topics the name of one or more Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by another source
|
||||
*/
|
||||
@SuppressWarnings("overloads")
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final String name,
|
||||
final TimestampExtractor timestampExtractor,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final String... topics) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, keyDeserializer, valueDeserializer, topics);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
|
||||
* and/or sink nodes.
|
||||
* The source will use the specified key and value deserializers.
|
||||
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
|
||||
* topics that share the same key-value data format.
|
||||
*
|
||||
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
|
||||
* acceptable values are earliest or latest
|
||||
* @param name the unique name of the source used to reference this node when
|
||||
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
|
||||
* @param timestampExtractor the stateless timestamp extractor used for this source,
|
||||
* if not specified the default extractor defined in the configs will be used
|
||||
* @param keyDeserializer key deserializer used to read this source, if not specified the default
|
||||
* key deserializer defined in the configs will be used
|
||||
* @param valueDeserializer value deserializer used to read this source,
|
||||
* if not specified the default value deserializer defined in the configs will be used
|
||||
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
|
||||
* @return itself
|
||||
* @throws TopologyException if processor is already added or if topics have already been registered by name
|
||||
*/
|
||||
@SuppressWarnings("overloads")
|
||||
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
|
||||
final String name,
|
||||
final TimestampExtractor timestampExtractor,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final Pattern topicPattern) {
|
||||
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, keyDeserializer, valueDeserializer, topicPattern);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
|
||||
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topic the name of the Kafka topic to which this sink should write its records
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and write to its topic
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, StreamPartitioner, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
|
||||
*/
|
||||
public synchronized Topology addSink(final String name,
|
||||
final String topic,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topic, null, null, null, parentNames);
|
||||
return this;
|
||||
}
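// A usage sketch (illustrative only): writing processed records to an output topic using the
// default serializers. The node and topic names are assumptions for the example.
//
//   topology.addSink("OrdersSink", "orders-enriched", "EnrichProcessor");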
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic,
|
||||
* using the supplied partitioner.
|
||||
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
* <p>
|
||||
* The sink will also use the specified {@link StreamPartitioner} to determine how records are distributed among
|
||||
* the named Kafka topic's partitions.
|
||||
* Such control is often useful with topologies that use {@link #addStateStore(StoreBuilder, String...) state
|
||||
 * stores} in their processors.
|
||||
 * In most other cases, however, a partitioner need not be specified and Kafka will automatically distribute
|
||||
* records among partitions using Kafka's default partitioning logic.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topic the name of the Kafka topic to which this sink should write its records
|
||||
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and write to its topic
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
|
||||
*/
|
||||
public synchronized <K, V> Topology addSink(final String name,
|
||||
final String topic,
|
||||
final StreamPartitioner<? super K, ? super V> partitioner,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topic, null, null, partitioner, parentNames);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
|
||||
* The sink will use the specified key and value serializers.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topic the name of the Kafka topic to which this sink should write its records
|
||||
 * @param keySerializer the {@link Serializer key serializer} used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
 * @param valueSerializer value serializer used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and write to its topic
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, String...)
|
||||
* @see #addSink(String, String, StreamPartitioner, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
|
||||
*/
|
||||
public synchronized <K, V> Topology addSink(final String name,
|
||||
final String topic,
|
||||
final Serializer<K> keySerializer,
|
||||
final Serializer<V> valueSerializer,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topic, keySerializer, valueSerializer, null, parentNames);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
|
||||
* The sink will use the specified key and value serializers, and the supplied partitioner.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topic the name of the Kafka topic to which this sink should write its records
|
||||
 * @param keySerializer the {@link Serializer key serializer} used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
 * @param valueSerializer the {@link Serializer value serializer} used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and write to its topic
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, String...)
|
||||
* @see #addSink(String, String, StreamPartitioner, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, String...)
|
||||
*/
|
||||
public synchronized <K, V> Topology addSink(final String name,
|
||||
final String topic,
|
||||
final Serializer<K> keySerializer,
|
||||
final Serializer<V> valueSerializer,
|
||||
final StreamPartitioner<? super K, ? super V> partitioner,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topic, keySerializer, valueSerializer, partitioner, parentNames);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
|
||||
* The topics that it may ever send to should be pre-created.
|
||||
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and dynamically write to topics
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, StreamPartitioner, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
|
||||
*/
|
||||
public synchronized <K, V> Topology addSink(final String name,
|
||||
final TopicNameExtractor<K, V> topicExtractor,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topicExtractor, null, null, null, parentNames);
|
||||
return this;
|
||||
}
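
A short sketch of dynamic topic routing with this overload. The source topic, node names, and the per-tenant naming scheme are hypothetical, and the lambda assumes the extract(key, value, recordContext) signature of TopicNameExtractor used by this code line.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.TopicNameExtractor;

public class DynamicRoutingExample {

    public static Topology build() {
        final Topology topology = new Topology();
        topology.addSource("events-source",
                Serdes.String().deserializer(), Serdes.String().deserializer(), "events");

        // Route each record to a per-tenant topic derived from its key (hypothetical scheme).
        // Every candidate output topic ("events-tenantA", "events-tenantB", ...) must be pre-created.
        final TopicNameExtractor<String, String> byTenant =
                (key, value, recordContext) -> "events-" + key;

        topology.addSink("events-router", byTenant, "events-source");
        return topology;
    }
}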
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor},
|
||||
* using the supplied partitioner.
|
||||
* The topics that it may ever send to should be pre-created.
|
||||
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
|
||||
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}.
|
||||
* <p>
|
||||
* The sink will also use the specified {@link StreamPartitioner} to determine how records are distributed among
|
||||
 * the partitions of the topics chosen by {@code topicExtractor}.
|
||||
* Such control is often useful with topologies that use {@link #addStateStore(StoreBuilder, String...) state
|
||||
 * stores} in their processors.
|
||||
 * In most other cases, however, a partitioner need not be specified and Kafka will automatically distribute
|
||||
* records among partitions using Kafka's default partitioning logic.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
|
||||
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and dynamically write to topics
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
|
||||
*/
|
||||
public synchronized <K, V> Topology addSink(final String name,
|
||||
final TopicNameExtractor<K, V> topicExtractor,
|
||||
final StreamPartitioner<? super K, ? super V> partitioner,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topicExtractor, null, null, partitioner, parentNames);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
|
||||
* The topics that it may ever send to should be pre-created.
|
||||
* The sink will use the specified key and value serializers.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
|
||||
 * @param keySerializer the {@link Serializer key serializer} used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
 * @param valueSerializer the {@link Serializer value serializer} used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and dynamically write to topics
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, String...)
|
||||
* @see #addSink(String, String, StreamPartitioner, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
|
||||
*/
|
||||
public synchronized <K, V> Topology addSink(final String name,
|
||||
final TopicNameExtractor<K, V> topicExtractor,
|
||||
final Serializer<K> keySerializer,
|
||||
final Serializer<V> valueSerializer,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topicExtractor, keySerializer, valueSerializer, null, parentNames);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
|
||||
* The topics that it may ever send to should be pre-created.
|
||||
* The sink will use the specified key and value serializers, and the supplied partitioner.
|
||||
*
|
||||
* @param name the unique name of the sink
|
||||
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
|
||||
 * @param keySerializer the {@link Serializer key serializer} used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
 * @param valueSerializer the {@link Serializer value serializer} used when producing records; may be null if the sink
|
||||
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
|
||||
* {@link StreamsConfig stream configuration}
|
||||
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
|
||||
* and dynamically write to topics
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
* @see #addSink(String, String, String...)
|
||||
* @see #addSink(String, String, StreamPartitioner, String...)
|
||||
* @see #addSink(String, String, Serializer, Serializer, String...)
|
||||
*/
|
||||
public synchronized <K, V> Topology addSink(final String name,
|
||||
final TopicNameExtractor<K, V> topicExtractor,
|
||||
final Serializer<K> keySerializer,
|
||||
final Serializer<V> valueSerializer,
|
||||
final StreamPartitioner<? super K, ? super V> partitioner,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addSink(name, topicExtractor, keySerializer, valueSerializer, partitioner, parentNames);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new processor node that receives and processes records output by one or more parent source or processor
|
||||
 * nodes.
|
||||
* Any new record output by this processor will be forwarded to its child processor or sink nodes.
|
||||
*
|
||||
* @param name the unique name of the processor node
|
||||
* @param supplier the supplier used to obtain this node's {@link Processor} instance
|
||||
* @param parentNames the name of one or more source or processor nodes whose output records this processor should receive
|
||||
* and process
|
||||
* @return itself
|
||||
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
|
||||
*/
|
||||
public synchronized Topology addProcessor(final String name,
|
||||
final ProcessorSupplier supplier,
|
||||
final String... parentNames) {
|
||||
internalTopologyBuilder.addProcessor(name, supplier, parentNames);
|
||||
return this;
|
||||
}
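
A sketch of a complete source -> processor -> sink wiring using this method. Topic and node names are hypothetical, and the processor uses the old AbstractProcessor/ProcessorSupplier API that this class targets.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.AbstractProcessor;

public class UppercaseTopologyExample {

    public static Topology build() {
        final Topology topology = new Topology();
        // source -> processor -> sink, all referenced by their unique node names
        topology.addSource("text-source",
                Serdes.String().deserializer(), Serdes.String().deserializer(), "text-input");
        topology.addProcessor("uppercase", () -> new AbstractProcessor<String, String>() {
            @Override
            public void process(final String key, final String value) {
                // forward the transformed record to all child nodes (here: the sink below)
                context().forward(key, value == null ? null : value.toUpperCase());
            }
        }, "text-source");
        topology.addSink("text-sink", "text-output",
                Serdes.String().serializer(), Serdes.String().serializer(), "uppercase");
        return topology;
    }
}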
|
||||
|
||||
/**
|
||||
* Adds a state store.
|
||||
*
|
||||
 * @param storeBuilder the storeBuilder used to obtain the {@link StateStore} instance
|
||||
* @param processorNames the names of the processors that should be able to access the provided store
|
||||
* @return itself
|
||||
* @throws TopologyException if state store supplier is already added
|
||||
*/
|
||||
public synchronized Topology addStateStore(final StoreBuilder storeBuilder,
|
||||
final String... processorNames) {
|
||||
internalTopologyBuilder.addStateStore(storeBuilder, processorNames);
|
||||
return this;
|
||||
}
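
A sketch of attaching a state store to a processor with this method. The store name "counts", the topic, and the node names are assumptions; the counting processor uses the old AbstractProcessor API.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import org.apache.kafka.streams.state.Stores;

public class CountingTopologyExample {

    public static Topology build() {
        final StoreBuilder<KeyValueStore<String, Long>> countStore =
                Stores.keyValueStoreBuilder(
                        Stores.persistentKeyValueStore("counts"),   // hypothetical store name
                        Serdes.String(),
                        Serdes.Long());

        final Topology topology = new Topology();
        topology.addSource("words",
                Serdes.String().deserializer(), Serdes.String().deserializer(), "words-input");
        topology.addProcessor("counter", () -> new AbstractProcessor<String, String>() {
            private KeyValueStore<String, Long> store;

            @Override
            @SuppressWarnings("unchecked")
            public void init(final ProcessorContext context) {
                super.init(context);
                store = (KeyValueStore<String, Long>) context.getStateStore("counts");
            }

            @Override
            public void process(final String key, final String value) {
                final Long previous = store.get(key);
                store.put(key, previous == null ? 1L : previous + 1L);
            }
        }, "words");

        // Connects the store to the "counter" processor; alternatively call
        // connectProcessorAndStateStores("counter", "counts") after adding the store without names.
        topology.addStateStore(countStore, "counter");
        return topology;
    }
}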
|
||||
|
||||
/**
|
||||
* Adds a global {@link StateStore} to the topology.
|
||||
* The {@link StateStore} sources its data from all partitions of the provided input topic.
|
||||
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
|
||||
* <p>
|
||||
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
|
||||
* of the input topic.
|
||||
* <p>
|
||||
 * The provided {@link ProcessorSupplier} will be used to create a {@link ProcessorNode} that will receive all
|
||||
* records forwarded from the {@link SourceNode}.
|
||||
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
|
||||
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
|
||||
*
|
||||
* @param storeBuilder user defined state store builder
|
||||
* @param sourceName name of the {@link SourceNode} that will be automatically added
|
||||
* @param keyDeserializer the {@link Deserializer} to deserialize keys with
|
||||
* @param valueDeserializer the {@link Deserializer} to deserialize values with
|
||||
* @param topic the topic to source the data from
|
||||
* @param processorName the name of the {@link ProcessorSupplier}
|
||||
* @param stateUpdateSupplier the instance of {@link ProcessorSupplier}
|
||||
* @return itself
|
||||
 * @throws TopologyException if the processor or state store is already registered
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public synchronized Topology addGlobalStore(final StoreBuilder storeBuilder,
|
||||
final String sourceName,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final String topic,
|
||||
final String processorName,
|
||||
final ProcessorSupplier stateUpdateSupplier) {
|
||||
internalTopologyBuilder.addGlobalStore(storeBuilder, sourceName, null, keyDeserializer,
|
||||
valueDeserializer, topic, processorName, stateUpdateSupplier);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a global {@link StateStore} to the topology.
|
||||
* The {@link StateStore} sources its data from all partitions of the provided input topic.
|
||||
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
|
||||
* <p>
|
||||
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
|
||||
* of the input topic.
|
||||
* <p>
|
||||
 * The provided {@link ProcessorSupplier} will be used to create a {@link ProcessorNode} that will receive all
|
||||
* records forwarded from the {@link SourceNode}.
|
||||
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
|
||||
*
|
||||
* @param storeBuilder user defined key value store builder
|
||||
* @param sourceName name of the {@link SourceNode} that will be automatically added
|
||||
* @param timestampExtractor the stateless timestamp extractor used for this source,
|
||||
* if not specified the default extractor defined in the configs will be used
|
||||
* @param keyDeserializer the {@link Deserializer} to deserialize keys with
|
||||
* @param valueDeserializer the {@link Deserializer} to deserialize values with
|
||||
* @param topic the topic to source the data from
|
||||
* @param processorName the name of the {@link ProcessorSupplier}
|
||||
* @param stateUpdateSupplier the instance of {@link ProcessorSupplier}
|
||||
* @return itself
|
||||
 * @throws TopologyException if the processor or state store is already registered
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public synchronized Topology addGlobalStore(final StoreBuilder storeBuilder,
|
||||
final String sourceName,
|
||||
final TimestampExtractor timestampExtractor,
|
||||
final Deserializer keyDeserializer,
|
||||
final Deserializer valueDeserializer,
|
||||
final String topic,
|
||||
final String processorName,
|
||||
final ProcessorSupplier stateUpdateSupplier) {
|
||||
internalTopologyBuilder.addGlobalStore(storeBuilder, sourceName, timestampExtractor, keyDeserializer,
|
||||
valueDeserializer, topic, processorName, stateUpdateSupplier);
|
||||
return this;
|
||||
}
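
A sketch of registering a global store with the simpler overload above. Store, topic, and node names are hypothetical; change-logging is disabled on the builder under the assumption, common for global stores, that the input topic itself serves as the changelog.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import org.apache.kafka.streams.state.Stores;

public class GlobalStoreExample {

    public static Topology build() {
        final StoreBuilder<KeyValueStore<String, String>> referenceData =
                Stores.keyValueStoreBuilder(
                        Stores.inMemoryKeyValueStore("reference-data"),   // hypothetical store name
                        Serdes.String(),
                        Serdes.String())
                        .withLoggingDisabled();          // global stores read the topic directly

        final Topology topology = new Topology();
        topology.addGlobalStore(
                referenceData,
                "reference-data-source",                 // auto-added source node (hypothetical name)
                Serdes.String().deserializer(),
                Serdes.String().deserializer(),
                "reference-data-topic",                  // hypothetical input topic
                "reference-data-maintainer",             // processor that keeps the store up to date
                () -> new AbstractProcessor<String, String>() {
                    private KeyValueStore<String, String> store;

                    @Override
                    @SuppressWarnings("unchecked")
                    public void init(final ProcessorContext context) {
                        super.init(context);
                        store = (KeyValueStore<String, String>) context.getStateStore("reference-data");
                    }

                    @Override
                    public void process(final String key, final String value) {
                        store.put(key, value);           // mirror the topic into the global store
                    }
                });
        return topology;
    }
}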
|
||||
|
||||
/**
|
||||
* Connects the processor and the state stores.
|
||||
*
|
||||
* @param processorName the name of the processor
|
||||
* @param stateStoreNames the names of state stores that the processor uses
|
||||
* @return itself
|
||||
* @throws TopologyException if the processor or a state store is unknown
|
||||
*/
|
||||
public synchronized Topology connectProcessorAndStateStores(final String processorName,
|
||||
final String... stateStoreNames) {
|
||||
internalTopologyBuilder.connectProcessorAndStateStores(processorName, stateStoreNames);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a description of the specified {@code Topology}.
|
||||
*
|
||||
* @return a description of the topology.
|
||||
*/
|
||||
|
||||
public synchronized TopologyDescription describe() {
|
||||
return internalTopologyBuilder.describe();
|
||||
}
|
||||
}
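
A sketch of describing and running a Topology end to end. The topics, application id, and bootstrap servers are placeholders for illustration only.

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;

import java.util.Properties;

public class DescribeAndRunExample {

    public static void main(final String[] args) {
        final Topology topology = new Topology();
        topology.addSource("in", "input-topic");          // hypothetical topics
        topology.addSink("out", "output-topic", "in");

        // Print the sub-topologies before starting; useful for sanity-checking the wiring.
        System.out.println(topology.describe());

        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "topology-demo");        // hypothetical app id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");    // hypothetical brokers
        final KafkaStreams streams = new KafkaStreams(topology, props);
        streams.start();
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }
}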
|
||||
@@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams;
|
||||
|
||||
import org.apache.kafka.streams.processor.TopicNameExtractor;
|
||||
import org.apache.kafka.streams.processor.internals.StreamTask;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* A meta representation of a {@link Topology topology}.
|
||||
* <p>
|
||||
* The nodes of a topology are grouped into {@link Subtopology sub-topologies} if they are connected.
|
||||
* In contrast, two sub-topologies are not connected but can be linked to each other via topics, i.e., if one
|
||||
* sub-topology {@link Topology#addSink(String, String, String...) writes} into a topic and another sub-topology
|
||||
* {@link Topology#addSource(String, String...) reads} from the same topic.
|
||||
* <p>
|
||||
* When {@link KafkaStreams#start()} is called, different sub-topologies will be constructed and executed as independent
|
||||
* {@link StreamTask tasks}.
|
||||
*/
|
||||
public interface TopologyDescription {
|
||||
/**
|
||||
* A connected sub-graph of a {@link Topology}.
|
||||
* <p>
|
||||
* Nodes of a {@code Subtopology} are connected {@link Topology#addProcessor(String,
|
||||
* org.apache.kafka.streams.processor.ProcessorSupplier, String...) directly} or indirectly via
|
||||
* {@link Topology#connectProcessorAndStateStores(String, String...) state stores}
|
||||
* (i.e., if multiple processors share the same state).
|
||||
*/
|
||||
interface Subtopology {
|
||||
/**
|
||||
* Internally assigned unique ID.
|
||||
* @return the ID of the sub-topology
|
||||
*/
|
||||
int id();
|
||||
|
||||
/**
|
||||
* All nodes of this sub-topology.
|
||||
* @return set of all nodes within the sub-topology
|
||||
*/
|
||||
Set<Node> nodes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a {@link Topology#addGlobalStore(org.apache.kafka.streams.state.StoreBuilder, String,
|
||||
* org.apache.kafka.common.serialization.Deserializer, org.apache.kafka.common.serialization.Deserializer, String,
|
||||
* String, org.apache.kafka.streams.processor.ProcessorSupplier) global store}.
|
||||
* Adding a global store results in adding a source node and one stateful processor node.
|
||||
 * Note that all added global stores form a single unit (similar to a {@link Subtopology}) even if different
|
||||
* global stores are not connected to each other.
|
||||
* Furthermore, global stores are available to all processors without connecting them explicitly, and thus global
|
||||
* stores will never be part of any {@link Subtopology}.
|
||||
*/
|
||||
interface GlobalStore {
|
||||
/**
|
||||
* The source node reading from a "global" topic.
|
||||
* @return the "global" source node
|
||||
*/
|
||||
Source source();
|
||||
|
||||
/**
|
||||
* The processor node maintaining the global store.
|
||||
* @return the "global" processor node
|
||||
*/
|
||||
Processor processor();
|
||||
|
||||
int id();
|
||||
}
|
||||
|
||||
/**
|
||||
* A node of a topology. Can be a source, sink, or processor node.
|
||||
*/
|
||||
interface Node {
|
||||
/**
|
||||
* The name of the node. Will never be {@code null}.
|
||||
* @return the name of the node
|
||||
*/
|
||||
String name();
|
||||
/**
|
||||
* The predecessors of this node within a sub-topology.
|
||||
* Note, sources do not have any predecessors.
|
||||
* Will never be {@code null}.
|
||||
* @return set of all predecessors
|
||||
*/
|
||||
Set<Node> predecessors();
|
||||
/**
|
||||
 * The successors of this node within a sub-topology.
|
||||
* Note, sinks do not have any successors.
|
||||
* Will never be {@code null}.
|
||||
 * @return set of all successors
|
||||
*/
|
||||
Set<Node> successors();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A source node of a topology.
|
||||
*/
|
||||
interface Source extends Node {
|
||||
/**
|
||||
* The topic names this source node is reading from.
|
||||
* @return comma separated list of topic names or pattern (as String)
|
||||
* @deprecated use {@link #topicSet()} or {@link #topicPattern()} instead
|
||||
*/
|
||||
@Deprecated
|
||||
String topics();
|
||||
|
||||
/**
|
||||
* The topic names this source node is reading from.
|
||||
* @return a set of topic names
|
||||
*/
|
||||
Set<String> topicSet();
|
||||
|
||||
/**
|
||||
 * The pattern used to match the topic names this source node is reading from.
|
||||
* @return the pattern used to match topic names
|
||||
*/
|
||||
Pattern topicPattern();
|
||||
}
|
||||
|
||||
/**
|
||||
* A processor node of a topology.
|
||||
*/
|
||||
interface Processor extends Node {
|
||||
/**
|
||||
* The names of all connected stores.
|
||||
* @return set of store names
|
||||
*/
|
||||
Set<String> stores();
|
||||
}
|
||||
|
||||
/**
|
||||
* A sink node of a topology.
|
||||
*/
|
||||
interface Sink extends Node {
|
||||
/**
|
||||
* The topic name this sink node is writing to.
|
||||
 * Could be {@code null} if the topic name can only be dynamically determined based on {@link TopicNameExtractor}.
|
||||
* @return a topic name
|
||||
*/
|
||||
String topic();
|
||||
|
||||
/**
|
||||
* The {@link TopicNameExtractor} class that this sink node uses to dynamically extract the topic name to write to.
|
||||
* Could be {@code null} if the topic name is not dynamically determined.
|
||||
 * @return the {@link TopicNameExtractor} class used to get the topic name
|
||||
*/
|
||||
TopicNameExtractor topicNameExtractor();
|
||||
}
|
||||
|
||||
/**
|
||||
* All sub-topologies of the represented topology.
|
||||
* @return set of all sub-topologies
|
||||
*/
|
||||
Set<Subtopology> subtopologies();
|
||||
|
||||
/**
|
||||
* All global stores of the represented topology.
|
||||
* @return set of all global stores
|
||||
*/
|
||||
Set<GlobalStore> globalStores();
|
||||
|
||||
}
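
A sketch of walking a TopologyDescription for an already-built topology; the printed format is arbitrary and just for illustration.

import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.TopologyDescription;

public class DescriptionWalker {

    // Walks an already-built topology and prints its structure.
    public static void print(final Topology topology) {
        final TopologyDescription description = topology.describe();

        for (final TopologyDescription.Subtopology subtopology : description.subtopologies()) {
            System.out.println("Sub-topology " + subtopology.id());
            for (final TopologyDescription.Node node : subtopology.nodes()) {
                System.out.println("  node " + node.name()
                        + " (predecessors=" + node.predecessors().size()
                        + ", successors=" + node.successors().size() + ")");
            }
        }
        for (final TopologyDescription.GlobalStore globalStore : description.globalStores()) {
            System.out.println("Global store: source=" + globalStore.source().name()
                    + ", processor=" + globalStore.processor().name());
        }
    }
}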
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
/**
|
||||
* Indicates that none of the specified {@link org.apache.kafka.streams.StreamsConfig#BOOTSTRAP_SERVERS_CONFIG brokers}
|
||||
* could be found.
|
||||
*
|
||||
* @see org.apache.kafka.streams.StreamsConfig
|
||||
*/
|
||||
public class BrokerNotFoundException extends StreamsException {
|
||||
|
||||
private final static long serialVersionUID = 1L;
|
||||
|
||||
public BrokerNotFoundException(final String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public BrokerNotFoundException(final String message, final Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
|
||||
public BrokerNotFoundException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
import java.util.Map;
|
||||
import org.apache.kafka.clients.producer.ProducerRecord;
|
||||
|
||||
/**
|
||||
* {@code ProductionExceptionHandler} that always instructs streams to fail when an exception
|
||||
* happens while attempting to produce result records.
|
||||
*/
|
||||
public class DefaultProductionExceptionHandler implements ProductionExceptionHandler {
|
||||
@Override
|
||||
public ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
|
||||
final Exception exception) {
|
||||
return ProductionExceptionHandlerResponse.FAIL;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(final Map<String, ?> configs) {
|
||||
// ignore
|
||||
}
|
||||
}
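
A sketch of selecting this handler through configuration, assuming the default.production.exception.handler setting (StreamsConfig.DEFAULT_PRODUCTION_EXCEPTION_HANDLER_CLASS_CONFIG) that goes with this handler; application id and brokers are placeholders.

import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.errors.DefaultProductionExceptionHandler;

import java.util.Properties;

public class ProductionHandlerConfigExample {

    public static Properties props() {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "handler-demo");          // hypothetical app id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");     // hypothetical brokers
        // Fail-fast behaviour on produce errors; this is also the default, shown here explicitly.
        props.put(StreamsConfig.DEFAULT_PRODUCTION_EXCEPTION_HANDLER_CLASS_CONFIG,
                DefaultProductionExceptionHandler.class);
        return props;
    }
}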
|
||||
@@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
import org.apache.kafka.clients.consumer.ConsumerRecord;
|
||||
import org.apache.kafka.common.Configurable;
|
||||
import org.apache.kafka.streams.processor.ProcessorContext;
|
||||
|
||||
/**
|
||||
* Interface that specifies how an exception from source node deserialization
|
||||
* (e.g., reading from Kafka) should be handled.
|
||||
*/
|
||||
public interface DeserializationExceptionHandler extends Configurable {
|
||||
/**
|
||||
* Inspect a record and the exception received.
|
||||
* @param context processor context
|
||||
* @param record record that failed deserialization
|
||||
* @param exception the actual exception
|
||||
*/
|
||||
DeserializationHandlerResponse handle(final ProcessorContext context,
|
||||
final ConsumerRecord<byte[], byte[]> record,
|
||||
final Exception exception);
|
||||
|
||||
/**
|
||||
* Enumeration that describes the response from the exception handler.
|
||||
*/
|
||||
enum DeserializationHandlerResponse {
|
||||
/* continue with processing */
|
||||
CONTINUE(0, "CONTINUE"),
|
||||
/* fail the processing and stop */
|
||||
FAIL(1, "FAIL");
|
||||
|
||||
/** An English description of the API--this is for debugging and can change */
|
||||
public final String name;
|
||||
|
||||
/** the permanent and immutable id of an API--this can't change ever */
|
||||
public final int id;
|
||||
|
||||
DeserializationHandlerResponse(final int id, final String name) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
/**
|
||||
* Indicates that there was a problem when trying to access a
|
||||
 * {@link org.apache.kafka.streams.processor.StateStore StateStore}, i.e., the store is no longer valid because it is
|
||||
* closed or doesn't exist any more due to a rebalance.
|
||||
* <p>
|
||||
* These exceptions may be transient, i.e., during a rebalance it won't be possible to query the stores as they are
|
||||
* being (re)-initialized. Once the rebalance has completed the stores will be available again. Hence, it is valid
|
||||
 * to back off and retry when handling this exception.
|
||||
*/
|
||||
public class InvalidStateStoreException extends StreamsException {
|
||||
|
||||
private final static long serialVersionUID = 1L;
|
||||
|
||||
public InvalidStateStoreException(final String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public InvalidStateStoreException(final String message, final Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
|
||||
public InvalidStateStoreException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
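
A sketch of the back-off-and-retry pattern this Javadoc suggests, assuming a running KafkaStreams instance and a hypothetical store named "counts" looked up via QueryableStoreTypes.

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.errors.InvalidStateStoreException;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

public class StoreLookupExample {

    // Blocks until the store named "counts" (hypothetical) is queryable,
    // backing off while a rebalance is still re-initializing it.
    public static ReadOnlyKeyValueStore<String, Long> waitForStore(final KafkaStreams streams)
            throws InterruptedException {
        while (true) {
            try {
                return streams.store("counts", QueryableStoreTypes.<String, Long>keyValueStore());
            } catch (final InvalidStateStoreException retriable) {
                Thread.sleep(100);   // transient: retry after a short back-off
            }
        }
    }
}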
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
/**
|
||||
* Indicates that the state store directory lock could not be acquired because another thread holds the lock.
|
||||
*
|
||||
* @see org.apache.kafka.streams.processor.StateStore
|
||||
*/
|
||||
public class LockException extends StreamsException {
|
||||
|
||||
private final static long serialVersionUID = 1L;
|
||||
|
||||
public LockException(final String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public LockException(final String message, final Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
|
||||
public LockException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
import org.apache.kafka.clients.consumer.ConsumerRecord;
|
||||
import org.apache.kafka.streams.processor.ProcessorContext;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Deserialization handler that logs a deserialization exception and then
|
||||
* signals the processing pipeline to continue processing more records.
|
||||
*/
|
||||
public class LogAndContinueExceptionHandler implements DeserializationExceptionHandler {
|
||||
private static final Logger log = LoggerFactory.getLogger(LogAndContinueExceptionHandler.class);
|
||||
|
||||
@Override
|
||||
public DeserializationHandlerResponse handle(final ProcessorContext context,
|
||||
final ConsumerRecord<byte[], byte[]> record,
|
||||
final Exception exception) {
|
||||
|
||||
log.warn("Exception caught during Deserialization, " +
|
||||
"taskId: {}, topic: {}, partition: {}, offset: {}",
|
||||
context.taskId(), record.topic(), record.partition(), record.offset(),
|
||||
exception);
|
||||
|
||||
return DeserializationHandlerResponse.CONTINUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(final Map<String, ?> configs) {
|
||||
// ignore
|
||||
}
|
||||
}
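
A sketch of enabling this handler through configuration, assuming the default.deserialization.exception.handler setting (StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG); application id and brokers are placeholders.

import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.errors.LogAndContinueExceptionHandler;

import java.util.Properties;

public class DeserializationHandlerConfigExample {

    public static Properties props() {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "poison-pill-tolerant");  // hypothetical app id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");     // hypothetical brokers
        // Skip records that cannot be deserialized instead of failing the whole task
        // (the default is LogAndFailExceptionHandler).
        props.put(StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
                LogAndContinueExceptionHandler.class);
        return props;
    }
}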
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
import org.apache.kafka.clients.consumer.ConsumerRecord;
|
||||
import org.apache.kafka.streams.processor.ProcessorContext;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* Deserialization handler that logs a deserialization exception and then
|
||||
* signals the processing pipeline to stop processing more records and fail.
|
||||
*/
|
||||
public class LogAndFailExceptionHandler implements DeserializationExceptionHandler {
|
||||
private static final Logger log = LoggerFactory.getLogger(LogAndFailExceptionHandler.class);
|
||||
|
||||
@Override
|
||||
public DeserializationHandlerResponse handle(final ProcessorContext context,
|
||||
final ConsumerRecord<byte[], byte[]> record,
|
||||
final Exception exception) {
|
||||
|
||||
log.error("Exception caught during Deserialization, " +
|
||||
"taskId: {}, topic: {}, partition: {}, offset: {}",
|
||||
context.taskId(), record.topic(), record.partition(), record.offset(),
|
||||
exception);
|
||||
|
||||
return DeserializationHandlerResponse.FAIL;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void configure(final Map<String, ?> configs) {
|
||||
// ignore
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
/**
|
||||
* Indicates a processor state operation (e.g. put, get) has failed.
|
||||
*
|
||||
* @see org.apache.kafka.streams.processor.StateStore
|
||||
*/
|
||||
public class ProcessorStateException extends StreamsException {
|
||||
|
||||
private final static long serialVersionUID = 1L;
|
||||
|
||||
public ProcessorStateException(final String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public ProcessorStateException(final String message, final Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
|
||||
public ProcessorStateException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
import org.apache.kafka.clients.producer.ProducerRecord;
|
||||
import org.apache.kafka.common.Configurable;
|
||||
|
||||
/**
|
||||
* Interface that specifies how an exception when attempting to produce a result to
|
||||
* Kafka should be handled.
|
||||
*/
|
||||
public interface ProductionExceptionHandler extends Configurable {
|
||||
/**
|
||||
* Inspect a record that we attempted to produce, and the exception that resulted
|
||||
* from attempting to produce it and determine whether or not to continue processing.
|
||||
*
|
||||
* @param record The record that failed to produce
|
||||
* @param exception The exception that occurred during production
|
||||
*/
|
||||
ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
|
||||
final Exception exception);
|
||||
|
||||
enum ProductionExceptionHandlerResponse {
|
||||
/* continue processing */
|
||||
CONTINUE(0, "CONTINUE"),
|
||||
/* fail processing */
|
||||
FAIL(1, "FAIL");
|
||||
|
||||
/**
|
||||
 * An English description of the API--this is for debugging and can change
|
||||
*/
|
||||
public final String name;
|
||||
|
||||
/**
|
||||
* the permanent and immutable id of an API--this can't change ever
|
||||
*/
|
||||
public final int id;
|
||||
|
||||
ProductionExceptionHandlerResponse(final int id,
|
||||
final String name) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
import org.apache.kafka.common.KafkaException;
|
||||
|
||||
/**
|
||||
* {@link StreamsException} is the top-level exception type generated by Kafka Streams.
|
||||
*/
|
||||
public class StreamsException extends KafkaException {
|
||||
|
||||
private final static long serialVersionUID = 1L;
|
||||
|
||||
public StreamsException(final String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public StreamsException(final String message, final Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
|
||||
public StreamsException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
/**
|
||||
* Indicates a run time error incurred while trying to assign
|
||||
* {@link org.apache.kafka.streams.processor.internals.StreamTask stream tasks} to
|
||||
* {@link org.apache.kafka.streams.processor.internals.StreamThread threads}.
|
||||
*/
|
||||
public class TaskAssignmentException extends StreamsException {
|
||||
|
||||
private final static long serialVersionUID = 1L;
|
||||
|
||||
public TaskAssignmentException(final String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public TaskAssignmentException(final String message, final Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
|
||||
public TaskAssignmentException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
/**
|
||||
 * Indicates a run time error incurred while trying to parse the {@link org.apache.kafka.streams.processor.TaskId task id}
|
||||
* from the read string.
|
||||
*
|
||||
* @see org.apache.kafka.streams.processor.internals.StreamTask
|
||||
*/
|
||||
public class TaskIdFormatException extends StreamsException {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public TaskIdFormatException(final String message) {
|
||||
super("Task id cannot be parsed correctly" + (message == null ? "" : " from " + message));
|
||||
}
|
||||
|
||||
public TaskIdFormatException(final String message, final Throwable throwable) {
|
||||
super("Task id cannot be parsed correctly" + (message == null ? "" : " from " + message), throwable);
|
||||
}
|
||||
|
||||
public TaskIdFormatException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
import org.apache.kafka.common.TopicPartition;
|
||||
import org.apache.kafka.streams.processor.internals.Task;
|
||||
|
||||
/**
|
||||
* Indicates that a task got migrated to another thread.
|
||||
* Thus, the task raising this exception can be cleaned up and closed as "zombie".
|
||||
*/
|
||||
public class TaskMigratedException extends StreamsException {
|
||||
|
||||
private final static long serialVersionUID = 1L;
|
||||
|
||||
private final Task task;
|
||||
|
||||
// this is for unit test only
|
||||
public TaskMigratedException() {
|
||||
super("A task has been migrated unexpectedly", null);
|
||||
|
||||
this.task = null;
|
||||
}
|
||||
|
||||
public TaskMigratedException(final Task task,
|
||||
final TopicPartition topicPartition,
|
||||
final long endOffset,
|
||||
final long pos) {
|
||||
super(String.format("Log end offset of %s should not change while restoring: old end offset %d, current offset %d",
|
||||
topicPartition,
|
||||
endOffset,
|
||||
pos),
|
||||
null);
|
||||
|
||||
this.task = task;
|
||||
}
|
||||
|
||||
public TaskMigratedException(final Task task) {
|
||||
super(String.format("Task %s is unexpectedly closed during processing", task.id()), null);
|
||||
|
||||
this.task = task;
|
||||
}
|
||||
|
||||
public TaskMigratedException(final Task task,
|
||||
final Throwable throwable) {
|
||||
super(String.format("Client request for task %s has been fenced due to a rebalance", task.id()), throwable);
|
||||
|
||||
this.task = task;
|
||||
}
|
||||
|
||||
public Task migratedTask() {
|
||||
return task;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.errors;
|
||||
|
||||
|
||||
/**
|
||||
 * Indicates a pre-runtime error occurred while parsing the {@link org.apache.kafka.streams.Topology logical topology}
|
||||
* to construct the {@link org.apache.kafka.streams.processor.internals.ProcessorTopology physical processor topology}.
|
||||
*/
|
||||
public class TopologyException extends StreamsException {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
public TopologyException(final String message) {
|
||||
super("Invalid topology" + (message == null ? "" : ": " + message));
|
||||
}
|
||||
|
||||
public TopologyException(final String message,
|
||||
final Throwable throwable) {
|
||||
super("Invalid topology" + (message == null ? "" : ": " + message), throwable);
|
||||
}
|
||||
|
||||
public TopologyException(final Throwable throwable) {
|
||||
super(throwable);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.internals;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
|
||||
import static java.lang.String.format;
|
||||
|
||||
public final class ApiUtils {
|
||||
|
||||
private static final String MILLISECOND_VALIDATION_FAIL_MSG_FRMT = "Invalid value for parameter \"%s\" (value was: %s). ";
|
||||
private static final String VALIDATE_MILLISECOND_NULL_SUFFIX = "It shouldn't be null.";
|
||||
private static final String VALIDATE_MILLISECOND_OVERFLOW_SUFFIX = "It can't be converted to milliseconds.";
|
||||
|
||||
private ApiUtils() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that milliseconds from {@code duration} can be retrieved.
|
||||
* @param duration Duration to check.
|
||||
* @param messagePrefix Prefix text for an error message.
|
||||
* @return Milliseconds from {@code duration}.
|
||||
*/
|
||||
public static long validateMillisecondDuration(final Duration duration, final String messagePrefix) {
|
||||
try {
|
||||
if (duration == null) {
|
||||
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_NULL_SUFFIX);
|
||||
}
|
||||
|
||||
return duration.toMillis();
|
||||
} catch (final ArithmeticException e) {
|
||||
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_OVERFLOW_SUFFIX, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates that milliseconds from {@code instant} can be retrieved.
|
||||
* @param instant Instant to check.
|
||||
* @param messagePrefix Prefix text for an error message.
|
||||
* @return Milliseconds from {@code instant}.
|
||||
*/
|
||||
public static long validateMillisecondInstant(final Instant instant, final String messagePrefix) {
|
||||
try {
|
||||
if (instant == null) {
|
||||
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_NULL_SUFFIX);
|
||||
}
|
||||
|
||||
return instant.toEpochMilli();
|
||||
} catch (final ArithmeticException e) {
|
||||
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_OVERFLOW_SUFFIX, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates the prefix message for the validateMillisecondXXXXXX() utility methods.
|
||||
* @param value Object to be converted to milliseconds
|
||||
* @param name Object name
|
||||
* @return Error message prefix to use in exception
|
||||
*/
|
||||
public static String prepareMillisCheckFailMsgPrefix(final Object value, final String name) {
|
||||
return format(MILLISECOND_VALIDATION_FAIL_MSG_FRMT, name, value);
|
||||
}
|
||||
}
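A small sketch of how these internal helpers behave; the parameter name "windowSize" is only an illustrative label chosen for the example.

import java.time.Duration;
import org.apache.kafka.streams.internals.ApiUtils;

public class ApiUtilsExample {
    public static void main(final String[] args) {
        final Duration windowSize = Duration.ofSeconds(30);
        final String prefix = ApiUtils.prepareMillisCheckFailMsgPrefix(windowSize, "windowSize");

        // A valid duration is simply converted to milliseconds.
        System.out.println(ApiUtils.validateMillisecondDuration(windowSize, prefix)); // 30000

        try {
            // Duration.toMillis() overflows here, so the helper rethrows the
            // ArithmeticException as an IllegalArgumentException carrying the prefix.
            final Duration huge = Duration.ofSeconds(Long.MAX_VALUE);
            ApiUtils.validateMillisecondDuration(huge, ApiUtils.prepareMillisCheckFailMsgPrefix(huge, "windowSize"));
        } catch (final IllegalArgumentException e) {
            System.err.println(e.getMessage());
        }
    }
}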
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.internals;
|
||||
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A {@link StreamsConfig} that does not log its configuration on construction.
|
||||
*
|
||||
* This produces cleaner output for unit tests using the {@code test-utils},
|
||||
* since logging the config is not really valuable in this context.
|
||||
*/
|
||||
public class QuietStreamsConfig extends StreamsConfig {
|
||||
public QuietStreamsConfig(final Map<?, ?> props) {
|
||||
super(props, false);
|
||||
}
|
||||
}
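A brief sketch of the intended test usage, assuming a placeholder application id and bootstrap server:

import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.internals.QuietStreamsConfig;

public class QuietStreamsConfigExample {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app");           // placeholder value
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");  // placeholder value

        // Same semantics as StreamsConfig, but the full config dump is not logged,
        // which keeps unit-test output short.
        final StreamsConfig config = new QuietStreamsConfig(props);
        System.out.println(config.getString(StreamsConfig.APPLICATION_ID_CONFIG));
    }
}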
|
||||
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.internals.metrics;
|
||||
|
||||
import org.apache.kafka.common.metrics.Gauge;
|
||||
import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
|
||||
import org.apache.kafka.streams.KafkaStreams.State;
|
||||
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Properties;
|
||||
|
||||
public class ClientMetrics {
|
||||
private ClientMetrics() {}
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(ClientMetrics.class);
|
||||
private static final String VERSION = "version";
|
||||
private static final String COMMIT_ID = "commit-id";
|
||||
private static final String APPLICATION_ID = "application-id";
|
||||
private static final String TOPOLOGY_DESCRIPTION = "topology-description";
|
||||
private static final String STATE = "state";
|
||||
private static final String VERSION_FROM_FILE;
|
||||
private static final String COMMIT_ID_FROM_FILE;
|
||||
private static final String DEFAULT_VALUE = "unknown";
|
||||
|
||||
static {
|
||||
final Properties props = new Properties();
|
||||
try (InputStream resourceStream = ClientMetrics.class.getResourceAsStream(
|
||||
"/kafka/kafka-streams-version.properties")) {
|
||||
|
||||
props.load(resourceStream);
|
||||
} catch (final Exception exception) {
|
||||
log.warn("Error while loading kafka-streams-version.properties", exception);
|
||||
}
|
||||
VERSION_FROM_FILE = props.getProperty("version", DEFAULT_VALUE).trim();
|
||||
COMMIT_ID_FROM_FILE = props.getProperty("commitId", DEFAULT_VALUE).trim();
|
||||
}
|
||||
|
||||
private static final String VERSION_DESCRIPTION = "The version of the Kafka Streams client";
|
||||
private static final String COMMIT_ID_DESCRIPTION = "The version control commit ID of the Kafka Streams client";
|
||||
private static final String APPLICATION_ID_DESCRIPTION = "The application ID of the Kafka Streams client";
|
||||
private static final String TOPOLOGY_DESCRIPTION_DESCRIPTION =
|
||||
"The description of the topology executed in the Kafka Streams client";
|
||||
private static final String STATE_DESCRIPTION = "The state of the Kafka Streams client";
|
||||
|
||||
public static String version() {
|
||||
return VERSION_FROM_FILE;
|
||||
}
|
||||
|
||||
public static String commitId() {
|
||||
return COMMIT_ID_FROM_FILE;
|
||||
}
|
||||
|
||||
public static void addVersionMetric(final StreamsMetricsImpl streamsMetrics) {
|
||||
streamsMetrics.addClientLevelImmutableMetric(
|
||||
VERSION,
|
||||
VERSION_DESCRIPTION,
|
||||
RecordingLevel.INFO,
|
||||
VERSION_FROM_FILE
|
||||
);
|
||||
}
|
||||
|
||||
public static void addCommitIdMetric(final StreamsMetricsImpl streamsMetrics) {
|
||||
streamsMetrics.addClientLevelImmutableMetric(
|
||||
COMMIT_ID,
|
||||
COMMIT_ID_DESCRIPTION,
|
||||
RecordingLevel.INFO,
|
||||
COMMIT_ID_FROM_FILE
|
||||
);
|
||||
}
|
||||
|
||||
public static void addApplicationIdMetric(final StreamsMetricsImpl streamsMetrics, final String applicationId) {
|
||||
streamsMetrics.addClientLevelImmutableMetric(
|
||||
APPLICATION_ID,
|
||||
APPLICATION_ID_DESCRIPTION,
|
||||
RecordingLevel.INFO,
|
||||
applicationId
|
||||
);
|
||||
}
|
||||
|
||||
public static void addTopologyDescriptionMetric(final StreamsMetricsImpl streamsMetrics,
|
||||
final String topologyDescription) {
|
||||
streamsMetrics.addClientLevelImmutableMetric(
|
||||
TOPOLOGY_DESCRIPTION,
|
||||
TOPOLOGY_DESCRIPTION_DESCRIPTION,
|
||||
RecordingLevel.INFO,
|
||||
topologyDescription
|
||||
);
|
||||
}
|
||||
|
||||
public static void addStateMetric(final StreamsMetricsImpl streamsMetrics,
|
||||
final Gauge<State> stateProvider) {
|
||||
streamsMetrics.addClientLevelMutableMetric(
|
||||
STATE,
|
||||
STATE_DESCRIPTION,
|
||||
RecordingLevel.INFO,
|
||||
stateProvider
|
||||
);
|
||||
}
|
||||
}
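The gauges themselves are registered through the internal StreamsMetricsImpl, but the version and commit-id accessors can be sketched in isolation:

import org.apache.kafka.streams.internals.metrics.ClientMetrics;

public class ClientMetricsExample {
    public static void main(final String[] args) {
        // Both values are read from kafka-streams-version.properties on the classpath;
        // "unknown" is the fallback when the file cannot be loaded.
        System.out.println("version:   " + ClientMetrics.version());
        System.out.println("commit id: " + ClientMetrics.commitId());
    }
}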
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* The {@code Aggregator} interface for aggregating values of the given key.
|
||||
* This is a generalization of {@link Reducer} and allows to have different types for input value and aggregation
|
||||
* result.
|
||||
* {@code Aggregator} is used in combination with {@link Initializer} that provides an initial aggregation value.
|
||||
* <p>
|
||||
* {@code Aggregator} can be used to implement aggregation functions like count.
|
||||
|
||||
* @param <K> key type
|
||||
* @param <V> input value type
|
||||
* @param <VA> aggregate value type
|
||||
* @see Initializer
|
||||
* @see KGroupedStream#aggregate(Initializer, Aggregator)
|
||||
* @see KGroupedStream#aggregate(Initializer, Aggregator, Materialized)
|
||||
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator)
|
||||
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator, Materialized)
|
||||
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger)
|
||||
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger, Materialized)
|
||||
* @see Reducer
|
||||
*/
|
||||
public interface Aggregator<K, V, VA> {
|
||||
|
||||
/**
|
||||
* Compute a new aggregate from the key and value of a record and the current aggregate of the same key.
|
||||
*
|
||||
* @param key the key of the record
|
||||
* @param value the value of the record
|
||||
* @param aggregate the current aggregate value
|
||||
* @return the new aggregate value
|
||||
*/
|
||||
VA apply(final K key, final V value, final VA aggregate);
|
||||
}
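A sketch of an Aggregator combined with an Initializer in a KGroupedStream aggregation; the topic and store names are illustrative assumptions, not defined by this commit.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

public class AggregatorExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        final Initializer<Long> initializer = () -> 0L;                    // starting aggregate per key
        final Aggregator<String, String, Long> aggregator =
            (key, value, aggregate) -> aggregate + value.length();        // add the length of each value

        final KTable<String, Long> totals = builder
            .stream("sentences", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .aggregate(
                initializer,
                aggregator,
                Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("length-totals")
                    .withValueSerde(Serdes.Long()));

        totals.toStream().to("length-totals-output");
    }
}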
|
||||
@@ -0,0 +1,286 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.StoreQueryParameters;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.state.KeyValueStore;
|
||||
|
||||
/**
|
||||
* {@code CogroupedKStream} is an abstraction of multiple <i>grouped</i> record streams of {@link KeyValue} pairs.
|
||||
* <p>
|
||||
* It is an intermediate representation after a grouping of {@link KStream}s, before the
|
||||
* aggregations are applied to the new partitions resulting in a {@link KTable}.
|
||||
* <p>
|
||||
* A {@code CogroupedKStream} must be obtained from a {@link KGroupedStream} via
|
||||
* {@link KGroupedStream#cogroup(Aggregator) cogroup(...)}.
|
||||
*
|
||||
* @param <K> Type of keys
|
||||
* @param <VOut> Type of values after aggregation
|
||||
*/
|
||||
public interface CogroupedKStream<K, VOut> {
|
||||
|
||||
/**
|
||||
* Add an already {@link KGroupedStream grouped KStream} to this {@code CogroupedKStream}.
|
||||
* <p>
|
||||
* The added {@link KGroupedStream grouped KStream} must have the same number of partitions as all existing
|
||||
* streams of this {@code CogroupedKStream}.
|
||||
* If this is not the case, you would need to call {@link KStream#through(String)} before
|
||||
* {@link KStream#groupByKey() grouping} the {@link KStream}, using a pre-created topic with the "correct" number of
|
||||
* partitions.
|
||||
* <p>
|
||||
* The specified {@link Aggregator} is applied in the actual {@link #aggregate(Initializer) aggregation} step for
|
||||
* each input record and computes a new aggregate using the current aggregate (or for the very first record per key
|
||||
* using the initial intermediate aggregation result provided via the {@link Initializer} that is passed into
|
||||
* {@link #aggregate(Initializer)}) and the record's value.
|
||||
*
|
||||
* @param groupedStream a grouped stream
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result
|
||||
* @param <VIn> Type of input values
|
||||
* @return a {@code CogroupedKStream}
|
||||
*/
|
||||
<VIn> CogroupedKStream<K, VOut> cogroup(final KGroupedStream<K, VIn> groupedStream,
|
||||
final Aggregator<? super K, ? super VIn, VOut> aggregator);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* To compute the aggregation the corresponding {@link Aggregator} as specified in
|
||||
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
|
||||
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
|
||||
* same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some aggregation on value type double
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
|
||||
* the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is a generated value, and
|
||||
* "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
|
||||
* result. Cannot be {@code null}.
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
|
||||
* represent the latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, VOut> aggregate(final Initializer<VOut> initializer);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* To compute the aggregation the corresponding {@link Aggregator} as specified in
|
||||
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
|
||||
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* The specified {@link Named} is applied once to the processor combining the grouped streams.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
|
||||
* same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some aggregation on value type double
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
|
||||
* the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
|
||||
* result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor. Cannot be {@code null}.
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
|
||||
* represent the latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
|
||||
final Named named);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* To compute the aggregation the corresponding {@link Aggregator} as specified in
|
||||
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
|
||||
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
|
||||
* same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some aggregation on value type double
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
|
||||
* the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
|
||||
* result. Cannot be {@code null}.
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store.
|
||||
* Cannot be {@code null}.
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
|
||||
* represent the latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
|
||||
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* To compute the aggregation the corresponding {@link Aggregator} as specified in
|
||||
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
|
||||
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* The specified {@link Named} is used to name the processor combining the grouped streams.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
|
||||
* same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some aggregation on value type double
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
|
||||
* the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
|
||||
* result. Cannot be {@code null}.
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store.
|
||||
* Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processors. Cannot be {@code null}.
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
|
||||
* represent the latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
|
||||
final Named named,
|
||||
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Create a new {@link TimeWindowedCogroupedKStream} instance that can be used to perform windowed
|
||||
* aggregations.
|
||||
*
|
||||
* @param windows the specification of the aggregation {@link Windows}
|
||||
* @param <W> the window type
|
||||
* @return an instance of {@link TimeWindowedCogroupedKStream}
|
||||
*/
|
||||
<W extends Window> TimeWindowedCogroupedKStream<K, VOut> windowedBy(final Windows<W> windows);
|
||||
|
||||
/**
|
||||
* Create a new {@link SessionWindowedCogroupedKStream} instance that can be used to perform session
|
||||
* windowed aggregations.
|
||||
*
|
||||
* @param windows the specification of the aggregation {@link SessionWindows}
|
||||
* @return an instance of {@link SessionWindowedCogroupedKStream}
|
||||
*/
|
||||
SessionWindowedCogroupedKStream<K, VOut> windowedBy(final SessionWindows windows);
|
||||
|
||||
}
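A sketch of cogrouping two grouped streams into a single aggregate; the topic names, store name, and the Long value type are assumptions made only for the example.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

public class CogroupedKStreamExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        final KGroupedStream<String, Long> clicks = builder
            .stream("clicks", Consumed.with(Serdes.String(), Serdes.Long()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()));
        final KGroupedStream<String, Long> views = builder
            .stream("views", Consumed.with(Serdes.String(), Serdes.Long()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()));

        // Each input stream contributes its own Aggregator; the Initializer is supplied once.
        final Aggregator<String, Long, Long> sum = (key, value, aggregate) -> aggregate + value;

        final KTable<String, Long> activity = clicks
            .cogroup(sum)
            .cogroup(views, sum)
            .aggregate(
                () -> 0L,
                Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("user-activity")
                    .withValueSerde(Serdes.Long()));

        activity.toStream().to("user-activity-output");
    }
}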
|
||||
@@ -0,0 +1,230 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.streams.StreamsBuilder;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* The {@code Consumed} class is used to define the optional parameters when using {@link StreamsBuilder} to
|
||||
* build instances of {@link KStream}, {@link KTable}, and {@link GlobalKTable}.
|
||||
* <p>
|
||||
* For example, you can read a topic as {@link KStream} with a custom timestamp extractor and specify the corresponding
|
||||
* key and value serdes like:
|
||||
* <pre>{@code
|
||||
* StreamsBuilder builder = new StreamsBuilder();
|
||||
* KStream<String, Long> stream = builder.stream(
|
||||
* "topicName",
|
||||
* Consumed.with(Serdes.String(), Serdes.Long())
|
||||
* .withTimestampExtractor(new LogAndSkipOnInvalidTimestamp()));
|
||||
* }</pre>
|
||||
* Similarly, you can read a topic as {@link KTable} with a custom {@code auto.offset.reset} configuration and force a
|
||||
* state store {@link org.apache.kafka.streams.kstream.Materialized materialization} to access the content via
|
||||
* interactive queries:
|
||||
* <pre>{@code
|
||||
* StreamsBuilder builder = new StreamsBuilder();
|
||||
* KTable<Integer, Integer> table = builder.table(
|
||||
* "topicName",
|
||||
* Consumed.with(AutoOffsetReset.LATEST),
|
||||
* Materialized.as("queryable-store-name"));
|
||||
* }</pre>
|
||||
*
|
||||
* @param <K> type of record key
|
||||
* @param <V> type of record value
|
||||
*/
|
||||
public class Consumed<K, V> implements NamedOperation<Consumed<K, V>> {
|
||||
|
||||
protected Serde<K> keySerde;
|
||||
protected Serde<V> valueSerde;
|
||||
protected TimestampExtractor timestampExtractor;
|
||||
protected Topology.AutoOffsetReset resetPolicy;
|
||||
protected String processorName;
|
||||
|
||||
private Consumed(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde,
|
||||
final TimestampExtractor timestampExtractor,
|
||||
final Topology.AutoOffsetReset resetPolicy,
|
||||
final String processorName) {
|
||||
this.keySerde = keySerde;
|
||||
this.valueSerde = valueSerde;
|
||||
this.timestampExtractor = timestampExtractor;
|
||||
this.resetPolicy = resetPolicy;
|
||||
this.processorName = processorName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@link Consumed} from an existing instance.
|
||||
* @param consumed the instance of {@link Consumed} to copy
|
||||
*/
|
||||
protected Consumed(final Consumed<K, V> consumed) {
|
||||
this(consumed.keySerde,
|
||||
consumed.valueSerde,
|
||||
consumed.timestampExtractor,
|
||||
consumed.resetPolicy,
|
||||
consumed.processorName
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@link Consumed} with the supplied arguments. {@code null} values are acceptable.
|
||||
*
|
||||
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
|
||||
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
|
||||
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
|
||||
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new instance of {@link Consumed}
|
||||
*/
|
||||
public static <K, V> Consumed<K, V> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde,
|
||||
final TimestampExtractor timestampExtractor,
|
||||
final Topology.AutoOffsetReset resetPolicy) {
|
||||
return new Consumed<>(keySerde, valueSerde, timestampExtractor, resetPolicy, null);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@link Consumed} with key and value {@link Serde}s.
|
||||
*
|
||||
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
|
||||
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new instance of {@link Consumed}
|
||||
*/
|
||||
public static <K, V> Consumed<K, V> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
return new Consumed<>(keySerde, valueSerde, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@link Consumed} with a {@link TimestampExtractor}.
|
||||
*
|
||||
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new instance of {@link Consumed}
|
||||
*/
|
||||
public static <K, V> Consumed<K, V> with(final TimestampExtractor timestampExtractor) {
|
||||
return new Consumed<>(null, null, timestampExtractor, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@link Consumed} with a {@link org.apache.kafka.streams.Topology.AutoOffsetReset Topology.AutoOffsetReset}.
|
||||
*
|
||||
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new instance of {@link Consumed}
|
||||
*/
|
||||
public static <K, V> Consumed<K, V> with(final Topology.AutoOffsetReset resetPolicy) {
|
||||
return new Consumed<>(null, null, null, resetPolicy, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@link Consumed} with provided processor name.
|
||||
*
|
||||
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new instance of {@link Consumed}
|
||||
*/
|
||||
public static <K, V> Consumed<K, V> as(final String processorName) {
|
||||
return new Consumed<>(null, null, null, null, processorName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the instance of {@link Consumed} with a key {@link Serde}.
|
||||
*
|
||||
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
|
||||
* @return this
|
||||
*/
|
||||
public Consumed<K, V> withKeySerde(final Serde<K> keySerde) {
|
||||
this.keySerde = keySerde;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the instance of {@link Consumed} with a value {@link Serde}.
|
||||
*
|
||||
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
|
||||
* @return this
|
||||
*/
|
||||
public Consumed<K, V> withValueSerde(final Serde<V> valueSerde) {
|
||||
this.valueSerde = valueSerde;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the instance of {@link Consumed} with a {@link TimestampExtractor}.
|
||||
*
|
||||
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
|
||||
* @return this
|
||||
*/
|
||||
public Consumed<K, V> withTimestampExtractor(final TimestampExtractor timestampExtractor) {
|
||||
this.timestampExtractor = timestampExtractor;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the instance of {@link Consumed} with a {@link org.apache.kafka.streams.Topology.AutoOffsetReset Topology.AutoOffsetReset}.
|
||||
*
|
||||
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
|
||||
* @return this
|
||||
*/
|
||||
public Consumed<K, V> withOffsetResetPolicy(final Topology.AutoOffsetReset resetPolicy) {
|
||||
this.resetPolicy = resetPolicy;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the instance of {@link Consumed} with a processor name.
|
||||
*
|
||||
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
|
||||
* @return this
|
||||
*/
|
||||
@Override
|
||||
public Consumed<K, V> withName(final String processorName) {
|
||||
this.processorName = processorName;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final Consumed<?, ?> consumed = (Consumed<?, ?>) o;
|
||||
return Objects.equals(keySerde, consumed.keySerde) &&
|
||||
Objects.equals(valueSerde, consumed.valueSerde) &&
|
||||
Objects.equals(timestampExtractor, consumed.timestampExtractor) &&
|
||||
resetPolicy == consumed.resetPolicy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(keySerde, valueSerde, timestampExtractor, resetPolicy);
|
||||
}
|
||||
}
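A short sketch combining the builder-style setters; the topic names are placeholders chosen for the example.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;

public class ConsumedExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // Start from a processor name and layer the optional settings on top;
        // anything left unset falls back to the defaults from StreamsConfig.
        final KStream<String, Long> stream = builder.stream(
            "input-topic",
            Consumed.<String, Long>as("input-source")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Long())
                .withOffsetResetPolicy(Topology.AutoOffsetReset.EARLIEST));

        stream.to("output-topic");
    }
}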
|
||||
@@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* The {@code ForeachAction} interface for performing an action on a {@link org.apache.kafka.streams.KeyValue key-value
|
||||
* pair}.
|
||||
* This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
|
||||
* record of a stream.
|
||||
* If stateful processing is required, consider using
|
||||
* {@link KStream#process(org.apache.kafka.streams.processor.ProcessorSupplier, String...) KStream#process(...)}.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @see KStream#foreach(ForeachAction)
|
||||
*/
|
||||
public interface ForeachAction<K, V> {
|
||||
|
||||
/**
|
||||
* Perform an action for each record of a stream.
|
||||
*
|
||||
* @param key the key of the record
|
||||
* @param value the value of the record
|
||||
*/
|
||||
void apply(final K key, final V value);
|
||||
}
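A sketch of a ForeachAction used as a terminal, side-effecting operation; the topic name is illustrative only.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.ForeachAction;

public class ForeachActionExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // A stateless, record-by-record side effect; the stream itself is not changed.
        final ForeachAction<String, Long> print =
            (key, value) -> System.out.println("key=" + key + " value=" + value);

        builder.stream("metrics", Consumed.with(Serdes.String(), Serdes.Long()))
               .foreach(print);
    }
}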
|
||||
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.StreamsBuilder;
|
||||
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
|
||||
|
||||
/**
|
||||
* {@code GlobalKTable} is an abstraction of a <i>changelog stream</i> from a primary-keyed table.
|
||||
* Each record in this changelog stream is an update on the primary-keyed table with the record key as the primary key.
|
||||
* <p>
|
||||
* {@code GlobalKTable} can only be used as right-hand side input for {@link KStream stream}-table joins.
|
||||
* <p>
|
||||
* In contrast to a {@link KTable} that is partitioned over all {@link KafkaStreams} instances, a {@code GlobalKTable}
|
||||
* is fully replicated per {@link KafkaStreams} instance.
|
||||
* Every partition of the underlying topic is consumed by each {@code GlobalKTable}, such that the full set of data is
|
||||
* available in every {@link KafkaStreams} instance.
|
||||
* This provides the ability to perform joins with {@link KStream} without having to repartition the input stream.
|
||||
* All joins with the {@code GlobalKTable} require that a {@link KeyValueMapper} is provided that can map from the
|
||||
* {@link KeyValue} of the left hand side {@link KStream} to the key of the right hand side {@code GlobalKTable}.
|
||||
* <p>
|
||||
* A {@code GlobalKTable} is created via a {@link StreamsBuilder}. For example:
|
||||
* <pre>{@code
|
||||
* builder.globalTable("topic-name", "queryable-store-name");
|
||||
* }</pre>
|
||||
* All {@code GlobalKTable}s are backed by a {@link ReadOnlyKeyValueStore} and are therefore queryable via the
|
||||
* interactive queries API.
|
||||
* For example:
|
||||
* <pre>{@code
|
||||
* final GlobalKTable globalOne = builder.globalTable("g1", "g1-store");
|
||||
* final GlobalKTable globalTwo = builder.globalTable("g2", "g2-store");
|
||||
* ...
|
||||
* final KafkaStreams streams = ...;
|
||||
* streams.start()
|
||||
* ...
|
||||
* ReadOnlyKeyValueStore view = streams.store("g1-store", QueryableStoreTypes.keyValueStore());
|
||||
* view.get(key); // can be done on any key, as all keys are present
|
||||
* }</pre>
|
||||
* Note that in contrast to {@link KTable} a {@code GlobalKTable}'s state holds a full copy of the underlying topic,
|
||||
* thus all keys can be queried locally.
|
||||
* <p>
|
||||
* Records from the source topic that have null keys are dropped.
|
||||
*
|
||||
* @param <K> Type of primary keys
|
||||
* @param <V> Type of value changes
|
||||
* @see KTable
|
||||
* @see StreamsBuilder#globalTable(String)
|
||||
* @see KStream#join(GlobalKTable, KeyValueMapper, ValueJoiner)
|
||||
* @see KStream#leftJoin(GlobalKTable, KeyValueMapper, ValueJoiner)
|
||||
*/
|
||||
public interface GlobalKTable<K, V> {
|
||||
/**
|
||||
* Get the name of the local state store that can be used to query this {@code GlobalKTable}.
|
||||
*
|
||||
* @return the underlying state store name, or {@code null} if this {@code GlobalKTable} cannot be queried.
|
||||
*/
|
||||
String queryableStoreName();
|
||||
}
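A sketch of the typical stream-to-GlobalKTable join; the topics and the comma-separated order format are assumptions made for illustration.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.KStream;

public class GlobalKTableJoinExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // Fully replicated lookup table: every instance holds all user profiles.
        final GlobalKTable<String, String> profiles = builder.globalTable(
            "user-profiles", Consumed.with(Serdes.String(), Serdes.String()));

        final KStream<String, String> orders = builder.stream(
            "orders", Consumed.with(Serdes.String(), Serdes.String()));

        // The KeyValueMapper picks the lookup key from the stream side, so no
        // repartitioning of the order stream is needed.
        orders.join(
                  profiles,
                  (orderId, order) -> order.split(",")[0],      // assumed: user id is the first field of the order value
                  (order, profile) -> order + " / " + profile)  // ValueJoiner combining both sides
              .to("orders-enriched");
    }
}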
|
||||
@@ -0,0 +1,159 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
|
||||
/**
|
||||
* The class that is used to capture the key and value {@link Serde}s and set the part of the name used for
|
||||
* repartition topics when performing {@link KStream#groupBy(KeyValueMapper, Grouped)}, {@link
|
||||
* KStream#groupByKey(Grouped)}, or {@link KTable#groupBy(KeyValueMapper, Grouped)} operations. Note
|
||||
* that Kafka Streams does not always create repartition topics for grouping operations.
|
||||
*
|
||||
* @param <K> the key type
|
||||
* @param <V> the value type
|
||||
*/
|
||||
public class Grouped<K, V> implements NamedOperation<Grouped<K, V>> {
|
||||
|
||||
protected final Serde<K> keySerde;
|
||||
protected final Serde<V> valueSerde;
|
||||
protected final String name;
|
||||
|
||||
|
||||
private Grouped(final String name,
|
||||
final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
this.name = name;
|
||||
this.keySerde = keySerde;
|
||||
this.valueSerde = valueSerde;
|
||||
}
|
||||
|
||||
protected Grouped(final Grouped<K, V> grouped) {
|
||||
this(grouped.name, grouped.keySerde, grouped.valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link Grouped} instance with the provided name used as part of the repartition topic if required.
|
||||
*
|
||||
* @param name the name used for a repartition topic if required
|
||||
* @return a new {@link Grouped} configured with the name
|
||||
* @see KStream#groupByKey(Grouped)
|
||||
* @see KStream#groupBy(KeyValueMapper, Grouped)
|
||||
* @see KTable#groupBy(KeyValueMapper, Grouped)
|
||||
*/
|
||||
public static <K, V> Grouped<K, V> as(final String name) {
|
||||
return new Grouped<>(name, null, null);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a {@link Grouped} instance with the provided keySerde. If {@code null} the default key serde from config will be used.
|
||||
*
|
||||
* @param keySerde the Serde used for serializing the key. If {@code null} the default key serde from config will be used
|
||||
* @return a new {@link Grouped} configured with the keySerde
|
||||
* @see KStream#groupByKey(Grouped)
|
||||
* @see KStream#groupBy(KeyValueMapper, Grouped)
|
||||
* @see KTable#groupBy(KeyValueMapper, Grouped)
|
||||
*/
|
||||
public static <K> Grouped keySerde(final Serde<K> keySerde) {
|
||||
return new Grouped<>(null, keySerde, null);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a {@link Grouped} instance with the provided valueSerde. If {@code null} the default value serde from config will be used.
|
||||
*
|
||||
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
|
||||
* @return a new {@link Grouped} configured with the valueSerde
|
||||
* @see KStream#groupByKey(Grouped)
|
||||
* @see KStream#groupBy(KeyValueMapper, Grouped)
|
||||
* @see KTable#groupBy(KeyValueMapper, Grouped)
|
||||
*/
|
||||
public static <V> Grouped valueSerde(final Serde<V> valueSerde) {
|
||||
return new Grouped<>(null, null, valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a {@link Grouped} instance with the provided name, keySerde, and valueSerde. If the keySerde and/or the valueSerde is
|
||||
* {@code null} the default value for the respective serde from config will be used.
|
||||
*
|
||||
* @param name the name used as part of the repartition topic name if required
|
||||
* @param keySerde the {@link Serde} used for serializing the key. If {@code null} the default key serde from config will be used
|
||||
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
|
||||
* @return a new {@link Grouped} configured with the name, keySerde, and valueSerde
|
||||
* @see KStream#groupByKey(Grouped)
|
||||
* @see KStream#groupBy(KeyValueMapper, Grouped)
|
||||
* @see KTable#groupBy(KeyValueMapper, Grouped)
|
||||
*/
|
||||
public static <K, V> Grouped<K, V> with(final String name,
|
||||
final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
return new Grouped<>(name, keySerde, valueSerde);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Create a {@link Grouped} instance with the provided keySerde and valueSerde. If the keySerde and/or the valueSerde is
|
||||
* {@code null} the default value for the respective serde from config will be used.
|
||||
*
|
||||
* @param keySerde the {@link Serde} used for serializing the key. If {@code null} the default key serde from config will be used
|
||||
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
|
||||
* @return a new {@link Grouped} configured with the keySerde, and valueSerde
|
||||
* @see KStream#groupByKey(Grouped)
|
||||
* @see KStream#groupBy(KeyValueMapper, Grouped)
|
||||
* @see KTable#groupBy(KeyValueMapper, Grouped)
|
||||
*/
|
||||
public static <K, V> Grouped<K, V> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
return new Grouped<>(null, keySerde, valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the grouping operation with the name for a repartition topic if required. Note
|
||||
* that Kafka Streams does not always create repartition topics for grouping operations.
|
||||
*
|
||||
* @param name the name used for the processor name and as part of the repartition topic name if required
|
||||
* @return a new {@link Grouped} instance configured with the name
|
||||
*/
|
||||
@Override
|
||||
public Grouped<K, V> withName(final String name) {
|
||||
return new Grouped<>(name, keySerde, valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the grouping operation using the provided keySerde for serializing the key.
|
||||
*
|
||||
* @param keySerde {@link Serde} to use for serializing the key. If {@code null} the default key serde from config will be used
|
||||
* @return a new {@link Grouped} instance configured with the keySerde
|
||||
*/
|
||||
public Grouped<K, V> withKeySerde(final Serde<K> keySerde) {
|
||||
return new Grouped<>(name, keySerde, valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform the grouping operation using the provided valueSerde for serializing the value.
|
||||
*
|
||||
* @param valueSerde {@link Serde} to use for serializing the value. If {@code null} the default value serde from config will be used
|
||||
* @return a new {@link Grouped} instance configured with the valueSerde
|
||||
*/
|
||||
public Grouped<K, V> withValueSerde(final Serde<V> valueSerde) {
|
||||
return new Grouped<>(name, keySerde, valueSerde);
|
||||
}
|
||||
|
||||
}
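A sketch showing Grouped supplying a repartition-topic name and serdes for a key-changing groupBy; the topic names are placeholders.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KTable;

public class GroupedExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // Because groupBy changes the key, a repartition topic may be created;
        // the Grouped name ("by-value") then becomes part of that topic's name.
        final KTable<String, Long> counts = builder
            .stream("text-lines", Consumed.with(Serdes.String(), Serdes.String()))
            .groupBy((key, value) -> value, Grouped.with("by-value", Serdes.String(), Serdes.String()))
            .count();

        counts.toStream().to("value-counts");
    }
}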
|
||||
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* The {@code Initializer} interface for creating an initial value in aggregations.
|
||||
* {@code Initializer} is used in combination with {@link Aggregator}.
|
||||
*
|
||||
* @param <VA> aggregate value type
|
||||
* @see Aggregator
|
||||
* @see KGroupedStream#aggregate(Initializer, Aggregator)
|
||||
* @see KGroupedStream#aggregate(Initializer, Aggregator, Materialized)
|
||||
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator)
|
||||
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator, Materialized)
|
||||
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger)
|
||||
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger, Materialized)
|
||||
*/
|
||||
public interface Initializer<VA> {
|
||||
|
||||
/**
|
||||
* Return the initial value for an aggregation.
|
||||
*
|
||||
* @return the initial value for an aggregation
|
||||
*/
|
||||
VA apply();
|
||||
}
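An Initializer is just a zero-argument factory for the starting aggregate; a trivial sketch:

import org.apache.kafka.streams.kstream.Initializer;

public class InitializerExample {
    public static void main(final String[] args) {
        // Invoked once per key, right before the first record of that key is aggregated.
        final Initializer<Long> zero = () -> 0L;
        System.out.println(zero.apply()); // prints 0
    }
}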
|
||||
@@ -0,0 +1,309 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.internals.ApiUtils;
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
|
||||
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
|
||||
|
||||
/**
|
||||
* The window specifications used for joins.
|
||||
* <p>
|
||||
* A {@code JoinWindows} instance defines a maximum time difference for a {@link KStream#join(KStream, ValueJoiner,
|
||||
* JoinWindows) join over two streams} on the same key.
|
||||
* In SQL-style you would express this join as
|
||||
* <pre>{@code
|
||||
* SELECT * FROM stream1, stream2
|
||||
* WHERE
|
||||
* stream1.key = stream2.key
|
||||
* AND
|
||||
* stream1.ts - before <= stream2.ts AND stream2.ts <= stream1.ts + after
|
||||
* }</pre>
|
||||
* There are three different window configurations supported:
|
||||
* <ul>
|
||||
* <li>before = after = time-difference</li>
|
||||
* <li>before = 0 and after = time-difference</li>
|
||||
* <li>before = time-difference and after = 0</li>
|
||||
* </ul>
|
||||
* A join is symmetric in the sense that a join specification on the first stream returns the same result record as
|
||||
* a join specification on the second stream with flipped before and after values.
|
||||
* <p>
|
||||
* Both values (before and after) must not result in an "inverse" window, i.e., upper-interval bound cannot be smaller
|
||||
* than lower-interval bound.
|
||||
* <p>
|
||||
* {@code JoinWindows} are sliding windows, thus, they are aligned to the actual record timestamps.
|
||||
* This implies, that each input record defines its own window with start and end time being relative to the record's
|
||||
* timestamp.
|
||||
* <p>
|
||||
* For time semantics, see {@link TimestampExtractor}.
|
||||
*
|
||||
* @see TimeWindows
|
||||
* @see UnlimitedWindows
|
||||
* @see SessionWindows
|
||||
* @see KStream#join(KStream, ValueJoiner, JoinWindows)
|
||||
* @see KStream#join(KStream, ValueJoiner, JoinWindows, StreamJoined)
|
||||
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows)
|
||||
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
|
||||
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows)
|
||||
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
|
||||
* @see TimestampExtractor
|
||||
*/
|
||||
public final class JoinWindows extends Windows<Window> {
|
||||
|
||||
private final long maintainDurationMs;
|
||||
|
||||
/** Maximum time difference for tuples that are before the join tuple. */
|
||||
public final long beforeMs;
|
||||
/** Maximum time difference for tuples that are after the join tuple. */
|
||||
public final long afterMs;
|
||||
|
||||
private final long graceMs;
|
||||
|
||||
private JoinWindows(final long beforeMs,
|
||||
final long afterMs,
|
||||
final long graceMs,
|
||||
final long maintainDurationMs) {
|
||||
if (beforeMs + afterMs < 0) {
|
||||
throw new IllegalArgumentException("Window interval (ie, beforeMs+afterMs) must not be negative.");
|
||||
}
|
||||
this.afterMs = afterMs;
|
||||
this.beforeMs = beforeMs;
|
||||
this.graceMs = graceMs;
|
||||
this.maintainDurationMs = maintainDurationMs;
|
||||
}
|
||||
|
||||
@Deprecated // removing segments from Windows will fix this
|
||||
private JoinWindows(final long beforeMs,
|
||||
final long afterMs,
|
||||
final long graceMs,
|
||||
final long maintainDurationMs,
|
||||
final int segments) {
|
||||
super(segments);
|
||||
if (beforeMs + afterMs < 0) {
|
||||
throw new IllegalArgumentException("Window interval (ie, beforeMs+afterMs) must not be negative.");
|
||||
}
|
||||
this.afterMs = afterMs;
|
||||
this.beforeMs = beforeMs;
|
||||
this.graceMs = graceMs;
|
||||
this.maintainDurationMs = maintainDurationMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies that records of the same key are joinable if their timestamps are within {@code timeDifferenceMs},
|
||||
* i.e., the timestamp of a record from the secondary stream is at most {@code timeDifferenceMs} earlier or later than
|
||||
* the timestamp of the record from the primary stream.
|
||||
*
|
||||
* @param timeDifferenceMs join window interval in milliseconds
|
||||
* @throws IllegalArgumentException if {@code timeDifferenceMs} is negative
|
||||
* @deprecated Use {@link #of(Duration)} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static JoinWindows of(final long timeDifferenceMs) throws IllegalArgumentException {
|
||||
// This is a static factory method, so we initialize grace and retention to the defaults.
|
||||
return new JoinWindows(timeDifferenceMs, timeDifferenceMs, -1L, DEFAULT_RETENTION_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Specifies that records of the same key are joinable if their timestamps are within {@code timeDifference},
|
||||
* i.e., the timestamp of a record from the secondary stream is at most {@code timeDifference} earlier or later than
|
||||
* the timestamp of the record from the primary stream.
|
||||
*
|
||||
* @param timeDifference join window interval
|
||||
* @throws IllegalArgumentException if {@code timeDifference} is negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
public static JoinWindows of(final Duration timeDifference) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
|
||||
return of(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the start window boundary to {@code timeDifferenceMs} but keeps the end window boundary as is.
|
||||
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
|
||||
* {@code timeDifferenceMs} earlier than the timestamp of the record from the primary stream.
|
||||
* {@code timeDifferenceMs} can be negative but its absolute value must not be larger than current window "after"
|
||||
* value (which would result in a negative window size).
|
||||
*
|
||||
* @param timeDifferenceMs relative window start time in milliseconds
|
||||
* @throws IllegalArgumentException if the resulting window size is negative
|
||||
* @deprecated Use {@link #before(Duration)} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public JoinWindows before(final long timeDifferenceMs) throws IllegalArgumentException {
|
||||
return new JoinWindows(timeDifferenceMs, afterMs, graceMs, maintainDurationMs, segments);
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the start window boundary to {@code timeDifference} but keeps the end window boundary as is.
|
||||
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
|
||||
* {@code timeDifference} earlier than the timestamp of the record from the primary stream.
|
||||
* {@code timeDifference} can be negative but its absolute value must not be larger than current window "after"
|
||||
* value (which would result in a negative window size).
|
||||
*
|
||||
* @param timeDifference relative window start time
|
||||
* @throws IllegalArgumentException if the resulting window size is negative or {@code timeDifference} can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
public JoinWindows before(final Duration timeDifference) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
|
||||
return before(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the end window boundary to {@code timeDifferenceMs} but keeps the start window boundary as is.
|
||||
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
|
||||
* {@code timeDifferenceMs} later than the timestamp of the record from the primary stream.
|
||||
* {@code timeDifferenceMs} can be negative but its absolute value must not be larger than current window "before"
|
||||
* value (which would result in a negative window size).
|
||||
*
|
||||
* @param timeDifferenceMs relative window end time in milliseconds
|
||||
* @throws IllegalArgumentException if the resulting window size is negative
|
||||
* @deprecated Use {@link #after(Duration)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public JoinWindows after(final long timeDifferenceMs) throws IllegalArgumentException {
|
||||
return new JoinWindows(beforeMs, timeDifferenceMs, graceMs, maintainDurationMs, segments);
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes the end window boundary to {@code timeDifference} but keeps the start window boundary as is.
|
||||
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
|
||||
* {@code timeDifference} later than the timestamp of the record from the primary stream.
|
||||
* {@code timeDifference} can be negative but its absolute value must not be larger than current window "before"
|
||||
* value (which would result in a negative window size).
|
||||
*
|
||||
* @param timeDifference relative window end time
|
||||
* @throws IllegalArgumentException if the resulting window size is negative or {@code timeDifference} can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
public JoinWindows after(final Duration timeDifference) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
|
||||
return after(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
|
||||
}
|
||||
|
||||
/**
|
||||
* Not supported by {@code JoinWindows}.
|
||||
* Throws {@link UnsupportedOperationException}.
|
||||
*
|
||||
* @throws UnsupportedOperationException at every invocation
|
||||
*/
|
||||
@Override
|
||||
public Map<Long, Window> windowsFor(final long timestamp) {
|
||||
throw new UnsupportedOperationException("windowsFor() is not supported by JoinWindows.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() {
|
||||
return beforeMs + afterMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reject out-of-order events that are delayed more than {@code afterWindowEnd}
|
||||
* after the end of its window.
|
||||
* <p>
|
||||
* Delay is defined as (stream_time - record_timestamp).
|
||||
*
|
||||
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
|
||||
* @return this updated builder
|
||||
* @throws IllegalArgumentException if the {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
public JoinWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
|
||||
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
|
||||
if (afterWindowEndMs < 0) {
|
||||
throw new IllegalArgumentException("Grace period must not be negative.");
|
||||
}
|
||||
return new JoinWindows(beforeMs, afterMs, afterWindowEndMs, maintainDurationMs, segments);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long gracePeriodMs() {
|
||||
// NOTE: in the future, when we remove maintainMs,
|
||||
// we should default the grace period to 24h to maintain the default behavior,
|
||||
// or we can default to (24h - size) if you want to be super accurate.
|
||||
return graceMs != -1 ? graceMs : maintainMs() - size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param durationMs the window retention time in milliseconds
|
||||
* @return itself
|
||||
* @throws IllegalArgumentException if {@code durationMs} is smaller than the window size
|
||||
* @deprecated since 2.1. Use {@link JoinWindows#grace(Duration)} instead.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public JoinWindows until(final long durationMs) throws IllegalArgumentException {
|
||||
if (durationMs < size()) {
|
||||
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than the window size.");
|
||||
}
|
||||
return new JoinWindows(beforeMs, afterMs, graceMs, durationMs, segments);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* <p>
|
||||
* For {@code JoinWindows} the maintain duration is at least as large as the window size.
|
||||
*
|
||||
* @return the window maintain duration
|
||||
* @deprecated since 2.1. This function should not be used anymore, since {@link JoinWindows#until(long)}
|
||||
* is deprecated in favor of {@link JoinWindows#grace(Duration)}.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public long maintainMs() {
|
||||
return Math.max(maintainDurationMs, size());
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final JoinWindows that = (JoinWindows) o;
|
||||
return beforeMs == that.beforeMs &&
|
||||
afterMs == that.afterMs &&
|
||||
maintainDurationMs == that.maintainDurationMs &&
|
||||
segments == that.segments &&
|
||||
graceMs == that.graceMs;
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(beforeMs, afterMs, graceMs, maintainDurationMs, segments);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public String toString() {
|
||||
return "JoinWindows{" +
|
||||
"beforeMs=" + beforeMs +
|
||||
", afterMs=" + afterMs +
|
||||
", graceMs=" + graceMs +
|
||||
", maintainDurationMs=" + maintainDurationMs +
|
||||
", segments=" + segments +
|
||||
'}';
|
||||
}
|
||||
}
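// Editor's illustrative sketch (not part of the original file): a symmetric five-minute join
// window with a one-minute grace period, used in a stream-stream join as described above.
// Topic names "clicks" and "impressions" are hypothetical; StreamsBuilder and KStream are
// assumed to be imported in addition to this file's imports.
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> clicks = builder.stream("clicks");
final KStream<String, String> impressions = builder.stream("impressions");
final KStream<String, String> joined = clicks.join(
    impressions,
    (clickValue, impressionValue) -> clickValue + "/" + impressionValue,  // ValueJoiner
    JoinWindows.of(Duration.ofMinutes(5))   // before = after = 5 minutes
        .grace(Duration.ofMinutes(1)));     // reject records arriving more than 1 minute late
// An asymmetric window (before = 0, after = 5 minutes) could be built as
// JoinWindows.of(Duration.ofMinutes(5)).before(Duration.ZERO).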
|
||||
@@ -0,0 +1,233 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
|
||||
/**
|
||||
* The {@code Joined} class represents optional params that can be passed to
|
||||
* {@link KStream#join(KTable, ValueJoiner, Joined) KStream#join(KTable,...)} and
|
||||
* {@link KStream#leftJoin(KTable, ValueJoiner) KStream#leftJoin(KTable,...)} operations.
|
||||
*/
|
||||
public class Joined<K, V, VO> implements NamedOperation<Joined<K, V, VO>> {
|
||||
|
||||
protected final Serde<K> keySerde;
|
||||
protected final Serde<V> valueSerde;
|
||||
protected final Serde<VO> otherValueSerde;
|
||||
protected final String name;
|
||||
|
||||
private Joined(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde,
|
||||
final Serde<VO> otherValueSerde,
|
||||
final String name) {
|
||||
this.keySerde = keySerde;
|
||||
this.valueSerde = valueSerde;
|
||||
this.otherValueSerde = otherValueSerde;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
protected Joined(final Joined<K, V, VO> joined) {
|
||||
this(joined.keySerde, joined.valueSerde, joined.otherValueSerde, joined.name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@code Joined} with key, value, and otherValue {@link Serde} instances.
|
||||
* {@code null} values are accepted and will be replaced by the default serdes as defined in config.
|
||||
*
|
||||
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be used
|
||||
* @param valueSerde the value serde to use. If {@code null} the default value serde from config will be used
|
||||
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VO> other value type
|
||||
* @return new {@code Joined} instance with the provided serdes
|
||||
*/
|
||||
public static <K, V, VO> Joined<K, V, VO> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde,
|
||||
final Serde<VO> otherValueSerde) {
|
||||
return new Joined<>(keySerde, valueSerde, otherValueSerde, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@code Joined} with key, value, and otherValue {@link Serde} instances.
|
||||
* {@code null} values are accepted and will be replaced by the default serdes as defined in
|
||||
* config.
|
||||
*
|
||||
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be
|
||||
* used
|
||||
* @param valueSerde the value serde to use. If {@code null} the default value serde from config
|
||||
* will be used
|
||||
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde
|
||||
* from config will be used
|
||||
* @param name the name used as the base for naming components of the join including any
|
||||
* repartition topics
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VO> other value type
|
||||
* @return new {@code Joined} instance with the provided serdes
|
||||
*/
|
||||
public static <K, V, VO> Joined<K, V, VO> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde,
|
||||
final Serde<VO> otherValueSerde,
|
||||
final String name) {
|
||||
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@code Joined} with a key {@link Serde}.
|
||||
* {@code null} values are accepted and will be replaced by the default key serde as defined in config.
|
||||
*
|
||||
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VO> other value type
|
||||
* @return new {@code Joined} instance configured with the keySerde
|
||||
*/
|
||||
public static <K, V, VO> Joined<K, V, VO> keySerde(final Serde<K> keySerde) {
|
||||
return new Joined<>(keySerde, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@code Joined} with a value {@link Serde}.
|
||||
* {@code null} values are accepted and will be replaced by the default value serde as defined in config.
|
||||
*
|
||||
* @param valueSerde the value serde to use. If {@code null} the default value serde from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VO> other value type
|
||||
* @return new {@code Joined} instance configured with the valueSerde
|
||||
*/
|
||||
public static <K, V, VO> Joined<K, V, VO> valueSerde(final Serde<V> valueSerde) {
|
||||
return new Joined<>(null, valueSerde, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@code Joined} with an other value {@link Serde}.
|
||||
* {@code null} values are accepted and will be replaced by the default value serde as defined in config.
|
||||
*
|
||||
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde from config will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VO> other value type
|
||||
* @return new {@code Joined} instance configured with the otherValueSerde
|
||||
*/
|
||||
public static <K, V, VO> Joined<K, V, VO> otherValueSerde(final Serde<VO> otherValueSerde) {
|
||||
return new Joined<>(null, null, otherValueSerde, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@code Joined} with the base name for all components of the join; this may
|
||||
* include any repartition topics created to complete the join.
|
||||
*
|
||||
* @param name the name used as the base for naming components of the join including any
|
||||
* repartition topics
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VO> other value type
|
||||
* @return new {@code Joined} instance configured with the name
|
||||
*
|
||||
* @deprecated use {@link #as(String)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static <K, V, VO> Joined<K, V, VO> named(final String name) {
|
||||
return new Joined<>(null, null, null, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@code Joined} with the base name for all components of the join; this may
|
||||
* include any repartition topics created to complete the join.
|
||||
*
|
||||
* @param name the name used as the base for naming components of the join including any
|
||||
* repartition topics
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VO> other value type
|
||||
* @return new {@code Joined} instance configured with the name
|
||||
*
|
||||
*/
|
||||
public static <K, V, VO> Joined<K, V, VO> as(final String name) {
|
||||
return new Joined<>(null, null, null, name);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Set the key {@link Serde} to be used. Null values are accepted and will be replaced by the default
|
||||
* key serde as defined in config
|
||||
*
|
||||
* @param keySerde the key serde to use. If null the default key serde from config will be used
|
||||
* @return new {@code Joined} instance configured with the {@code keySerde}
|
||||
*/
|
||||
public Joined<K, V, VO> withKeySerde(final Serde<K> keySerde) {
|
||||
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value {@link Serde} to be used. Null values are accepted and will be replaced by the default
|
||||
* value serde as defined in config
|
||||
*
|
||||
* @param valueSerde the value serde to use. If null the default value serde from config will be used
|
||||
* @return new {@code Joined} instance configured with the {@code valueSerde}
|
||||
*/
|
||||
public Joined<K, V, VO> withValueSerde(final Serde<V> valueSerde) {
|
||||
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the otherValue {@link Serde} to be used. Null values are accepted and will be replaced by the default
|
||||
* value serde as defined in config
|
||||
*
|
||||
* @param otherValueSerde the otherValue serde to use. If null the default value serde from config will be used
|
||||
* @return new {@code Joined} instance configured with the {@code otherValueSerde}
|
||||
*/
|
||||
public Joined<K, V, VO> withOtherValueSerde(final Serde<VO> otherValueSerde) {
|
||||
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the base name used for all components of the join; this may include any repartition topics
|
||||
* created to complete the join.
|
||||
*
|
||||
* @param name the name used as the base for naming components of the join including any
|
||||
* repartition topics
|
||||
* @return new {@code Joined} instance configured with the {@code name}
|
||||
*/
|
||||
@Override
|
||||
public Joined<K, V, VO> withName(final String name) {
|
||||
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
|
||||
}
|
||||
|
||||
public Serde<K> keySerde() {
|
||||
return keySerde;
|
||||
}
|
||||
|
||||
public Serde<V> valueSerde() {
|
||||
return valueSerde;
|
||||
}
|
||||
|
||||
public Serde<VO> otherValueSerde() {
|
||||
return otherValueSerde;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated this method will be removed in a future release
|
||||
*/
|
||||
@Deprecated
|
||||
public String name() {
|
||||
return name;
|
||||
}
|
||||
|
||||
}
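// Editor's illustrative sketch (not part of the original file): passing explicit serdes to a
// stream-table join via Joined.with(...). Topic names "orders" and "customers" are hypothetical;
// StreamsBuilder, KStream, KTable and Serdes are assumed to be imported.
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, Long> orders = builder.stream("orders");
final KTable<String, String> customers = builder.table("customers");
final KStream<String, String> enriched = orders.join(
    customers,
    (orderValue, customerName) -> customerName + ":" + orderValue,  // ValueJoiner
    Joined.with(Serdes.String(), Serdes.Long(), Serdes.String()));  // key, value, other-value serdes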
|
||||
@@ -0,0 +1,556 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.StoreQueryParameters;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.state.KeyValueStore;
|
||||
|
||||
/**
|
||||
* {@code KGroupedStream} is an abstraction of a <i>grouped</i> record stream of {@link KeyValue} pairs.
|
||||
* It is an intermediate representation of a {@link KStream} in order to apply an aggregation operation on the original
|
||||
* {@link KStream} records.
|
||||
* <p>
|
||||
* It is an intermediate representation after a grouping of a {@link KStream} before an aggregation is applied to the
|
||||
* new partitions resulting in a {@link KTable}.
|
||||
* <p>
|
||||
* A {@code KGroupedStream} must be obtained from a {@link KStream} via {@link KStream#groupByKey() groupByKey()} or
|
||||
* {@link KStream#groupBy(KeyValueMapper) groupBy(...)}.
|
||||
*
|
||||
* @param <K> Type of keys
|
||||
* @param <V> Type of values
|
||||
* @see KStream
|
||||
*/
|
||||
public interface KGroupedStream<K, V> {
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count();
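// Editor's illustrative sketch (not part of the original file): counting records per key with the
// default serdes, as described above. The topic name "page-views" is hypothetical; StreamsBuilder
// and KTable are assumed to be imported.
final StreamsBuilder builder = new StreamsBuilder();
final KTable<String, Long> viewsPerPage = builder.<String, String>stream("page-views")
    .groupByKey()
    .count();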
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param named a {@link Named} config used to name the processor in the topology
|
||||
*
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count(final Named named);
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* provided by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyKeyValueStore<String,Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
|
||||
* if there is no valueSerde provided
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
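// Editor's illustrative sketch (not part of the original file): materializing the count into a
// named store so it can be queried as shown in the Javadoc above. The store name "counts-store"
// and topic "page-views" are hypothetical; StreamsBuilder is assumed to be imported.
final StreamsBuilder builder = new StreamsBuilder();
final KTable<String, Long> counts = builder.<String, String>stream("page-views")
    .groupByKey()
    .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));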
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* provided by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyKeyValueStore<String,Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param named a {@link Named} config used to name the processor in the topology
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
|
||||
* if there is no valueSerde provided
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count(final Named named,
|
||||
final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator)}).
|
||||
* <p>
|
||||
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate and the record's value.
|
||||
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
|
||||
* value as-is.
|
||||
* Thus, {@code reduce(Reducer)} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key. If the reduce function returns {@code null}, it is then interpreted as
|
||||
* deletion for the key, and future messages of the same key coming from upstream operators
|
||||
* will be handled as a newly initialized value.
|
||||
*/
|
||||
KTable<K, V> reduce(final Reducer<V> reducer);
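// Editor's illustrative sketch (not part of the original file): a rolling per-key sum via reduce(),
// where the aggregate type equals the input value type as described above. The topic name
// "amounts" is hypothetical and its Long values are assumed to match the configured default serdes.
final StreamsBuilder builder = new StreamsBuilder();
final KTable<String, Long> sums = builder.<String, Long>stream("amounts")
    .groupByKey()
    .reduce((aggValue, newValue) -> aggValue + newValue);  // Reducer<Long>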
|
||||
|
||||
/**
|
||||
* Combine the value of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Materialized)}).
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* provided by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // At the example of a Reducer<Long>
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* <p>
|
||||
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
|
||||
* value as-is.
|
||||
* Thus, {@code reduce(Reducer, Materialized)} can be used to compute aggregate functions like sum, min, or
|
||||
* max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // compute sum
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long sumForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, V> reduce(final Reducer<V> reducer,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
|
||||
/**
|
||||
* Combine the value of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Materialized)}).
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* provided by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // At the example of a Reducer<Long>
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* <p>
|
||||
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
|
||||
* value as-is.
|
||||
* Thus, {@code reduce(Reducer, Materialized)} can be used to compute aggregate functions like sum, min, or
|
||||
* max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // compute sum
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long sumForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology.
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key. If the reduce function returns {@code null}, it is then interpreted as
|
||||
* deletion for the key, and future messages of the same key coming from upstream operators
|
||||
* will be handled as a newly initialized value.
|
||||
*/
|
||||
KTable<K, V> reduce(final Reducer<V> reducer,
|
||||
final Named named,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied once directly before the first input record is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate(Initializer, Aggregator)} can be used to compute aggregate functions like
|
||||
* count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* The default value serde from config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #aggregate(Initializer, Aggregator, Materialized)}.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key. If the aggregate function returns {@code null}, it is then interpreted as
|
||||
* deletion for the key, and future messages of the same key coming from upstream operators
|
||||
* will be handled as a newly initialized value.
|
||||
*/
|
||||
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator);
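// Editor's illustrative sketch (not part of the original file): the two-argument aggregate()
// described above, collecting the values seen per key into one comma-separated String so that the
// configured default value serde (assumed to be the String serde) also fits the result type.
// The topic name "events" is hypothetical; StreamsBuilder is assumed to be imported.
final StreamsBuilder builder = new StreamsBuilder();
final KTable<String, String> valuesPerKey = builder.<String, String>stream("events")
    .groupByKey()
    .aggregate(
        () -> "",                                                         // Initializer: empty aggregate
        (key, value, agg) -> agg.isEmpty() ? value : agg + "," + value);  // Aggregator: append value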
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied once directly before the first input record is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate(Initializer, Aggregator, Materialized)} can be used to compute aggregate functions like
|
||||
* count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some aggregation on value type double
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
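// Editor's illustrative sketch (not part of the original file): the Materialized variant lets the
// aggregate use its own serde and a queryable store name. The store name "max-store" and topic
// "measurements" are hypothetical; StreamsBuilder and Serdes are assumed to be imported.
final StreamsBuilder builder = new StreamsBuilder();
final KTable<String, Long> maxima = builder.<String, Long>stream("measurements")
    .groupByKey()
    .aggregate(
        () -> Long.MIN_VALUE,                        // Initializer
        (key, value, agg) -> Math.max(agg, value),   // Aggregator keeps the running maximum
        Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("max-store")
            .withValueSerde(Serdes.Long()));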
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried by the given store name in {@code materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied once directly before the first input record is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate(Initializer, Aggregator, Materialized)} can be used to compute aggregate functions like
|
||||
* count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some aggregation on value type double
|
||||
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-key";
|
||||
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result
|
||||
* @param named a {@link Named} config used to name the processor in the topology
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key. If the aggregate function returns {@code null}, it is then interpreted as
|
||||
* a deletion for the key, and future messages for the same key coming from upstream operators
|
||||
* will be handled as a newly initialized value.
|
||||
*/
|
||||
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Named named,
|
||||
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
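As a follow-on to the hedged sketch above, the Named overload differs only in that it fixes the processor name; "sum-aggregator" is an assumed name, and grouped stands in for the same KGroupedStream<String, Integer> as before:

final KTable<String, Long> sums = grouped.aggregate(
    () -> 0L,
    (key, value, aggregate) -> aggregate + value,
    Named.as("sum-aggregator"),                // names the aggregation processor in the topology
    Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("sum-store")
        .withValueSerde(Serdes.Long()));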
/**
|
||||
* Create a new {@link TimeWindowedKStream} instance that can be used to perform windowed aggregations.
|
||||
* @param windows the specification of the aggregation {@link Windows}
|
||||
* @param <W> the window type
|
||||
* @return an instance of {@link TimeWindowedKStream}
|
||||
*/
|
||||
<W extends Window> TimeWindowedKStream<K, V> windowedBy(final Windows<W> windows);
|
||||
|
||||
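A short, hedged sketch of using this method, assuming the KGroupedStream<String, Integer> named grouped from the earlier sketch; TimeWindows.of(Duration) is the tumbling-window factory at this API level:

final KTable<Windowed<String>, Long> windowedCounts = grouped
    .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))  // 5-minute tumbling windows
    .count();                                            // count per key and window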
/**
|
||||
* Create a new {@link SessionWindowedKStream} instance that can be used to perform session windowed aggregations.
|
||||
* @param windows the specification of the aggregation {@link SessionWindows}
|
||||
* @return an instance of {@link SessionWindowedKStream}
|
||||
*/
|
||||
SessionWindowedKStream<K, V> windowedBy(final SessionWindows windows);
|
||||
|
||||
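A comparable hedged sketch for session windows, again assuming the grouped stream from above; the 5-minute value is the inactivity gap that closes a session:

final KTable<Windowed<String>, Long> sessionCounts = grouped
    .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))  // sessions close after 5 minutes of inactivity
    .count();                                                 // events per key and session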
/**
|
||||
* Create a new {@link CogroupedKStream} from this grouped KStream to allow cogrouping other
|
||||
* {@code KGroupedStream} to it.
|
||||
* {@link CogroupedKStream} is an abstraction of multiple <i>grouped</i> record streams of {@link KeyValue} pairs.
|
||||
* It is an intermediate representation after a grouping of {@link KStream}s, before the
|
||||
* aggregations are applied to the new partitions resulting in a {@link KTable}.
|
||||
* <p>
|
||||
* The specified {@link Aggregator} is applied in the actual {@link CogroupedKStream#aggregate(Initializer)
|
||||
* aggregation} step for each input record and computes a new aggregate using the current aggregate (or for the very
|
||||
* first record per key using the initial intermediate aggregation result provided via the {@link Initializer} that
|
||||
* is passed into {@link CogroupedKStream#aggregate(Initializer)}) and the record's value.
|
||||
*
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result
|
||||
* @param <Vout> the type of the output values
|
||||
* @return a {@link CogroupedKStream}
|
||||
*/
|
||||
<Vout> CogroupedKStream<K, Vout> cogroup(final Aggregator<? super K, ? super V, Vout> aggregator);
|
||||
|
||||
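A hedged sketch of cogrouping, assuming two grouped streams with the same key type, purchases and refunds (both KGroupedStream<String, Integer>); both names are illustrative:

final Aggregator<String, Integer, Integer> add = (key, value, aggregate) -> aggregate + value;
final Aggregator<String, Integer, Integer> subtract = (key, value, aggregate) -> aggregate - value;
final KTable<String, Integer> balance = purchases
    .cogroup(add)                       // start the cogroup with this stream's aggregator
    .cogroup(refunds, subtract)         // add the second grouped stream with its own aggregator
    .aggregate(() -> 0);                // one shared initializer for the combined aggregate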
}
|
||||
@@ -0,0 +1,699 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.StoreQueryParameters;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.state.KeyValueStore;
|
||||
|
||||
/**
|
||||
* {@code KGroupedTable} is an abstraction of a <i>re-grouped changelog stream</i> from a primary-keyed table,
|
||||
* usually on a different grouping key than the original primary key.
|
||||
* <p>
|
||||
* It is an intermediate representation after a re-grouping of a {@link KTable} before an aggregation is applied to the
|
||||
* new partitions resulting in a new {@link KTable}.
|
||||
* <p>
|
||||
* A {@code KGroupedTable} must be obtained from a {@link KTable} via {@link KTable#groupBy(KeyValueMapper)
|
||||
* groupBy(...)}.
|
||||
*
|
||||
* @param <K> Type of keys
|
||||
* @param <V> Type of values
|
||||
* @see KTable
|
||||
*/
|
||||
public interface KGroupedTable<K, V> {
|
||||
|
||||
/**
|
||||
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
|
||||
* the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name provided with {@code Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
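A hedged sketch of this method, assuming a KTable<String, String> userRegions that maps a user id to a region; the store name "users-per-region-store" is an assumption:

final KTable<String, Long> usersPerRegion = userRegions
    .groupBy((user, region) -> KeyValue.pair(region, region))   // re-key the table by region
    .count(Materialized.as("users-per-region-store"));          // count users per region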
/**
|
||||
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
|
||||
* the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name provided with {@code Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
*
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param named the {@link Named} config used to name the processor in the topology
|
||||
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count(final Named named, final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
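The store materialized by either count overload can then be queried through Interactive Queries; the following hedged sketch assumes a running KafkaStreams instance named streams and the store name from the previous sketch:

final ReadOnlyKeyValueStore<String, Long> store = streams.store(
    StoreQueryParameters.fromNameAndType("users-per-region-store",
        QueryableStoreTypes.<String, Long>keyValueStore()));
final Long regionCount = store.get("europe");  // the key must be hosted by this instance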
/**
|
||||
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
|
||||
* the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count();
|
||||
|
||||
|
||||
/**
|
||||
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
|
||||
* the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param named the {@link Named} config used to name the processor in the topology
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
|
||||
* represent the latest (rolling) count (i.e., number of records) for each key
|
||||
*/
|
||||
KTable<K, Long> count(final Named named);
|
||||
|
||||
/**
|
||||
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
|
||||
* mapped} to the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}).
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name provided with {@code Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
|
||||
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
|
||||
* current aggregate (first argument) and the record's value (second argument) by adding the new record to the
|
||||
* aggregate.
|
||||
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
|
||||
* and computes a new aggregate using the current aggregate (first argument) and the record's value (second
|
||||
* argument) by "removing" the "replaced" record from the aggregate.
|
||||
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
|
||||
* value as-is.
|
||||
* Thus, {@code reduce(Reducer, Reducer, Materialized)} can be used to compute aggregate functions like sum.
|
||||
* For sum, the adder and subtractor would work as follows:
|
||||
* <pre>{@code
|
||||
* public class SumAdder implements Reducer<Integer> {
|
||||
* public Integer apply(Integer currentAgg, Integer newValue) {
|
||||
* return currentAgg + newValue;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumSubtractor implements Reducer<Integer> {
|
||||
* public Integer apply(Integer currentAgg, Integer oldValue) {
|
||||
* return currentAgg - oldValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param adder a {@link Reducer} that adds a new value to the aggregate result
|
||||
* @param subtractor a {@link Reducer} that removes an old value from the aggregate result
|
||||
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, V> reduce(final Reducer<V> adder,
|
||||
final Reducer<V> subtractor,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
|
||||
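A hedged sketch of this reduce overload, assuming a KGroupedTable<String, Integer> named groupedAmounts (for example, a KTable of amounts re-grouped by account type); "totals-store" is an assumed store name:

final KTable<String, Integer> totals = groupedAmounts.reduce(
    (aggValue, newValue) -> aggValue + newValue,   // adder: fold the new value in
    (aggValue, oldValue) -> aggValue - oldValue,   // subtractor: remove the replaced value
    Materialized.as("totals-store"));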
/**
|
||||
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
|
||||
* mapped} to the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}).
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name provided with {@code Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
|
||||
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
|
||||
* current aggregate (first argument) and the record's value (second argument) by adding the new record to the
|
||||
* aggregate.
|
||||
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
|
||||
* and computes a new aggregate using the current aggregate (first argument) and the record's value (second
|
||||
* argument) by "removing" the "replaced" record from the aggregate.
|
||||
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
|
||||
* value as-is.
|
||||
* Thus, {@code reduce(Reducer, Reducer, Named, Materialized)} can be used to compute aggregate functions like sum.
|
||||
* For sum, the adder and subtractor would work as follows:
|
||||
* <pre>{@code
|
||||
* public class SumAdder implements Reducer<Integer> {
|
||||
* public Integer apply(Integer currentAgg, Integer newValue) {
|
||||
* return currentAgg + newValue;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumSubtractor implements Reducer<Integer> {
|
||||
* public Integer apply(Integer currentAgg, Integer oldValue) {
|
||||
* return currentAgg - oldValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param adder a {@link Reducer} that adds a new value to the aggregate result
|
||||
* @param subtractor a {@link Reducer} that removes an old value from the aggregate result
|
||||
* @param named a {@link Named} config used to name the processor in the topology
|
||||
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, V> reduce(final Reducer<V> adder,
|
||||
final Reducer<V> subtractor,
|
||||
final Named named,
|
||||
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
|
||||
* mapped} to the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator)}).
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
|
||||
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
|
||||
* current aggregate and the record's value by adding the new record to the aggregate.
|
||||
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
|
||||
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
|
||||
* record from the aggregate.
|
||||
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
|
||||
* value as-is.
|
||||
* Thus, {@code reduce(Reducer, Reducer)} can be used to compute aggregate functions like sum.
|
||||
* For sum, the adder and subtractor would work as follows:
|
||||
* <pre>{@code
|
||||
* public class SumAdder implements Reducer<Integer> {
|
||||
* public Integer apply(Integer currentAgg, Integer newValue) {
|
||||
* return currentAgg + newValue;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumSubtractor implements Reducer<Integer> {
|
||||
* public Integer apply(Integer currentAgg, Integer oldValue) {
|
||||
* return currentAgg - oldValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param adder a {@link Reducer} that adds a new value to the aggregate result
|
||||
* @param subtractor a {@link Reducer} that removes an old value from the aggregate result
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
KTable<K, V> reduce(final Reducer<V> adder,
|
||||
final Reducer<V> subtractor);
|
||||
|
||||
/**
|
||||
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
|
||||
* mapped} to the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer, Materialized) combining via reduce(...)} as it,
|
||||
* for example, allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name provided with {@code Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied once directly before the first input record is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
|
||||
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
|
||||
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
|
||||
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
|
||||
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
|
||||
* record from the aggregate.
|
||||
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Materialized)} can be used to compute aggregate functions
|
||||
* like sum.
|
||||
* For sum, the initializer, adder, and subtractor would work as follows:
|
||||
* <pre>{@code
|
||||
* // in this example, Serdes.Long() must be set as the value serde via Materialized#withValueSerde
|
||||
* public class SumInitializer implements Initializer<Long> {
|
||||
* public Long apply() {
|
||||
* return 0L;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumAdder implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer newValue, Long aggregate) {
|
||||
* return aggregate + newValue;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer oldValue, Long aggregate) {
|
||||
* return aggregate - oldValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that provides an initial aggregate result value
|
||||
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
|
||||
* @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
|
||||
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
|
||||
* @param <VR> the value type of the aggregated {@link KTable}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> adder,
|
||||
final Aggregator<? super K, ? super V, VR> subtractor,
|
||||
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
|
||||
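A hedged sketch of this aggregate overload with the same assumed KGroupedTable<String, Integer> groupedAmounts, this time widening the aggregate type to Long:

final KTable<String, Long> totals = groupedAmounts.aggregate(
    () -> 0L,                                             // initializer
    (key, newValue, aggregate) -> aggregate + newValue,   // adder
    (key, oldValue, aggregate) -> aggregate - oldValue,   // subtractor
    Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("totals-store")
        .withValueSerde(Serdes.Long()));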
/**
|
||||
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
|
||||
* mapped} to the same key into a new instance of {@link KTable}.
|
||||
* Records with {@code null} key are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer, Materialized) combining via reduce(...)} as it,
|
||||
* for example, allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name provided with {@code Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied once directly before the first input record is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
|
||||
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
|
||||
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
|
||||
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
|
||||
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
|
||||
* record from the aggregate.
|
||||
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Materialized)} can be used to compute aggregate functions
|
||||
* like sum.
|
||||
* For sum, the initializer, adder, and subtractor would work as follows:
|
||||
* <pre>{@code
|
||||
* // in this example, Serdes.Long() must be set as the value serde via Materialized#withValueSerde
|
||||
* public class SumInitializer implements Initializer<Long> {
|
||||
* public Long apply() {
|
||||
* return 0L;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumAdder implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer newValue, Long aggregate) {
|
||||
* return aggregate + newValue;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer oldValue, Long aggregate) {
|
||||
* return aggregate - oldValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link KeyValueStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
|
||||
* String key = "some-word";
|
||||
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that provides an initial aggregate result value
|
||||
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
|
||||
* @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
|
||||
* @param named a {@link Named} config used to name the processor in the topology
|
||||
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
|
||||
* @param <VR> the value type of the aggregated {@link KTable}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> adder,
|
||||
final Aggregator<? super K, ? super V, VR> subtractor,
|
||||
final Named named,
|
||||
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
|
||||
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
|
||||
* Records with {@code null} key are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer) combining via reduce(...)} as it,
|
||||
* for example, allows the result to have a different type than the input values.
|
||||
* If the result value type does not match the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value
|
||||
* serde} you should use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied once directly before the first input record is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
|
||||
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
|
||||
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
|
||||
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
|
||||
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
|
||||
* record from the aggregate.
|
||||
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator)} can be used to compute aggregate functions
|
||||
* like sum.
|
||||
* For sum, the initializer, adder, and subtractor would work as follows:
|
||||
* <pre>{@code
|
||||
* // in this example, LongSerde.class must be set as default value serde in StreamsConfig
|
||||
* public class SumInitializer implements Initializer<Long> {
|
||||
* public Long apply() {
|
||||
* return 0L;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumAdder implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer newValue, Long aggregate) {
|
||||
* return aggregate + newValue;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer oldValue, Long aggregate) {
|
||||
* return aggregate - oldValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that provides an initial aggregate result value
|
||||
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
|
||||
* @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
|
||||
* @param <VR> the value type of the aggregated {@link KTable}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> adder,
|
||||
final Aggregator<? super K, ? super V, VR> subtractor);
|
||||
|
||||
|
||||
/**
|
||||
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
|
||||
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
|
||||
* Records with {@code null} key are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer) combining via reduce(...)} as it,
|
||||
* for example, allows the result to have a different type than the input values.
|
||||
* If the result value type does not match the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value
|
||||
* serde} you should use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}.
|
||||
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied once directly before the first input record is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record.
|
||||
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
|
||||
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
|
||||
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
|
||||
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
|
||||
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
|
||||
* record from the aggregate.
|
||||
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Named)} can be used to compute aggregate functions
|
||||
* like sum.
|
||||
* For sum, the initializer, adder, and subtractor would work as follows:
|
||||
* <pre>{@code
|
||||
* // in this example, LongSerde.class must be set as default value serde in StreamsConfig
|
||||
* public class SumInitializer implements Initializer<Long> {
|
||||
* public Long apply() {
|
||||
* return 0L;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumAdder implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer newValue, Long aggregate) {
|
||||
* return aggregate + newValue;
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
|
||||
* public Long apply(String key, Integer oldValue, Long aggregate) {
|
||||
* return aggregate - oldValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
*
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that provides an initial aggregate result value
|
||||
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
|
||||
* @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
|
||||
* @param named a {@link Named} config used to name the processor in the topology
|
||||
* @param <VR> the value type of the aggregated {@link KTable}
|
||||
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
|
||||
* latest (rolling) aggregate for each key
|
||||
*/
|
||||
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> adder,
|
||||
final Aggregator<? super K, ? super V, VR> subtractor,
|
||||
final Named named);
|
||||
}
|
||||
3559 streams/src/main/java/org/apache/kafka/streams/kstream/KStream.java (Normal file; diff suppressed because it is too large)
2292 streams/src/main/java/org/apache/kafka/streams/kstream/KTable.java (Normal file; diff suppressed because it is too large)
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
|
||||
/**
|
||||
* The {@code KeyValueMapper} interface for mapping a {@link KeyValue key-value pair} to a new value of arbitrary type.
|
||||
* For example, it can be used to
|
||||
* <ul>
|
||||
* <li>map from an input {@link KeyValue} pair to an output {@link KeyValue} pair with different key and/or value type
|
||||
* (for this case output type {@code VR == }{@link KeyValue KeyValue<NewKeyType,NewValueType>})</li>
|
||||
* <li>map from an input record to a new key (with arbitrary key type as specified by {@code VR})</li>
|
||||
* </ul>
|
||||
* This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
|
||||
* record of a stream (cf. {@link Transformer} for stateful record transformation).
|
||||
* {@code KeyValueMapper} is a generalization of {@link ValueMapper}.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VR> mapped value type
|
||||
* @see ValueMapper
|
||||
* @see Transformer
|
||||
* @see KStream#map(KeyValueMapper)
|
||||
* @see KStream#flatMap(KeyValueMapper)
|
||||
* @see KStream#selectKey(KeyValueMapper)
|
||||
* @see KStream#groupBy(KeyValueMapper)
|
||||
* @see KStream#groupBy(KeyValueMapper, Grouped)
|
||||
* @see KTable#groupBy(KeyValueMapper)
|
||||
* @see KTable#groupBy(KeyValueMapper, Grouped)
|
||||
* @see KTable#toStream(KeyValueMapper)
|
||||
*/
|
||||
public interface KeyValueMapper<K, V, VR> {
|
||||
|
||||
/**
|
||||
* Map a record with the given key and value to a new value.
|
||||
*
|
||||
* @param key the key of the record
|
||||
* @param value the value of the record
|
||||
* @return the new value
|
||||
*/
|
||||
VR apply(final K key, final V value);
|
||||
}
|
||||
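Since the interface has a single abstract method, a lambda is the idiomatic implementation. A hedged sketch, assuming a KStream<String, String> named words:

final KeyValueMapper<String, String, KeyValue<String, Integer>> toLengths =
    (key, value) -> KeyValue.pair(value, value.length());  // new key = old value, new value = its length
final KStream<String, Integer> lengths = words.map(toLengths);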
@@ -0,0 +1,261 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.internals.ApiUtils;
|
||||
import org.apache.kafka.streams.processor.StateStore;
|
||||
import org.apache.kafka.streams.state.KeyValueBytesStoreSupplier;
|
||||
import org.apache.kafka.streams.state.KeyValueStore;
|
||||
import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
|
||||
import org.apache.kafka.streams.state.SessionStore;
|
||||
import org.apache.kafka.streams.state.StoreSupplier;
|
||||
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
|
||||
import org.apache.kafka.streams.state.WindowStore;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
|
||||
|
||||
/**
|
||||
* Used to describe how a {@link StateStore} should be materialized.
|
||||
* You can either provide a custom {@link StateStore} backend through one of the provided methods accepting a supplier
|
||||
* or use the default RocksDB backends by providing just a store name.
|
||||
* <p>
|
||||
* For example, you can read a topic as {@link KTable} and force a state store materialization to access the content
|
||||
* via Interactive Queries API:
|
||||
* <pre>{@code
|
||||
* StreamsBuilder builder = new StreamsBuilder();
|
||||
* KTable<Integer, Integer> table = builder.table(
|
||||
* "topicName",
|
||||
* Materialized.as("queryable-store-name"));
|
||||
* }</pre>
|
||||
*
|
||||
* @param <K> type of record key
|
||||
* @param <V> type of record value
|
||||
* @param <S> type of state store (note: state stores always have key/value types {@code <Bytes,byte[]>})
|
||||
*
|
||||
* @see org.apache.kafka.streams.state.Stores
|
||||
*/
|
||||
public class Materialized<K, V, S extends StateStore> {
|
||||
protected StoreSupplier<S> storeSupplier;
|
||||
protected String storeName;
|
||||
protected Serde<V> valueSerde;
|
||||
protected Serde<K> keySerde;
|
||||
protected boolean loggingEnabled = true;
|
||||
protected boolean cachingEnabled = true;
|
||||
protected Map<String, String> topicConfig = new HashMap<>();
|
||||
protected Duration retention;
|
||||
|
||||
private Materialized(final StoreSupplier<S> storeSupplier) {
|
||||
this.storeSupplier = storeSupplier;
|
||||
}
|
||||
|
||||
private Materialized(final String storeName) {
|
||||
this.storeName = storeName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @param materialized the {@link Materialized} instance to copy.
|
||||
*/
|
||||
protected Materialized(final Materialized<K, V, S> materialized) {
|
||||
this.storeSupplier = materialized.storeSupplier;
|
||||
this.storeName = materialized.storeName;
|
||||
this.keySerde = materialized.keySerde;
|
||||
this.valueSerde = materialized.valueSerde;
|
||||
this.loggingEnabled = materialized.loggingEnabled;
|
||||
this.cachingEnabled = materialized.cachingEnabled;
|
||||
this.topicConfig = materialized.topicConfig;
|
||||
this.retention = materialized.retention;
|
||||
}
|
||||
|
||||
/**
|
||||
* Materialize a {@link StateStore} with the given name.
|
||||
*
|
||||
* @param storeName the name of the underlying {@link KTable} state store; valid characters are ASCII
|
||||
* alphanumerics, '.', '_' and '-'.
|
||||
* @param <K> key type of the store
|
||||
* @param <V> value type of the store
|
||||
* @param <S> type of the {@link StateStore}
|
||||
* @return a new {@link Materialized} instance with the given storeName
|
||||
*/
|
||||
public static <K, V, S extends StateStore> Materialized<K, V, S> as(final String storeName) {
|
||||
Named.validate(storeName);
|
||||
return new Materialized<>(storeName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Materialize a {@link WindowStore} using the provided {@link WindowBytesStoreSupplier}.
|
||||
*
|
||||
* Important: Custom subclasses are allowed here, but they should respect the retention contract:
|
||||
* Window stores are required to retain windows at least as long as (window size + window grace period).
|
||||
* Stores constructed via {@link org.apache.kafka.streams.state.Stores} already satisfy this contract.
|
||||
*
|
||||
* @param supplier the {@link WindowBytesStoreSupplier} used to materialize the store
|
||||
* @param <K> key type of the store
|
||||
* @param <V> value type of the store
|
||||
* @return a new {@link Materialized} instance with the given supplier
|
||||
*/
|
||||
public static <K, V> Materialized<K, V, WindowStore<Bytes, byte[]>> as(final WindowBytesStoreSupplier supplier) {
|
||||
Objects.requireNonNull(supplier, "supplier can't be null");
|
||||
return new Materialized<>(supplier);
|
||||
}
|
||||
|
||||
/**
|
||||
* Materialize a {@link SessionStore} using the provided {@link SessionBytesStoreSupplier}.
|
||||
*
|
||||
* Important: Custom subclasses are allowed here, but they should respect the retention contract:
|
||||
* Session stores are required to retain windows at least as long as (session inactivity gap + session grace period).
|
||||
* Stores constructed via {@link org.apache.kafka.streams.state.Stores} already satisfy this contract.
|
||||
*
|
||||
* @param supplier the {@link SessionBytesStoreSupplier} used to materialize the store
|
||||
* @param <K> key type of the store
|
||||
* @param <V> value type of the store
|
||||
* @return a new {@link Materialized} instance with the given supplier
|
||||
*/
|
||||
public static <K, V> Materialized<K, V, SessionStore<Bytes, byte[]>> as(final SessionBytesStoreSupplier supplier) {
|
||||
Objects.requireNonNull(supplier, "supplier can't be null");
|
||||
return new Materialized<>(supplier);
|
||||
}
|
||||
|
||||
/**
|
||||
* Materialize a {@link KeyValueStore} using the provided {@link KeyValueBytesStoreSupplier}.
|
||||
*
|
||||
* @param supplier the {@link KeyValueBytesStoreSupplier} used to materialize the store
|
||||
* @param <K> key type of the store
|
||||
* @param <V> value type of the store
|
||||
* @return a new {@link Materialized} instance with the given supplier
|
||||
*/
|
||||
public static <K, V> Materialized<K, V, KeyValueStore<Bytes, byte[]>> as(final KeyValueBytesStoreSupplier supplier) {
|
||||
Objects.requireNonNull(supplier, "supplier can't be null");
|
||||
return new Materialized<>(supplier);
|
||||
}
|
||||
|
||||
/**
|
||||
* Materialize a {@link StateStore} with the provided key and value {@link Serde}s.
|
||||
* An internal name will be used for the store.
|
||||
*
|
||||
* @param keySerde the key {@link Serde} to use. If the {@link Serde} is null, then the default key
|
||||
* serde from configs will be used
|
||||
* @param valueSerde the value {@link Serde} to use. If the {@link Serde} is null, then the default value
|
||||
* serde from configs will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <S> store type
|
||||
* @return a new {@link Materialized} instance with the given key and value serdes
|
||||
*/
|
||||
public static <K, V, S extends StateStore> Materialized<K, V, S> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
return new Materialized<K, V, S>((String) null).withKeySerde(keySerde).withValueSerde(valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the valueSerde the materialized {@link StateStore} will use.
|
||||
*
|
||||
* @param valueSerde the value {@link Serde} to use. If the {@link Serde} is null, then the default value
|
||||
* serde from configs will be used. If the serialized bytes are null for put operations,
* it is treated as a delete operation
|
||||
* @return itself
|
||||
*/
|
||||
public Materialized<K, V, S> withValueSerde(final Serde<V> valueSerde) {
|
||||
this.valueSerde = valueSerde;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the keySerde the materialized {@link StateStore} will use.
|
||||
* @param keySerde the key {@link Serde} to use. If the {@link Serde} is null, then the default key
|
||||
* serde from configs will be used
|
||||
* @return itself
|
||||
*/
|
||||
public Materialized<K, V, S> withKeySerde(final Serde<K> keySerde) {
|
||||
this.keySerde = keySerde;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicates that a changelog should be created for the store. The changelog will be created
|
||||
* with the provided configs.
|
||||
* <p>
|
||||
* Note: Any unrecognized configs will be ignored.
|
||||
* @param config any configs that should be applied to the changelog
|
||||
* @return itself
|
||||
*/
|
||||
public Materialized<K, V, S> withLoggingEnabled(final Map<String, String> config) {
|
||||
loggingEnabled = true;
|
||||
this.topicConfig = config;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Disable change logging for the materialized {@link StateStore}.
|
||||
* @return itself
|
||||
*/
|
||||
public Materialized<K, V, S> withLoggingDisabled() {
|
||||
loggingEnabled = false;
|
||||
this.topicConfig.clear();
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable caching for the materialized {@link StateStore}.
|
||||
* @return itself
|
||||
*/
|
||||
public Materialized<K, V, S> withCachingEnabled() {
|
||||
cachingEnabled = true;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Disable caching for the materialized {@link StateStore}.
|
||||
* @return itself
|
||||
*/
|
||||
public Materialized<K, V, S> withCachingDisabled() {
|
||||
cachingEnabled = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure retention period for window and session stores. Ignored for key/value stores.
|
||||
*
|
||||
* Overridden by pre-configured store suppliers
|
||||
* ({@link Materialized#as(SessionBytesStoreSupplier)} or {@link Materialized#as(WindowBytesStoreSupplier)}).
|
||||
*
|
||||
* Note that the retention period must be at least long enough to contain the windowed data's entire life cycle,
|
||||
* from window-start through window-end, and for the entire grace period.
|
||||
*
|
||||
* @param retention the retention time
|
||||
* @return itself
|
||||
* @throws IllegalArgumentException if retention is negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
public Materialized<K, V, S> withRetention(final Duration retention) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(retention, "retention");
|
||||
final long retentionMs = ApiUtils.validateMillisecondDuration(retention, msgPrefix);

if (retentionMs < 0) {
|
||||
throw new IllegalArgumentException("Retention must not be negative.");
|
||||
}
|
||||
this.retention = retention;
|
||||
return this;
|
||||
}
|
||||
}
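/*
 * Illustrative usage sketch (assumed stream and topology variables, not part of the file
 * above): materialize a count into a queryable key-value store with explicit serdes and
 * caching disabled, and configure retention for a windowed store:
 *
 *   KTable<String, Long> counts = textLines
 *       .groupByKey()
 *       .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store")
 *           .withKeySerde(Serdes.String())
 *           .withValueSerde(Serdes.Long())
 *           .withCachingDisabled());
 *
 *   Materialized<String, Long, WindowStore<Bytes, byte[]>> windowedStore =
 *       Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("hourly-counts")
 *           .withRetention(Duration.ofDays(1));
 */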
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* The interface for merging aggregate values for {@link SessionWindows} with the given key.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> aggregate value type
|
||||
*/
|
||||
public interface Merger<K, V> {
|
||||
|
||||
/**
|
||||
* Compute a new aggregate from the key and two aggregates.
|
||||
*
|
||||
* @param aggKey the key of the record
|
||||
* @param aggOne the first aggregate
|
||||
* @param aggTwo the second aggregate
|
||||
* @return the new aggregate value
|
||||
*/
|
||||
V apply(final K aggKey, final V aggOne, final V aggTwo);
|
||||
}
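/*
 * Illustrative usage sketch (assumed key and aggregate types, not part of the file above):
 * a Merger that combines two session aggregates by summing them, as required by
 * session-windowed aggregate() calls:
 *
 *   Merger<String, Long> sessionMerger = (aggKey, aggOne, aggTwo) -> aggOne + aggTwo;
 */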
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.errors.TopologyException;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public class Named implements NamedOperation<Named> {
|
||||
|
||||
private static final int MAX_NAME_LENGTH = 249;
|
||||
|
||||
protected String name;
|
||||
|
||||
protected Named(final Named named) {
|
||||
this(Objects.requireNonNull(named, "named can't be null").name);
|
||||
}
|
||||
|
||||
protected Named(final String name) {
|
||||
this.name = name;
|
||||
if (name != null) {
|
||||
validate(name);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Named instance with provided name.
|
||||
*
|
||||
* @param name the processor name to be used. If {@code null} a default processor name will be generated.
|
||||
* @return A new {@link Named} instance configured with name
|
||||
*
|
||||
* @throws TopologyException if an invalid name is specified; valid characters are ASCII alphanumerics, '.', '_' and '-'.
|
||||
*/
|
||||
public static Named as(final String name) {
|
||||
Objects.requireNonNull(name, "name can't be null");
|
||||
return new Named(name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Named withName(final String name) {
|
||||
return new Named(name);
|
||||
}
|
||||
|
||||
protected static void validate(final String name) {
|
||||
if (name.isEmpty())
|
||||
throw new TopologyException("Name is illegal, it can't be empty");
|
||||
if (name.equals(".") || name.equals(".."))
|
||||
throw new TopologyException("Name cannot be \".\" or \"..\"");
|
||||
if (name.length() > MAX_NAME_LENGTH)
|
||||
throw new TopologyException("Name is illegal, it can't be longer than " + MAX_NAME_LENGTH +
|
||||
" characters, name: " + name);
|
||||
if (!containsValidPattern(name))
|
||||
throw new TopologyException("Name \"" + name + "\" is illegal, it contains a character other than " +
|
||||
"ASCII alphanumerics, '.', '_' and '-'");
|
||||
}
|
||||
|
||||
/**
|
||||
* Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-'
|
||||
*/
|
||||
private static boolean containsValidPattern(final String topic) {
|
||||
for (int i = 0; i < topic.length(); ++i) {
|
||||
final char c = topic.charAt(i);
|
||||
|
||||
// We don't use Character.isLetterOrDigit(c) because it's slower
|
||||
final boolean validLetterOrDigit = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z');
|
||||
final boolean validChar = validLetterOrDigit || c == '.' || c == '_' || c == '-';
|
||||
if (!validChar) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
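/*
 * Illustrative usage sketch (assumed input stream, not part of the file above): Named.as
 * gives a processor a stable, human-readable name in the topology description; the name must
 * follow the character and length rules validated above:
 *
 *   KStream<String, String> nonNull =
 *       input.filter((key, value) -> value != null, Named.as("drop-null-values"));
 */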
|
||||
@@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
/**
|
||||
* Default interface that can be used to personalize the names of operations, internal topics, or stores.
|
||||
*/
|
||||
interface NamedOperation<T extends NamedOperation<T>> {
|
||||
|
||||
/**
|
||||
* Sets the name to be used for an operation.
|
||||
*
|
||||
* @param name the name to use.
|
||||
* @return an instance of {@link NamedOperation}
|
||||
*/
|
||||
T withName(final String name);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
|
||||
/**
|
||||
* The {@code Predicate} interface represents a predicate (boolean-valued function) of a {@link KeyValue} pair.
|
||||
* This is a stateless record-by-record operation, i.e., {@link #test(Object, Object)} is invoked individually for each
|
||||
* record of a stream.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @see KStream#filter(Predicate)
|
||||
* @see KStream#filterNot(Predicate)
|
||||
* @see KStream#branch(Predicate[])
|
||||
* @see KTable#filter(Predicate)
|
||||
* @see KTable#filterNot(Predicate)
|
||||
*/
|
||||
public interface Predicate<K, V> {
|
||||
|
||||
/**
|
||||
* Test if the record with the given key and value satisfies the predicate.
|
||||
*
|
||||
* @param key the key of the record
|
||||
* @param value the value of the record
|
||||
* @return {@code true} if the {@link KeyValue} pair satisfies the predicate—{@code false} otherwise
|
||||
*/
|
||||
boolean test(final K key, final V value);
|
||||
}
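/*
 * Illustrative usage sketch (assumed counts stream, not part of the file above): the same
 * Predicate can drive both filter and filterNot to split a stream by a condition:
 *
 *   Predicate<String, Long> overThreshold = (key, count) -> count != null && count > 10L;
 *   KStream<String, Long> frequent = counts.filter(overThreshold);
 *   KStream<String, Long> rare = counts.filterNot(overThreshold);
 */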
|
||||
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.errors.TopologyException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* An object to define the options used when printing a {@link KStream}.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @see KStream#print(Printed)
|
||||
*/
|
||||
public class Printed<K, V> implements NamedOperation<Printed<K, V>> {
|
||||
protected final OutputStream outputStream;
|
||||
protected String label;
|
||||
protected String processorName;
|
||||
protected KeyValueMapper<? super K, ? super V, String> mapper =
|
||||
(KeyValueMapper<K, V, String>) (key, value) -> String.format("%s, %s", key, value);
|
||||
|
||||
private Printed(final OutputStream outputStream) {
|
||||
this.outputStream = outputStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor.
|
||||
* @param printed instance of {@link Printed} to copy
|
||||
*/
|
||||
protected Printed(final Printed<K, V> printed) {
|
||||
this.outputStream = printed.outputStream;
|
||||
this.label = printed.label;
|
||||
this.mapper = printed.mapper;
|
||||
this.processorName = printed.processorName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print the records of a {@link KStream} to a file.
|
||||
*
|
||||
* @param filePath path of the file
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new Printed instance
|
||||
*/
|
||||
public static <K, V> Printed<K, V> toFile(final String filePath) {
|
||||
Objects.requireNonNull(filePath, "filePath can't be null");
|
||||
if (filePath.trim().isEmpty()) {
|
||||
throw new TopologyException("filePath can't be an empty string");
|
||||
}
|
||||
try {
|
||||
return new Printed<>(Files.newOutputStream(Paths.get(filePath)));
|
||||
} catch (final IOException e) {
|
||||
throw new TopologyException("Unable to write stream to file at [" + filePath + "] " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Print the records of a {@link KStream} to system out.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new Printed instance
|
||||
*/
|
||||
public static <K, V> Printed<K, V> toSysOut() {
|
||||
return new Printed<>(System.out);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print the records of a {@link KStream} with the provided label.
|
||||
*
|
||||
* @param label label to use
|
||||
* @return this
|
||||
*/
|
||||
public Printed<K, V> withLabel(final String label) {
|
||||
Objects.requireNonNull(label, "label can't be null");
|
||||
this.label = label;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print the records of a {@link KStream} with the provided {@link KeyValueMapper}.
|
||||
* The provided KeyValueMapper's mapped value type must be {@code String}.
|
||||
* <p>
|
||||
* The example below shows how to customize output data.
|
||||
* <pre>{@code
|
||||
* final KeyValueMapper<Integer, String, String> mapper = new KeyValueMapper<Integer, String, String>() {
|
||||
* public String apply(Integer key, String value) {
|
||||
* return String.format("(%d, %s)", key, value);
|
||||
* }
|
||||
* };
|
||||
* }</pre>
|
||||
*
|
||||
* Implementors will need to override {@code toString()} for keys and values that are not of type {@link String},
|
||||
* {@link Integer} etc. to get meaningful information.
|
||||
*
|
||||
* @param mapper mapper to use
|
||||
* @return this
|
||||
*/
|
||||
public Printed<K, V> withKeyValueMapper(final KeyValueMapper<? super K, ? super V, String> mapper) {
|
||||
Objects.requireNonNull(mapper, "mapper can't be null");
|
||||
this.mapper = mapper;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print the records of a {@link KStream} with provided processor name.
|
||||
*
|
||||
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
|
||||
* @return this
|
||||
*/
|
||||
@Override
|
||||
public Printed<K, V> withName(final String processorName) {
|
||||
this.processorName = processorName;
|
||||
return this;
|
||||
}
|
||||
}
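/*
 * Illustrative usage sketch (assumed word-count stream, not part of the file above): print to
 * stdout with a label and a custom formatter; the mapper must return a String:
 *
 *   wordCounts.print(Printed.<String, Long>toSysOut()
 *       .withLabel("word-counts")
 *       .withKeyValueMapper((word, count) -> word + " -> " + count));
 */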
|
||||
@@ -0,0 +1,201 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
|
||||
import org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner;
|
||||
import org.apache.kafka.streams.processor.StreamPartitioner;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* This class is used to provide the optional parameters when producing to new topics
|
||||
* using {@link KStream#through(String, Produced)} or {@link KStream#to(String, Produced)}.
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
*/
|
||||
public class Produced<K, V> implements NamedOperation<Produced<K, V>> {
|
||||
|
||||
protected Serde<K> keySerde;
|
||||
protected Serde<V> valueSerde;
|
||||
protected StreamPartitioner<? super K, ? super V> partitioner;
|
||||
protected String processorName;
|
||||
|
||||
private Produced(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde,
|
||||
final StreamPartitioner<? super K, ? super V> partitioner,
|
||||
final String processorName) {
|
||||
this.keySerde = keySerde;
|
||||
this.valueSerde = valueSerde;
|
||||
this.partitioner = partitioner;
|
||||
this.processorName = processorName;
|
||||
}
|
||||
|
||||
protected Produced(final Produced<K, V> produced) {
|
||||
this.keySerde = produced.keySerde;
|
||||
this.valueSerde = produced.valueSerde;
|
||||
this.partitioner = produced.partitioner;
|
||||
this.processorName = produced.processorName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Produced instance with provided keySerde and valueSerde.
|
||||
* @param keySerde Serde to use for serializing the key
|
||||
* @param valueSerde Serde to use for serializing the value
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return A new {@link Produced} instance configured with keySerde and valueSerde
|
||||
* @see KStream#through(String, Produced)
|
||||
* @see KStream#to(String, Produced)
|
||||
*/
|
||||
public static <K, V> Produced<K, V> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
return new Produced<>(keySerde, valueSerde, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Produced instance with provided keySerde, valueSerde, and partitioner.
|
||||
* @param keySerde Serde to use for serializing the key
|
||||
* @param valueSerde Serde to use for serializing the value
|
||||
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
|
||||
* if not specified and {@code keySerde} provides a {@link WindowedSerializer} for the key
|
||||
* {@link WindowedStreamPartitioner} will be used—otherwise {@link DefaultPartitioner}
|
||||
* will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return A new {@link Produced} instance configured with keySerde, valueSerde, and partitioner
|
||||
* @see KStream#through(String, Produced)
|
||||
* @see KStream#to(String, Produced)
|
||||
*/
|
||||
public static <K, V> Produced<K, V> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde,
|
||||
final StreamPartitioner<? super K, ? super V> partitioner) {
|
||||
return new Produced<>(keySerde, valueSerde, partitioner, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an instance of {@link Produced} with provided processor name.
|
||||
*
|
||||
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return a new instance of {@link Produced}
|
||||
*/
|
||||
public static <K, V> Produced<K, V> as(final String processorName) {
|
||||
return new Produced<>(null, null, null, processorName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Produced instance with provided keySerde.
|
||||
* @param keySerde Serde to use for serializing the key
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return A new {@link Produced} instance configured with keySerde
|
||||
* @see KStream#through(String, Produced)
|
||||
* @see KStream#to(String, Produced)
|
||||
*/
|
||||
public static <K, V> Produced<K, V> keySerde(final Serde<K> keySerde) {
|
||||
return new Produced<>(keySerde, null, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Produced instance with provided valueSerde.
|
||||
* @param valueSerde Serde to use for serializing the value
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return A new {@link Produced} instance configured with valueSerde
|
||||
* @see KStream#through(String, Produced)
|
||||
* @see KStream#to(String, Produced)
|
||||
*/
|
||||
public static <K, V> Produced<K, V> valueSerde(final Serde<V> valueSerde) {
|
||||
return new Produced<>(null, valueSerde, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Produced instance with provided partitioner.
|
||||
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
|
||||
* if not specified and the key serde provides a {@link WindowedSerializer} for the key
|
||||
* {@link WindowedStreamPartitioner} will be used—otherwise {@link DefaultPartitioner} will be used
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @return A new {@link Produced} instance configured with partitioner
|
||||
* @see KStream#through(String, Produced)
|
||||
* @see KStream#to(String, Produced)
|
||||
*/
|
||||
public static <K, V> Produced<K, V> streamPartitioner(final StreamPartitioner<? super K, ? super V> partitioner) {
|
||||
return new Produced<>(null, null, partitioner, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Produce records using the provided partitioner.
|
||||
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
|
||||
* if not specified and the key serde provides a {@link WindowedSerializer} for the key
|
||||
* {@link WindowedStreamPartitioner} will be used—otherwise {@link DefaultPartitioner} will be used
|
||||
* @return this
|
||||
*/
|
||||
public Produced<K, V> withStreamPartitioner(final StreamPartitioner<? super K, ? super V> partitioner) {
|
||||
this.partitioner = partitioner;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Produce records using the provided valueSerde.
|
||||
* @param valueSerde Serde to use for serializing the value
|
||||
* @return this
|
||||
*/
|
||||
public Produced<K, V> withValueSerde(final Serde<V> valueSerde) {
|
||||
this.valueSerde = valueSerde;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Produce records using the provided keySerde.
|
||||
* @param keySerde Serde to use for serializing the key
|
||||
* @return this
|
||||
*/
|
||||
public Produced<K, V> withKeySerde(final Serde<K> keySerde) {
|
||||
this.keySerde = keySerde;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final Produced<?, ?> produced = (Produced<?, ?>) o;
|
||||
return Objects.equals(keySerde, produced.keySerde) &&
|
||||
Objects.equals(valueSerde, produced.valueSerde) &&
|
||||
Objects.equals(partitioner, produced.partitioner);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(keySerde, valueSerde, partitioner);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Produced<K, V> withName(final String name) {
|
||||
this.processorName = name;
|
||||
return this;
|
||||
}
|
||||
}
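/*
 * Illustrative usage sketch (assumed output topic name and KTable variable, not part of the
 * file above): write a stream to a topic with explicit serdes and a named sink processor:
 *
 *   wordCounts.toStream()
 *       .to("word-counts-topic", Produced.with(Serdes.String(), Serdes.Long())
 *           .withName("word-counts-sink"));
 */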
|
||||
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
|
||||
/**
|
||||
* The {@code Reducer} interface for combining two values of the same type into a new value.
|
||||
* In contrast to {@link Aggregator} the result type must be the same as the input type.
|
||||
* <p>
|
||||
* The provided values can be either original values from input {@link KeyValue} pair records or be a previously
|
||||
* computed result from {@link Reducer#apply(Object, Object)}.
|
||||
* <p>
|
||||
* {@code Reducer} can be used to implement aggregation functions like sum, min, or max.
|
||||
*
|
||||
* @param <V> value type
|
||||
* @see KGroupedStream#reduce(Reducer)
|
||||
* @see KGroupedStream#reduce(Reducer, Materialized)
|
||||
* @see TimeWindowedKStream#reduce(Reducer)
|
||||
* @see TimeWindowedKStream#reduce(Reducer, Materialized)
|
||||
* @see SessionWindowedKStream#reduce(Reducer)
|
||||
* @see SessionWindowedKStream#reduce(Reducer, Materialized)
|
||||
* @see Aggregator
|
||||
*/
|
||||
public interface Reducer<V> {
|
||||
|
||||
/**
|
||||
* Aggregate the two given values into a single one.
|
||||
*
|
||||
* @param value1 the first value for the aggregation
|
||||
* @param value2 the second value for the aggregation
|
||||
* @return the aggregated value
|
||||
*/
|
||||
V apply(final V value1, final V value2);
|
||||
}
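/*
 * Illustrative usage sketch (assumed grouped stream, not part of the file above): a Reducer
 * that sums values; unlike an Aggregator, input and result types are the same (Long here):
 *
 *   KTable<String, Long> totals = groupedAmounts.reduce((value1, value2) -> value1 + value2);
 */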
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
|
||||
/**
|
||||
* The class that is used to capture the key and value {@link Serde}s used when performing
|
||||
* {@link KStream#groupBy(KeyValueMapper, Serialized)} and {@link KStream#groupByKey(Serialized)} operations.
|
||||
*
|
||||
* @param <K> the key type
|
||||
* @param <V> the value type
|
||||
*
|
||||
* @deprecated since 2.1. Use {@link org.apache.kafka.streams.kstream.Grouped} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public class Serialized<K, V> {
|
||||
|
||||
protected final Serde<K> keySerde;
|
||||
protected final Serde<V> valueSerde;
|
||||
|
||||
private Serialized(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
this.keySerde = keySerde;
|
||||
this.valueSerde = valueSerde;
|
||||
}
|
||||
|
||||
protected Serialized(final Serialized<K, V> serialized) {
|
||||
this(serialized.keySerde, serialized.valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a {@code Serialized} instance with the provided key and value {@link Serde}s.
|
||||
* If the {@link Serde} params are {@code null} the default serdes defined in the configs will be used.
|
||||
*
|
||||
* @param keySerde keySerde that will be used to materialize a stream
|
||||
* if not specified the default serdes defined in the configs will be used
|
||||
* @param valueSerde valueSerde that will be used to materialize a stream
|
||||
* if not specified the default serdes defined in the configs will be used
|
||||
* @param <K> the key type
|
||||
* @param <V> the value type
|
||||
* @return a new instance of {@link Serialized} configured with the provided serdes
|
||||
*/
|
||||
public static <K, V> Serialized<K, V> with(final Serde<K> keySerde,
|
||||
final Serde<V> valueSerde) {
|
||||
return new Serialized<>(keySerde, valueSerde);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a {@code Serialized} instance with the provided key {@link Serde}.
|
||||
* If the {@link Serde} params are null the default serdes defined in the configs will be used.
|
||||
*
|
||||
* @param keySerde keySerde that will be used to materialize a stream
|
||||
* if not specified the default serdes defined in the configs will be used
|
||||
* @return a new instance of {@link Serialized} configured with the provided key serde
|
||||
*/
|
||||
public Serialized<K, V> withKeySerde(final Serde<K> keySerde) {
|
||||
return new Serialized<>(keySerde, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a {@code Serialized} instance with the provided value {@link Serde}.
|
||||
* If the {@link Serde} params are null the default serdes defined in the configs will be used.
|
||||
*
|
||||
* @param valueSerde valueSerde that will be used to materialize a stream
|
||||
* if not specified the default serdes defined in the configs will be used
|
||||
* @return a new instance of {@link Serialized} configured with the provided value serde
|
||||
*/
|
||||
public Serialized<K, V> withValueSerde(final Serde<V> valueSerde) {
|
||||
return new Serialized<>(null, valueSerde);
|
||||
}
|
||||
|
||||
}
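/*
 * Illustrative usage sketch (assumed input stream, not part of the file above): Serialized is
 * deprecated, so the second statement shows the Grouped replacement recommended since 2.1:
 *
 *   KGroupedStream<String, Long> grouped =
 *       amounts.groupByKey(Serialized.with(Serdes.String(), Serdes.Long()));
 *   KGroupedStream<String, Long> preferred =
 *       amounts.groupByKey(Grouped.with(Serdes.String(), Serdes.Long()));
 */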
|
||||
@@ -0,0 +1,265 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.StoreQueryParameters;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.state.SessionStore;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* {@code SessionWindowedCogroupKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
|
||||
* It is an intermediate representation of a {@link CogroupedKStream} in order to apply a windowed aggregation operation
|
||||
* on the original {@link KGroupedStream} records resulting in a windowed {@link KTable} (a <em>windowed</em>
|
||||
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
|
||||
* <p>
|
||||
* {@link SessionWindows} are dynamic data driven windows.
|
||||
* They have no fixed time boundaries, rather the size of the window is determined by the records.
|
||||
* <p>
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating
|
||||
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
|
||||
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
|
||||
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
|
||||
* New events are added to sessions until their grace period ends (see {@link SessionWindows#grace(Duration)}).
|
||||
* <p>
|
||||
* A {@code SessionWindowedCogroupedKStream} must be obtained from a {@link CogroupedKStream} via
|
||||
* {@link CogroupedKStream#windowedBy(SessionWindows)}.
|
||||
*
|
||||
* @param <K> Type of keys
|
||||
* @param <V> Type of values
|
||||
* @see KStream
|
||||
* @see KGroupedStream
|
||||
* @see SessionWindows
|
||||
* @see CogroupedKStream
|
||||
*/
|
||||
public interface SessionWindowedCogroupedKStream<K, V> {
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record per session.
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #aggregate(Initializer, Merger, Materialized)}.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
|
||||
final Merger<? super K, V> sessionMerger);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record per session.
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use
|
||||
* {@link #aggregate(Initializer, Merger, Named, Materialized)}.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
|
||||
final Merger<? super K, V> sessionMerger,
|
||||
final Named named);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key and defined sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the session (per key).
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
*
|
||||
* String key = "some-word";
|
||||
* long fromTime = ...;
|
||||
* long toTime = ...;
|
||||
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
|
||||
final Merger<? super K, V> sessionMerger,
|
||||
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in these streams by the grouped key and defined sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the session (per key).
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some windowed aggregation on value type double
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
* String key = "some-key";
|
||||
* KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
|
||||
final Merger<? super K, V> sessionMerger,
|
||||
final Named named,
|
||||
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
|
||||
}
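// ------------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the file above): wiring up the session-windowed cogrouped
// aggregate(...) documented in this interface. The topic names ("clicks", "views"), the Long
// value type, and the sketch class name are assumptions made for the example only.
class SessionWindowedCogroupExampleSketch {

    org.apache.kafka.streams.Topology buildTopology() {
        final org.apache.kafka.streams.StreamsBuilder builder = new org.apache.kafka.streams.StreamsBuilder();

        // Two independently grouped input streams that share the same key type.
        final KGroupedStream<String, Long> clicks = builder.<String, Long>stream("clicks").groupByKey();
        final KGroupedStream<String, Long> views = builder.<String, Long>stream("views").groupByKey();

        // Cogroup both streams, window them into sessions with a 5-minute inactivity gap, and
        // aggregate with an Initializer, one Aggregator per input stream, and a session Merger.
        final KTable<Windowed<String>, Long> totalsPerSession = clicks
                .<Long>cogroup((key, value, aggregate) -> aggregate + value)
                .cogroup(views, (key, value, aggregate) -> aggregate + value)
                .windowedBy(SessionWindows.with(java.time.Duration.ofMinutes(5)))
                .aggregate(
                        () -> 0L,                                        // initializer
                        (key, leftAgg, rightAgg) -> leftAgg + rightAgg,  // sessionMerger for overlapping sessions
                        Materialized.with(
                                org.apache.kafka.common.serialization.Serdes.String(),
                                org.apache.kafka.common.serialization.Serdes.Long()));

        // totalsPerSession can now be further transformed or written to an output topic.
        return builder.build();
    }
}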
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.config.ConfigException;
|
||||
import org.apache.kafka.common.serialization.Deserializer;
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.common.utils.Utils;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.state.internals.SessionKeySchema;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The inner serde class can be specified by setting the property
|
||||
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
|
||||
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
|
||||
* if the no-arg constructor is called, and hence the inner serde class is not passed during initialization.
|
||||
*/
|
||||
public class SessionWindowedDeserializer<T> implements Deserializer<Windowed<T>> {
|
||||
|
||||
private Deserializer<T> inner;
|
||||
|
||||
// Default constructor needed by Kafka
|
||||
public SessionWindowedDeserializer() {}
|
||||
|
||||
public SessionWindowedDeserializer(final Deserializer<T> inner) {
|
||||
this.inner = inner;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void configure(final Map<String, ?> configs, final boolean isKey) {
|
||||
if (inner == null) {
|
||||
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
|
||||
final String value = (String) configs.get(propertyName);
|
||||
try {
|
||||
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).deserializer();
|
||||
inner.configure(configs, isKey);
|
||||
} catch (final ClassNotFoundException e) {
|
||||
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Windowed<T> deserialize(final String topic, final byte[] data) {
|
||||
WindowedSerdes.verifyInnerDeserializerNotNull(inner, this);
|
||||
|
||||
if (data == null || data.length == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// for either key or value, their schema is the same hence we will just use session key schema
|
||||
return SessionKeySchema.from(data, inner, topic);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (inner != null) {
|
||||
inner.close();
|
||||
}
|
||||
}
|
||||
|
||||
// Only for testing
|
||||
Deserializer<T> innerDeserializer() {
|
||||
return inner;
|
||||
}
|
||||
}
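// ------------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the file above): the two ways to obtain a usable
// SessionWindowedDeserializer described in the class-level Javadoc. The config map contents and
// the sketch class name are assumptions made for the example only.
class SessionWindowedDeserializerExampleSketch {

    void demo() {
        // 1) Pass the inner deserializer explicitly via the constructor.
        final SessionWindowedDeserializer<String> explicit =
                new SessionWindowedDeserializer<>(new org.apache.kafka.common.serialization.StringDeserializer());

        // 2) Use the no-arg constructor and let configure(...) instantiate the inner deserializer
        //    from the DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS property (the configured value must be
        //    the fully qualified class name of a Serde implementation).
        final SessionWindowedDeserializer<String> configured = new SessionWindowedDeserializer<>();
        final java.util.Map<String, Object> configs = new java.util.HashMap<>();
        configs.put(org.apache.kafka.streams.StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS,
                org.apache.kafka.common.serialization.Serdes.StringSerde.class.getName());
        configured.configure(configs, true); // isKey = true, so the key inner-serde property is read

        explicit.close();
        configured.close();
    }
}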
|
||||
@@ -0,0 +1,646 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.StoreQueryParameters;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.state.SessionStore;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* {@code SessionWindowedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
|
||||
* It is an intermediate representation after a grouping and windowing of a {@link KStream} before an aggregation is
|
||||
* applied to the new (partitioned) windows resulting in a windowed {@link KTable} (a <em>windowed</em>
|
||||
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
|
||||
* <p>
|
||||
* {@link SessionWindows} are dynamic data driven windows.
|
||||
* They have no fixed time boundaries, rather the size of the window is determined by the records.
|
||||
* <p>
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating
|
||||
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
|
||||
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
|
||||
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
|
||||
* New events are added to sessions until their grace period ends (see {@link SessionWindows#grace(Duration)}).
|
||||
* <p>
|
||||
* A {@code SessionWindowedKStream} must be obtained from a {@link KGroupedStream} via
|
||||
* {@link KGroupedStream#windowedBy(SessionWindows)}.
|
||||
*
|
||||
* @param <K> Type of keys
|
||||
* @param <V> Type of values
|
||||
* @see KStream
|
||||
* @see KGroupedStream
|
||||
* @see SessionWindows
|
||||
*/
|
||||
public interface SessionWindowedKStream<K, V> {
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* The default key serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #count(Materialized)}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same session and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count();
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* The default key serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #count(Named, Materialized)}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same session and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count(final Named named);
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the name provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // compute sum
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
* String key = "some-key";
|
||||
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
|
||||
* if there is no valueSerde provided
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count(final Materialized<K, Long, SessionStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the name provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // compute sum
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
* String key = "some-key";
|
||||
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
|
||||
* if there is no valueSerde provided
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count(final Named named,
|
||||
final Materialized<K, Long, SessionStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record per session.
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use
|
||||
* {@link #aggregate(Initializer, Aggregator, Merger, Materialized)}.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Merger<? super K, VR> sessionMerger);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record per session.
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use
|
||||
* {@link #aggregate(Initializer, Aggregator, Merger, Named, Materialized)}.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Merger<? super K, VR> sessionMerger,
|
||||
final Named named);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record per session.
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some windowed aggregation on value type Long
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
* String key = "some-key";
|
||||
* KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Merger<? super K, VR> sessionMerger,
|
||||
final Materialized<K, VR, SessionStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
|
||||
* provide an initial intermediate aggregation result that is used to process the first record per session.
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
|
||||
* they are merged into a single session and the old sessions are discarded.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // some windowed aggregation on value type Long
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
* String key = "some-key";
|
||||
* KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Merger<? super K, VR> sessionMerger,
|
||||
final Named named,
|
||||
final Materialized<K, VR, SessionStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #reduce(Reducer, Materialized)}.
|
||||
* <p>
|
||||
* The value of the first record per session initializes the session result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // At the example of a Reducer<Long>
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #reduce(Reducer, Named, Materialized)}.
|
||||
* <p>
|
||||
* The value of the first record per session initializes the session result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // At the example of a Reducer<Long>
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer, final Named named);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The value of the first record per session initializes the session result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // At the example of a Reducer<Long>
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // compute sum
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
* String key = "some-key";
|
||||
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
|
||||
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined sessions.
|
||||
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
|
||||
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The value of the first record per session initializes the session result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // At the example of a Reducer<Long>
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit intervall}.
|
||||
* <p>
|
||||
* To query the local {@link SessionStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // compute sum
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
|
||||
* String key = "some-key";
|
||||
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key per session
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
|
||||
final Named named,
|
||||
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
|
||||
}
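// ------------------------------------------------------------------------------------------------
// Illustrative sketch (not part of the file above): obtaining a SessionWindowedKStream via
// KGroupedStream#windowedBy(SessionWindows) and applying the count() and reduce() operations
// documented in this interface. The topic name ("page-views"), the 30-minute inactivity gap,
// the store name, and the sketch class name are assumptions made for the example only.
class SessionWindowedKStreamExampleSketch {

    org.apache.kafka.streams.Topology buildTopology() {
        final org.apache.kafka.streams.StreamsBuilder builder = new org.apache.kafka.streams.StreamsBuilder();

        // Group records by key and window them into sessions that close after 30 minutes of inactivity.
        final SessionWindowedKStream<String, String> sessions = builder
                .<String, String>stream("page-views")
                .groupByKey()
                .windowedBy(SessionWindows.with(java.time.Duration.ofMinutes(30)));

        // count(): number of records per key and session, materialized under a queryable store name.
        final KTable<Windowed<String>, Long> viewsPerSession =
                sessions.count(Materialized.as("views-per-session"));

        // reduce(): combine values without changing their type, here by concatenating page names.
        final KTable<Windowed<String>, String> pagesPerSession =
                sessions.reduce((aggValue, newValue) -> aggValue + "," + newValue);

        // viewsPerSession and pagesPerSession can now be further transformed or written out.
        return builder.build();
    }
}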
|
||||
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.config.ConfigException;
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.common.serialization.Serializer;
|
||||
import org.apache.kafka.common.utils.Utils;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
|
||||
import org.apache.kafka.streams.state.internals.SessionKeySchema;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The inner serde class can be specified by setting the property
|
||||
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
|
||||
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
|
||||
* if the no-arg constructor is called, and hence the inner serde class is not passed during initialization.
|
||||
*/
|
||||
public class SessionWindowedSerializer<T> implements WindowedSerializer<T> {
|
||||
|
||||
private Serializer<T> inner;
|
||||
|
||||
// Default constructor needed by Kafka
|
||||
public SessionWindowedSerializer() {}
|
||||
|
||||
public SessionWindowedSerializer(final Serializer<T> inner) {
|
||||
this.inner = inner;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void configure(final Map<String, ?> configs, final boolean isKey) {
|
||||
if (inner == null) {
|
||||
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
|
||||
final String value = (String) configs.get(propertyName);
|
||||
try {
|
||||
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).serializer();
|
||||
inner.configure(configs, isKey);
|
||||
} catch (final ClassNotFoundException e) {
|
||||
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] serialize(final String topic, final Windowed<T> data) {
|
||||
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
|
||||
|
||||
if (data == null) {
|
||||
return null;
|
||||
}
|
||||
// for either key or value, their schema is the same hence we will just use session key schema
|
||||
return SessionKeySchema.toBinary(data, inner, topic);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (inner != null) {
|
||||
inner.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] serializeBaseKey(final String topic, final Windowed<T> data) {
|
||||
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
|
||||
|
||||
return inner.serialize(topic, data.key());
|
||||
}
|
||||
|
||||
// Only for testing
|
||||
Serializer<T> innerSerializer() {
|
||||
return inner;
|
||||
}
|
||||
}
|
||||
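As a point of reference, a hypothetical sketch of using this serializer outside of a topology, for example when writing windowed keys with a plain producer; the key "user-42", the window bounds, and the topic name "user-sessions" are made-up values.

import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.kstream.SessionWindowedSerializer;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.kstream.WindowedSerdes;
import org.apache.kafka.streams.kstream.internals.SessionWindow;

public class SessionWindowedSerializerSketch {
    public static void main(final String[] args) {
        // direct construction with an explicit inner serializer
        final SessionWindowedSerializer<String> serializer =
            new SessionWindowedSerializer<>(Serdes.String().serializer());
        final Windowed<String> windowedKey = new Windowed<>("user-42", new SessionWindow(10L, 20L));
        final byte[] bytes = serializer.serialize("user-sessions", windowedKey);
        System.out.println("serialized " + bytes.length + " bytes");

        // the matching serde helper pairs this serializer with its deserializer
        final Serde<Windowed<String>> serde = WindowedSerdes.sessionWindowedSerdeFrom(String.class);
        serde.close();
        serializer.close();
    }
}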
@@ -0,0 +1,221 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.internals.ApiUtils;
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
|
||||
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
|
||||
|
||||
|
||||
/**
|
||||
* A session based window specification used for aggregating events into sessions.
|
||||
* <p>
|
||||
* Sessions represent a period of activity separated by a defined gap of inactivity.
|
||||
* Any events processed that fall within the inactivity gap of any existing sessions are merged into the existing sessions.
|
||||
* If the event falls outside of the session gap then a new session will be created.
|
||||
* <p>
|
||||
* For example, if we have a session gap of 5 and the following data arrives:
|
||||
* <pre>
|
||||
* +--------------------------------------+
|
||||
* | key | value | time |
|
||||
* +-----------+-------------+------------+
|
||||
* | A | 1 | 10 |
|
||||
* +-----------+-------------+------------+
|
||||
* | A | 2 | 12 |
|
||||
* +-----------+-------------+------------+
|
||||
* | A | 3 | 20 |
|
||||
* +-----------+-------------+------------+
|
||||
* </pre>
|
||||
* We'd have 2 sessions for key A.
|
||||
* One starting from time 10 and ending at time 12 and another starting and ending at time 20.
|
||||
* The length of the session is driven by the timestamps of the data within the session.
|
||||
* Thus, session windows are not fixed-size windows (c.f. {@link TimeWindows} and {@link JoinWindows}).
|
||||
* <p>
|
||||
* If we then received another record:
|
||||
* <pre>
|
||||
* +--------------------------------------+
|
||||
* | key | value | time |
|
||||
* +-----------+-------------+------------+
|
||||
* | A | 4 | 16 |
|
||||
* +-----------+-------------+------------+
|
||||
* </pre>
|
||||
* The previous 2 sessions would be merged into a single session with start time 10 and end time 20.
|
||||
* The aggregate value for this session would be the result of aggregating all 4 values.
|
||||
* <p>
|
||||
* For time semantics, see {@link TimestampExtractor}.
|
||||
*
|
||||
* @see TimeWindows
|
||||
* @see UnlimitedWindows
|
||||
* @see JoinWindows
|
||||
* @see KGroupedStream#windowedBy(SessionWindows)
|
||||
* @see TimestampExtractor
|
||||
*/
|
||||
public final class SessionWindows {
|
||||
|
||||
private final long gapMs;
|
||||
private final long maintainDurationMs;
|
||||
private final long graceMs;
|
||||
|
||||
|
||||
private SessionWindows(final long gapMs, final long maintainDurationMs, final long graceMs) {
|
||||
this.gapMs = gapMs;
|
||||
this.maintainDurationMs = maintainDurationMs;
|
||||
this.graceMs = graceMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new window specification with the specified inactivity gap in milliseconds.
|
||||
*
|
||||
* @param inactivityGapMs the gap of inactivity between sessions in milliseconds
|
||||
* @return a new window specification with default maintain duration of 1 day
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code inactivityGapMs} is zero or negative
|
||||
* @deprecated Use {@link #with(Duration)} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public static SessionWindows with(final long inactivityGapMs) {
|
||||
if (inactivityGapMs <= 0) {
|
||||
throw new IllegalArgumentException("Gap time (inactivityGapMs) cannot be zero or negative.");
|
||||
}
|
||||
return new SessionWindows(inactivityGapMs, DEFAULT_RETENTION_MS, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new window specification with the specified inactivity gap.
|
||||
*
|
||||
* @param inactivityGap the gap of inactivity between sessions
|
||||
* @return a new window specification with default maintain duration of 1 day
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code inactivityGap} is zero or negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
public static SessionWindows with(final Duration inactivityGap) {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(inactivityGap, "inactivityGap");
|
||||
return with(ApiUtils.validateMillisecondDuration(inactivityGap, msgPrefix));
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the window maintain duration (retention time) in milliseconds.
|
||||
* This retention time is a guaranteed <i>lower bound</i> for how long a window will be maintained.
|
||||
*
|
||||
* @return a new window specification with the updated retention time
|
||||
* @throws IllegalArgumentException if {@code durationMs} is smaller than window gap
|
||||
*
|
||||
* @deprecated since 2.1. Use {@link Materialized#retention}
|
||||
* or directly configure the retention in a store supplier and use
|
||||
* {@link Materialized#as(SessionBytesStoreSupplier)}.
|
||||
*/
|
||||
@Deprecated
|
||||
public SessionWindows until(final long durationMs) throws IllegalArgumentException {
|
||||
if (durationMs < gapMs) {
|
||||
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than window gap.");
|
||||
}
|
||||
|
||||
return new SessionWindows(gapMs, durationMs, graceMs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reject out-of-order events that arrive more than {@code afterWindowEnd}
|
||||
* after the end of its window.
|
||||
* <p>
|
||||
* Note that new events may change the boundaries of session windows, so aggressive
|
||||
* close times can lead to surprising results in which an out-of-order event is rejected and then
|
||||
* a subsequent event moves the window boundary forward.
|
||||
*
|
||||
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
|
||||
* @return this updated builder
|
||||
* @throws IllegalArgumentException if the {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
public SessionWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
|
||||
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
|
||||
|
||||
if (afterWindowEndMs < 0) {
|
||||
throw new IllegalArgumentException("Grace period must not be negative.");
|
||||
}
|
||||
|
||||
return new SessionWindows(
|
||||
gapMs,
|
||||
maintainDurationMs,
|
||||
afterWindowEndMs
|
||||
);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // continuing to support Windows#maintainMs/segmentInterval in fallback mode
|
||||
public long gracePeriodMs() {
|
||||
// NOTE: in the future, when we remove maintainMs,
|
||||
// we should default the grace period to 24h to maintain the default behavior,
|
||||
// or we can default to (24h - gapMs) if you want to be super accurate.
|
||||
return graceMs != -1 ? graceMs : maintainMs() - inactivityGap();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the specified gap for the session windows in milliseconds.
|
||||
*
|
||||
* @return the inactivity gap of the specified windows
|
||||
*/
|
||||
public long inactivityGap() {
|
||||
return gapMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the window maintain duration (retention time) in milliseconds.
|
||||
* <p>
|
||||
* For {@code SessionWindows} the maintain duration is at least as large as the window gap.
|
||||
*
|
||||
* @return the window maintain duration
|
||||
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public long maintainMs() {
|
||||
return Math.max(maintainDurationMs, gapMs);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final SessionWindows that = (SessionWindows) o;
|
||||
return gapMs == that.gapMs &&
|
||||
maintainDurationMs == that.maintainDurationMs &&
|
||||
graceMs == that.graceMs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(gapMs, maintainDurationMs, graceMs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SessionWindows{" +
|
||||
"gapMs=" + gapMs +
|
||||
", maintainDurationMs=" + maintainDurationMs +
|
||||
", graceMs=" + graceMs +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
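A short sketch of building the window specification defined above; the 30-minute inactivity gap and 10-minute grace period are arbitrary example values, not defaults.

import java.time.Duration;

import org.apache.kafka.streams.kstream.SessionWindows;

public class SessionWindowsSketch {
    public static void main(final String[] args) {
        final SessionWindows spec = SessionWindows
            .with(Duration.ofMinutes(30))     // close a session after 30 minutes of inactivity
            .grace(Duration.ofMinutes(10));   // still admit records up to 10 minutes out of order

        System.out.println("inactivity gap (ms): " + spec.inactivityGap());
        System.out.println("grace period (ms):   " + spec.gracePeriodMs());
    }
}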
@@ -0,0 +1,286 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
|
||||
|
||||
/**
|
||||
* Class used to configure the name of the join processor, the repartition topic name,
|
||||
* state stores or state store names in Stream-Stream join.
|
||||
* @param <K> the key type
|
||||
* @param <V1> this value type
|
||||
* @param <V2> other value type
|
||||
*/
|
||||
public class StreamJoined<K, V1, V2> implements NamedOperation<StreamJoined<K, V1, V2>> {
|
||||
|
||||
protected final Serde<K> keySerde;
|
||||
protected final Serde<V1> valueSerde;
|
||||
protected final Serde<V2> otherValueSerde;
|
||||
protected final WindowBytesStoreSupplier thisStoreSupplier;
|
||||
protected final WindowBytesStoreSupplier otherStoreSupplier;
|
||||
protected final String name;
|
||||
protected final String storeName;
|
||||
|
||||
protected StreamJoined(final StreamJoined<K, V1, V2> streamJoined) {
|
||||
this(streamJoined.keySerde,
|
||||
streamJoined.valueSerde,
|
||||
streamJoined.otherValueSerde,
|
||||
streamJoined.thisStoreSupplier,
|
||||
streamJoined.otherStoreSupplier,
|
||||
streamJoined.name,
|
||||
streamJoined.storeName);
|
||||
}
|
||||
|
||||
private StreamJoined(final Serde<K> keySerde,
|
||||
final Serde<V1> valueSerde,
|
||||
final Serde<V2> otherValueSerde,
|
||||
final WindowBytesStoreSupplier thisStoreSupplier,
|
||||
final WindowBytesStoreSupplier otherStoreSupplier,
|
||||
final String name,
|
||||
final String storeName) {
|
||||
this.keySerde = keySerde;
|
||||
this.valueSerde = valueSerde;
|
||||
this.otherValueSerde = otherValueSerde;
|
||||
this.thisStoreSupplier = thisStoreSupplier;
|
||||
this.otherStoreSupplier = otherStoreSupplier;
|
||||
this.name = name;
|
||||
this.storeName = storeName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a StreamJoined instance with the provided store suppliers. The store suppliers must implement
|
||||
* the {@link WindowBytesStoreSupplier} interface. The store suppliers must provide unique names or a
|
||||
* {@link org.apache.kafka.streams.errors.StreamsException} is thrown.
|
||||
*
|
||||
* @param storeSupplier this store supplier
|
||||
* @param otherStoreSupplier other store supplier
|
||||
* @param <K> the key type
|
||||
* @param <V1> this value type
|
||||
* @param <V2> other value type
|
||||
* @return {@link StreamJoined} instance
|
||||
*/
|
||||
public static <K, V1, V2> StreamJoined<K, V1, V2> with(final WindowBytesStoreSupplier storeSupplier,
|
||||
final WindowBytesStoreSupplier otherStoreSupplier) {
|
||||
return new StreamJoined<>(
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
storeSupplier,
|
||||
otherStoreSupplier,
|
||||
null,
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link StreamJoined} instance using the provided name for the state stores and hence the changelog
|
||||
* topics for the join stores. The name for the stores will be ${applicationId}-<storeName>-this-join and ${applicationId}-<storeName>-other-join
|
||||
* or ${applicationId}-<storeName>-outer-this-join and ${applicationId}-<storeName>-outer-other-join depending on whether the join is an inner-join
|
||||
* or an outer join. The changelog topics will have the -changelog suffix. The user should note that even though the join stores will have a
|
||||
* specified name, the stores will remain unavailable for querying.
|
||||
*
|
||||
* @param storeName The name to use for the store
|
||||
* @param <K> The key type
|
||||
* @param <V1> This value type
|
||||
* @param <V2> Other value type
|
||||
* @return {@link StreamJoined} instance
|
||||
*/
|
||||
public static <K, V1, V2> StreamJoined<K, V1, V2> as(final String storeName) {
|
||||
return new StreamJoined<>(
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a {@link StreamJoined} instance with the provided serdes to configure the stores
|
||||
* for the join.
|
||||
* @param keySerde The key serde
|
||||
* @param valueSerde This value serde
|
||||
* @param otherValueSerde Other value serde
|
||||
* @param <K> The key type
|
||||
* @param <V1> This value type
|
||||
* @param <V2> Other value type
|
||||
* @return {@link StreamJoined} instance
|
||||
*/
|
||||
public static <K, V1, V2> StreamJoined<K, V1, V2> with(final Serde<K> keySerde,
|
||||
final Serde<V1> valueSerde,
|
||||
final Serde<V2> otherValueSerde
|
||||
) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the name to use for the join processor and the repartition topic(s) if required.
|
||||
* @param name the name to use
|
||||
* @return a new {@link StreamJoined} instance
|
||||
*/
|
||||
@Override
|
||||
public StreamJoined<K, V1, V2> withName(final String name) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
thisStoreSupplier,
|
||||
otherStoreSupplier,
|
||||
name,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the base store name to use for both sides of the join. The name for the state stores and hence the changelog
|
||||
* topics for the join stores. The name for the stores will be ${applicationId}-<storeName>-this-join and ${applicationId}-<storeName>-other-join
|
||||
* or ${applicationId}-<storeName>-outer-this-join and ${applicationId}-<storeName>-outer-other-join depending on whether the join is an inner-join
|
||||
* or an outer join. The changelog topics will have the -changelog suffix. The user should note that even though the join stores will have a
|
||||
* specified name, the stores will remain unavailable for querying.
|
||||
*
|
||||
* @param storeName the storeName to use
|
||||
* @return a new {@link StreamJoined} instance
|
||||
*/
|
||||
public StreamJoined<K, V1, V2> withStoreName(final String storeName) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
thisStoreSupplier,
|
||||
otherStoreSupplier,
|
||||
name,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure with the provided {@link Serde Serde<K>} for the key
|
||||
* @param keySerde the serde to use for the key
|
||||
* @return a new {@link StreamJoined} configured with the keySerde
|
||||
*/
|
||||
public StreamJoined<K, V1, V2> withKeySerde(final Serde<K> keySerde) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
thisStoreSupplier,
|
||||
otherStoreSupplier,
|
||||
name,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure with the provided {@link Serde Serde<V1>} for this value
|
||||
* @param valueSerde the serde to use for this value (calling or left side of the join)
|
||||
* @return a new {@link StreamJoined} configured with the valueSerde
|
||||
*/
|
||||
public StreamJoined<K, V1, V2> withValueSerde(final Serde<V1> valueSerde) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
thisStoreSupplier,
|
||||
otherStoreSupplier,
|
||||
name,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure with the provided {@link Serde Serde<V2>} for the other value
|
||||
* @param otherValueSerde the serde to use for the other value (other or right side of the join)
|
||||
* @return a new {@link StreamJoined} configured with the otherValueSerde
|
||||
*/
|
||||
public StreamJoined<K, V1, V2> withOtherValueSerde(final Serde<V2> otherValueSerde) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
thisStoreSupplier,
|
||||
otherStoreSupplier,
|
||||
name,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure with the provided {@link WindowBytesStoreSupplier} for this store supplier. Please note
|
||||
* this method only provides the store supplier for the left side of the join. If you wish to also provide a
|
||||
* store supplier for the right (i.e., other) side you must use the {@link StreamJoined#withOtherStoreSupplier(WindowBytesStoreSupplier)}
|
||||
* method.
|
||||
* @param thisStoreSupplier the store supplier to use for this store supplier (calling or left side of the join)
|
||||
* @return a new {@link StreamJoined} configured with thisStoreSupplier
|
||||
*/
|
||||
public StreamJoined<K, V1, V2> withThisStoreSupplier(final WindowBytesStoreSupplier thisStoreSupplier) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
thisStoreSupplier,
|
||||
otherStoreSupplier,
|
||||
name,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure with the provided {@link WindowBytesStoreSupplier} for the other store supplier. Please note
|
||||
* this method only provides the store supplier for the right side of the join. If you wish to also provide a
|
||||
* store supplier for the left side you must use the {@link StreamJoined#withThisStoreSupplier(WindowBytesStoreSupplier)}
|
||||
* method
|
||||
* @param otherStoreSupplier the store supplier to use for the other store supplier (other or right side of the join)
|
||||
* @return a new {@link StreamJoined} configured with otherStoreSupplier
|
||||
*/
|
||||
public StreamJoined<K, V1, V2> withOtherStoreSupplier(final WindowBytesStoreSupplier otherStoreSupplier) {
|
||||
return new StreamJoined<>(
|
||||
keySerde,
|
||||
valueSerde,
|
||||
otherValueSerde,
|
||||
thisStoreSupplier,
|
||||
otherStoreSupplier,
|
||||
name,
|
||||
storeName
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "StreamJoin{" +
|
||||
"keySerde=" + keySerde +
|
||||
", valueSerde=" + valueSerde +
|
||||
", otherValueSerde=" + otherValueSerde +
|
||||
", thisStoreSupplier=" + thisStoreSupplier +
|
||||
", otherStoreSupplier=" + otherStoreSupplier +
|
||||
", name='" + name + '\'' +
|
||||
", storeName='" + storeName + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
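For orientation, a hypothetical sketch of wiring StreamJoined into a windowed stream-stream join; the topics "orders" and "payments", the value types, and the join/store names are assumptions for illustration.

import java.time.Duration;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.JoinWindows;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.StreamJoined;

public class StreamJoinedSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> orders =
            builder.stream("orders", Consumed.with(Serdes.String(), Serdes.Long()));
        final KStream<String, Double> payments =
            builder.stream("payments", Consumed.with(Serdes.String(), Serdes.Double()));

        final KStream<String, String> matches = orders.join(
            payments,
            (orderValue, paymentValue) -> orderValue + "/" + paymentValue,  // ValueJoiner
            JoinWindows.of(Duration.ofMinutes(5)),
            StreamJoined.with(Serdes.String(), Serdes.Long(), Serdes.Double())
                .withName("order-payment-join")
                .withStoreName("order-payment-join-store"));

        matches.to("order-payment-matches");
    }
}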
@@ -0,0 +1,177 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.kstream.internals.suppress.EagerBufferConfigImpl;
|
||||
import org.apache.kafka.streams.kstream.internals.suppress.FinalResultsSuppressionBuilder;
|
||||
import org.apache.kafka.streams.kstream.internals.suppress.StrictBufferConfigImpl;
|
||||
import org.apache.kafka.streams.kstream.internals.suppress.SuppressedInternal;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
public interface Suppressed<K> extends NamedOperation<Suppressed<K>> {
|
||||
|
||||
/**
|
||||
* Marker interface for a buffer configuration that is "strict" in the sense that it will strictly
|
||||
* enforce the time bound and never emit early.
|
||||
*/
|
||||
interface StrictBufferConfig extends BufferConfig<StrictBufferConfig> {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Marker interface for a buffer configuration that will strictly enforce size constraints
|
||||
* (bytes and/or number of records) on the buffer, so it is suitable for reducing duplicate
|
||||
* results downstream, but does not promise to eliminate them entirely.
|
||||
*/
|
||||
interface EagerBufferConfig extends BufferConfig<EagerBufferConfig> {
|
||||
|
||||
}
|
||||
|
||||
interface BufferConfig<BC extends BufferConfig<BC>> {
|
||||
/**
|
||||
* Create a size-constrained buffer in terms of the maximum number of keys it will store.
|
||||
*/
|
||||
static EagerBufferConfig maxRecords(final long recordLimit) {
|
||||
return new EagerBufferConfigImpl(recordLimit, Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a size constraint on the buffer in terms of the maximum number of keys it will store.
|
||||
*/
|
||||
BC withMaxRecords(final long recordLimit);
|
||||
|
||||
/**
|
||||
* Create a size-constrained buffer in terms of the maximum number of bytes it will use.
|
||||
*/
|
||||
static EagerBufferConfig maxBytes(final long byteLimit) {
|
||||
return new EagerBufferConfigImpl(Long.MAX_VALUE, byteLimit);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a size constraint on the buffer, the maximum number of bytes it will use.
|
||||
*/
|
||||
BC withMaxBytes(final long byteLimit);
|
||||
|
||||
/**
|
||||
* Create a buffer unconstrained by size (either keys or bytes).
|
||||
*
|
||||
* As a result, the buffer will consume as much memory as it needs, dictated by the time bound.
|
||||
*
|
||||
* If there isn't enough heap available to meet the demand, the application will encounter an
|
||||
* {@link OutOfMemoryError} and shut down (not guaranteed to be a graceful exit). Also, note that
|
||||
* JVM processes under extreme memory pressure may exhibit poor GC behavior.
|
||||
*
|
||||
* This is a convenient option if you doubt that your buffer will be that large, but also don't
|
||||
* wish to pick particular constraints, such as in testing.
|
||||
*
|
||||
* This buffer is "strict" in the sense that it will enforce the time bound or crash.
|
||||
* It will never emit early.
|
||||
*/
|
||||
static StrictBufferConfig unbounded() {
|
||||
return new StrictBufferConfigImpl();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the buffer to be unconstrained by size (either keys or bytes).
|
||||
*
|
||||
* As a result, the buffer will consume as much memory as it needs, dictated by the time bound.
|
||||
*
|
||||
* If there isn't enough heap available to meet the demand, the application will encounter an
|
||||
* {@link OutOfMemoryError} and shut down (not guaranteed to be a graceful exit). Also, note that
|
||||
* JVM processes under extreme memory pressure may exhibit poor GC behavior.
|
||||
*
|
||||
* This is a convenient option if you doubt that your buffer will be that large, but also don't
|
||||
* wish to pick particular constraints, such as in testing.
|
||||
*
|
||||
* This buffer is "strict" in the sense that it will enforce the time bound or crash.
|
||||
* It will never emit early.
|
||||
*/
|
||||
StrictBufferConfig withNoBound();
|
||||
|
||||
/**
|
||||
* Set the buffer to gracefully shut down the application when any of its constraints are violated.
|
||||
*
|
||||
* This buffer is "strict" in the sense that it will enforce the time bound or shut down.
|
||||
* It will never emit early.
|
||||
*/
|
||||
StrictBufferConfig shutDownWhenFull();
|
||||
|
||||
/**
|
||||
* Set the buffer to just emit the oldest records when any of its constraints are violated.
|
||||
*
|
||||
* This buffer is "not strict" in the sense that it may emit early, so it is suitable for reducing
|
||||
* duplicate results downstream, but does not promise to eliminate them.
|
||||
*/
|
||||
EagerBufferConfig emitEarlyWhenFull();
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the suppression to emit only the "final results" from the window.
|
||||
*
|
||||
* By default all Streams operators emit results whenever new results are available.
|
||||
* This includes windowed operations.
|
||||
*
|
||||
* This configuration will instead emit just one result per key for each window, guaranteeing
|
||||
* to deliver only the final result. This option is suitable for use cases in which the business logic
|
||||
* requires a hard guarantee that only the final result is propagated. For example, sending alerts.
|
||||
*
|
||||
* To accomplish this, the operator will buffer events from the window until the window closes (that is,
|
||||
* until the end-time passes, and additionally until the grace period expires). Since windowed operators
|
||||
* are required to reject out-of-order events for a window whose grace period is expired, there is an additional
|
||||
* guarantee that the final results emitted from this suppression will match any queryable state upstream.
|
||||
*
|
||||
* @param bufferConfig A configuration specifying how much space to use for buffering intermediate results.
|
||||
* This is required to be a "strict" config, since it would violate the "final results"
|
||||
* property to emit early and then issue an update later.
|
||||
* @return a "final results" mode suppression configuration
|
||||
*/
|
||||
static Suppressed<Windowed> untilWindowCloses(final StrictBufferConfig bufferConfig) {
|
||||
return new FinalResultsSuppressionBuilder<>(null, bufferConfig);
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure the suppression to wait {@code timeToWaitForMoreEvents} amount of time after receiving a record
|
||||
* before emitting it further downstream. If another record for the same key arrives in the meantime, it replaces
|
||||
* the first record in the buffer but does <em>not</em> re-start the timer.
|
||||
*
|
||||
* @param timeToWaitForMoreEvents The amount of time to wait, per record, for new events.
|
||||
* @param bufferConfig A configuration specifying how much space to use for buffering intermediate results.
|
||||
* @param <K> The key type for the KTable to apply this suppression to.
|
||||
* @return a suppression configuration
|
||||
*/
|
||||
static <K> Suppressed<K> untilTimeLimit(final Duration timeToWaitForMoreEvents, final BufferConfig bufferConfig) {
|
||||
return new SuppressedInternal<>(null, timeToWaitForMoreEvents, bufferConfig, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the specified name for the suppression node in the topology.
|
||||
* <p>
|
||||
* This can be used to insert a suppression without changing the rest of the topology names
|
||||
* (and therefore not requiring an application reset).
|
||||
* <p>
|
||||
* Note however, that once a suppression has buffered some records, removing it from the topology would cause
|
||||
* the loss of those records.
|
||||
* <p>
|
||||
* A suppression can be "disabled" with the configuration {@code untilTimeLimit(Duration.ZERO, ...}.
|
||||
*
|
||||
* @param name The name to be used for the suppression node and changelog topic
|
||||
* @return The same configuration with the addition of the given {@code name}.
|
||||
*/
|
||||
@Override
|
||||
Suppressed<K> withName(final String name);
|
||||
}
|
||||
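A minimal sketch of the "final results" mode described above, applied to a windowed count; the topic "page-views", the one-hour window, and the suppression name are illustrative assumptions, and a production topology would also configure serdes for any downstream output.

import java.time.Duration;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Suppressed;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;

public class SuppressedSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KTable<Windowed<String>, Long> finalCounts = builder
            .stream("page-views", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .windowedBy(TimeWindows.of(Duration.ofHours(1)).grace(Duration.ofMinutes(10)))
            .count()
            // one result per key and window, emitted only once the window has closed
            .suppress(Suppressed.untilWindowCloses(Suppressed.BufferConfig.unbounded())
                .withName("hourly-final-counts"));
    }
}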
@@ -0,0 +1,248 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.StoreQueryParameters;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.state.WindowStore;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* {@code TimeWindowedCogroupKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
|
||||
* It is an intermediate representation of a {@link CogroupedKStream} in order to apply a windowed aggregation operation
|
||||
* on the original {@link KGroupedStream} records resulting in a windowed {@link KTable} (a <em>windowed</em>
|
||||
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
|
||||
* <p>
|
||||
* The specified {@code windows} define either hopping time windows that can be overlapping or tumbling (c.f.
|
||||
* {@link TimeWindows}) or they define landmark windows (c.f. {@link UnlimitedWindows}).
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating
|
||||
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
|
||||
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
|
||||
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
|
||||
* New events are added to windows until their grace period ends (see {@link TimeWindows#grace(Duration)}).
|
||||
* <p>
|
||||
* A {@code TimeWindowedCogroupedKStream} must be obtained from a {@link CogroupedKStream} via
|
||||
* {@link CogroupedKStream#windowedBy(Windows)}.
|
||||
*
|
||||
* @param <K> Type of keys
|
||||
* @param <V> Type of values
|
||||
* @see KStream
|
||||
* @see KGroupedStream
|
||||
* @see CogroupedKStream
|
||||
*/
|
||||
public interface TimeWindowedCogroupedKStream<K, V> {
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #aggregate(Initializer, Materialized)}.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #aggregate(Initializer, Named, Materialized)}.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
|
||||
final Named named);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
|
||||
*
|
||||
* String key = "some-word";
|
||||
* long fromTime = ...;
|
||||
* long toTime = ...;
|
||||
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
|
||||
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
|
||||
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
|
||||
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
|
||||
* provided via the {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
|
||||
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
|
||||
*
|
||||
* String key = "some-word";
|
||||
* long fromTime = ...;
|
||||
* long toTime = ...;
|
||||
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
|
||||
final Named named,
|
||||
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
|
||||
}
|
||||
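An illustrative sketch of the windowed cogroup-aggregate described above; the topics "clicks" and "views", the Stats value class, and the one-minute window are assumptions invented for this example, and a real topology would also pass a Stats serde via the Materialized overloads.

import java.time.Duration;

import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Named;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;

public class WindowedCogroupSketch {

    // simple aggregate collecting independent counts from the two input streams
    public static class Stats {
        long clicks;
        long views;
    }

    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KGroupedStream<String, String> clicks =
            builder.<String, String>stream("clicks").groupByKey();
        final KGroupedStream<String, String> views =
            builder.<String, String>stream("views").groupByKey();

        final Aggregator<String, String, Stats> countClicks = (key, value, agg) -> { agg.clicks++; return agg; };
        final Aggregator<String, String, Stats> countViews = (key, value, agg) -> { agg.views++; return agg; };

        final KTable<Windowed<String>, Stats> perMinute = clicks
            .cogroup(countClicks)
            .cogroup(views, countViews)
            .windowedBy(TimeWindows.of(Duration.ofMinutes(1)))
            .aggregate(Stats::new, Named.as("click-view-stats"));
    }
}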
@@ -0,0 +1,108 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.config.ConfigException;
|
||||
import org.apache.kafka.common.serialization.Deserializer;
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.common.utils.Utils;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.state.internals.WindowKeySchema;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The inner serde class can be specified by setting the property
|
||||
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
|
||||
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
|
||||
* if the no-arg constructor is called, and hence the inner serde is not passed during initialization.
|
||||
*/
|
||||
public class TimeWindowedDeserializer<T> implements Deserializer<Windowed<T>> {
|
||||
|
||||
private final Long windowSize;
|
||||
private boolean isChangelogTopic;
|
||||
|
||||
private Deserializer<T> inner;
|
||||
|
||||
// Default constructor needed by Kafka
|
||||
public TimeWindowedDeserializer() {
|
||||
this(null, Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
// TODO: fix this part as last bits of KAFKA-4468
|
||||
public TimeWindowedDeserializer(final Deserializer<T> inner) {
|
||||
this(inner, Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
public TimeWindowedDeserializer(final Deserializer<T> inner, final long windowSize) {
|
||||
this.inner = inner;
|
||||
this.windowSize = windowSize;
|
||||
this.isChangelogTopic = false;
|
||||
}
|
||||
|
||||
public Long getWindowSize() {
|
||||
return this.windowSize;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void configure(final Map<String, ?> configs, final boolean isKey) {
|
||||
if (inner == null) {
|
||||
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
|
||||
final String value = (String) configs.get(propertyName);
|
||||
try {
|
||||
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).deserializer();
|
||||
inner.configure(configs, isKey);
|
||||
} catch (final ClassNotFoundException e) {
|
||||
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Windowed<T> deserialize(final String topic, final byte[] data) {
|
||||
WindowedSerdes.verifyInnerDeserializerNotNull(inner, this);
|
||||
|
||||
if (data == null || data.length == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// toStoreKeyBinary was used to serialize the data.
|
||||
if (this.isChangelogTopic) {
|
||||
return WindowKeySchema.fromStoreKey(data, windowSize, inner, topic);
|
||||
}
|
||||
|
||||
// toBinary was used to serialize the data
|
||||
return WindowKeySchema.from(data, windowSize, inner, topic);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (inner != null) {
|
||||
inner.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void setIsChangelogTopic(final boolean isChangelogTopic) {
|
||||
this.isChangelogTopic = isChangelogTopic;
|
||||
}
|
||||
|
||||
// Only for testing
|
||||
Deserializer<T> innerDeserializer() {
|
||||
return inner;
|
||||
}
|
||||
}
|
||||
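A minimal sketch of decoding windowed keys with this deserializer, for example when reading a windowed aggregate's output topic with a plain consumer; the topic name, the 60,000 ms window size, and the helper that supplies the raw key bytes are assumptions and must match the upstream topology.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
import org.apache.kafka.streams.kstream.Windowed;

public class TimeWindowedDeserializerSketch {
    public static void main(final String[] args) {
        // inner String deserializer plus the window size used by the producing topology (1 minute here)
        final TimeWindowedDeserializer<String> deserializer =
            new TimeWindowedDeserializer<>(Serdes.String().deserializer(), 60_000L);

        final byte[] rawKey = fetchKeyBytes(); // placeholder for bytes obtained from a real consumer
        final Windowed<String> key = deserializer.deserialize("minutely-counts", rawKey);
        if (key != null) {
            System.out.println(key.key() + " @ [" + key.window().start() + "," + key.window().end() + ")");
        }
        deserializer.close();
    }

    private static byte[] fetchKeyBytes() {
        return null; // stand-in so the sketch compiles without a running consumer
    }
}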
@@ -0,0 +1,637 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.utils.Bytes;
|
||||
import org.apache.kafka.streams.KafkaStreams;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.StoreQueryParameters;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.Topology;
|
||||
import org.apache.kafka.streams.state.WindowStore;
|
||||
|
||||
import java.time.Duration;
|
||||
|
||||
/**
|
||||
* {@code TimeWindowedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
|
||||
* It is an intermediate representation after a grouping and windowing of a {@link KStream} before an aggregation is
|
||||
* applied to the new (partitioned) windows resulting in a windowed {@link KTable} (a <em>windowed</em>
|
||||
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
|
||||
* <p>
|
||||
* The specified {@code windows} define either hopping time windows that can be overlapping or tumbling (c.f.
|
||||
* {@link TimeWindows}) or they define landmark windows (c.f. {@link UnlimitedWindows}).
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating
|
||||
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
|
||||
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
|
||||
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
|
||||
* New events are added to {@link TimeWindows} until their grace period ends (see {@link TimeWindows#grace(Duration)}).
|
||||
* <p>
|
||||
* A {@code TimeWindowedKStream} must be obtained from a {@link KGroupedStream} via
|
||||
* {@link KGroupedStream#windowedBy(Windows)}.
|
||||
*
|
||||
* @param <K> Type of keys
|
||||
* @param <V> Type of values
|
||||
* @see KStream
|
||||
* @see KGroupedStream
|
||||
*/
|
||||
public interface TimeWindowedKStream<K, V> {
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* The default key serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #count(Materialized)}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count();
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* The default key serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #count(Named, Materialized)}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
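* <p>
* For example (an illustrative sketch; {@code windowedStream} and the processor name are assumptions, not part of
* this API):
* <pre>{@code
* KTable<Windowed<String>, Long> counts = windowedStream.count(Named.as("windowed-count"));
* }</pre>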
|
||||
*
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count(final Named named);
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the name provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
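* <p>
* For example, a queryable store could be requested roughly as follows (an illustrative sketch; the store name and
* the {@code windowedStream} variable are assumptions):
* <pre>{@code
* KTable<Windowed<String>, Long> counts = windowedStream.count(
*     Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-counts-store"));
* }</pre>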
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> countForWordsForWindows = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
|
||||
* if there is no valueSerde provided
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count(final Materialized<K, Long, WindowStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Count the number of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the name provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> countForWordsForWindows = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
|
||||
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
|
||||
* if there is no valueSerde provided
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
|
||||
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, Long> count(final Named named,
|
||||
final Materialized<K, Long, WindowStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
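* <p>
* For example, a windowed sum of value lengths could be sketched as follows (an illustrative sketch; the
* {@code windowedStream} variable and its {@code String} value type are assumptions):
* <pre>{@code
* KTable<Windowed<String>, Integer> lengthSums = windowedStream.aggregate(
*     () -> 0,
*     (key, value, aggregate) -> aggregate + value.length());
* }</pre>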
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #aggregate(Initializer, Aggregator, Materialized)}.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use
|
||||
* {@link #aggregate(Initializer, Aggregator, Named, Materialized)}.
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* Note that the internal store name may not be queryable through Interactive Queries.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Named named);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Materialized<K, VR, WindowStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Aggregate the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
|
||||
* allows the result to have a different type than the input values.
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
|
||||
* processed to provide an initial intermediate aggregation result that is used to process the first record for
|
||||
* the window (per key).
|
||||
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
|
||||
* aggregate (or for the very first record using the intermediate aggregation result provided via the
|
||||
* {@link Initializer}) and the record's value.
|
||||
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
|
||||
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
|
||||
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
|
||||
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @param <VR> the value type of the resulting {@link KTable}
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
|
||||
final Aggregator<? super K, ? super V, VR> aggregator,
|
||||
final Named named,
|
||||
final Materialized<K, VR, WindowStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value
|
||||
* (c.f. {@link #aggregate(Initializer, Aggregator)}).
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #reduce(Reducer, Materialized)}.
|
||||
* <p>
|
||||
* The value of the first record per window initializes the aggregation result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // Using a Reducer<Long> as an example
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value.
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* The default key and value serde from the config will be used for serializing the result.
|
||||
* If a different serde is required then you should use {@link #reduce(Reducer, Named, Materialized)}.
|
||||
* <p>
|
||||
* The value of the first record per window initializes the aggregation result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // Using a Reducer<Long> as an example
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
|
||||
* the same window and key.
|
||||
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
|
||||
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
|
||||
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
|
||||
* and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer, final Named named);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value.
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The value of the first record per window initializes the aggregation result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // Using a Reducer<Long> as an example
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> reduceStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@code Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
|
||||
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
|
||||
|
||||
/**
|
||||
* Combine the values of records in this stream by the grouped key and defined windows.
|
||||
* Records with {@code null} key or value are ignored.
|
||||
* Combining implies that the type of the aggregate result is the same as the type of the input value.
|
||||
* <p>
|
||||
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
|
||||
* that can be queried using the store name as provided with {@link Materialized}.
|
||||
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
|
||||
* <p>
|
||||
* The value of the first record per window initializes the aggregation result.
|
||||
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
|
||||
* aggregate using the current aggregate (first argument) and the record's value (second argument):
|
||||
* <pre>{@code
|
||||
* // Using a Reducer<Long> as an example
|
||||
* new Reducer<Long>() {
|
||||
* public Long apply(Long aggValue, Long currValue) {
|
||||
* return aggValue + currValue;
|
||||
* }
|
||||
* }
|
||||
* }</pre>
|
||||
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
|
||||
* <p>
|
||||
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
|
||||
* to the same window and key if caching is enabled on the {@link Materialized} instance.
|
||||
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
|
||||
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
|
||||
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
|
||||
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
|
||||
* <p>
|
||||
* To query the local {@link WindowStore} it must be obtained via
|
||||
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
|
||||
* <pre>{@code
|
||||
* KafkaStreams streams = ... // counting words
|
||||
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> reduceStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
|
||||
* }</pre>
|
||||
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
|
||||
* query the value of the key on a parallel running instance of your Kafka Streams application.
|
||||
* <p>
|
||||
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
|
||||
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
|
||||
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
|
||||
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
|
||||
* user-specified in {@link StreamsConfig} via parameter
|
||||
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
|
||||
* in {@link Materialized}, and "-changelog" is a fixed suffix.
|
||||
* <p>
|
||||
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
|
||||
*
|
||||
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
|
||||
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
|
||||
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
|
||||
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
|
||||
* the latest (rolling) aggregate for each key within a window
|
||||
*/
|
||||
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
|
||||
final Named named,
|
||||
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
|
||||
}
|
||||
@@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.config.ConfigException;
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.common.serialization.Serializer;
|
||||
import org.apache.kafka.common.utils.Utils;
|
||||
import org.apache.kafka.streams.StreamsConfig;
|
||||
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
|
||||
import org.apache.kafka.streams.state.internals.WindowKeySchema;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}.
* This is required when the no-arg constructor is used, because no inner serializer is passed during initialization in that case.
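* <p>
* For example, the inner class could be configured roughly as follows when this serializer is instantiated by
* reflection (an illustrative sketch; {@code props} and the chosen inner serde are assumptions):
* <pre>{@code
* props.put(StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS, Serdes.StringSerde.class.getName());
* }</pre>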
|
||||
*/
|
||||
public class TimeWindowedSerializer<T> implements WindowedSerializer<T> {
|
||||
|
||||
private Serializer<T> inner;
|
||||
|
||||
// Default constructor needed by Kafka
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public TimeWindowedSerializer() {}
|
||||
|
||||
public TimeWindowedSerializer(final Serializer<T> inner) {
|
||||
this.inner = inner;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
@Override
|
||||
public void configure(final Map<String, ?> configs, final boolean isKey) {
|
||||
if (inner == null) {
|
||||
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
|
||||
final String value = (String) configs.get(propertyName);
|
||||
try {
|
||||
inner = Utils.newInstance(value, Serde.class).serializer();
|
||||
inner.configure(configs, isKey);
|
||||
} catch (final ClassNotFoundException e) {
|
||||
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] serialize(final String topic, final Windowed<T> data) {
|
||||
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
|
||||
|
||||
if (data == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return WindowKeySchema.toBinary(data, inner, topic);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if (inner != null) {
|
||||
inner.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] serializeBaseKey(final String topic, final Windowed<T> data) {
|
||||
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
|
||||
|
||||
return inner.serialize(topic, data.key());
|
||||
}
|
||||
|
||||
// Only for testing
|
||||
Serializer<T> innerSerializer() {
|
||||
return inner;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.internals.ApiUtils;
|
||||
import org.apache.kafka.streams.kstream.internals.TimeWindow;
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
|
||||
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
|
||||
|
||||
/**
|
||||
* The fixed-size time-based window specifications used for aggregations.
|
||||
* <p>
|
||||
* The semantics of time-based aggregation windows are: Every T1 (advance) milliseconds, compute the aggregate total for
|
||||
* T2 (size) milliseconds.
|
||||
* <ul>
|
||||
* <li> If {@code advance < size} a hopping window is defined:<br />
* it discretizes a stream into overlapping windows, which implies that a record may be contained in one or
* more "adjacent" windows.</li>
* <li> If {@code advance == size} a tumbling window is defined:<br />
* it discretizes a stream into non-overlapping windows, which implies that a record is only ever contained in
* one and only one tumbling window.</li>
|
||||
* </ul>
|
||||
* Thus, the specified {@link TimeWindow}s are aligned to the epoch.
|
||||
* Aligned to the epoch means that the first window starts at timestamp zero.
|
||||
* For example, hopping windows with size of 5000ms and advance of 3000ms, have window boundaries
|
||||
* [0;5000),[3000;8000),... and not [1000;6000),[4000;9000),... or even something "random" like [1452;6452),[4452;9452),...
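* <p>
* For example (an illustrative sketch using this class's own factory methods; the durations are arbitrary):
* <pre>{@code
* // tumbling windows of 5 minutes
* TimeWindows tumbling = TimeWindows.of(Duration.ofMinutes(5));
* // hopping windows of 5 minutes, advancing by 1 minute
* TimeWindows hopping = TimeWindows.of(Duration.ofMinutes(5)).advanceBy(Duration.ofMinutes(1));
* }</pre>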
|
||||
* <p>
|
||||
* For time semantics, see {@link TimestampExtractor}.
|
||||
*
|
||||
* @see SessionWindows
|
||||
* @see UnlimitedWindows
|
||||
* @see JoinWindows
|
||||
* @see KGroupedStream#windowedBy(Windows)
|
||||
* @see TimestampExtractor
|
||||
*/
|
||||
public final class TimeWindows extends Windows<TimeWindow> {
|
||||
|
||||
private final long maintainDurationMs;
|
||||
|
||||
/** The size of the windows in milliseconds. */
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public final long sizeMs;
|
||||
|
||||
/**
|
||||
* The size of the window's advance interval in milliseconds, i.e., by how much a window moves forward relative to
|
||||
* the previous one.
|
||||
*/
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public final long advanceMs;
|
||||
private final long graceMs;
|
||||
|
||||
private TimeWindows(final long sizeMs, final long advanceMs, final long graceMs, final long maintainDurationMs) {
|
||||
this.sizeMs = sizeMs;
|
||||
this.advanceMs = advanceMs;
|
||||
this.graceMs = graceMs;
|
||||
this.maintainDurationMs = maintainDurationMs;
|
||||
}
|
||||
|
||||
/** Private constructor for preserving segments. Can be removed along with Windows.segments. **/
|
||||
@Deprecated
|
||||
private TimeWindows(final long sizeMs,
|
||||
final long advanceMs,
|
||||
final long graceMs,
|
||||
final long maintainDurationMs,
|
||||
final int segments) {
|
||||
super(segments);
|
||||
this.sizeMs = sizeMs;
|
||||
this.advanceMs = advanceMs;
|
||||
this.graceMs = graceMs;
|
||||
this.maintainDurationMs = maintainDurationMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a window definition with the given window size, and with the advance interval being equal to the window
|
||||
* size.
|
||||
* The time interval represented by the N-th window is: {@code [N * size, N * size + size)}.
|
||||
* <p>
|
||||
* This provides the semantics of tumbling windows, which are fixed-sized, gap-less, non-overlapping windows.
|
||||
* Tumbling windows are a special case of hopping windows with {@code advance == size}.
|
||||
*
|
||||
* @param sizeMs The size of the window in milliseconds
|
||||
* @return a new window definition with default maintain duration of 1 day
|
||||
* @throws IllegalArgumentException if the specified window size is zero or negative
|
||||
* @deprecated Use {@link #of(Duration)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public static TimeWindows of(final long sizeMs) throws IllegalArgumentException {
|
||||
if (sizeMs <= 0) {
|
||||
throw new IllegalArgumentException("Window size (sizeMs) must be larger than zero.");
|
||||
}
|
||||
// This is a static factory method, so we initialize grace and retention to the defaults.
|
||||
return new TimeWindows(sizeMs, sizeMs, -1, DEFAULT_RETENTION_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a window definition with the given window size, and with the advance interval being equal to the window
|
||||
* size.
|
||||
* The time interval represented by the N-th window is: {@code [N * size, N * size + size)}.
|
||||
* <p>
|
||||
* This provides the semantics of tumbling windows, which are fixed-sized, gap-less, non-overlapping windows.
|
||||
* Tumbling windows are a special case of hopping windows with {@code advance == size}.
|
||||
*
|
||||
* @param size The size of the window
|
||||
* @return a new window definition with default maintain duration of 1 day
|
||||
* @throws IllegalArgumentException if the specified window size is zero or negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
@SuppressWarnings("deprecation") // removing #of(final long sizeMs) will fix this
|
||||
public static TimeWindows of(final Duration size) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(size, "size");
|
||||
return of(ApiUtils.validateMillisecondDuration(size, msgPrefix));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a window definition with the original size, but advance ("hop") the window by the given interval, which
|
||||
* specifies by how much a window moves forward relative to the previous one.
|
||||
* The time interval represented by the N-th window is: {@code [N * advance, N * advance + size)}.
|
||||
* <p>
|
||||
* This provides the semantics of hopping windows, which are fixed-sized, overlapping windows.
|
||||
*
|
||||
* @param advanceMs The advance interval ("hop") in milliseconds of the window, with the requirement that {@code 0 < advanceMs <= sizeMs}.
|
||||
* @return a new window definition with default maintain duration of 1 day
|
||||
* @throws IllegalArgumentException if the advance interval is negative, zero, or larger than the window size
|
||||
* @deprecated Use {@link #advanceBy(Duration)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public TimeWindows advanceBy(final long advanceMs) {
|
||||
if (advanceMs <= 0 || advanceMs > sizeMs) {
|
||||
throw new IllegalArgumentException(String.format("Window advancement interval should be more than zero " +
|
||||
"and less than window duration which is %d ms, but given advancement interval is: %d ms", sizeMs, advanceMs));
|
||||
}
|
||||
return new TimeWindows(sizeMs, advanceMs, graceMs, maintainDurationMs, segments);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a window definition with the original size, but advance ("hop") the window by the given interval, which
|
||||
* specifies by how much a window moves forward relative to the previous one.
|
||||
* The time interval represented by the N-th window is: {@code [N * advance, N * advance + size)}.
|
||||
* <p>
|
||||
* This provides the semantics of hopping windows, which are fixed-sized, overlapping windows.
|
||||
*
|
||||
* @param advance The advance interval ("hop") of the window, with the requirement that {@code 0 < advance.toMillis() <= sizeMs}.
|
||||
* @return a new window definition with default maintain duration of 1 day
|
||||
* @throws IllegalArgumentException if the advance interval is negative, zero, or larger than the window size
|
||||
*/
|
||||
@SuppressWarnings("deprecation") // removing #advanceBy(final long advanceMs) will fix this
|
||||
public TimeWindows advanceBy(final Duration advance) {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(advance, "advance");
|
||||
return advanceBy(ApiUtils.validateMillisecondDuration(advance, msgPrefix));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<Long, TimeWindow> windowsFor(final long timestamp) {
|
||||
long windowStart = (Math.max(0, timestamp - sizeMs + advanceMs) / advanceMs) * advanceMs;
|
||||
final Map<Long, TimeWindow> windows = new LinkedHashMap<>();
|
||||
while (windowStart <= timestamp) {
|
||||
final TimeWindow window = new TimeWindow(windowStart, windowStart + sizeMs);
|
||||
windows.put(windowStart, window);
|
||||
windowStart += advanceMs;
|
||||
}
|
||||
return windows;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() {
|
||||
return sizeMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reject out-of-order events that arrive more than {@code afterWindowEnd}
* after the end of their window.
|
||||
* <p>
|
||||
* Delay is defined as (stream_time - record_timestamp).
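* <p>
* For example, a 5-minute window that admits events up to 1 minute late could be sketched as
* {@code TimeWindows.of(Duration.ofMinutes(5)).grace(Duration.ofMinutes(1))} (the durations are illustrative).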
|
||||
*
|
||||
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
|
||||
* @return this updated builder
|
||||
* @throws IllegalArgumentException if {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
@SuppressWarnings("deprecation") // will be fixed when we remove segments from Windows
|
||||
public TimeWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
|
||||
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
|
||||
if (afterWindowEndMs < 0) {
|
||||
throw new IllegalArgumentException("Grace period must not be negative.");
|
||||
}
|
||||
|
||||
return new TimeWindows(sizeMs, advanceMs, afterWindowEndMs, maintainDurationMs, segments);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // continuing to support Windows#maintainMs/segmentInterval in fallback mode
|
||||
@Override
|
||||
public long gracePeriodMs() {
|
||||
// NOTE: in the future, when we remove maintainMs,
|
||||
// we should default the grace period to 24h to maintain the default behavior,
|
||||
// or we can default to (24h - size) if you want to be super accurate.
|
||||
return graceMs != -1 ? graceMs : maintainMs() - size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param durationMs the window retention time in milliseconds
* @return itself
* @throws IllegalArgumentException if {@code durationMs} is smaller than the window size
|
||||
*
|
||||
* @deprecated since 2.1. Use {@link Materialized#retention} or directly configure the retention in a store supplier
|
||||
* and use {@link Materialized#as(WindowBytesStoreSupplier)}.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public TimeWindows until(final long durationMs) throws IllegalArgumentException {
|
||||
if (durationMs < sizeMs) {
|
||||
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than the window size.");
|
||||
}
|
||||
return new TimeWindows(sizeMs, advanceMs, graceMs, durationMs, segments);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* <p>
|
||||
* For {@code TimeWindows} the maintain duration is at least as large as the window size.
|
||||
*
|
||||
* @return the window maintain duration
|
||||
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public long maintainMs() {
|
||||
return Math.max(maintainDurationMs, sizeMs);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final TimeWindows that = (TimeWindows) o;
|
||||
return maintainDurationMs == that.maintainDurationMs &&
|
||||
segments == that.segments &&
|
||||
sizeMs == that.sizeMs &&
|
||||
advanceMs == that.advanceMs &&
|
||||
graceMs == that.graceMs;
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(maintainDurationMs, segments, sizeMs, advanceMs, graceMs);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TimeWindows{" +
|
||||
"maintainDurationMs=" + maintainDurationMs +
|
||||
", sizeMs=" + sizeMs +
|
||||
", advanceMs=" + advanceMs +
|
||||
", graceMs=" + graceMs +
|
||||
", segments=" + segments +
|
||||
'}';
|
||||
}
|
||||
}
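The class above defines hopping windows: a fixed size plus an advance ("hop") interval. A minimal sketch of how it is typically wired into a topology follows; the topic name, types, and durations are illustrative assumptions, not part of this patch.

    import java.time.Duration;

    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.TimeWindows;

    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> views = builder.stream("page-views");       // hypothetical topic
    views.groupByKey()
         // 5-minute windows advancing by 1 minute: each record falls into 5 overlapping windows
         .windowedBy(TimeWindows.of(Duration.ofMinutes(5))
                                .advanceBy(Duration.ofMinutes(1))
                                .grace(Duration.ofSeconds(30)))               // admit records up to 30s late
         .count();

Because the advance is smaller than the size here, windowsFor(timestamp) returns several overlapping windows for every record, which is exactly what the LinkedHashMap loop above computes.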
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import java.time.Duration;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.processor.ProcessorContext;
|
||||
import org.apache.kafka.streams.processor.PunctuationType;
|
||||
import org.apache.kafka.streams.processor.Punctuator;
|
||||
import org.apache.kafka.streams.processor.StateStore;
|
||||
import org.apache.kafka.streams.processor.To;
|
||||
|
||||
/**
|
||||
* The {@code Transformer} interface is for stateful mapping of an input record to zero, one, or multiple new output
|
||||
* records (both key and value type can be altered arbitrarily).
|
||||
* This is a stateful record-by-record operation, i.e., {@link #transform(Object, Object)} is invoked individually for
|
||||
* each record of a stream and can access and modify a state that is available beyond a single call of
|
||||
* {@link #transform(Object, Object)} (cf. {@link KeyValueMapper} for stateless record transformation).
|
||||
* Additionally, this {@code Transformer} can {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule}
|
||||
* a method to be {@link Punctuator#punctuate(long) called periodically} with the provided context.
|
||||
* <p>
|
||||
* Use {@link TransformerSupplier} to provide new instances of {@code Transformer} to Kafka Stream's runtime.
|
||||
* <p>
|
||||
* If only a record's value should be modified {@link ValueTransformer} can be used.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <R> {@link KeyValue} return type (both key and value type can be set
|
||||
* arbitrarily)
|
||||
* @see TransformerSupplier
|
||||
* @see KStream#transform(TransformerSupplier, String...)
|
||||
* @see ValueTransformer
|
||||
* @see KStream#map(KeyValueMapper)
|
||||
* @see KStream#flatMap(KeyValueMapper)
|
||||
*/
|
||||
public interface Transformer<K, V, R> {
|
||||
|
||||
/**
|
||||
* Initialize this transformer.
|
||||
* This is called once per instance when the topology gets initialized.
|
||||
* When the framework is done with the transformer, {@link #close()} will be called on it; the
|
||||
* framework may later re-use the transformer by calling {@link #init(ProcessorContext)} again.
|
||||
* <p>
|
||||
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
|
||||
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
|
||||
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
|
||||
* <p>
|
||||
* Note, that {@link ProcessorContext} is updated in the background with the current record's meta data.
|
||||
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object, Object)}.
|
||||
*
|
||||
* @param context the context
|
||||
*/
|
||||
void init(final ProcessorContext context);
|
||||
|
||||
/**
|
||||
* Transform the record with the given key and value.
|
||||
* Additionally, any {@link StateStore state} that is {@link KStream#transform(TransformerSupplier, String...)
|
||||
* attached} to this operator can be accessed and modified
|
||||
* arbitrarily (cf. {@link ProcessorContext#getStateStore(String)}).
|
||||
* <p>
|
||||
* If only one record should be forwarded downstream, {@code transform} can return a new {@link KeyValue}. If
|
||||
* more than one output record should be forwarded downstream, {@link ProcessorContext#forward(Object, Object)}
|
||||
* and {@link ProcessorContext#forward(Object, Object, To)} can be used.
|
||||
* If no record should be forwarded downstream, {@code transform} can return {@code null}.
|
||||
*
|
||||
* Note that returning a new {@link KeyValue} is merely for convenience. The same can be achieved by using
|
||||
* {@link ProcessorContext#forward(Object, Object)} and returning {@code null}.
|
||||
*
|
||||
* @param key the key for the record
|
||||
* @param value the value for the record
|
||||
* @return new {@link KeyValue} pair; if {@code null} no key-value pair will
* be forwarded downstream
|
||||
*/
|
||||
R transform(final K key, final V value);
|
||||
|
||||
/**
|
||||
* Close this transformer and clean up any resources. The framework may
|
||||
* later re-use this transformer by calling {@link #init(ProcessorContext)} on it again.
|
||||
* <p>
|
||||
* To generate new {@link KeyValue} pairs {@link ProcessorContext#forward(Object, Object)} and
|
||||
* {@link ProcessorContext#forward(Object, Object, To)} can be used.
|
||||
*/
|
||||
void close();
|
||||
|
||||
}
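A minimal sketch of implementing and attaching a Transformer; the store name "counts-store", the class name, and the wiring are hypothetical and only illustrate the contract described above.

    import org.apache.kafka.streams.KeyValue;
    import org.apache.kafka.streams.kstream.Transformer;
    import org.apache.kafka.streams.processor.ProcessorContext;
    import org.apache.kafka.streams.state.KeyValueStore;

    final class CountingTransformer implements Transformer<String, String, KeyValue<String, Long>> {
        private KeyValueStore<String, Long> store;

        @Override
        @SuppressWarnings("unchecked")
        public void init(final ProcessorContext context) {
            store = (KeyValueStore<String, Long>) context.getStateStore("counts-store");
        }

        @Override
        public KeyValue<String, Long> transform(final String key, final String value) {
            final Long old = store.get(key);
            final long count = (old == null ? 0L : old) + 1L;
            store.put(key, count);
            return KeyValue.pair(key, count);   // or return null and emit via context.forward(...)
        }

        @Override
        public void close() { }                 // the store itself is managed by the Streams runtime
    }

    // wiring sketch: register the store and attach it by name
    // builder.addStateStore(Stores.keyValueStoreBuilder(
    //         Stores.persistentKeyValueStore("counts-store"), Serdes.String(), Serdes.Long()));
    // stream.transform(CountingTransformer::new, "counts-store");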
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* A {@code TransformerSupplier} interface which can create one or more {@link Transformer} instances.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <R> {@link org.apache.kafka.streams.KeyValue KeyValue} return type (both key and value type can be set
|
||||
* arbitrarily)
|
||||
* @see Transformer
|
||||
* @see KStream#transform(TransformerSupplier, String...)
|
||||
* @see ValueTransformer
|
||||
* @see ValueTransformerSupplier
|
||||
* @see KStream#transformValues(ValueTransformerSupplier, String...)
|
||||
*/
|
||||
public interface TransformerSupplier<K, V, R> {
|
||||
|
||||
/**
|
||||
* Return a new {@link Transformer} instance.
|
||||
*
|
||||
* @return a new {@link Transformer} instance
|
||||
*/
|
||||
Transformer<K, V, R> get();
|
||||
}
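The supplier should return a fresh Transformer on every get() call, because each stream task needs its own instance. A small sketch, reusing the hypothetical CountingTransformer from the sketch above:

    final TransformerSupplier<String, String, KeyValue<String, Long>> supplier =
        new TransformerSupplier<String, String, KeyValue<String, Long>>() {
            @Override
            public Transformer<String, String, KeyValue<String, Long>> get() {
                return new CountingTransformer();   // never hand out a shared instance
            }
        };
    // stream.transform(supplier, "counts-store");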
@@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.internals.ApiUtils;
|
||||
import org.apache.kafka.streams.kstream.internals.UnlimitedWindow;
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
|
||||
|
||||
/**
|
||||
* The unlimited window specifications used for aggregations.
|
||||
* <p>
|
||||
* An unlimited time window is also called a landmark window.
* It has a fixed starting point while its window end is defined as infinite.
* In this regard, it is a fixed-size window with an infinite window size.
|
||||
* <p>
|
||||
* For time semantics, see {@link TimestampExtractor}.
|
||||
*
|
||||
* @see TimeWindows
|
||||
* @see SessionWindows
|
||||
* @see JoinWindows
|
||||
* @see KGroupedStream#windowedBy(Windows)
|
||||
* @see TimestampExtractor
|
||||
*/
|
||||
public final class UnlimitedWindows extends Windows<UnlimitedWindow> {
|
||||
|
||||
private static final long DEFAULT_START_TIMESTAMP_MS = 0L;
|
||||
|
||||
/** The start timestamp of the window. */
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
public final long startMs;
|
||||
|
||||
private UnlimitedWindows(final long startMs) {
|
||||
this.startMs = startMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an unlimited window starting at timestamp zero.
|
||||
*/
|
||||
public static UnlimitedWindows of() {
|
||||
return new UnlimitedWindows(DEFAULT_START_TIMESTAMP_MS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a new unlimited window for the specified start timestamp.
|
||||
*
|
||||
* @param startMs the window start time
|
||||
* @return a new unlimited window that starts at {@code startMs}
|
||||
* @throws IllegalArgumentException if the start time is negative
|
||||
* @deprecated Use {@link #startOn(Instant)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public UnlimitedWindows startOn(final long startMs) throws IllegalArgumentException {
|
||||
if (startMs < 0) {
|
||||
throw new IllegalArgumentException("Window start time (startMs) cannot be negative.");
|
||||
}
|
||||
return new UnlimitedWindows(startMs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a new unlimited window for the specified start timestamp.
|
||||
*
|
||||
* @param start the window start time
|
||||
* @return a new unlimited window that starts at {@code start}
|
||||
* @throws IllegalArgumentException if the start time is negative or can't be represented as {@code long milliseconds}
|
||||
*/
|
||||
public UnlimitedWindows startOn(final Instant start) throws IllegalArgumentException {
|
||||
final String msgPrefix = prepareMillisCheckFailMsgPrefix(start, "start");
|
||||
return startOn(ApiUtils.validateMillisecondInstant(start, msgPrefix));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<Long, UnlimitedWindow> windowsFor(final long timestamp) {
|
||||
// always return the single unlimited window
|
||||
|
||||
// we cannot use Collections.singletonMap() since it does not support remove()
|
||||
final Map<Long, UnlimitedWindow> windows = new HashMap<>();
|
||||
if (timestamp >= startMs) {
|
||||
windows.put(startMs, new UnlimitedWindow(startMs));
|
||||
}
|
||||
return windows;
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* As unlimited windows have conceptually infinite size, this method just returns {@link Long#MAX_VALUE}.
|
||||
*
|
||||
* @return the size of the specified windows which is {@link Long#MAX_VALUE}
|
||||
*/
|
||||
@Override
|
||||
public long size() {
|
||||
return Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws an {@link IllegalArgumentException} because the retention time for unlimited windows is always infinite
|
||||
* and cannot be changed.
|
||||
*
|
||||
* @throws IllegalArgumentException on every invocation.
|
||||
* @deprecated since 2.1.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public UnlimitedWindows until(final long durationMs) {
|
||||
throw new IllegalArgumentException("Window retention time (durationMs) cannot be set for UnlimitedWindows.");
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
* The retention time for unlimited windows is infinite and thus represented as {@link Long#MAX_VALUE}.
|
||||
*
|
||||
* @return the window retention time that is {@link Long#MAX_VALUE}
|
||||
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
|
||||
*/
|
||||
@Override
|
||||
@Deprecated
|
||||
public long maintainMs() {
|
||||
return Long.MAX_VALUE;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long gracePeriodMs() {
|
||||
return 0L;
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null || getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
final UnlimitedWindows that = (UnlimitedWindows) o;
|
||||
return startMs == that.startMs && segments == that.segments;
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(startMs, segments);
|
||||
}
|
||||
|
||||
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
|
||||
@Override
|
||||
public String toString() {
|
||||
return "UnlimitedWindows{" +
|
||||
"startMs=" + startMs +
|
||||
", segments=" + segments +
|
||||
'}';
|
||||
}
|
||||
}
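A minimal sketch of a landmark aggregation with the class above: one window per key, starting at timestamp zero and never closing. The topic name and types are illustrative.

    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.UnlimitedWindows;

    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, String> events = builder.stream("events");   // hypothetical topic
    events.groupByKey()
          .windowedBy(UnlimitedWindows.of())   // single window per key from timestamp 0 onwards
          .count();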
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* The {@code ValueJoiner} interface for joining two values into a new value of arbitrary type.
|
||||
* This is a stateless operation, i.e., {@link #apply(Object, Object)} is invoked individually for each joining
|
||||
* record-pair of a {@link KStream}-{@link KStream}, {@link KStream}-{@link KTable}, or {@link KTable}-{@link KTable}
|
||||
* join.
|
||||
*
|
||||
* @param <V1> first value type
|
||||
* @param <V2> second value type
|
||||
* @param <VR> joined value type
|
||||
* @see KStream#join(KStream, ValueJoiner, JoinWindows)
|
||||
* @see KStream#join(KStream, ValueJoiner, JoinWindows, StreamJoined)
|
||||
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows)
|
||||
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
|
||||
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows)
|
||||
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
|
||||
* @see KStream#join(KTable, ValueJoiner)
|
||||
* @see KStream#join(KTable, ValueJoiner, Joined)
|
||||
* @see KStream#leftJoin(KTable, ValueJoiner)
|
||||
* @see KStream#leftJoin(KTable, ValueJoiner, Joined)
|
||||
* @see KTable#join(KTable, ValueJoiner)
|
||||
* @see KTable#leftJoin(KTable, ValueJoiner)
|
||||
* @see KTable#outerJoin(KTable, ValueJoiner)
|
||||
*/
|
||||
public interface ValueJoiner<V1, V2, VR> {
|
||||
|
||||
/**
|
||||
* Return a joined value consisting of {@code value1} and {@code value2}.
|
||||
*
|
||||
* @param value1 the first value for joining
|
||||
* @param value2 the second value for joining
|
||||
* @return the joined value
|
||||
*/
|
||||
VR apply(final V1 value1, final V2 value2);
|
||||
}
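A minimal sketch of a stream-table join where the ValueJoiner is given as a lambda; the topics and the joined string format are illustrative only.

    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.KTable;

    final StreamsBuilder builder = new StreamsBuilder();
    final KStream<String, Long> orders = builder.stream("orders");          // hypothetical topics
    final KTable<String, String> customers = builder.table("customers");
    // ValueJoiner<Long, String, String>: combine the order amount with the customer record
    final KStream<String, String> enriched =
        orders.join(customers, (amount, customer) -> customer + " ordered " + amount);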
@@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* The {@code ValueMapper} interface for mapping a value to a new value of arbitrary type.
|
||||
* This is a stateless record-by-record operation, i.e., {@link #apply(Object)} is invoked individually for each record
|
||||
* of a stream (cf. {@link ValueTransformer} for stateful value transformation).
|
||||
* If {@code ValueMapper} is applied to a {@link org.apache.kafka.streams.KeyValue key-value pair} record the record's
|
||||
* key is preserved.
|
||||
* If a record's key and value should be modified {@link KeyValueMapper} can be used.
|
||||
*
|
||||
* @param <V> value type
|
||||
* @param <VR> mapped value type
|
||||
* @see KeyValueMapper
|
||||
* @see ValueTransformer
|
||||
* @see ValueTransformerWithKey
|
||||
* @see KStream#mapValues(ValueMapper)
|
||||
* @see KStream#mapValues(ValueMapperWithKey)
|
||||
* @see KStream#flatMapValues(ValueMapper)
|
||||
* @see KStream#flatMapValues(ValueMapperWithKey)
|
||||
* @see KTable#mapValues(ValueMapper)
|
||||
* @see KTable#mapValues(ValueMapperWithKey)
|
||||
*/
|
||||
public interface ValueMapper<V, VR> {
|
||||
|
||||
/**
|
||||
* Map the given value to a new value.
|
||||
*
|
||||
* @param value the value to be mapped
|
||||
* @return the new value
|
||||
*/
|
||||
VR apply(final V value);
|
||||
}
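A minimal sketch; mapValues only rewrites the value and keeps the key, so no repartitioning is triggered. The builder and topic are assumptions for illustration.

    // assuming an existing StreamsBuilder named `builder` and a hypothetical "sentences" topic
    final KStream<String, String> text = builder.stream("sentences");
    final KStream<String, Integer> lengths = text.mapValues(value -> value.length());   // key unchanged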
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
/**
|
||||
* The {@code ValueMapperWithKey} interface for mapping a value to a new value of arbitrary type.
|
||||
* This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
|
||||
* record of a stream (cf. {@link ValueTransformer} for stateful value transformation).
|
||||
* If {@code ValueMapperWithKey} is applied to a {@link org.apache.kafka.streams.KeyValue key-value pair} record the
|
||||
* record's key is preserved.
|
||||
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
|
||||
* If a record's key and value should be modified {@link KeyValueMapper} can be used.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VR> mapped value type
|
||||
* @see KeyValueMapper
|
||||
* @see ValueTransformer
|
||||
* @see ValueTransformerWithKey
|
||||
* @see KStream#mapValues(ValueMapper)
|
||||
* @see KStream#mapValues(ValueMapperWithKey)
|
||||
* @see KStream#flatMapValues(ValueMapper)
|
||||
* @see KStream#flatMapValues(ValueMapperWithKey)
|
||||
* @see KTable#mapValues(ValueMapper)
|
||||
* @see KTable#mapValues(ValueMapperWithKey)
|
||||
*/
|
||||
|
||||
public interface ValueMapperWithKey<K, V, VR> {
|
||||
|
||||
/**
|
||||
* Map the given [key and ]value to a new value.
|
||||
*
|
||||
* @param readOnlyKey the read-only key
|
||||
* @param value the value to be mapped
|
||||
* @return the new value
|
||||
*/
|
||||
VR apply(final K readOnlyKey, final V value);
|
||||
}
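A minimal sketch; the key is passed in read-only, so the mapper may use it but must not change it. The stream is an assumption for illustration.

    // assuming an existing KStream<String, String> named `text`
    final KStream<String, Integer> sizes =
        text.mapValues((readOnlyKey, value) -> readOnlyKey.length() + value.length());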
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import java.time.Duration;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.errors.StreamsException;
|
||||
import org.apache.kafka.streams.processor.ProcessorContext;
|
||||
import org.apache.kafka.streams.processor.PunctuationType;
|
||||
import org.apache.kafka.streams.processor.Punctuator;
|
||||
import org.apache.kafka.streams.processor.StateStore;
|
||||
import org.apache.kafka.streams.processor.To;
|
||||
|
||||
/**
|
||||
* The {@code ValueTransformer} interface for stateful mapping of a value to a new value (with possible new type).
|
||||
* This is a stateful record-by-record operation, i.e., {@link #transform(Object)} is invoked individually for each
|
||||
* record of a stream and can access and modify a state that is available beyond a single call of
|
||||
* {@link #transform(Object)} (cf. {@link ValueMapper} for stateless value transformation).
|
||||
* Additionally, this {@code ValueTransformer} can {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule}
|
||||
* a method to be {@link Punctuator#punctuate(long) called periodically} with the provided context.
|
||||
* If {@code ValueTransformer} is applied to a {@link KeyValue} pair record the record's key is preserved.
|
||||
* <p>
|
||||
* Use {@link ValueTransformerSupplier} to provide new instances of {@code ValueTransformer} to Kafka Stream's runtime.
|
||||
* <p>
|
||||
* If a record's key and value should be modified {@link Transformer} can be used.
|
||||
*
|
||||
* @param <V> value type
|
||||
* @param <VR> transformed value type
|
||||
* @see ValueTransformerSupplier
|
||||
* @see ValueTransformerWithKeySupplier
|
||||
* @see KStream#transformValues(ValueTransformerSupplier, String...)
|
||||
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
|
||||
* @see Transformer
|
||||
*/
|
||||
public interface ValueTransformer<V, VR> {
|
||||
|
||||
/**
|
||||
* Initialize this transformer.
|
||||
* This is called once per instance when the topology gets initialized.
|
||||
* When the framework is done with the transformer, {@link #close()} will be called on it; the
|
||||
* framework may later re-use the transformer by calling {@link #init(ProcessorContext)} again.
|
||||
* <p>
|
||||
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
|
||||
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
|
||||
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
|
||||
* <p>
|
||||
* Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
|
||||
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object)}.
|
||||
* <p>
|
||||
* Note that using {@link ProcessorContext#forward(Object, Object)} or
|
||||
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within any method of
|
||||
* {@code ValueTransformer} and will result in an {@link StreamsException exception}.
|
||||
*
|
||||
* @param context the context
|
||||
* @throws IllegalStateException If store gets registered after initialization is already finished
|
||||
* @throws StreamsException if the store's change log does not contain the partition
|
||||
*/
|
||||
void init(final ProcessorContext context);
|
||||
|
||||
/**
|
||||
* Transform the given value to a new value.
|
||||
* Additionally, any {@link StateStore} that is {@link KStream#transformValues(ValueTransformerSupplier, String...)
|
||||
* attached} to this operator can be accessed and modified arbitrarily (cf.
|
||||
* {@link ProcessorContext#getStateStore(String)}).
|
||||
* <p>
|
||||
* Note, that using {@link ProcessorContext#forward(Object, Object)} or
|
||||
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within {@code transform} and
|
||||
* will result in an {@link StreamsException exception}.
|
||||
*
|
||||
* @param value the value to be transformed
|
||||
* @return the new value
|
||||
*/
|
||||
VR transform(final V value);
|
||||
|
||||
/**
|
||||
* Close this transformer and clean up any resources. The framework may
|
||||
* later re-use this transformer by calling {@link #init(ProcessorContext)} on it again.
|
||||
* <p>
|
||||
* It is not possible to return any new output records within {@code close()}.
|
||||
* Using {@link ProcessorContext#forward(Object, Object)} or {@link ProcessorContext#forward(Object, Object, To)}
|
||||
* will result in an {@link StreamsException exception}.
|
||||
*/
|
||||
void close();
|
||||
|
||||
}
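A minimal sketch of a stateful value transformation attached via transformValues; the store name "seen-store", the class name, and the duplicate-marking logic are hypothetical.

    import org.apache.kafka.streams.kstream.ValueTransformer;
    import org.apache.kafka.streams.processor.ProcessorContext;
    import org.apache.kafka.streams.state.KeyValueStore;

    final class DuplicateMarker implements ValueTransformer<String, String> {
        private KeyValueStore<String, Long> seen;

        @Override
        @SuppressWarnings("unchecked")
        public void init(final ProcessorContext context) {
            seen = (KeyValueStore<String, Long>) context.getStateStore("seen-store");
        }

        @Override
        public String transform(final String value) {
            final Long count = seen.get(value);
            seen.put(value, count == null ? 1L : count + 1L);
            return count == null ? value : value + " (duplicate)";   // the record key stays untouched
        }

        @Override
        public void close() { }
    }

    // wiring sketch: stream.transformValues(DuplicateMarker::new, "seen-store");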
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* A {@code ValueTransformerSupplier} interface which can create one or more {@link ValueTransformer} instances.
|
||||
*
|
||||
* @param <V> value type
|
||||
* @param <VR> transformed value type
|
||||
* @see ValueTransformer
|
||||
* @see ValueTransformerWithKey
|
||||
* @see ValueTransformerWithKeySupplier
|
||||
* @see KStream#transformValues(ValueTransformerSupplier, String...)
|
||||
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
|
||||
* @see Transformer
|
||||
* @see TransformerSupplier
|
||||
* @see KStream#transform(TransformerSupplier, String...)
|
||||
*/
|
||||
public interface ValueTransformerSupplier<V, VR> {
|
||||
|
||||
/**
|
||||
* Return a new {@link ValueTransformer} instance.
|
||||
*
|
||||
* @return a new {@link ValueTransformer} instance.
|
||||
*/
|
||||
ValueTransformer<V, VR> get();
|
||||
}
|
||||
@@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import java.time.Duration;
|
||||
import org.apache.kafka.streams.KeyValue;
|
||||
import org.apache.kafka.streams.errors.StreamsException;
|
||||
import org.apache.kafka.streams.processor.ProcessorContext;
|
||||
import org.apache.kafka.streams.processor.PunctuationType;
|
||||
import org.apache.kafka.streams.processor.Punctuator;
|
||||
import org.apache.kafka.streams.processor.StateStore;
|
||||
import org.apache.kafka.streams.processor.To;
|
||||
|
||||
/**
|
||||
* The {@code ValueTransformerWithKey} interface for stateful mapping of a value to a new value (with possible new type).
|
||||
* This is a stateful record-by-record operation, i.e., {@link #transform(Object, Object)} is invoked individually for each
|
||||
* record of a stream and can access and modify a state that is available beyond a single call of
|
||||
* {@link #transform(Object, Object)} (cf. {@link ValueMapper} for stateless value transformation).
|
||||
* Additionally, this {@code ValueTransformerWithKey} can
|
||||
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
|
||||
* {@link Punctuator#punctuate(long) called periodically} with the provided context.
|
||||
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
|
||||
* If {@code ValueTransformerWithKey} is applied to a {@link KeyValue} pair record the record's key is preserved.
|
||||
* <p>
|
||||
* Use {@link ValueTransformerWithKeySupplier} to provide new instances of {@link ValueTransformerWithKey} to
|
||||
* Kafka Stream's runtime.
|
||||
* <p>
|
||||
* If a record's key and value should be modified {@link Transformer} can be used.
|
||||
*
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VR> transformed value type
|
||||
* @see ValueTransformer
|
||||
* @see ValueTransformerWithKeySupplier
|
||||
* @see KStream#transformValues(ValueTransformerSupplier, String...)
|
||||
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
|
||||
* @see Transformer
|
||||
*/
|
||||
|
||||
public interface ValueTransformerWithKey<K, V, VR> {
|
||||
|
||||
/**
|
||||
* Initialize this transformer.
|
||||
* This is called once per instance when the topology gets initialized.
|
||||
* <p>
|
||||
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
|
||||
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
|
||||
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
|
||||
* <p>
|
||||
* Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
|
||||
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object, Object)}.
|
||||
* <p>
|
||||
* Note that using {@link ProcessorContext#forward(Object, Object)} or
|
||||
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within any method of
|
||||
* {@code ValueTransformerWithKey} and will result in an {@link StreamsException exception}.
|
||||
*
|
||||
* @param context the context
|
||||
* @throws IllegalStateException If store gets registered after initialization is already finished
|
||||
* @throws StreamsException if the store's change log does not contain the partition
|
||||
*/
|
||||
void init(final ProcessorContext context);
|
||||
|
||||
/**
|
||||
* Transform the given [key and ]value to a new value.
|
||||
* Additionally, any {@link StateStore} that is {@link KStream#transformValues(ValueTransformerWithKeySupplier, String...)
|
||||
* attached} to this operator can be accessed and modified arbitrarily (cf.
|
||||
* {@link ProcessorContext#getStateStore(String)}).
|
||||
* <p>
|
||||
* Note, that using {@link ProcessorContext#forward(Object, Object)} or
|
||||
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within {@code transform} and
|
||||
* will result in an {@link StreamsException exception}.
|
||||
*
|
||||
* @param readOnlyKey the read-only key
|
||||
* @param value the value to be transformed
|
||||
* @return the new value
|
||||
*/
|
||||
VR transform(final K readOnlyKey, final V value);
|
||||
|
||||
/**
|
||||
* Close this processor and clean up any resources.
|
||||
* <p>
|
||||
* It is not possible to return any new output records within {@code close()}.
|
||||
* Using {@link ProcessorContext#forward(Object, Object)} or {@link ProcessorContext#forward(Object, Object, To)},
|
||||
* will result in an {@link StreamsException exception}.
|
||||
*/
|
||||
void close();
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
/**
|
||||
* @param <K> key type
|
||||
* @param <V> value type
|
||||
* @param <VR> transformed value type
|
||||
* @see ValueTransformer
|
||||
* @see ValueTransformerWithKey
|
||||
* @see KStream#transformValues(ValueTransformerSupplier, String...)
|
||||
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
|
||||
* @see Transformer
|
||||
* @see TransformerSupplier
|
||||
* @see KStream#transform(TransformerSupplier, String...)
|
||||
*/
|
||||
public interface ValueTransformerWithKeySupplier<K, V, VR> {
|
||||
ValueTransformerWithKey<K, V, VR> get();
|
||||
}
|
||||
@@ -0,0 +1,140 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
/**
|
||||
* A single window instance, defined by its start and end timestamp.
|
||||
* {@code Window} is agnostic as to whether start/end boundaries are inclusive or exclusive; this is defined by concrete
|
||||
* window implementations.
|
||||
* <p>
|
||||
* To specify how {@code Window} boundaries are defined use {@link Windows}.
|
||||
* For time semantics, see {@link TimestampExtractor}.
|
||||
*
|
||||
* @see Windows
|
||||
* @see org.apache.kafka.streams.kstream.internals.TimeWindow
|
||||
* @see org.apache.kafka.streams.kstream.internals.SessionWindow
|
||||
* @see org.apache.kafka.streams.kstream.internals.UnlimitedWindow
|
||||
* @see TimestampExtractor
|
||||
*/
|
||||
public abstract class Window {
|
||||
|
||||
protected final long startMs;
|
||||
protected final long endMs;
|
||||
private final Instant startTime;
|
||||
private final Instant endTime;
|
||||
|
||||
|
||||
/**
|
||||
* Create a new window for the given start and end time.
|
||||
*
|
||||
* @param startMs the start timestamp of the window
|
||||
* @param endMs the end timestamp of the window
|
||||
* @throws IllegalArgumentException if {@code startMs} is negative or if {@code endMs} is smaller than {@code startMs}
|
||||
*/
|
||||
public Window(final long startMs, final long endMs) throws IllegalArgumentException {
|
||||
if (startMs < 0) {
|
||||
throw new IllegalArgumentException("Window startMs time cannot be negative.");
|
||||
}
|
||||
if (endMs < startMs) {
|
||||
throw new IllegalArgumentException("Window endMs time cannot be smaller than window startMs time.");
|
||||
}
|
||||
this.startMs = startMs;
|
||||
this.endMs = endMs;
|
||||
|
||||
this.startTime = Instant.ofEpochMilli(startMs);
|
||||
this.endTime = Instant.ofEpochMilli(endMs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the start timestamp of this window.
|
||||
*
|
||||
* @return The start timestamp of this window.
|
||||
*/
|
||||
public long start() {
|
||||
return startMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the end timestamp of this window.
|
||||
*
|
||||
* @return The end timestamp of this window.
|
||||
*/
|
||||
public long end() {
|
||||
return endMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the start time of this window.
|
||||
*
|
||||
* @return The start time of this window.
|
||||
*/
|
||||
public Instant startTime() {
|
||||
return startTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the end time of this window.
|
||||
*
|
||||
* @return The end time of this window.
|
||||
*/
|
||||
public Instant endTime() {
|
||||
return endTime;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the given window overlaps with this window.
|
||||
* Should throw an {@link IllegalArgumentException} if the {@code other} window has a different type than {@code
|
||||
* this} window.
|
||||
*
|
||||
* @param other another window of the same type
|
||||
* @return {@code true} if {@code other} overlaps with this window—{@code false} otherwise
|
||||
*/
|
||||
public abstract boolean overlap(final Window other);
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (obj == this) {
|
||||
return true;
|
||||
}
|
||||
if (obj == null) {
|
||||
return false;
|
||||
}
|
||||
if (getClass() != obj.getClass()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
final Window other = (Window) obj;
|
||||
return startMs == other.startMs && endMs == other.endMs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (int) (((startMs << 32) | endMs) % 0xFFFFFFFFL);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Window{" +
|
||||
"startMs=" + startMs +
|
||||
", endMs=" + endMs +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
|
||||
/**
|
||||
* The result key type of a windowed stream aggregation.
|
||||
* <p>
|
||||
* If a {@link KStream} gets grouped and aggregated using a window-aggregation the resulting {@link KTable} is a
|
||||
* so-called "windowed {@link KTable}" with a combined key type that encodes the corresponding aggregation window and
|
||||
* the original record key.
|
||||
* Thus, a windowed {@link KTable} has type {@code <Windowed<K>,V>}.
|
||||
*
|
||||
* @param <K> type of the key
|
||||
* @see KGroupedStream#windowedBy(Windows)
|
||||
* @see KGroupedStream#windowedBy(SessionWindows)
|
||||
*/
|
||||
public class Windowed<K> {
|
||||
|
||||
private final K key;
|
||||
|
||||
private final Window window;
|
||||
|
||||
public Windowed(final K key, final Window window) {
|
||||
this.key = key;
|
||||
this.window = window;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the key of the window.
|
||||
*
|
||||
* @return the key of the window
|
||||
*/
|
||||
public K key() {
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the window containing the values associated with this key.
|
||||
*
|
||||
* @return the window containing the values
|
||||
*/
|
||||
public Window window() {
|
||||
return window;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "[" + key + "@" + window.start() + "/" + window.end() + "]";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object obj) {
|
||||
if (obj == this) {
|
||||
return true;
|
||||
}
|
||||
if (!(obj instanceof Windowed)) {
|
||||
return false;
|
||||
}
|
||||
final Windowed<?> that = (Windowed) obj;
|
||||
return window.equals(that.window) && key.equals(that.key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final long n = ((long) window.hashCode() << 32) | key.hashCode();
|
||||
return (int) (n % 0xFFFFFFFFL);
|
||||
}
|
||||
}
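A minimal sketch of consuming a windowed aggregation result, where each key is a Windowed<K> combining the original key with its window; the topology fragment reuses the hypothetical names from the TimeWindows sketch further above.

    // assuming a hopping count as in the TimeWindows sketch (imports as there)
    final KTable<Windowed<String>, Long> counts =
        views.groupByKey()
             .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))
             .count();
    counts.toStream().foreach((windowedKey, count) ->
        System.out.println(windowedKey.key() + "@" + windowedKey.window().start() + " -> " + count));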
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.common.serialization.Deserializer;
|
||||
import org.apache.kafka.common.serialization.Serde;
|
||||
import org.apache.kafka.common.serialization.Serdes;
|
||||
import org.apache.kafka.common.serialization.Serializer;
|
||||
|
||||
public class WindowedSerdes {
|
||||
|
||||
static public class TimeWindowedSerde<T> extends Serdes.WrapperSerde<Windowed<T>> {
|
||||
// Default constructor needed for reflection object creation
|
||||
public TimeWindowedSerde() {
|
||||
super(new TimeWindowedSerializer<>(), new TimeWindowedDeserializer<>());
|
||||
}
|
||||
|
||||
public TimeWindowedSerde(final Serde<T> inner) {
|
||||
super(new TimeWindowedSerializer<>(inner.serializer()), new TimeWindowedDeserializer<>(inner.deserializer()));
|
||||
}
|
||||
|
||||
// This constructor can be used to serialize/deserialize a windowed topic
|
||||
public TimeWindowedSerde(final Serde<T> inner, final long windowSize) {
|
||||
super(new TimeWindowedSerializer<>(inner.serializer()), new TimeWindowedDeserializer<>(inner.deserializer(), windowSize));
|
||||
}
|
||||
|
||||
// Helper method for users to specify whether the input topic is a changelog topic for deserializing the key properly.
|
||||
public TimeWindowedSerde<T> forChangelog(final boolean isChangelogTopic) {
|
||||
final TimeWindowedDeserializer deserializer = (TimeWindowedDeserializer) this.deserializer();
|
||||
deserializer.setIsChangelogTopic(isChangelogTopic);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
static public class SessionWindowedSerde<T> extends Serdes.WrapperSerde<Windowed<T>> {
|
||||
// Default constructor needed for reflection object creation
|
||||
public SessionWindowedSerde() {
|
||||
super(new SessionWindowedSerializer<>(), new SessionWindowedDeserializer<>());
|
||||
}
|
||||
|
||||
public SessionWindowedSerde(final Serde<T> inner) {
|
||||
super(new SessionWindowedSerializer<>(inner.serializer()), new SessionWindowedDeserializer<>(inner.deserializer()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a {@code TimeWindowedSerde} object for the specified inner class type.
|
||||
*/
|
||||
static public <T> Serde<Windowed<T>> timeWindowedSerdeFrom(final Class<T> type) {
|
||||
return new TimeWindowedSerde<>(Serdes.serdeFrom(type));
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a {@code TimeWindowedSerde} object to deserialize changelog topic
|
||||
* for the specified inner class type and window size.
|
||||
*/
|
||||
static public <T> Serde<Windowed<T>> timeWindowedSerdeFrom(final Class<T> type, final long windowSize) {
|
||||
return new TimeWindowedSerde<>(Serdes.serdeFrom(type), windowSize);
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a {@code SessionWindowedSerde} object for the specified inner class type.
|
||||
*/
|
||||
static public <T> Serde<Windowed<T>> sessionWindowedSerdeFrom(final Class<T> type) {
|
||||
return new SessionWindowedSerde<>(Serdes.serdeFrom(type));
|
||||
}
|
||||
|
||||
static void verifyInnerSerializerNotNull(final Serializer inner,
|
||||
final Serializer wrapper) {
|
||||
if (inner == null) {
|
||||
throw new NullPointerException("Inner serializer is `null`. " +
|
||||
"User code must use constructor `" + wrapper.getClass().getSimpleName() + "(final Serializer<T> inner)` " +
|
||||
"instead of the no-arg constructor.");
|
||||
}
|
||||
}
|
||||
|
||||
static void verifyInnerDeserializerNotNull(final Deserializer inner,
|
||||
final Deserializer wrapper) {
|
||||
if (inner == null) {
|
||||
throw new NullPointerException("Inner deserializer is `null`. " +
|
||||
"User code must use constructor `" + wrapper.getClass().getSimpleName() + "(final Deserializer<T> inner)` " +
|
||||
"instead of the no-arg constructor.");
|
||||
}
|
||||
}
|
||||
}
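A minimal sketch of reading a topic whose keys were written with a time-windowed serializer, using the factory methods above; the topic name and window size are hypothetical.

    import java.time.Duration;

    import org.apache.kafka.common.serialization.Serde;
    import org.apache.kafka.common.serialization.Serdes;
    import org.apache.kafka.streams.kstream.Consumed;
    import org.apache.kafka.streams.kstream.Windowed;
    import org.apache.kafka.streams.kstream.WindowedSerdes;

    // assuming an existing StreamsBuilder named `builder`
    final long windowSizeMs = Duration.ofMinutes(5).toMillis();
    final Serde<Windowed<String>> windowedKeySerde =
        WindowedSerdes.timeWindowedSerdeFrom(String.class, windowSizeMs);
    builder.stream("five-minute-counts", Consumed.with(windowedKeySerde, Serdes.Long()));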
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.kafka.streams.kstream;
|
||||
|
||||
import org.apache.kafka.streams.processor.TimestampExtractor;
|
||||
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
|
||||
|
||||
/**
|
||||
* The window specification for fixed size windows that is used to define window boundaries and grace period.
|
||||
* <p>
|
||||
* Grace period defines how long to wait on out-of-order events. That is, windows will continue to accept new records until {@code stream_time >= window_end + grace_period}.
|
||||
* Records that arrive after the grace period has passed are considered <em>late</em> and will not be processed but are dropped.
|
||||
* <p>
|
||||
* Warning: It may be unsafe to use objects of this class in set- or map-like collections,
|
||||
* since the equals and hashCode methods depend on mutable fields.
|
||||
*
|
||||
* @param <W> type of the window instance
|
||||
* @see TimeWindows
|
||||
* @see UnlimitedWindows
|
||||
* @see JoinWindows
|
||||
* @see SessionWindows
|
||||
* @see TimestampExtractor
|
||||
*/
|
||||
public abstract class Windows<W extends Window> {
|
||||
|
||||
private long maintainDurationMs = DEFAULT_RETENTION_MS;
|
||||
@Deprecated public int segments = 3;
|
||||
|
||||
protected Windows() {}
|
||||
|
||||
@Deprecated // remove this constructor when we remove segments.
|
||||
Windows(final int segments) {
|
||||
this.segments = segments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the window maintain duration (retention time) in milliseconds.
|
||||
* This retention time is a guaranteed <i>lower bound</i> for how long a window will be maintained.
|
||||
*
|
||||
* @param durationMs the window retention time in milliseconds
|
||||
* @return itself
|
||||
* @throws IllegalArgumentException if {@code durationMs} is negative
|
||||
* @deprecated since 2.1. Use {@link Materialized#withRetention(Duration)}
|
||||
* or directly configure the retention in a store supplier and use {@link Materialized#as(WindowBytesStoreSupplier)}.
|
||||
*/
|
||||
@Deprecated
|
||||
public Windows<W> until(final long durationMs) throws IllegalArgumentException {
|
||||
if (durationMs < 0) {
|
||||
throw new IllegalArgumentException("Window retention time (durationMs) cannot be negative.");
|
||||
}
|
||||
maintainDurationMs = durationMs;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the window maintain duration (retention time) in milliseconds.
|
||||
*
|
||||
* @return the window maintain duration
|
||||
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public long maintainMs() {
|
||||
return maintainDurationMs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the number of segments to be used for rolling the window store.
|
||||
* This function is not exposed to users but can be called by developers that extend this class.
|
||||
*
|
||||
* @param segments the number of segments to be used
|
||||
* @return itself
|
||||
* @throws IllegalArgumentException if the specified number of segments is smaller than 2
* @deprecated since 2.1. Override segmentInterval() instead.
|
||||
*/
|
||||
@Deprecated
|
||||
protected Windows<W> segments(final int segments) throws IllegalArgumentException {
|
||||
if (segments < 2) {
|
||||
throw new IllegalArgumentException("Number of segments must be at least 2.");
|
||||
}
|
||||
this.segments = segments;
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create all windows that contain the provided timestamp, indexed by non-negative window start timestamps.
|
||||
*
|
||||
* @param timestamp the timestamp window should get created for
|
||||
* @return a map of {@code windowStartTimestamp -> Window} entries
|
||||
*/
|
||||
public abstract Map<Long, W> windowsFor(final long timestamp);
|
||||
|
||||
/**
|
||||
* Return the size of the specified windows in milliseconds.
|
||||
*
|
||||
* @return the size of the specified windows
|
||||
*/
|
||||
public abstract long size();
|
||||
|
||||
/**
|
||||
* Return the window grace period (the time to admit
* out-of-order events after the end of the window).
|
||||
*
|
||||
* Delay is defined as (stream_time - record_timestamp).
|
||||
*/
|
||||
public abstract long gracePeriodMs();
|
||||
}
|
||||
@@ -0,0 +1,139 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.kstream.internals;

import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.ValueJoiner;
import org.apache.kafka.streams.kstream.ValueMapper;
import org.apache.kafka.streams.kstream.ValueMapperWithKey;
import org.apache.kafka.streams.kstream.ValueTransformer;
import org.apache.kafka.streams.kstream.ValueTransformerSupplier;
import org.apache.kafka.streams.kstream.ValueTransformerWithKey;
import org.apache.kafka.streams.kstream.ValueTransformerWithKeySupplier;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;

import java.util.Collection;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

/*
 * Any classes (KTable, KStream, etc) extending this class should follow the serde specification precedence ordering as:
 *
 * 1) Overridden values via control objects (e.g. Materialized, Serialized, Consumed, etc)
 * 2) Serdes that can be inferred from the operator itself (e.g. groupBy().count(), where value serde can default to `LongSerde`).
 * 3) Serde inherited from parent operator if possible (note if the key / value types have been changed, then the corresponding serde cannot be inherited).
 * 4) Default serde specified in the config.
 */
public abstract class AbstractStream<K, V> {

    protected final String name;
    protected final Serde<K> keySerde;
    protected final Serde<V> valSerde;
    protected final Set<String> subTopologySourceNodes;
    protected final StreamsGraphNode streamsGraphNode;
    protected final InternalStreamsBuilder builder;

    // This copy-constructor allows extending the KStream
    // and KTable APIs with new methods without impacting the public interface.
    public AbstractStream(final AbstractStream<K, V> stream) {
        this.name = stream.name;
        this.builder = stream.builder;
        this.keySerde = stream.keySerde;
        this.valSerde = stream.valSerde;
        this.subTopologySourceNodes = stream.subTopologySourceNodes;
        this.streamsGraphNode = stream.streamsGraphNode;
    }

    AbstractStream(final String name,
                   final Serde<K> keySerde,
                   final Serde<V> valSerde,
                   final Set<String> subTopologySourceNodes,
                   final StreamsGraphNode streamsGraphNode,
                   final InternalStreamsBuilder builder) {
        if (subTopologySourceNodes == null || subTopologySourceNodes.isEmpty()) {
            throw new IllegalArgumentException("parameter <subTopologySourceNodes> must not be null or empty");
        }

        this.name = name;
        this.builder = builder;
        this.keySerde = keySerde;
        this.valSerde = valSerde;
        this.subTopologySourceNodes = subTopologySourceNodes;
        this.streamsGraphNode = streamsGraphNode;
    }

    // This method exposes the InternalTopologyBuilder instance
    // to subclasses that extend the AbstractStream class.
    protected InternalTopologyBuilder internalTopologyBuilder() {
        return builder.internalTopologyBuilder;
    }

    Set<String> ensureCopartitionWith(final Collection<? extends AbstractStream<K, ?>> otherStreams) {
        final Set<String> allSourceNodes = new HashSet<>(subTopologySourceNodes);
        for (final AbstractStream<K, ?> other : otherStreams) {
            allSourceNodes.addAll(other.subTopologySourceNodes);
        }
        builder.internalTopologyBuilder.copartitionSources(allSourceNodes);

        return allSourceNodes;
    }

    static <T2, T1, R> ValueJoiner<T2, T1, R> reverseJoiner(final ValueJoiner<T1, T2, R> joiner) {
        return (value2, value1) -> joiner.apply(value1, value2);
    }
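    // Illustrative only (not part of the original file): reverseJoiner swaps the argument
    // order, so a joiner written for (left, right) inputs can be reused for the symmetric
    // half of a join where the inputs arrive as (right, left). For example:
    //     ValueJoiner<String, Integer, String> joiner = (s, i) -> s + ":" + i;
    //     reverseJoiner(joiner).apply(7, "a");   // -> "a:7"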

    static <K, V, VR> ValueMapperWithKey<K, V, VR> withKey(final ValueMapper<V, VR> valueMapper) {
        Objects.requireNonNull(valueMapper, "valueMapper can't be null");
        return (readOnlyKey, value) -> valueMapper.apply(value);
    }

    static <K, V, VR> ValueTransformerWithKeySupplier<K, V, VR> toValueTransformerWithKeySupplier(
            final ValueTransformerSupplier<V, VR> valueTransformerSupplier) {
        Objects.requireNonNull(valueTransformerSupplier, "valueTransformerSupplier can't be null");
        return () -> {
            final ValueTransformer<V, VR> valueTransformer = valueTransformerSupplier.get();
            return new ValueTransformerWithKey<K, V, VR>() {
                @Override
                public void init(final ProcessorContext context) {
                    valueTransformer.init(context);
                }

                @Override
                public VR transform(final K readOnlyKey, final V value) {
                    return valueTransformer.transform(value);
                }

                @Override
                public void close() {
                    valueTransformer.close();
                }
            };
        };
    }

    // for testing only
    public Serde<K> keySerde() {
        return keySerde;
    }

    public Serde<V> valueSerde() {
        return valSerde;
    }
}
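A hedged, self-contained sketch of the serde precedence described in the class comment above (the topic name and types are invented for illustration): the Consumed serdes override everything (1), count() infers a Long value serde from the operator (2), groupByKey() inherits the key serde from its parent (3), and anything left unspecified falls back to the defaults in StreamsConfig (4).

    import org.apache.kafka.common.serialization.Serdes;
    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.kstream.Consumed;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.KTable;

    final StreamsBuilder streamsBuilder = new StreamsBuilder();
    final KStream<String, String> views = streamsBuilder.stream(
            "page-views",                                      // hypothetical topic name
            Consumed.with(Serdes.String(), Serdes.String()));  // (1) explicit serdes via a control object
    final KTable<String, Long> counts = views
            .groupByKey()                                      // (3) key serde inherited from the parent stream
            .count();                                          // (2) value serde inferred as Long by the operator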
@@ -0,0 +1,53 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.streams.kstream.internals;

import java.util.Objects;

public class Change<T> {

    public final T newValue;
    public final T oldValue;

    public Change(final T newValue, final T oldValue) {
        this.newValue = newValue;
        this.oldValue = oldValue;
    }

    @Override
    public String toString() {
        return "(" + newValue + "<-" + oldValue + ")";
    }

    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final Change<?> change = (Change<?>) o;
        return Objects.equals(newValue, change.newValue) &&
               Objects.equals(oldValue, change.oldValue);
    }

    @Override
    public int hashCode() {
        return Objects.hash(newValue, oldValue);
    }
}
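A minimal usage sketch (not part of this diff) showing how Change pairs the new and old value of a KTable update; the values are arbitrary:

    final Change<Long> update = new Change<>(42L, 41L);
    System.out.println(update);                                  // prints "(42<-41)"
    System.out.println(update.equals(new Change<>(42L, 41L)));   // true: equality is value-based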
Some files were not shown because too many files have changed in this diff.