Add km module kafka

Author: leewei
Date:   2023-02-14 16:27:47 +08:00
parent 229140f067
commit 0b8160a714
4039 changed files with 718112 additions and 46204 deletions

streams/.gitignore vendored Normal file

@@ -0,0 +1 @@
/bin/


@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.pageview;
import com.fasterxml.jackson.databind.JsonNode;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.TimestampExtractor;
/**
* A timestamp extractor implementation that tries to extract the event time from
* the "timestamp" field in the JSON-formatted message.
*/
public class JsonTimestampExtractor implements TimestampExtractor {
@Override
public long extract(final ConsumerRecord<Object, Object> record, final long partitionTime) {
if (record.value() instanceof PageViewTypedDemo.PageView) {
return ((PageViewTypedDemo.PageView) record.value()).timestamp;
}
if (record.value() instanceof PageViewTypedDemo.UserProfile) {
return ((PageViewTypedDemo.UserProfile) record.value()).timestamp;
}
if (record.value() instanceof JsonNode) {
return ((JsonNode) record.value()).get("timestamp").longValue();
}
throw new IllegalArgumentException("JsonTimestampExtractor cannot recognize the record value " + record.value());
}
}
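
The extractor above throws on unrecognized payloads. As a hedged aside (not part of this commit; the class name is hypothetical), the partitionTime argument passed to extract() offers a natural fallback when a payload carries no usable timestamp. A minimal lenient variant could look like this:

package org.apache.kafka.streams.examples.pageview;

import com.fasterxml.jackson.databind.JsonNode;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.TimestampExtractor;

// Hypothetical sketch, not in this commit: fall back to partition time instead of throwing.
public class LenientJsonTimestampExtractor implements TimestampExtractor {
    @Override
    public long extract(final ConsumerRecord<Object, Object> record, final long partitionTime) {
        if (record.value() instanceof JsonNode) {
            final JsonNode ts = ((JsonNode) record.value()).get("timestamp");
            if (ts != null && ts.canConvertToLong()) {
                return ts.longValue();
            }
        }
        // assumption: callers accept the partition time when no usable "timestamp" field is present
        return partitionTime;
    }
}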


@@ -0,0 +1,249 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.pageview;
import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.time.Duration;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.TimeWindows;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* Demonstrates how to perform a join between a KStream and a KTable, i.e. an example of a stateful computation,
* using specific data types (here: JSON POJO; but can also be Avro specific bindings, etc.) for serdes
* in Kafka Streams.
*
* In this example, we join a stream of pageviews (aka clickstreams) that reads from a topic named "streams-pageview-input"
* with a user profile table that reads from a topic named "streams-userprofile-input", where the data format
* is a JSON string representing a record in the stream or table, to compute the number of pageviews per user region.
*
* Before running this example you must create the input topics and the output topic (e.g. via
* bin/kafka-topics.sh --create ...), and write some data to the input topics (e.g. via
* bin/kafka-console-producer.sh). Otherwise you won't see any data arriving in the output topic.
*
* The inputs for this example are:
* - Topic: streams-pageview-input
* Key Format: (String) USER_ID
* Value Format: (JSON) {"_t": "pv", "user": (String USER_ID), "page": (String PAGE_ID), "timestamp": (long ms TIMESTAMP)}
*
* - Topic: streams-userprofile-input
* Key Format: (String) USER_ID
* Value Format: (JSON) {"_t": "up", "region": (String REGION), "timestamp": (long ms TIMESTAMP)}
*
* To observe the results, read the output topic (e.g., via bin/kafka-console-consumer)
* - Topic: streams-pageviewstats-typed-output
* Key Format: (JSON) {"_t": "wpvbr", "windowStart": (long ms WINDOW_TIMESTAMP), "region": (String REGION)}
* Value Format: (JSON) {"_t": "rc", "count": (long REGION_COUNT), "region": (String REGION)}
*
* Note, the "_t" field is necessary to help Jackson identify the correct class for deserialization in the
* generic {@link JSONSerde}. If you instead specify a specific serde per class, you won't need the extra "_t" field.
*/
@SuppressWarnings({"WeakerAccess", "unused"})
public class PageViewTypedDemo {
/**
* A serde for any class that implements {@link JSONSerdeCompatible}. Note that the classes also need to
* be registered in the {@code @JsonSubTypes} annotation on {@link JSONSerdeCompatible}.
*
* @param <T> The concrete type of the class that gets de/serialized
*/
public static class JSONSerde<T extends JSONSerdeCompatible> implements Serializer<T>, Deserializer<T>, Serde<T> {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {}
@SuppressWarnings("unchecked")
@Override
public T deserialize(final String topic, final byte[] data) {
if (data == null) {
return null;
}
try {
return (T) OBJECT_MAPPER.readValue(data, JSONSerdeCompatible.class);
} catch (final IOException e) {
throw new SerializationException(e);
}
}
@Override
public byte[] serialize(final String topic, final T data) {
if (data == null) {
return null;
}
try {
return OBJECT_MAPPER.writeValueAsBytes(data);
} catch (final Exception e) {
throw new SerializationException("Error serializing JSON message", e);
}
}
@Override
public void close() {}
@Override
public Serializer<T> serializer() {
return this;
}
@Override
public Deserializer<T> deserializer() {
return this;
}
}
/**
* An interface for registering types that can be de/serialized with {@link JSONSerde}.
*/
@SuppressWarnings("DefaultAnnotationParam") // being explicit for the example
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "_t")
@JsonSubTypes({
@JsonSubTypes.Type(value = PageView.class, name = "pv"),
@JsonSubTypes.Type(value = UserProfile.class, name = "up"),
@JsonSubTypes.Type(value = PageViewByRegion.class, name = "pvbr"),
@JsonSubTypes.Type(value = WindowedPageViewByRegion.class, name = "wpvbr"),
@JsonSubTypes.Type(value = RegionCount.class, name = "rc")
})
public interface JSONSerdeCompatible {
}
// POJO classes
public static class PageView implements JSONSerdeCompatible {
public String user;
public String page;
public Long timestamp;
}
public static class UserProfile implements JSONSerdeCompatible {
public String region;
public Long timestamp;
}
public static class PageViewByRegion implements JSONSerdeCompatible {
public String user;
public String page;
public String region;
}
public static class WindowedPageViewByRegion implements JSONSerdeCompatible {
public long windowStart;
public String region;
}
public static class RegionCount implements JSONSerdeCompatible {
public long count;
public String region;
}
public static void main(final String[] args) {
final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-typed");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, JSONSerde.class);
props.put(StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS, JSONSerde.class);
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, JSONSerde.class);
props.put(StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS, JSONSerde.class);
props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
props.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 1000);
// setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, PageView> views = builder.stream("streams-pageview-input", Consumed.with(Serdes.String(), new JSONSerde<>()));
final KTable<String, UserProfile> users = builder.table("streams-userprofile-input", Consumed.with(Serdes.String(), new JSONSerde<>()));
final KStream<WindowedPageViewByRegion, RegionCount> regionCount = views
.leftJoin(users, (view, profile) -> {
final PageViewByRegion viewByRegion = new PageViewByRegion();
viewByRegion.user = view.user;
viewByRegion.page = view.page;
if (profile != null) {
viewByRegion.region = profile.region;
} else {
viewByRegion.region = "UNKNOWN";
}
return viewByRegion;
})
.map((user, viewRegion) -> new KeyValue<>(viewRegion.region, viewRegion))
.groupByKey(Grouped.with(Serdes.String(), new JSONSerde<>()))
.windowedBy(TimeWindows.of(Duration.ofDays(7)).advanceBy(Duration.ofSeconds(1)))
.count()
.toStream()
.map((key, value) -> {
final WindowedPageViewByRegion wViewByRegion = new WindowedPageViewByRegion();
wViewByRegion.windowStart = key.window().start();
wViewByRegion.region = key.key();
final RegionCount rCount = new RegionCount();
rCount.region = key.key();
rCount.count = value;
return new KeyValue<>(wViewByRegion, rCount);
});
// write to the result topic
regionCount.to("streams-pageviewstats-typed-output");
final KafkaStreams streams = new KafkaStreams(builder.build(), props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-pipe-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (final Throwable e) {
e.printStackTrace();
System.exit(1);
}
System.exit(0);
}
}
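
To make the "_t" discriminator described in the Javadoc above concrete, here is a minimal round-trip sketch (not part of this commit; the class name and sample values are made up) that serializes a PageView with the JSONSerde defined in this file and prints the resulting JSON:

package org.apache.kafka.streams.examples.pageview;

import java.nio.charset.StandardCharsets;
import org.apache.kafka.streams.examples.pageview.PageViewTypedDemo.JSONSerde;
import org.apache.kafka.streams.examples.pageview.PageViewTypedDemo.PageView;

// Hypothetical sketch, not in this commit: JSONSerde round trip showing the "_t" type tag.
public class JsonSerdeRoundTrip {
    public static void main(final String[] args) {
        try (final JSONSerde<PageView> serde = new JSONSerde<>()) {
            final PageView view = new PageView();
            view.user = "alice";
            view.page = "index.html";
            view.timestamp = 1L;
            final byte[] bytes = serde.serialize("streams-pageview-input", view);
            // prints something like {"_t":"pv","user":"alice","page":"index.html","timestamp":1}
            System.out.println(new String(bytes, StandardCharsets.UTF_8));
            final PageView back = serde.deserialize("streams-pageview-input", bytes);
            System.out.println(back.user + " viewed " + back.page);
        }
    }
}

Because the type id travels in the payload, a single JSONSerde can serve as the default serde for every record type in the demo, which is exactly how the StreamsConfig above is set up.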


@@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.pageview;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.fasterxml.jackson.databind.node.ObjectNode;
import java.time.Duration;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.connect.json.JsonDeserializer;
import org.apache.kafka.connect.json.JsonSerializer;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.TimeWindows;
import java.util.Properties;
/**
* Demonstrates how to perform a join between a KStream and a KTable, i.e. an example of a stateful computation,
* using general data types (here: JSON; but can also be Avro generic bindings, etc.) for serdes
* in Kafka Streams.
*
* In this example, we join a stream of pageviews (aka clickstreams) that reads from a topic named "streams-pageview-input"
* with a user profile table that reads from a topic named "streams-userprofile-input", where the data format
* is a JSON string representing a record in the stream or table, to compute the number of pageviews per user region.
*
* Before running this example you must create the input topics and the output topic (e.g. via
* bin/kafka-topics.sh --create ...), and write some data to the input topics (e.g. via
* bin/kafka-console-producer.sh). Otherwise you won't see any data arriving in the output topic.
*/
public class PageViewUntypedDemo {
public static void main(final String[] args) throws Exception {
final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pageview-untyped");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.DEFAULT_TIMESTAMP_EXTRACTOR_CLASS_CONFIG, JsonTimestampExtractor.class);
props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
// setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
final StreamsBuilder builder = new StreamsBuilder();
final Serializer<JsonNode> jsonSerializer = new JsonSerializer();
final Deserializer<JsonNode> jsonDeserializer = new JsonDeserializer();
final Serde<JsonNode> jsonSerde = Serdes.serdeFrom(jsonSerializer, jsonDeserializer);
final Consumed<String, JsonNode> consumed = Consumed.with(Serdes.String(), jsonSerde);
final KStream<String, JsonNode> views = builder.stream("streams-pageview-input", consumed);
final KTable<String, JsonNode> users = builder.table("streams-userprofile-input", consumed);
final KTable<String, String> userRegions = users.mapValues(record -> record.get("region").textValue());
final KStream<JsonNode, JsonNode> regionCount = views
.leftJoin(userRegions, (view, region) -> {
final ObjectNode jNode = JsonNodeFactory.instance.objectNode();
return (JsonNode) jNode.put("user", view.get("user").textValue())
.put("page", view.get("page").textValue())
.put("region", region == null ? "UNKNOWN" : region);
})
.map((user, viewRegion) -> new KeyValue<>(viewRegion.get("region").textValue(), viewRegion))
.groupByKey(Grouped.with(Serdes.String(), jsonSerde))
.windowedBy(TimeWindows.of(Duration.ofDays(7)).advanceBy(Duration.ofSeconds(1)))
.count()
.toStream()
.map((key, value) -> {
final ObjectNode keyNode = JsonNodeFactory.instance.objectNode();
keyNode.put("window-start", key.window().start())
.put("region", key.key());
final ObjectNode valueNode = JsonNodeFactory.instance.objectNode();
valueNode.put("count", value);
return new KeyValue<>((JsonNode) keyNode, (JsonNode) valueNode);
});
// write to the result topic
regionCount.to("streams-pageviewstats-untyped-output", Produced.with(jsonSerde, jsonSerde));
final KafkaStreams streams = new KafkaStreams(builder.build(), props);
streams.start();
// usually the stream application would be running forever,
// in this example we just let it run for some time and stop since the input data is finite.
Thread.sleep(5000L);
streams.close();
}
}


@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.pipe;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* Demonstrates, using the high-level KStream DSL, how to read data from a source (input) topic and how to
* write data to a sink (output) topic.
*
* In this example, we implement a simple "pipe" program that reads from a source topic "streams-plaintext-input"
* and writes the data as-is (i.e. unmodified) into a sink topic "streams-pipe-output".
*
* Before running this example you must create the input topic and the output topic (e.g. via
* bin/kafka-topics.sh --create ...), and write some data to the input topic (e.g. via
* bin/kafka-console-producer.sh). Otherwise you won't see any data arriving in the output topic.
*/
public class PipeDemo {
public static void main(final String[] args) {
final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
// setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
final StreamsBuilder builder = new StreamsBuilder();
builder.stream("streams-plaintext-input").to("streams-pipe-output");
final KafkaStreams streams = new KafkaStreams(builder.build(), props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-pipe-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (final Throwable e) {
System.exit(1);
}
System.exit(0);
}
}
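
Since the pipe topology needs nothing beyond its two topics, it can also be exercised without a broker. A hedged sketch (not part of this commit; the class name is hypothetical and it assumes the kafka-streams-test-utils artifact is on the classpath) using the same TopologyTestDriver and test topics that appear later in this commit:

package org.apache.kafka.streams.examples.pipe;

import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.apache.kafka.streams.TopologyTestDriver;

// Hypothetical sketch, not in this commit: drive the pipe topology with the test driver.
public class PipeDemoDriver {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        builder.stream("streams-plaintext-input").to("streams-pipe-output");

        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe-test");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        try (final TopologyTestDriver driver = new TopologyTestDriver(builder.build(), props)) {
            final TestInputTopic<String, String> input =
                driver.createInputTopic("streams-plaintext-input", new StringSerializer(), new StringSerializer());
            final TestOutputTopic<String, String> output =
                driver.createOutputTopic("streams-pipe-output", new StringDeserializer(), new StringDeserializer());
            input.pipeInput("key", "hello streams");
            // the record comes out of the sink topic unmodified
            System.out.println(output.readKeyValue());
        }
    }
}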


@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.temperature;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.kstream.WindowedSerdes;
import java.time.Duration;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* Demonstrates, using the high-level KStream DSL, how to implement an IoT demo application
* which ingests temperature values, computes the maximum value within the latest TEMPERATURE_WINDOW_SIZE seconds
* (which is 5 seconds), and sends a new message if it exceeds the TEMPERATURE_THRESHOLD (which is 20).
*
* In this example, the input stream reads from a topic named "iot-temperature", where the values of messages
* represent temperature values; using a TEMPERATURE_WINDOW_SIZE seconds "tumbling" window, the maximum value is processed and
* sent to a topic named "iot-temperature-max" if it exceeds the TEMPERATURE_THRESHOLD.
*
* Before running this example you must create the input topic for temperature values in the following way:
*
* bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic iot-temperature
*
* and at the same time the output topic for filtered values:
*
* bin/kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic iot-temperature-max
*
* After that, a console consumer can be started in order to read filtered values from the "iot-temperature-max" topic:
*
* bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic iot-temperature-max --from-beginning
*
* On the other side, a console producer can be used for sending temperature values (which need to be integers)
* to "iot-temperature" by typing them on the console:
*
* bin/kafka-console-producer.sh --broker-list localhost:9092 --topic iot-temperature
* > 10
* > 15
* > 22
*/
public class TemperatureDemo {
// threshold used for filtering max temperature values
private static final int TEMPERATURE_THRESHOLD = 20;
// window size within which the filtering is applied
private static final int TEMPERATURE_WINDOW_SIZE = 5;
public static void main(final String[] args) {
final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-temperature");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
final StreamsBuilder builder = new StreamsBuilder();
final KStream<String, String> source = builder.stream("iot-temperature");
final KStream<Windowed<String>, String> max = source
// temperature values are sent without a key (null), so in order
// to group and reduce them, a key is needed ("temp" has been chosen)
.selectKey((key, value) -> "temp")
.groupByKey()
.windowedBy(TimeWindows.of(Duration.ofSeconds(TEMPERATURE_WINDOW_SIZE)))
.reduce((value1, value2) -> {
if (Integer.parseInt(value1) > Integer.parseInt(value2)) {
return value1;
} else {
return value2;
}
})
.toStream()
.filter((key, value) -> Integer.parseInt(value) > TEMPERATURE_THRESHOLD);
final Serde<Windowed<String>> windowedSerde = WindowedSerdes.timeWindowedSerdeFrom(String.class);
// need to override key serde to Windowed<String> type
max.to("iot-temperature-max", Produced.with(windowedSerde, Serdes.String()));
final KafkaStreams streams = new KafkaStreams(builder.build(), props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-temperature-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (final Throwable e) {
System.exit(1);
}
System.exit(0);
}
}
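
The sink topic carries Windowed<String> keys, so a plain consumer needs a windowed key deserializer to read them back. A hedged sketch (not part of this commit; the class name is hypothetical and the window size handed to the deserializer is an assumption matching TEMPERATURE_WINDOW_SIZE):

package org.apache.kafka.streams.examples.temperature;

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
import org.apache.kafka.streams.kstream.Windowed;

// Hypothetical sketch, not in this commit: read the windowed keys written to "iot-temperature-max".
public class MaxTemperatureReader {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "iot-temperature-max-reader");
        props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // assumption: the window size matches TEMPERATURE_WINDOW_SIZE (5 seconds) used by the demo
        final TimeWindowedDeserializer<String> keyDeserializer =
            new TimeWindowedDeserializer<>(new StringDeserializer(), Duration.ofSeconds(5).toMillis());
        try (final KafkaConsumer<Windowed<String>, String> consumer =
                 new KafkaConsumer<>(props, keyDeserializer, new StringDeserializer())) {
            consumer.subscribe(Collections.singletonList("iot-temperature-max"));
            while (true) {
                for (final ConsumerRecord<Windowed<String>, String> record : consumer.poll(Duration.ofSeconds(1))) {
                    // print the window start and the max temperature observed in that window
                    System.out.println(record.key().key() + "@" + record.key().window().start() + " -> " + record.value());
                }
            }
        }
    }
}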


@@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.wordcount;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;
import java.util.Arrays;
import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* Demonstrates, using the high-level KStream DSL, how to implement the WordCount program
* that computes a simple word occurrence histogram from an input text.
* <p>
* In this example, the input stream reads from a topic named "streams-plaintext-input", where the values of messages
* represent lines of text; and the histogram output is written to topic "streams-wordcount-output" where each record
* is an updated count of a single word.
* <p>
* Before running this example you must create the input topic and the output topic (e.g. via
* {@code bin/kafka-topics.sh --create ...}), and write some data to the input topic (e.g. via
* {@code bin/kafka-console-producer.sh}). Otherwise you won't see any data arriving in the output topic.
*/
public final class WordCountDemo {
public static final String INPUT_TOPIC = "streams-plaintext-input";
public static final String OUTPUT_TOPIC = "streams-wordcount-output";
static Properties getStreamsConfig() {
final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
// setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
// Note: To re-run the demo, you need to use the offset reset tool:
// https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
return props;
}
static void createWordCountStream(final StreamsBuilder builder) {
final KStream<String, String> source = builder.stream(INPUT_TOPIC);
final KTable<String, Long> counts = source
.flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split(" ")))
.groupBy((key, value) -> value)
.count();
// need to override value serde to Long type
counts.toStream().to(OUTPUT_TOPIC, Produced.with(Serdes.String(), Serdes.Long()));
}
public static void main(final String[] args) {
final Properties props = getStreamsConfig();
final StreamsBuilder builder = new StreamsBuilder();
createWordCountStream(builder);
final KafkaStreams streams = new KafkaStreams(builder.build(), props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-wordcount-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (final Throwable e) {
System.exit(1);
}
System.exit(0);
}
}


@@ -0,0 +1,150 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.wordcount;
import java.time.Duration;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.Stores;
import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* Demonstrates, using the low-level Processor APIs, how to implement the WordCount program
* that computes a simple word occurrence histogram from an input text.
* <p>
* <strong>Note: This is simplified code that only works correctly for single partition input topics.
* Check out {@link WordCountDemo} for a generic example.</strong>
* <p>
* In this example, the input stream reads from a topic named "streams-plaintext-input", where the values of messages
* represent lines of text; and the histogram output is written to topic "streams-wordcount-processor-output" where each record
* is an updated count of a single word.
* <p>
* Before running this example you must create the input topic and the output topic (e.g. via
* {@code bin/kafka-topics.sh --create ...}), and write some data to the input topic (e.g. via
* {@code bin/kafka-console-producer.sh}). Otherwise you won't see any data arriving in the output topic.
*/
public final class WordCountProcessorDemo {
static class MyProcessorSupplier implements ProcessorSupplier<String, String> {
@Override
public Processor<String, String> get() {
return new Processor<String, String>() {
private ProcessorContext context;
private KeyValueStore<String, Integer> kvStore;
@Override
@SuppressWarnings("unchecked")
public void init(final ProcessorContext context) {
this.context = context;
this.context.schedule(Duration.ofSeconds(1), PunctuationType.STREAM_TIME, timestamp -> {
try (final KeyValueIterator<String, Integer> iter = kvStore.all()) {
System.out.println("----------- " + timestamp + " ----------- ");
while (iter.hasNext()) {
final KeyValue<String, Integer> entry = iter.next();
System.out.println("[" + entry.key + ", " + entry.value + "]");
context.forward(entry.key, entry.value.toString());
}
}
});
this.kvStore = (KeyValueStore<String, Integer>) context.getStateStore("Counts");
}
@Override
public void process(final String dummy, final String line) {
final String[] words = line.toLowerCase(Locale.getDefault()).split(" ");
for (final String word : words) {
final Integer oldValue = this.kvStore.get(word);
if (oldValue == null) {
this.kvStore.put(word, 1);
} else {
this.kvStore.put(word, oldValue + 1);
}
}
context.commit();
}
@Override
public void close() {}
};
}
}
public static void main(final String[] args) {
final Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount-processor");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
// setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
final Topology builder = new Topology();
builder.addSource("Source", "streams-plaintext-input");
builder.addProcessor("Process", new MyProcessorSupplier(), "Source");
builder.addStateStore(Stores.keyValueStoreBuilder(
Stores.inMemoryKeyValueStore("Counts"),
Serdes.String(),
Serdes.Integer()),
"Process");
builder.addSink("Sink", "streams-wordcount-processor-output", "Process");
final KafkaStreams streams = new KafkaStreams(builder, props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-wordcount-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (final Throwable e) {
System.exit(1);
}
System.exit(0);
}
}


@@ -0,0 +1,187 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.docs;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.state.KeyValueIterator;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.Stores;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.time.Duration;
import java.time.Instant;
import java.util.Properties;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.core.Is.is;
/**
* This is the code sample from docs/streams/developer-guide/testing.html
*/
public class DeveloperGuideTesting {
private TopologyTestDriver testDriver;
private TestInputTopic<String, Long> inputTopic;
private TestOutputTopic<String, Long> outputTopic;
private KeyValueStore<String, Long> store;
private Serde<String> stringSerde = new Serdes.StringSerde();
private Serde<Long> longSerde = new Serdes.LongSerde();
@Before
public void setup() {
final Topology topology = new Topology();
topology.addSource("sourceProcessor", "input-topic");
topology.addProcessor("aggregator", new CustomMaxAggregatorSupplier(), "sourceProcessor");
topology.addStateStore(
Stores.keyValueStoreBuilder(
Stores.inMemoryKeyValueStore("aggStore"),
Serdes.String(),
Serdes.Long()).withLoggingDisabled(), // need to disable logging to allow store pre-populating
"aggregator");
topology.addSink("sinkProcessor", "result-topic", "aggregator");
// setup test driver
final Properties props = new Properties();
props.setProperty(StreamsConfig.APPLICATION_ID_CONFIG, "maxAggregation");
props.setProperty(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "dummy:1234");
props.setProperty(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
props.setProperty(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.Long().getClass().getName());
testDriver = new TopologyTestDriver(topology, props);
// setup test topics
inputTopic = testDriver.createInputTopic("input-topic", stringSerde.serializer(), longSerde.serializer());
outputTopic = testDriver.createOutputTopic("result-topic", stringSerde.deserializer(), longSerde.deserializer());
// pre-populate store
store = testDriver.getKeyValueStore("aggStore");
store.put("a", 21L);
}
@After
public void tearDown() {
testDriver.close();
}
@Test
public void shouldFlushStoreForFirstInput() {
inputTopic.pipeInput("a", 1L);
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
assertThat(outputTopic.isEmpty(), is(true));
}
@Test
public void shouldNotUpdateStoreForSmallerValue() {
inputTopic.pipeInput("a", 1L);
assertThat(store.get("a"), equalTo(21L));
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
assertThat(outputTopic.isEmpty(), is(true));
}
@Test
public void shouldNotUpdateStoreForLargerValue() {
inputTopic.pipeInput("a", 42L);
assertThat(store.get("a"), equalTo(42L));
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 42L)));
assertThat(outputTopic.isEmpty(), is(true));
}
@Test
public void shouldUpdateStoreForNewKey() {
inputTopic.pipeInput("b", 21L);
assertThat(store.get("b"), equalTo(21L));
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("b", 21L)));
assertThat(outputTopic.isEmpty(), is(true));
}
@Test
public void shouldPunctuateIfEventTimeAdvances() {
final Instant recordTime = Instant.now();
inputTopic.pipeInput("a", 1L, recordTime);
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
inputTopic.pipeInput("a", 1L, recordTime);
assertThat(outputTopic.isEmpty(), is(true));
inputTopic.pipeInput("a", 1L, recordTime.plusSeconds(10L));
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
assertThat(outputTopic.isEmpty(), is(true));
}
@Test
public void shouldPunctuateIfWallClockTimeAdvances() {
testDriver.advanceWallClockTime(Duration.ofSeconds(60));
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("a", 21L)));
assertThat(outputTopic.isEmpty(), is(true));
}
public class CustomMaxAggregatorSupplier implements ProcessorSupplier<String, Long> {
@Override
public Processor<String, Long> get() {
return new CustomMaxAggregator();
}
}
public class CustomMaxAggregator implements Processor<String, Long> {
ProcessorContext context;
private KeyValueStore<String, Long> store;
@SuppressWarnings("unchecked")
@Override
public void init(final ProcessorContext context) {
this.context = context;
context.schedule(Duration.ofSeconds(60), PunctuationType.WALL_CLOCK_TIME, time -> flushStore());
context.schedule(Duration.ofSeconds(10), PunctuationType.STREAM_TIME, time -> flushStore());
store = (KeyValueStore<String, Long>) context.getStateStore("aggStore");
}
@Override
public void process(final String key, final Long value) {
final Long oldValue = store.get(key);
if (oldValue == null || value > oldValue) {
store.put(key, value);
}
}
private void flushStore() {
final KeyValueIterator<String, Long> it = store.all();
while (it.hasNext()) {
final KeyValue<String, Long> next = it.next();
context.forward(next.key, next.value);
}
}
@Override
public void close() {}
}
}


@@ -0,0 +1,112 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.wordcount;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.TopologyTestDriver;
import org.apache.kafka.streams.TestInputTopic;
import org.apache.kafka.streams.TestOutputTopic;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
/**
* Unit test of {@link WordCountDemo} stream using TopologyTestDriver.
*/
public class WordCountDemoTest {
private TopologyTestDriver testDriver;
private TestInputTopic<String, String> inputTopic;
private TestOutputTopic<String, Long> outputTopic;
@Before
public void setup() {
final StreamsBuilder builder = new StreamsBuilder();
//Create Actual Stream Processing pipeline
WordCountDemo.createWordCountStream(builder);
testDriver = new TopologyTestDriver(builder.build(), WordCountDemo.getStreamsConfig());
inputTopic = testDriver.createInputTopic(WordCountDemo.INPUT_TOPIC, new StringSerializer(), new StringSerializer());
outputTopic = testDriver.createOutputTopic(WordCountDemo.OUTPUT_TOPIC, new StringDeserializer(), new LongDeserializer());
}
@After
public void tearDown() {
try {
testDriver.close();
} catch (final RuntimeException e) {
// https://issues.apache.org/jira/browse/KAFKA-6647 causes an exception when executed on Windows, so we ignore it.
// The logged stacktrace cannot be avoided.
System.out.println("Ignoring exception, test failing on Windows due to this exception: " + e.getLocalizedMessage());
}
}
/**
* Simple test validating count of one word
*/
@Test
public void testOneWord() {
// Feed the word "Hello" to the input topic with no Kafka key; the timestamp is irrelevant in this case
inputTopic.pipeInput("Hello");
//Read and validate output to match word as key and count as value
assertThat(outputTopic.readKeyValue(), equalTo(new KeyValue<>("hello", 1L)));
//No more output in topic
assertThat(outputTopic.isEmpty(), is(true));
}
/**
* Test Word count of sentence list.
*/
@Test
public void testCountListOfWords() {
final List<String> inputValues = Arrays.asList(
"Apache Kafka Streams Example",
"Using Kafka Streams Test Utils",
"Reading and Writing Kafka Topic"
);
final Map<String, Long> expectedWordCounts = new HashMap<>();
expectedWordCounts.put("apache", 1L);
expectedWordCounts.put("kafka", 3L);
expectedWordCounts.put("streams", 2L);
expectedWordCounts.put("example", 1L);
expectedWordCounts.put("using", 1L);
expectedWordCounts.put("test", 1L);
expectedWordCounts.put("utils", 1L);
expectedWordCounts.put("reading", 1L);
expectedWordCounts.put("and", 1L);
expectedWordCounts.put("writing", 1L);
expectedWordCounts.put("topic", 1L);
inputTopic.pipeValueList(inputValues);
final Map<String, Long> actualWordCounts = outputTopic.readKeyValuesToMap();
assertThat(actualWordCounts, equalTo(expectedWordCounts));
}
}


@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.examples.wordcount;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.processor.MockProcessorContext;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.Stores;
import org.junit.Test;
import java.util.Iterator;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
* Demonstrate the use of {@link MockProcessorContext} for testing the {@link Processor} in the {@link WordCountProcessorDemo}.
*/
public class WordCountProcessorTest {
@Test
public void test() {
final MockProcessorContext context = new MockProcessorContext();
// Create, initialize, and register the state store.
final KeyValueStore<String, Integer> store =
Stores.keyValueStoreBuilder(Stores.inMemoryKeyValueStore("Counts"), Serdes.String(), Serdes.Integer())
.withLoggingDisabled() // Changelog is not supported by MockProcessorContext.
.build();
store.init(context, store);
context.register(store, null);
// Create and initialize the processor under test
final Processor<String, String> processor = new WordCountProcessorDemo.MyProcessorSupplier().get();
processor.init(context);
// send a record to the processor
processor.process("key", "alpha beta gamma alpha");
// note that the processor commits, but does not forward, during process()
assertTrue(context.committed());
assertTrue(context.forwarded().isEmpty());
// now, we trigger the punctuator, which iterates over the state store and forwards the contents.
context.scheduledPunctuators().get(0).getPunctuator().punctuate(0L);
// finally, we can verify the output.
final Iterator<MockProcessorContext.CapturedForward> capturedForwards = context.forwarded().iterator();
assertEquals(new KeyValue<>("alpha", "2"), capturedForwards.next().keyValue());
assertEquals(new KeyValue<>("beta", "1"), capturedForwards.next().keyValue());
assertEquals(new KeyValue<>("gamma", "1"), capturedForwards.next().keyValue());
assertFalse(capturedForwards.hasNext());
}
}


@@ -0,0 +1,36 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<parent>
<groupId>org.apache.kafka</groupId>
<artifactId>streams-quickstart</artifactId>
<version>2.5.2-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<artifactId>streams-quickstart-java</artifactId>
<packaging>maven-archetype</packaging>
</project>


@@ -0,0 +1,34 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<archetype-descriptor
xmlns="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-archetype-plugin/archetype-descriptor/1.0.0 http://maven.apache.org/xsd/archetype-descriptor-1.0.0.xsd"
name="streams-quickstart-java">
<fileSets>
<fileSet filtered="true" packaged="true" encoding="UTF-8">
<directory>src/main/java</directory>
<includes>
<include>**/*.java</include>
</includes>
</fileSet>
<fileSet encoding="UTF-8">
<directory>src/main/resources</directory>
</fileSet>
</fileSets>
</archetype-descriptor>


@@ -0,0 +1,136 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>${groupId}</groupId>
<artifactId>${artifactId}</artifactId>
<version>${version}</version>
<packaging>jar</packaging>
<name>Kafka Streams Quickstart :: Java</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<kafka.version>2.5.2-SNAPSHOT</kafka.version>
<slf4j.version>1.7.7</slf4j.version>
<log4j.version>1.2.17</log4j.version>
</properties>
<repositories>
<repository>
<id>apache.snapshots</id>
<name>Apache Development Snapshot Repository</name>
<url>https://repository.apache.org/content/repositories/snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<!--
Execute "mvn clean package -Pbuild-jar"
to build a jar file out of this project!
-->
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
<pluginManagement>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
<compilerId>jdt</compilerId>
</configuration>
<dependencies>
<dependency>
<groupId>org.eclipse.tycho</groupId>
<artifactId>tycho-compiler-jdt</artifactId>
<version>0.21.0</version>
</dependency>
</dependencies>
</plugin>
<plugin>
<groupId>org.eclipse.m2e</groupId>
<artifactId>lifecycle-mapping</artifactId>
<version>1.0.0</version>
<configuration>
<lifecycleMappingMetadata>
<pluginExecutions>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<versionRange>[2.4,)</versionRange>
<goals>
<goal>single</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
<pluginExecution>
<pluginExecutionFilter>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<versionRange>[3.1,)</versionRange>
<goals>
<goal>testCompile</goal>
<goal>compile</goal>
</goals>
</pluginExecutionFilter>
<action>
<ignore/>
</action>
</pluginExecution>
</pluginExecutions>
</lifecycleMappingMetadata>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<dependencies>
<!-- Apache Kafka dependencies -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-streams</artifactId>
<version>${kafka.version}</version>
</dependency>
</dependencies>
</project>


@@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ${package};
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.ValueMapper;
import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* In this example, we implement a simple LineSplit program using the high-level Streams DSL
* that reads from a source topic "streams-plaintext-input", where the values of messages represent lines of text;
* the code splits each text line into words and then writes them back into a sink topic "streams-linesplit-output", where
* each record represents a single word.
*/
public class LineSplit {
public static void main(String[] args) throws Exception {
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-linesplit");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
final StreamsBuilder builder = new StreamsBuilder();
builder.<String, String>stream("streams-plaintext-input")
.flatMapValues(value -> Arrays.asList(value.split("\\W+")))
.to("streams-linesplit-output");
final Topology topology = builder.build();
final KafkaStreams streams = new KafkaStreams(topology, props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (Throwable e) {
System.exit(1);
}
System.exit(0);
}
}


@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ${package};
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* In this example, we implement a simple Pipe program using the high-level Streams DSL
* that reads from a source topic "streams-plaintext-input", where the values of messages represent lines of text,
* and writes the messages as-is into a sink topic "streams-pipe-output".
*/
public class Pipe {
public static void main(String[] args) throws Exception {
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
final StreamsBuilder builder = new StreamsBuilder();
builder.stream("streams-plaintext-input").to("streams-pipe-output");
final Topology topology = builder.build();
final KafkaStreams streams = new KafkaStreams(topology, props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (Throwable e) {
System.exit(1);
}
System.exit(0);
}
}
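Since Pipe is the smallest possible topology, it is also a convenient place to show Topology#describe(), which prints the wired sources, processors and sinks before the application is started; the class name below is only illustrative.

import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;

public class DescribePipeSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        builder.stream("streams-plaintext-input").to("streams-pipe-output");
        final Topology topology = builder.build();
        // TopologyDescription lists the sub-topologies with their source topics, processors and sinks.
        System.out.println(topology.describe());
    }
}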

View File

@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ${package};
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.KeyValueMapper;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.ValueMapper;
import org.apache.kafka.streams.state.KeyValueStore;
import java.util.Arrays;
import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
/**
* In this example, we implement a simple WordCount program using the high-level Streams DSL
* that reads from a source topic "streams-plaintext-input", where the values of messages represent lines of text,
* splits each text line into words, computes the word occurrence histogram, and writes the continuously updated histogram
* into a topic "streams-wordcount-output" where each record is an updated count of a single word.
*/
public class WordCount {
public static void main(String[] args) throws Exception {
Properties props = new Properties();
props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
final StreamsBuilder builder = new StreamsBuilder();
builder.<String, String>stream("streams-plaintext-input")
.flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split("\\W+")))
.groupBy((key, value) -> value)
.count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"))
.toStream()
.to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
final Topology topology = builder.build();
final KafkaStreams streams = new KafkaStreams(topology, props);
final CountDownLatch latch = new CountDownLatch(1);
// attach shutdown handler to catch control-c
Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
@Override
public void run() {
streams.close();
latch.countDown();
}
});
try {
streams.start();
latch.await();
} catch (Throwable e) {
System.exit(1);
}
System.exit(0);
}
}
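Because the count is materialized as "counts-store", a running instance of the application above can serve point lookups via interactive queries. A hedged sketch follows; the helper class is illustrative, and it assumes the KafkaStreams instance has reached the RUNNING state.

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

public final class WordCountQuerySketch {
    // Look up the current count of a word on this instance; returns null if the word
    // has not been seen locally (other instances may host it).
    public static Long countFor(final KafkaStreams streams, final String word) {
        final ReadOnlyKeyValueStore<String, Long> counts = streams.store(
            StoreQueryParameters.fromNameAndType("counts-store", QueryableStoreTypes.<String, Long>keyValueStore()));
        return counts.get(word);
    }
}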

View File

@@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
log4j.rootLogger=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss,SSS} %-5p %-60c %x - %m%n

View File

@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
groupId=org.apache.kafka.archtypetest
version=0.1
artifactId=basic
package=org.apache.kafka.archetypetest

View File

@@ -0,0 +1 @@
compile

121
streams/quickstart/pom.xml Normal file
View File

@@ -0,0 +1,121 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.kafka</groupId>
<artifactId>streams-quickstart</artifactId>
<packaging>pom</packaging>
<version>2.5.2-SNAPSHOT</version>
<name>Kafka Streams :: Quickstart</name>
<parent>
<groupId>org.apache</groupId>
<artifactId>apache</artifactId>
<version>18</version>
</parent>
<modules>
<module>java</module>
</modules>
<build>
<extensions>
<extension>
<groupId>org.apache.maven.archetype</groupId>
<artifactId>archetype-packaging</artifactId>
<version>2.2</version>
</extension>
</extensions>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-archetype-plugin</artifactId>
<version>2.2</version>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<plugin>
<artifactId>maven-archetype-plugin</artifactId>
<version>2.2</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<!-- deactivate the shade plugin for the quickstart archetypes -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.1.0</version>
<executions>
<execution>
<phase/>
</execution>
</executions>
</plugin>
<plugin>
<groupId>com.github.siom79.japicmp</groupId>
<artifactId>japicmp-maven-plugin</artifactId>
<version>0.11.0</version>
<configuration>
<skip>true</skip>
</configuration>
</plugin>
<!-- use alternative delimiter for filtering resources -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<configuration>
<useDefaultDelimiters>false</useDefaultDelimiters>
<delimiters>
<delimiter>@</delimiter>
</delimiters>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<version>1.6</version>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
<configuration>
<keyname>${gpg.keyname}</keyname>
<passphraseServerId>${gpg.keyname}</passphraseServerId>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
</build>
</project>

View File

@@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.processor.StateStore;
import java.util.Map;
/**
* {@code KafkaClientSupplier} can be used to provide custom Kafka clients to a {@link KafkaStreams} instance.
*
* @see KafkaStreams#KafkaStreams(Topology, java.util.Properties, KafkaClientSupplier)
*/
public interface KafkaClientSupplier {
/**
* Create an {@link AdminClient} which is used for internal topic management.
*
* @param config Supplied by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of {@link AdminClient}
* @deprecated Not called by Kafka Streams, which now uses {@link #getAdmin} instead.
*/
@Deprecated
default AdminClient getAdminClient(final Map<String, Object> config) {
throw new UnsupportedOperationException("Direct use of this method is deprecated. " +
"Implementations of KafkaClientSupplier should implement the getAdmin() method instead. " +
"The method will be removed in a future release.");
}
/**
* Create an {@link Admin} which is used for internal topic management.
*
* @param config Supplied by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of {@link Admin}
*/
@SuppressWarnings("deprecation")
default Admin getAdmin(final Map<String, Object> config) {
return getAdminClient(config);
}
/**
* Create a {@link Producer} which is used to write records to sink topics.
*
* @param config {@link StreamsConfig#getProducerConfigs(String) producer config} which is supplied by the
* {@link java.util.Properties} given to the {@link KafkaStreams} instance
* @return an instance of Kafka producer
*/
Producer<byte[], byte[]> getProducer(final Map<String, Object> config);
/**
* Create a {@link Consumer} which is used to read records of source topics.
*
* @param config {@link StreamsConfig#getMainConsumerConfigs(String, String, int) consumer config} which is
* supplied by the {@link java.util.Properties} given to the {@link KafkaStreams} instance
* @return an instance of Kafka consumer
*/
Consumer<byte[], byte[]> getConsumer(final Map<String, Object> config);
/**
* Create a {@link Consumer} which is used to read records to restore {@link StateStore}s.
*
* @param config {@link StreamsConfig#getRestoreConsumerConfigs(String) restore consumer config} which is supplied
* by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of Kafka consumer
*/
Consumer<byte[], byte[]> getRestoreConsumer(final Map<String, Object> config);
/**
* Create a {@link Consumer} which is used to consume records for {@link GlobalKTable}.
*
* @param config {@link StreamsConfig#getGlobalConsumerConfigs(String) global consumer config} which is supplied
* by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of Kafka consumer
*/
Consumer<byte[], byte[]> getGlobalConsumer(final Map<String, Object> config);
}
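For illustration, a minimal sketch of an implementation that simply builds the standard clients itself; the class name is made up, and in practice such a supplier is only worth writing when extra client-side wiring (interceptors, metrics reporters, mocks in tests) is needed. It would be passed in via the KafkaStreams(Topology, Properties, KafkaClientSupplier) constructor referenced above.

import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import org.apache.kafka.streams.KafkaClientSupplier;
import java.util.Map;

public class PlainKafkaClientSupplier implements KafkaClientSupplier {
    @Override
    public Admin getAdmin(final Map<String, Object> config) {
        return Admin.create(config); // Streams passes its admin config map here
    }

    @Override
    public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
        return new KafkaProducer<>(config, new ByteArraySerializer(), new ByteArraySerializer());
    }

    @Override
    public Consumer<byte[], byte[]> getConsumer(final Map<String, Object> config) {
        return new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer());
    }

    @Override
    public Consumer<byte[], byte[]> getRestoreConsumer(final Map<String, Object> config) {
        return getConsumer(config);
    }

    @Override
    public Consumer<byte[], byte[]> getGlobalConsumer(final Map<String, Object> config) {
        return getConsumer(config);
    }
}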

File diff suppressed because it is too large

View File

@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.streams.state.HostInfo;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
/**
* Represents all the metadata related to a key, where a particular key resides in a {@link KafkaStreams} application.
* It contains the active {@link HostInfo} and a set of standby {@link HostInfo}s, denoting the instances where the key resides.
* It also contains the partition number where the key belongs, which could be useful when used in conjunction with other APIs.
* e.g., relating it to lags for that store partition.
* NOTE: This is a point-in-time view. It may change as rebalances happen.
*/
public class KeyQueryMetadata {
/**
* Sentinel to indicate that the KeyQueryMetadata is currently unavailable. This can occur during rebalance
* operations.
*/
public static final KeyQueryMetadata NOT_AVAILABLE = new KeyQueryMetadata(new HostInfo("unavailable", -1),
Collections.emptySet(),
-1);
private final HostInfo activeHost;
private final Set<HostInfo> standbyHosts;
private final int partition;
public KeyQueryMetadata(final HostInfo activeHost, final Set<HostInfo> standbyHosts, final int partition) {
this.activeHost = activeHost;
this.standbyHosts = standbyHosts;
this.partition = partition;
}
/**
* Get the Active streams instance for given key
*
* @return active instance's {@link HostInfo}
*/
public HostInfo getActiveHost() {
return activeHost;
}
/**
* Get the Streams instances that host the key as standbys
*
* @return set of standby {@link HostInfo} or an empty set, if no standbys are configured
*/
public Set<HostInfo> getStandbyHosts() {
return standbyHosts;
}
/**
* Get the Store partition corresponding to the key.
*
* @return store partition number
*/
public int getPartition() {
return partition;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof KeyQueryMetadata)) {
return false;
}
final KeyQueryMetadata keyQueryMetadata = (KeyQueryMetadata) obj;
return Objects.equals(keyQueryMetadata.activeHost, activeHost)
&& Objects.equals(keyQueryMetadata.standbyHosts, standbyHosts)
&& Objects.equals(keyQueryMetadata.partition, partition);
}
@Override
public String toString() {
return "KeyQueryMetadata {" +
"activeHost=" + activeHost +
", standbyHosts=" + standbyHosts +
", partition=" + partition +
'}';
}
@Override
public int hashCode() {
return Objects.hash(activeHost, standbyHosts, partition);
}
}
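A typical consumer of this class is KafkaStreams#queryMetadataForKey, which routes an interactive query to the instance that hosts the key; a small hedged sketch (String keys and the helper class are assumptions):

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyQueryMetadata;
import org.apache.kafka.streams.state.HostInfo;

public final class KeyLocatorSketch {
    // Resolve which instance currently hosts the active replica of the given key, or null during a rebalance.
    public static HostInfo activeHostFor(final KafkaStreams streams, final String storeName, final String key) {
        final KeyQueryMetadata metadata =
            streams.queryMetadataForKey(storeName, key, Serdes.String().serializer());
        if (KeyQueryMetadata.NOT_AVAILABLE.equals(metadata)) {
            return null; // metadata is being recomputed; retry later
        }
        return metadata.getActiveHost();
    }
}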

View File

@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import java.util.Objects;
/**
* A key-value pair defined for a single Kafka Streams record.
* If the record comes directly from a Kafka topic then its key/value are defined as the message key/value.
*
* @param <K> Key type
* @param <V> Value type
*/
public class KeyValue<K, V> {
/** The key of the key-value pair. */
public final K key;
/** The value of the key-value pair. */
public final V value;
/**
* Create a new key-value pair.
*
* @param key the key
* @param value the value
*/
public KeyValue(final K key, final V value) {
this.key = key;
this.value = value;
}
/**
* Create a new key-value pair.
*
* @param key the key
* @param value the value
* @param <K> the type of the key
* @param <V> the type of the value
* @return a new key-value pair
*/
public static <K, V> KeyValue<K, V> pair(final K key, final V value) {
return new KeyValue<>(key, value);
}
@Override
public String toString() {
return "KeyValue(" + key + ", " + value + ")";
}
@Override
public boolean equals(final Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof KeyValue)) {
return false;
}
final KeyValue other = (KeyValue) obj;
return Objects.equals(key, other.key) && Objects.equals(value, other.value);
}
@Override
public int hashCode() {
return Objects.hash(key, value);
}
}
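The pair type mostly appears in DSL operations that may change the key; as a small illustration (topic names are placeholders), KStream#map returns one KeyValue per input record:

import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;

public final class SwapKeyAndValueSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> source = builder.stream("input-topic");
        // map() may change both key and value, so the mapper returns a complete KeyValue pair.
        source.map((key, value) -> KeyValue.pair(value, key))
              .to("swapped-topic");
        System.out.println(builder.build().describe());
    }
}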

View File

@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import java.util.Objects;
/**
* Encapsulates information about lag at a store partition replica (active or standby). This information is constantly changing as the
* tasks process records, and thus it should be treated as simply an instantaneous measure of lag.
*/
public class LagInfo {
private final long currentOffsetPosition;
private final long endOffsetPosition;
private final long offsetLag;
LagInfo(final long currentOffsetPosition, final long endOffsetPosition) {
this.currentOffsetPosition = currentOffsetPosition;
this.endOffsetPosition = endOffsetPosition;
this.offsetLag = Math.max(0, endOffsetPosition - currentOffsetPosition);
}
/**
* Get the current maximum offset on the store partition's changelog topic that has been successfully written into
* the store partition's state store.
*
* @return the current consumed offset for standby/restoring store partitions, and simply the end offset for active store partition replicas
*/
public long currentOffsetPosition() {
return this.currentOffsetPosition;
}
/**
* Get the end offset position for this store partition's changelog topic on the Kafka brokers.
*
* @return last offset written to the changelog topic partition
*/
public long endOffsetPosition() {
return this.endOffsetPosition;
}
/**
* Get the measured lag between current and end offset positions, for this store partition replica
*
* @return lag as measured by message offsets
*/
public long offsetLag() {
return this.offsetLag;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof LagInfo)) {
return false;
}
final LagInfo other = (LagInfo) obj;
return currentOffsetPosition == other.currentOffsetPosition
&& endOffsetPosition == other.endOffsetPosition
&& this.offsetLag == other.offsetLag;
}
@Override
public int hashCode() {
return Objects.hash(currentOffsetPosition, endOffsetPosition, offsetLag);
}
@Override
public String toString() {
return "LagInfo {" +
" currentOffsetPosition=" + currentOffsetPosition +
", endOffsetPosition=" + endOffsetPosition +
", offsetLag=" + offsetLag +
'}';
}
}
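LagInfo instances are obtained from KafkaStreams#allLocalStorePartitionLags(), which reports the lag of every store partition replica hosted by the local instance; a hedged reporting sketch (class name illustrative):

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.LagInfo;
import java.util.Map;

public final class LagReportSketch {
    // Print the offset lag of every locally hosted store partition (active and standby replicas).
    public static void report(final KafkaStreams streams) {
        final Map<String, Map<Integer, LagInfo>> lags = streams.allLocalStorePartitionLags();
        for (final Map.Entry<String, Map<Integer, LagInfo>> store : lags.entrySet()) {
            for (final Map.Entry<Integer, LagInfo> partition : store.getValue().entrySet()) {
                System.out.println(store.getKey() + "-" + partition.getKey()
                        + " lag=" + partition.getValue().offsetLag());
            }
        }
    }
}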

View File

@@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.streams.state.QueryableStoreType;
import java.util.Objects;
/**
* {@code StoreQueryParameters} allows you to pass a variety of parameters when fetching a store for interactive query.
*/
public class StoreQueryParameters<T> {
private Integer partition;
private boolean staleStores;
private final String storeName;
private final QueryableStoreType<T> queryableStoreType;
private StoreQueryParameters(final String storeName, final QueryableStoreType<T> queryableStoreType, final Integer partition, final boolean staleStores) {
this.storeName = storeName;
this.queryableStoreType = queryableStoreType;
this.partition = partition;
this.staleStores = staleStores;
}
public static <T> StoreQueryParameters<T> fromNameAndType(final String storeName,
final QueryableStoreType<T> queryableStoreType) {
return new StoreQueryParameters<T>(storeName, queryableStoreType, null, false);
}
/**
* Set a specific partition that should be queried exclusively.
*
* @param partition The specific integer partition to be fetched from the stores list by using {@link StoreQueryParameters}.
*
* @return StoreQueryParameters a new {@code StoreQueryParameters} instance configured with the specified partition
*/
public StoreQueryParameters<T> withPartition(final Integer partition) {
return new StoreQueryParameters<T>(storeName, queryableStoreType, partition, staleStores);
}
/**
* Enable querying of stale state stores, i.e., allow querying active tasks during restore as well as standby tasks.
*
* @return StoreQueryParameters a new {@code StoreQueryParameters} instance configured with serving from stale stores enabled
*/
public StoreQueryParameters<T> enableStaleStores() {
return new StoreQueryParameters<T>(storeName, queryableStoreType, partition, true);
}
/**
* Get the name of the state store that should be queried.
*
* @return String state store name
*/
public String storeName() {
return storeName;
}
/**
* Get the queryable store type for which key is queried by the user.
*
* @return QueryableStoreType type of queryable store
*/
public QueryableStoreType<T> queryableStoreType() {
return queryableStoreType;
}
/**
* Get the store partition that will be queried.
* If the method returns {@code null}, it would mean that no specific partition has been requested,
* so all the local partitions for the store will be queried.
*
* @return Integer partition
*/
public Integer partition() {
return partition;
}
/**
* Get the flag staleStores. If {@code true}, include standbys and recovering stores along with running stores.
*
* @return boolean staleStores
*/
public boolean staleStoresEnabled() {
return staleStores;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof StoreQueryParameters)) {
return false;
}
final StoreQueryParameters storeQueryParameters = (StoreQueryParameters) obj;
return Objects.equals(storeQueryParameters.partition, partition)
&& Objects.equals(storeQueryParameters.staleStores, staleStores)
&& Objects.equals(storeQueryParameters.storeName, storeName)
&& Objects.equals(storeQueryParameters.queryableStoreType, queryableStoreType);
}
@Override
public String toString() {
return "StoreQueryParameters {" +
"partition=" + partition +
", staleStores=" + staleStores +
", storeName=" + storeName +
", queryableStoreType=" + queryableStoreType +
'}';
}
@Override
public int hashCode() {
return Objects.hash(partition, staleStores, storeName, queryableStoreType);
}
}
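Putting the builder methods together, a hedged sketch of querying one specific partition of the word-count store while tolerating standby and restoring replicas; the store name is reused from the quickstart, the partition number and class name are illustrative.

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

public final class StaleStoreQuerySketch {
    // Query a single partition of "counts-store", accepting possibly stale (standby/restoring) replicas.
    public static Long get(final KafkaStreams streams, final String key, final int partition) {
        final StoreQueryParameters<ReadOnlyKeyValueStore<String, Long>> params =
            StoreQueryParameters
                .fromNameAndType("counts-store", QueryableStoreTypes.<String, Long>keyValueStore())
                .withPartition(partition)
                .enableStaleStores();
        return streams.store(params).get(key);
    }
}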

View File

@@ -0,0 +1,565 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.errors.TopologyException;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KGroupedTable;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Transformer;
import org.apache.kafka.streams.kstream.ValueTransformer;
import org.apache.kafka.streams.kstream.internals.ConsumedInternal;
import org.apache.kafka.streams.kstream.internals.InternalStreamsBuilder;
import org.apache.kafka.streams.kstream.internals.MaterializedInternal;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import org.apache.kafka.streams.processor.internals.ProcessorNode;
import org.apache.kafka.streams.processor.internals.SourceNode;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import java.util.Collection;
import java.util.Collections;
import java.util.Objects;
import java.util.Properties;
import java.util.regex.Pattern;
/**
* {@code StreamsBuilder} provides the high-level Kafka Streams DSL to specify a Kafka Streams topology.
*
* @see Topology
* @see KStream
* @see KTable
* @see GlobalKTable
*/
public class StreamsBuilder {
/** The actual topology that is constructed by this StreamsBuilder. */
private final Topology topology = new Topology();
/** The topology's internal builder. */
final InternalTopologyBuilder internalTopologyBuilder = topology.internalTopologyBuilder;
private final InternalStreamsBuilder internalStreamsBuilder = new InternalStreamsBuilder(internalTopologyBuilder);
/**
* Create a {@link KStream} from the specified topic.
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
* deserializers as specified in the {@link StreamsConfig config} are used.
* <p>
* If multiple topics are specified there is no ordering guarantee for records from different topics.
* <p>
* Note that the specified input topic must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topic the topic name; cannot be {@code null}
* @return a {@link KStream} for the specified topic
*/
public synchronized <K, V> KStream<K, V> stream(final String topic) {
return stream(Collections.singleton(topic));
}
/**
* Create a {@link KStream} from the specified topic.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* as defined by the options in {@link Consumed} are used.
* <p>
* Note that the specified input topic must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link KStream} for the specified topic
*/
public synchronized <K, V> KStream<K, V> stream(final String topic,
final Consumed<K, V> consumed) {
return stream(Collections.singleton(topic), consumed);
}
/**
* Create a {@link KStream} from the specified topics.
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
* deserializers as specified in the {@link StreamsConfig config} are used.
* <p>
* If multiple topics are specified there is no ordering guarantee for records from different topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topics the topic names; must contain at least one topic name
* @return a {@link KStream} for the specified topics
*/
public synchronized <K, V> KStream<K, V> stream(final Collection<String> topics) {
return stream(topics, Consumed.with(null, null, null, null));
}
/**
* Create a {@link KStream} from the specified topics.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* as defined by the options in {@link Consumed} are used.
* <p>
* If multiple topics are specified there is no ordering guarantee for records from different topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topics the topic names; must contain at least one topic name
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link KStream} for the specified topics
*/
public synchronized <K, V> KStream<K, V> stream(final Collection<String> topics,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topics, "topics can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
return internalStreamsBuilder.stream(topics, new ConsumedInternal<>(consumed));
}
/**
* Create a {@link KStream} from the specified topic pattern.
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
* deserializers as specified in the {@link StreamsConfig config} are used.
* <p>
* If multiple topics are matched by the specified pattern, the created {@link KStream} will read data from all of
* them and there is no ordering guarantee between records from different topics. This also means that the work
* will not be parallelized for multiple topics, and the number of tasks will scale with the maximum partition
* count of any matching topic rather than the total number of partitions across all topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topicPattern the pattern to match for topic names
* @return a {@link KStream} for topics matching the regex pattern.
*/
public synchronized <K, V> KStream<K, V> stream(final Pattern topicPattern) {
return stream(topicPattern, Consumed.with(null, null));
}
/**
* Create a {@link KStream} from the specified topic pattern.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* as defined by the options in {@link Consumed} are used.
* <p>
* If multiple topics are matched by the specified pattern, the created {@link KStream} will read data from all of
* them and there is no ordering guarantee between records from different topics. This also means that the work
* will not be parallelized for multiple topics, and the number of tasks will scale with the maximum partition
* count of any matching topic rather than the total number of partitions across all topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topicPattern the pattern to match for topic names
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link KStream} for topics matching the regex pattern.
*/
public synchronized <K, V> KStream<K, V> stream(final Pattern topicPattern,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topicPattern, "topicPattern can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
return internalStreamsBuilder.stream(topicPattern, new ConsumedInternal<>(consumed));
}
/**
* Create a {@link KTable} for the specified topic.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* as defined by the options in {@link Consumed} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topic must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} using the given
* {@code Materialized} instance.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
* <p>
* You should only specify serdes in the {@link Consumed} instance as these will also be used to overwrite the
* serdes in {@link Materialized}, i.e.,
* <pre> {@code
* streamBuilder.table(topic, Consumed.with(Serdes.String(), Serdes.String()), Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(storeName))
* }
* </pre>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ...
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long valueForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters; cannot be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic,
final Consumed<K, V> consumed,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
materialized.withKeySerde(consumedInternal.keySerde()).withValueSerde(consumedInternal.valueSerde());
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link KTable} for the specified topic.
* The default {@code "auto.offset.reset"} strategy and default key and value deserializers as specified in the
* {@link StreamsConfig config} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic) {
return table(topic, new ConsumedInternal<>());
}
/**
* Create a {@link KTable} for the specified topic.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* as defined by the options in {@link Consumed} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(
Materialized.with(consumedInternal.keySerde(), consumedInternal.valueSerde()),
internalStreamsBuilder,
topic + "-");
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link KTable} for the specified topic.
* The default {@code "auto.offset.reset"} strategy as specified in the {@link StreamsConfig config} is used.
* Key and value deserializers as defined by the options in {@link Materialized} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} using the {@link Materialized} instance.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
final ConsumedInternal<K, V> consumedInternal =
new ConsumedInternal<>(Consumed.with(materializedInternal.keySerde(), materializedInternal.valueSerde()));
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link GlobalKTable} for the specified topic.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
* No internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig} or {@link Consumed}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(
Materialized.with(consumedInternal.keySerde(), consumedInternal.valueSerde()),
internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.globalTable(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link GlobalKTable} for the specified topic.
* The default key and value deserializers as specified in the {@link StreamsConfig config} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store name may not be queryable through Interactive Queries.
* No internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic) {
return globalTable(topic, Consumed.with(null, null));
}
/**
* Create a {@link GlobalKTable} for the specified topic.
*
* Input {@link KeyValue} pairs with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} configured with
* the provided instance of {@link Materialized}.
* However, no internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* You should only specify serdes in the {@link Consumed} instance as these will also be used to overwrite the
* serdes in {@link Materialized}, i.e.,
* <pre> {@code
* streamBuilder.globalTable(topic, Consumed.with(Serdes.String(), Serdes.String()), Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(storeName))
* }
* </pre>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ...
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long valueForKey = localStore.get(key);
* }</pre>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig} or {@link Consumed}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters; can't be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
final Consumed<K, V> consumed,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
// always use the serdes from consumed
materialized.withKeySerde(consumedInternal.keySerde()).withValueSerde(consumedInternal.valueSerde());
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.globalTable(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link GlobalKTable} for the specified topic.
*
* Input {@link KeyValue} pairs with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} configured with
* the provided instance of {@link Materialized}.
* However, no internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ...
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long valueForKey = localStore.get(key);
* }</pre>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.globalTable(topic,
new ConsumedInternal<>(Consumed.with(materializedInternal.keySerde(),
materializedInternal.valueSerde())),
materializedInternal);
}
/**
* Adds a state store to the underlying {@link Topology}.
* <p>
* It is required to connect state stores to {@link Processor Processors}, {@link Transformer Transformers},
* or {@link ValueTransformer ValueTransformers} before they can be used.
*
* @param builder the builder used to obtain this state store {@link StateStore} instance
* @return itself
* @throws TopologyException if state store supplier is already added
*/
public synchronized StreamsBuilder addStateStore(final StoreBuilder builder) {
Objects.requireNonNull(builder, "builder can't be null");
internalStreamsBuilder.addStateStore(builder);
return this;
}
/**
* @deprecated use {@link #addGlobalStore(StoreBuilder, String, Consumed, ProcessorSupplier)} instead
*/
@SuppressWarnings("unchecked")
@Deprecated
public synchronized StreamsBuilder addGlobalStore(final StoreBuilder storeBuilder,
final String topic,
final String sourceName,
final Consumed consumed,
final String processorName,
final ProcessorSupplier stateUpdateSupplier) {
Objects.requireNonNull(storeBuilder, "storeBuilder can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
internalStreamsBuilder.addGlobalStore(storeBuilder,
sourceName,
topic,
new ConsumedInternal<>(consumed),
processorName,
stateUpdateSupplier);
return this;
}
/**
* Adds a global {@link StateStore} to the topology.
* The {@link StateStore} sources its data from all partitions of the provided input topic.
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
* <p>
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
* of the input topic.
* <p>
* The provided {@link ProcessorSupplier} will be used to create an {@link ProcessorNode} that will receive all
* records forwarded from the {@link SourceNode}. NOTE: you should not use the {@code Processor} to insert transformed records into
* the global state store. This store uses the source topic as changelog and during restore will insert records directly
* from the source.
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
* <p>
* It is not required to connect a global store to {@link Processor Processors}, {@link Transformer Transformers},
* or {@link ValueTransformer ValueTransformers}; those have read-only access to all global stores by default.
*
* @param storeBuilder user defined {@link StoreBuilder}; can't be {@code null}
* @param topic the topic to source the data from
* @param consumed the instance of {@link Consumed} used to define optional parameters; can't be {@code null}
* @param stateUpdateSupplier the instance of {@link ProcessorSupplier}
* @return itself
* @throws TopologyException if the processor of state is already registered
*/
@SuppressWarnings("unchecked")
public synchronized StreamsBuilder addGlobalStore(final StoreBuilder storeBuilder,
final String topic,
final Consumed consumed,
final ProcessorSupplier stateUpdateSupplier) {
Objects.requireNonNull(storeBuilder, "storeBuilder can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
internalStreamsBuilder.addGlobalStore(storeBuilder,
topic,
new ConsumedInternal<>(consumed),
stateUpdateSupplier);
return this;
}
/**
* Returns the {@link Topology} that represents the specified processing logic.
* Note that using this method means no optimizations are performed.
*
* @return the {@link Topology} that represents the specified processing logic
*/
public synchronized Topology build() {
return build(null);
}
/**
* Returns the {@link Topology} that represents the specified processing logic and accepts
* a {@link Properties} instance used to indicate whether to optimize topology or not.
*
* @param props the {@link Properties} used for building possibly optimized topology
* @return the {@link Topology} that represents the specified processing logic
*/
public synchronized Topology build(final Properties props) {
internalStreamsBuilder.buildAndOptimizeTopology(props);
return topology;
}
}
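To make the addGlobalStore contract concrete, here is a hedged sketch of wiring a global store whose updater only mirrors the source topic, as the javadoc above prescribes; the store name "reference-data", the topic name and the class names are all made up.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import org.apache.kafka.streams.state.Stores;

public final class GlobalStoreWiringSketch {

    // Keeps the "reference-data" global store in sync with the records read from the source topic.
    static class ReferenceDataUpdater implements Processor<String, String> {
        private KeyValueStore<String, String> store;

        @Override
        @SuppressWarnings("unchecked")
        public void init(final ProcessorContext context) {
            store = (KeyValueStore<String, String>) context.getStateStore("reference-data");
        }

        @Override
        public void process(final String key, final String value) {
            store.put(key, value); // mirror only; no transformation, per the javadoc above
        }

        @Override
        public void close() { }
    }

    public static void wire(final StreamsBuilder builder) {
        final StoreBuilder<KeyValueStore<String, String>> storeBuilder =
            Stores.keyValueStoreBuilder(
                    Stores.inMemoryKeyValueStore("reference-data"),
                    Serdes.String(),
                    Serdes.String())
                .withLoggingDisabled(); // global stores restore from the source topic, so no changelog is needed
        builder.addGlobalStore(
            storeBuilder,
            "reference-data-topic",
            Consumed.with(Serdes.String(), Serdes.String()),
            (ProcessorSupplier<String, String>) ReferenceDataUpdater::new);
    }
}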

File diff suppressed because it is too large

View File

@@ -0,0 +1,225 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
import java.util.Map;
/**
* The Kafka Streams metrics interface for adding metric sensors and collecting metric values.
*/
public interface StreamsMetrics {
/**
* Get read-only handle on global metrics registry.
*
* @return Map of all metrics.
*/
Map<MetricName, ? extends Metric> metrics();
/**
* Add a latency, rate and total sensor for a specific operation, which will include the following metrics:
* <ol>
* <li>average latency</li>
* <li>max latency</li>
* <li>invocation rate (num.operations / seconds)</li>
* <li>total invocation count</li>
* </ol>
* Whenever a user records this sensor via {@link Sensor#record(double)} etc., it will be counted as one invocation
* of the operation, and hence the rate / count metrics will be updated accordingly; and the recorded latency value
* will be used to update the average / max latency as well.
*
* Note that you can add more metrics to this sensor after you created it, which can then be updated upon
* {@link Sensor#record(double)} calls.
*
* The added sensor and its metrics can be removed with {@link #removeSensor(Sensor) removeSensor()}.
*
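* <p>
* A minimal usage sketch ({@code streamsMetrics} is an instance of this interface; the scope, entity, and
* operation names are illustrative):
* <pre>{@code
* Sensor sensor = streamsMetrics.addLatencyRateTotalSensor(
*     "my-scope", "my-entity", "my-operation", Sensor.RecordingLevel.INFO);
* long startNs = System.nanoTime();
* // ... perform the operation being measured ...
* sensor.record(System.nanoTime() - startNs);  // the recorded value is interpreted as the operation's latency
* }</pre>
*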
* @param scopeName name of the scope, which will be used as part of the metric type, e.g.: "stream-[scope]-metrics".
* @param entityName name of the entity, which will be used as part of the metric tags, e.g.: "[scope]-id" = "[entity]".
* @param operationName name of the operation, which will be used as the name of the metric, e.g.: "[operation]-latency-avg".
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
* @see #addSensor(String, RecordingLevel, Sensor...)
*/
Sensor addLatencyRateTotalSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Add a rate and a total sensor for a specific operation, which will include the following metrics:
* <ol>
* <li>invocation rate (num.operations / time unit)</li>
* <li>total invocation count</li>
* </ol>
* Whenever a user records this sensor via {@link Sensor#record(double)} etc.,
* it will be counted as one invocation of the operation, and hence the rate / count metrics will be updated accordingly.
*
* Note that you can add more metrics to this sensor after you created it, which can then be updated upon
* {@link Sensor#record(double)} calls.
*
* The added sensor and its metrics can be removed with {@link #removeSensor(Sensor) removeSensor()}.
*
* @param scopeName name of the scope, which will be used as part of the metrics type, e.g.: "stream-[scope]-metrics".
* @param entityName name of the entity, which will be used as part of the metric tags, e.g.: "[scope]-id" = "[entity]".
* @param operationName name of the operation, which will be used as the name of the metric, e.g.: "[operation]-total".
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
* @see #addSensor(String, RecordingLevel, Sensor...)
*/
Sensor addRateTotalSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Add a latency and throughput sensor for a specific operation, which will include the following metrics:
* <ol>
* <li>average latency</li>
* <li>max latency</li>
* <li>throughput (num.operations / time unit)</li>
* </ol>
* Also create a parent sensor with the same metrics that aggregates all entities with the same operation under the
* same scope if it has not been created.
*
* @param scopeName name of the scope, could be the type of the state store, etc.
* @param entityName name of the entity, could be the name of the state store instance, etc.
* @param operationName name of the operation, could be get / put / delete / etc.
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @deprecated since 2.5. Use {@link #addLatencyRateTotalSensor(String, String, String, Sensor.RecordingLevel, String...) addLatencyRateTotalSensor()}
* instead.
*/
@Deprecated
Sensor addLatencyAndThroughputSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Record the given latency value of the sensor.
* If the passed sensor includes throughput metrics, e.g., when created by the
* {@link #addLatencyAndThroughputSensor(String, String, String, Sensor.RecordingLevel, String...)} method, then the
* throughput metrics will also be recorded from this event.
*
* @param sensor sensor whose latency we are recording.
* @param startNs start of measurement time in nanoseconds.
* @param endNs end of measurement time in nanoseconds.
* @deprecated since 2.5. Use {@link Sensor#record(double) Sensor#record()} instead.
*/
@Deprecated
void recordLatency(final Sensor sensor,
final long startNs,
final long endNs);
/**
* Add a throughput sensor for a specific operation:
* <ol>
* <li>throughput (num.operations / time unit)</li>
* </ol>
* Also create a parent sensor with the same metrics that aggregates all entities with the same operation under the
* same scope if it has not been created.
* This sensor is a strict subset of the sensors created by
* {@link #addLatencyAndThroughputSensor(String, String, String, Sensor.RecordingLevel, String...)}.
*
* @param scopeName name of the scope, could be the type of the state store, etc.
* @param entityName name of the entity, could be the name of the state store instance, etc.
* @param operationName name of the operation, could be get / put / delete / etc.
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @deprecated since 2.5. Use {@link #addRateTotalSensor(String, String, String, Sensor.RecordingLevel, String...)
* addRateTotalSensor()} instead.
*/
@Deprecated
Sensor addThroughputSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Record the throughput value of a sensor.
*
* @param sensor sensor whose throughput we are recording
* @param value throughput value
* @deprecated since 2.5. Use {@link Sensor#record() Sensor#record()} instead.
*/
@Deprecated
void recordThroughput(final Sensor sensor,
final long value);
/**
* Generic method to create a sensor.
* Note that for most cases it is advisable to use
* {@link #addRateTotalSensor(String, String, String, RecordingLevel, String...) addRateTotalSensor()}
* or {@link #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...) addLatencyRateTotalSensor()}
* to ensure metric name well-formedness and conformity with the rest of the Kafka Streams code base.
* However, if the above two methods are not sufficient, this method can also be used.
*
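* <p>
* For example ({@code streamsMetrics} is an instance of this interface; the sensor name is illustrative, and
* metrics would typically be attached to the returned sensor before recording):
* <pre>{@code
* Sensor sensor = streamsMetrics.addSensor("my-custom-sensor", Sensor.RecordingLevel.DEBUG);
* sensor.record(42.0);
* streamsMetrics.removeSensor(sensor);
* }</pre>
*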
* @param name name of the sensor.
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor
* @return The added sensor.
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
*/
Sensor addSensor(final String name,
final Sensor.RecordingLevel recordingLevel);
/**
* Generic method to create a sensor with parent sensors.
* Note that for most cases it is advisable to use
* {@link #addRateTotalSensor(String, String, String, RecordingLevel, String...) addRateTotalSensor()}
* or {@link #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...) addLatencyRateTotalSensor()}
* to ensure metric name well-formedness and conformity with the rest of the Kafka Streams code base.
* However, if the above two methods are not sufficient, this method can also be used.
*
* @param name name of the sensor
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor
* @return The added sensor.
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
*/
Sensor addSensor(final String name,
final Sensor.RecordingLevel recordingLevel,
final Sensor... parents);
/**
* Remove a sensor.
* @param sensor sensor to be removed
*/
void removeSensor(final Sensor sensor);
}

View File

@@ -0,0 +1,772 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.streams.errors.TopologyException;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.StreamPartitioner;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.processor.TopicNameExtractor;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import org.apache.kafka.streams.processor.internals.ProcessorNode;
import org.apache.kafka.streams.processor.internals.ProcessorTopology;
import org.apache.kafka.streams.processor.internals.SinkNode;
import org.apache.kafka.streams.processor.internals.SourceNode;
import org.apache.kafka.streams.state.StoreBuilder;
import java.util.regex.Pattern;
/**
* A logical representation of a {@link ProcessorTopology}.
* A topology is an acyclic graph of sources, processors, and sinks.
* A {@link SourceNode source} is a node in the graph that consumes one or more Kafka topics and forwards their
* records to its successor nodes.
* A {@link Processor processor} is a node in the graph that receives input records from upstream nodes, processes the
* records, and optionally forwards new records to one or all of its downstream nodes.
* Finally, a {@link SinkNode sink} is a node in the graph that receives records from upstream nodes and writes them to
* a Kafka topic.
* A {@code Topology} allows you to construct an acyclic graph of these nodes, which can then be passed into a new
* {@link KafkaStreams} instance that will then {@link KafkaStreams#start() begin consuming, processing, and producing
* records}.
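* <p>
* A minimal sketch (topic and node names are illustrative, and {@code MyProcessor} stands for a user-supplied
* {@link Processor} implementation):
* <pre>{@code
* Topology topology = new Topology();
* topology.addSource("Source", "input-topic");
* topology.addProcessor("Process", MyProcessor::new, "Source");
* topology.addSink("Sink", "output-topic", "Process");
* // props holds the usual StreamsConfig settings, e.g. application.id and bootstrap.servers
* new KafkaStreams(topology, props).start();
* }</pre>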
*/
public class Topology {
final InternalTopologyBuilder internalTopologyBuilder = new InternalTopologyBuilder();
/**
* Sets the {@code auto.offset.reset} configuration when
* {@link #addSource(AutoOffsetReset, String, String...) adding a source processor} or when creating {@link KStream}
* or {@link KTable} via {@link StreamsBuilder}.
*/
public enum AutoOffsetReset {
EARLIEST, LATEST
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final String name,
final String... topics) {
internalTopologyBuilder.addSource(null, name, null, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(null, name, null, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
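* <p>
* For example (topic and node names are illustrative):
* <pre>{@code
* topology.addSource(Topology.AutoOffsetReset.EARLIEST, "Source", "input-topic");
* }</pre>
*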
* @param offsetReset the auto offset reset policy to use for this source if no committed offsets found; acceptable values earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, null, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param offsetReset the auto offset reset policy value for this source if no committed offsets found; acceptable values earliest or latest.
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, null, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final TimestampExtractor timestampExtractor,
final String name,
final String... topics) {
internalTopologyBuilder.addSource(null, name, timestampExtractor, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final TimestampExtractor timestampExtractor,
final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(null, name, timestampExtractor, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param offsetReset the auto offset reset policy to use for this source if no committed offsets found;
* acceptable values earliest or latest
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final TimestampExtractor timestampExtractor,
final String name,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param offsetReset the auto offset reset policy value for this source if no committed offsets found;
* acceptable values earliest or latest.
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final TimestampExtractor timestampExtractor,
final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the specified key and value deserializers.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
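* <p>
* For example, a sketch that reads String keys and values (topic and node names are illustrative):
* <pre>{@code
* topology.addSource(
*     "Source",
*     new StringDeserializer(),
*     new StringDeserializer(),
*     "input-topic");
* }</pre>
*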
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String... topics) {
internalTopologyBuilder.addSource(null, name, null, keyDeserializer, valueDeserializer, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
* topics that share the same key-value data format.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
public synchronized Topology addSource(final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(null, name, null, keyDeserializer, valueDeserializer, topicPattern);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all the specified topics, so care should be taken when specifying
* topics that share the same key-value data format.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
@SuppressWarnings("overloads")
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, null, keyDeserializer, valueDeserializer, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
* topics that share the same key-value data format.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, null, keyDeserializer, valueDeserializer, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the specified key and value deserializers.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest.
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
@SuppressWarnings("overloads")
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final TimestampExtractor timestampExtractor,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, keyDeserializer, valueDeserializer, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
* topics that share the same key-value data format.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
@SuppressWarnings("overloads")
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final TimestampExtractor timestampExtractor,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, keyDeserializer, valueDeserializer, topicPattern);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized Topology addSink(final String name,
final String topic,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, null, null, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic,
* using the supplied partitioner.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
* <p>
* The sink will also use the specified {@link StreamPartitioner} to determine how records are distributed among
* the named Kafka topic's partitions.
* Such control is often useful with topologies that use {@link #addStateStore(StoreBuilder, String...) state
* stores} in their processors.
* In most other cases, however, a partitioner need not be specified and Kafka will automatically distribute
* records among partitions using Kafka's default partitioning logic.
*
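* <p>
* For example, a sketch that routes records by a hash of the key (topic and node names are illustrative):
* <pre>{@code
* topology.addSink(
*     "Sink",
*     "output-topic",
*     (topic, key, value, numPartitions) -> (key.hashCode() & 0x7fffffff) % numPartitions,
*     "Process");
* }</pre>
*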
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final String topic,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, null, null, partitioner, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
* The sink will use the specified key and value serializers.
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final String topic,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, keySerializer, valueSerializer, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
* The sink will use the specified key and value serializers, and the supplied partitioner.
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final String topic,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, keySerializer, valueSerializer, partitioner, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
* The topics that it may ever send to should be pre-created.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
*
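* <p>
* For example, a sketch that derives the output topic from each record's source topic (names are illustrative
* and the target topics must already exist):
* <pre>{@code
* topology.addSink(
*     "DynamicSink",
*     (key, value, recordContext) -> recordContext.topic() + "-output",
*     "Process");
* }</pre>
*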
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, null, null, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor},
* using the supplied partitioner.
* The topics that it may ever send to should be pre-created.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
* <p>
* The sink will also use the specified {@link StreamPartitioner} to determine how records are distributed among
* the named Kafka topic's partitions.
* Such control is often useful with topologies that use {@link #addStateStore(StoreBuilder, String...) state
* stores} in their processors.
* In most other cases, however, a partitioner need not be specified and Kafka will automatically distribute
* records among partitions using Kafka's default partitioning logic.
*
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, null, null, partitioner, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
* The topics that it may ever send to should be pre-created.
* The sink will use the specified key and value serializers.
*
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, keySerializer, valueSerializer, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
* The topics that it may ever send to should be pre-created.
* The sink will use the specified key and value serializers, and the supplied partitioner.
*
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, keySerializer, valueSerializer, partitioner, parentNames);
return this;
}
/**
* Add a new processor node that receives and processes records output by one or more parent source or processor
* nodes.
* Any new record output by this processor will be forwarded to its child processor or sink nodes.
*
* @param name the unique name of the processor node
* @param supplier the supplier used to obtain this node's {@link Processor} instance
* @param parentNames the name of one or more source or processor nodes whose output records this processor should receive
* and process
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
*/
public synchronized Topology addProcessor(final String name,
final ProcessorSupplier supplier,
final String... parentNames) {
internalTopologyBuilder.addProcessor(name, supplier, parentNames);
return this;
}
/**
* Adds a state store.
*
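* <p>
* A minimal sketch (store and processor names are illustrative; the processor must have been added already):
* <pre>{@code
* topology.addStateStore(
*     Stores.keyValueStoreBuilder(Stores.persistentKeyValueStore("Counts"), Serdes.String(), Serdes.Long()),
*     "Process");
* }</pre>
*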
* @param storeBuilder the storeBuilder used to obtain this state store {@link StateStore} instance
* @param processorNames the names of the processors that should be able to access the provided store
* @return itself
* @throws TopologyException if the state store is already added
*/
public synchronized Topology addStateStore(final StoreBuilder storeBuilder,
final String... processorNames) {
internalTopologyBuilder.addStateStore(storeBuilder, processorNames);
return this;
}
/**
* Adds a global {@link StateStore} to the topology.
* The {@link StateStore} sources its data from all partitions of the provided input topic.
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
* <p>
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
* of the input topic.
* <p>
* The provided {@link ProcessorSupplier} will be used to create a {@link ProcessorNode} that will receive all
* records forwarded from the {@link SourceNode}.
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param storeBuilder user defined state store builder
* @param sourceName name of the {@link SourceNode} that will be automatically added
* @param keyDeserializer the {@link Deserializer} to deserialize keys with
* @param valueDeserializer the {@link Deserializer} to deserialize values with
* @param topic the topic to source the data from
* @param processorName the name of the {@link ProcessorSupplier}
* @param stateUpdateSupplier the instance of {@link ProcessorSupplier}
* @return itself
* @throws TopologyException if the processor or store is already registered
*/
@SuppressWarnings("unchecked")
public synchronized Topology addGlobalStore(final StoreBuilder storeBuilder,
final String sourceName,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String topic,
final String processorName,
final ProcessorSupplier stateUpdateSupplier) {
internalTopologyBuilder.addGlobalStore(storeBuilder, sourceName, null, keyDeserializer,
valueDeserializer, topic, processorName, stateUpdateSupplier);
return this;
}
/**
* Adds a global {@link StateStore} to the topology.
* The {@link StateStore} sources its data from all partitions of the provided input topic.
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
* <p>
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
* of the input topic.
* <p>
* The provided {@link ProcessorSupplier} will be used to create a {@link ProcessorNode} that will receive all
* records forwarded from the {@link SourceNode}.
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
*
* @param storeBuilder user defined key value store builder
* @param sourceName name of the {@link SourceNode} that will be automatically added
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param keyDeserializer the {@link Deserializer} to deserialize keys with
* @param valueDeserializer the {@link Deserializer} to deserialize values with
* @param topic the topic to source the data from
* @param processorName the name of the {@link ProcessorSupplier}
* @param stateUpdateSupplier the instance of {@link ProcessorSupplier}
* @return itself
* @throws TopologyException if the processor or store is already registered
*/
@SuppressWarnings("unchecked")
public synchronized Topology addGlobalStore(final StoreBuilder storeBuilder,
final String sourceName,
final TimestampExtractor timestampExtractor,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String topic,
final String processorName,
final ProcessorSupplier stateUpdateSupplier) {
internalTopologyBuilder.addGlobalStore(storeBuilder, sourceName, timestampExtractor, keyDeserializer,
valueDeserializer, topic, processorName, stateUpdateSupplier);
return this;
}
/**
* Connects the processor and the state stores.
*
* @param processorName the name of the processor
* @param stateStoreNames the names of state stores that the processor uses
* @return itself
* @throws TopologyException if the processor or a state store is unknown
*/
public synchronized Topology connectProcessorAndStateStores(final String processorName,
final String... stateStoreNames) {
internalTopologyBuilder.connectProcessorAndStateStores(processorName, stateStoreNames);
return this;
}
/**
* Returns a description of this {@code Topology}.
*
* @return a description of the topology.
*/
public synchronized TopologyDescription describe() {
return internalTopologyBuilder.describe();
}
}

View File

@@ -0,0 +1,179 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.streams.processor.TopicNameExtractor;
import org.apache.kafka.streams.processor.internals.StreamTask;
import java.util.Set;
import java.util.regex.Pattern;
/**
* A meta representation of a {@link Topology topology}.
* <p>
* The nodes of a topology are grouped into {@link Subtopology sub-topologies} if they are connected.
* In contrast, two sub-topologies are not connected but can be linked to each other via topics, i.e., if one
* sub-topology {@link Topology#addSink(String, String, String...) writes} into a topic and another sub-topology
* {@link Topology#addSource(String, String...) reads} from the same topic.
* <p>
* When {@link KafkaStreams#start()} is called, different sub-topologies will be constructed and executed as independent
* {@link StreamTask tasks}.
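* <p>
* A description is typically obtained and inspected as follows (a sketch):
* <pre>{@code
* TopologyDescription description = topology.describe();
* System.out.println(description);  // the concrete implementation's toString() renders the graph in readable form
* }</pre>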
*/
public interface TopologyDescription {
/**
* A connected sub-graph of a {@link Topology}.
* <p>
* Nodes of a {@code Subtopology} are connected {@link Topology#addProcessor(String,
* org.apache.kafka.streams.processor.ProcessorSupplier, String...) directly} or indirectly via
* {@link Topology#connectProcessorAndStateStores(String, String...) state stores}
* (i.e., if multiple processors share the same state).
*/
interface Subtopology {
/**
* Internally assigned unique ID.
* @return the ID of the sub-topology
*/
int id();
/**
* All nodes of this sub-topology.
* @return set of all nodes within the sub-topology
*/
Set<Node> nodes();
}
/**
* Represents a {@link Topology#addGlobalStore(org.apache.kafka.streams.state.StoreBuilder, String,
* org.apache.kafka.common.serialization.Deserializer, org.apache.kafka.common.serialization.Deserializer, String,
* String, org.apache.kafka.streams.processor.ProcessorSupplier) global store}.
* Adding a global store results in adding a source node and one stateful processor node.
* Note that all added global stores form a single unit (similar to a {@link Subtopology}) even if different
* global stores are not connected to each other.
* Furthermore, global stores are available to all processors without connecting them explicitly, and thus global
* stores will never be part of any {@link Subtopology}.
*/
interface GlobalStore {
/**
* The source node reading from a "global" topic.
* @return the "global" source node
*/
Source source();
/**
* The processor node maintaining the global store.
* @return the "global" processor node
*/
Processor processor();
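/**
* The internally assigned ID of the global store.
* @return the ID of the global store
*/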
int id();
}
/**
* A node of a topology. Can be a source, sink, or processor node.
*/
interface Node {
/**
* The name of the node. Will never be {@code null}.
* @return the name of the node
*/
String name();
/**
* The predecessors of this node within a sub-topology.
* Note that sources do not have any predecessors.
* Will never be {@code null}.
* @return set of all predecessors
*/
Set<Node> predecessors();
/**
* The successors of this node within a sub-topology.
* Note that sinks do not have any successors.
* Will never be {@code null}.
* @return set of all successors
*/
Set<Node> successors();
}
/**
* A source node of a topology.
*/
interface Source extends Node {
/**
* The topic names this source node is reading from.
* @return comma separated list of topic names or pattern (as String)
* @deprecated use {@link #topicSet()} or {@link #topicPattern()} instead
*/
@Deprecated
String topics();
/**
* The topic names this source node is reading from.
* @return a set of topic names
*/
Set<String> topicSet();
/**
* The pattern used to match the topic names this source node is reading from.
* @return the pattern used to match topic names
*/
Pattern topicPattern();
}
/**
* A processor node of a topology.
*/
interface Processor extends Node {
/**
* The names of all connected stores.
* @return set of store names
*/
Set<String> stores();
}
/**
* A sink node of a topology.
*/
interface Sink extends Node {
/**
* The topic name this sink node is writing to.
* Could be {@code null} if the topic name can only be dynamically determined based on {@link TopicNameExtractor}
* @return a topic name
*/
String topic();
/**
* The {@link TopicNameExtractor} class that this sink node uses to dynamically extract the topic name to write to.
* Could be {@code null} if the topic name is not dynamically determined.
* @return the {@link TopicNameExtractor} class used to get the topic name
*/
TopicNameExtractor topicNameExtractor();
}
/**
* All sub-topologies of the represented topology.
* @return set of all sub-topologies
*/
Set<Subtopology> subtopologies();
/**
* All global stores of the represented topology.
* @return set of all global stores
*/
Set<GlobalStore> globalStores();
}

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates that none of the specified {@link org.apache.kafka.streams.StreamsConfig#BOOTSTRAP_SERVERS_CONFIG brokers}
* could be found.
*
* @see org.apache.kafka.streams.StreamsConfig
*/
public class BrokerNotFoundException extends StreamsException {
private final static long serialVersionUID = 1L;
public BrokerNotFoundException(final String message) {
super(message);
}
public BrokerNotFoundException(final String message, final Throwable throwable) {
super(message, throwable);
}
public BrokerNotFoundException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import java.util.Map;
import org.apache.kafka.clients.producer.ProducerRecord;
/**
* {@code ProductionExceptionHandler} that always instructs streams to fail when an exception
* happens while attempting to produce result records.
*/
public class DefaultProductionExceptionHandler implements ProductionExceptionHandler {
@Override
public ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
final Exception exception) {
return ProductionExceptionHandlerResponse.FAIL;
}
@Override
public void configure(final Map<String, ?> configs) {
// ignore
}
}

View File

@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.Configurable;
import org.apache.kafka.streams.processor.ProcessorContext;
/**
* Interface that specifies how an exception from source node deserialization
* (e.g., reading from Kafka) should be handled.
*/
public interface DeserializationExceptionHandler extends Configurable {
/**
* Inspect a record and the exception received.
* @param context processor context
* @param record record that failed deserialization
* @param exception the actual exception
*/
DeserializationHandlerResponse handle(final ProcessorContext context,
final ConsumerRecord<byte[], byte[]> record,
final Exception exception);
/**
* Enumeration that describes the response from the exception handler.
*/
enum DeserializationHandlerResponse {
/* continue with processing */
CONTINUE(0, "CONTINUE"),
/* fail the processing and stop */
FAIL(1, "FAIL");
/** an English description of the API--this is for debugging and can change */
public final String name;
/** the permanent and immutable id of an API--this can't change ever */
public final int id;
DeserializationHandlerResponse(final int id, final String name) {
this.id = id;
this.name = name;
}
}
}

View File

@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates that there was a problem when trying to access a
* {@link org.apache.kafka.streams.processor.StateStore StateStore}, i.e, the Store is no longer valid because it is
* closed or doesn't exist any more due to a rebalance.
* <p>
* These exceptions may be transient, i.e., during a rebalance it won't be possible to query the stores as they are
* being (re)-initialized. Once the rebalance has completed the stores will be available again. Hence, it is valid
* to backoff and retry when handling this exception.
*/
public class InvalidStateStoreException extends StreamsException {
private final static long serialVersionUID = 1L;
public InvalidStateStoreException(final String message) {
super(message);
}
public InvalidStateStoreException(final String message, final Throwable throwable) {
super(message, throwable);
}
public InvalidStateStoreException(final Throwable throwable) {
super(throwable);
}
}
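Because this exception is often transient, interactive-query callers typically retry with a backoff. A minimal sketch under that assumption; the helper class below is hypothetical:

package examples;

import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.errors.InvalidStateStoreException;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;

public final class RetryStoreLookupExample {

    // Retry the lookup until the store becomes queryable again (e.g. after a rebalance completes).
    public static ReadOnlyKeyValueStore<String, Long> waitForStore(final KafkaStreams streams,
                                                                   final String storeName) throws InterruptedException {
        while (true) {
            try {
                return streams.store(
                    StoreQueryParameters.fromNameAndType(storeName, QueryableStoreTypes.<String, Long>keyValueStore()));
            } catch (final InvalidStateStoreException retriable) {
                Thread.sleep(100L); // back off; the store is unavailable while it is being (re-)initialized
            }
        }
    }
}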

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates that the state store directory lock could not be acquired because another thread holds the lock.
*
* @see org.apache.kafka.streams.processor.StateStore
*/
public class LockException extends StreamsException {
private final static long serialVersionUID = 1L;
public LockException(final String message) {
super(message);
}
public LockException(final String message, final Throwable throwable) {
super(message, throwable);
}
public LockException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
/**
* Deserialization handler that logs a deserialization exception and then
* signals the processing pipeline to continue processing more records.
*/
public class LogAndContinueExceptionHandler implements DeserializationExceptionHandler {
private static final Logger log = LoggerFactory.getLogger(LogAndContinueExceptionHandler.class);
@Override
public DeserializationHandlerResponse handle(final ProcessorContext context,
final ConsumerRecord<byte[], byte[]> record,
final Exception exception) {
log.warn("Exception caught during Deserialization, " +
"taskId: {}, topic: {}, partition: {}, offset: {}",
context.taskId(), record.topic(), record.partition(), record.offset(),
exception);
return DeserializationHandlerResponse.CONTINUE;
}
@Override
public void configure(final Map<String, ?> configs) {
// ignore
}
}
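To make this handler the default, it can be registered under StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG. A brief sketch; the application id and bootstrap servers are placeholders:

package examples;

import java.util.Properties;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.errors.LogAndContinueExceptionHandler;

public final class ContinueOnBadRecordsConfig {

    // Build a StreamsConfig-compatible Properties object that skips corrupted records.
    public static Properties props() {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "example-app");       // placeholder
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092"); // placeholder
        props.put(StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
                  LogAndContinueExceptionHandler.class);
        return props;
    }
}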

View File

@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
/**
* Deserialization handler that logs a deserialization exception and then
* signals the processing pipeline to stop processing more records and fail.
*/
public class LogAndFailExceptionHandler implements DeserializationExceptionHandler {
private static final Logger log = LoggerFactory.getLogger(LogAndFailExceptionHandler.class);
@Override
public DeserializationHandlerResponse handle(final ProcessorContext context,
final ConsumerRecord<byte[], byte[]> record,
final Exception exception) {
log.error("Exception caught during Deserialization, " +
"taskId: {}, topic: {}, partition: {}, offset: {}",
context.taskId(), record.topic(), record.partition(), record.offset(),
exception);
return DeserializationHandlerResponse.FAIL;
}
@Override
public void configure(final Map<String, ?> configs) {
// ignore
}
}

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a processor state operation (e.g. put, get) has failed.
*
* @see org.apache.kafka.streams.processor.StateStore
*/
public class ProcessorStateException extends StreamsException {
private final static long serialVersionUID = 1L;
public ProcessorStateException(final String message) {
super(message);
}
public ProcessorStateException(final String message, final Throwable throwable) {
super(message, throwable);
}
public ProcessorStateException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.Configurable;
/**
* Interface that specifies how an exception when attempting to produce a result to
* Kafka should be handled.
*/
public interface ProductionExceptionHandler extends Configurable {
/**
* Inspect a record that we attempted to produce, and the exception that resulted
* from attempting to produce it, and determine whether or not to continue processing.
*
* @param record The record that failed to produce
* @param exception The exception that occurred during production
*/
ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
final Exception exception);
enum ProductionExceptionHandlerResponse {
/* continue processing */
CONTINUE(0, "CONTINUE"),
/* fail processing */
FAIL(1, "FAIL");
/**
* an English description of the API--this is for debugging and can change
*/
public final String name;
/**
* the permanent and immutable id of an API--this can't change ever
*/
public final int id;
ProductionExceptionHandlerResponse(final int id,
final String name) {
this.id = id;
this.name = name;
}
}
}
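As a sketch of a custom implementation (not part of this diff), a handler might skip records rejected as too large while failing on anything else; it would then be registered under StreamsConfig.DEFAULT_PRODUCTION_EXCEPTION_HANDLER_CLASS_CONFIG:

package examples;

import java.util.Map;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.errors.RecordTooLargeException;
import org.apache.kafka.streams.errors.ProductionExceptionHandler;

public class IgnoreRecordTooLargeHandler implements ProductionExceptionHandler {

    // Continue past oversized records, fail on any other production error.
    @Override
    public ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
                                                     final Exception exception) {
        return exception instanceof RecordTooLargeException
            ? ProductionExceptionHandlerResponse.CONTINUE
            : ProductionExceptionHandlerResponse.FAIL;
    }

    @Override
    public void configure(final Map<String, ?> configs) {
        // no configuration needed
    }
}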

View File

@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.common.KafkaException;
/**
* {@link StreamsException} is the top-level exception type generated by Kafka Streams.
*/
public class StreamsException extends KafkaException {
private final static long serialVersionUID = 1L;
public StreamsException(final String message) {
super(message);
}
public StreamsException(final String message, final Throwable throwable) {
super(message, throwable);
}
public StreamsException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a run time error incurred while trying to assign
* {@link org.apache.kafka.streams.processor.internals.StreamTask stream tasks} to
* {@link org.apache.kafka.streams.processor.internals.StreamThread threads}.
*/
public class TaskAssignmentException extends StreamsException {
private final static long serialVersionUID = 1L;
public TaskAssignmentException(final String message) {
super(message);
}
public TaskAssignmentException(final String message, final Throwable throwable) {
super(message, throwable);
}
public TaskAssignmentException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a run time error incurred while trying to parse the {@link org.apache.kafka.streams.processor.TaskId task id}
* from the read string.
*
* @see org.apache.kafka.streams.processor.internals.StreamTask
*/
public class TaskIdFormatException extends StreamsException {
private static final long serialVersionUID = 1L;
public TaskIdFormatException(final String message) {
super("Task id cannot be parsed correctly" + (message == null ? "" : " from " + message));
}
public TaskIdFormatException(final String message, final Throwable throwable) {
super("Task id cannot be parsed correctly" + (message == null ? "" : " from " + message), throwable);
}
public TaskIdFormatException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.streams.processor.internals.Task;
/**
* Indicates that a task got migrated to another thread.
* Thus, the task raising this exception can be cleaned up and closed as a "zombie".
*/
public class TaskMigratedException extends StreamsException {
private final static long serialVersionUID = 1L;
private final Task task;
// this is for unit test only
public TaskMigratedException() {
super("A task has been migrated unexpectedly", null);
this.task = null;
}
public TaskMigratedException(final Task task,
final TopicPartition topicPartition,
final long endOffset,
final long pos) {
super(String.format("Log end offset of %s should not change while restoring: old end offset %d, current offset %d",
topicPartition,
endOffset,
pos),
null);
this.task = task;
}
public TaskMigratedException(final Task task) {
super(String.format("Task %s is unexpectedly closed during processing", task.id()), null);
this.task = task;
}
public TaskMigratedException(final Task task,
final Throwable throwable) {
super(String.format("Client request for task %s has been fenced due to a rebalance", task.id()), throwable);
this.task = task;
}
public Task migratedTask() {
return task;
}
}

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a pre-runtime error that occurred while parsing the {@link org.apache.kafka.streams.Topology logical topology}
* to construct the {@link org.apache.kafka.streams.processor.internals.ProcessorTopology physical processor topology}.
*/
public class TopologyException extends StreamsException {
private static final long serialVersionUID = 1L;
public TopologyException(final String message) {
super("Invalid topology" + (message == null ? "" : ": " + message));
}
public TopologyException(final String message,
final Throwable throwable) {
super("Invalid topology" + (message == null ? "" : ": " + message), throwable);
}
public TopologyException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.internals;
import java.time.Duration;
import java.time.Instant;
import static java.lang.String.format;
public final class ApiUtils {
private static final String MILLISECOND_VALIDATION_FAIL_MSG_FRMT = "Invalid value for parameter \"%s\" (value was: %s). ";
private static final String VALIDATE_MILLISECOND_NULL_SUFFIX = "It shouldn't be null.";
private static final String VALIDATE_MILLISECOND_OVERFLOW_SUFFIX = "It can't be converted to milliseconds.";
private ApiUtils() {
}
/**
* Validates that milliseconds from {@code duration} can be retrieved.
* @param duration Duration to check.
* @param messagePrefix Prefix text for an error message.
* @return Milliseconds from {@code duration}.
*/
public static long validateMillisecondDuration(final Duration duration, final String messagePrefix) {
try {
if (duration == null) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_NULL_SUFFIX);
}
return duration.toMillis();
} catch (final ArithmeticException e) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_OVERFLOW_SUFFIX, e);
}
}
/**
* Validates that milliseconds from {@code instant} can be retrieved.
* @param instant Instant to check.
* @param messagePrefix Prefix text for an error message.
* @return Milliseconds from {@code instant}.
*/
public static long validateMillisecondInstant(final Instant instant, final String messagePrefix) {
try {
if (instant == null) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_NULL_SUFFIX);
}
return instant.toEpochMilli();
} catch (final ArithmeticException e) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_OVERFLOW_SUFFIX, e);
}
}
/**
* Generates the prefix message for validateMillisecondXXXXXX() utility
* @param value Object to be converted to milliseconds
* @param name Object name
* @return Error message prefix to use in exception
*/
public static String prepareMillisCheckFailMsgPrefix(final Object value, final String name) {
return format(MILLISECOND_VALIDATION_FAIL_MSG_FRMT, name, value);
}
}
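A minimal sketch of how these validators are typically invoked by the library's public API methods (the class above is internal, so this is illustrative only); the windowSize parameter and example class are hypothetical:

package examples;

import java.time.Duration;

import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.internals.ApiUtils.validateMillisecondDuration;

public final class ApiUtilsUsageExample {

    // Convert a caller-supplied Duration to millis, rejecting null and overflowing values.
    public static long windowSizeMs(final Duration windowSize) {
        final String msgPrefix = prepareMillisCheckFailMsgPrefix(windowSize, "windowSize");
        return validateMillisecondDuration(windowSize, msgPrefix);
    }
}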

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.internals;
import org.apache.kafka.streams.StreamsConfig;
import java.util.Map;
/**
* A {@link StreamsConfig} that does not log its configuration on construction.
*
* This produces cleaner output for unit tests using the {@code test-utils},
* since logging the config is not really valuable in this context.
*/
public class QuietStreamsConfig extends StreamsConfig {
public QuietStreamsConfig(final Map<?, ?> props) {
super(props, false);
}
}

View File

@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.internals.metrics;
import org.apache.kafka.common.metrics.Gauge;
import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
import org.apache.kafka.streams.KafkaStreams.State;
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.util.Properties;
public class ClientMetrics {
private ClientMetrics() {}
private static final Logger log = LoggerFactory.getLogger(ClientMetrics.class);
private static final String VERSION = "version";
private static final String COMMIT_ID = "commit-id";
private static final String APPLICATION_ID = "application-id";
private static final String TOPOLOGY_DESCRIPTION = "topology-description";
private static final String STATE = "state";
private static final String VERSION_FROM_FILE;
private static final String COMMIT_ID_FROM_FILE;
private static final String DEFAULT_VALUE = "unknown";
static {
final Properties props = new Properties();
try (InputStream resourceStream = ClientMetrics.class.getResourceAsStream(
"/kafka/kafka-streams-version.properties")) {
props.load(resourceStream);
} catch (final Exception exception) {
log.warn("Error while loading kafka-streams-version.properties", exception);
}
VERSION_FROM_FILE = props.getProperty("version", DEFAULT_VALUE).trim();
COMMIT_ID_FROM_FILE = props.getProperty("commitId", DEFAULT_VALUE).trim();
}
private static final String VERSION_DESCRIPTION = "The version of the Kafka Streams client";
private static final String COMMIT_ID_DESCRIPTION = "The version control commit ID of the Kafka Streams client";
private static final String APPLICATION_ID_DESCRIPTION = "The application ID of the Kafka Streams client";
private static final String TOPOLOGY_DESCRIPTION_DESCRIPTION =
"The description of the topology executed in the Kafka Streams client";
private static final String STATE_DESCRIPTION = "The state of the Kafka Streams client";
public static String version() {
return VERSION_FROM_FILE;
}
public static String commitId() {
return COMMIT_ID_FROM_FILE;
}
public static void addVersionMetric(final StreamsMetricsImpl streamsMetrics) {
streamsMetrics.addClientLevelImmutableMetric(
VERSION,
VERSION_DESCRIPTION,
RecordingLevel.INFO,
VERSION_FROM_FILE
);
}
public static void addCommitIdMetric(final StreamsMetricsImpl streamsMetrics) {
streamsMetrics.addClientLevelImmutableMetric(
COMMIT_ID,
COMMIT_ID_DESCRIPTION,
RecordingLevel.INFO,
COMMIT_ID_FROM_FILE
);
}
public static void addApplicationIdMetric(final StreamsMetricsImpl streamsMetrics, final String applicationId) {
streamsMetrics.addClientLevelImmutableMetric(
APPLICATION_ID,
APPLICATION_ID_DESCRIPTION,
RecordingLevel.INFO,
applicationId
);
}
public static void addTopologyDescriptionMetric(final StreamsMetricsImpl streamsMetrics,
final String topologyDescription) {
streamsMetrics.addClientLevelImmutableMetric(
TOPOLOGY_DESCRIPTION,
TOPOLOGY_DESCRIPTION_DESCRIPTION,
RecordingLevel.INFO,
topologyDescription
);
}
public static void addStateMetric(final StreamsMetricsImpl streamsMetrics,
final Gauge<State> stateProvider) {
streamsMetrics.addClientLevelMutableMetric(
STATE,
STATE_DESCRIPTION,
RecordingLevel.INFO,
stateProvider
);
}
}

View File

@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code Aggregator} interface for aggregating values of the given key.
* This is a generalization of {@link Reducer} and allows the input value and the aggregation result to have
* different types.
* {@code Aggregator} is used in combination with {@link Initializer} that provides an initial aggregation value.
* <p>
* {@code Aggregator} can be used to implement aggregation functions like count.
* @param <K> key type
* @param <V> input value type
* @param <VA> aggregate value type
* @see Initializer
* @see KGroupedStream#aggregate(Initializer, Aggregator)
* @see KGroupedStream#aggregate(Initializer, Aggregator, Materialized)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator, Materialized)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger, Materialized)
* @see Reducer
*/
public interface Aggregator<K, V, VA> {
/**
* Compute a new aggregate from the key and value of a record and the current aggregate of the same key.
*
* @param key the key of the record
* @param value the value of the record
* @param aggregate the current aggregate value
* @return the new aggregate value
*/
VA apply(final K key, final V value, final VA aggregate);
}
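A minimal count-style sketch that pairs an Initializer with an Aggregator; the topic, store name, and example class are placeholders, and serdes are set explicitly to keep the sketch self-contained:

package examples;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

public final class CountAggregatorExample {

    // Count records per key: the Initializer seeds 0L, the Aggregator adds one per record.
    public static KTable<String, Long> countPerKey(final StreamsBuilder builder, final String topic) {
        final KStream<String, String> stream = builder.stream(topic);
        return stream
            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .aggregate(
                () -> 0L,                                  // Initializer
                (key, value, aggregate) -> aggregate + 1L, // Aggregator
                Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store")
                    .withValueSerde(Serdes.Long()));
    }
}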

View File

@@ -0,0 +1,286 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.KeyValueStore;
/**
* {@code CogroupedKStream} is an abstraction of multiple <i>grouped</i> record streams of {@link KeyValue} pairs.
* <p>
* It is an intermediate representation after a grouping of {@link KStream}s, before the
* aggregations are applied to the new partitions resulting in a {@link KTable}.
* <p>
* A {@code CogroupedKStream} must be obtained from a {@link KGroupedStream} via
* {@link KGroupedStream#cogroup(Aggregator) cogroup(...)}.
*
* @param <K> Type of keys
* @param <VOut> Type of values after aggregation
*/
public interface CogroupedKStream<K, VOut> {
/**
* Add an already {@link KGroupedStream grouped KStream} to this {@code CogroupedKStream}.
* <p>
* The added {@link KGroupedStream grouped KStream} must have the same number of partitions as all existing
* streams of this {@code CogroupedKStream}.
* If this is not the case, you would need to call {@link KStream#through(String)} before
* {@link KStream#groupByKey() grouping} the {@link KStream}, using a pre-created topic with the "correct" number of
* partitions.
* <p>
* The specified {@link Aggregator} is applied in the actual {@link #aggregate(Initializer) aggregation} step for
* each input record and computes a new aggregate using the current aggregate (or for the very first record per key
* using the initial intermediate aggregation result provided via the {@link Initializer} that is passed into
* {@link #aggregate(Initializer)}) and the record's value.
*
* @param groupedStream a grouped stream
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param <VIn> Type of input values
* @return a {@code CogroupedKStream}
*/
<VIn> CogroupedKStream<K, VOut> cogroup(final KGroupedStream<K, VIn> groupedStream,
final Aggregator<? super K, ? super VIn, VOut> aggregator);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is a generated value, and
* "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Named} is applied once to the processor combining the grouped streams.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @param named       a {@link Named} config used to name the processor. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Named named);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store.
* Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Named} is used to name the processor combining the grouped streams.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store.
* Cannot be {@code null}.
* @param named        a {@link Named} config used to name the processors. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Named named,
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Create a new {@link TimeWindowedCogroupedKStream} instance that can be used to perform windowed
* aggregations.
*
* @param windows the specification of the aggregation {@link Windows}
* @param <W> the window type
* @return an instance of {@link TimeWindowedCogroupedKStream}
*/
<W extends Window> TimeWindowedCogroupedKStream<K, VOut> windowedBy(final Windows<W> windows);
/**
* Create a new {@link SessionWindowedCogroupedKStream} instance that can be used to perform session
* windowed aggregations.
*
* @param windows the specification of the aggregation {@link SessionWindows}
* @return an instance of {@link SessionWindowedCogroupedKStream}
*/
SessionWindowedCogroupedKStream<K, VOut> windowedBy(final SessionWindows windows);
}
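A minimal sketch of cogrouping two grouped streams into one aggregate; the topic names are placeholders, default serdes are assumed to be configured, and the example class is hypothetical:

package examples;

import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;

public final class CogroupExample {

    // Combine "clicks" and "views" into a single per-user event count.
    public static KTable<String, Long> totalEventsPerUser(final StreamsBuilder builder) {
        final KGroupedStream<String, String> clicks =
            builder.<String, String>stream("clicks-topic").groupByKey();
        final KGroupedStream<String, String> views =
            builder.<String, String>stream("views-topic").groupByKey();

        final Aggregator<String, String, Long> countClicks = (key, value, aggregate) -> aggregate + 1L;
        final Aggregator<String, String, Long> countViews = (key, value, aggregate) -> aggregate + 1L;

        return clicks
            .cogroup(countClicks)          // first grouped stream defines the CogroupedKStream
            .cogroup(views, countViews)    // further grouped streams are added with their own Aggregator
            .aggregate(() -> 0L);          // shared Initializer for all co-grouped streams
    }
}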

View File

@@ -0,0 +1,230 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.util.Objects;
/**
* The {@code Consumed} class is used to define the optional parameters when using {@link StreamsBuilder} to
* build instances of {@link KStream}, {@link KTable}, and {@link GlobalKTable}.
* <p>
* For example, you can read a topic as {@link KStream} with a custom timestamp extractor and specify the corresponding
* key and value serdes like:
* <pre>{@code
* StreamsBuilder builder = new StreamsBuilder();
* KStream<String, Long> stream = builder.stream(
* "topicName",
* Consumed.with(Serdes.String(), Serdes.Long())
* .withTimestampExtractor(new LogAndSkipOnInvalidTimestamp()));
* }</pre>
* Similarly, you can read a topic as {@link KTable} with a custom {@code auto.offset.reset} configuration and force a
* state store {@link org.apache.kafka.streams.kstream.Materialized materialization} to access the content via
* interactive queries:
* <pre>{@code
* StreamsBuilder builder = new StreamsBuilder();
* KTable<Integer, Integer> table = builder.table(
* "topicName",
* Consumed.with(AutoOffsetReset.LATEST),
* Materialized.as("queryable-store-name"));
* }</pre>
*
* @param <K> type of record key
* @param <V> type of record value
*/
public class Consumed<K, V> implements NamedOperation<Consumed<K, V>> {
protected Serde<K> keySerde;
protected Serde<V> valueSerde;
protected TimestampExtractor timestampExtractor;
protected Topology.AutoOffsetReset resetPolicy;
protected String processorName;
private Consumed(final Serde<K> keySerde,
final Serde<V> valueSerde,
final TimestampExtractor timestampExtractor,
final Topology.AutoOffsetReset resetPolicy,
final String processorName) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.timestampExtractor = timestampExtractor;
this.resetPolicy = resetPolicy;
this.processorName = processorName;
}
/**
* Create an instance of {@link Consumed} from an existing instance.
* @param consumed the instance of {@link Consumed} to copy
*/
protected Consumed(final Consumed<K, V> consumed) {
this(consumed.keySerde,
consumed.valueSerde,
consumed.timestampExtractor,
consumed.resetPolicy,
consumed.processorName
);
}
/**
* Create an instance of {@link Consumed} with the supplied arguments. {@code null} values are acceptable.
*
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final TimestampExtractor timestampExtractor,
final Topology.AutoOffsetReset resetPolicy) {
return new Consumed<>(keySerde, valueSerde, timestampExtractor, resetPolicy, null);
}
/**
* Create an instance of {@link Consumed} with key and value {@link Serde}s.
*
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Consumed<>(keySerde, valueSerde, null, null, null);
}
/**
* Create an instance of {@link Consumed} with a {@link TimestampExtractor}.
*
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final TimestampExtractor timestampExtractor) {
return new Consumed<>(null, null, timestampExtractor, null, null);
}
/**
* Create an instance of {@link Consumed} with a {@link org.apache.kafka.streams.Topology.AutoOffsetReset Topology.AutoOffsetReset}.
*
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final Topology.AutoOffsetReset resetPolicy) {
return new Consumed<>(null, null, null, resetPolicy, null);
}
/**
* Create an instance of {@link Consumed} with provided processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> as(final String processorName) {
return new Consumed<>(null, null, null, null, processorName);
}
/**
* Configure the instance of {@link Consumed} with a key {@link Serde}.
*
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
* @return this
*/
public Consumed<K, V> withKeySerde(final Serde<K> keySerde) {
this.keySerde = keySerde;
return this;
}
/**
* Configure the instance of {@link Consumed} with a value {@link Serde}.
*
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
* @return this
*/
public Consumed<K, V> withValueSerde(final Serde<V> valueSerde) {
this.valueSerde = valueSerde;
return this;
}
/**
* Configure the instance of {@link Consumed} with a {@link TimestampExtractor}.
*
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
* @return this
*/
public Consumed<K, V> withTimestampExtractor(final TimestampExtractor timestampExtractor) {
this.timestampExtractor = timestampExtractor;
return this;
}
/**
* Configure the instance of {@link Consumed} with a {@link org.apache.kafka.streams.Topology.AutoOffsetReset Topology.AutoOffsetReset}.
*
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
* @return this
*/
public Consumed<K, V> withOffsetResetPolicy(final Topology.AutoOffsetReset resetPolicy) {
this.resetPolicy = resetPolicy;
return this;
}
/**
* Configure the instance of {@link Consumed} with a processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @return this
*/
@Override
public Consumed<K, V> withName(final String processorName) {
this.processorName = processorName;
return this;
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final Consumed<?, ?> consumed = (Consumed<?, ?>) o;
return Objects.equals(keySerde, consumed.keySerde) &&
Objects.equals(valueSerde, consumed.valueSerde) &&
Objects.equals(timestampExtractor, consumed.timestampExtractor) &&
resetPolicy == consumed.resetPolicy;
}
@Override
public int hashCode() {
return Objects.hash(keySerde, valueSerde, timestampExtractor, resetPolicy);
}
}
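A brief usage sketch of the options above, assuming a hypothetical "input-topic" with String keys and Long values; the topic, processor name, and output are illustrative assumptions, not part of this file:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;

public class ConsumedExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // Bundle serdes, a reset policy, and an explicit processor name into one Consumed.
        final KStream<String, Long> stream = builder.stream(
            "input-topic",
            Consumed.with(Serdes.String(), Serdes.Long())
                    .withOffsetResetPolicy(Topology.AutoOffsetReset.EARLIEST)
                    .withName("input-source"));
        stream.foreach((key, value) -> System.out.println(key + " -> " + value));
        System.out.println(builder.build().describe());
    }
}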

View File

@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ForeachAction} interface for performing an action on a {@link org.apache.kafka.streams.KeyValue key-value
* pair}.
* This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
* record of a stream.
* If stateful processing is required, consider using
* {@link KStream#process(org.apache.kafka.streams.processor.ProcessorSupplier, String...) KStream#process(...)}.
*
* @param <K> key type
* @param <V> value type
* @see KStream#foreach(ForeachAction)
*/
public interface ForeachAction<K, V> {
/**
* Perform an action for each record of a stream.
*
* @param key the key of the record
* @param value the value of the record
*/
void apply(final K key, final V value);
}
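Because ForeachAction has a single abstract method, a lambda or method reference can be passed to KStream#foreach. A minimal sketch, assuming a "clicks" topic with String keys and Long values:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.ForeachAction;

public class ForeachExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // ForeachAction is a functional interface, so a lambda (or method reference) works.
        final ForeachAction<String, Long> printAction =
            (key, value) -> System.out.println(key + " = " + value);
        builder.stream("clicks", Consumed.with(Serdes.String(), Serdes.Long()))
               .foreach(printAction);
    }
}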

View File

@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
/**
* {@code GlobalKTable} is an abstraction of a <i>changelog stream</i> from a primary-keyed table.
* Each record in this changelog stream is an update on the primary-keyed table with the record key as the primary key.
* <p>
* {@code GlobalKTable} can only be used as right-hand side input for {@link KStream stream}-table joins.
* <p>
* In contrast to a {@link KTable} that is partitioned over all {@link KafkaStreams} instances, a {@code GlobalKTable}
* is fully replicated per {@link KafkaStreams} instance.
* Every partition of the underlying topic is consumed by each {@code GlobalKTable}, such that the full set of data is
* available in every {@link KafkaStreams} instance.
* This provides the ability to perform joins with {@link KStream} without having to repartition the input stream.
* All joins with the {@code GlobalKTable} require that a {@link KeyValueMapper} is provided that can map from the
* {@link KeyValue} of the left hand side {@link KStream} to the key of the right hand side {@code GlobalKTable}.
* <p>
* A {@code GlobalKTable} is created via a {@link StreamsBuilder}. For example:
* <pre>{@code
* builder.globalTable("topic-name", "queryable-store-name");
* }</pre>
* All {@code GlobalKTable}s are backed by a {@link ReadOnlyKeyValueStore} and are therefore queryable via the
* interactive queries API.
* For example:
* <pre>{@code
* final GlobalKTable globalOne = builder.globalTable("g1", "g1-store");
* final GlobalKTable globalTwo = builder.globalTable("g2", "g2-store");
* ...
* final KafkaStreams streams = ...;
* streams.start();
* ...
* ReadOnlyKeyValueStore view = streams.store("g1-store", QueryableStoreTypes.keyValueStore());
* view.get(key); // can be done on any key, as all keys are present
* }</pre>
* Note that in contrast to {@link KTable} a {@code GlobalKTable}'s state holds a full copy of the underlying topic,
* thus all keys can be queried locally.
* <p>
* Records from the source topic that have null keys are dropped.
*
* @param <K> Type of primary keys
* @param <V> Type of value changes
* @see KTable
* @see StreamsBuilder#globalTable(String)
* @see KStream#join(GlobalKTable, KeyValueMapper, ValueJoiner)
* @see KStream#leftJoin(GlobalKTable, KeyValueMapper, ValueJoiner)
*/
public interface GlobalKTable<K, V> {
/**
* Get the name of the local state store that can be used to query this {@code GlobalKTable}.
*
* @return the underlying state store name, or {@code null} if this {@code GlobalKTable} cannot be queried.
*/
String queryableStoreName();
}
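A sketch of the stream-global-table join described above; the topic names, serdes, and value schemas ("orders" carrying a customer id, "customers" keyed by customer id) are assumptions for illustration:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.KStream;

public class GlobalKTableJoinExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> orders =
            builder.stream("orders", Consumed.with(Serdes.String(), Serdes.String()));
        // Fully replicated on every instance, so the join needs no repartitioning.
        final GlobalKTable<String, String> customers =
            builder.globalTable("customers", Consumed.with(Serdes.String(), Serdes.String()));
        // The KeyValueMapper extracts the customer id from the stream record's value.
        orders.join(customers,
                    (orderId, customerId) -> customerId,
                    (order, customer) -> order + " placed by " + customer)
              .to("enriched-orders");
    }
}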

View File

@@ -0,0 +1,159 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
/**
* The class that is used to capture the key and value {@link Serde}s and set the part of the name used for
* repartition topics when performing {@link KStream#groupBy(KeyValueMapper, Grouped)}, {@link
* KStream#groupByKey(Grouped)}, or {@link KTable#groupBy(KeyValueMapper, Grouped)} operations. Note
* that Kafka Streams does not always create repartition topics for grouping operations.
*
* @param <K> the key type
* @param <V> the value type
*/
public class Grouped<K, V> implements NamedOperation<Grouped<K, V>> {
protected final Serde<K> keySerde;
protected final Serde<V> valueSerde;
protected final String name;
private Grouped(final String name,
final Serde<K> keySerde,
final Serde<V> valueSerde) {
this.name = name;
this.keySerde = keySerde;
this.valueSerde = valueSerde;
}
protected Grouped(final Grouped<K, V> grouped) {
this(grouped.name, grouped.keySerde, grouped.valueSerde);
}
/**
* Create a {@link Grouped} instance with the provided name used as part of the repartition topic if required.
*
* @param name the name used for a repartition topic if required
* @return a new {@link Grouped} configured with the name
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K, V> Grouped<K, V> as(final String name) {
return new Grouped<>(name, null, null);
}
/**
* Create a {@link Grouped} instance with the provided keySerde. If {@code null} the default key serde from config will be used.
*
* @param keySerde the Serde used for serializing the key. If {@code null} the default key serde from config will be used
* @return a new {@link Grouped} configured with the keySerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K> Grouped keySerde(final Serde<K> keySerde) {
return new Grouped<>(null, keySerde, null);
}
/**
* Create a {@link Grouped} instance with the provided valueSerde. If {@code null} the default value serde from config will be used.
*
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} configured with the valueSerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <V> Grouped valueSerde(final Serde<V> valueSerde) {
return new Grouped<>(null, null, valueSerde);
}
/**
* Create a {@link Grouped} instance with the provided name, keySerde, and valueSerde. If the keySerde and/or the valueSerde is
* {@code null} the default value for the respective serde from config will be used.
*
* @param name the name used as part of the repartition topic name if required
* @param keySerde the {@link Serde} used for serializing the key. If {@code null} the default key serde from config will be used
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} configured with the name, keySerde, and valueSerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K, V> Grouped<K, V> with(final String name,
final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Grouped<>(name, keySerde, valueSerde);
}
/**
* Create a {@link Grouped} instance with the provided keySerde and valueSerde. If the keySerde and/or the valueSerde is
* {@code null} the default value for the respective serde from config will be used.
*
* @param keySerde the {@link Serde} used for serializing the key. If {@code null} the default key serde from config will be used
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} configured with the keySerde, and valueSerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K, V> Grouped<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Grouped<>(null, keySerde, valueSerde);
}
/**
* Perform the grouping operation with the name for a repartition topic if required. Note
* that Kafka Streams does not always create repartition topics for grouping operations.
*
* @param name the name used for the processor name and as part of the repartition topic name if required
* @return a new {@link Grouped} instance configured with the name
*/
@Override
public Grouped<K, V> withName(final String name) {
return new Grouped<>(name, keySerde, valueSerde);
}
/**
* Perform the grouping operation using the provided keySerde for serializing the key.
*
* @param keySerde {@link Serde} to use for serializing the key. If {@code null} the default key serde from config will be used
* @return a new {@link Grouped} instance configured with the keySerde
*/
public Grouped<K, V> withKeySerde(final Serde<K> keySerde) {
return new Grouped<>(name, keySerde, valueSerde);
}
/**
* Perform the grouping operation using the provided valueSerde for serializing the value.
*
* @param valueSerde {@link Serde} to use for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} instance configured with the valueSerde
*/
public Grouped<K, V> withValueSerde(final Serde<V> valueSerde) {
return new Grouped<>(name, keySerde, valueSerde);
}
}
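A sketch of grouping with explicit serdes and a repartition name; the "words" topic (one word per record value) and the output topic are assumptions:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;

public class GroupedExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // Re-keying by the word forces a repartition; the Grouped name ("by-word")
        // becomes part of the repartition topic's name.
        final KTable<String, Long> wordCounts = builder
            .stream("words", Consumed.with(Serdes.String(), Serdes.String()))
            .groupBy((key, word) -> word,
                     Grouped.with("by-word", Serdes.String(), Serdes.String()))
            .count();
        wordCounts.toStream().to("word-counts", Produced.with(Serdes.String(), Serdes.Long()));
    }
}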

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code Initializer} interface for creating an initial value in aggregations.
* {@code Initializer} is used in combination with {@link Aggregator}.
*
* @param <VA> aggregate value type
* @see Aggregator
* @see KGroupedStream#aggregate(Initializer, Aggregator)
* @see KGroupedStream#aggregate(Initializer, Aggregator, Materialized)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator, Materialized)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger, Materialized)
*/
public interface Initializer<VA> {
/**
* Return the initial value for an aggregation.
*
* @return the initial value for an aggregation
*/
VA apply();
}
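A sketch pairing an Initializer with an Aggregator to keep a running total per key; the "amounts" topic, the store name, and the serdes are assumptions:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

public class InitializerExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // The Initializer supplies the seed (0L) used before a key's first record is processed.
        final Initializer<Long> zero = () -> 0L;
        final KTable<String, Long> totals = builder
            .stream("amounts", Consumed.with(Serdes.String(), Serdes.Long()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
            .aggregate(zero,
                       (key, amount, runningTotal) -> runningTotal + amount,
                       Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("totals-store")
                                   .withValueSerde(Serdes.Long()));
        totals.toStream().foreach((key, total) -> System.out.println(key + " -> " + total));
    }
}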

View File

@@ -0,0 +1,309 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.time.Duration;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* The window specifications used for joins.
* <p>
* A {@code JoinWindows} instance defines a maximum time difference for a {@link KStream#join(KStream, ValueJoiner,
* JoinWindows) join over two streams} on the same key.
* In SQL-style you would express this join as
* <pre>{@code
* SELECT * FROM stream1, stream2
* WHERE
* stream1.key = stream2.key
* AND
* stream1.ts - before <= stream2.ts AND stream2.ts <= stream1.ts + after
* }</pre>
* There are three different window configurations supported:
* <ul>
* <li>before = after = time-difference</li>
* <li>before = 0 and after = time-difference</li>
* <li>before = time-difference and after = 0</li>
* </ul>
* A join is symmetric in the sense that a join specification on the first stream returns the same result record as
* a join specification on the second stream with flipped before and after values.
* <p>
* Both values (before and after) must not result in an "inverse" window, i.e., upper-interval bound cannot be smaller
* than lower-interval bound.
* <p>
* {@code JoinWindows} are sliding windows, thus, they are aligned to the actual record timestamps.
* This implies that each input record defines its own window with start and end time being relative to the record's
* timestamp.
* <p>
* For time semantics, see {@link TimestampExtractor}.
*
* @see TimeWindows
* @see UnlimitedWindows
* @see SessionWindows
* @see KStream#join(KStream, ValueJoiner, JoinWindows)
* @see KStream#join(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see TimestampExtractor
*/
public final class JoinWindows extends Windows<Window> {
private final long maintainDurationMs;
/** Maximum time difference for tuples that are before the join tuple. */
public final long beforeMs;
/** Maximum time difference for tuples that are after the join tuple. */
public final long afterMs;
private final long graceMs;
private JoinWindows(final long beforeMs,
final long afterMs,
final long graceMs,
final long maintainDurationMs) {
if (beforeMs + afterMs < 0) {
throw new IllegalArgumentException("Window interval (ie, beforeMs+afterMs) must not be negative.");
}
this.afterMs = afterMs;
this.beforeMs = beforeMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
@Deprecated // removing segments from Windows will fix this
private JoinWindows(final long beforeMs,
final long afterMs,
final long graceMs,
final long maintainDurationMs,
final int segments) {
super(segments);
if (beforeMs + afterMs < 0) {
throw new IllegalArgumentException("Window interval (ie, beforeMs+afterMs) must not be negative.");
}
this.afterMs = afterMs;
this.beforeMs = beforeMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
/**
* Specifies that records of the same key are joinable if their timestamps are within {@code timeDifferenceMs},
* i.e., the timestamp of a record from the secondary stream is at most {@code timeDifferenceMs} earlier or later than
* the timestamp of the record from the primary stream.
*
* @param timeDifferenceMs join window interval in milliseconds
* @throws IllegalArgumentException if {@code timeDifferenceMs} is negative
* @deprecated Use {@link #of(Duration)} instead.
*/
@Deprecated
public static JoinWindows of(final long timeDifferenceMs) throws IllegalArgumentException {
// This is a static factory method, so we initialize grace and retention to the defaults.
return new JoinWindows(timeDifferenceMs, timeDifferenceMs, -1L, DEFAULT_RETENTION_MS);
}
/**
* Specifies that records of the same key are joinable if their timestamps are within {@code timeDifference},
* i.e., the timestamp of a record from the secondary stream is at most {@code timeDifference} earlier or later than
* the timestamp of the record from the primary stream.
*
* @param timeDifference join window interval
* @throws IllegalArgumentException if {@code timeDifference} is negative or can't be represented as {@code long milliseconds}
*/
public static JoinWindows of(final Duration timeDifference) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
return of(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
}
/**
* Changes the start window boundary to {@code timeDifferenceMs} but keeps the end window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifferenceMs} earlier than the timestamp of the record from the primary stream.
* {@code timeDifferenceMs} can be negative but its absolute value must not be larger than current window "after"
* value (which would result in a negative window size).
*
* @param timeDifferenceMs relative window start time in milliseconds
* @throws IllegalArgumentException if the resulting window size is negative
* @deprecated Use {@link #before(Duration)} instead.
*/
@Deprecated
public JoinWindows before(final long timeDifferenceMs) throws IllegalArgumentException {
return new JoinWindows(timeDifferenceMs, afterMs, graceMs, maintainDurationMs, segments);
}
/**
* Changes the start window boundary to {@code timeDifference} but keeps the end window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifference} earlier than the timestamp of the record from the primary stream.
* {@code timeDifference} can be negative but its absolute value must not be larger than current window "after"
* value (which would result in a negative window size).
*
* @param timeDifference relative window start time
* @throws IllegalArgumentException if the resulting window size is negative or {@code timeDifference} can't be represented as {@code long milliseconds}
*/
public JoinWindows before(final Duration timeDifference) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
return before(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
}
/**
* Changes the end window boundary to {@code timeDifferenceMs} but keeps the start window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifferenceMs} later than the timestamp of the record from the primary stream.
* {@code timeDifferenceMs} can be negative but its absolute value must not be larger than current window "before"
* value (which would result in a negative window size).
*
* @param timeDifferenceMs relative window end time in milliseconds
* @throws IllegalArgumentException if the resulting window size is negative
* @deprecated Use {@link #after(Duration)} instead
*/
@Deprecated
public JoinWindows after(final long timeDifferenceMs) throws IllegalArgumentException {
return new JoinWindows(beforeMs, timeDifferenceMs, graceMs, maintainDurationMs, segments);
}
/**
* Changes the end window boundary to {@code timeDifference} but keeps the start window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifference} later than the timestamp of the record from the primary stream.
* {@code timeDifference} can be negative but its absolute value must not be larger than current window "before"
* value (which would result in a negative window size).
*
* @param timeDifference relative window end time
* @throws IllegalArgumentException if the resulting window size is negative or {@code timeDifference} can't be represented as {@code long milliseconds}
*/
public JoinWindows after(final Duration timeDifference) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
return after(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
}
/**
* Not supported by {@code JoinWindows}.
* Throws {@link UnsupportedOperationException}.
*
* @throws UnsupportedOperationException at every invocation
*/
@Override
public Map<Long, Window> windowsFor(final long timestamp) {
throw new UnsupportedOperationException("windowsFor() is not supported by JoinWindows.");
}
@Override
public long size() {
return beforeMs + afterMs;
}
/**
* Reject out-of-order events that are delayed more than {@code afterWindowEnd}
* after the end of their window.
* <p>
* Delay is defined as (stream_time - record_timestamp).
*
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
* @return this updated builder
* @throws IllegalArgumentException if the {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
*/
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
public JoinWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
if (afterWindowEndMs < 0) {
throw new IllegalArgumentException("Grace period must not be negative.");
}
return new JoinWindows(beforeMs, afterMs, afterWindowEndMs, maintainDurationMs, segments);
}
@Override
public long gracePeriodMs() {
// NOTE: in the future, when we remove maintainMs,
// we should default the grace period to 24h to maintain the default behavior,
// or we can default to (24h - size) if you want to be super accurate.
return graceMs != -1 ? graceMs : maintainMs() - size();
}
/**
* @param durationMs the window retention time in milliseconds
* @return itself
* @throws IllegalArgumentException if {@code durationMs} is smaller than the window size
* @deprecated since 2.1. Use {@link JoinWindows#grace(Duration)} instead.
*/
@Override
@Deprecated
public JoinWindows until(final long durationMs) throws IllegalArgumentException {
if (durationMs < size()) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than the window size.");
}
return new JoinWindows(beforeMs, afterMs, graceMs, durationMs, segments);
}
/**
* {@inheritDoc}
* <p>
* For {@code JoinWindows} the maintain duration is at least the window size.
*
* @return the window maintain duration
* @deprecated since 2.1. This function should not be used anymore, since {@link JoinWindows#until(long)}
* is deprecated in favor of {@link JoinWindows#grace(Duration)}.
*/
@Override
@Deprecated
public long maintainMs() {
return Math.max(maintainDurationMs, size());
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final JoinWindows that = (JoinWindows) o;
return beforeMs == that.beforeMs &&
afterMs == that.afterMs &&
maintainDurationMs == that.maintainDurationMs &&
segments == that.segments &&
graceMs == that.graceMs;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public int hashCode() {
return Objects.hash(beforeMs, afterMs, graceMs, maintainDurationMs, segments);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public String toString() {
return "JoinWindows{" +
"beforeMs=" + beforeMs +
", afterMs=" + afterMs +
", graceMs=" + graceMs +
", maintainDurationMs=" + maintainDurationMs +
", segments=" + segments +
'}';
}
}
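A sketch of a windowed stream-stream join using the Duration-based factory; topics, serdes, and the join logic are illustrative assumptions:

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.JoinWindows;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.StreamJoined;

public class JoinWindowsExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> clicks =
            builder.stream("clicks", Consumed.with(Serdes.String(), Serdes.String()));
        final KStream<String, String> views =
            builder.stream("views", Consumed.with(Serdes.String(), Serdes.String()));
        // Records of the same key join when their timestamps differ by at most 5 minutes;
        // grace() admits out-of-order records up to 1 minute past the window end.
        final JoinWindows window = JoinWindows.of(Duration.ofMinutes(5))
                                              .grace(Duration.ofMinutes(1));
        clicks.join(views,
                    (click, view) -> click + "/" + view,
                    window,
                    StreamJoined.with(Serdes.String(), Serdes.String(), Serdes.String()))
              .to("click-view-joins");
    }
}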

View File

@@ -0,0 +1,233 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
/**
* The {@code Joined} class represents optional params that can be passed to
* {@link KStream#join(KTable, ValueJoiner, Joined) KStream#join(KTable,...)} and
* {@link KStream#leftJoin(KTable, ValueJoiner) KStream#leftJoin(KTable,...)} operations.
*/
public class Joined<K, V, VO> implements NamedOperation<Joined<K, V, VO>> {
protected final Serde<K> keySerde;
protected final Serde<V> valueSerde;
protected final Serde<VO> otherValueSerde;
protected final String name;
private Joined(final Serde<K> keySerde,
final Serde<V> valueSerde,
final Serde<VO> otherValueSerde,
final String name) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.otherValueSerde = otherValueSerde;
this.name = name;
}
protected Joined(final Joined<K, V, VO> joined) {
this(joined.keySerde, joined.valueSerde, joined.otherValueSerde, joined.name);
}
/**
* Create an instance of {@code Joined} with key, value, and otherValue {@link Serde} instances.
* {@code null} values are accepted and will be replaced by the default serdes as defined in config.
*
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be used
* @param valueSerde the value serde to use. If {@code null} the default value serde from config will be used
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance with the provided serdes
*/
public static <K, V, VO> Joined<K, V, VO> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final Serde<VO> otherValueSerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, null);
}
/**
* Create an instance of {@code Joined} with key, value, and otherValue {@link Serde} instances.
* {@code null} values are accepted and will be replaced by the default serdes as defined in
* config.
*
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be
* used
* @param valueSerde the value serde to use. If {@code null} the default value serde from config
* will be used
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde
* from config will be used
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance with the provided serdes
*/
public static <K, V, VO> Joined<K, V, VO> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final Serde<VO> otherValueSerde,
final String name) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Create an instance of {@code Joined} with a key {@link Serde}.
* {@code null} values are accepted and will be replaced by the default key serde as defined in config.
*
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the keySerde
*/
public static <K, V, VO> Joined<K, V, VO> keySerde(final Serde<K> keySerde) {
return new Joined<>(keySerde, null, null, null);
}
/**
* Create an instance of {@code Joined} with a value {@link Serde}.
* {@code null} values are accepted and will be replaced by the default value serde as defined in config.
*
* @param valueSerde the value serde to use. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the valueSerde
*/
public static <K, V, VO> Joined<K, V, VO> valueSerde(final Serde<V> valueSerde) {
return new Joined<>(null, valueSerde, null, null);
}
/**
* Create an instance of {@code Joined} with an other value {@link Serde}.
* {@code null} values are accepted and will be replaced by the default value serde as defined in config.
*
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the otherValueSerde
*/
public static <K, V, VO> Joined<K, V, VO> otherValueSerde(final Serde<VO> otherValueSerde) {
return new Joined<>(null, null, otherValueSerde, null);
}
/**
* Create an instance of {@code Joined} with a base name for all components of the join; this may
* include any repartition topics created to complete the join.
*
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the name
*
* @deprecated use {@link #as(String)} instead
*/
@Deprecated
public static <K, V, VO> Joined<K, V, VO> named(final String name) {
return new Joined<>(null, null, null, name);
}
/**
* Create an instance of {@code Joined} with a base name for all components of the join; this may
* include any repartition topics created to complete the join.
*
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the name
*
*/
public static <K, V, VO> Joined<K, V, VO> as(final String name) {
return new Joined<>(null, null, null, name);
}
/**
* Set the key {@link Serde} to be used. Null values are accepted and will be replaced by the default
* key serde as defined in config
*
* @param keySerde the key serde to use. If null the default key serde from config will be used
* @return new {@code Joined} instance configured with the {@code keySerde}
*/
public Joined<K, V, VO> withKeySerde(final Serde<K> keySerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Set the value {@link Serde} to be used. Null values are accepted and will be replaced by the default
* value serde as defined in config
*
* @param valueSerde the value serde to use. If null the default value serde from config will be used
* @return new {@code Joined} instance configured with the {@code valueSerde}
*/
public Joined<K, V, VO> withValueSerde(final Serde<V> valueSerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Set the otherValue {@link Serde} to be used. Null values are accepted and will be replaced by the default
* value serde as defined in config
*
* @param otherValueSerde the otherValue serde to use. If null the default value serde from config will be used
* @return new {@code Joined} instance configured with the {@code otherValueSerde}
*/
public Joined<K, V, VO> withOtherValueSerde(final Serde<VO> otherValueSerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Set the base name used for all components of the join; this may include any repartition topics
* created to complete the join.
*
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @return new {@code Joined} instance configured with the {@code name}
*/
@Override
public Joined<K, V, VO> withName(final String name) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
public Serde<K> keySerde() {
return keySerde;
}
public Serde<V> valueSerde() {
return valueSerde;
}
public Serde<VO> otherValueSerde() {
return otherValueSerde;
}
/**
* @deprecated this method will be removed in a future release
*/
@Deprecated
public String name() {
return name;
}
}
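A sketch of a stream-table join carrying explicit serdes through Joined; topics, serdes, and the join logic are assumptions:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Joined;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;

public class JoinedExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> orders =
            builder.stream("orders", Consumed.with(Serdes.String(), Serdes.Long()));
        final KTable<String, String> customers =
            builder.table("customers", Consumed.with(Serdes.String(), Serdes.String()));
        // Joined bundles the key serde, the stream-value serde, and the table-value serde,
        // plus a base name reused for any repartition topic the join may need.
        orders.join(customers,
                    (amount, customer) -> customer + " spent " + amount,
                    Joined.with(Serdes.String(), Serdes.Long(), Serdes.String())
                          .withName("order-customer-join"))
              .to("order-enrichment");
    }
}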

View File

@@ -0,0 +1,556 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.KeyValueStore;
/**
* {@code KGroupedStream} is an abstraction of a <i>grouped</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation of a {@link KStream} in order to apply an aggregation operation on the original
* {@link KStream} records.
* <p>
* It is an intermediate representation after a grouping of a {@link KStream} before an aggregation is applied to the
* new partitions resulting in a {@link KTable}.
* <p>
* A {@code KGroupedStream} must be obtained from a {@link KStream} via {@link KStream#groupByKey() groupByKey()} or
* {@link KStream#groupBy(KeyValueMapper) groupBy(...)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
*/
public interface KGroupedStream<K, V> {
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count();
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology
*
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named);
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String,Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String,Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named,
final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator)}).
* <p>
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
* aggregate and the record's value.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer)} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the reduce function returns {@code null}, it is then interpreted as
* a deletion for the key, and future messages of the same key coming from upstream operators
* will be handled as a newly initialized value.
*/
KTable<K, V> reduce(final Reducer<V> reducer);
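// A hedged usage sketch for reduce(Reducer); the builder, topic, and types below are
// illustrative assumptions and not part of this interface:
//
//   final KGroupedStream<String, Long> grouped =
//       builder.<String, Long>stream("amounts").groupByKey();
//   // Reducing keeps the value type: here, the running sum per key.
//   final KTable<String, Long> sums = grouped.reduce(Long::sum);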
/**
* Combine the value of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
* aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* <p>
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer, Materialized)} can be used to compute aggregate functions like sum, min, or
* max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // compute sum
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long sumForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> reducer,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the value of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
* aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* <p>
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer, Materialized)} can be used to compute aggregate functions like sum, min, or
* max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // compute sum
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long sumForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the reduce function returns {@code null}, it is then interpreted as
* a deletion for the key, and future messages of the same key coming from upstream operators
* will be handled as a newly initialized value.
*/
KTable<K, V> reduce(final Reducer<V> reducer,
final Named named,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate(Initializer, Aggregator)} can be used to compute aggregate functions like
* count (c.f. {@link #count()}).
* <p>
* The default value serde from config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Aggregator, Materialized)}.
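 * <p>
 * For illustration only, a sketch that sums up the length of String values per key; it assumes the configured
 * default serdes can handle the {@code String} keys and the {@code Long} aggregate (otherwise use the
 * {@link Materialized} overload), and {@code groupedStream} is a hypothetical {@code KGroupedStream<String, String>}:
 * <pre>{@code
 * KGroupedStream<String, String> groupedStream = ...;
 * KTable<String, Long> totalLength = groupedStream.aggregate(
 *     () -> 0L,                                               // Initializer: start each key at zero
 *     (key, value, aggregate) -> aggregate + value.length()); // Aggregator: add the new value's length
 * }</pre>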
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param <VR> the value type of the resulting {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the aggregate function returns {@code null}, it is then interpreted as
* deletion for the key, and future messages of the same key coming from upstream operators
 * will be handled as a newly initialized value.
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator);
/**
* Aggregate the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate(Initializer, Aggregator, Materialized)} can be used to compute aggregate functions like
* count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // some aggregation on value type Long
 * String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
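 * <p>
 * For illustration only, a sketch that sums up the length of String values per key and materializes the result
 * into a queryable store; the store name {@code "total-length-store"} is a hypothetical placeholder and
 * {@code groupedStream} is assumed to be a {@code KGroupedStream<String, String>}:
 * <pre>{@code
 * KTable<String, Long> totalLength = groupedStream.aggregate(
 *     () -> 0L,
 *     (key, value, aggregate) -> aggregate + value.length(),
 *     Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("total-length-store")
 *         .withValueSerde(Serdes.Long()));
 * }</pre>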
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate(Initializer, Aggregator, Materialized)} can be used to compute aggregate functions like
* count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
 * {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
 * <pre>{@code
 * KafkaStreams streams = ... // some aggregation on value type Long
 * String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the aggregate function returns {@code null}, it is then interpreted as
* deletion for the key, and future messages of the same key coming from upstream operators
 * will be handled as a newly initialized value.
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Named named,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Create a new {@link TimeWindowedKStream} instance that can be used to perform windowed aggregations.
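 * <p>
 * For illustration only, a sketch of a windowed count over 5-minute tumbling windows; it assumes a Kafka Streams
 * version where {@code TimeWindows.of(Duration)} is available (newer versions offer
 * {@code TimeWindows.ofSizeWithNoGrace(Duration)} instead), and {@code groupedStream} is a hypothetical
 * {@code KGroupedStream<String, V>}:
 * <pre>{@code
 * KTable<Windowed<String>, Long> windowedCounts = groupedStream
 *     .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))  // 5-minute tumbling windows
 *     .count();
 * }</pre>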
* @param windows the specification of the aggregation {@link Windows}
* @param <W> the window type
* @return an instance of {@link TimeWindowedKStream}
*/
<W extends Window> TimeWindowedKStream<K, V> windowedBy(final Windows<W> windows);
/**
* Create a new {@link SessionWindowedKStream} instance that can be used to perform session windowed aggregations.
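 * <p>
 * For illustration only, a sketch of a session-windowed count with a 5-minute inactivity gap; it assumes a Kafka
 * Streams version where {@code SessionWindows.with(Duration)} is available (newer versions offer
 * {@code SessionWindows.ofInactivityGapWithNoGrace(Duration)} instead), and {@code groupedStream} is a hypothetical
 * {@code KGroupedStream<String, V>}:
 * <pre>{@code
 * KTable<Windowed<String>, Long> sessionCounts = groupedStream
 *     .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))  // sessions close after 5 minutes of inactivity
 *     .count();
 * }</pre>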
* @param windows the specification of the aggregation {@link SessionWindows}
 * @return an instance of {@link SessionWindowedKStream}
*/
SessionWindowedKStream<K, V> windowedBy(final SessionWindows windows);
/**
 * Create a new {@link CogroupedKStream} from this grouped KStream to allow cogrouping other
* {@code KGroupedStream} to it.
* {@link CogroupedKStream} is an abstraction of multiple <i>grouped</i> record streams of {@link KeyValue} pairs.
* It is an intermediate representation after a grouping of {@link KStream}s, before the
* aggregations are applied to the new partitions resulting in a {@link KTable}.
* <p>
* The specified {@link Aggregator} is applied in the actual {@link CogroupedKStream#aggregate(Initializer)
* aggregation} step for each input record and computes a new aggregate using the current aggregate (or for the very
* first record per key using the initial intermediate aggregation result provided via the {@link Initializer} that
* is passed into {@link CogroupedKStream#aggregate(Initializer)}) and the record's value.
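 * <p>
 * For illustration only, a sketch that cogroups two hypothetical streams of per-user click and view counts into a
 * single activity total (the {@code clicks} and {@code views} grouped streams are placeholders):
 * <pre>{@code
 * KGroupedStream<String, Long> clicks = ...;
 * KGroupedStream<String, Long> views = ...;
 * KTable<String, Long> activity = clicks
 *     .cogroup((key, value, aggregate) -> aggregate + value)        // add clicks to the running total
 *     .cogroup(views, (key, value, aggregate) -> aggregate + value) // add views to the same total
 *     .aggregate(() -> 0L);                                         // initializer shared by all cogrouped streams
 * }</pre>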
*
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param <Vout> the type of the output values
* @return a {@link CogroupedKStream}
*/
<Vout> CogroupedKStream<K, Vout> cogroup(final Aggregator<? super K, ? super V, Vout> aggregator);
}

View File

@@ -0,0 +1,699 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.KeyValueStore;
/**
* {@code KGroupedTable} is an abstraction of a <i>re-grouped changelog stream</i> from a primary-keyed table,
* usually on a different grouping key than the original primary key.
* <p>
* It is an intermediate representation after a re-grouping of a {@link KTable} before an aggregation is applied to the
* new partitions resulting in a new {@link KTable}.
* <p>
* A {@code KGroupedTable} must be obtained from a {@link KTable} via {@link KTable#groupBy(KeyValueMapper)
* groupBy(...)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KTable
*/
public interface KGroupedTable<K, V> {
/**
 * Count the number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
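 * <p>
 * For illustration only, a sketch that counts orders per customer; the {@code Order} type and its
 * {@code customerId()} accessor are hypothetical, and default serdes are assumed for the re-grouping:
 * <pre>{@code
 * KTable<String, Order> ordersById = ...;
 * KTable<String, Long> ordersPerCustomer = ordersById
 *     .groupBy((orderId, order) -> KeyValue.pair(order.customerId(), order)) // re-group by customer
 *     .count(Materialized.as("orders-per-customer"));
 * }</pre>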
*
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
 * Count the number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named the {@link Named} config used to name the processor in the topology
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named, final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
 * Count the number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count();
/**
 * Count the number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named the {@link Named} config used to name the processor in the topology
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
* current aggregate (first argument) and the record's value (second argument) by adding the new record to the
* aggregate.
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate (first argument) and the record's value (second
* argument) by "removing" the "replaced" record from the aggregate.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
 * Thus, {@code reduce(Reducer, Reducer, Materialized)} can be used to compute aggregate functions like sum.
* For sum, the adder and subtractor would work as follows:
* <pre>{@code
* public class SumAdder implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer newValue) {
* return currentAgg + newValue;
* }
* }
*
* public class SumSubtractor implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer oldValue) {
* return currentAgg - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
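 * <p>
 * For illustration only, the same adder/subtractor pair written as lambdas and wired up end-to-end; the
 * {@code regionOf(...)} helper is hypothetical and default serdes are assumed for the re-grouping:
 * <pre>{@code
 * KTable<String, Integer> amountByAccount = ...;
 * KTable<String, Integer> amountByRegion = amountByAccount
 *     .groupBy((account, amount) -> KeyValue.pair(regionOf(account), amount))
 *     .reduce(
 *         (aggregate, newValue) -> aggregate + newValue,  // adder
 *         (aggregate, oldValue) -> aggregate - oldValue,  // subtractor
 *         Materialized.as("amount-by-region"));
 * }</pre>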
*
* @param adder a {@link Reducer} that adds a new value to the aggregate result
 * @param subtractor a {@link Reducer} that removes an old value from the aggregate result
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
* current aggregate (first argument) and the record's value (second argument) by adding the new record to the
* aggregate.
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate (first argument) and the record's value (second
* argument) by "removing" the "replaced" record from the aggregate.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
 * Thus, {@code reduce(Reducer, Reducer, Named, Materialized)} can be used to compute aggregate functions like sum.
* For sum, the adder and subtractor would work as follows:
* <pre>{@code
* public class SumAdder implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer newValue) {
* return currentAgg + newValue;
* }
* }
*
* public class SumSubtractor implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer oldValue) {
* return currentAgg - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param adder a {@link Reducer} that adds a new value to the aggregate result
 * @param subtractor a {@link Reducer} that removes an old value from the aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor,
final Named named,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator)}).
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
* current aggregate and the record's value by adding the new record to the aggregate.
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer, Reducer)} can be used to compute aggregate functions like sum.
* For sum, the adder and subtractor would work as follows:
* <pre>{@code
* public class SumAdder implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer newValue) {
* return currentAgg + newValue;
* }
* }
*
* public class SumSubtractor implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer oldValue) {
* return currentAgg - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param adder a {@link Reducer} that adds a new value to the aggregate result
 * @param subtractor a {@link Reducer} that removes an old value from the aggregate result
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer, Materialized) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Materialized)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as value serde in Materialized#withValueSerde
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
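 * <p>
 * For illustration only, the initializer/adder/subtractor written as lambdas and wired up end-to-end; the
 * {@code Purchase} type and its accessors are hypothetical ({@code amount()} is assumed to return {@code Long}),
 * and default serdes are assumed for the re-grouping:
 * <pre>{@code
 * KTable<String, Purchase> purchases = ...;
 * KTable<String, Long> revenuePerCategory = purchases
 *     .groupBy((id, purchase) -> KeyValue.pair(purchase.category(), purchase.amount()))
 *     .aggregate(
 *         () -> 0L,                                     // initializer
 *         (category, amount, total) -> total + amount,  // adder
 *         (category, amount, total) -> total - amount,  // subtractor
 *         Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("revenue-per-category")
 *             .withValueSerde(Serdes.Long()));
 * }</pre>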
*
* @param initializer an {@link Initializer} that provides an initial aggregate result value
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer, Materialized) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Materialized)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as value serde in Materialized#withValueSerde
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that provides an initial aggregate result value
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Named named,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* If the result value type does not match the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value
* serde} you should use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
 * Thus, {@code aggregate(Initializer, Aggregator, Aggregator)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as default value serde in StreamsConfig
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
 * @param initializer an {@link Initializer} that provides an initial aggregate result value
 * @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* If the result value type does not match the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value
* serde} you should use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
 * Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Named)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as default value serde in StreamsConfig
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
 * @param initializer an {@link Initializer} that provides an initial aggregate result value
 * @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Named named);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KeyValue;
/**
* The {@code KeyValueMapper} interface for mapping a {@link KeyValue key-value pair} to a new value of arbitrary type.
* For example, it can be used to
* <ul>
* <li>map from an input {@link KeyValue} pair to an output {@link KeyValue} pair with different key and/or value type
* (for this case output type {@code VR == }{@link KeyValue KeyValue&lt;NewKeyType,NewValueType&gt;})</li>
* <li>map from an input record to a new key (with arbitrary key type as specified by {@code VR})</li>
* </ul>
 * This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
* record of a stream (cf. {@link Transformer} for stateful record transformation).
* {@code KeyValueMapper} is a generalization of {@link ValueMapper}.
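 * <p>
 * For illustration only, two sketches: re-keying a stream by a field of the value, and mapping to a new key-value
 * pair (the {@code Order} type and its {@code userId()} accessor are hypothetical):
 * <pre>{@code
 * KStream<String, Order> orders = ...;
 * KStream<String, Order> ordersByUser = orders.selectKey((orderId, order) -> order.userId());
 *
 * KStream<String, String> textLines = ...;
 * KStream<String, Integer> lineLengths = textLines.map((key, line) -> KeyValue.pair(key, line.length()));
 * }</pre>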
*
* @param <K> key type
* @param <V> value type
* @param <VR> mapped value type
* @see ValueMapper
* @see Transformer
* @see KStream#map(KeyValueMapper)
* @see KStream#flatMap(KeyValueMapper)
* @see KStream#selectKey(KeyValueMapper)
* @see KStream#groupBy(KeyValueMapper)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper)
* @see KTable#groupBy(KeyValueMapper, Grouped)
* @see KTable#toStream(KeyValueMapper)
*/
public interface KeyValueMapper<K, V, VR> {
/**
* Map a record with the given key and value to a new value.
*
* @param key the key of the record
* @param value the value of the record
* @return the new value
*/
VR apply(final K key, final V value);
}
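As a usage illustration (the topic name and the KeyValueMapperSketch class are assumptions, not part of this commit), a KeyValueMapper can either produce a whole new KeyValue pair via map() or derive just a new key via selectKey():

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KeyValueMapper;

public class KeyValueMapperSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> sentences =
            builder.stream("sentences", Consumed.with(Serdes.String(), Serdes.String()));

        // VR == KeyValue<String, Integer>: map each record to a new key-value pair.
        final KeyValueMapper<String, String, KeyValue<String, Integer>> toLength =
            (key, value) -> KeyValue.pair(value, value.length());
        final KStream<String, Integer> lengths = sentences.map(toLength);

        // VR == String: derive a new key only; the value is left untouched.
        final KStream<String, String> rekeyed =
            sentences.selectKey((key, value) -> value.split(" ")[0]);
    }
}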

View File

@@ -0,0 +1,261 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.state.KeyValueBytesStoreSupplier;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
import org.apache.kafka.streams.state.SessionStore;
import org.apache.kafka.streams.state.StoreSupplier;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
import org.apache.kafka.streams.state.WindowStore;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
/**
* Used to describe how a {@link StateStore} should be materialized.
* You can either provide a custom {@link StateStore} backend through one of the provided methods accepting a supplier
* or use the default RocksDB backends by providing just a store name.
* <p>
* For example, you can read a topic as {@link KTable} and force a state store materialization to access the content
* via Interactive Queries API:
* <pre>{@code
* StreamsBuilder builder = new StreamsBuilder();
* KTable<Integer, Integer> table = builder.table(
* "topicName",
* Materialized.as("queryable-store-name"));
* }</pre>
*
* @param <K> type of record key
* @param <V> type of record value
* @param <S> type of state store (note: state stores always have key/value types {@code <Bytes,byte[]>})
*
* @see org.apache.kafka.streams.state.Stores
*/
public class Materialized<K, V, S extends StateStore> {
protected StoreSupplier<S> storeSupplier;
protected String storeName;
protected Serde<V> valueSerde;
protected Serde<K> keySerde;
protected boolean loggingEnabled = true;
protected boolean cachingEnabled = true;
protected Map<String, String> topicConfig = new HashMap<>();
protected Duration retention;
private Materialized(final StoreSupplier<S> storeSupplier) {
this.storeSupplier = storeSupplier;
}
private Materialized(final String storeName) {
this.storeName = storeName;
}
/**
* Copy constructor.
* @param materialized the {@link Materialized} instance to copy.
*/
protected Materialized(final Materialized<K, V, S> materialized) {
this.storeSupplier = materialized.storeSupplier;
this.storeName = materialized.storeName;
this.keySerde = materialized.keySerde;
this.valueSerde = materialized.valueSerde;
this.loggingEnabled = materialized.loggingEnabled;
this.cachingEnabled = materialized.cachingEnabled;
this.topicConfig = materialized.topicConfig;
this.retention = materialized.retention;
}
/**
* Materialize a {@link StateStore} with the given name.
*
* @param storeName the name of the underlying {@link KTable} state store; valid characters are ASCII
* alphanumerics, '.', '_' and '-'.
* @param <K> key type of the store
* @param <V> value type of the store
* @param <S> type of the {@link StateStore}
* @return a new {@link Materialized} instance with the given storeName
*/
public static <K, V, S extends StateStore> Materialized<K, V, S> as(final String storeName) {
Named.validate(storeName);
return new Materialized<>(storeName);
}
/**
* Materialize a {@link WindowStore} using the provided {@link WindowBytesStoreSupplier}.
*
* Important: Custom subclasses are allowed here, but they should respect the retention contract:
* Window stores are required to retain windows at least as long as (window size + window grace period).
* Stores constructed via {@link org.apache.kafka.streams.state.Stores} already satisfy this contract.
*
* @param supplier the {@link WindowBytesStoreSupplier} used to materialize the store
* @param <K> key type of the store
* @param <V> value type of the store
* @return a new {@link Materialized} instance with the given supplier
*/
public static <K, V> Materialized<K, V, WindowStore<Bytes, byte[]>> as(final WindowBytesStoreSupplier supplier) {
Objects.requireNonNull(supplier, "supplier can't be null");
return new Materialized<>(supplier);
}
/**
* Materialize a {@link SessionStore} using the provided {@link SessionBytesStoreSupplier}.
*
* Important: Custom subclasses are allowed here, but they should respect the retention contract:
* Session stores are required to retain windows at least as long as (session inactivity gap + session grace period).
* Stores constructed via {@link org.apache.kafka.streams.state.Stores} already satisfy this contract.
*
* @param supplier the {@link SessionBytesStoreSupplier} used to materialize the store
* @param <K> key type of the store
* @param <V> value type of the store
* @return a new {@link Materialized} instance with the given supplier
*/
public static <K, V> Materialized<K, V, SessionStore<Bytes, byte[]>> as(final SessionBytesStoreSupplier supplier) {
Objects.requireNonNull(supplier, "supplier can't be null");
return new Materialized<>(supplier);
}
/**
* Materialize a {@link KeyValueStore} using the provided {@link KeyValueBytesStoreSupplier}.
*
* @param supplier the {@link KeyValueBytesStoreSupplier} used to materialize the store
* @param <K> key type of the store
* @param <V> value type of the store
* @return a new {@link Materialized} instance with the given supplier
*/
public static <K, V> Materialized<K, V, KeyValueStore<Bytes, byte[]>> as(final KeyValueBytesStoreSupplier supplier) {
Objects.requireNonNull(supplier, "supplier can't be null");
return new Materialized<>(supplier);
}
/**
* Materialize a {@link StateStore} with the provided key and value {@link Serde}s.
* An internal name will be used for the store.
*
* @param keySerde the key {@link Serde} to use. If the {@link Serde} is null, then the default key
* serde from configs will be used
* @param valueSerde the value {@link Serde} to use. If the {@link Serde} is null, then the default value
* serde from configs will be used
* @param <K> key type
* @param <V> value type
* @param <S> store type
* @return a new {@link Materialized} instance with the given key and value serdes
*/
public static <K, V, S extends StateStore> Materialized<K, V, S> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Materialized<K, V, S>((String) null).withKeySerde(keySerde).withValueSerde(valueSerde);
}
/**
* Set the valueSerde the materialized {@link StateStore} will use.
*
* @param valueSerde the value {@link Serde} to use. If the {@link Serde} is null, then the default value
* serde from configs will be used. If the serialized bytes is null for put operations,
* it is treated as delete operation
* @return itself
*/
public Materialized<K, V, S> withValueSerde(final Serde<V> valueSerde) {
this.valueSerde = valueSerde;
return this;
}
/**
* Set the keySerde the materialized {@link StateStore} will use.
* @param keySerde the key {@link Serde} to use. If the {@link Serde} is null, then the default key
* serde from configs will be used
* @return itself
*/
public Materialized<K, V, S> withKeySerde(final Serde<K> keySerde) {
this.keySerde = keySerde;
return this;
}
/**
* Indicates that a changelog should be created for the store. The changelog will be created
* with the provided configs.
* <p>
* Note: Any unrecognized configs will be ignored.
* @param config any configs that should be applied to the changelog
* @return itself
*/
public Materialized<K, V, S> withLoggingEnabled(final Map<String, String> config) {
loggingEnabled = true;
this.topicConfig = config;
return this;
}
/**
* Disable change logging for the materialized {@link StateStore}.
* @return itself
*/
public Materialized<K, V, S> withLoggingDisabled() {
loggingEnabled = false;
this.topicConfig.clear();
return this;
}
/**
* Enable caching for the materialized {@link StateStore}.
* @return itself
*/
public Materialized<K, V, S> withCachingEnabled() {
cachingEnabled = true;
return this;
}
/**
* Disable caching for the materialized {@link StateStore}.
* @return itself
*/
public Materialized<K, V, S> withCachingDisabled() {
cachingEnabled = false;
return this;
}
/**
* Configure retention period for window and session stores. Ignored for key/value stores.
*
* Overridden by pre-configured store suppliers
* ({@link Materialized#as(SessionBytesStoreSupplier)} or {@link Materialized#as(WindowBytesStoreSupplier)}).
*
* Note that the retention period must be at least long enough to contain the windowed data's entire life cycle,
* from window-start through window-end, and for the entire grace period.
*
* @param retention the retention time
* @return itself
* @throws IllegalArgumentException if retention is negative or can't be represented as {@code long milliseconds}
*/
public Materialized<K, V, S> withRetention(final Duration retention) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(retention, "retention");
final long retentionMs = ApiUtils.validateMillisecondDuration(retention, msgPrefix);
if (retentionMs < 0) {
throw new IllegalArgumentException("Retention must not be negative.");
}
this.retention = retention;
return this;
}
}
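For illustration (topic names, store names, and the MaterializedSketch class are assumptions, not part of this commit), a typical setup materializes a queryable key-value store with explicit serdes, and a windowed store whose retention covers window size plus grace period:

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.WindowStore;

public class MaterializedSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // A queryable key-value store with explicit serdes and caching disabled.
        builder.table("prices",
            Consumed.with(Serdes.String(), Serdes.Double()),
            Materialized.<String, Double, KeyValueStore<Bytes, byte[]>>as("prices-store")
                .withKeySerde(Serdes.String())
                .withValueSerde(Serdes.Double())
                .withCachingDisabled());

        // A windowed store: retention must be at least window size + grace period.
        builder.stream("clicks", Consumed.with(Serdes.String(), Serdes.Long()))
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofMinutes(5)).grace(Duration.ofMinutes(1)))
            .count(Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("click-counts")
                .withRetention(Duration.ofMinutes(6)));
    }
}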

View File

@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The interface for merging aggregate values for {@link SessionWindows} with the given key.
*
* @param <K> key type
* @param <V> aggregate value type
*/
public interface Merger<K, V> {
/**
* Compute a new aggregate from the key and two aggregates.
*
* @param aggKey the key of the record
* @param aggOne the first aggregate
* @param aggTwo the second aggregate
* @return the new aggregate value
*/
V apply(final K aggKey, final V aggOne, final V aggTwo);
}
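A brief usage sketch (the topic name, inactivity gap, and the MergerSketch class are assumptions, not part of this commit): the Merger is the piece of a session-windowed aggregation that combines two existing session aggregates when their windows overlap and are merged:

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Merger;
import org.apache.kafka.streams.kstream.SessionWindows;
import org.apache.kafka.streams.kstream.Windowed;

public class MergerSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // Concatenate the aggregates of two sessions that are being merged.
        final Merger<String, String> sessionMerger =
            (aggKey, aggOne, aggTwo) -> aggOne + aggTwo;

        final KTable<Windowed<String>, String> sessions = builder
            .stream("events", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey()
            .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
            .aggregate(
                () -> "",                          // initializer
                (key, value, agg) -> agg + value,  // per-record aggregator
                sessionMerger);                    // merges overlapping sessions
    }
}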

View File

@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.errors.TopologyException;
import java.util.Objects;
public class Named implements NamedOperation<Named> {
private static final int MAX_NAME_LENGTH = 249;
protected String name;
protected Named(final Named named) {
this(Objects.requireNonNull(named, "named can't be null").name);
}
protected Named(final String name) {
this.name = name;
if (name != null) {
validate(name);
}
}
/**
* Create a Named instance with provided name.
*
* @param name the processor name to be used. If {@code null} a default processor name will be generated.
* @return A new {@link Named} instance configured with name
*
* @throws TopologyException if an invalid name is specified; valid characters are ASCII alphanumerics, '.', '_' and '-'.
*/
public static Named as(final String name) {
Objects.requireNonNull(name, "name can't be null");
return new Named(name);
}
@Override
public Named withName(final String name) {
return new Named(name);
}
protected static void validate(final String name) {
if (name.isEmpty())
throw new TopologyException("Name is illegal, it can't be empty");
if (name.equals(".") || name.equals(".."))
throw new TopologyException("Name cannot be \".\" or \"..\"");
if (name.length() > MAX_NAME_LENGTH)
throw new TopologyException("Name is illegal, it can't be longer than " + MAX_NAME_LENGTH +
" characters, name: " + name);
if (!containsValidPattern(name))
throw new TopologyException("Name \"" + name + "\" is illegal, it contains a character other than " +
"ASCII alphanumerics, '.', '_' and '-'");
}
/**
* Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-'
*/
private static boolean containsValidPattern(final String topic) {
for (int i = 0; i < topic.length(); ++i) {
final char c = topic.charAt(i);
// We don't use Character.isLetterOrDigit(c) because it's slower
final boolean validLetterOrDigit = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z');
final boolean validChar = validLetterOrDigit || c == '.' || c == '_' || c == '-';
if (!validChar) {
return false;
}
}
return true;
}
}
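As an illustrative sketch (the topic names and the NamedSketch class are assumptions, not part of this commit), Named is passed alongside an operation so the processor name shown by Topology#describe() stays stable across refactorings:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Named;

public class NamedSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> input =
            builder.stream("input", Consumed.with(Serdes.String(), Serdes.String()));

        // Each operation gets an explicit processor name instead of a generated one.
        input.filter((key, value) -> value != null, Named.as("drop-nulls"))
             .mapValues(value -> value.trim(), Named.as("trim-values"))
             .to("cleaned-input");
    }
}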

View File

@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* Default interface that can be used to personalize the names of operations, internal topics, or stores.
*/
interface NamedOperation<T extends NamedOperation<T>> {
/**
* Sets the name to be used for an operation.
*
* @param name the name to use.
* @return an instance of {@link NamedOperation}
*/
T withName(final String name);
}

View File

@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KeyValue;
/**
* The {@code Predicate} interface represents a predicate (boolean-valued function) of a {@link KeyValue} pair.
* This is a stateless record-by-record operation, i.e., {@link #test(Object, Object)} is invoked individually for each
* record of a stream.
*
* @param <K> key type
* @param <V> value type
* @see KStream#filter(Predicate)
* @see KStream#filterNot(Predicate)
* @see KStream#branch(Predicate[])
* @see KTable#filter(Predicate)
* @see KTable#filterNot(Predicate)
*/
public interface Predicate<K, V> {
/**
* Test if the record with the given key and value satisfies the predicate.
*
* @param key the key of the record
* @param value the value of the record
* @return {@code true} if the {@link KeyValue} pair satisfies the predicate&mdash;{@code false} otherwise
*/
boolean test(final K key, final V value);
}
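A short usage sketch (the topic name and the PredicateSketch class are assumptions, not part of this commit): the same Predicate instance can drive both filter() and filterNot():

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Predicate;

public class PredicateSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> amounts =
            builder.stream("amounts", Consumed.with(Serdes.String(), Serdes.Long()));

        final Predicate<String, Long> isPositive = (key, value) -> value != null && value > 0L;

        // Records that satisfy the predicate ...
        final KStream<String, Long> credits = amounts.filter(isPositive);
        // ... and, separately, the records that do not.
        final KStream<String, Long> rest = amounts.filterNot(isPositive);
    }
}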

View File

@@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.errors.TopologyException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Objects;
/**
* An object to define the options used when printing a {@link KStream}.
*
* @param <K> key type
* @param <V> value type
* @see KStream#print(Printed)
*/
public class Printed<K, V> implements NamedOperation<Printed<K, V>> {
protected final OutputStream outputStream;
protected String label;
protected String processorName;
protected KeyValueMapper<? super K, ? super V, String> mapper =
(KeyValueMapper<K, V, String>) (key, value) -> String.format("%s, %s", key, value);
private Printed(final OutputStream outputStream) {
this.outputStream = outputStream;
}
/**
* Copy constructor.
* @param printed instance of {@link Printed} to copy
*/
protected Printed(final Printed<K, V> printed) {
this.outputStream = printed.outputStream;
this.label = printed.label;
this.mapper = printed.mapper;
this.processorName = printed.processorName;
}
/**
* Print the records of a {@link KStream} to a file.
*
* @param filePath path of the file
* @param <K> key type
* @param <V> value type
* @return a new Printed instance
*/
public static <K, V> Printed<K, V> toFile(final String filePath) {
Objects.requireNonNull(filePath, "filePath can't be null");
if (filePath.trim().isEmpty()) {
throw new TopologyException("filePath can't be an empty string");
}
try {
return new Printed<>(Files.newOutputStream(Paths.get(filePath)));
} catch (final IOException e) {
throw new TopologyException("Unable to write stream to file at [" + filePath + "] " + e.getMessage());
}
}
/**
* Print the records of a {@link KStream} to system out.
*
* @param <K> key type
* @param <V> value type
* @return a new Printed instance
*/
public static <K, V> Printed<K, V> toSysOut() {
return new Printed<>(System.out);
}
/**
* Print the records of a {@link KStream} with the provided label.
*
* @param label label to use
* @return this
*/
public Printed<K, V> withLabel(final String label) {
Objects.requireNonNull(label, "label can't be null");
this.label = label;
return this;
}
/**
* Print the records of a {@link KStream} with the provided {@link KeyValueMapper}
* The provided KeyValueMapper's mapped value type must be {@code String}.
* <p>
* The example below shows how to customize output data.
* <pre>{@code
* final KeyValueMapper<Integer, String, String> mapper = new KeyValueMapper<Integer, String, String>() {
* public String apply(Integer key, String value) {
* return String.format("(%d, %s)", key, value);
* }
* };
* }</pre>
*
* Implementors will need to override {@code toString()} for keys and values that are not of type {@link String},
* {@link Integer} etc. to get meaningful information.
*
* @param mapper mapper to use
* @return this
*/
public Printed<K, V> withKeyValueMapper(final KeyValueMapper<? super K, ? super V, String> mapper) {
Objects.requireNonNull(mapper, "mapper can't be null");
this.mapper = mapper;
return this;
}
/**
* Print the records of a {@link KStream} with provided processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @return this
*/
@Override
public Printed<K, V> withName(final String processorName) {
this.processorName = processorName;
return this;
}
}
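For illustration (the topic name, the file path, and the PrintedSketch class are assumptions, not part of this commit), Printed is handed to KStream#print(Printed) and can combine a label with a custom formatter:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Printed;

public class PrintedSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> counts =
            builder.stream("word-counts", Consumed.with(Serdes.String(), Serdes.Long()));

        // Print to stdout with a label and a custom key-value formatter.
        counts.print(Printed.<String, Long>toSysOut()
            .withLabel("word-counts")
            .withKeyValueMapper((word, count) -> word + " -> " + count));

        // Or print to a file.
        counts.print(Printed.<String, Long>toFile("/tmp/word-counts.txt"));
    }
}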

View File

@@ -0,0 +1,201 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
import org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner;
import org.apache.kafka.streams.processor.StreamPartitioner;
import java.util.Objects;
/**
* This class is used to provide the optional parameters when producing to new topics
* using {@link KStream#through(String, Produced)} or {@link KStream#to(String, Produced)}.
* @param <K> key type
* @param <V> value type
*/
public class Produced<K, V> implements NamedOperation<Produced<K, V>> {
protected Serde<K> keySerde;
protected Serde<V> valueSerde;
protected StreamPartitioner<? super K, ? super V> partitioner;
protected String processorName;
private Produced(final Serde<K> keySerde,
final Serde<V> valueSerde,
final StreamPartitioner<? super K, ? super V> partitioner,
final String processorName) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.partitioner = partitioner;
this.processorName = processorName;
}
protected Produced(final Produced<K, V> produced) {
this.keySerde = produced.keySerde;
this.valueSerde = produced.valueSerde;
this.partitioner = produced.partitioner;
this.processorName = produced.processorName;
}
/**
* Create a Produced instance with provided keySerde and valueSerde.
* @param keySerde Serde to use for serializing the key
* @param valueSerde Serde to use for serializing the value
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with keySerde and valueSerde
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Produced<>(keySerde, valueSerde, null, null);
}
/**
* Create a Produced instance with provided keySerde, valueSerde, and partitioner.
* @param keySerde Serde to use for serializing the key
* @param valueSerde Serde to use for serializing the value
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
* if not specified and {@code keySerde} provides a {@link WindowedSerializer} for the key
* {@link WindowedStreamPartitioner} will be used&mdash;otherwise {@link DefaultPartitioner}
* will be used
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with keySerde, valueSerde, and partitioner
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final StreamPartitioner<? super K, ? super V> partitioner) {
return new Produced<>(keySerde, valueSerde, partitioner, null);
}
/**
* Create an instance of {@link Produced} with provided processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Produced}
*/
public static <K, V> Produced<K, V> as(final String processorName) {
return new Produced<>(null, null, null, processorName);
}
/**
* Create a Produced instance with provided keySerde.
* @param keySerde Serde to use for serializing the key
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with keySerde
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> keySerde(final Serde<K> keySerde) {
return new Produced<>(keySerde, null, null, null);
}
/**
* Create a Produced instance with provided valueSerde.
* @param valueSerde Serde to use for serializing the value
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with valueSerde
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> valueSerde(final Serde<V> valueSerde) {
return new Produced<>(null, valueSerde, null, null);
}
/**
* Create a Produced instance with provided partitioner.
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
* if not specified and the key serde provides a {@link WindowedSerializer} for the key
* {@link WindowedStreamPartitioner} will be used&mdash;otherwise {@link DefaultPartitioner} will be used
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with partitioner
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> streamPartitioner(final StreamPartitioner<? super K, ? super V> partitioner) {
return new Produced<>(null, null, partitioner, null);
}
/**
* Produce records using the provided partitioner.
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
* if not specified and the key serde provides a {@link WindowedSerializer} for the key
* {@link WindowedStreamPartitioner} will be used&mdash;otherwise {@link DefaultPartitioner} will be used
* @return this
*/
public Produced<K, V> withStreamPartitioner(final StreamPartitioner<? super K, ? super V> partitioner) {
this.partitioner = partitioner;
return this;
}
/**
* Produce records using the provided valueSerde.
* @param valueSerde Serde to use for serializing the value
* @return this
*/
public Produced<K, V> withValueSerde(final Serde<V> valueSerde) {
this.valueSerde = valueSerde;
return this;
}
/**
* Produce records using the provided keySerde.
* @param keySerde Serde to use for serializing the key
* @return this
*/
public Produced<K, V> withKeySerde(final Serde<K> keySerde) {
this.keySerde = keySerde;
return this;
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final Produced<?, ?> produced = (Produced<?, ?>) o;
return Objects.equals(keySerde, produced.keySerde) &&
Objects.equals(valueSerde, produced.valueSerde) &&
Objects.equals(partitioner, produced.partitioner);
}
@Override
public int hashCode() {
return Objects.hash(keySerde, valueSerde, partitioner);
}
@Override
public Produced<K, V> withName(final String name) {
this.processorName = name;
return this;
}
}
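A usage sketch (the topic names and the ProducedSketch class are assumptions, not part of this commit): Produced supplies the serdes and, optionally, a custom StreamPartitioner when writing a stream out to a topic:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.processor.StreamPartitioner;

public class ProducedSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> counts =
            builder.stream("word-counts", Consumed.with(Serdes.String(), Serdes.Long()));

        // Write with explicit serdes.
        counts.to("word-counts-copy", Produced.with(Serdes.String(), Serdes.Long()));

        // Write with explicit serdes and a custom partitioner based on the key hash.
        final StreamPartitioner<String, Long> byKeyHash =
            (topic, key, value, numPartitions) ->
                key == null ? 0 : (key.hashCode() & 0x7fffffff) % numPartitions;
        counts.to("word-counts-partitioned",
            Produced.with(Serdes.String(), Serdes.Long(), byKeyHash));
    }
}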

View File

@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KeyValue;
/**
* The {@code Reducer} interface for combining two values of the same type into a new value.
* In contrast to {@link Aggregator} the result type must be the same as the input type.
* <p>
* The provided values can be either original values from input {@link KeyValue} pair records or be a previously
* computed result from {@link Reducer#apply(Object, Object)}.
* <p>
* {@code Reducer} can be used to implement aggregation functions like sum, min, or max.
*
* @param <V> value type
* @see KGroupedStream#reduce(Reducer)
* @see KGroupedStream#reduce(Reducer, Materialized)
* @see TimeWindowedKStream#reduce(Reducer)
* @see TimeWindowedKStream#reduce(Reducer, Materialized)
* @see SessionWindowedKStream#reduce(Reducer)
* @see SessionWindowedKStream#reduce(Reducer, Materialized)
* @see Aggregator
*/
public interface Reducer<V> {
/**
* Aggregate the two given values into a single one.
*
* @param value1 the first value for the aggregation
* @param value2 the second value for the aggregation
* @return the aggregated value
*/
V apply(final V value1, final V value2);
}
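As a brief illustration (the topic name and the ReducerSketch class are assumptions, not part of this commit), a Reducer sums values per key; unlike aggregate(), the result type equals the input value type:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Reducer;

public class ReducerSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // Running sum of all amounts seen per key.
        final Reducer<Long> sum = (value1, value2) -> value1 + value2;
        final KTable<String, Long> totals = builder
            .stream("amounts", Consumed.with(Serdes.String(), Serdes.Long()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
            .reduce(sum);
    }
}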

View File

@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
/**
* The class that is used to capture the key and value {@link Serde}s used when performing
* {@link KStream#groupBy(KeyValueMapper, Serialized)} and {@link KStream#groupByKey(Serialized)} operations.
*
* @param <K> the key type
* @param <V> the value type
*
* @deprecated since 2.1. Use {@link org.apache.kafka.streams.kstream.Grouped} instead
*/
@Deprecated
public class Serialized<K, V> {
protected final Serde<K> keySerde;
protected final Serde<V> valueSerde;
private Serialized(final Serde<K> keySerde,
final Serde<V> valueSerde) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
}
protected Serialized(final Serialized<K, V> serialized) {
this(serialized.keySerde, serialized.valueSerde);
}
/**
* Construct a {@code Serialized} instance with the provided key and value {@link Serde}s.
* If the {@link Serde} params are {@code null} the default serdes defined in the configs will be used.
*
* @param keySerde keySerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @param valueSerde valueSerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @param <K> the key type
* @param <V> the value type
* @return a new instance of {@link Serialized} configured with the provided serdes
*/
public static <K, V> Serialized<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Serialized<>(keySerde, valueSerde);
}
/**
* Construct a {@code Serialized} instance with the provided key {@link Serde}.
* If the {@link Serde} params are null the default serdes defined in the configs will be used.
*
* @param keySerde keySerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @return a new instance of {@link Serialized} configured with the provided key serde
*/
public Serialized<K, V> withKeySerde(final Serde<K> keySerde) {
return new Serialized<>(keySerde, null);
}
/**
* Construct a {@code Serialized} instance with the provided value {@link Serde}.
* If the {@link Serde} params are null the default serdes defined in the configs will be used.
*
* @param valueSerde valueSerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @return a new instance of {@link Serialized} configured with the provided value serde
*/
public Serialized<K, V> withValueSerde(final Serde<V> valueSerde) {
return new Serialized<>(null, valueSerde);
}
}
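Since the class is deprecated, a migration sketch may be useful (the topic name and the SerializedMigrationSketch class are assumptions, not part of this commit): the Serialized-based grouping and its Grouped replacement are interchangeable here:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Serialized;

public class SerializedMigrationSketch {
    @SuppressWarnings("deprecation")
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> amounts =
            builder.stream("amounts", Consumed.with(Serdes.String(), Serdes.Long()));

        // Deprecated since 2.1:
        final KGroupedStream<String, Long> oldStyle =
            amounts.groupByKey(Serialized.with(Serdes.String(), Serdes.Long()));

        // Preferred replacement:
        final KGroupedStream<String, Long> newStyle =
            amounts.groupByKey(Grouped.with(Serdes.String(), Serdes.Long()));
    }
}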

View File

@@ -0,0 +1,265 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.SessionStore;
import java.time.Duration;
/**
* {@code SessionWindowedCogroupKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation of a {@link CogroupedKStream} in order to apply a windowed aggregation operation
* on the original {@link KGroupedStream} records resulting in a windowed {@link KTable} (a <emph>windowed</emph>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* {@link SessionWindows} are dynamic data driven windows.
* They have no fixed time boundaries, rather the size of the window is determined by the records.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to sessions until their grace period ends (see {@link SessionWindows#grace(Duration)}).
* <p>
* A {@code SessionWindowedCogroupedKStream} must be obtained from a {@link CogroupedKStream} via
* {@link CogroupedKStream#windowedBy(SessionWindows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
* @see SessionWindows
* @see CogroupedKStream
*/
public interface SessionWindowedCogroupedKStream<K, V> {
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Merger, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger);
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Merger, Named, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger,
final Named named);
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the session (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
*
* String key = "some-word";
* KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the session (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some windowed aggregation on value type double
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger,
final Named named,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
}
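To make the cogrouped session aggregation concrete, here is a hedged sketch (topic names, the store name, and the CogroupedSessionSketch class are assumptions, not part of this commit): two grouped streams are cogrouped, windowed by sessions, and aggregated with an Initializer, per-stream Aggregators, and a Merger:

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.SessionWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.state.SessionStore;

public class CogroupedSessionSketch {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KGroupedStream<String, String> clicks = builder
            .stream("clicks", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey();
        final KGroupedStream<String, String> views = builder
            .stream("views", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey();

        // The same per-record aggregator is used for both input streams here.
        final Aggregator<String, String, Long> countEvent = (key, value, agg) -> agg + 1L;

        final KTable<Windowed<String>, Long> activityPerSession = clicks
            .cogroup(countEvent)
            .cogroup(views, countEvent)
            .windowedBy(SessionWindows.with(Duration.ofMinutes(30)))
            .aggregate(
                () -> 0L,                                    // initializer
                (aggKey, aggOne, aggTwo) -> aggOne + aggTwo, // merger for overlapping sessions
                Materialized.<String, Long, SessionStore<Bytes, byte[]>>as("activity-per-session")
                    .withValueSerde(Serdes.Long()));
    }
}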

View File

@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.state.internals.SessionKeySchema;
import java.util.Map;
/**
* If the no-arg constructor is used, i.e., no inner deserializer is passed at construction time, the
* inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}.
*/
public class SessionWindowedDeserializer<T> implements Deserializer<Windowed<T>> {
private Deserializer<T> inner;
// Default constructor needed by Kafka
public SessionWindowedDeserializer() {}
public SessionWindowedDeserializer(final Deserializer<T> inner) {
this.inner = inner;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).deserializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public Windowed<T> deserialize(final String topic, final byte[] data) {
WindowedSerdes.verifyInnerDeserializerNotNull(inner, this);
if (data == null || data.length == 0) {
return null;
}
// for either key or value, their schema is the same hence we will just use session key schema
return SessionKeySchema.from(data, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
// Only for testing
Deserializer<T> innerDeserializer() {
return inner;
}
}
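A usage sketch (the class and the chosen inner serde are assumptions, not part of this commit): the inner deserializer can be passed explicitly, or left to be resolved from the windowed-serde config keys when the no-arg constructor is used:

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.SessionWindowedDeserializer;
import org.apache.kafka.streams.kstream.Windowed;

public class SessionWindowedDeserializerSketch {
    public static void main(final String[] args) {
        // Variant 1: wrap an explicit inner deserializer.
        final Deserializer<Windowed<String>> explicitInner =
            new SessionWindowedDeserializer<>(Serdes.String().deserializer());

        // Variant 2: no-arg constructor; the inner serde class is taken from the
        // windowed-serde config key during configure().
        final Deserializer<Windowed<String>> configuredInner = new SessionWindowedDeserializer<>();
        final Map<String, Object> configs = new HashMap<>();
        configs.put(StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS,
                    Serdes.StringSerde.class.getName());
        configuredInner.configure(configs, true); // true: used as a key deserializer
    }
}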

View File

@@ -0,0 +1,646 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.SessionStore;
import java.time.Duration;
/**
* {@code SessionWindowedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation after a grouping and windowing of a {@link KStream} before an aggregation is
* applied to the new (partitioned) windows resulting in a windowed {@link KTable} (a <emph>windowed</emph>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* {@link SessionWindows} are dynamic data driven windows.
* They have no fixed time boundaries, rather the size of the window is determined by the records.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to sessions until their grace period ends (see {@link SessionWindows#grace(Duration)}).
* <p>
* A {@code SessionWindowedKStream} must be obtained from a {@link KGroupedStream} via
* {@link KGroupedStream#windowedBy(SessionWindows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
* @see SessionWindows
*/
public interface SessionWindowedKStream<K, V> {
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same session and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count();
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Named, Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same session and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count(final Named named);
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count(final Materialized<K, Long, SessionStore<Bytes, byte[]>> materialized);
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count(final Named named,
final Materialized<K, Long, SessionStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Aggregator, Merger, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Aggregator, Merger, Named, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger,
final Named named);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // some windowed aggregation on value type Long
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
 * String key = "some-key";
 * KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger,
final Materialized<K, VR, SessionStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // some windowed aggregation on value type Long
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
 * String key = "some-key";
 * KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger,
final Named named,
final Materialized<K, VR, SessionStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #reduce(Reducer, Materialized)} .
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // Using a Reducer<Long> as an example
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #reduce(Reducer, Named, Materialized)} .
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // Using a Reducer<Long> as an example
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer, final Named named);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // Using a Reducer<Long> as an example
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // Using a Reducer<Long> as an example
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
 * {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Named named,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
}
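
As a hedged sketch of how the interface above is typically reached (topic names, store names, application id and bootstrap servers are placeholders, not taken from this commit), a session-windowed count and reduce could be wired up roughly as follows:

import java.time.Duration;
import java.util.Properties;

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.SessionWindows;
import org.apache.kafka.streams.kstream.Windowed;

public class SessionWindowedKStreamExample {

    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();

        // Count clicks per user and session; a session closes after 5 minutes of inactivity,
        // and out-of-order records are still admitted for another 30 seconds (grace period).
        final KTable<Windowed<String>, Long> clicksPerSession = builder
            .<String, String>stream("clicks-topic")
            .groupByKey(Grouped.with(Serdes.String(), Serdes.String()))
            .windowedBy(SessionWindows.with(Duration.ofMinutes(5)).grace(Duration.ofSeconds(30)))
            .count(Materialized.as("clicks-per-session")); // queryable store name

        // Sum transferred bytes per user and session via reduce().
        final KTable<Windowed<String>, Long> bytesPerSession = builder
            .<String, Long>stream("bytes-topic")
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
            .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
            .reduce(Long::sum, Materialized.as("bytes-per-session"));

        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "session-windowed-example");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        streams.start();
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }
}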

View File

@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
import org.apache.kafka.streams.state.internals.SessionKeySchema;
import java.util.Map;
/**
* The inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
 * if the no-arg constructor is called, and hence no inner serializer is passed during initialization.
*/
public class SessionWindowedSerializer<T> implements WindowedSerializer<T> {
private Serializer<T> inner;
// Default constructor needed by Kafka
public SessionWindowedSerializer() {}
public SessionWindowedSerializer(final Serializer<T> inner) {
this.inner = inner;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).serializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public byte[] serialize(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
if (data == null) {
return null;
}
        // the schema is the same for both keys and values, so we just use the session key schema
return SessionKeySchema.toBinary(data, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
@Override
public byte[] serializeBaseKey(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
return inner.serialize(topic, data.key());
}
// Only for testing
Serializer<T> innerSerializer() {
return inner;
}
}
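
A hedged sketch of one place this serializer commonly shows up: the session-windowed serde (which wraps this serializer and the matching deserializer) handed to Produced when a windowed result is written to an output topic. The topic name and value types are placeholders.

import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.kstream.WindowedSerdes;

public class SessionWindowedOutputExample {

    // Forward a session-windowed count (e.g. the result of SessionWindowedKStream#count)
    // to an output topic; the Windowed<String> key is serialized by SessionWindowedSerializer
    // underneath the session-windowed serde.
    static void writeOut(final KTable<Windowed<String>, Long> sessionCounts) {
        final Serde<Windowed<String>> windowedKeySerde = WindowedSerdes.sessionWindowedSerdeFrom(String.class);

        sessionCounts
            .toStream()
            .to("session-counts-topic", Produced.with(windowedKeySerde, Serdes.Long()));
    }
}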

View File

@@ -0,0 +1,221 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
import java.time.Duration;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* A session based window specification used for aggregating events into sessions.
* <p>
* Sessions represent a period of activity separated by a defined gap of inactivity.
* Any events processed that fall within the inactivity gap of any existing sessions are merged into the existing sessions.
* If the event falls outside of the session gap then a new session will be created.
* <p>
* For example, if we have a session gap of 5 and the following data arrives:
* <pre>
* +--------------------------------------+
* | key | value | time |
* +-----------+-------------+------------+
* | A | 1 | 10 |
* +-----------+-------------+------------+
* | A | 2 | 12 |
* +-----------+-------------+------------+
* | A | 3 | 20 |
* +-----------+-------------+------------+
* </pre>
* We'd have 2 sessions for key A.
* One starting from time 10 and ending at time 12 and another starting and ending at time 20.
* The length of the session is driven by the timestamps of the data within the session.
 * Thus, session windows are not fixed-size windows (c.f. {@link TimeWindows} and {@link JoinWindows}).
* <p>
* If we then received another record:
* <pre>
* +--------------------------------------+
* | key | value | time |
* +-----------+-------------+------------+
* | A | 4 | 16 |
* +-----------+-------------+------------+
* </pre>
* The previous 2 sessions would be merged into a single session with start time 10 and end time 20.
* The aggregate value for this session would be the result of aggregating all 4 values.
* <p>
* For time semantics, see {@link TimestampExtractor}.
*
* @see TimeWindows
* @see UnlimitedWindows
* @see JoinWindows
* @see KGroupedStream#windowedBy(SessionWindows)
* @see TimestampExtractor
*/
public final class SessionWindows {
private final long gapMs;
private final long maintainDurationMs;
private final long graceMs;
private SessionWindows(final long gapMs, final long maintainDurationMs, final long graceMs) {
this.gapMs = gapMs;
this.maintainDurationMs = maintainDurationMs;
this.graceMs = graceMs;
}
/**
* Create a new window specification with the specified inactivity gap in milliseconds.
*
* @param inactivityGapMs the gap of inactivity between sessions in milliseconds
* @return a new window specification with default maintain duration of 1 day
*
* @throws IllegalArgumentException if {@code inactivityGapMs} is zero or negative
* @deprecated Use {@link #with(Duration)} instead.
*/
@Deprecated
public static SessionWindows with(final long inactivityGapMs) {
if (inactivityGapMs <= 0) {
throw new IllegalArgumentException("Gap time (inactivityGapMs) cannot be zero or negative.");
}
return new SessionWindows(inactivityGapMs, DEFAULT_RETENTION_MS, -1);
}
/**
* Create a new window specification with the specified inactivity gap.
*
* @param inactivityGap the gap of inactivity between sessions
* @return a new window specification with default maintain duration of 1 day
*
* @throws IllegalArgumentException if {@code inactivityGap} is zero or negative or can't be represented as {@code long milliseconds}
*/
public static SessionWindows with(final Duration inactivityGap) {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(inactivityGap, "inactivityGap");
return with(ApiUtils.validateMillisecondDuration(inactivityGap, msgPrefix));
}
/**
* Set the window maintain duration (retention time) in milliseconds.
* This retention time is a guaranteed <i>lower bound</i> for how long a window will be maintained.
*
* @return itself
 * @throws IllegalArgumentException if {@code durationMs} is smaller than the window gap
*
* @deprecated since 2.1. Use {@link Materialized#retention}
* or directly configure the retention in a store supplier and use
* {@link Materialized#as(SessionBytesStoreSupplier)}.
*/
@Deprecated
public SessionWindows until(final long durationMs) throws IllegalArgumentException {
if (durationMs < gapMs) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than window gap.");
}
return new SessionWindows(gapMs, durationMs, graceMs);
}
/**
* Reject out-of-order events that arrive more than {@code afterWindowEnd}
 * after the end of their window.
* <p>
* Note that new events may change the boundaries of session windows, so aggressive
* close times can lead to surprising results in which an out-of-order event is rejected and then
* a subsequent event moves the window boundary forward.
*
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
* @return this updated builder
 * @throws IllegalArgumentException if the {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
*/
public SessionWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
if (afterWindowEndMs < 0) {
throw new IllegalArgumentException("Grace period must not be negative.");
}
return new SessionWindows(
gapMs,
maintainDurationMs,
afterWindowEndMs
);
}
@SuppressWarnings("deprecation") // continuing to support Windows#maintainMs/segmentInterval in fallback mode
public long gracePeriodMs() {
// NOTE: in the future, when we remove maintainMs,
// we should default the grace period to 24h to maintain the default behavior,
// or we can default to (24h - gapMs) if you want to be super accurate.
return graceMs != -1 ? graceMs : maintainMs() - inactivityGap();
}
/**
* Return the specified gap for the session windows in milliseconds.
*
* @return the inactivity gap of the specified windows
*/
public long inactivityGap() {
return gapMs;
}
/**
* Return the window maintain duration (retention time) in milliseconds.
* <p>
 * For {@code SessionWindows} the maintain duration is at least as large as the window gap.
*
* @return the window maintain duration
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Deprecated
public long maintainMs() {
return Math.max(maintainDurationMs, gapMs);
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final SessionWindows that = (SessionWindows) o;
return gapMs == that.gapMs &&
maintainDurationMs == that.maintainDurationMs &&
graceMs == that.graceMs;
}
@Override
public int hashCode() {
return Objects.hash(gapMs, maintainDurationMs, graceMs);
}
@Override
public String toString() {
return "SessionWindows{" +
"gapMs=" + gapMs +
", maintainDurationMs=" + maintainDurationMs +
", graceMs=" + graceMs +
'}';
}
}
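
A small hedged sketch tying the specification above together; the millisecond values mirror the gap-of-5 example in the class javadoc and are purely illustrative:

import java.time.Duration;

import org.apache.kafka.streams.kstream.SessionWindows;

public class SessionWindowsSpecExample {

    public static void main(final String[] args) {
        // Sessions close after 5ms of inactivity; out-of-order records are accepted for another 2ms.
        final SessionWindows windows = SessionWindows
            .with(Duration.ofMillis(5))
            .grace(Duration.ofMillis(2));

        System.out.println(windows.inactivityGap());  // 5
        System.out.println(windows.gracePeriodMs());  // 2

        // With a gap of 5: records for key A at times 10 and 12 share one session (12 - 10 <= 5),
        // a record at time 20 starts a new one (20 - 12 > 5), and a later record at time 16
        // bridges both, so they are merged into a single session spanning 10..20.
    }
}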

View File

@@ -0,0 +1,286 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
/**
* Class used to configure the name of the join processor, the repartition topic name,
* state stores or state store names in Stream-Stream join.
* @param <K> the key type
* @param <V1> this value type
* @param <V2> other value type
*/
public class StreamJoined<K, V1, V2> implements NamedOperation<StreamJoined<K, V1, V2>> {
protected final Serde<K> keySerde;
protected final Serde<V1> valueSerde;
protected final Serde<V2> otherValueSerde;
protected final WindowBytesStoreSupplier thisStoreSupplier;
protected final WindowBytesStoreSupplier otherStoreSupplier;
protected final String name;
protected final String storeName;
protected StreamJoined(final StreamJoined<K, V1, V2> streamJoined) {
this(streamJoined.keySerde,
streamJoined.valueSerde,
streamJoined.otherValueSerde,
streamJoined.thisStoreSupplier,
streamJoined.otherStoreSupplier,
streamJoined.name,
streamJoined.storeName);
}
private StreamJoined(final Serde<K> keySerde,
final Serde<V1> valueSerde,
final Serde<V2> otherValueSerde,
final WindowBytesStoreSupplier thisStoreSupplier,
final WindowBytesStoreSupplier otherStoreSupplier,
final String name,
final String storeName) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.otherValueSerde = otherValueSerde;
this.thisStoreSupplier = thisStoreSupplier;
this.otherStoreSupplier = otherStoreSupplier;
this.name = name;
this.storeName = storeName;
}
/**
* Creates a StreamJoined instance with the provided store suppliers. The store suppliers must implement
* the {@link WindowBytesStoreSupplier} interface. The store suppliers must provide unique names or a
* {@link org.apache.kafka.streams.errors.StreamsException} is thrown.
*
* @param storeSupplier this store supplier
* @param otherStoreSupplier other store supplier
* @param <K> the key type
* @param <V1> this value type
* @param <V2> other value type
* @return {@link StreamJoined} instance
*/
public static <K, V1, V2> StreamJoined<K, V1, V2> with(final WindowBytesStoreSupplier storeSupplier,
final WindowBytesStoreSupplier otherStoreSupplier) {
return new StreamJoined<>(
null,
null,
null,
storeSupplier,
otherStoreSupplier,
null,
null
);
}
/**
* Creates a {@link StreamJoined} instance using the provided name for the state stores and hence the changelog
* topics for the join stores. The name for the stores will be ${applicationId}-&lt;storeName&gt;-this-join and ${applicationId}-&lt;storeName&gt;-other-join
 * or ${applicationId}-&lt;storeName&gt;-outer-this-join and ${applicationId}-&lt;storeName&gt;-outer-other-join, depending on whether the join is an inner
 * or an outer join. The changelog topics will have the -changelog suffix. Note that even though the join stores have a
 * specified name, they remain unavailable for querying.
*
* @param storeName The name to use for the store
* @param <K> The key type
* @param <V1> This value type
* @param <V2> Other value type
* @return {@link StreamJoined} instance
*/
public static <K, V1, V2> StreamJoined<K, V1, V2> as(final String storeName) {
return new StreamJoined<>(
null,
null,
null,
null,
null,
null,
storeName
);
}
/**
* Creates a {@link StreamJoined} instance with the provided serdes to configure the stores
* for the join.
* @param keySerde The key serde
* @param valueSerde This value serde
* @param otherValueSerde Other value serde
* @param <K> The key type
* @param <V1> This value type
* @param <V2> Other value type
* @return {@link StreamJoined} instance
*/
public static <K, V1, V2> StreamJoined<K, V1, V2> with(final Serde<K> keySerde,
final Serde<V1> valueSerde,
final Serde<V2> otherValueSerde
) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
null,
null,
null,
null
);
}
/**
* Set the name to use for the join processor and the repartition topic(s) if required.
* @param name the name to use
* @return a new {@link StreamJoined} instance
*/
@Override
public StreamJoined<K, V1, V2> withName(final String name) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
 * Sets the base store name to use for both sides of the join; this name determines the state store names and hence the changelog
 * topics for the join stores. The name for the stores will be ${applicationId}-&lt;storeName&gt;-this-join and ${applicationId}-&lt;storeName&gt;-other-join
 * or ${applicationId}-&lt;storeName&gt;-outer-this-join and ${applicationId}-&lt;storeName&gt;-outer-other-join, depending on whether the join is an inner
* or an outer join. The changelog topics will have the -changelog suffix. The user should note that even though the join stores will have a
* specified name, the stores will remain unavailable for querying.
*
* @param storeName the storeName to use
* @return a new {@link StreamJoined} instance
*/
public StreamJoined<K, V1, V2> withStoreName(final String storeName) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link Serde Serde<K>} for the key
* @param keySerde the serde to use for the key
* @return a new {@link StreamJoined} configured with the keySerde
*/
public StreamJoined<K, V1, V2> withKeySerde(final Serde<K> keySerde) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link Serde Serde<V1>} for this value
* @param valueSerde the serde to use for this value (calling or left side of the join)
* @return a new {@link StreamJoined} configured with the valueSerde
*/
public StreamJoined<K, V1, V2> withValueSerde(final Serde<V1> valueSerde) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link Serde Serde<V2>} for the other value
* @param otherValueSerde the serde to use for the other value (other or right side of the join)
* @return a new {@link StreamJoined} configured with the otherValueSerde
*/
public StreamJoined<K, V1, V2> withOtherValueSerde(final Serde<V2> otherValueSerde) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link WindowBytesStoreSupplier} for this store supplier. Please note
* this method only provides the store supplier for the left side of the join. If you wish to also provide a
* store supplier for the right (i.e., other) side you must use the {@link StreamJoined#withOtherStoreSupplier(WindowBytesStoreSupplier)}
* method
* @param thisStoreSupplier the store supplier to use for this store supplier (calling or left side of the join)
* @return a new {@link StreamJoined} configured with thisStoreSupplier
*/
public StreamJoined<K, V1, V2> withThisStoreSupplier(final WindowBytesStoreSupplier thisStoreSupplier) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link WindowBytesStoreSupplier} for the other store supplier. Please note
* this method only provides the store supplier for the right side of the join. If you wish to also provide a
* store supplier for the left side you must use the {@link StreamJoined#withThisStoreSupplier(WindowBytesStoreSupplier)}
* method
* @param otherStoreSupplier the store supplier to use for the other store supplier (other or right side of the join)
* @return a new {@link StreamJoined} configured with otherStoreSupplier
*/
public StreamJoined<K, V1, V2> withOtherStoreSupplier(final WindowBytesStoreSupplier otherStoreSupplier) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
@Override
public String toString() {
return "StreamJoin{" +
"keySerde=" + keySerde +
", valueSerde=" + valueSerde +
", otherValueSerde=" + otherValueSerde +
", thisStoreSupplier=" + thisStoreSupplier +
", otherStoreSupplier=" + otherStoreSupplier +
", name='" + name + '\'' +
", storeName='" + storeName + '\'' +
'}';
}
}
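
A minimal usage sketch (not part of this file) showing where a StreamJoined instance is typically supplied, namely as the last argument of a windowed KStream-KStream join. The topic names, serdes, window size, and store/processor names below are illustrative assumptions.

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.JoinWindows;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.StreamJoined;

public class StreamJoinedUsageSketch {
    public static Topology build() {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, String> left =
            builder.stream("left-input", Consumed.with(Serdes.String(), Serdes.String()));
        final KStream<String, String> right =
            builder.stream("right-input", Consumed.with(Serdes.String(), Serdes.String()));

        final KStream<String, String> joined = left.join(
            right,
            (leftValue, rightValue) -> leftValue + "/" + rightValue,             // ValueJoiner
            JoinWindows.of(Duration.ofMinutes(5)),                               // join window
            StreamJoined.with(Serdes.String(), Serdes.String(), Serdes.String()) // key/this/other serdes
                .withStoreName("join-store")                                     // base name for both join stores
                .withName("my-join"));                                           // processor/repartition topic name

        joined.to("join-output");
        return builder.build();
    }
}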

View File

@@ -0,0 +1,177 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.kstream.internals.suppress.EagerBufferConfigImpl;
import org.apache.kafka.streams.kstream.internals.suppress.FinalResultsSuppressionBuilder;
import org.apache.kafka.streams.kstream.internals.suppress.StrictBufferConfigImpl;
import org.apache.kafka.streams.kstream.internals.suppress.SuppressedInternal;
import java.time.Duration;
public interface Suppressed<K> extends NamedOperation<Suppressed<K>> {
/**
* Marker interface for a buffer configuration that is "strict" in the sense that it will strictly
* enforce the time bound and never emit early.
*/
interface StrictBufferConfig extends BufferConfig<StrictBufferConfig> {
}
/**
* Marker interface for a buffer configuration that will strictly enforce size constraints
* (bytes and/or number of records) on the buffer, so it is suitable for reducing duplicate
* results downstream, but does not promise to eliminate them entirely.
*/
interface EagerBufferConfig extends BufferConfig<EagerBufferConfig> {
}
interface BufferConfig<BC extends BufferConfig<BC>> {
/**
* Create a size-constrained buffer in terms of the maximum number of keys it will store.
*/
static EagerBufferConfig maxRecords(final long recordLimit) {
return new EagerBufferConfigImpl(recordLimit, Long.MAX_VALUE);
}
/**
* Set a size constraint on the buffer in terms of the maximum number of keys it will store.
*/
BC withMaxRecords(final long recordLimit);
/**
* Create a size-constrained buffer in terms of the maximum number of bytes it will use.
*/
static EagerBufferConfig maxBytes(final long byteLimit) {
return new EagerBufferConfigImpl(Long.MAX_VALUE, byteLimit);
}
/**
 * Set a size constraint on the buffer in terms of the maximum number of bytes it will use.
*/
BC withMaxBytes(final long byteLimit);
/**
* Create a buffer unconstrained by size (either keys or bytes).
*
* As a result, the buffer will consume as much memory as it needs, dictated by the time bound.
*
* If there isn't enough heap available to meet the demand, the application will encounter an
* {@link OutOfMemoryError} and shut down (not guaranteed to be a graceful exit). Also, note that
* JVM processes under extreme memory pressure may exhibit poor GC behavior.
*
* This is a convenient option if you doubt that your buffer will be that large, but also don't
* wish to pick particular constraints, such as in testing.
*
* This buffer is "strict" in the sense that it will enforce the time bound or crash.
* It will never emit early.
*/
static StrictBufferConfig unbounded() {
return new StrictBufferConfigImpl();
}
/**
* Set the buffer to be unconstrained by size (either keys or bytes).
*
* As a result, the buffer will consume as much memory as it needs, dictated by the time bound.
*
* If there isn't enough heap available to meet the demand, the application will encounter an
* {@link OutOfMemoryError} and shut down (not guaranteed to be a graceful exit). Also, note that
* JVM processes under extreme memory pressure may exhibit poor GC behavior.
*
* This is a convenient option if you doubt that your buffer will be that large, but also don't
* wish to pick particular constraints, such as in testing.
*
* This buffer is "strict" in the sense that it will enforce the time bound or crash.
* It will never emit early.
*/
StrictBufferConfig withNoBound();
/**
 * Set the buffer to gracefully shut down the application when any of its constraints are violated.
*
* This buffer is "strict" in the sense that it will enforce the time bound or shut down.
* It will never emit early.
*/
StrictBufferConfig shutDownWhenFull();
/**
* Set the buffer to just emit the oldest records when any of its constraints are violated.
*
* This buffer is "not strict" in the sense that it may emit early, so it is suitable for reducing
* duplicate results downstream, but does not promise to eliminate them.
*/
EagerBufferConfig emitEarlyWhenFull();
}
/**
* Configure the suppression to emit only the "final results" from the window.
*
* By default all Streams operators emit results whenever new results are available.
* This includes windowed operations.
*
* This configuration will instead emit just one result per key for each window, guaranteeing
* to deliver only the final result. This option is suitable for use cases in which the business logic
* requires a hard guarantee that only the final result is propagated. For example, sending alerts.
*
* To accomplish this, the operator will buffer events from the window until the window close (that is,
* until the end-time passes, and additionally until the grace period expires). Since windowed operators
 * are required to reject out-of-order events for a window whose grace period has expired, there is an additional
 * guarantee that the final results emitted from this suppression will match any queryable state upstream.
*
* @param bufferConfig A configuration specifying how much space to use for buffering intermediate results.
* This is required to be a "strict" config, since it would violate the "final results"
* property to emit early and then issue an update later.
* @return a "final results" mode suppression configuration
*/
static Suppressed<Windowed> untilWindowCloses(final StrictBufferConfig bufferConfig) {
return new FinalResultsSuppressionBuilder<>(null, bufferConfig);
}
/**
* Configure the suppression to wait {@code timeToWaitForMoreEvents} amount of time after receiving a record
 * before emitting it further downstream. If another record for the same key arrives in the meantime, it replaces
 * the first record in the buffer but does <em>not</em> restart the timer.
*
* @param timeToWaitForMoreEvents The amount of time to wait, per record, for new events.
* @param bufferConfig A configuration specifying how much space to use for buffering intermediate results.
* @param <K> The key type for the KTable to apply this suppression to.
* @return a suppression configuration
*/
static <K> Suppressed<K> untilTimeLimit(final Duration timeToWaitForMoreEvents, final BufferConfig bufferConfig) {
return new SuppressedInternal<>(null, timeToWaitForMoreEvents, bufferConfig, null, false);
}
/**
* Use the specified name for the suppression node in the topology.
* <p>
* This can be used to insert a suppression without changing the rest of the topology names
* (and therefore not requiring an application reset).
* <p>
* Note however, that once a suppression has buffered some records, removing it from the topology would cause
* the loss of those records.
* <p>
 * A suppression can be "disabled" with the configuration {@code untilTimeLimit(Duration.ZERO, ...)}.
*
* @param name The name to be used for the suppression node and changelog topic
* @return The same configuration with the addition of the given {@code name}.
*/
@Override
Suppressed<K> withName(final String name);
}
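
A minimal usage sketch (not part of this file) of the "final results" mode on a windowed count: the suppression buffers updates and emits exactly one result per key once the window plus grace period has closed. The topic names and durations are illustrative assumptions.

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.Suppressed;
import org.apache.kafka.streams.kstream.TimeWindows;

public class SuppressedUsageSketch {
    public static Topology build() {
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<String, Long> finalCounts = builder
            .stream("clicks", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofMinutes(1)).grace(Duration.ofSeconds(30)))
            .count()
            // Buffer updates until the window plus grace period closes, then emit one final result per key.
            .suppress(Suppressed.untilWindowCloses(Suppressed.BufferConfig.unbounded()))
            .toStream((windowedKey, count) -> windowedKey.key()); // drop the window from the key

        finalCounts.to("click-counts-final", Produced.with(Serdes.String(), Serdes.Long()));
        return builder.build();
    }
}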

View File

@@ -0,0 +1,248 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.WindowStore;
import java.time.Duration;
/**
 * {@code TimeWindowedCogroupedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation of a {@link CogroupedKStream} in order to apply a windowed aggregation operation
 * on the original {@link KGroupedStream} records resulting in a windowed {@link KTable} (a <em>windowed</em>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* The specified {@code windows} define either hopping time windows that can be overlapping or tumbling (c.f.
* {@link TimeWindows}) or they define landmark windows (c.f. {@link UnlimitedWindows}).
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to windows until their grace period ends (see {@link TimeWindows#grace(Duration)}).
* <p>
* A {@code TimeWindowedCogroupedKStream} must be obtained from a {@link CogroupedKStream} via
* {@link CogroupedKStream#windowedBy(Windows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
* @see CogroupedKStream
*/
public interface TimeWindowedCogroupedKStream<K, V> {
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Named, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Named named);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Named named,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
}
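
A minimal usage sketch (not part of this file) of a windowed cogrouped aggregation: two grouped streams are cogrouped into a single Long aggregate, windowed by hour, and aggregated per window. The topic names, serdes, and the Long aggregate type are illustrative assumptions.

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;

public class CogroupedWindowUsageSketch {
    public static Topology build() {
        final StreamsBuilder builder = new StreamsBuilder();
        final KGroupedStream<String, Long> clicks = builder
            .stream("clicks", Consumed.with(Serdes.String(), Serdes.Long()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()));
        final KGroupedStream<String, Long> purchases = builder
            .stream("purchases", Consumed.with(Serdes.String(), Serdes.Long()))
            .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()));

        // Cogroup both inputs into one Long aggregate, window it by hour, and aggregate per window.
        final KTable<Windowed<String>, Long> totals = clicks
            .<Long>cogroup((key, value, aggregate) -> aggregate + value)
            .cogroup(purchases, (key, value, aggregate) -> aggregate + value)
            .windowedBy(TimeWindows.of(Duration.ofHours(1)))
            .aggregate(() -> 0L); // Initializer; the default serdes from the config serialize the result

        totals.toStream().foreach((windowedKey, total) ->
            System.out.println(windowedKey.window().startTime() + " " + windowedKey.key() + " = " + total));
        return builder.build();
    }
}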

View File

@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.state.internals.WindowKeySchema;
import java.util.Map;
/**
* The inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
* if the no-arg constructor is called and hence it is not passed during initialization.
*/
public class TimeWindowedDeserializer<T> implements Deserializer<Windowed<T>> {
private final Long windowSize;
private boolean isChangelogTopic;
private Deserializer<T> inner;
// Default constructor needed by Kafka
public TimeWindowedDeserializer() {
this(null, Long.MAX_VALUE);
}
// TODO: fix this part as last bits of KAFKA-4468
public TimeWindowedDeserializer(final Deserializer<T> inner) {
this(inner, Long.MAX_VALUE);
}
public TimeWindowedDeserializer(final Deserializer<T> inner, final long windowSize) {
this.inner = inner;
this.windowSize = windowSize;
this.isChangelogTopic = false;
}
public Long getWindowSize() {
return this.windowSize;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).deserializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public Windowed<T> deserialize(final String topic, final byte[] data) {
WindowedSerdes.verifyInnerDeserializerNotNull(inner, this);
if (data == null || data.length == 0) {
return null;
}
// toStoreKeyBinary was used to serialize the data.
if (this.isChangelogTopic) {
return WindowKeySchema.fromStoreKey(data, windowSize, inner, topic);
}
// toBinary was used to serialize the data
return WindowKeySchema.from(data, windowSize, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
public void setIsChangelogTopic(final boolean isChangelogTopic) {
this.isChangelogTopic = isChangelogTopic;
}
// Only for testing
Deserializer<T> innerDeserializer() {
return inner;
}
}
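
A minimal usage sketch (not part of this file) of reading windowed keys back with a plain consumer, wrapping an inner String deserializer in a TimeWindowedDeserializer. The bootstrap server, topic name, and window size are illustrative assumptions; the window size must match the upstream windowed aggregation.

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.LongDeserializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
import org.apache.kafka.streams.kstream.Windowed;

public class WindowedKeyConsumerSketch {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "windowed-key-reader");

        // Wrap the inner String deserializer and pass the window size used by the upstream
        // TimeWindows aggregation (assumed to be one minute here).
        final TimeWindowedDeserializer<String> keyDeserializer =
            new TimeWindowedDeserializer<>(new StringDeserializer(), Duration.ofMinutes(1).toMillis());

        try (KafkaConsumer<Windowed<String>, Long> consumer =
                 new KafkaConsumer<>(props, keyDeserializer, new LongDeserializer())) {
            consumer.subscribe(Collections.singletonList("click-counts"));
            final ConsumerRecords<Windowed<String>, Long> records = consumer.poll(Duration.ofSeconds(1));
            for (final ConsumerRecord<Windowed<String>, Long> record : records) {
                System.out.println(record.key().window().startTime() + " " + record.key().key() + " = " + record.value());
            }
        }
    }
}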

View File

@@ -0,0 +1,637 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.WindowStore;
import java.time.Duration;
/**
* {@code TimeWindowedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation after a grouping and windowing of a {@link KStream} before an aggregation is
 * applied to the new (partitioned) windows resulting in a windowed {@link KTable} (a <em>windowed</em>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* The specified {@code windows} define either hopping time windows that can be overlapping or tumbling (c.f.
* {@link TimeWindows}) or they define landmark windows (c.f. {@link UnlimitedWindows}).
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to {@link TimeWindows} until their grace period ends (see {@link TimeWindows#grace(Duration)}).
* <p>
* A {@code TimeWindowedKStream} must be obtained from a {@link KGroupedStream} via
* {@link KGroupedStream#windowedBy(Windows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
*/
public interface TimeWindowedKStream<K, V> {
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count();
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Named, Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count(final Named named);
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
 * {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> countForWordsForWindows = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count(final Materialized<K, Long, WindowStore<Bytes, byte[]>> materialized);
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> countForWordsForWindows = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count(final Named named,
final Materialized<K, Long, WindowStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Aggregator, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Aggregator, Named, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Named named);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Materialized<K, VR, WindowStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String, Long> localWindowStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore()));
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Named named,
final Materialized<K, VR, WindowStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator)}).
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
 * If a different serde is required then you should use {@link #reduce(Reducer, Materialized)}.
* <p>
 * The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // For example, a Reducer<Long> that sums values:
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer);
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
 * If a different serde is required then you should use {@link #reduce(Reducer, Named, Materialized)}.
* <p>
 * The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // For example, a Reducer<Long> that sums values:
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer, final Named named);
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // For example, a Reducer<Long> that sums values:
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String, Long> localWindowStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore()));
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> reduceStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // For example, a Reducer<Long> that sums values:
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
 * {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String, Long> localWindowStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore()));
 *
 * String key = "some-word";
 * long fromTime = ...;
 * long toTime = ...;
 * WindowStoreIterator<Long> reduceStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Named named,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
}
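/*
 * A minimal, hypothetical usage sketch of the windowed aggregate() documented above: counting records
 * per key in 5-minute tumbling windows and querying the resulting WindowStore via
 * KafkaStreams#store(StoreQueryParameters). The topic name "words", the store name "counts-per-window",
 * and the serdes are assumptions for illustration only; reduce() follows the same pattern with a Reducer.
 */
import java.time.Duration;
import java.time.Instant;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.state.QueryableStoreTypes;
import org.apache.kafka.streams.state.ReadOnlyWindowStore;
import org.apache.kafka.streams.state.WindowStore;
import org.apache.kafka.streams.state.WindowStoreIterator;

class WindowedAggregateSketch {

    // Count records per key in 5-minute tumbling windows, materialized as "counts-per-window".
    static void build(final StreamsBuilder builder) {
        final KStream<String, String> words = builder.stream("words");
        final KTable<Windowed<String>, Long> counts = words
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))
            .aggregate(
                () -> 0L,                                  // Initializer: initial aggregate per key and window
                (key, value, aggregate) -> aggregate + 1L, // Aggregator: fold each record into the current aggregate
                Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("counts-per-window")
                    .withValueSerde(Serdes.Long()));
    }

    // Query the local window store of a running application instance (non-local keys need an RPC layer).
    static void query(final KafkaStreams streams) {
        final ReadOnlyWindowStore<String, Long> store = streams.store(
            StoreQueryParameters.fromNameAndType("counts-per-window", QueryableStoreTypes.<String, Long>windowStore()));
        try (final WindowStoreIterator<Long> iter =
                 store.fetch("some-word", Instant.now().minus(Duration.ofHours(1)), Instant.now())) {
            iter.forEachRemaining(kv -> System.out.println(kv.key + " -> " + kv.value));
        }
    }
}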

View File

@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
import org.apache.kafka.streams.state.internals.WindowKeySchema;
import java.util.Map;
/**
* The inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
 * if the no-arg constructor is used, since no inner serializer is passed at construction time.
*/
public class TimeWindowedSerializer<T> implements WindowedSerializer<T> {
private Serializer<T> inner;
// Default constructor needed by Kafka
@SuppressWarnings("WeakerAccess")
public TimeWindowedSerializer() {}
public TimeWindowedSerializer(final Serializer<T> inner) {
this.inner = inner;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Utils.newInstance(value, Serde.class).serializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public byte[] serialize(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
if (data == null) {
return null;
}
return WindowKeySchema.toBinary(data, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
@Override
public byte[] serializeBaseKey(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
return inner.serialize(topic, data.key());
}
// Only for testing
Serializer<T> innerSerializer() {
return inner;
}
}
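/*
 * A minimal sketch of configuring a TimeWindowedSerializer created via the no-arg constructor, as
 * described in the class Javadoc above. Using Serdes.StringSerde as the inner serde is an assumption
 * for illustration only.
 */
import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.TimeWindowedSerializer;

class TimeWindowedSerializerConfigSketch {
    static TimeWindowedSerializer<String> windowedKeySerializer() {
        final Map<String, Object> configs = new HashMap<>();
        // Because the no-arg constructor is used, the inner serde class must come from the config.
        configs.put(StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS, Serdes.StringSerde.class.getName());
        final TimeWindowedSerializer<String> serializer = new TimeWindowedSerializer<>();
        serializer.configure(configs, true); // isKey = true selects the key inner-serde property
        return serializer;
    }
}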

View File

@@ -0,0 +1,285 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.kstream.internals.TimeWindow;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
import java.time.Duration;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* The fixed-size time-based window specifications used for aggregations.
* <p>
* The semantics of time-based aggregation windows are: Every T1 (advance) milliseconds, compute the aggregate total for
* T2 (size) milliseconds.
* <ul>
 * <li> If {@code advance < size} a hopping window is defined:<br />
 *      it discretizes a stream into overlapping windows, which implies that a record may be contained in one or
 *      more "adjacent" windows.</li>
 * <li> If {@code advance == size} a tumbling window is defined:<br />
 *      it discretizes a stream into non-overlapping windows, which implies that a record is only ever contained in
 *      one and only one tumbling window.</li>
* </ul>
* Thus, the specified {@link TimeWindow}s are aligned to the epoch.
 * Aligned to the epoch means that the first window starts at timestamp zero.
 * For example, hopping windows with a size of 5000ms and an advance of 3000ms have window boundaries
* [0;5000),[3000;8000),... and not [1000;6000),[4000;9000),... or even something "random" like [1452;6452),[4452;9452),...
* <p>
* For time semantics, see {@link TimestampExtractor}.
*
* @see SessionWindows
* @see UnlimitedWindows
* @see JoinWindows
* @see KGroupedStream#windowedBy(Windows)
* @see TimestampExtractor
*/
public final class TimeWindows extends Windows<TimeWindow> {
private final long maintainDurationMs;
/** The size of the windows in milliseconds. */
@SuppressWarnings("WeakerAccess")
public final long sizeMs;
/**
* The size of the window's advance interval in milliseconds, i.e., by how much a window moves forward relative to
* the previous one.
*/
@SuppressWarnings("WeakerAccess")
public final long advanceMs;
private final long graceMs;
private TimeWindows(final long sizeMs, final long advanceMs, final long graceMs, final long maintainDurationMs) {
this.sizeMs = sizeMs;
this.advanceMs = advanceMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
/** Private constructor for preserving segments. Can be removed along with Windows.segments. **/
@Deprecated
private TimeWindows(final long sizeMs,
final long advanceMs,
final long graceMs,
final long maintainDurationMs,
final int segments) {
super(segments);
this.sizeMs = sizeMs;
this.advanceMs = advanceMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
/**
* Return a window definition with the given window size, and with the advance interval being equal to the window
* size.
* The time interval represented by the N-th window is: {@code [N * size, N * size + size)}.
* <p>
* This provides the semantics of tumbling windows, which are fixed-sized, gap-less, non-overlapping windows.
* Tumbling windows are a special case of hopping windows with {@code advance == size}.
*
* @param sizeMs The size of the window in milliseconds
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the specified window size is zero or negative
* @deprecated Use {@link #of(Duration)} instead
*/
@Deprecated
public static TimeWindows of(final long sizeMs) throws IllegalArgumentException {
if (sizeMs <= 0) {
throw new IllegalArgumentException("Window size (sizeMs) must be larger than zero.");
}
// This is a static factory method, so we initialize grace and retention to the defaults.
return new TimeWindows(sizeMs, sizeMs, -1, DEFAULT_RETENTION_MS);
}
/**
* Return a window definition with the given window size, and with the advance interval being equal to the window
* size.
* The time interval represented by the N-th window is: {@code [N * size, N * size + size)}.
* <p>
* This provides the semantics of tumbling windows, which are fixed-sized, gap-less, non-overlapping windows.
* Tumbling windows are a special case of hopping windows with {@code advance == size}.
*
* @param size The size of the window
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the specified window size is zero or negative or can't be represented as {@code long milliseconds}
*/
@SuppressWarnings("deprecation") // removing #of(final long sizeMs) will fix this
public static TimeWindows of(final Duration size) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(size, "size");
return of(ApiUtils.validateMillisecondDuration(size, msgPrefix));
}
/**
* Return a window definition with the original size, but advance ("hop") the window by the given interval, which
* specifies by how much a window moves forward relative to the previous one.
* The time interval represented by the N-th window is: {@code [N * advance, N * advance + size)}.
* <p>
* This provides the semantics of hopping windows, which are fixed-sized, overlapping windows.
*
* @param advanceMs The advance interval ("hop") in milliseconds of the window, with the requirement that {@code 0 < advanceMs <= sizeMs}.
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the advance interval is negative, zero, or larger than the window size
* @deprecated Use {@link #advanceBy(Duration)} instead
*/
@Deprecated
public TimeWindows advanceBy(final long advanceMs) {
if (advanceMs <= 0 || advanceMs > sizeMs) {
throw new IllegalArgumentException(String.format("Window advancement interval should be more than zero " +
"and less than window duration which is %d ms, but given advancement interval is: %d ms", sizeMs, advanceMs));
}
return new TimeWindows(sizeMs, advanceMs, graceMs, maintainDurationMs, segments);
}
/**
* Return a window definition with the original size, but advance ("hop") the window by the given interval, which
* specifies by how much a window moves forward relative to the previous one.
* The time interval represented by the N-th window is: {@code [N * advance, N * advance + size)}.
* <p>
* This provides the semantics of hopping windows, which are fixed-sized, overlapping windows.
*
* @param advance The advance interval ("hop") of the window, with the requirement that {@code 0 < advance.toMillis() <= sizeMs}.
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the advance interval is negative, zero, or larger than the window size
*/
@SuppressWarnings("deprecation") // removing #advanceBy(final long advanceMs) will fix this
public TimeWindows advanceBy(final Duration advance) {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(advance, "advance");
return advanceBy(ApiUtils.validateMillisecondDuration(advance, msgPrefix));
}
@Override
public Map<Long, TimeWindow> windowsFor(final long timestamp) {
long windowStart = (Math.max(0, timestamp - sizeMs + advanceMs) / advanceMs) * advanceMs;
final Map<Long, TimeWindow> windows = new LinkedHashMap<>();
while (windowStart <= timestamp) {
final TimeWindow window = new TimeWindow(windowStart, windowStart + sizeMs);
windows.put(windowStart, window);
windowStart += advanceMs;
}
return windows;
}
@Override
public long size() {
return sizeMs;
}
/**
 * Reject out-of-order events that arrive more than {@code afterWindowEnd}
 * after the end of their window.
* <p>
* Delay is defined as (stream_time - record_timestamp).
*
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
* @return this updated builder
* @throws IllegalArgumentException if {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
*/
@SuppressWarnings("deprecation") // will be fixed when we remove segments from Windows
public TimeWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
if (afterWindowEndMs < 0) {
throw new IllegalArgumentException("Grace period must not be negative.");
}
return new TimeWindows(sizeMs, advanceMs, afterWindowEndMs, maintainDurationMs, segments);
}
@SuppressWarnings("deprecation") // continuing to support Windows#maintainMs/segmentInterval in fallback mode
@Override
public long gracePeriodMs() {
// NOTE: in the future, when we remove maintainMs,
// we should default the grace period to 24h to maintain the default behavior,
// or we can default to (24h - size) if you want to be super accurate.
return graceMs != -1 ? graceMs : maintainMs() - size();
}
/**
* @param durationMs the window retention time
* @return itself
 * @throws IllegalArgumentException if {@code durationMs} is smaller than the window size
*
* @deprecated since 2.1. Use {@link Materialized#retention} or directly configure the retention in a store supplier
* and use {@link Materialized#as(WindowBytesStoreSupplier)}.
*/
@Override
@Deprecated
public TimeWindows until(final long durationMs) throws IllegalArgumentException {
if (durationMs < sizeMs) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than the window size.");
}
return new TimeWindows(sizeMs, advanceMs, graceMs, durationMs, segments);
}
/**
* {@inheritDoc}
* <p>
 * For {@code TimeWindows} the maintain duration is at least as large as the window size.
*
* @return the window maintain duration
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Override
@Deprecated
public long maintainMs() {
return Math.max(maintainDurationMs, sizeMs);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final TimeWindows that = (TimeWindows) o;
return maintainDurationMs == that.maintainDurationMs &&
segments == that.segments &&
sizeMs == that.sizeMs &&
advanceMs == that.advanceMs &&
graceMs == that.graceMs;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public int hashCode() {
return Objects.hash(maintainDurationMs, segments, sizeMs, advanceMs, graceMs);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public String toString() {
return "TimeWindows{" +
"maintainDurationMs=" + maintainDurationMs +
", sizeMs=" + sizeMs +
", advanceMs=" + advanceMs +
", graceMs=" + graceMs +
", segments=" + segments +
'}';
}
}
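/*
 * A minimal sketch of the tumbling and hopping window definitions described in the Javadoc above.
 * The size, advance, and grace values are arbitrary illustration choices.
 */
import java.time.Duration;
import org.apache.kafka.streams.kstream.TimeWindows;

class TimeWindowsSketch {
    // Tumbling: advance == size, so consecutive 5-minute windows never overlap.
    static final TimeWindows TUMBLING = TimeWindows.of(Duration.ofMinutes(5));

    // Hopping: advance < size, so every record falls into size/advance = 5 overlapping windows.
    static final TimeWindows HOPPING = TimeWindows.of(Duration.ofMinutes(5))
        .advanceBy(Duration.ofMinutes(1))
        .grace(Duration.ofSeconds(30)); // still admit records that arrive up to 30s after a window ends
}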

View File

@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.Punctuator;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.To;
/**
* The {@code Transformer} interface is for stateful mapping of an input record to zero, one, or multiple new output
* records (both key and value type can be altered arbitrarily).
 * This is a stateful record-by-record operation, i.e., {@link #transform(Object, Object)} is invoked individually for
* each record of a stream and can access and modify a state that is available beyond a single call of
* {@link #transform(Object, Object)} (cf. {@link KeyValueMapper} for stateless record transformation).
* Additionally, this {@code Transformer} can {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule}
* a method to be {@link Punctuator#punctuate(long) called periodically} with the provided context.
* <p>
* Use {@link TransformerSupplier} to provide new instances of {@code Transformer} to Kafka Stream's runtime.
* <p>
* If only a record's value should be modified {@link ValueTransformer} can be used.
*
* @param <K> key type
* @param <V> value type
* @param <R> {@link KeyValue} return type (both key and value type can be set
* arbitrarily)
* @see TransformerSupplier
* @see KStream#transform(TransformerSupplier, String...)
* @see ValueTransformer
* @see KStream#map(KeyValueMapper)
* @see KStream#flatMap(KeyValueMapper)
*/
public interface Transformer<K, V, R> {
/**
* Initialize this transformer.
* This is called once per instance when the topology gets initialized.
* When the framework is done with the transformer, {@link #close()} will be called on it; the
* framework may later re-use the transformer by calling {@link #init(ProcessorContext)} again.
* <p>
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
* <p>
 * Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object, Object)}.
*
* @param context the context
*/
void init(final ProcessorContext context);
/**
* Transform the record with the given key and value.
* Additionally, any {@link StateStore state} that is {@link KStream#transform(TransformerSupplier, String...)
* attached} to this operator can be accessed and modified
* arbitrarily (cf. {@link ProcessorContext#getStateStore(String)}).
* <p>
 * If only one record should be forwarded downstream, {@code transform} can return a new {@link KeyValue}. If
* more than one output record should be forwarded downstream, {@link ProcessorContext#forward(Object, Object)}
* and {@link ProcessorContext#forward(Object, Object, To)} can be used.
* If no record should be forwarded downstream, {@code transform} can return {@code null}.
*
* Note that returning a new {@link KeyValue} is merely for convenience. The same can be achieved by using
* {@link ProcessorContext#forward(Object, Object)} and returning {@code null}.
*
* @param key the key for the record
* @param value the value for the record
 * @return new {@link KeyValue} pair; if {@code null} no key-value pair will
 *         be forwarded downstream
*/
R transform(final K key, final V value);
/**
* Close this transformer and clean up any resources. The framework may
* later re-use this transformer by calling {@link #init(ProcessorContext)} on it again.
* <p>
* To generate new {@link KeyValue} pairs {@link ProcessorContext#forward(Object, Object)} and
* {@link ProcessorContext#forward(Object, Object, To)} can be used.
*/
void close();
}
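/*
 * A minimal, hypothetical stateful Transformer following the contract above. The state store name
 * "counts" and the way it is attached through KStream#transform(..., "counts") are assumptions for
 * illustration only.
 */
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.kstream.Transformer;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;

class CountingTransformer implements Transformer<String, String, KeyValue<String, Long>> {
    private KeyValueStore<String, Long> counts;

    @SuppressWarnings("unchecked")
    @Override
    public void init(final ProcessorContext context) {
        // The store must have been added to the topology and named in KStream#transform(..., "counts").
        counts = (KeyValueStore<String, Long>) context.getStateStore("counts");
    }

    @Override
    public KeyValue<String, Long> transform(final String key, final String value) {
        final Long previous = counts.get(key);
        final long updated = (previous == null ? 0L : previous) + 1L;
        counts.put(key, updated);
        return KeyValue.pair(key, updated); // forward exactly one record downstream
    }

    @Override
    public void close() {
        // Nothing to clean up; the framework manages the state store's lifecycle.
    }
}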

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* A {@code TransformerSupplier} interface which can create one or more {@link Transformer} instances.
*
* @param <K> key type
* @param <V> value type
* @param <R> {@link org.apache.kafka.streams.KeyValue KeyValue} return type (both key and value type can be set
* arbitrarily)
* @see Transformer
* @see KStream#transform(TransformerSupplier, String...)
* @see ValueTransformer
* @see ValueTransformerSupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
*/
public interface TransformerSupplier<K, V, R> {
/**
* Return a new {@link Transformer} instance.
*
* @return a new {@link Transformer} instance
*/
Transformer<K, V, R> get();
}
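/*
 * A minimal sketch of wiring a TransformerSupplier into a topology. get() must hand out a fresh
 * Transformer on every call, which a constructor reference (CountingTransformer::new, from the sketch
 * above) satisfies. The topic and store names are assumptions for illustration only.
 */
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.TransformerSupplier;
import org.apache.kafka.streams.state.Stores;

class TransformerSupplierWiringSketch {
    static void build(final StreamsBuilder builder) {
        // Register the store that CountingTransformer expects under the name "counts".
        builder.addStateStore(Stores.keyValueStoreBuilder(
            Stores.persistentKeyValueStore("counts"), Serdes.String(), Serdes.Long()));

        final KStream<String, String> input = builder.stream("words");

        // A fresh transformer instance is created per stream task via the supplier.
        final TransformerSupplier<String, String, KeyValue<String, Long>> supplier = CountingTransformer::new;
        final KStream<String, Long> counted = input.transform(supplier, "counts");

        counted.to("word-counts", Produced.with(Serdes.String(), Serdes.Long()));
    }
}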

View File

@@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.kstream.internals.UnlimitedWindow;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.time.Instant;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
/**
* The unlimited window specifications used for aggregations.
* <p>
* An unlimited time window is also called landmark window.
* It has a fixed starting point while its window end is defined as infinite.
 * In this regard, it is a window with a fixed start point and an infinite size.
* <p>
* For time semantics, see {@link TimestampExtractor}.
*
* @see TimeWindows
* @see SessionWindows
* @see JoinWindows
* @see KGroupedStream#windowedBy(Windows)
* @see TimestampExtractor
*/
public final class UnlimitedWindows extends Windows<UnlimitedWindow> {
private static final long DEFAULT_START_TIMESTAMP_MS = 0L;
/** The start timestamp of the window. */
@SuppressWarnings("WeakerAccess")
public final long startMs;
private UnlimitedWindows(final long startMs) {
this.startMs = startMs;
}
/**
* Return an unlimited window starting at timestamp zero.
*/
public static UnlimitedWindows of() {
return new UnlimitedWindows(DEFAULT_START_TIMESTAMP_MS);
}
/**
* Return a new unlimited window for the specified start timestamp.
*
* @param startMs the window start time
* @return a new unlimited window that starts at {@code startMs}
* @throws IllegalArgumentException if the start time is negative
* @deprecated Use {@link #startOn(Instant)} instead
*/
@Deprecated
public UnlimitedWindows startOn(final long startMs) throws IllegalArgumentException {
if (startMs < 0) {
throw new IllegalArgumentException("Window start time (startMs) cannot be negative.");
}
return new UnlimitedWindows(startMs);
}
/**
* Return a new unlimited window for the specified start timestamp.
*
* @param start the window start time
* @return a new unlimited window that starts at {@code start}
* @throws IllegalArgumentException if the start time is negative or can't be represented as {@code long milliseconds}
*/
public UnlimitedWindows startOn(final Instant start) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(start, "start");
return startOn(ApiUtils.validateMillisecondInstant(start, msgPrefix));
}
@Override
public Map<Long, UnlimitedWindow> windowsFor(final long timestamp) {
// always return the single unlimited window
        // we cannot use Collections.singletonMap since it does not support remove()
final Map<Long, UnlimitedWindow> windows = new HashMap<>();
if (timestamp >= startMs) {
windows.put(startMs, new UnlimitedWindow(startMs));
}
return windows;
}
/**
* {@inheritDoc}
 * As unlimited windows have conceptually infinite size, this method just returns {@link Long#MAX_VALUE}.
*
* @return the size of the specified windows which is {@link Long#MAX_VALUE}
*/
@Override
public long size() {
return Long.MAX_VALUE;
}
/**
* Throws an {@link IllegalArgumentException} because the retention time for unlimited windows is always infinite
* and cannot be changed.
*
* @throws IllegalArgumentException on every invocation.
* @deprecated since 2.1.
*/
@Override
@Deprecated
public UnlimitedWindows until(final long durationMs) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be set for UnlimitedWindows.");
}
/**
* {@inheritDoc}
 * The retention time for unlimited windows is infinite and thus represented as {@link Long#MAX_VALUE}.
*
* @return the window retention time that is {@link Long#MAX_VALUE}
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Override
@Deprecated
public long maintainMs() {
return Long.MAX_VALUE;
}
@Override
public long gracePeriodMs() {
return 0L;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final UnlimitedWindows that = (UnlimitedWindows) o;
return startMs == that.startMs && segments == that.segments;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public int hashCode() {
return Objects.hash(startMs, segments);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public String toString() {
return "UnlimitedWindows{" +
"startMs=" + startMs +
", segments=" + segments +
'}';
}
}
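/*
 * A minimal sketch of a landmark aggregation over UnlimitedWindows, starting at an explicitly chosen
 * point in time. The start instant and the topic name "events" are assumptions for illustration only.
 */
import java.time.Instant;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.UnlimitedWindows;

class UnlimitedWindowsSketch {
    static void build(final StreamsBuilder builder) {
        final KStream<String, String> events = builder.stream("events");
        events.groupByKey()
              // One ever-growing window per key, counting everything since 2020-01-01.
              .windowedBy(UnlimitedWindows.of().startOn(Instant.parse("2020-01-01T00:00:00Z")))
              .count();
    }
}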

View File

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ValueJoiner} interface for joining two values into a new value of arbitrary type.
 * This is a stateless operation, i.e., {@link #apply(Object, Object)} is invoked individually for each joining
* record-pair of a {@link KStream}-{@link KStream}, {@link KStream}-{@link KTable}, or {@link KTable}-{@link KTable}
* join.
*
* @param <V1> first value type
* @param <V2> second value type
* @param <VR> joined value type
* @see KStream#join(KStream, ValueJoiner, JoinWindows)
* @see KStream#join(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#join(KTable, ValueJoiner)
* @see KStream#join(KTable, ValueJoiner, Joined)
* @see KStream#leftJoin(KTable, ValueJoiner)
* @see KStream#leftJoin(KTable, ValueJoiner, Joined)
* @see KTable#join(KTable, ValueJoiner)
* @see KTable#leftJoin(KTable, ValueJoiner)
* @see KTable#outerJoin(KTable, ValueJoiner)
*/
public interface ValueJoiner<V1, V2, VR> {
/**
* Return a joined value consisting of {@code value1} and {@code value2}.
*
* @param value1 the first value for joining
* @param value2 the second value for joining
* @return the joined value
*/
VR apply(final V1 value1, final V2 value2);
}
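/*
 * A minimal sketch of a ValueJoiner used in a KStream-KTable join. The "orders" stream, "customers"
 * table, and output topic are assumptions for illustration only.
 */
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.ValueJoiner;

class ValueJoinerSketch {
    static void build(final StreamsBuilder builder) {
        final KStream<String, String> orders = builder.stream("orders");      // keyed by customer id
        final KTable<String, String> customers = builder.table("customers");  // keyed by customer id

        // The joiner only sees the two values; the (matching) key stays untouched.
        final ValueJoiner<String, String, String> joiner =
            (order, customer) -> order + " placed by " + customer;

        orders.join(customers, joiner).to("enriched-orders");
    }
}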

View File

@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ValueMapper} interface for mapping a value to a new value of arbitrary type.
 * This is a stateless record-by-record operation, i.e., {@link #apply(Object)} is invoked individually for each record
* of a stream (cf. {@link ValueTransformer} for stateful value transformation).
* If {@code ValueMapper} is applied to a {@link org.apache.kafka.streams.KeyValue key-value pair} record the record's
* key is preserved.
* If a record's key and value should be modified {@link KeyValueMapper} can be used.
*
* @param <V> value type
* @param <VR> mapped value type
* @see KeyValueMapper
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see KStream#mapValues(ValueMapper)
* @see KStream#mapValues(ValueMapperWithKey)
* @see KStream#flatMapValues(ValueMapper)
* @see KStream#flatMapValues(ValueMapperWithKey)
* @see KTable#mapValues(ValueMapper)
* @see KTable#mapValues(ValueMapperWithKey)
*/
public interface ValueMapper<V, VR> {
/**
* Map the given value to a new value.
*
* @param value the value to be mapped
* @return the new value
*/
VR apply(final V value);
}
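/*
 * A minimal sketch of a ValueMapper used with mapValues(): the key is preserved and each value is
 * replaced by its length. The topic names and serdes are assumptions for illustration only.
 */
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.kstream.ValueMapper;

class ValueMapperSketch {
    static void build(final StreamsBuilder builder) {
        final KStream<String, String> sentences = builder.stream("sentences");

        final ValueMapper<String, Integer> toLength = value -> value.length();

        // The record key is preserved; only the value type changes from String to Integer.
        final KStream<String, Integer> lengths = sentences.mapValues(toLength);
        lengths.to("sentence-lengths", Produced.with(Serdes.String(), Serdes.Integer()));
    }
}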

View File

@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ValueMapperWithKey} interface for mapping a value to a new value of arbitrary type.
 * This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
* record of a stream (cf. {@link ValueTransformer} for stateful value transformation).
* If {@code ValueMapperWithKey} is applied to a {@link org.apache.kafka.streams.KeyValue key-value pair} record the
* record's key is preserved.
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* If a record's key and value should be modified {@link KeyValueMapper} can be used.
*
* @param <K> key type
* @param <V> value type
* @param <VR> mapped value type
* @see KeyValueMapper
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see KStream#mapValues(ValueMapper)
* @see KStream#mapValues(ValueMapperWithKey)
* @see KStream#flatMapValues(ValueMapper)
* @see KStream#flatMapValues(ValueMapperWithKey)
* @see KTable#mapValues(ValueMapper)
* @see KTable#mapValues(ValueMapperWithKey)
*/
public interface ValueMapperWithKey<K, V, VR> {
/**
 * Map the given value to a new value, with read-only access to the key.
*
* @param readOnlyKey the read-only key
* @param value the value to be mapped
* @return the new value
*/
VR apply(final K readOnlyKey, final V value);
}
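/*
 * A minimal sketch of a ValueMapperWithKey that reads, but never modifies, the key while mapping the
 * value. The topic names are assumptions for illustration only.
 */
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.ValueMapperWithKey;

class ValueMapperWithKeySketch {
    static void build(final StreamsBuilder builder) {
        final KStream<String, String> clicks = builder.stream("clicks"); // key = user id, value = page

        // The key is read-only: it is embedded into the new value but never changed.
        final ValueMapperWithKey<String, String, String> tagWithUser =
            (readOnlyKey, value) -> readOnlyKey + " visited " + value;

        clicks.mapValues(tagWithUser).to("tagged-clicks");
    }
}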

View File

@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.Punctuator;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.To;
/**
* The {@code ValueTransformer} interface for stateful mapping of a value to a new value (with possible new type).
 * This is a stateful record-by-record operation, i.e., {@link #transform(Object)} is invoked individually for each
* record of a stream and can access and modify a state that is available beyond a single call of
* {@link #transform(Object)} (cf. {@link ValueMapper} for stateless value transformation).
* Additionally, this {@code ValueTransformer} can {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule}
* a method to be {@link Punctuator#punctuate(long) called periodically} with the provided context.
* If {@code ValueTransformer} is applied to a {@link KeyValue} pair record the record's key is preserved.
* <p>
* Use {@link ValueTransformerSupplier} to provide new instances of {@code ValueTransformer} to Kafka Stream's runtime.
* <p>
* If a record's key and value should be modified {@link Transformer} can be used.
*
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformerSupplier
* @see ValueTransformerWithKeySupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
*/
public interface ValueTransformer<V, VR> {
/**
* Initialize this transformer.
* This is called once per instance when the topology gets initialized.
* When the framework is done with the transformer, {@link #close()} will be called on it; the
* framework may later re-use the transformer by calling {@link #init(ProcessorContext)} again.
* <p>
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
* <p>
* Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object)}.
* <p>
* Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within any method of
* {@code ValueTransformer} and will result in an {@link StreamsException exception}.
*
* @param context the context
* @throws IllegalStateException If store gets registered after initialization is already finished
* @throws StreamsException if the store's change log does not contain the partition
*/
void init(final ProcessorContext context);
/**
* Transform the given value to a new value.
* Additionally, any {@link StateStore} that is {@link KStream#transformValues(ValueTransformerSupplier, String...)
* attached} to this operator can be accessed and modified arbitrarily (cf.
* {@link ProcessorContext#getStateStore(String)}).
* <p>
 * Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within {@code transform} and
* will result in an {@link StreamsException exception}.
*
* @param value the value to be transformed
* @return the new value
*/
VR transform(final V value);
/**
* Close this transformer and clean up any resources. The framework may
* later re-use this transformer by calling {@link #init(ProcessorContext)} on it again.
* <p>
* It is not possible to return any new output records within {@code close()}.
* Using {@link ProcessorContext#forward(Object, Object)} or {@link ProcessorContext#forward(Object, Object, To)}
* will result in an {@link StreamsException exception}.
*/
void close();
}
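/*
 * A minimal, hypothetical stateful ValueTransformer that maps consecutive duplicate values to null
 * within a task. It returns a new value rather than forwarding, as required by the contract above;
 * the de-duplication logic and the use of an in-memory field for state are assumptions for
 * illustration only.
 */
import org.apache.kafka.streams.kstream.ValueTransformer;
import org.apache.kafka.streams.processor.ProcessorContext;

class DeduplicatingValueTransformer implements ValueTransformer<String, String> {
    private String lastSeen;

    @Override
    public void init(final ProcessorContext context) {
        lastSeen = null; // reset per (re)initialization
    }

    @Override
    public String transform(final String value) {
        if (value != null && value.equals(lastSeen)) {
            return null; // the record is still forwarded, just with a null value
        }
        lastSeen = value;
        return value;
    }

    @Override
    public void close() {
        lastSeen = null;
    }
}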

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* A {@code ValueTransformerSupplier} interface which can create one or more {@link ValueTransformer} instances.
*
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see ValueTransformerWithKeySupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
* @see TransformerSupplier
* @see KStream#transform(TransformerSupplier, String...)
*/
public interface ValueTransformerSupplier<V, VR> {
/**
* Return a new {@link ValueTransformer} instance.
*
* @return a new {@link ValueTransformer} instance.
*/
ValueTransformer<V, VR> get();
}

View File

@@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.Punctuator;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.To;
/**
* The {@code ValueTransformerWithKey} interface for stateful mapping of a value to a new value (with a possibly new type).
* This is a stateful record-by-record operation, i.e., {@link #transform(Object, Object)} is invoked individually for each
* record of a stream and can access and modify a state that is available beyond a single call of
* {@link #transform(Object, Object)} (cf. {@link ValueMapper} for stateless value transformation).
* Additionally, this {@code ValueTransformerWithKey} can
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} with the provided context.
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* If {@code ValueTransformerWithKey} is applied to a {@link KeyValue} pair record, the record's key is preserved.
* <p>
* Use {@link ValueTransformerWithKeySupplier} to provide new instances of {@link ValueTransformerWithKey} to
* the Kafka Streams runtime.
* <p>
* If a record's key and value should be modified, {@link Transformer} can be used.
*
* @param <K> key type
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformer
* @see ValueTransformerWithKeySupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
*/
public interface ValueTransformerWithKey<K, V, VR> {
/**
* Initialize this transformer.
* This is called once per instance when the topology gets initialized.
* <p>
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
* <p>
* Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object, Object)}.
* <p>
* Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within any method of
* {@code ValueTransformerWithKey} and will result in an {@link StreamsException exception}.
*
* @param context the context
* @throws IllegalStateException If store gets registered after initialization is already finished
* @throws StreamsException if the store's change log does not contain the partition
*/
void init(final ProcessorContext context);
/**
* Transform the given value to a new value, with read-only access to the key.
* Additionally, any {@link StateStore} that is {@link KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* attached} to this operator can be accessed and modified arbitrarily (cf.
* {@link ProcessorContext#getStateStore(String)}).
* <p>
* Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within {@code transform} and
* will result in an {@link StreamsException exception}.
*
* @param readOnlyKey the read-only key
* @param value the value to be transformed
* @return the new value
*/
VR transform(final K readOnlyKey, final V value);
/**
* Close this processor and clean up any resources.
* <p>
* It is not possible to return any new output records within {@code close()}.
* Using {@link ProcessorContext#forward(Object, Object)} or {@link ProcessorContext#forward(Object, Object, To)},
* will result in an {@link StreamsException exception}.
*/
void close();
}
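A minimal sketch of a key-aware transformer (class name and the "per-key-counts" store name are hypothetical): it keeps a per-key running count in an attached state store and returns the count as the new value, reading but never modifying the key so the stream's partitioning stays intact.

import org.apache.kafka.streams.kstream.ValueTransformerWithKey;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;

public class PerKeyCounter implements ValueTransformerWithKey<String, String, Long> {

    private KeyValueStore<String, Long> store;

    @Override
    @SuppressWarnings("unchecked")
    public void init(final ProcessorContext context) {
        // the "per-key-counts" store (hypothetical name) must be attached to this operator
        store = (KeyValueStore<String, Long>) context.getStateStore("per-key-counts");
    }

    @Override
    public Long transform(final String readOnlyKey, final String value) {
        final Long current = store.get(readOnlyKey);
        final long updated = (current == null ? 0L : current) + 1L;
        store.put(readOnlyKey, updated);
        return updated; // new value; the key is only read, never changed
    }

    @Override
    public void close() {
        // nothing to clean up
    }
}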

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* A {@code ValueTransformerWithKeySupplier} interface which can create one or more {@link ValueTransformerWithKey} instances.
*
* @param <K> key type
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
* @see TransformerSupplier
* @see KStream#transform(TransformerSupplier, String...)
*/
public interface ValueTransformerWithKeySupplier<K, V, VR> {
ValueTransformerWithKey<K, V, VR> get();
}

View File

@@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.time.Instant;
/**
* A single window instance, defined by its start and end timestamp.
* {@code Window} is agnostic about whether start/end boundaries are inclusive or exclusive; this is defined by concrete
* window implementations.
* <p>
* To specify how {@code Window} boundaries are defined use {@link Windows}.
* For time semantics, see {@link TimestampExtractor}.
*
* @see Windows
* @see org.apache.kafka.streams.kstream.internals.TimeWindow
* @see org.apache.kafka.streams.kstream.internals.SessionWindow
* @see org.apache.kafka.streams.kstream.internals.UnlimitedWindow
* @see TimestampExtractor
*/
public abstract class Window {
protected final long startMs;
protected final long endMs;
private final Instant startTime;
private final Instant endTime;
/**
* Create a new window for the given start and end time.
*
* @param startMs the start timestamp of the window
* @param endMs the end timestamp of the window
* @throws IllegalArgumentException if {@code startMs} is negative or if {@code endMs} is smaller than {@code startMs}
*/
public Window(final long startMs, final long endMs) throws IllegalArgumentException {
if (startMs < 0) {
throw new IllegalArgumentException("Window startMs time cannot be negative.");
}
if (endMs < startMs) {
throw new IllegalArgumentException("Window endMs time cannot be smaller than window startMs time.");
}
this.startMs = startMs;
this.endMs = endMs;
this.startTime = Instant.ofEpochMilli(startMs);
this.endTime = Instant.ofEpochMilli(endMs);
}
/**
* Return the start timestamp of this window.
*
* @return The start timestamp of this window.
*/
public long start() {
return startMs;
}
/**
* Return the end timestamp of this window.
*
* @return The end timestamp of this window.
*/
public long end() {
return endMs;
}
/**
* Return the start time of this window.
*
* @return The start time of this window.
*/
public Instant startTime() {
return startTime;
}
/**
* Return the end time of this window.
*
* @return The end time of this window.
*/
public Instant endTime() {
return endTime;
}
/**
* Check if the given window overlaps with this window.
* Should throw an {@link IllegalArgumentException} if the {@code other} window has a different type than {@code
* this} window.
*
* @param other another window of the same type
* @return {@code true} if {@code other} overlaps with this window&mdash;{@code false} otherwise
*/
public abstract boolean overlap(final Window other);
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final Window other = (Window) obj;
return startMs == other.startMs && endMs == other.endMs;
}
@Override
public int hashCode() {
return (int) (((startMs << 32) | endMs) % 0xFFFFFFFFL);
}
@Override
public String toString() {
return "Window{" +
"startMs=" + startMs +
", endMs=" + endMs +
'}';
}
}
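To illustrate the overlap() contract, here is a minimal sketch of a hypothetical concrete window with inclusive start/end boundaries; the real implementations referenced above (TimeWindow, SessionWindow, UnlimitedWindow) live in the internals package and differ in their boundary semantics.

import org.apache.kafka.streams.kstream.Window;

public class InclusiveWindow extends Window {

    public InclusiveWindow(final long startMs, final long endMs) {
        super(startMs, endMs);
    }

    @Override
    public boolean overlap(final Window other) {
        if (getClass() != other.getClass()) {
            throw new IllegalArgumentException("Cannot compare windows of different types.");
        }
        // with inclusive boundaries, two windows overlap if neither ends before the other starts
        return start() <= other.end() && other.start() <= end();
    }
}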

View File

@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The result key type of a windowed stream aggregation.
* <p>
* If a {@link KStream} gets grouped and aggregated using a window-aggregation the resulting {@link KTable} is a
* so-called "windowed {@link KTable}" with a combined key type that encodes the corresponding aggregation window and
* the original record key.
* Thus, a windowed {@link KTable} has type {@code <Windowed<K>,V>}.
*
* @param <K> type of the key
* @see KGroupedStream#windowedBy(Windows)
* @see KGroupedStream#windowedBy(SessionWindows)
*/
public class Windowed<K> {
private final K key;
private final Window window;
public Windowed(final K key, final Window window) {
this.key = key;
this.window = window;
}
/**
* Return the key of the window.
*
* @return the key of the window
*/
public K key() {
return key;
}
/**
* Return the window containing the values associated with this key.
*
* @return the window containing the values
*/
public Window window() {
return window;
}
@Override
public String toString() {
return "[" + key + "@" + window.start() + "/" + window.end() + "]";
}
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof Windowed)) {
return false;
}
final Windowed<?> that = (Windowed) obj;
return window.equals(that.window) && key.equals(that.key);
}
@Override
public int hashCode() {
final long n = ((long) window.hashCode() << 32) | key.hashCode();
return (int) (n % 0xFFFFFFFFL);
}
}
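A usage sketch showing where Windowed keys come from (topic name and window size are hypothetical): a windowed count produces a table keyed by Windowed<String>, and the original key plus the window boundaries can be read back when the table is streamed out.

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;

public class WindowedKeyExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // windowed count: the resulting table is keyed by Windowed<String>
        final KTable<Windowed<String>, Long> counts = builder
            .stream("clicks", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))
            .count();
        // read the original key and the window boundaries back out of the combined key
        counts.toStream().foreach((windowedKey, count) ->
            System.out.println(windowedKey.key() + " @ ["
                + windowedKey.window().startTime() + ", "
                + windowedKey.window().endTime() + ") = " + count));
    }
}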

View File

@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.Serializer;
public class WindowedSerdes {
static public class TimeWindowedSerde<T> extends Serdes.WrapperSerde<Windowed<T>> {
// Default constructor needed for reflection object creation
public TimeWindowedSerde() {
super(new TimeWindowedSerializer<>(), new TimeWindowedDeserializer<>());
}
public TimeWindowedSerde(final Serde<T> inner) {
super(new TimeWindowedSerializer<>(inner.serializer()), new TimeWindowedDeserializer<>(inner.deserializer()));
}
// This constructor can be used to serialize/deserialize a windowed topic
public TimeWindowedSerde(final Serde<T> inner, final long windowSize) {
super(new TimeWindowedSerializer<>(inner.serializer()), new TimeWindowedDeserializer<>(inner.deserializer(), windowSize));
}
// Helper method that lets users specify whether the input topic is a changelog topic, so that the key can be deserialized properly.
public TimeWindowedSerde<T> forChangelog(final boolean isChangelogTopic) {
final TimeWindowedDeserializer deserializer = (TimeWindowedDeserializer) this.deserializer();
deserializer.setIsChangelogTopic(isChangelogTopic);
return this;
}
}
static public class SessionWindowedSerde<T> extends Serdes.WrapperSerde<Windowed<T>> {
// Default constructor needed for reflection object creation
public SessionWindowedSerde() {
super(new SessionWindowedSerializer<>(), new SessionWindowedDeserializer<>());
}
public SessionWindowedSerde(final Serde<T> inner) {
super(new SessionWindowedSerializer<>(inner.serializer()), new SessionWindowedDeserializer<>(inner.deserializer()));
}
}
/**
* Construct a {@code TimeWindowedSerde} object for the specified inner class type.
*/
static public <T> Serde<Windowed<T>> timeWindowedSerdeFrom(final Class<T> type) {
return new TimeWindowedSerde<>(Serdes.serdeFrom(type));
}
/**
* Construct a {@code TimeWindowedSerde} object to deserialize changelog topic
* for the specified inner class type and window size.
*/
static public <T> Serde<Windowed<T>> timeWindowedSerdeFrom(final Class<T> type, final long windowSize) {
return new TimeWindowedSerde<>(Serdes.serdeFrom(type), windowSize);
}
/**
* Construct a {@code SessionWindowedSerde} object for the specified inner class type.
*/
static public <T> Serde<Windowed<T>> sessionWindowedSerdeFrom(final Class<T> type) {
return new SessionWindowedSerde<>(Serdes.serdeFrom(type));
}
static void verifyInnerSerializerNotNull(final Serializer inner,
final Serializer wrapper) {
if (inner == null) {
throw new NullPointerException("Inner serializer is `null`. " +
"User code must use constructor `" + wrapper.getClass().getSimpleName() + "(final Serializer<T> inner)` " +
"instead of the no-arg constructor.");
}
}
static void verifyInnerDeserializerNotNull(final Deserializer inner,
final Deserializer wrapper) {
if (inner == null) {
throw new NullPointerException("Inner deserializer is `null`. " +
"User code must use constructor `" + wrapper.getClass().getSimpleName() + "(final Deserializer<T> inner)` " +
"instead of the no-arg constructor.");
}
}
}
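A sketch of consuming a topic whose keys are time-windowed strings (the topic name is hypothetical, and the window size passed to the serde must match the windows used by the producing application):

import java.time.Duration;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.kstream.WindowedSerdes;

public class WindowedSerdeExample {
    public static void main(final String[] args) {
        // window size must match the producer's windows (here assumed to be 5 minutes)
        final Serde<Windowed<String>> windowedKeySerde =
            WindowedSerdes.timeWindowedSerdeFrom(String.class, Duration.ofMinutes(5).toMillis());
        final StreamsBuilder builder = new StreamsBuilder();
        final KStream<Windowed<String>, Long> counts = builder.stream(
            "counts-topic", Consumed.with(windowedKeySerde, Serdes.Long()));
        counts.foreach((windowedKey, count) ->
            System.out.println(windowedKey + " -> " + count));
    }
}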

View File

@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
import java.time.Duration;
import java.util.Map;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* The window specification for fixed size windows that is used to define window boundaries and grace period.
* <p>
* Grace period defines how long to wait on out-of-order events. That is, windows will continue to accept new records until {@code stream_time >= window_end + grace_period}.
* Records that arrive after the grace period has passed are considered <em>late</em> and are not processed but dropped.
* <p>
* Warning: It may be unsafe to use objects of this class in set- or map-like collections,
* since the equals and hashCode methods depend on mutable fields.
*
* @param <W> type of the window instance
* @see TimeWindows
* @see UnlimitedWindows
* @see JoinWindows
* @see SessionWindows
* @see TimestampExtractor
*/
public abstract class Windows<W extends Window> {
private long maintainDurationMs = DEFAULT_RETENTION_MS;
@Deprecated public int segments = 3;
protected Windows() {}
@Deprecated // remove this constructor when we remove segments.
Windows(final int segments) {
this.segments = segments;
}
/**
* Set the window maintain duration (retention time) in milliseconds.
* This retention time is a guaranteed <i>lower bound</i> for how long a window will be maintained.
*
* @param durationMs the window retention time in milliseconds
* @return itself
* @throws IllegalArgumentException if {@code durationMs} is negative
* @deprecated since 2.1. Use {@link Materialized#withRetention(Duration)}
* or directly configure the retention in a store supplier and use {@link Materialized#as(WindowBytesStoreSupplier)}.
*/
@Deprecated
public Windows<W> until(final long durationMs) throws IllegalArgumentException {
if (durationMs < 0) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be negative.");
}
maintainDurationMs = durationMs;
return this;
}
/**
* Return the window maintain duration (retention time) in milliseconds.
*
* @return the window maintain duration
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Deprecated
public long maintainMs() {
return maintainDurationMs;
}
/**
* Set the number of segments to be used for rolling the window store.
* This function is not exposed to users but can be called by developers that extend this class.
*
* @param segments the number of segments to be used
* @return itself
* @throws IllegalArgumentException if the specified number of segments is smaller than 2
* @deprecated since 2.1 Override segmentInterval() instead.
*/
@Deprecated
protected Windows<W> segments(final int segments) throws IllegalArgumentException {
if (segments < 2) {
throw new IllegalArgumentException("Number of segments must be at least 2.");
}
this.segments = segments;
return this;
}
/**
* Create all windows that contain the provided timestamp, indexed by non-negative window start timestamps.
*
* @param timestamp the timestamp for which windows should be created
* @return a map of {@code windowStartTimestamp -> Window} entries
*/
public abstract Map<Long, W> windowsFor(final long timestamp);
/**
* Return the size of the specified windows in milliseconds.
*
* @return the size of the specified windows
*/
public abstract long size();
/**
* Return the window grace period (the time to admit
* out-of-order events after the end of the window).
*
* Delay is defined as (stream_time - record_timestamp).
*/
public abstract long gracePeriodMs();
}
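A sketch combining window size, grace period, and retention (the latter replacing the deprecated until(); topic and store names are hypothetical): 5-minute windows that keep accepting records arriving up to 1 minute out of order, backed by a store retaining data for at least 30 minutes.

import java.time.Duration;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.TimeWindows;
import org.apache.kafka.streams.kstream.Windowed;
import org.apache.kafka.streams.state.WindowStore;

public class GracePeriodExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // retention must cover at least window size + grace period (5 + 1 minutes here)
        final KTable<Windowed<String>, Long> counts = builder
            .stream("events", Consumed.with(Serdes.String(), Serdes.String()))
            .groupByKey()
            .windowedBy(TimeWindows.of(Duration.ofMinutes(5)).grace(Duration.ofMinutes(1)))
            .count(Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-counts")
                .withRetention(Duration.ofMinutes(30)));
    }
}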

View File

@@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.ValueJoiner;
import org.apache.kafka.streams.kstream.ValueMapper;
import org.apache.kafka.streams.kstream.ValueMapperWithKey;
import org.apache.kafka.streams.kstream.ValueTransformer;
import org.apache.kafka.streams.kstream.ValueTransformerSupplier;
import org.apache.kafka.streams.kstream.ValueTransformerWithKey;
import org.apache.kafka.streams.kstream.ValueTransformerWithKeySupplier;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import java.util.Collection;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
/*
* Any classes (KTable, KStream, etc) extending this class should follow the serde specification precedence ordering as:
*
* 1) Overridden values via control objects (e.g. Materialized, Serialized, Consumed, etc)
* 2) Serdes that can be inferred from the operator itself (e.g. groupBy().count(), where value serde can default to `LongSerde`).
* 3) Serde inherited from parent operator if possible (note if the key / value types have been changed, then the corresponding serde cannot be inherited).
* 4) Default serde specified in the config.
*/
public abstract class AbstractStream<K, V> {
protected final String name;
protected final Serde<K> keySerde;
protected final Serde<V> valSerde;
protected final Set<String> subTopologySourceNodes;
protected final StreamsGraphNode streamsGraphNode;
protected final InternalStreamsBuilder builder;
// This copy constructor allows extending the KStream
// and KTable APIs with new methods without impacting the public interface.
public AbstractStream(final AbstractStream<K, V> stream) {
this.name = stream.name;
this.builder = stream.builder;
this.keySerde = stream.keySerde;
this.valSerde = stream.valSerde;
this.subTopologySourceNodes = stream.subTopologySourceNodes;
this.streamsGraphNode = stream.streamsGraphNode;
}
AbstractStream(final String name,
final Serde<K> keySerde,
final Serde<V> valSerde,
final Set<String> subTopologySourceNodes,
final StreamsGraphNode streamsGraphNode,
final InternalStreamsBuilder builder) {
if (subTopologySourceNodes == null || subTopologySourceNodes.isEmpty()) {
throw new IllegalArgumentException("parameter <sourceNodes> must not be null or empty");
}
this.name = name;
this.builder = builder;
this.keySerde = keySerde;
this.valSerde = valSerde;
this.subTopologySourceNodes = subTopologySourceNodes;
this.streamsGraphNode = streamsGraphNode;
}
// This method exposes the InternalTopologyBuilder instance
// to subclasses that extend the AbstractStream class.
protected InternalTopologyBuilder internalTopologyBuilder() {
return builder.internalTopologyBuilder;
}
Set<String> ensureCopartitionWith(final Collection<? extends AbstractStream<K, ?>> otherStreams) {
final Set<String> allSourceNodes = new HashSet<>(subTopologySourceNodes);
for (final AbstractStream<K, ?> other: otherStreams) {
allSourceNodes.addAll(other.subTopologySourceNodes);
}
builder.internalTopologyBuilder.copartitionSources(allSourceNodes);
return allSourceNodes;
}
static <T2, T1, R> ValueJoiner<T2, T1, R> reverseJoiner(final ValueJoiner<T1, T2, R> joiner) {
return (value2, value1) -> joiner.apply(value1, value2);
}
static <K, V, VR> ValueMapperWithKey<K, V, VR> withKey(final ValueMapper<V, VR> valueMapper) {
Objects.requireNonNull(valueMapper, "valueMapper can't be null");
return (readOnlyKey, value) -> valueMapper.apply(value);
}
static <K, V, VR> ValueTransformerWithKeySupplier<K, V, VR> toValueTransformerWithKeySupplier(
final ValueTransformerSupplier<V, VR> valueTransformerSupplier) {
Objects.requireNonNull(valueTransformerSupplier, "valueTransformerSupplier can't be null");
return () -> {
final ValueTransformer<V, VR> valueTransformer = valueTransformerSupplier.get();
return new ValueTransformerWithKey<K, V, VR>() {
@Override
public void init(final ProcessorContext context) {
valueTransformer.init(context);
}
@Override
public VR transform(final K readOnlyKey, final V value) {
return valueTransformer.transform(value);
}
@Override
public void close() {
valueTransformer.close();
}
};
};
}
// for testing only
public Serde<K> keySerde() {
return keySerde;
}
public Serde<V> valueSerde() {
return valSerde;
}
}
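To illustrate the serde precedence ordering described in the class comment above, here is a sketch (topic names, class name, and broker address are hypothetical): the config supplies the default serdes (lowest precedence), while an explicit Produced override on an individual operator takes priority.

import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Produced;

public class SerdePrecedenceExample {
    public static void main(final String[] args) {
        final Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "serde-precedence-example");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        // (4) default serdes from the config: the lowest precedence
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        final StreamsBuilder builder = new StreamsBuilder();
        // no override: the source falls back to the configured String serdes
        final KStream<String, String> words = builder.stream("words");
        // mapValues changes the value type, so the parent's value serde cannot be inherited;
        // (1) the explicit Produced override takes precedence for the sink
        words.mapValues(String::length)
             .to("word-lengths", Produced.with(Serdes.String(), Serdes.Integer()));

        final KafkaStreams streams = new KafkaStreams(builder.build(), props);
        streams.start();
    }
}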

View File

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import java.util.Objects;
public class Change<T> {
public final T newValue;
public final T oldValue;
public Change(final T newValue, final T oldValue) {
this.newValue = newValue;
this.oldValue = oldValue;
}
@Override
public String toString() {
return "(" + newValue + "<-" + oldValue + ")";
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final Change<?> change = (Change<?>) o;
return Objects.equals(newValue, change.newValue) &&
Objects.equals(oldValue, change.oldValue);
}
@Override
public int hashCode() {
return Objects.hash(newValue, oldValue);
}
}

Some files were not shown because too many files have changed in this diff.