Add km module kafka

This commit is contained in:
leewei
2023-02-14 14:57:39 +08:00
parent 229140f067
commit 469baad65b
4310 changed files with 736354 additions and 46204 deletions


@@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.processor.StateStore;
import java.util.Map;
/**
* {@code KafkaClientSupplier} can be used to provide custom Kafka clients to a {@link KafkaStreams} instance.
*
* @see KafkaStreams#KafkaStreams(Topology, java.util.Properties, KafkaClientSupplier)
*/
public interface KafkaClientSupplier {
/**
* Create an {@link AdminClient} which is used for internal topic management.
*
* @param config Supplied by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of {@link AdminClient}
* @deprecated Not called by Kafka Streams, which now uses {@link #getAdmin} instead.
*/
@Deprecated
default AdminClient getAdminClient(final Map<String, Object> config) {
throw new UnsupportedOperationException("Direct use of this method is deprecated. " +
"Implementations of KafkaClientSupplier should implement the getAdmin() method instead. " +
"The method will be removed in a future release.");
}
/**
* Create an {@link Admin} which is used for internal topic management.
*
* @param config Supplied by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of {@link Admin}
*/
@SuppressWarnings("deprecation")
default Admin getAdmin(final Map<String, Object> config) {
return getAdminClient(config);
}
/**
* Create a {@link Producer} which is used to write records to sink topics.
*
* @param config {@link StreamsConfig#getProducerConfigs(String) producer config} which is supplied by the
* {@link java.util.Properties} given to the {@link KafkaStreams} instance
* @return an instance of Kafka producer
*/
Producer<byte[], byte[]> getProducer(final Map<String, Object> config);
/**
* Create a {@link Consumer} which is used to read records of source topics.
*
* @param config {@link StreamsConfig#getMainConsumerConfigs(String, String, int) consumer config} which is
* supplied by the {@link java.util.Properties} given to the {@link KafkaStreams} instance
* @return an instance of Kafka consumer
*/
Consumer<byte[], byte[]> getConsumer(final Map<String, Object> config);
/**
* Create a {@link Consumer} which is used to read records to restore {@link StateStore}s.
*
* @param config {@link StreamsConfig#getRestoreConsumerConfigs(String) restore consumer config} which is supplied
* by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of Kafka consumer
*/
Consumer<byte[], byte[]> getRestoreConsumer(final Map<String, Object> config);
/**
* Create a {@link Consumer} which is used to consume records for {@link GlobalKTable}.
*
* @param config {@link StreamsConfig#getGlobalConsumerConfigs(String) global consumer config} which is supplied
* by the {@link java.util.Properties} given to the {@link KafkaStreams}
* @return an instance of Kafka consumer
*/
Consumer<byte[], byte[]> getGlobalConsumer(final Map<String, Object> config);
}
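For orientation, a minimal implementation sketch (not part of this commit, class name hypothetical): it builds the standard byte-array clients directly from the supplied configs, which is roughly what the built-in supplier does.

import org.apache.kafka.clients.admin.Admin;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.apache.kafka.common.serialization.ByteArraySerializer;
import java.util.Map;

// Hypothetical example implementation of KafkaClientSupplier.
public class PlainKafkaClientSupplier implements KafkaClientSupplier {
    @Override
    public Admin getAdmin(final Map<String, Object> config) {
        return Admin.create(config); // used for internal topic management
    }
    @Override
    public Producer<byte[], byte[]> getProducer(final Map<String, Object> config) {
        return new KafkaProducer<>(config, new ByteArraySerializer(), new ByteArraySerializer());
    }
    @Override
    public Consumer<byte[], byte[]> getConsumer(final Map<String, Object> config) {
        return new KafkaConsumer<>(config, new ByteArrayDeserializer(), new ByteArrayDeserializer());
    }
    @Override
    public Consumer<byte[], byte[]> getRestoreConsumer(final Map<String, Object> config) {
        return getConsumer(config); // restore consumer can reuse the same construction
    }
    @Override
    public Consumer<byte[], byte[]> getGlobalConsumer(final Map<String, Object> config) {
        return getConsumer(config); // global consumer can reuse the same construction
    }
}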

File diff suppressed because it is too large


@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.streams.state.HostInfo;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
/**
* Represents all the metadata related to a key, i.e., where a particular key resides in a {@link KafkaStreams} application.
* It contains the active {@link HostInfo} and a set of standby {@link HostInfo}s, denoting the instances where the key resides.
* It also contains the partition number where the key belongs, which can be useful when used in conjunction with other APIs,
* e.g., relating it with the lags for that store partition.
* NOTE: This is a point-in-time view. It may change as rebalances happen.
*/
public class KeyQueryMetadata {
/**
* Sentinel to indicate that the KeyQueryMetadata is currently unavailable. This can occur during rebalance
* operations.
*/
public static final KeyQueryMetadata NOT_AVAILABLE = new KeyQueryMetadata(new HostInfo("unavailable", -1),
Collections.emptySet(),
-1);
private final HostInfo activeHost;
private final Set<HostInfo> standbyHosts;
private final int partition;
public KeyQueryMetadata(final HostInfo activeHost, final Set<HostInfo> standbyHosts, final int partition) {
this.activeHost = activeHost;
this.standbyHosts = standbyHosts;
this.partition = partition;
}
/**
* Get the active Kafka Streams instance for the given key.
*
* @return active instance's {@link HostInfo}
*/
public HostInfo getActiveHost() {
return activeHost;
}
/**
* Get the Streams instances that host the key as standbys
*
* @return set of standby {@link HostInfo}, or an empty set if no standbys are configured
*/
public Set<HostInfo> getStandbyHosts() {
return standbyHosts;
}
/**
* Get the Store partition corresponding to the key.
*
* @return store partition number
*/
public int getPartition() {
return partition;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof KeyQueryMetadata)) {
return false;
}
final KeyQueryMetadata keyQueryMetadata = (KeyQueryMetadata) obj;
return Objects.equals(keyQueryMetadata.activeHost, activeHost)
&& Objects.equals(keyQueryMetadata.standbyHosts, standbyHosts)
&& Objects.equals(keyQueryMetadata.partition, partition);
}
@Override
public String toString() {
return "KeyQueryMetadata {" +
"activeHost=" + activeHost +
", standbyHosts=" + standbyHosts +
", partition=" + partition +
'}';
}
@Override
public int hashCode() {
return Objects.hash(activeHost, standbyHosts, partition);
}
}
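A hedged usage sketch for this class, assuming a running KafkaStreams instance and placeholder store/key names; KafkaStreams#queryMetadataForKey(...) is the API that returns this metadata.

KafkaStreams streams = ...; // a running instance
KeyQueryMetadata metadata =
    streams.queryMetadataForKey("word-counts-store", "hello", Serdes.String().serializer());
if (KeyQueryMetadata.NOT_AVAILABLE.equals(metadata)) {
    // a rebalance is in progress; retry the lookup later
} else {
    HostInfo active = metadata.getActiveHost();          // route interactive queries here
    Set<HostInfo> standbys = metadata.getStandbyHosts(); // optional targets for stale reads
    int partition = metadata.getPartition();             // e.g. to relate with per-partition lag
}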


@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import java.util.Objects;
/**
* A key-value pair defined for a single Kafka Streams record.
* If the record comes directly from a Kafka topic then its key/value are defined as the message key/value.
*
* @param <K> Key type
* @param <V> Value type
*/
public class KeyValue<K, V> {
/** The key of the key-value pair. */
public final K key;
/** The value of the key-value pair. */
public final V value;
/**
* Create a new key-value pair.
*
* @param key the key
* @param value the value
*/
public KeyValue(final K key, final V value) {
this.key = key;
this.value = value;
}
/**
* Create a new key-value pair.
*
* @param key the key
* @param value the value
* @param <K> the type of the key
* @param <V> the type of the value
* @return a new key-value pair
*/
public static <K, V> KeyValue<K, V> pair(final K key, final V value) {
return new KeyValue<>(key, value);
}
@Override
public String toString() {
return "KeyValue(" + key + ", " + value + ")";
}
@Override
public boolean equals(final Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof KeyValue)) {
return false;
}
final KeyValue other = (KeyValue) obj;
return Objects.equals(key, other.key) && Objects.equals(value, other.value);
}
@Override
public int hashCode() {
return Objects.hash(key, value);
}
}
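A small illustrative DSL snippet (not part of this commit) showing where KeyValue.pair(...) typically appears, assuming an existing KStream named counts:

KStream<String, Long> counts = ...;
// swap key and value of every record; map() expects a new KeyValue per input record
KStream<Long, String> swapped = counts.map((word, count) -> KeyValue.pair(count, word));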


@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import java.util.Objects;
/**
* Encapsulates information about lag, at a store partition replica (active or standby). This information is constantly changing as the
* tasks process records and thus it should be treated as simply an instantaneous measure of lag.
*/
public class LagInfo {
private final long currentOffsetPosition;
private final long endOffsetPosition;
private final long offsetLag;
LagInfo(final long currentOffsetPosition, final long endOffsetPosition) {
this.currentOffsetPosition = currentOffsetPosition;
this.endOffsetPosition = endOffsetPosition;
this.offsetLag = Math.max(0, endOffsetPosition - currentOffsetPosition);
}
/**
* Get the current maximum offset on the store partition's changelog topic that has been successfully written into
* the store partition's state store.
*
* @return the current consumed offset for standby/restoring store partitions, and simply the end offset for active store partition replicas
*/
public long currentOffsetPosition() {
return this.currentOffsetPosition;
}
/**
* Get the end offset position for this store partition's changelog topic on the Kafka brokers.
*
* @return last offset written to the changelog topic partition
*/
public long endOffsetPosition() {
return this.endOffsetPosition;
}
/**
* Get the measured lag between the current and end offset positions for this store partition replica.
*
* @return lag as measured by message offsets
*/
public long offsetLag() {
return this.offsetLag;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof LagInfo)) {
return false;
}
final LagInfo other = (LagInfo) obj;
return currentOffsetPosition == other.currentOffsetPosition
&& endOffsetPosition == other.endOffsetPosition
&& this.offsetLag == other.offsetLag;
}
@Override
public int hashCode() {
return Objects.hash(currentOffsetPosition, endOffsetPosition, offsetLag);
}
@Override
public String toString() {
return "LagInfo {" +
" currentOffsetPosition=" + currentOffsetPosition +
", endOffsetPosition=" + endOffsetPosition +
", offsetLag=" + offsetLag +
'}';
}
}
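An illustrative sketch, assuming a running KafkaStreams instance and a hypothetical store name, of how LagInfo is typically consumed via KafkaStreams#allLocalStorePartitionLags():

KafkaStreams streams = ...;
// store name -> (partition -> LagInfo) for all local store partition replicas
Map<String, Map<Integer, LagInfo>> lags = streams.allLocalStorePartitionLags();
long totalLag = lags.getOrDefault("word-counts-store", Collections.emptyMap())
                    .values()
                    .stream()
                    .mapToLong(LagInfo::offsetLag)
                    .sum();
// e.g. only start serving interactive queries once totalLag drops below a chosen threshold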


@@ -0,0 +1,129 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.streams.state.QueryableStoreType;
import java.util.Objects;
/**
* {@code StoreQueryParameters} allows you to pass a variety of parameters when fetching a store for interactive query.
*/
public class StoreQueryParameters<T> {
private Integer partition;
private boolean staleStores;
private final String storeName;
private final QueryableStoreType<T> queryableStoreType;
private StoreQueryParameters(final String storeName, final QueryableStoreType<T> queryableStoreType, final Integer partition, final boolean staleStores) {
this.storeName = storeName;
this.queryableStoreType = queryableStoreType;
this.partition = partition;
this.staleStores = staleStores;
}
public static <T> StoreQueryParameters<T> fromNameAndType(final String storeName,
final QueryableStoreType<T> queryableStoreType) {
return new StoreQueryParameters<T>(storeName, queryableStoreType, null, false);
}
/**
* Set a specific partition that should be queried exclusively.
*
* @param partition the specific partition that should be queried exclusively from the store
*
* @return StoreQueryParameters a new {@code StoreQueryParameters} instance configured with the specified partition
*/
public StoreQueryParameters<T> withPartition(final Integer partition) {
return new StoreQueryParameters<T>(storeName, queryableStoreType, partition, staleStores);
}
/**
* Enable querying of stale state stores, i.e., allow querying active tasks during restore as well as standby tasks.
*
* @return StoreQueryParameters a new {@code StoreQueryParameters} instance configured with serving from stale stores enabled
*/
public StoreQueryParameters<T> enableStaleStores() {
return new StoreQueryParameters<T>(storeName, queryableStoreType, partition, true);
}
/**
* Get the name of the state store that should be queried.
*
* @return String state store name
*/
public String storeName() {
return storeName;
}
/**
* Get the queryable store type for which the key is queried by the user.
*
* @return QueryableStoreType type of queryable store
*/
public QueryableStoreType<T> queryableStoreType() {
return queryableStoreType;
}
/**
* Get the store partition that will be queried.
* If the method returns {@code null}, no specific partition has been requested,
* so all the local partitions for the store will be queried.
*
* @return Integer partition
*/
public Integer partition() {
return partition;
}
/**
* Get the flag staleStores. If {@code true}, include standbys and recovering stores along with running stores.
*
* @return boolean staleStores
*/
public boolean staleStoresEnabled() {
return staleStores;
}
@Override
public boolean equals(final Object obj) {
if (!(obj instanceof StoreQueryParameters)) {
return false;
}
final StoreQueryParameters storeQueryParameters = (StoreQueryParameters) obj;
return Objects.equals(storeQueryParameters.partition, partition)
&& Objects.equals(storeQueryParameters.staleStores, staleStores)
&& Objects.equals(storeQueryParameters.storeName, storeName)
&& Objects.equals(storeQueryParameters.queryableStoreType, queryableStoreType);
}
@Override
public String toString() {
return "StoreQueryParameters {" +
"partition=" + partition +
", staleStores=" + staleStores +
", storeName=" + storeName +
", queryableStoreType=" + queryableStoreType +
'}';
}
@Override
public int hashCode() {
return Objects.hash(partition, staleStores, storeName, queryableStoreType);
}
}
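A hedged example of building these parameters and handing them to KafkaStreams#store(...); the store name, partition, and key below are placeholders:

StoreQueryParameters<ReadOnlyKeyValueStore<String, Long>> params =
    StoreQueryParameters
        .fromNameAndType("word-counts-store", QueryableStoreTypes.<String, Long>keyValueStore())
        .withPartition(2)        // optional: query only this partition
        .enableStaleStores();    // optional: also allow standby/restoring replicas
ReadOnlyKeyValueStore<String, Long> store = streams.store(params);
Long count = store.get("hello");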


@@ -0,0 +1,565 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.errors.TopologyException;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KGroupedTable;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Transformer;
import org.apache.kafka.streams.kstream.ValueTransformer;
import org.apache.kafka.streams.kstream.internals.ConsumedInternal;
import org.apache.kafka.streams.kstream.internals.InternalStreamsBuilder;
import org.apache.kafka.streams.kstream.internals.MaterializedInternal;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import org.apache.kafka.streams.processor.internals.ProcessorNode;
import org.apache.kafka.streams.processor.internals.SourceNode;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import java.util.Collection;
import java.util.Collections;
import java.util.Objects;
import java.util.Properties;
import java.util.regex.Pattern;
/**
* {@code StreamsBuilder} provides the high-level Kafka Streams DSL to specify a Kafka Streams topology.
*
* @see Topology
* @see KStream
* @see KTable
* @see GlobalKTable
*/
public class StreamsBuilder {
/** The actual topology that is constructed by this StreamsBuilder. */
private final Topology topology = new Topology();
/** The topology's internal builder. */
final InternalTopologyBuilder internalTopologyBuilder = topology.internalTopologyBuilder;
private final InternalStreamsBuilder internalStreamsBuilder = new InternalStreamsBuilder(internalTopologyBuilder);
/**
* Create a {@link KStream} from the specified topic.
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
* deserializers as specified in the {@link StreamsConfig config} are used.
* <p>
* If multiple topics are specified there is no ordering guarantee for records from different topics.
* <p>
* Note that the specified input topic must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topic the topic name; cannot be {@code null}
* @return a {@link KStream} for the specified topic
*/
public synchronized <K, V> KStream<K, V> stream(final String topic) {
return stream(Collections.singleton(topic));
}
/**
* Create a {@link KStream} from the specified topic.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* are defined by the options in {@link Consumed} are used.
* <p>
* Note that the specified input topic must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link KStream} for the specified topic
*/
public synchronized <K, V> KStream<K, V> stream(final String topic,
final Consumed<K, V> consumed) {
return stream(Collections.singleton(topic), consumed);
}
/**
* Create a {@link KStream} from the specified topics.
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
* deserializers as specified in the {@link StreamsConfig config} are used.
* <p>
* If multiple topics are specified there is no ordering guarantee for records from different topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topics the topic names; must contain at least one topic name
* @return a {@link KStream} for the specified topics
*/
public synchronized <K, V> KStream<K, V> stream(final Collection<String> topics) {
return stream(topics, Consumed.with(null, null, null, null));
}
/**
* Create a {@link KStream} from the specified topics.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* are defined by the options in {@link Consumed} are used.
* <p>
* If multiple topics are specified there is no ordering guarantee for records from different topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topics the topic names; must contain at least one topic name
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link KStream} for the specified topics
*/
public synchronized <K, V> KStream<K, V> stream(final Collection<String> topics,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topics, "topics can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
return internalStreamsBuilder.stream(topics, new ConsumedInternal<>(consumed));
}
/**
* Create a {@link KStream} from the specified topic pattern.
* The default {@code "auto.offset.reset"} strategy, default {@link TimestampExtractor}, and default key and value
* deserializers as specified in the {@link StreamsConfig config} are used.
* <p>
* If multiple topics are matched by the specified pattern, the created {@link KStream} will read data from all of
* them and there is no ordering guarantee between records from different topics. This also means that the work
* will not be parallelized for multiple topics, and the number of tasks will scale with the maximum partition
* count of any matching topic rather than the total number of partitions across all topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topicPattern the pattern to match for topic names
* @return a {@link KStream} for topics matching the regex pattern.
*/
public synchronized <K, V> KStream<K, V> stream(final Pattern topicPattern) {
return stream(topicPattern, Consumed.with(null, null));
}
/**
* Create a {@link KStream} from the specified topic pattern.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* are defined by the options in {@link Consumed} are used.
* <p>
* If multiple topics are matched by the specified pattern, the created {@link KStream} will read data from all of
* them and there is no ordering guarantee between records from different topics. This also means that the work
* will not be parallelized for multiple topics, and the number of tasks will scale with the maximum partition
* count of any matching topic rather than the total number of partitions across all topics.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case it is the user's responsibility to repartition the data before any key based operation
* (like aggregation or join) is applied to the returned {@link KStream}.
*
* @param topicPattern the pattern to match for topic names
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link KStream} for topics matching the regex pattern.
*/
public synchronized <K, V> KStream<K, V> stream(final Pattern topicPattern,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topicPattern, "topicPattern can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
return internalStreamsBuilder.stream(topicPattern, new ConsumedInternal<>(consumed));
}
/**
* Create a {@link KTable} for the specified topic.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* are defined by the options in {@link Consumed} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topic must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} using the given
* {@code Materialized} instance.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
* <p>
* You should only specify serdes in the {@link Consumed} instance as these will also be used to overwrite the
* serdes in {@link Materialized}, i.e.,
* <pre> {@code
* streamBuilder.table(topic, Consumed.with(Serdes.String(), Serdes.String()), Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(storeName))
* }
* </pre>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ...
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore()));
* String key = "some-key";
* Long valueForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters; cannot be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic,
final Consumed<K, V> consumed,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
materialized.withKeySerde(consumedInternal.keySerde()).withValueSerde(consumedInternal.valueSerde());
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link KTable} for the specified topic.
* The default {@code "auto.offset.reset"} strategy and default key and value deserializers as specified in the
* {@link StreamsConfig config} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store may not be queryable through Interactive Queries.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic) {
return table(topic, new ConsumedInternal<>());
}
/**
* Create a {@link KTable} for the specified topic.
* The {@code "auto.offset.reset"} strategy, {@link TimestampExtractor}, key and value deserializers
* are defined by the options in {@link Consumed} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store may not be queryable through Interactive Queries.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(
Materialized.with(consumedInternal.keySerde(), consumedInternal.valueSerde()),
internalStreamsBuilder,
topic + "-");
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link KTable} for the specified topic.
* The default {@code "auto.offset.reset"} strategy as specified in the {@link StreamsConfig config} are used.
* Key and value deserializers as defined by the options in {@link Materialized} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* Note that the specified input topics must be partitioned by key.
* If this is not the case the returned {@link KTable} will be corrupted.
* <p>
* The resulting {@link KTable} will be materialized in a local {@link KeyValueStore} using the {@link Materialized} instance.
* An internal changelog topic is created by default. Because the source topic can
* be used for recovery, you can avoid creating the changelog topic by setting
* the {@code "topology.optimization"} to {@code "all"} in the {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link KTable} for the specified topic
*/
public synchronized <K, V> KTable<K, V> table(final String topic,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
final ConsumedInternal<K, V> consumedInternal =
new ConsumedInternal<>(Consumed.with(materializedInternal.keySerde(), materializedInternal.valueSerde()));
return internalStreamsBuilder.table(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link GlobalKTable} for the specified topic.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store may not be queryable through Interactive Queries.
* No internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig} or {@link Consumed}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
final Consumed<K, V> consumed) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(
Materialized.with(consumedInternal.keySerde(), consumedInternal.valueSerde()),
internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.globalTable(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link GlobalKTable} for the specified topic.
* The default key and value deserializers as specified in the {@link StreamsConfig config} are used.
* Input {@link KeyValue records} with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} with an internal
* store name. Note that the store may not be queryable through Interactive Queries.
* No internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic) {
return globalTable(topic, Consumed.with(null, null));
}
/**
* Create a {@link GlobalKTable} for the specified topic.
*
* Input {@link KeyValue} pairs with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} configured with
* the provided instance of {@link Materialized}.
* However, no internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* You should only specify serdes in the {@link Consumed} instance as these will also be used to overwrite the
* serdes in {@link Materialized}, i.e.,
* <pre> {@code
* streamBuilder.globalTable(topic, Consumed.with(Serdes.String(), Serdes.String()), Materialized.<String, String, KeyValueStore<Bytes, byte[]>>as(storeName))
* }
* </pre>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ...
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore()));
* String key = "some-key";
* Long valueForKey = localStore.get(key);
* }</pre>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig} or {@link Consumed}.
*
* @param topic the topic name; cannot be {@code null}
* @param consumed the instance of {@link Consumed} used to define optional parameters; can't be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
final Consumed<K, V> consumed,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final ConsumedInternal<K, V> consumedInternal = new ConsumedInternal<>(consumed);
// always use the serdes from consumed
materialized.withKeySerde(consumedInternal.keySerde()).withValueSerde(consumedInternal.valueSerde());
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.globalTable(topic, consumedInternal, materializedInternal);
}
/**
* Create a {@link GlobalKTable} for the specified topic.
*
* Input {@link KeyValue} pairs with {@code null} key will be dropped.
* <p>
* The resulting {@link GlobalKTable} will be materialized in a local {@link KeyValueStore} configured with
* the provided instance of {@link Materialized}.
* However, no internal changelog topic is created since the original input topic can be used for recovery (cf.
* methods of {@link KGroupedStream} and {@link KGroupedTable} that return a {@link KTable}).
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ...
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore()));
* String key = "some-key";
* Long valueForKey = localStore.get(key);
* }</pre>
* Note that {@link GlobalKTable} always applies {@code "auto.offset.reset"} strategy {@code "earliest"}
* regardless of the specified value in {@link StreamsConfig}.
*
* @param topic the topic name; cannot be {@code null}
* @param materialized the instance of {@link Materialized} used to materialize a state store; cannot be {@code null}
* @return a {@link GlobalKTable} for the specified topic
*/
public synchronized <K, V> GlobalKTable<K, V> globalTable(final String topic,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(topic, "topic can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, internalStreamsBuilder, topic + "-");
return internalStreamsBuilder.globalTable(topic,
new ConsumedInternal<>(Consumed.with(materializedInternal.keySerde(),
materializedInternal.valueSerde())),
materializedInternal);
}
/**
* Adds a state store to the underlying {@link Topology}.
* <p>
* It is required to connect state stores to {@link Processor Processors}, {@link Transformer Transformers},
* or {@link ValueTransformer ValueTransformers} before they can be used.
*
* @param builder the builder used to obtain this state store {@link StateStore} instance
* @return itself
* @throws TopologyException if state store supplier is already added
*/
public synchronized StreamsBuilder addStateStore(final StoreBuilder builder) {
Objects.requireNonNull(builder, "builder can't be null");
internalStreamsBuilder.addStateStore(builder);
return this;
}
/**
* @deprecated use {@link #addGlobalStore(StoreBuilder, String, Consumed, ProcessorSupplier)} instead
*/
@SuppressWarnings("unchecked")
@Deprecated
public synchronized StreamsBuilder addGlobalStore(final StoreBuilder storeBuilder,
final String topic,
final String sourceName,
final Consumed consumed,
final String processorName,
final ProcessorSupplier stateUpdateSupplier) {
Objects.requireNonNull(storeBuilder, "storeBuilder can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
internalStreamsBuilder.addGlobalStore(storeBuilder,
sourceName,
topic,
new ConsumedInternal<>(consumed),
processorName,
stateUpdateSupplier);
return this;
}
/**
* Adds a global {@link StateStore} to the topology.
* The {@link StateStore} sources its data from all partitions of the provided input topic.
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
* <p>
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
* of the input topic.
* <p>
* The provided {@link ProcessorSupplier} will be used to create an {@link ProcessorNode} that will receive all
* records forwarded from the {@link SourceNode}. NOTE: you should not use the {@code Processor} to insert transformed records into
* the global state store. This store uses the source topic as changelog and during restore will insert records directly
* from the source.
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
* <p>
* It is not required to connect a global store to {@link Processor Processors}, {@link Transformer Transformers},
* or {@link ValueTransformer ValueTransformers}; those have read-only access to all global stores by default.
*
* @param storeBuilder user defined {@link StoreBuilder}; can't be {@code null}
* @param topic the topic to source the data from
* @param consumed the instance of {@link Consumed} used to define optional parameters; can't be {@code null}
* @param stateUpdateSupplier the instance of {@link ProcessorSupplier}
* @return itself
* @throws TopologyException if the processor of state is already registered
*/
@SuppressWarnings("unchecked")
public synchronized StreamsBuilder addGlobalStore(final StoreBuilder storeBuilder,
final String topic,
final Consumed consumed,
final ProcessorSupplier stateUpdateSupplier) {
Objects.requireNonNull(storeBuilder, "storeBuilder can't be null");
Objects.requireNonNull(consumed, "consumed can't be null");
internalStreamsBuilder.addGlobalStore(storeBuilder,
topic,
new ConsumedInternal<>(consumed),
stateUpdateSupplier);
return this;
}
/**
* Returns the {@link Topology} that represents the specified processing logic.
* Note that using this method means no optimizations are performed.
*
* @return the {@link Topology} that represents the specified processing logic
*/
public synchronized Topology build() {
return build(null);
}
/**
* Returns the {@link Topology} that represents the specified processing logic and accepts
* a {@link Properties} instance used to indicate whether to optimize topology or not.
*
* @param props the {@link Properties} used for building possibly optimized topology
* @return the {@link Topology} that represents the specified processing logic
*/
public synchronized Topology build(final Properties props) {
internalStreamsBuilder.buildAndOptimizeTopology(props);
return topology;
}
}
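To show the builder in context, here is a minimal word-count sketch (not part of this commit), assuming the usual org.apache.kafka.streams imports, placeholder topic/store names, and a props object that already carries application.id and bootstrap.servers:

StreamsBuilder builder = new StreamsBuilder();
builder.stream("input-topic", Consumed.with(Serdes.String(), Serdes.String()))
       .flatMapValues(line -> Arrays.asList(line.toLowerCase().split("\\W+")))
       .groupBy((key, word) -> word, Grouped.with(Serdes.String(), Serdes.String()))
       .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("word-counts-store"))
       .toStream()
       .to("output-topic", Produced.with(Serdes.String(), Serdes.Long()));
Topology topology = builder.build();          // or build(props) to allow topology optimization
KafkaStreams streams = new KafkaStreams(topology, props);
streams.start();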

File diff suppressed because it is too large


@@ -0,0 +1,225 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.common.Metric;
import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
import java.util.Map;
/**
* The Kafka Streams metrics interface for adding metric sensors and collecting metric values.
*/
public interface StreamsMetrics {
/**
* Get read-only handle on global metrics registry.
*
* @return Map of all metrics.
*/
Map<MetricName, ? extends Metric> metrics();
/**
* Add a latency, rate and total sensor for a specific operation, which will include the following metrics:
* <ol>
* <li>average latency</li>
* <li>max latency</li>
* <li>invocation rate (num.operations / second)</li>
* <li>total invocation count</li>
* </ol>
* Whenever a user records this sensor via {@link Sensor#record(double)} etc, it will be counted as one invocation
* of the operation, and hence the rate / count metrics will be updated accordingly; and the recorded latency value
* will be used to update the average / max latency as well.
*
* Note that you can add more metrics to this sensor after you created it, which can then be updated upon
* {@link Sensor#record(double)} calls.
*
* The added sensor and its metrics can be removed with {@link #removeSensor(Sensor) removeSensor()}.
*
* @param scopeName name of the scope, which will be used as part of the metric type, e.g.: "stream-[scope]-metrics".
* @param entityName name of the entity, which will be used as part of the metric tags, e.g.: "[scope]-id" = "[entity]".
* @param operationName name of the operation, which will be used as the name of the metric, e.g.: "[operation]-latency-avg".
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
* @see #addSensor(String, RecordingLevel, Sensor...)
*/
Sensor addLatencyRateTotalSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Add a rate and a total sensor for a specific operation, which will include the following metrics:
* <ol>
* <li>invocation rate (num.operations / time unit)</li>
* <li>total invocation count</li>
* </ol>
* Whenever a user records this sensor via {@link Sensor#record(double)} etc,
* it will be counted as one invocation of the operation, and hence the rate / count metrics will be updated accordingly.
*
* Note that you can add more metrics to this sensor after you created it, which can then be updated upon
* {@link Sensor#record(double)} calls.
*
* The added sensor and its metrics can be removed with {@link #removeSensor(Sensor) removeSensor()}.
*
* @param scopeName name of the scope, which will be used as part of the metrics type, e.g.: "stream-[scope]-metrics".
* @param entityName name of the entity, which will be used as part of the metric tags, e.g.: "[scope]-id" = "[entity]".
* @param operationName name of the operation, which will be used as the name of the metric, e.g.: "[operation]-total".
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
* @see #addSensor(String, RecordingLevel, Sensor...)
*/
Sensor addRateTotalSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Add a latency and throughput sensor for a specific operation, which will include the following metrics:
* <ol>
* <li>average latency</li>
* <li>max latency</li>
* <li>throughput (num.operations / time unit)</li>
* </ol>
* Also create a parent sensor with the same metrics that aggregates all entities with the same operation under the
* same scope if it has not been created.
*
* @param scopeName name of the scope, could be the type of the state store, etc.
* @param entityName name of the entity, could be the name of the state store instance, etc.
* @param operationName name of the operation, could be get / put / delete / etc.
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @deprecated since 2.5. Use {@link #addLatencyRateTotalSensor(String, String, String, Sensor.RecordingLevel, String...) addLatencyRateTotalSensor()}
* instead.
*/
@Deprecated
Sensor addLatencyAndThroughputSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Record the given latency value of the sensor.
* If the passed sensor includes throughput metrics, e.g., when created by the
* {@link #addLatencyAndThroughputSensor(String, String, String, Sensor.RecordingLevel, String...)} method, then the
* throughput metrics will also be recorded from this event.
*
* @param sensor sensor whose latency we are recording.
* @param startNs start of measurement time in nanoseconds.
* @param endNs end of measurement time in nanoseconds.
* @deprecated since 2.5. Use {@link Sensor#record(double) Sensor#record()} instead.
*/
@Deprecated
void recordLatency(final Sensor sensor,
final long startNs,
final long endNs);
/**
* Add a throughput sensor for a specific operation:
* <ol>
* <li>throughput (num.operations / time unit)</li>
* </ol>
* Also create a parent sensor with the same metrics that aggregates all entities with the same operation under the
* same scope if it has not been created.
* This sensor is a strict subset of the sensors created by
* {@link #addLatencyAndThroughputSensor(String, String, String, Sensor.RecordingLevel, String...)}.
*
* @param scopeName name of the scope, could be the type of the state store, etc.
* @param entityName name of the entity, could be the name of the state store instance, etc.
* @param operationName name of the operation, could be get / put / delete / etc.
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor.
* @param tags additional tags of the sensor
* @return The added sensor.
* @deprecated since 2.5. Use {@link #addRateTotalSensor(String, String, String, Sensor.RecordingLevel, String...)
* addRateTotalSensor()} instead.
*/
@Deprecated
Sensor addThroughputSensor(final String scopeName,
final String entityName,
final String operationName,
final Sensor.RecordingLevel recordingLevel,
final String... tags);
/**
* Record the throughput value of a sensor.
*
* @param sensor the sensor whose throughput we are recording
* @param value throughput value
* @deprecated since 2.5. Use {@link Sensor#record() Sensor#record()} instead.
*/
@Deprecated
void recordThroughput(final Sensor sensor,
final long value);
/**
* Generic method to create a sensor.
* Note that for most cases it is advisable to use
* {@link #addRateTotalSensor(String, String, String, RecordingLevel, String...) addRateTotalSensor()}
* or {@link #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...) addLatencyRateTotalSensor()}
* to ensure metric name well-formedness and conformity with the rest of the Kafka Streams code base.
* However, if the above two methods are not sufficient, this method can also be used.
*
* @param name name of the sensor.
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor
* @return The added sensor.
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
*/
Sensor addSensor(final String name,
final Sensor.RecordingLevel recordingLevel);
/**
* Generic method to create a sensor with parent sensors.
* Note that for most cases it is advisable to use
* {@link #addRateTotalSensor(String, String, String, RecordingLevel, String...) addRateTotalSensor()}
* or {@link #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...) addLatencyRateTotalSensor()}
* to ensure metric name well-formedness and conformity with the rest of the Kafka Streams code base.
* However, if the above two methods are not sufficient, this method can also be used.
*
* @param name name of the sensor
* @param recordingLevel the recording level (e.g., INFO or DEBUG) for this sensor
* @return The added sensor.
* @see #addRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #addLatencyRateTotalSensor(String, String, String, RecordingLevel, String...)
* @see #removeSensor(Sensor)
*/
Sensor addSensor(final String name,
final Sensor.RecordingLevel recordingLevel,
final Sensor... parents);
/**
* Remove a sensor.
* @param sensor sensor to be removed
*/
void removeSensor(final Sensor sensor);
}
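A hedged sketch of registering and using a custom sensor from inside a Processor, e.g. in init(ProcessorContext context); the scope, entity, and operation names are illustrative only:

final StreamsMetrics metrics = context.metrics();
final Sensor droppedSensor = metrics.addRateTotalSensor(
    "my-scope",          // scopeName  -> part of the metric type, e.g. "stream-my-scope-metrics"
    "order-validator",   // entityName -> tag "my-scope-id" = "order-validator"
    "dropped-records",   // operationName -> metric names such as "dropped-records-total"
    Sensor.RecordingLevel.INFO);

// later, whenever a record is dropped:
droppedSensor.record(1.0);

// and when the processor is closed:
metrics.removeSensor(droppedSensor);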


@@ -0,0 +1,772 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.streams.errors.TopologyException;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.StreamPartitioner;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.processor.TopicNameExtractor;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import org.apache.kafka.streams.processor.internals.ProcessorNode;
import org.apache.kafka.streams.processor.internals.ProcessorTopology;
import org.apache.kafka.streams.processor.internals.SinkNode;
import org.apache.kafka.streams.processor.internals.SourceNode;
import org.apache.kafka.streams.state.StoreBuilder;
import java.util.regex.Pattern;
/**
* A logical representation of a {@link ProcessorTopology}.
* A topology is an acyclic graph of sources, processors, and sinks.
* A {@link SourceNode source} is a node in the graph that consumes one or more Kafka topics and forwards them to its
* successor nodes.
* A {@link Processor processor} is a node in the graph that receives input records from upstream nodes, processes the
* records, and optionally forwards new records to one or all of its downstream nodes.
* Finally, a {@link SinkNode sink} is a node in the graph that receives records from upstream nodes and writes them to
* a Kafka topic.
* A {@code Topology} allows you to construct an acyclic graph of these nodes, which can then be passed into a new
* {@link KafkaStreams} instance that will then {@link KafkaStreams#start() begin consuming, processing, and producing
* records}.
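* <p>
* As an illustrative sketch (the topic names, {@code props}, and {@code MyProcessor} below are placeholders, not
* part of this API), a topology is typically wired up and then handed to a {@link KafkaStreams} instance like this:
* <pre>{@code
* Topology topology = new Topology();
* topology.addSource("Source", "input-topic");
* topology.addProcessor("Process", MyProcessor::new, "Source");
* topology.addSink("Sink", "output-topic", "Process");
*
* KafkaStreams streams = new KafkaStreams(topology, props);
* streams.start();
* }</pre>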
*/
public class Topology {
final InternalTopologyBuilder internalTopologyBuilder = new InternalTopologyBuilder();
/**
* Sets the {@code auto.offset.reset} configuration when
* {@link #addSource(AutoOffsetReset, String, String...) adding a source processor} or when creating {@link KStream}
* or {@link KTable} via {@link StreamsBuilder}.
*/
public enum AutoOffsetReset {
EARLIEST, LATEST
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final String name,
final String... topics) {
internalTopologyBuilder.addSource(null, name, null, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(null, name, null, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param offsetReset the auto offset reset policy to use for this source if no committed offsets found; acceptable values are earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, null, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param offsetReset the auto offset reset policy value for this source if no committed offsets found; acceptable values are earliest or latest.
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, null, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final TimestampExtractor timestampExtractor,
final String name,
final String... topics) {
internalTopologyBuilder.addSource(null, name, timestampExtractor, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern
* and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final TimestampExtractor timestampExtractor,
final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(null, name, timestampExtractor, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param offsetReset the auto offset reset policy to use for this source if no committed offsets found;
* acceptable values are earliest or latest
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final TimestampExtractor timestampExtractor,
final String name,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, null, null, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key deserializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value deserializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param offsetReset the auto offset reset policy value for this source if no committed offsets found;
* acceptable values are earliest or latest.
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final TimestampExtractor timestampExtractor,
final String name,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, null, null, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the specified key and value deserializers.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
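* <p>
* For example (assuming the {@code Serdes} factory from {@code org.apache.kafka.common.serialization} and an
* illustrative topic name), a source with explicit deserializers could be added like this:
* <pre>{@code
* topology.addSource("Source",
*     Serdes.String().deserializer(),
*     Serdes.Long().deserializer(),
*     "input-topic");
* }</pre>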
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
public synchronized Topology addSource(final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String... topics) {
internalTopologyBuilder.addSource(null, name, null, keyDeserializer, valueDeserializer, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
* topics that share the same key-value data format.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
*
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
public synchronized Topology addSource(final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(null, name, null, keyDeserializer, valueDeserializer, topicPattern);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all the specified topics, so care should be taken when specifying
* topics that share the same key-value data format.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
@SuppressWarnings("overloads")
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, null, keyDeserializer, valueDeserializer, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
* topics that share the same key-value data format.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, null, keyDeserializer, valueDeserializer, topicPattern);
return this;
}
/**
* Add a new source that consumes the named topics and forwards the records to child processor and/or sink nodes.
* The source will use the specified key and value deserializers.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest.
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topics the name of one or more Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by another source
*/
@SuppressWarnings("overloads")
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final TimestampExtractor timestampExtractor,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String... topics) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, keyDeserializer, valueDeserializer, topics);
return this;
}
/**
* Add a new source that consumes from topics matching the given pattern and forwards the records to child processor
* and/or sink nodes.
* The source will use the specified key and value deserializers.
* The provided de-/serializers will be used for all matched topics, so care should be taken to specify patterns for
* topics that share the same key-value data format.
*
* @param offsetReset the auto offset reset policy to use for this stream if no committed offsets found;
* acceptable values are earliest or latest
* @param name the unique name of the source used to reference this node when
* {@link #addProcessor(String, ProcessorSupplier, String...) adding processor children}.
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param keyDeserializer key deserializer used to read this source, if not specified the default
* key deserializer defined in the configs will be used
* @param valueDeserializer value deserializer used to read this source,
* if not specified the default value deserializer defined in the configs will be used
* @param topicPattern regular expression pattern to match Kafka topics that this source is to consume
* @return itself
* @throws TopologyException if processor is already added or if topics have already been registered by name
*/
@SuppressWarnings("overloads")
public synchronized Topology addSource(final AutoOffsetReset offsetReset,
final String name,
final TimestampExtractor timestampExtractor,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final Pattern topicPattern) {
internalTopologyBuilder.addSource(offsetReset, name, timestampExtractor, keyDeserializer, valueDeserializer, topicPattern);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized Topology addSink(final String name,
final String topic,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, null, null, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic,
* using the supplied partitioner.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
* <p>
* The sink will also use the specified {@link StreamPartitioner} to determine how records are distributed among
* the named Kafka topic's partitions.
* Such control is often useful with topologies that use {@link #addStateStore(StoreBuilder, String...) state
* stores} in their processors.
* In most other cases, however, a partitioner need not be specified and Kafka will automatically distribute
* records among partitions using Kafka's default partitioning logic.
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final String topic,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, null, null, partitioner, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
* The sink will use the specified key and value serializers.
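* <p>
* For example (assuming the {@code Serdes} factory and illustrative node and topic names), a sink with explicit
* serializers could be added like this:
* <pre>{@code
* topology.addSink("Sink", "output-topic",
*     Serdes.String().serializer(),
*     Serdes.Long().serializer(),
*     "Process");
* }</pre>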
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final String topic,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, keySerializer, valueSerializer, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to the named Kafka topic.
* The sink will use the specified key and value serializers, and the supplied partitioner.
*
* @param name the unique name of the sink
* @param topic the name of the Kafka topic to which this sink should write its records
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and write to its topic
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final String topic,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topic, keySerializer, valueSerializer, partitioner, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
* The topics that it may ever send to should be pre-created.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
*
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, null, null, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor},
* using the supplied partitioner.
* The topics that it may ever send to should be pre-created.
* The sink will use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} and
* {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}.
* <p>
* The sink will also use the specified {@link StreamPartitioner} to determine how records are distributed among
* the partitions of the topics to which it writes.
* Such control is often useful with topologies that use {@link #addStateStore(StoreBuilder, String...) state
* stores} in their processors.
* In most other cases, however, a partitioner need not be specified and Kafka will automatically distribute
* records among partitions using Kafka's default partitioning logic.
*
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, null, null, partitioner, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
* The topics that it may ever send to should be pre-created.
* The sink will use the specified key and value serializers.
*
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, StreamPartitioner, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, keySerializer, valueSerializer, null, parentNames);
return this;
}
/**
* Add a new sink that forwards records from upstream parent processor and/or source nodes to Kafka topics based on {@code topicExtractor}.
* The topics that it may ever send to should be pre-created.
* The sink will use the specified key and value serializers, and the supplied partitioner.
*
* @param name the unique name of the sink
* @param topicExtractor the extractor to determine the name of the Kafka topic to which this sink should write for each record
* @param keySerializer the {@link Serializer key serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_KEY_SERDE_CLASS_CONFIG default key serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param valueSerializer the {@link Serializer value serializer} used when writing records; may be null if the sink
* should use the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value serializer} specified in the
* {@link StreamsConfig stream configuration}
* @param partitioner the function that should be used to determine the partition for each record processed by the sink
* @param parentNames the name of one or more source or processor nodes whose output records this sink should consume
* and dynamically write to topics
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
* @see #addSink(String, String, String...)
* @see #addSink(String, String, StreamPartitioner, String...)
* @see #addSink(String, String, Serializer, Serializer, String...)
*/
public synchronized <K, V> Topology addSink(final String name,
final TopicNameExtractor<K, V> topicExtractor,
final Serializer<K> keySerializer,
final Serializer<V> valueSerializer,
final StreamPartitioner<? super K, ? super V> partitioner,
final String... parentNames) {
internalTopologyBuilder.addSink(name, topicExtractor, keySerializer, valueSerializer, partitioner, parentNames);
return this;
}
/**
* Add a new processor node that receives and processes records output by one or more parent source or processor
* nodes.
* Any new record output by this processor will be forwarded to its child processor or sink nodes.
*
* @param name the unique name of the processor node
* @param supplier the supplier used to obtain this node's {@link Processor} instance
* @param parentNames the name of one or more source or processor nodes whose output records this processor should receive
* and process
* @return itself
* @throws TopologyException if parent processor is not added yet, or if this processor's name is equal to the parent's name
*/
public synchronized Topology addProcessor(final String name,
final ProcessorSupplier supplier,
final String... parentNames) {
internalTopologyBuilder.addProcessor(name, supplier, parentNames);
return this;
}
/**
* Adds a state store.
*
* @param storeBuilder the {@link StoreBuilder store builder} used to obtain the {@link StateStore} instance
* @param processorNames the names of the processors that should be able to access the provided store
* @return itself
* @throws TopologyException if state store supplier is already added
*/
public synchronized Topology addStateStore(final StoreBuilder storeBuilder,
final String... processorNames) {
internalTopologyBuilder.addStateStore(storeBuilder, processorNames);
return this;
}
/**
* Adds a global {@link StateStore} to the topology.
* The {@link StateStore} sources its data from all partitions of the provided input topic.
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
* <p>
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
* of the input topic.
* <p>
* The provided {@link ProcessorSupplier} will be used to create a {@link ProcessorNode} that will receive all
* records forwarded from the {@link SourceNode}.
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
* The default {@link TimestampExtractor} as specified in the {@link StreamsConfig config} is used.
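* <p>
* As a rough sketch (the store, node, and topic names are illustrative, and {@code GlobalStoreUpdater} stands in
* for a user-supplied {@link Processor} implementation that writes each consumed record into the store), a global
* store could be added like this:
* <pre>{@code
* StoreBuilder<KeyValueStore<String, String>> storeBuilder =
*     Stores.keyValueStoreBuilder(
*         Stores.persistentKeyValueStore("global-store"),
*         Serdes.String(),
*         Serdes.String())
*     .withLoggingDisabled(); // global stores are restored from the source topic, not from a changelog
*
* topology.addGlobalStore(storeBuilder, "GlobalSource",
*     Serdes.String().deserializer(), Serdes.String().deserializer(),
*     "global-topic", "GlobalProcessor", GlobalStoreUpdater::new);
* }</pre>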
*
* @param storeBuilder user defined state store builder
* @param sourceName name of the {@link SourceNode} that will be automatically added
* @param keyDeserializer the {@link Deserializer} to deserialize keys with
* @param valueDeserializer the {@link Deserializer} to deserialize values with
* @param topic the topic to source the data from
* @param processorName the name of the {@link ProcessorNode} that keeps the {@link StateStore} up-to-date
* @param stateUpdateSupplier the {@link ProcessorSupplier} used to obtain this node's {@link Processor} instance
* @return itself
* @throws TopologyException if the processor or state store is already registered
*/
@SuppressWarnings("unchecked")
public synchronized Topology addGlobalStore(final StoreBuilder storeBuilder,
final String sourceName,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String topic,
final String processorName,
final ProcessorSupplier stateUpdateSupplier) {
internalTopologyBuilder.addGlobalStore(storeBuilder, sourceName, null, keyDeserializer,
valueDeserializer, topic, processorName, stateUpdateSupplier);
return this;
}
/**
* Adds a global {@link StateStore} to the topology.
* The {@link StateStore} sources its data from all partitions of the provided input topic.
* There will be exactly one instance of this {@link StateStore} per Kafka Streams instance.
* <p>
* A {@link SourceNode} with the provided sourceName will be added to consume the data arriving from the partitions
* of the input topic.
* <p>
* The provided {@link ProcessorSupplier} will be used to create a {@link ProcessorNode} that will receive all
* records forwarded from the {@link SourceNode}.
* This {@link ProcessorNode} should be used to keep the {@link StateStore} up-to-date.
*
* @param storeBuilder user defined key value store builder
* @param sourceName name of the {@link SourceNode} that will be automatically added
* @param timestampExtractor the stateless timestamp extractor used for this source,
* if not specified the default extractor defined in the configs will be used
* @param keyDeserializer the {@link Deserializer} to deserialize keys with
* @param valueDeserializer the {@link Deserializer} to deserialize values with
* @param topic the topic to source the data from
* @param processorName the name of the {@link ProcessorNode} that keeps the {@link StateStore} up-to-date
* @param stateUpdateSupplier the {@link ProcessorSupplier} used to obtain this node's {@link Processor} instance
* @return itself
* @throws TopologyException if the processor or state store is already registered
*/
@SuppressWarnings("unchecked")
public synchronized Topology addGlobalStore(final StoreBuilder storeBuilder,
final String sourceName,
final TimestampExtractor timestampExtractor,
final Deserializer keyDeserializer,
final Deserializer valueDeserializer,
final String topic,
final String processorName,
final ProcessorSupplier stateUpdateSupplier) {
internalTopologyBuilder.addGlobalStore(storeBuilder, sourceName, timestampExtractor, keyDeserializer,
valueDeserializer, topic, processorName, stateUpdateSupplier);
return this;
}
/**
* Connects the processor and the state stores.
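* <p>
* For example (using illustrative store and processor names, and the {@code Stores} and {@code Serdes} factories
* from the Kafka Streams and clients libraries), a store can be added first and connected to an existing processor
* afterwards; passing the processor name directly to {@link #addStateStore(StoreBuilder, String...)} would have the
* same effect:
* <pre>{@code
* topology.addStateStore(
*     Stores.keyValueStoreBuilder(
*         Stores.inMemoryKeyValueStore("counts"),
*         Serdes.String(),
*         Serdes.Long()));
* topology.connectProcessorAndStateStores("Process", "counts");
* }</pre>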
*
* @param processorName the name of the processor
* @param stateStoreNames the names of state stores that the processor uses
* @return itself
* @throws TopologyException if the processor or a state store is unknown
*/
public synchronized Topology connectProcessorAndStateStores(final String processorName,
final String... stateStoreNames) {
internalTopologyBuilder.connectProcessorAndStateStores(processorName, stateStoreNames);
return this;
}
/**
* Returns a description of this {@code Topology}.
*
* @return a description of the topology.
*/
public synchronized TopologyDescription describe() {
return internalTopologyBuilder.describe();
}
}

View File

@@ -0,0 +1,179 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams;
import org.apache.kafka.streams.processor.TopicNameExtractor;
import org.apache.kafka.streams.processor.internals.StreamTask;
import java.util.Set;
import java.util.regex.Pattern;
/**
* A meta representation of a {@link Topology topology}.
* <p>
* The nodes of a topology are grouped into {@link Subtopology sub-topologies} if they are connected.
* In contrast, two sub-topologies are not connected but can be linked to each other via topics, i.e., if one
* sub-topology {@link Topology#addSink(String, String, String...) writes} into a topic and another sub-topology
* {@link Topology#addSource(String, String...) reads} from the same topic.
* <p>
* When {@link KafkaStreams#start()} is called, different sub-topologies will be constructed and executed as independent
* {@link StreamTask tasks}.
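* <p>
* As an illustrative sketch, a description can be obtained from a {@link Topology} and printed like this:
* <pre>{@code
* TopologyDescription description = topology.describe();
* for (TopologyDescription.Subtopology subtopology : description.subtopologies()) {
*     System.out.println("sub-topology " + subtopology.id());
*     for (TopologyDescription.Node node : subtopology.nodes()) {
*         System.out.println("  node: " + node.name());
*     }
* }
* }</pre>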
*/
public interface TopologyDescription {
/**
* A connected sub-graph of a {@link Topology}.
* <p>
* Nodes of a {@code Subtopology} are connected {@link Topology#addProcessor(String,
* org.apache.kafka.streams.processor.ProcessorSupplier, String...) directly} or indirectly via
* {@link Topology#connectProcessorAndStateStores(String, String...) state stores}
* (i.e., if multiple processors share the same state).
*/
interface Subtopology {
/**
* Internally assigned unique ID.
* @return the ID of the sub-topology
*/
int id();
/**
* All nodes of this sub-topology.
* @return set of all nodes within the sub-topology
*/
Set<Node> nodes();
}
/**
* Represents a {@link Topology#addGlobalStore(org.apache.kafka.streams.state.StoreBuilder, String,
* org.apache.kafka.common.serialization.Deserializer, org.apache.kafka.common.serialization.Deserializer, String,
* String, org.apache.kafka.streams.processor.ProcessorSupplier) global store}.
* Adding a global store results in adding a source node and one stateful processor node.
* Note that all added global stores form a single unit (similar to a {@link Subtopology}) even if different
* global stores are not connected to each other.
* Furthermore, global stores are available to all processors without connecting them explicitly, and thus global
* stores will never be part of any {@link Subtopology}.
*/
interface GlobalStore {
/**
* The source node reading from a "global" topic.
* @return the "global" source node
*/
Source source();
/**
* The processor node maintaining the global store.
* @return the "global" processor node
*/
Processor processor();
/**
* Internally assigned unique ID.
* @return the ID of the global store
*/
int id();
}
/**
* A node of a topology. Can be a source, sink, or processor node.
*/
interface Node {
/**
* The name of the node. Will never be {@code null}.
* @return the name of the node
*/
String name();
/**
* The predecessors of this node within a sub-topology.
* Note, sources do not have any predecessors.
* Will never be {@code null}.
* @return set of all predecessors
*/
Set<Node> predecessors();
/**
* The successors of this node within a sub-topology.
* Note, sinks do not have any successors.
* Will never be {@code null}.
* @return set of all successors
*/
Set<Node> successors();
}
/**
* A source node of a topology.
*/
interface Source extends Node {
/**
* The topic names this source node is reading from.
* @return comma-separated list of topic names, or the pattern (as a String)
* @deprecated use {@link #topicSet()} or {@link #topicPattern()} instead
*/
@Deprecated
String topics();
/**
* The topic names this source node is reading from.
* @return a set of topic names
*/
Set<String> topicSet();
/**
* The pattern used to match the topic names that this source node is reading from.
* @return the pattern used to match topic names
*/
Pattern topicPattern();
}
/**
* A processor node of a topology.
*/
interface Processor extends Node {
/**
* The names of all connected stores.
* @return set of store names
*/
Set<String> stores();
}
/**
* A sink node of a topology.
*/
interface Sink extends Node {
/**
* The topic name this sink node is writing to.
* Could be {@code null} if the topic name can only be dynamically determined based on a {@link TopicNameExtractor}.
* @return a topic name
*/
String topic();
/**
* The {@link TopicNameExtractor} class that this sink node uses to dynamically extract the topic name to write to.
* Could be {@code null} if the topic name is not dynamically determined.
* @return the {@link TopicNameExtractor} class used to get the topic name
*/
TopicNameExtractor topicNameExtractor();
}
/**
* All sub-topologies of the represented topology.
* @return set of all sub-topologies
*/
Set<Subtopology> subtopologies();
/**
* All global stores of the represented topology.
* @return set of all global stores
*/
Set<GlobalStore> globalStores();
}

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates that none of the specified {@link org.apache.kafka.streams.StreamsConfig#BOOTSTRAP_SERVERS_CONFIG brokers}
* could be found.
*
* @see org.apache.kafka.streams.StreamsConfig
*/
public class BrokerNotFoundException extends StreamsException {
private final static long serialVersionUID = 1L;
public BrokerNotFoundException(final String message) {
super(message);
}
public BrokerNotFoundException(final String message, final Throwable throwable) {
super(message, throwable);
}
public BrokerNotFoundException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import java.util.Map;
import org.apache.kafka.clients.producer.ProducerRecord;
/**
* {@code ProductionExceptionHandler} that always instructs streams to fail when an exception
* happens while attempting to produce result records.
*/
public class DefaultProductionExceptionHandler implements ProductionExceptionHandler {
@Override
public ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
final Exception exception) {
return ProductionExceptionHandlerResponse.FAIL;
}
@Override
public void configure(final Map<String, ?> configs) {
// ignore
}
}

View File

@@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.Configurable;
import org.apache.kafka.streams.processor.ProcessorContext;
/**
* Interface that specifies how an exception from source node deserialization
* (e.g., reading from Kafka) should be handled.
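* <p>
* A handler is usually not invoked directly but registered via the {@code default.deserialization.exception.handler}
* configuration; as a minimal sketch (assuming a {@code props} object that is later passed to {@code KafkaStreams}):
* <pre>{@code
* Properties props = new Properties();
* props.put(StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
*     LogAndContinueExceptionHandler.class);
* }</pre>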
*/
public interface DeserializationExceptionHandler extends Configurable {
/**
* Inspect a record and the exception received.
* @param context processor context
* @param record record that failed deserialization
* @param exception the actual exception
* @return whether to continue ({@link DeserializationHandlerResponse#CONTINUE}) or stop ({@link DeserializationHandlerResponse#FAIL}) processing
*/
DeserializationHandlerResponse handle(final ProcessorContext context,
final ConsumerRecord<byte[], byte[]> record,
final Exception exception);
/**
* Enumeration that describes the response from the exception handler.
*/
enum DeserializationHandlerResponse {
/* continue with processing */
CONTINUE(0, "CONTINUE"),
/* fail the processing and stop */
FAIL(1, "FAIL");
/** an English description of the API; this is for debugging and can change */
public final String name;
/** the permanent and immutable ID of the API; this can never change */
public final int id;
DeserializationHandlerResponse(final int id, final String name) {
this.id = id;
this.name = name;
}
}
}

View File

@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates that there was a problem when trying to access a
* {@link org.apache.kafka.streams.processor.StateStore StateStore}, i.e., the store is no longer valid because it is
* closed or doesn't exist any more due to a rebalance.
* <p>
* These exceptions may be transient, i.e., during a rebalance it won't be possible to query the stores as they are
* being (re)-initialized. Once the rebalance has completed, the stores will be available again. Hence, it is valid
* to back off and retry when handling this exception.
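* <p>
* A minimal retry sketch (the store name is illustrative; {@code streams} is a running
* {@link org.apache.kafka.streams.KafkaStreams} instance, {@code ReadOnlyKeyValueStore} and
* {@code QueryableStoreTypes} come from {@code org.apache.kafka.streams.state}, and the surrounding method is
* assumed to declare {@code throws InterruptedException}):
* <pre>{@code
* ReadOnlyKeyValueStore<String, Long> store = null;
* while (store == null) {
*     try {
*         store = streams.store("counts", QueryableStoreTypes.keyValueStore());
*     } catch (final InvalidStateStoreException notYetReady) {
*         Thread.sleep(100); // e.g., a rebalance is in progress; back off and retry
*     }
* }
* }</pre>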
*/
public class InvalidStateStoreException extends StreamsException {
private final static long serialVersionUID = 1L;
public InvalidStateStoreException(final String message) {
super(message);
}
public InvalidStateStoreException(final String message, final Throwable throwable) {
super(message, throwable);
}
public InvalidStateStoreException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates that the state store directory lock could not be acquired because another thread holds the lock.
*
* @see org.apache.kafka.streams.processor.StateStore
*/
public class LockException extends StreamsException {
private final static long serialVersionUID = 1L;
public LockException(final String message) {
super(message);
}
public LockException(final String message, final Throwable throwable) {
super(message, throwable);
}
public LockException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
/**
* Deserialization handler that logs a deserialization exception and then
* signals the processing pipeline to continue processing more records.
*/
public class LogAndContinueExceptionHandler implements DeserializationExceptionHandler {
private static final Logger log = LoggerFactory.getLogger(LogAndContinueExceptionHandler.class);
@Override
public DeserializationHandlerResponse handle(final ProcessorContext context,
final ConsumerRecord<byte[], byte[]> record,
final Exception exception) {
log.warn("Exception caught during Deserialization, " +
"taskId: {}, topic: {}, partition: {}, offset: {}",
context.taskId(), record.topic(), record.partition(), record.offset(),
exception);
return DeserializationHandlerResponse.CONTINUE;
}
@Override
public void configure(final Map<String, ?> configs) {
// ignore
}
}

View File

@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Map;
/**
* Deserialization handler that logs a deserialization exception and then
* signals the processing pipeline to stop processing more records and fail.
*/
public class LogAndFailExceptionHandler implements DeserializationExceptionHandler {
private static final Logger log = LoggerFactory.getLogger(LogAndFailExceptionHandler.class);
@Override
public DeserializationHandlerResponse handle(final ProcessorContext context,
final ConsumerRecord<byte[], byte[]> record,
final Exception exception) {
log.error("Exception caught during Deserialization, " +
"taskId: {}, topic: {}, partition: {}, offset: {}",
context.taskId(), record.topic(), record.partition(), record.offset(),
exception);
return DeserializationHandlerResponse.FAIL;
}
@Override
public void configure(final Map<String, ?> configs) {
// ignore
}
}

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a processor state operation (e.g. put, get) has failed.
*
* @see org.apache.kafka.streams.processor.StateStore
*/
public class ProcessorStateException extends StreamsException {
private final static long serialVersionUID = 1L;
public ProcessorStateException(final String message) {
super(message);
}
public ProcessorStateException(final String message, final Throwable throwable) {
super(message, throwable);
}
public ProcessorStateException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.Configurable;
/**
* Interface that specifies how an exception when attempting to produce a result to
* Kafka should be handled.
*/
public interface ProductionExceptionHandler extends Configurable {
/**
* Inspect a record that we attempted to produce, and the exception that resulted
* from attempting to produce it, and determine whether or not to continue processing.
*
* @param record The record that failed to produce
* @param exception The exception that occurred during production
*/
ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
final Exception exception);
enum ProductionExceptionHandlerResponse {
/* continue processing */
CONTINUE(0, "CONTINUE"),
/* fail processing */
FAIL(1, "FAIL");
/**
* An English description of the API; this is for debugging and can change.
*/
public final String name;
/**
* The permanent and immutable ID of an API; this can never change.
*/
public final int id;
ProductionExceptionHandlerResponse(final int id,
final String name) {
this.id = id;
this.name = name;
}
}
}
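
A hedged sketch of implementing this interface: the class name and the choice to tolerate only RecordTooLargeException are illustrative, not part of this file. The handler continues past oversized records but fails on any other production error.

import java.util.Map;

import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.errors.RecordTooLargeException;
import org.apache.kafka.streams.errors.ProductionExceptionHandler;

// Hypothetical handler: drop oversized records, fail on everything else.
public class IgnoreRecordTooLargeHandler implements ProductionExceptionHandler {
    @Override
    public ProductionExceptionHandlerResponse handle(final ProducerRecord<byte[], byte[]> record,
                                                     final Exception exception) {
        return exception instanceof RecordTooLargeException
            ? ProductionExceptionHandlerResponse.CONTINUE
            : ProductionExceptionHandlerResponse.FAIL;
    }

    @Override
    public void configure(final Map<String, ?> configs) {
        // no configuration needed for this sketch
    }
}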

View File

@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.common.KafkaException;
/**
* {@link StreamsException} is the top-level exception type generated by Kafka Streams.
*/
public class StreamsException extends KafkaException {
private final static long serialVersionUID = 1L;
public StreamsException(final String message) {
super(message);
}
public StreamsException(final String message, final Throwable throwable) {
super(message, throwable);
}
public StreamsException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a run time error incurred while trying to assign
* {@link org.apache.kafka.streams.processor.internals.StreamTask stream tasks} to
* {@link org.apache.kafka.streams.processor.internals.StreamThread threads}.
*/
public class TaskAssignmentException extends StreamsException {
private final static long serialVersionUID = 1L;
public TaskAssignmentException(final String message) {
super(message);
}
public TaskAssignmentException(final String message, final Throwable throwable) {
super(message, throwable);
}
public TaskAssignmentException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a run time error incurred while trying to parse the {@link org.apache.kafka.streams.processor.TaskId task id}
* from the read string.
*
* @see org.apache.kafka.streams.processor.internals.StreamTask
*/
public class TaskIdFormatException extends StreamsException {
private static final long serialVersionUID = 1L;
public TaskIdFormatException(final String message) {
super("Task id cannot be parsed correctly" + (message == null ? "" : " from " + message));
}
public TaskIdFormatException(final String message, final Throwable throwable) {
super("Task id cannot be parsed correctly" + (message == null ? "" : " from " + message), throwable);
}
public TaskIdFormatException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.streams.processor.internals.Task;
/**
* Indicates that a task got migrated to another thread.
* Thus, the task raising this exception can be cleaned up and closed as "zombie".
*/
public class TaskMigratedException extends StreamsException {
private final static long serialVersionUID = 1L;
private final Task task;
// this is for unit test only
public TaskMigratedException() {
super("A task has been migrated unexpectedly", null);
this.task = null;
}
public TaskMigratedException(final Task task,
final TopicPartition topicPartition,
final long endOffset,
final long pos) {
super(String.format("Log end offset of %s should not change while restoring: old end offset %d, current offset %d",
topicPartition,
endOffset,
pos),
null);
this.task = task;
}
public TaskMigratedException(final Task task) {
super(String.format("Task %s is unexpectedly closed during processing", task.id()), null);
this.task = task;
}
public TaskMigratedException(final Task task,
final Throwable throwable) {
super(String.format("Client request for task %s has been fenced due to a rebalance", task.id()), throwable);
this.task = task;
}
public Task migratedTask() {
return task;
}
}

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.errors;
/**
* Indicates a pre-runtime error that occurred while parsing the {@link org.apache.kafka.streams.Topology logical topology}
* to construct the {@link org.apache.kafka.streams.processor.internals.ProcessorTopology physical processor topology}.
*/
public class TopologyException extends StreamsException {
private static final long serialVersionUID = 1L;
public TopologyException(final String message) {
super("Invalid topology" + (message == null ? "" : ": " + message));
}
public TopologyException(final String message,
final Throwable throwable) {
super("Invalid topology" + (message == null ? "" : ": " + message), throwable);
}
public TopologyException(final Throwable throwable) {
super(throwable);
}
}

View File

@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.internals;
import java.time.Duration;
import java.time.Instant;
import static java.lang.String.format;
public final class ApiUtils {
private static final String MILLISECOND_VALIDATION_FAIL_MSG_FRMT = "Invalid value for parameter \"%s\" (value was: %s). ";
private static final String VALIDATE_MILLISECOND_NULL_SUFFIX = "It shouldn't be null.";
private static final String VALIDATE_MILLISECOND_OVERFLOW_SUFFIX = "It can't be converted to milliseconds.";
private ApiUtils() {
}
/**
* Validates that milliseconds from {@code duration} can be retrieved.
* @param duration Duration to check.
* @param messagePrefix Prefix text for an error message.
* @return Milliseconds from {@code duration}.
*/
public static long validateMillisecondDuration(final Duration duration, final String messagePrefix) {
try {
if (duration == null) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_NULL_SUFFIX);
}
return duration.toMillis();
} catch (final ArithmeticException e) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_OVERFLOW_SUFFIX, e);
}
}
/**
* Validates that milliseconds from {@code instant} can be retrieved.
* @param instant Instant to check.
* @param messagePrefix Prefix text for an error message.
* @return Milliseconds from {@code instant}.
*/
public static long validateMillisecondInstant(final Instant instant, final String messagePrefix) {
try {
if (instant == null) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_NULL_SUFFIX);
}
return instant.toEpochMilli();
} catch (final ArithmeticException e) {
throw new IllegalArgumentException(messagePrefix + VALIDATE_MILLISECOND_OVERFLOW_SUFFIX, e);
}
}
/**
* Generates the prefix message for validateMillisecondXXXXXX() utility
* @param value Object to be converted to milliseconds
* @param name Object name
* @return Error message prefix to use in exception
*/
public static String prepareMillisCheckFailMsgPrefix(final Object value, final String name) {
return format(MILLISECOND_VALIDATION_FAIL_MSG_FRMT, name, value);
}
}
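
A small usage sketch of the validators above, assuming a hypothetical windowSize parameter; prepareMillisCheckFailMsgPrefix supplies the message prefix that the validators prepend to their error suffixes.

import java.time.Duration;

import org.apache.kafka.streams.internals.ApiUtils;

public class ApiUtilsExample {
    public static void main(final String[] args) {
        final Duration windowSize = Duration.ofMinutes(5);   // hypothetical parameter value
        // Validate the duration and convert it to milliseconds; a null or overflowing
        // value is rejected with an IllegalArgumentException.
        final long windowSizeMs = ApiUtils.validateMillisecondDuration(
            windowSize,
            ApiUtils.prepareMillisCheckFailMsgPrefix(windowSize, "windowSize"));
        System.out.println(windowSizeMs); // prints 300000
    }
}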

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.internals;
import org.apache.kafka.streams.StreamsConfig;
import java.util.Map;
/**
* A {@link StreamsConfig} that does not log its configuration on construction.
*
* This produces cleaner output for unit tests using the {@code test-utils},
* since logging the config is not really valuable in this context.
*/
public class QuietStreamsConfig extends StreamsConfig {
public QuietStreamsConfig(final Map<?, ?> props) {
super(props, false);
}
}
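
A brief sketch of the intended use in test code, with hypothetical property values; construction behaves like StreamsConfig but skips logging the configuration.

import java.util.HashMap;
import java.util.Map;

import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.internals.QuietStreamsConfig;

public class QuietConfigExample {
    public static void main(final String[] args) {
        final Map<String, Object> props = new HashMap<>();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "test-app");            // hypothetical id
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");   // hypothetical broker
        // Same behaviour as StreamsConfig, but the config values are not logged on construction.
        final StreamsConfig config = new QuietStreamsConfig(props);
        System.out.println(config.getString(StreamsConfig.APPLICATION_ID_CONFIG));
    }
}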

View File

@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.internals.metrics;
import org.apache.kafka.common.metrics.Gauge;
import org.apache.kafka.common.metrics.Sensor.RecordingLevel;
import org.apache.kafka.streams.KafkaStreams.State;
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.util.Properties;
public class ClientMetrics {
private ClientMetrics() {}
private static final Logger log = LoggerFactory.getLogger(ClientMetrics.class);
private static final String VERSION = "version";
private static final String COMMIT_ID = "commit-id";
private static final String APPLICATION_ID = "application-id";
private static final String TOPOLOGY_DESCRIPTION = "topology-description";
private static final String STATE = "state";
private static final String VERSION_FROM_FILE;
private static final String COMMIT_ID_FROM_FILE;
private static final String DEFAULT_VALUE = "unknown";
static {
final Properties props = new Properties();
try (InputStream resourceStream = ClientMetrics.class.getResourceAsStream(
"/kafka/kafka-streams-version.properties")) {
props.load(resourceStream);
} catch (final Exception exception) {
log.warn("Error while loading kafka-streams-version.properties", exception);
}
VERSION_FROM_FILE = props.getProperty("version", DEFAULT_VALUE).trim();
COMMIT_ID_FROM_FILE = props.getProperty("commitId", DEFAULT_VALUE).trim();
}
private static final String VERSION_DESCRIPTION = "The version of the Kafka Streams client";
private static final String COMMIT_ID_DESCRIPTION = "The version control commit ID of the Kafka Streams client";
private static final String APPLICATION_ID_DESCRIPTION = "The application ID of the Kafka Streams client";
private static final String TOPOLOGY_DESCRIPTION_DESCRIPTION =
"The description of the topology executed in the Kafka Streams client";
private static final String STATE_DESCRIPTION = "The state of the Kafka Streams client";
public static String version() {
return VERSION_FROM_FILE;
}
public static String commitId() {
return COMMIT_ID_FROM_FILE;
}
public static void addVersionMetric(final StreamsMetricsImpl streamsMetrics) {
streamsMetrics.addClientLevelImmutableMetric(
VERSION,
VERSION_DESCRIPTION,
RecordingLevel.INFO,
VERSION_FROM_FILE
);
}
public static void addCommitIdMetric(final StreamsMetricsImpl streamsMetrics) {
streamsMetrics.addClientLevelImmutableMetric(
COMMIT_ID,
COMMIT_ID_DESCRIPTION,
RecordingLevel.INFO,
COMMIT_ID_FROM_FILE
);
}
public static void addApplicationIdMetric(final StreamsMetricsImpl streamsMetrics, final String applicationId) {
streamsMetrics.addClientLevelImmutableMetric(
APPLICATION_ID,
APPLICATION_ID_DESCRIPTION,
RecordingLevel.INFO,
applicationId
);
}
public static void addTopologyDescriptionMetric(final StreamsMetricsImpl streamsMetrics,
final String topologyDescription) {
streamsMetrics.addClientLevelImmutableMetric(
TOPOLOGY_DESCRIPTION,
TOPOLOGY_DESCRIPTION_DESCRIPTION,
RecordingLevel.INFO,
topologyDescription
);
}
public static void addStateMetric(final StreamsMetricsImpl streamsMetrics,
final Gauge<State> stateProvider) {
streamsMetrics.addClientLevelMutableMetric(
STATE,
STATE_DESCRIPTION,
RecordingLevel.INFO,
stateProvider
);
}
}
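
The static accessors can be used directly, as in this small sketch; the values are read from kafka-streams-version.properties on the classpath and fall back to "unknown" if that file cannot be loaded.

import org.apache.kafka.streams.internals.metrics.ClientMetrics;

public class ClientMetricsExample {
    public static void main(final String[] args) {
        // Both values come from the properties file loaded in the static initializer above.
        System.out.println("Kafka Streams version: " + ClientMetrics.version());
        System.out.println("Commit id: " + ClientMetrics.commitId());
    }
}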

View File

@@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code Aggregator} interface for aggregating values of the given key.
* This is a generalization of {@link Reducer} and allows to have different types for input value and aggregation
* result.
* {@code Aggregator} is used in combination with {@link Initializer} that provides an initial aggregation value.
* <p>
* {@code Aggregator} can be used to implement aggregation functions like count.
* @param <K> key type
* @param <V> input value type
* @param <VA> aggregate value type
* @see Initializer
* @see KGroupedStream#aggregate(Initializer, Aggregator)
* @see KGroupedStream#aggregate(Initializer, Aggregator, Materialized)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator, Materialized)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger, Materialized)
* @see Reducer
*/
public interface Aggregator<K, V, VA> {
/**
* Compute a new aggregate from the key and value of a record and the current aggregate of the same key.
*
* @param key the key of the record
* @param value the value of the record
* @param aggregate the current aggregate value
* @return the new aggregate value
*/
VA apply(final K key, final V value, final VA aggregate);
}
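
A minimal sketch of an Aggregator given as a lambda, summing Long values per key; the topic names, store name, and serdes are assumptions for illustration.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.state.KeyValueStore;

public class AggregatorExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // Sum all Long values per key: the Initializer supplies 0L, the Aggregator adds each value.
        final KTable<String, Long> sums = builder
            .stream("numbers", Consumed.with(Serdes.String(), Serdes.Long()))   // assumed topic name
            .groupByKey()
            .aggregate(
                () -> 0L,                                                       // Initializer
                (key, value, aggregate) -> aggregate + value,                   // Aggregator
                Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("sums-store")  // assumed store name
                    .withValueSerde(Serdes.Long()));
        sums.toStream().to("sums-output");                                      // assumed output topic
    }
}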

View File

@@ -0,0 +1,286 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.KeyValueStore;
/**
* {@code CogroupedKStream} is an abstraction of multiple <i>grouped</i> record streams of {@link KeyValue} pairs.
* <p>
* It is an intermediate representation after a grouping of {@link KStream}s, before the
* aggregations are applied to the new partitions resulting in a {@link KTable}.
* <p>
* A {@code CogroupedKStream} must be obtained from a {@link KGroupedStream} via
* {@link KGroupedStream#cogroup(Aggregator) cogroup(...)}.
*
* @param <K> Type of keys
* @param <VOut> Type of values after aggregation
*/
public interface CogroupedKStream<K, VOut> {
/**
* Add an already {@link KGroupedStream grouped KStream} to this {@code CogroupedKStream}.
* <p>
* The added {@link KGroupedStream grouped KStream} must have the same number of partitions as all existing
* streams of this {@code CogroupedKStream}.
* If this is not the case, you would need to call {@link KStream#through(String)} before
* {@link KStream#groupByKey() grouping} the {@link KStream}, using a pre-created topic with the "correct" number of
* partitions.
* <p>
* The specified {@link Aggregator} is applied in the actual {@link #aggregate(Initializer) aggregation} step for
* each input record and computes a new aggregate using the current aggregate (or for the very first record per key
* using the initial intermediate aggregation result provided via the {@link Initializer} that is passed into
* {@link #aggregate(Initializer)}) and the record's value.
*
* @param groupedStream a grouped stream
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param <VIn> Type of input values
* @return a {@code CogroupedKStream}
*/
<VIn> CogroupedKStream<K, VOut> cogroup(final KGroupedStream<K, VIn> groupedStream,
final Aggregator<? super K, ? super VIn, VOut> aggregator);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is a generated value, and
* "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Named} is applied once to the processor combining the grouped streams.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @param named name the processor. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Named named);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store.
* Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in these streams by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* To compute the aggregation the corresponding {@link Aggregator} as specified in
* {@link #cogroup(KGroupedStream, Aggregator) cogroup(...)} is used per input stream.
* The specified {@link Initializer} is applied once per key, directly before the first input record per key is
* processed to provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Named} is used to name the processor combining the grouped streams.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to the
* same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* KeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to query
* the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation
* result. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store.
* Cannot be {@code null}.
* @param named name the processors. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that
* represent the latest (rolling) aggregate for each key
*/
KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Named named,
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Create a new {@link TimeWindowedCogroupedKStream} instance that can be used to perform windowed
* aggregations.
*
* @param windows the specification of the aggregation {@link Windows}
* @param <W> the window type
* @return an instance of {@link TimeWindowedCogroupedKStream}
*/
<W extends Window> TimeWindowedCogroupedKStream<K, VOut> windowedBy(final Windows<W> windows);
/**
* Create a new {@link SessionWindowedCogroupedKStream} instance that can be used to perform session
* windowed aggregations.
*
* @param windows the specification of the aggregation {@link SessionWindows}
* @return an instance of {@link SessionWindowedCogroupedKStream}
*/
SessionWindowedCogroupedKStream<K, VOut> windowedBy(final SessionWindows windows);
}
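
A hedged sketch of the cogroup flow described above: two grouped streams are combined and aggregated into one KTable. The topic names, store name, and serdes are assumptions for illustration.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;

public class CogroupExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final KGroupedStream<String, Long> clicks = builder
            .stream("clicks", Consumed.with(Serdes.String(), Serdes.Long()))   // assumed topic
            .groupByKey();
        final KGroupedStream<String, Long> views = builder
            .stream("views", Consumed.with(Serdes.String(), Serdes.Long()))    // assumed topic
            .groupByKey();

        // One Aggregator per input stream; the Initializer is shared and applied once per key.
        final Aggregator<String, Long, Long> add = (key, value, aggregate) -> aggregate + value;
        final KTable<String, Long> totals = clicks
            .cogroup(add)
            .cogroup(views, add)
            .aggregate(() -> 0L, Materialized.as("totals-store"));             // assumed store name
    }
}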

View File

@@ -0,0 +1,230 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.util.Objects;
/**
* The {@code Consumed} class is used to define the optional parameters when using {@link StreamsBuilder} to
* build instances of {@link KStream}, {@link KTable}, and {@link GlobalKTable}.
* <p>
* For example, you can read a topic as {@link KStream} with a custom timestamp extractor and specify the corresponding
* key and value serdes like:
* <pre>{@code
* StreamsBuilder builder = new StreamsBuilder();
* KStream<String, Long> stream = builder.stream(
* "topicName",
* Consumed.with(Serdes.String(), Serdes.Long())
* .withTimestampExtractor(new LogAndSkipOnInvalidTimestamp()));
* }</pre>
* Similarly, you can read a topic as {@link KTable} with a custom {@code auto.offset.reset} configuration and force a
* state store {@link org.apache.kafka.streams.kstream.Materialized materialization} to access the content via
* interactive queries:
* <pre>{@code
* StreamsBuilder builder = new StreamsBuilder();
* KTable<Integer, Integer> table = builder.table(
* "topicName",
* Consumed.with(AutoOffsetReset.LATEST),
* Materialized.as("queryable-store-name"));
* }</pre>
*
* @param <K> type of record key
* @param <V> type of record value
*/
public class Consumed<K, V> implements NamedOperation<Consumed<K, V>> {
protected Serde<K> keySerde;
protected Serde<V> valueSerde;
protected TimestampExtractor timestampExtractor;
protected Topology.AutoOffsetReset resetPolicy;
protected String processorName;
private Consumed(final Serde<K> keySerde,
final Serde<V> valueSerde,
final TimestampExtractor timestampExtractor,
final Topology.AutoOffsetReset resetPolicy,
final String processorName) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.timestampExtractor = timestampExtractor;
this.resetPolicy = resetPolicy;
this.processorName = processorName;
}
/**
* Create an instance of {@link Consumed} from an existing instance.
* @param consumed the instance of {@link Consumed} to copy
*/
protected Consumed(final Consumed<K, V> consumed) {
this(consumed.keySerde,
consumed.valueSerde,
consumed.timestampExtractor,
consumed.resetPolicy,
consumed.processorName
);
}
/**
* Create an instance of {@link Consumed} with the supplied arguments. {@code null} values are acceptable.
*
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final TimestampExtractor timestampExtractor,
final Topology.AutoOffsetReset resetPolicy) {
return new Consumed<>(keySerde, valueSerde, timestampExtractor, resetPolicy, null);
}
/**
* Create an instance of {@link Consumed} with key and value {@link Serde}s.
*
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Consumed<>(keySerde, valueSerde, null, null, null);
}
/**
* Create an instance of {@link Consumed} with a {@link TimestampExtractor}.
*
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final TimestampExtractor timestampExtractor) {
return new Consumed<>(null, null, timestampExtractor, null, null);
}
/**
* Create an instance of {@link Consumed} with a {@link org.apache.kafka.streams.Topology.AutoOffsetReset Topology.AutoOffsetReset}.
*
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> with(final Topology.AutoOffsetReset resetPolicy) {
return new Consumed<>(null, null, null, resetPolicy, null);
}
/**
* Create an instance of {@link Consumed} with provided processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Consumed}
*/
public static <K, V> Consumed<K, V> as(final String processorName) {
return new Consumed<>(null, null, null, null, processorName);
}
/**
* Configure the instance of {@link Consumed} with a key {@link Serde}.
*
* @param keySerde the key serde. If {@code null} the default key serde from config will be used
* @return this
*/
public Consumed<K, V> withKeySerde(final Serde<K> keySerde) {
this.keySerde = keySerde;
return this;
}
/**
* Configure the instance of {@link Consumed} with a value {@link Serde}.
*
* @param valueSerde the value serde. If {@code null} the default value serde from config will be used
* @return this
*/
public Consumed<K, V> withValueSerde(final Serde<V> valueSerde) {
this.valueSerde = valueSerde;
return this;
}
/**
* Configure the instance of {@link Consumed} with a {@link TimestampExtractor}.
*
* @param timestampExtractor the timestamp extractor to be used. If {@code null} the default timestamp extractor from config will be used
* @return this
*/
public Consumed<K, V> withTimestampExtractor(final TimestampExtractor timestampExtractor) {
this.timestampExtractor = timestampExtractor;
return this;
}
/**
* Configure the instance of {@link Consumed} with a {@link org.apache.kafka.streams.Topology.AutoOffsetReset Topology.AutoOffsetReset}.
*
* @param resetPolicy the offset reset policy to be used. If {@code null} the default reset policy from config will be used
* @return this
*/
public Consumed<K, V> withOffsetResetPolicy(final Topology.AutoOffsetReset resetPolicy) {
this.resetPolicy = resetPolicy;
return this;
}
/**
* Configure the instance of {@link Consumed} with a processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @return this
*/
@Override
public Consumed<K, V> withName(final String processorName) {
this.processorName = processorName;
return this;
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final Consumed<?, ?> consumed = (Consumed<?, ?>) o;
return Objects.equals(keySerde, consumed.keySerde) &&
Objects.equals(valueSerde, consumed.valueSerde) &&
Objects.equals(timestampExtractor, consumed.timestampExtractor) &&
resetPolicy == consumed.resetPolicy;
}
@Override
public int hashCode() {
return Objects.hash(keySerde, valueSerde, timestampExtractor, resetPolicy);
}
}
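
A short sketch of the builder-style configuration, chaining a reset policy and a processor name onto the serdes; the topic and processor names are placeholders.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;

public class ConsumedExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        // Override serdes, reset policy, and the source processor name in one chained definition.
        final KStream<String, Long> stream = builder.stream(
            "input-topic",                                                      // assumed topic name
            Consumed.with(Serdes.String(), Serdes.Long())
                    .withOffsetResetPolicy(Topology.AutoOffsetReset.EARLIEST)
                    .withName("input-source"));                                 // assumed processor name
    }
}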

View File

@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ForeachAction} interface for performing an action on a {@link org.apache.kafka.streams.KeyValue key-value
* pair}.
* This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
* record of a stream.
* If stateful processing is required, consider using
* {@link KStream#process(org.apache.kafka.streams.processor.ProcessorSupplier, String...) KStream#process(...)}.
*
* @param <K> key type
* @param <V> value type
* @see KStream#foreach(ForeachAction)
*/
public interface ForeachAction<K, V> {
/**
* Perform an action for each record of a stream.
*
* @param key the key of the record
* @param value the value of the record
*/
void apply(final K key, final V value);
}
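
Since ForeachAction has a single method, it is usually supplied as a lambda; a tiny sketch with an assumed topic name:

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;

public class ForeachExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        builder.stream("events", Consumed.with(Serdes.String(), Serdes.String()))  // assumed topic
               // The ForeachAction is a terminal, side-effect-only operation on each record.
               .foreach((key, value) -> System.out.println(key + " -> " + value));
    }
}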

View File

@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.state.ReadOnlyKeyValueStore;
/**
* {@code GlobalKTable} is an abstraction of a <i>changelog stream</i> from a primary-keyed table.
* Each record in this changelog stream is an update on the primary-keyed table with the record key as the primary key.
* <p>
* {@code GlobalKTable} can only be used as right-hand side input for {@link KStream stream}-table joins.
* <p>
* In contrast to a {@link KTable} that is partitioned over all {@link KafkaStreams} instances, a {@code GlobalKTable}
* is fully replicated per {@link KafkaStreams} instance.
* Every partition of the underlying topic is consumed by each {@code GlobalKTable}, such that the full set of data is
* available in every {@link KafkaStreams} instance.
* This provides the ability to perform joins with {@link KStream} without having to repartition the input stream.
* All joins with the {@code GlobalKTable} require that a {@link KeyValueMapper} is provided that can map from the
* {@link KeyValue} of the left hand side {@link KStream} to the key of the right hand side {@code GlobalKTable}.
* <p>
* A {@code GlobalKTable} is created via a {@link StreamsBuilder}. For example:
* <pre>{@code
* builder.globalTable("topic-name", "queryable-store-name");
* }</pre>
* All {@code GlobalKTable}s are backed by a {@link ReadOnlyKeyValueStore} and are therefore queryable via the
* interactive queries API.
* For example:
* <pre>{@code
* final GlobalKTable globalOne = builder.globalTable("g1", "g1-store");
* final GlobalKTable globalTwo = builder.globalTable("g2", "g2-store");
* ...
* final KafkaStreams streams = ...;
* streams.start()
* ...
* ReadOnlyKeyValueStore view = streams.store("g1-store", QueryableStoreTypes.keyValueStore());
* view.get(key); // can be done on any key, as all keys are present
*}</pre>
* Note that in contrast to {@link KTable} a {@code GlobalKTable}'s state holds a full copy of the underlying topic,
* thus all keys can be queried locally.
* <p>
* Records from the source topic that have null keys are dropped.
*
* @param <K> Type of primary keys
* @param <V> Type of value changes
* @see KTable
* @see StreamsBuilder#globalTable(String)
* @see KStream#join(GlobalKTable, KeyValueMapper, ValueJoiner)
* @see KStream#leftJoin(GlobalKTable, KeyValueMapper, ValueJoiner)
*/
public interface GlobalKTable<K, V> {
/**
* Get the name of the local state store that can be used to query this {@code GlobalKTable}.
*
* @return the underlying state store name, or {@code null} if this {@code GlobalKTable} cannot be queried.
*/
String queryableStoreName();
}
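
A hedged sketch of the stream-GlobalKTable join described above; the topic names and the assumption that the order value carries the customer id are illustrative only.

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.KStream;

public class GlobalKTableJoinExample {
    public static void main(final String[] args) {
        final StreamsBuilder builder = new StreamsBuilder();
        final GlobalKTable<String, String> customers =
            builder.globalTable("customers", Consumed.with(Serdes.String(), Serdes.String())); // assumed topic
        final KStream<String, String> orders =
            builder.stream("orders", Consumed.with(Serdes.String(), Serdes.String()));         // assumed topic

        // The KeyValueMapper extracts the lookup key from the stream record (here the order value
        // is assumed to be the customer id), so no repartitioning of the stream is required.
        orders.join(customers,
                    (orderId, customerId) -> customerId,
                    (order, customer) -> order + " / " + customer)
              .to("enriched-orders");                                                           // assumed topic
    }
}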

View File

@@ -0,0 +1,159 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
/**
* The class that is used to capture the key and value {@link Serde}s and set the part of name used for
* repartition topics when performing {@link KStream#groupBy(KeyValueMapper, Grouped)}, {@link
* KStream#groupByKey(Grouped)}, or {@link KTable#groupBy(KeyValueMapper, Grouped)} operations. Note
* that Kafka Streams does not always create repartition topics for grouping operations.
*
* @param <K> the key type
* @param <V> the value type
*/
public class Grouped<K, V> implements NamedOperation<Grouped<K, V>> {
protected final Serde<K> keySerde;
protected final Serde<V> valueSerde;
protected final String name;
private Grouped(final String name,
final Serde<K> keySerde,
final Serde<V> valueSerde) {
this.name = name;
this.keySerde = keySerde;
this.valueSerde = valueSerde;
}
protected Grouped(final Grouped<K, V> grouped) {
this(grouped.name, grouped.keySerde, grouped.valueSerde);
}
/**
* Create a {@link Grouped} instance with the provided name used as part of the repartition topic if required.
*
* @param name the name used for a repartition topic if required
* @return a new {@link Grouped} configured with the name
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K, V> Grouped<K, V> as(final String name) {
return new Grouped<>(name, null, null);
}
/**
* Create a {@link Grouped} instance with the provided keySerde. If {@code null} the default key serde from config will be used.
*
* @param keySerde the Serde used for serializing the key. If {@code null} the default key serde from config will be used
* @return a new {@link Grouped} configured with the keySerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K> Grouped keySerde(final Serde<K> keySerde) {
return new Grouped<>(null, keySerde, null);
}
/**
* Create a {@link Grouped} instance with the provided valueSerde. If {@code null} the default value serde from config will be used.
*
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} configured with the valueSerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <V> Grouped valueSerde(final Serde<V> valueSerde) {
return new Grouped<>(null, null, valueSerde);
}
/**
* Create a {@link Grouped} instance with the provided name, keySerde, and valueSerde. If the keySerde and/or the valueSerde is
* {@code null} the default value for the respective serde from config will be used.
*
* @param name the name used as part of the repartition topic name if required
* @param keySerde the {@link Serde} used for serializing the key. If {@code null} the default key serde from config will be used
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} configured with the name, keySerde, and valueSerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K, V> Grouped<K, V> with(final String name,
final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Grouped<>(name, keySerde, valueSerde);
}
/**
* Create a {@link Grouped} instance with the provided keySerde and valueSerde. If the keySerde and/or the valueSerde is
* {@code null} the default value for the respective serde from config will be used.
*
* @param keySerde the {@link Serde} used for serializing the key. If {@code null} the default key serde from config will be used
* @param valueSerde the {@link Serde} used for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} configured with the keySerde, and valueSerde
* @see KStream#groupByKey(Grouped)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper, Grouped)
*/
public static <K, V> Grouped<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Grouped<>(null, keySerde, valueSerde);
}
/**
* Perform the grouping operation with the name for a repartition topic if required. Note
* that Kafka Streams does not always create repartition topics for grouping operations.
*
* @param name the name used for the processor name and as part of the repartition topic name if required
* @return a new {@link Grouped} instance configured with the name
*/
@Override
public Grouped<K, V> withName(final String name) {
return new Grouped<>(name, keySerde, valueSerde);
}
/**
* Perform the grouping operation using the provided keySerde for serializing the key.
*
* @param keySerde {@link Serde} to use for serializing the key. If {@code null} the default key serde from config will be used
* @return a new {@link Grouped} instance configured with the keySerde
*/
public Grouped<K, V> withKeySerde(final Serde<K> keySerde) {
return new Grouped<>(name, keySerde, valueSerde);
}
/**
* Perform the grouping operation using the provided valueSerde for serializing the value.
*
* @param valueSerde {@link Serde} to use for serializing the value. If {@code null} the default value serde from config will be used
* @return a new {@link Grouped} instance configured with the valueSerde
*/
public Grouped<K, V> withValueSerde(final Serde<V> valueSerde) {
return new Grouped<>(name, keySerde, valueSerde);
}
}

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code Initializer} interface for creating an initial value in aggregations.
* {@code Initializer} is used in combination with {@link Aggregator}.
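* <p>
* For illustration, a minimal sketch using a lambda; starting every aggregate at zero is just an assumption for the example:
* <pre>{@code
* // provides 0L as the starting point for each key's aggregate
* Initializer<Long> zeroInitializer = () -> 0L;
* }</pre>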
*
* @param <VA> aggregate value type
* @see Aggregator
* @see KGroupedStream#aggregate(Initializer, Aggregator)
* @see KGroupedStream#aggregate(Initializer, Aggregator, Materialized)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator)
* @see TimeWindowedKStream#aggregate(Initializer, Aggregator, Materialized)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger)
* @see SessionWindowedKStream#aggregate(Initializer, Aggregator, Merger, Materialized)
*/
public interface Initializer<VA> {
/**
* Return the initial value for an aggregation.
*
* @return the initial value for an aggregation
*/
VA apply();
}

View File

@@ -0,0 +1,309 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.time.Duration;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* The window specifications used for joins.
* <p>
* A {@code JoinWindows} instance defines a maximum time difference for a {@link KStream#join(KStream, ValueJoiner,
* JoinWindows) join over two streams} on the same key.
* In SQL-style you would express this join as
* <pre>{@code
* SELECT * FROM stream1, stream2
* WHERE
* stream1.key = stream2.key
* AND
* stream1.ts - before <= stream2.ts AND stream2.ts <= stream1.ts + after
* }</pre>
* There are three different window configurations supported:
* <ul>
* <li>before = after = time-difference</li>
* <li>before = 0 and after = time-difference</li>
* <li>before = time-difference and after = 0</li>
* </ul>
* A join is symmetric in the sense that a join specification on the first stream returns the same result record as
* a join specification on the second stream with flipped before and after values.
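* <p>
* For illustration, a hedged sketch of the three configurations; the five-second time difference is an assumption
* for the example:
* <pre>{@code
* JoinWindows symmetric  = JoinWindows.of(Duration.ofSeconds(5));                        // before = after = 5 seconds
* JoinWindows onlyAfter  = JoinWindows.of(Duration.ofSeconds(5)).before(Duration.ZERO);  // before = 0, after = 5 seconds
* JoinWindows onlyBefore = JoinWindows.of(Duration.ofSeconds(5)).after(Duration.ZERO);   // before = 5 seconds, after = 0
* }</pre>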
* <p>
* Both values (before and after) must not result in an "inverse" window, i.e., the upper-interval bound cannot be smaller
* than the lower-interval bound.
* <p>
* {@code JoinWindows} are sliding windows, thus, they are aligned to the actual record timestamps.
* This implies that each input record defines its own window with start and end time being relative to the record's
* timestamp.
* <p>
* For time semantics, see {@link TimestampExtractor}.
*
* @see TimeWindows
* @see UnlimitedWindows
* @see SessionWindows
* @see KStream#join(KStream, ValueJoiner, JoinWindows)
* @see KStream#join(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see TimestampExtractor
*/
public final class JoinWindows extends Windows<Window> {
private final long maintainDurationMs;
/** Maximum time difference for tuples that are before the join tuple. */
public final long beforeMs;
/** Maximum time difference for tuples that are after the join tuple. */
public final long afterMs;
private final long graceMs;
private JoinWindows(final long beforeMs,
final long afterMs,
final long graceMs,
final long maintainDurationMs) {
if (beforeMs + afterMs < 0) {
throw new IllegalArgumentException("Window interval (ie, beforeMs+afterMs) must not be negative.");
}
this.afterMs = afterMs;
this.beforeMs = beforeMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
@Deprecated // removing segments from Windows will fix this
private JoinWindows(final long beforeMs,
final long afterMs,
final long graceMs,
final long maintainDurationMs,
final int segments) {
super(segments);
if (beforeMs + afterMs < 0) {
throw new IllegalArgumentException("Window interval (ie, beforeMs+afterMs) must not be negative.");
}
this.afterMs = afterMs;
this.beforeMs = beforeMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
/**
* Specifies that records of the same key are joinable if their timestamps are within {@code timeDifferenceMs},
* i.e., the timestamp of a record from the secondary stream is at most {@code timeDifferenceMs} earlier or later than
* the timestamp of the record from the primary stream.
*
* @param timeDifferenceMs join window interval in milliseconds
* @throws IllegalArgumentException if {@code timeDifferenceMs} is negative
* @deprecated Use {@link #of(Duration)} instead.
*/
@Deprecated
public static JoinWindows of(final long timeDifferenceMs) throws IllegalArgumentException {
// This is a static factory method, so we initialize grace and retention to the defaults.
return new JoinWindows(timeDifferenceMs, timeDifferenceMs, -1L, DEFAULT_RETENTION_MS);
}
/**
* Specifies that records of the same key are joinable if their timestamps are within {@code timeDifference},
* i.e., the timestamp of a record from the secondary stream is at most {@code timeDifference} earlier or later than
* the timestamp of the record from the primary stream.
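* <p>
* For illustration, a hedged usage sketch; the two {@code KStream<String, String>} instances and the joiner are
* assumptions for the example:
* <pre>{@code
* KStream<String, String> joined = leftStream.join(
*     rightStream,
*     (leftValue, rightValue) -> leftValue + "/" + rightValue,
*     JoinWindows.of(Duration.ofMinutes(5)));
* }</pre>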
*
* @param timeDifference join window interval
* @throws IllegalArgumentException if {@code timeDifference} is negative or can't be represented as {@code long milliseconds}
*/
public static JoinWindows of(final Duration timeDifference) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
return of(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
}
/**
* Changes the start window boundary to {@code timeDifferenceMs} but keeps the end window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifferenceMs} earlier than the timestamp of the record from the primary stream.
* {@code timeDifferenceMs} can be negative but its absolute value must not be larger than the current window "after"
* value (which would result in a negative window size).
*
* @param timeDifferenceMs relative window start time in milliseconds
* @throws IllegalArgumentException if the resulting window size is negative
* @deprecated Use {@link #before(Duration)} instead.
*/
@Deprecated
public JoinWindows before(final long timeDifferenceMs) throws IllegalArgumentException {
return new JoinWindows(timeDifferenceMs, afterMs, graceMs, maintainDurationMs, segments);
}
/**
* Changes the start window boundary to {@code timeDifference} but keeps the end window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifference} earlier than the timestamp of the record from the primary stream.
* {@code timeDifference} can be negative but its absolute value must not be larger than the current window "after"
* value (which would result in a negative window size).
*
* @param timeDifference relative window start time
* @throws IllegalArgumentException if the resulting window size is negative or {@code timeDifference} can't be represented as {@code long milliseconds}
*/
public JoinWindows before(final Duration timeDifference) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
return before(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
}
/**
* Changes the end window boundary to {@code timeDifferenceMs} but keeps the start window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifferenceMs} later than the timestamp of the record from the primary stream.
* {@code timeDifferenceMs} can be negative but its absolute value must not be larger than the current window "before"
* value (which would result in a negative window size).
*
* @param timeDifferenceMs relative window end time in milliseconds
* @throws IllegalArgumentException if the resulting window size is negative
* @deprecated Use {@link #after(Duration)} instead
*/
@Deprecated
public JoinWindows after(final long timeDifferenceMs) throws IllegalArgumentException {
return new JoinWindows(beforeMs, timeDifferenceMs, graceMs, maintainDurationMs, segments);
}
/**
* Changes the end window boundary to {@code timeDifference} but keeps the start window boundary as is.
* Thus, records of the same key are joinable if the timestamp of a record from the secondary stream is at most
* {@code timeDifference} later than the timestamp of the record from the primary stream.
* {@code timeDifference} can be negative but its absolute value must not be larger than the current window "before"
* value (which would result in a negative window size).
*
* @param timeDifference relative window end time
* @throws IllegalArgumentException if the resulting window size is negative or {@code timeDifference} can't be represented as {@code long milliseconds}
*/
public JoinWindows after(final Duration timeDifference) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(timeDifference, "timeDifference");
return after(ApiUtils.validateMillisecondDuration(timeDifference, msgPrefix));
}
/**
* Not supported by {@code JoinWindows}.
* Throws {@link UnsupportedOperationException}.
*
* @throws UnsupportedOperationException at every invocation
*/
@Override
public Map<Long, Window> windowsFor(final long timestamp) {
throw new UnsupportedOperationException("windowsFor() is not supported by JoinWindows.");
}
@Override
public long size() {
return beforeMs + afterMs;
}
/**
* Reject out-of-order events that are delayed more than {@code afterWindowEnd}
* after the end of its window.
* <p>
* Delay is defined as (stream_time - record_timestamp).
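* <p>
* For illustration, a hedged sketch; the five-minute window and one-minute grace period are assumptions for the example:
* <pre>{@code
* // accept records that arrive up to one minute after a window closes
* JoinWindows windows = JoinWindows.of(Duration.ofMinutes(5)).grace(Duration.ofMinutes(1));
* }</pre>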
*
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
* @return this updated builder
* @throws IllegalArgumentException if the {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
*/
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
public JoinWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
if (afterWindowEndMs < 0) {
throw new IllegalArgumentException("Grace period must not be negative.");
}
return new JoinWindows(beforeMs, afterMs, afterWindowEndMs, maintainDurationMs, segments);
}
@Override
public long gracePeriodMs() {
// NOTE: in the future, when we remove maintainMs,
// we should default the grace period to 24h to maintain the default behavior,
// or we can default to (24h - size) if you want to be super accurate.
return graceMs != -1 ? graceMs : maintainMs() - size();
}
/**
* @param durationMs the window retention time in milliseconds
* @return itself
* @throws IllegalArgumentException if {@code durationMs} is smaller than the window size
* @deprecated since 2.1. Use {@link JoinWindows#grace(Duration)} instead.
*/
@Override
@Deprecated
public JoinWindows until(final long durationMs) throws IllegalArgumentException {
if (durationMs < size()) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than the window size.");
}
return new JoinWindows(beforeMs, afterMs, graceMs, durationMs, segments);
}
/**
* {@inheritDoc}
* <p>
* For {@code JoinWindows} the maintain duration is at least the window size.
*
* @return the window maintain duration
* @deprecated since 2.1. This function should not be used anymore, since {@link JoinWindows#until(long)}
* is deprecated in favor of {@link JoinWindows#grace(Duration)}.
*/
@Override
@Deprecated
public long maintainMs() {
return Math.max(maintainDurationMs, size());
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final JoinWindows that = (JoinWindows) o;
return beforeMs == that.beforeMs &&
afterMs == that.afterMs &&
maintainDurationMs == that.maintainDurationMs &&
segments == that.segments &&
graceMs == that.graceMs;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public int hashCode() {
return Objects.hash(beforeMs, afterMs, graceMs, maintainDurationMs, segments);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public String toString() {
return "JoinWindows{" +
"beforeMs=" + beforeMs +
", afterMs=" + afterMs +
", graceMs=" + graceMs +
", maintainDurationMs=" + maintainDurationMs +
", segments=" + segments +
'}';
}
}

View File

@@ -0,0 +1,233 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
/**
* The {@code Joined} class represents optional params that can be passed to
* {@link KStream#join(KTable, ValueJoiner, Joined) KStream#join(KTable,...)} and
* {@link KStream#leftJoin(KTable, ValueJoiner) KStream#leftJoin(KTable,...)} operations.
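* <p>
* For illustration, a hedged sketch; the concrete serdes are assumptions for the example:
* <pre>{@code
* Joined<String, Long, Double> joined =
*     Joined.with(Serdes.String(), Serdes.Long(), Serdes.Double());
* }</pre>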
*/
public class Joined<K, V, VO> implements NamedOperation<Joined<K, V, VO>> {
protected final Serde<K> keySerde;
protected final Serde<V> valueSerde;
protected final Serde<VO> otherValueSerde;
protected final String name;
private Joined(final Serde<K> keySerde,
final Serde<V> valueSerde,
final Serde<VO> otherValueSerde,
final String name) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.otherValueSerde = otherValueSerde;
this.name = name;
}
protected Joined(final Joined<K, V, VO> joined) {
this(joined.keySerde, joined.valueSerde, joined.otherValueSerde, joined.name);
}
/**
* Create an instance of {@code Joined} with key, value, and otherValue {@link Serde} instances.
* {@code null} values are accepted and will be replaced by the default serdes as defined in config.
*
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be used
* @param valueSerde the value serde to use. If {@code null} the default value serde from config will be used
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance with the provided serdes
*/
public static <K, V, VO> Joined<K, V, VO> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final Serde<VO> otherValueSerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, null);
}
/**
* Create an instance of {@code Joined} with key, value, and otherValue {@link Serde} instances.
* {@code null} values are accepted and will be replaced by the default serdes as defined in
* config.
*
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be
* used
* @param valueSerde the value serde to use. If {@code null} the default value serde from config
* will be used
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde
* from config will be used
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance with the provided serdes
*/
public static <K, V, VO> Joined<K, V, VO> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final Serde<VO> otherValueSerde,
final String name) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Create an instance of {@code Joined} with a key {@link Serde}.
* {@code null} values are accepted and will be replaced by the default key serde as defined in config.
*
* @param keySerde the key serde to use. If {@code null} the default key serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the keySerde
*/
public static <K, V, VO> Joined<K, V, VO> keySerde(final Serde<K> keySerde) {
return new Joined<>(keySerde, null, null, null);
}
/**
* Create an instance of {@code Joined} with a value {@link Serde}.
* {@code null} values are accepted and will be replaced by the default value serde as defined in config.
*
* @param valueSerde the value serde to use. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the valueSerde
*/
public static <K, V, VO> Joined<K, V, VO> valueSerde(final Serde<V> valueSerde) {
return new Joined<>(null, valueSerde, null, null);
}
/**
* Create an instance of {@code Joined} with an other value {@link Serde}.
* {@code null} values are accepted and will be replaced by the default value serde as defined in config.
*
* @param otherValueSerde the otherValue serde to use. If {@code null} the default value serde from config will be used
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the otherValueSerde
*/
public static <K, V, VO> Joined<K, V, VO> otherValueSerde(final Serde<VO> otherValueSerde) {
return new Joined<>(null, null, otherValueSerde, null);
}
/**
* Create an instance of {@code Joined} with a base name for all components of the join; this may
* include any repartition topics created to complete the join.
*
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the name
*
* @deprecated use {@link #as(String)} instead
*/
@Deprecated
public static <K, V, VO> Joined<K, V, VO> named(final String name) {
return new Joined<>(null, null, null, name);
}
/**
* Create an instance of {@code Joined} with a base name for all components of the join; this may
* include any repartition topics created to complete the join.
*
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @param <K> key type
* @param <V> value type
* @param <VO> other value type
* @return new {@code Joined} instance configured with the name
*
*/
public static <K, V, VO> Joined<K, V, VO> as(final String name) {
return new Joined<>(null, null, null, name);
}
/**
* Set the key {@link Serde} to be used. Null values are accepted and will be replaced by the default
* key serde as defined in config
*
* @param keySerde the key serde to use. If null the default key serde from config will be used
* @return new {@code Joined} instance configured with the {@code keySerde}
*/
public Joined<K, V, VO> withKeySerde(final Serde<K> keySerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Set the value {@link Serde} to be used. Null values are accepted and will be replaced by the default
* value serde as defined in config
*
* @param valueSerde the value serde to use. If null the default value serde from config will be used
* @return new {@code Joined} instance configured with the {@code valueSerde}
*/
public Joined<K, V, VO> withValueSerde(final Serde<V> valueSerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Set the otherValue {@link Serde} to be used. Null values are accepted and will be replaced by the default
* value serde as defined in config
*
* @param otherValueSerde the otherValue serde to use. If null the default value serde from config will be used
* @return new {@code Joined} instance configured with the {@code otherValueSerde}
*/
public Joined<K, V, VO> withOtherValueSerde(final Serde<VO> otherValueSerde) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
/**
* Set the base name used for all components of the join; this may include any repartition topics
* created to complete the join.
*
* @param name the name used as the base for naming components of the join including any
* repartition topics
* @return new {@code Joined} instance configured with the {@code name}
*/
@Override
public Joined<K, V, VO> withName(final String name) {
return new Joined<>(keySerde, valueSerde, otherValueSerde, name);
}
public Serde<K> keySerde() {
return keySerde;
}
public Serde<V> valueSerde() {
return valueSerde;
}
public Serde<VO> otherValueSerde() {
return otherValueSerde;
}
/**
* @deprecated this method will be removed in a future release
*/
@Deprecated
public String name() {
return name;
}
}

View File

@@ -0,0 +1,556 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.KeyValueStore;
/**
* {@code KGroupedStream} is an abstraction of a <i>grouped</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation of a {@link KStream} in order to apply an aggregation operation on the original
* {@link KStream} records.
* <p>
* It is an intermediate representation after a grouping of a {@link KStream} before an aggregation is applied to the
* new partitions resulting in a {@link KTable}.
* <p>
* A {@code KGroupedStream} must be obtained from a {@link KStream} via {@link KStream#groupByKey() groupByKey()} or
* {@link KStream#groupBy(KeyValueMapper) groupBy(...)}.
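* <p>
* For illustration, a hedged sketch; the topic name and the surrounding {@code StreamsBuilder} usage are assumptions
* for the example:
* <pre>{@code
* StreamsBuilder builder = new StreamsBuilder();
* KGroupedStream<String, String> groupedStream = builder
*     .<String, String>stream("input-topic")
*     .groupByKey();
* }</pre>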
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
*/
public interface KGroupedStream<K, V> {
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count();
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology
*
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named);
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String,Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Count the number of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = "storeName"; // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String,Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named,
final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator)}).
* <p>
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
* aggregate and the record's value.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer)} can be used to compute aggregate functions like sum, min, or max.
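* <p>
* For illustration, a hedged sketch computing a rolling per-key sum; the {@code KGroupedStream<String, Long>} named
* {@code groupedStream} is an assumption for the example:
* <pre>{@code
* KTable<String, Long> sums = groupedStream.reduce((aggValue, newValue) -> aggValue + newValue);
* }</pre>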
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the reduce function returns {@code null}, it is then interpreted as
* deletion for the key, and future messages of the same key coming from upstream operators
* will be handled as a newly initialized value.
*/
KTable<K, V> reduce(final Reducer<V> reducer);
/**
* Combine the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
* aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* <p>
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer, Materialized)} can be used to compute aggregate functions like sum, min, or
* max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // compute sum
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long sumForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> reducer,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* provided by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Reducer} is applied for each input record and computes a new aggregate using the current
* aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* <p>
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer, Materialized)} can be used to compute aggregate functions like sum, min, or
* max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}.
* <pre>{@code
* KafkaStreams streams = ... // compute sum
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long sumForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the reduce function returns {@code null}, it is then interpreted as
* deletion for the key, and future messages of the same key coming from upstream operators
* will be handled as a newly initialized value.
*/
KTable<K, V> reduce(final Reducer<V> reducer,
final Named named,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate(Initializer, Aggregator)} can be used to compute aggregate functions like
* count (c.f. {@link #count()}).
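* <p>
* For illustration, a hedged sketch summing value lengths; the {@code KGroupedStream<String, String>} named
* {@code groupedStream} is an assumption for the example:
* <pre>{@code
* KTable<String, Integer> totalLengths = groupedStream.aggregate(
*     () -> 0,                                               // initial aggregate per key
*     (key, value, aggregate) -> aggregate + value.length()  // add the length of each new value
* );
* }</pre>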
* <p>
* The default value serde from config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Aggregator, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param <VR> the value type of the resulting {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the aggregate function returns {@code null}, it is then interpreted as
* deletion for the key, and future messages of the same key coming from upstream operators
* will be handled as a newly initialized value.
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator);
/**
* Aggregate the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate(Initializer, Aggregator, Materialized)} can be used to compute aggregate functions like
* count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried by the given store name in {@code materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate(Initializer, Aggregator, Materialized)} can be used to compute aggregate functions like
* count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some aggregation on value type double
* String queryableStoreName = "storeName" // the store name should be the name of the store as defined by the Materialized instance
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-key";
* Long aggForKey = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key. If the aggregate function returns {@code null}, it is then interpreted as
* deletion for the key, and future messages of the same key coming from upstream operators
* will be handled as a newly initialized value.
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Named named,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Create a new {@link TimeWindowedKStream} instance that can be used to perform windowed aggregations.
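 * <p>
 * For illustration only, a minimal sketch (assuming a {@code KGroupedStream<String, Long>} named {@code grouped};
 * the window factory method may differ between versions):
 * <pre>{@code
 * KTable<Windowed<String>, Long> windowedCounts = grouped
 *     .windowedBy(TimeWindows.of(Duration.ofMinutes(5)))
 *     .count();
 * }</pre>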
* @param windows the specification of the aggregation {@link Windows}
* @param <W> the window type
* @return an instance of {@link TimeWindowedKStream}
*/
<W extends Window> TimeWindowedKStream<K, V> windowedBy(final Windows<W> windows);
/**
* Create a new {@link SessionWindowedKStream} instance that can be used to perform session windowed aggregations.
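 * <p>
 * For illustration only, a minimal sketch (assuming a {@code KGroupedStream<String, Long>} named {@code grouped};
 * the session-window factory method may differ between versions):
 * <pre>{@code
 * KTable<Windowed<String>, Long> sessionCounts = grouped
 *     .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
 *     .count();
 * }</pre>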
* @param windows the specification of the aggregation {@link SessionWindows}
 * @return an instance of {@link SessionWindowedKStream}
*/
SessionWindowedKStream<K, V> windowedBy(final SessionWindows windows);
/**
 * Create a new {@link CogroupedKStream} from this grouped KStream to allow cogrouping other
* {@code KGroupedStream} to it.
* {@link CogroupedKStream} is an abstraction of multiple <i>grouped</i> record streams of {@link KeyValue} pairs.
* It is an intermediate representation after a grouping of {@link KStream}s, before the
* aggregations are applied to the new partitions resulting in a {@link KTable}.
* <p>
* The specified {@link Aggregator} is applied in the actual {@link CogroupedKStream#aggregate(Initializer)
* aggregation} step for each input record and computes a new aggregate using the current aggregate (or for the very
* first record per key using the initial intermediate aggregation result provided via the {@link Initializer} that
* is passed into {@link CogroupedKStream#aggregate(Initializer)}) and the record's value.
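 * <p>
 * For illustration only, a hedged sketch (assuming {@code groupedStreamA} is a {@code KGroupedStream<String, String>}
 * and {@code groupedStreamB} a {@code KGroupedStream<String, Long>}; all names are placeholders):
 * <pre>{@code
 * Aggregator<String, String, Long> lengthAggregator = (key, value, aggregate) -> aggregate + value.length();
 * Aggregator<String, Long, Long> sumAggregator = (key, value, aggregate) -> aggregate + value;
 *
 * KTable<String, Long> aggregated = groupedStreamA
 *     .cogroup(lengthAggregator)
 *     .cogroup(groupedStreamB, sumAggregator)
 *     .aggregate(() -> 0L);
 * }</pre>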
*
* @param aggregator an {@link Aggregator} that computes a new aggregate result
* @param <Vout> the type of the output values
* @return a {@link CogroupedKStream}
*/
<Vout> CogroupedKStream<K, Vout> cogroup(final Aggregator<? super K, ? super V, Vout> aggregator);
}

View File

@@ -0,0 +1,699 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.KeyValueStore;
/**
* {@code KGroupedTable} is an abstraction of a <i>re-grouped changelog stream</i> from a primary-keyed table,
* usually on a different grouping key than the original primary key.
* <p>
* It is an intermediate representation after a re-grouping of a {@link KTable} before an aggregation is applied to the
* new partitions resulting in a new {@link KTable}.
* <p>
* A {@code KGroupedTable} must be obtained from a {@link KTable} via {@link KTable#groupBy(KeyValueMapper)
* groupBy(...)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KTable
*/
public interface KGroupedTable<K, V> {
/**
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
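 * <p>
 * For illustration only, a minimal usage sketch (assuming a {@code KGroupedTable<String, String>} named
 * {@code groupedTable}; the store name is a placeholder):
 * <pre>{@code
 * KTable<String, Long> counts = groupedTable.count(
 *     Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));
 * }</pre>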
*
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
*
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named the {@link Named} config used to name the processor in the topology
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named, final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count();
/**
* Count number of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper) mapped} to
* the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named the {@link Named} config used to name the processor in the topology
* @return a {@link KTable} that contains "update" records with unmodified keys and {@link Long} values that
* represent the latest (rolling) count (i.e., number of records) for each key
*/
KTable<K, Long> count(final Named named);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
* current aggregate (first argument) and the record's value (second argument) by adding the new record to the
* aggregate.
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate (first argument) and the record's value (second
* argument) by "removing" the "replaced" record from the aggregate.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
 * Thus, {@code reduce(Reducer, Reducer, Materialized)} can be used to compute aggregate functions like sum.
* For sum, the adder and subtractor would work as follows:
* <pre>{@code
* public class SumAdder implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer newValue) {
* return currentAgg + newValue;
* }
* }
*
* public class SumSubtractor implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer oldValue) {
* return currentAgg - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
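 * <p>
 * A hedged call-site sketch wiring the adder and subtractor above as lambdas (assuming a
 * {@code KGroupedTable<String, Integer>} named {@code groupedTable}; all names are placeholders):
 * <pre>{@code
 * KTable<String, Integer> sums = groupedTable.reduce(
 *     (currentAgg, newValue) -> currentAgg + newValue,  // adder
 *     (currentAgg, oldValue) -> currentAgg - oldValue,  // subtractor
 *     Materialized.<String, Integer, KeyValueStore<Bytes, byte[]>>as("sum-store")
 *         .withValueSerde(Serdes.Integer()));
 * }</pre>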
*
* @param adder a {@link Reducer} that adds a new value to the aggregate result
 * @param subtractor a {@link Reducer} that removes an old value from the aggregate result
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}).
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
* current aggregate (first argument) and the record's value (second argument) by adding the new record to the
* aggregate.
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate (first argument) and the record's value (second
* argument) by "removing" the "replaced" record from the aggregate.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
 * Thus, {@code reduce(Reducer, Reducer, Named, Materialized)} can be used to compute aggregate functions like sum.
* For sum, the adder and subtractor would work as follows:
* <pre>{@code
* public class SumAdder implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer newValue) {
* return currentAgg + newValue;
* }
* }
*
* public class SumSubtractor implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer oldValue) {
* return currentAgg - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param adder a {@link Reducer} that adds a new value to the aggregate result
 * @param subtractor a {@link Reducer} that removes an old value from the aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor,
final Named named,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Combine the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Aggregator)}).
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Reducer adder} is applied for each update record and computes a new aggregate using the
* current aggregate and the record's value by adding the new record to the aggregate.
* The specified {@link Reducer subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
* If there is no current aggregate the {@link Reducer} is not applied and the new aggregate will be the record's
* value as-is.
* Thus, {@code reduce(Reducer, Reducer)} can be used to compute aggregate functions like sum.
* For sum, the adder and subtractor would work as follows:
* <pre>{@code
* public class SumAdder implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer newValue) {
* return currentAgg + newValue;
* }
* }
*
* public class SumSubtractor implements Reducer<Integer> {
* public Integer apply(Integer currentAgg, Integer oldValue) {
* return currentAgg - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param adder a {@link Reducer} that adds a new value to the aggregate result
 * @param subtractor a {@link Reducer} that removes an old value from the aggregate result
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer, Materialized) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Materialized)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as value serde in Materialized#withValueSerde
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
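 * <p>
 * A hedged call-site sketch wiring the initializer, adder, and subtractor above as lambdas (assuming a
 * {@code KGroupedTable<String, Integer>} named {@code groupedTable}; all names are placeholders):
 * <pre>{@code
 * KTable<String, Long> sums = groupedTable.aggregate(
 *     () -> 0L,                                            // initializer
 *     (key, newValue, aggregate) -> aggregate + newValue,  // adder
 *     (key, oldValue, aggregate) -> aggregate - oldValue,  // subtractor
 *     Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("sum-store")
 *         .withValueSerde(Serdes.Long()));
 * }</pre>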
*
* @param initializer an {@link Initializer} that provides an initial aggregate result value
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable}.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer, Materialized) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view)
* that can be queried using the provided {@code queryableStoreName}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
* Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Materialized)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as value serde in Materialized#withValueSerde
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link KeyValueStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* ReadOnlyKeyValueStore<String, Long> localStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>keyValueStore());
* String key = "some-word";
* Long countForWord = localStore.get(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot contain characters other than ASCII
* alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@code Materialized}, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that provides an initial aggregate result value
* @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized the instance of {@link Materialized} used to materialize the state store. Cannot be {@code null}
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Named named,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* If the result value type does not match the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value
* serde} you should use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
 * Thus, {@code aggregate(Initializer, Aggregator, Aggregator)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as default value serde in StreamsConfig
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
 * <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
 * @param initializer an {@link Initializer} that provides an initial aggregate result value
 * @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor);
/**
* Aggregate the value of records of the original {@link KTable} that got {@link KTable#groupBy(KeyValueMapper)
* mapped} to the same key into a new instance of {@link KTable} using default serializers and deserializers.
* Records with {@code null} key are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer, Reducer) combining via reduce(...)} as it,
* for example, allows the result to have a different type than the input values.
* If the result value type does not match the {@link StreamsConfig#DEFAULT_VALUE_SERDE_CLASS_CONFIG default value
* serde} you should use {@link #aggregate(Initializer, Aggregator, Aggregator, Materialized)}.
 * The result is written into a local {@link KeyValueStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied once directly before the first input record is processed to
* provide an initial intermediate aggregation result that is used to process the first record.
* Each update to the original {@link KTable} results in a two step update of the result {@link KTable}.
* The specified {@link Aggregator adder} is applied for each update record and computes a new aggregate using the
* current aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value by adding the new record to the aggregate.
* The specified {@link Aggregator subtractor} is applied for each "replaced" record of the original {@link KTable}
* and computes a new aggregate using the current aggregate and the record's value by "removing" the "replaced"
* record from the aggregate.
 * Thus, {@code aggregate(Initializer, Aggregator, Aggregator, Named)} can be used to compute aggregate functions
* like sum.
* For sum, the initializer, adder, and subtractor would work as follows:
* <pre>{@code
* // in this example, LongSerde.class must be set as default value serde in StreamsConfig
* public class SumInitializer implements Initializer<Long> {
* public Long apply() {
* return 0L;
* }
* }
*
* public class SumAdder implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer newValue, Long aggregate) {
* return aggregate + newValue;
* }
* }
*
* public class SumSubtractor implements Aggregator<String, Integer, Long> {
* public Long apply(String key, Integer oldValue, Long aggregate) {
* return aggregate - oldValue;
* }
* }
* }</pre>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
 * <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
 * @param initializer an {@link Initializer} that provides an initial aggregate result value
 * @param adder an {@link Aggregator} that adds a new record to the aggregate result
 * @param subtractor an {@link Aggregator} that removes an old record from the aggregate result
* @param named a {@link Named} config used to name the processor in the topology
* @param <VR> the value type of the aggregated {@link KTable}
* @return a {@link KTable} that contains "update" records with unmodified keys, and values that represent the
* latest (rolling) aggregate for each key
*/
<VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Named named);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KeyValue;
/**
* The {@code KeyValueMapper} interface for mapping a {@link KeyValue key-value pair} to a new value of arbitrary type.
* For example, it can be used to
* <ul>
* <li>map from an input {@link KeyValue} pair to an output {@link KeyValue} pair with different key and/or value type
* (for this case output type {@code VR == }{@link KeyValue KeyValue&lt;NewKeyType,NewValueType&gt;})</li>
* <li>map from an input record to a new key (with arbitrary key type as specified by {@code VR})</li>
* </ul>
 * This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
* record of a stream (cf. {@link Transformer} for stateful record transformation).
* {@code KeyValueMapper} is a generalization of {@link ValueMapper}.
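 * <p>
 * For illustration only, a minimal sketch (assuming a {@code KStream<String, String>} named {@code stream};
 * the key-derivation logic is a placeholder):
 * <pre>{@code
 * // derive a new key from the value, e.g. before re-grouping the stream
 * KStream<String, String> rekeyed = stream.selectKey((key, value) -> value.split(",")[0]);
 * }</pre>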
*
* @param <K> key type
* @param <V> value type
* @param <VR> mapped value type
* @see ValueMapper
* @see Transformer
* @see KStream#map(KeyValueMapper)
* @see KStream#flatMap(KeyValueMapper)
* @see KStream#selectKey(KeyValueMapper)
* @see KStream#groupBy(KeyValueMapper)
* @see KStream#groupBy(KeyValueMapper, Grouped)
* @see KTable#groupBy(KeyValueMapper)
* @see KTable#groupBy(KeyValueMapper, Grouped)
* @see KTable#toStream(KeyValueMapper)
*/
public interface KeyValueMapper<K, V, VR> {
/**
* Map a record with the given key and value to a new value.
*
* @param key the key of the record
* @param value the value of the record
* @return the new value
*/
VR apply(final K key, final V value);
}

View File

@@ -0,0 +1,261 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.state.KeyValueBytesStoreSupplier;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
import org.apache.kafka.streams.state.SessionStore;
import org.apache.kafka.streams.state.StoreSupplier;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
import org.apache.kafka.streams.state.WindowStore;
import java.time.Duration;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
/**
* Used to describe how a {@link StateStore} should be materialized.
* You can either provide a custom {@link StateStore} backend through one of the provided methods accepting a supplier
* or use the default RocksDB backends by providing just a store name.
* <p>
* For example, you can read a topic as {@link KTable} and force a state store materialization to access the content
* via Interactive Queries API:
* <pre>{@code
* StreamsBuilder builder = new StreamsBuilder();
* KTable<Integer, Integer> table = builder.table(
* "topicName",
* Materialized.as("queryable-store-name"));
* }</pre>
*
* @param <K> type of record key
* @param <V> type of record value
 * @param <S> type of state store (note: state stores always have key/value types {@code <Bytes,byte[]>})
*
* @see org.apache.kafka.streams.state.Stores
*/
public class Materialized<K, V, S extends StateStore> {
protected StoreSupplier<S> storeSupplier;
protected String storeName;
protected Serde<V> valueSerde;
protected Serde<K> keySerde;
protected boolean loggingEnabled = true;
protected boolean cachingEnabled = true;
protected Map<String, String> topicConfig = new HashMap<>();
protected Duration retention;
private Materialized(final StoreSupplier<S> storeSupplier) {
this.storeSupplier = storeSupplier;
}
private Materialized(final String storeName) {
this.storeName = storeName;
}
/**
* Copy constructor.
* @param materialized the {@link Materialized} instance to copy.
*/
protected Materialized(final Materialized<K, V, S> materialized) {
this.storeSupplier = materialized.storeSupplier;
this.storeName = materialized.storeName;
this.keySerde = materialized.keySerde;
this.valueSerde = materialized.valueSerde;
this.loggingEnabled = materialized.loggingEnabled;
this.cachingEnabled = materialized.cachingEnabled;
this.topicConfig = materialized.topicConfig;
this.retention = materialized.retention;
}
/**
* Materialize a {@link StateStore} with the given name.
*
* @param storeName the name of the underlying {@link KTable} state store; valid characters are ASCII
* alphanumerics, '.', '_' and '-'.
* @param <K> key type of the store
* @param <V> value type of the store
* @param <S> type of the {@link StateStore}
* @return a new {@link Materialized} instance with the given storeName
*/
public static <K, V, S extends StateStore> Materialized<K, V, S> as(final String storeName) {
Named.validate(storeName);
return new Materialized<>(storeName);
}
/**
* Materialize a {@link WindowStore} using the provided {@link WindowBytesStoreSupplier}.
*
* Important: Custom subclasses are allowed here, but they should respect the retention contract:
* Window stores are required to retain windows at least as long as (window size + window grace period).
* Stores constructed via {@link org.apache.kafka.streams.state.Stores} already satisfy this contract.
*
* @param supplier the {@link WindowBytesStoreSupplier} used to materialize the store
* @param <K> key type of the store
* @param <V> value type of the store
* @return a new {@link Materialized} instance with the given supplier
*/
public static <K, V> Materialized<K, V, WindowStore<Bytes, byte[]>> as(final WindowBytesStoreSupplier supplier) {
Objects.requireNonNull(supplier, "supplier can't be null");
return new Materialized<>(supplier);
}
/**
* Materialize a {@link SessionStore} using the provided {@link SessionBytesStoreSupplier}.
*
* Important: Custom subclasses are allowed here, but they should respect the retention contract:
* Session stores are required to retain windows at least as long as (session inactivity gap + session grace period).
* Stores constructed via {@link org.apache.kafka.streams.state.Stores} already satisfy this contract.
*
* @param supplier the {@link SessionBytesStoreSupplier} used to materialize the store
* @param <K> key type of the store
* @param <V> value type of the store
 * @return a new {@link Materialized} instance with the given supplier
*/
public static <K, V> Materialized<K, V, SessionStore<Bytes, byte[]>> as(final SessionBytesStoreSupplier supplier) {
Objects.requireNonNull(supplier, "supplier can't be null");
return new Materialized<>(supplier);
}
/**
* Materialize a {@link KeyValueStore} using the provided {@link KeyValueBytesStoreSupplier}.
*
* @param supplier the {@link KeyValueBytesStoreSupplier} used to materialize the store
* @param <K> key type of the store
* @param <V> value type of the store
* @return a new {@link Materialized} instance with the given supplier
*/
public static <K, V> Materialized<K, V, KeyValueStore<Bytes, byte[]>> as(final KeyValueBytesStoreSupplier supplier) {
Objects.requireNonNull(supplier, "supplier can't be null");
return new Materialized<>(supplier);
}
/**
* Materialize a {@link StateStore} with the provided key and value {@link Serde}s.
* An internal name will be used for the store.
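 * <p>
 * For illustration only, a minimal sketch (the serdes stand in for the actual key and value types):
 * <pre>{@code
 * Materialized<String, Long, KeyValueStore<Bytes, byte[]>> materialized =
 *     Materialized.with(Serdes.String(), Serdes.Long());
 * }</pre>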
*
* @param keySerde the key {@link Serde} to use. If the {@link Serde} is null, then the default key
* serde from configs will be used
* @param valueSerde the value {@link Serde} to use. If the {@link Serde} is null, then the default value
* serde from configs will be used
* @param <K> key type
* @param <V> value type
* @param <S> store type
* @return a new {@link Materialized} instance with the given key and value serdes
*/
public static <K, V, S extends StateStore> Materialized<K, V, S> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Materialized<K, V, S>((String) null).withKeySerde(keySerde).withValueSerde(valueSerde);
}
/**
* Set the valueSerde the materialized {@link StateStore} will use.
*
* @param valueSerde the value {@link Serde} to use. If the {@link Serde} is null, then the default value
 * serde from configs will be used. If the serialized bytes are null for a put operation,
 * it is treated as a delete operation
* @return itself
*/
public Materialized<K, V, S> withValueSerde(final Serde<V> valueSerde) {
this.valueSerde = valueSerde;
return this;
}
/**
 * Set the keySerde the materialized {@link StateStore} will use.
* @param keySerde the key {@link Serde} to use. If the {@link Serde} is null, then the default key
* serde from configs will be used
* @return itself
*/
public Materialized<K, V, S> withKeySerde(final Serde<K> keySerde) {
this.keySerde = keySerde;
return this;
}
/**
* Indicates that a changelog should be created for the store. The changelog will be created
* with the provided configs.
* <p>
* Note: Any unrecognized configs will be ignored.
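 * <p>
 * For illustration only, a hedged sketch (the config key shown is a standard Kafka topic-level config; the value
 * and store name are placeholders):
 * <pre>{@code
 * Map<String, String> changelogConfig = new HashMap<>();
 * changelogConfig.put("retention.ms", "86400000"); // retain changelog records for one day
 *
 * Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("store-name")
 *     .withLoggingEnabled(changelogConfig);
 * }</pre>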
* @param config any configs that should be applied to the changelog
* @return itself
*/
public Materialized<K, V, S> withLoggingEnabled(final Map<String, String> config) {
loggingEnabled = true;
this.topicConfig = config;
return this;
}
/**
* Disable change logging for the materialized {@link StateStore}.
* @return itself
*/
public Materialized<K, V, S> withLoggingDisabled() {
loggingEnabled = false;
this.topicConfig.clear();
return this;
}
/**
* Enable caching for the materialized {@link StateStore}.
* @return itself
*/
public Materialized<K, V, S> withCachingEnabled() {
cachingEnabled = true;
return this;
}
/**
* Disable caching for the materialized {@link StateStore}.
* @return itself
*/
public Materialized<K, V, S> withCachingDisabled() {
cachingEnabled = false;
return this;
}
/**
* Configure retention period for window and session stores. Ignored for key/value stores.
*
* Overridden by pre-configured store suppliers
* ({@link Materialized#as(SessionBytesStoreSupplier)} or {@link Materialized#as(WindowBytesStoreSupplier)}).
*
* Note that the retention period must be at least long enough to contain the windowed data's entire life cycle,
* from window-start through window-end, and for the entire grace period.
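 * <p>
 * For illustration only, a minimal sketch (store name and types are placeholders):
 * <pre>{@code
 * Materialized<String, Long, WindowStore<Bytes, byte[]>> materialized =
 *     Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("windowed-counts")
 *         .withRetention(Duration.ofDays(1));
 * }</pre>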
*
* @param retention the retention time
* @return itself
* @throws IllegalArgumentException if retention is negative or can't be represented as {@code long milliseconds}
*/
public Materialized<K, V, S> withRetention(final Duration retention) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(retention, "retention");
        final long retentionMs = ApiUtils.validateMillisecondDuration(retention, msgPrefix);
        if (retentionMs < 0) {
throw new IllegalArgumentException("Retention must not be negative.");
}
this.retention = retention;
return this;
}
}

View File

@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The interface for merging aggregate values for {@link SessionWindows} with the given key.
*
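* A {@code Merger} is typically supplied as a lambda; for example, summing two {@code Long}
* aggregates (an illustrative sketch, not taken from this file):
* <pre>{@code
* Merger<String, Long> sessionMerger = (aggKey, aggOne, aggTwo) -> aggOne + aggTwo;
* }</pre>
*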
* @param <K> key type
* @param <V> aggregate value type
*/
public interface Merger<K, V> {
/**
* Compute a new aggregate from the key and two aggregates.
*
* @param aggKey the key of the record
* @param aggOne the first aggregate
* @param aggTwo the second aggregate
* @return the new aggregate value
*/
V apply(final K aggKey, final V aggOne, final V aggTwo);
}

View File

@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.errors.TopologyException;
import java.util.Objects;
public class Named implements NamedOperation<Named> {
private static final int MAX_NAME_LENGTH = 249;
protected String name;
protected Named(final Named named) {
this(Objects.requireNonNull(named, "named can't be null").name);
}
protected Named(final String name) {
this.name = name;
if (name != null) {
validate(name);
}
}
/**
* Create a Named instance with provided name.
*
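* For example, naming a processor explicitly (an illustrative sketch; {@code stream} is assumed to be
* a {@code KStream<String, String>}):
* <pre>{@code
* KStream<String, String> upper = stream.mapValues(value -> value.toUpperCase(), Named.as("uppercase-values"));
* }</pre>
*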
* @param name the processor name to be used. If {@code null} a default processor name will be generated.
* @return A new {@link Named} instance configured with name
*
* @throws TopologyException if an invalid name is specified; valid characters are ASCII alphanumerics, '.', '_' and '-'.
*/
public static Named as(final String name) {
Objects.requireNonNull(name, "name can't be null");
return new Named(name);
}
@Override
public Named withName(final String name) {
return new Named(name);
}
protected static void validate(final String name) {
if (name.isEmpty())
throw new TopologyException("Name is illegal, it can't be empty");
if (name.equals(".") || name.equals(".."))
throw new TopologyException("Name cannot be \".\" or \"..\"");
if (name.length() > MAX_NAME_LENGTH)
throw new TopologyException("Name is illegal, it can't be longer than " + MAX_NAME_LENGTH +
" characters, name: " + name);
if (!containsValidPattern(name))
throw new TopologyException("Name \"" + name + "\" is illegal, it contains a character other than " +
"ASCII alphanumerics, '.', '_' and '-'");
}
/**
* Valid characters for Kafka topics are the ASCII alphanumerics, '.', '_', and '-'
*/
private static boolean containsValidPattern(final String topic) {
for (int i = 0; i < topic.length(); ++i) {
final char c = topic.charAt(i);
// We don't use Character.isLetterOrDigit(c) because it's slower
final boolean validLetterOrDigit = (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z');
final boolean validChar = validLetterOrDigit || c == '.' || c == '_' || c == '-';
if (!validChar) {
return false;
}
}
return true;
}
}

View File

@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* Default interface that can be used to customize the names of operations, internal topics, or stores.
*/
interface NamedOperation<T extends NamedOperation<T>> {
/**
* Sets the name to be used for an operation.
*
* @param name the name to use.
* @return an instance of {@link NamedOperation}
*/
T withName(final String name);
}

View File

@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KeyValue;
/**
* The {@code Predicate} interface represents a predicate (boolean-valued function) of a {@link KeyValue} pair.
* This is a stateless record-by-record operation, i.e., {@link #test(Object, Object)} is invoked individually for each
* record of a stream.
*
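* For example, keeping only records with positive values (an illustrative sketch; {@code stream} is
* assumed to be a {@code KStream<String, Integer>}):
* <pre>{@code
* Predicate<String, Integer> isPositive = (key, value) -> value != null && value > 0;
* KStream<String, Integer> positives = stream.filter(isPositive);
* }</pre>
*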
* @param <K> key type
* @param <V> value type
* @see KStream#filter(Predicate)
* @see KStream#filterNot(Predicate)
* @see KStream#branch(Predicate[])
* @see KTable#filter(Predicate)
* @see KTable#filterNot(Predicate)
*/
public interface Predicate<K, V> {
/**
* Test if the record with the given key and value satisfies the predicate.
*
* @param key the key of the record
* @param value the value of the record
* @return {@code true} if the {@link KeyValue} pair satisfies the predicate&mdash;{@code false} otherwise
*/
boolean test(final K key, final V value);
}

View File

@@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.errors.TopologyException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Objects;
/**
* An object to define the options used when printing a {@link KStream}.
*
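* For example, printing records to system out with a label (an illustrative sketch; {@code stream} is
* assumed to be a {@code KStream<String, Long>}):
* <pre>{@code
* stream.print(Printed.<String, Long>toSysOut().withLabel("word-counts"));
* }</pre>
*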
* @param <K> key type
* @param <V> value type
* @see KStream#print(Printed)
*/
public class Printed<K, V> implements NamedOperation<Printed<K, V>> {
protected final OutputStream outputStream;
protected String label;
protected String processorName;
protected KeyValueMapper<? super K, ? super V, String> mapper =
(KeyValueMapper<K, V, String>) (key, value) -> String.format("%s, %s", key, value);
private Printed(final OutputStream outputStream) {
this.outputStream = outputStream;
}
/**
* Copy constructor.
* @param printed instance of {@link Printed} to copy
*/
protected Printed(final Printed<K, V> printed) {
this.outputStream = printed.outputStream;
this.label = printed.label;
this.mapper = printed.mapper;
this.processorName = printed.processorName;
}
/**
* Print the records of a {@link KStream} to a file.
*
* @param filePath path of the file
* @param <K> key type
* @param <V> value type
* @return a new Printed instance
*/
public static <K, V> Printed<K, V> toFile(final String filePath) {
Objects.requireNonNull(filePath, "filePath can't be null");
if (filePath.trim().isEmpty()) {
throw new TopologyException("filePath can't be an empty string");
}
try {
return new Printed<>(Files.newOutputStream(Paths.get(filePath)));
} catch (final IOException e) {
throw new TopologyException("Unable to write stream to file at [" + filePath + "] " + e.getMessage());
}
}
/**
* Print the records of a {@link KStream} to system out.
*
* @param <K> key type
* @param <V> value type
* @return a new Printed instance
*/
public static <K, V> Printed<K, V> toSysOut() {
return new Printed<>(System.out);
}
/**
* Print the records of a {@link KStream} with the provided label.
*
* @param label label to use
* @return this
*/
public Printed<K, V> withLabel(final String label) {
Objects.requireNonNull(label, "label can't be null");
this.label = label;
return this;
}
/**
* Print the records of a {@link KStream} with the provided {@link KeyValueMapper}
* The provided KeyValueMapper's mapped value type must be {@code String}.
* <p>
* The example below shows how to customize output data.
* <pre>{@code
* final KeyValueMapper<Integer, String, String> mapper = new KeyValueMapper<Integer, String, String>() {
* public String apply(Integer key, String value) {
* return String.format("(%d, %s)", key, value);
* }
* };
* }</pre>
*
* Implementors will need to override {@code toString()} for keys and values that are not of type {@link String},
* {@link Integer} etc. to get meaningful information.
*
* @param mapper mapper to use
* @return this
*/
public Printed<K, V> withKeyValueMapper(final KeyValueMapper<? super K, ? super V, String> mapper) {
Objects.requireNonNull(mapper, "mapper can't be null");
this.mapper = mapper;
return this;
}
/**
* Print the records of a {@link KStream} with provided processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @return this
*/
@Override
public Printed<K, V> withName(final String processorName) {
this.processorName = processorName;
return this;
}
}

View File

@@ -0,0 +1,201 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
import org.apache.kafka.streams.kstream.internals.WindowedStreamPartitioner;
import org.apache.kafka.streams.processor.StreamPartitioner;
import java.util.Objects;
/**
* This class is used to provide the optional parameters when producing to new topics
* using {@link KStream#through(String, Produced)} or {@link KStream#to(String, Produced)}.
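* <p>
* For example (an illustrative sketch; the topic name, serdes, and {@code stream} are assumptions):
* <pre>{@code
* stream.to("output-topic", Produced.with(Serdes.String(), Serdes.Long()));
* }</pre>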
* @param <K> key type
* @param <V> value type
*/
public class Produced<K, V> implements NamedOperation<Produced<K, V>> {
protected Serde<K> keySerde;
protected Serde<V> valueSerde;
protected StreamPartitioner<? super K, ? super V> partitioner;
protected String processorName;
private Produced(final Serde<K> keySerde,
final Serde<V> valueSerde,
final StreamPartitioner<? super K, ? super V> partitioner,
final String processorName) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.partitioner = partitioner;
this.processorName = processorName;
}
protected Produced(final Produced<K, V> produced) {
this.keySerde = produced.keySerde;
this.valueSerde = produced.valueSerde;
this.partitioner = produced.partitioner;
this.processorName = produced.processorName;
}
/**
* Create a Produced instance with provided keySerde and valueSerde.
* @param keySerde Serde to use for serializing the key
* @param valueSerde Serde to use for serializing the value
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with keySerde and valueSerde
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Produced<>(keySerde, valueSerde, null, null);
}
/**
* Create a Produced instance with provided keySerde, valueSerde, and partitioner.
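* <p>
* For example, a custom partitioner supplied as a lambda (an illustrative sketch; the key/value types are assumptions):
* <pre>{@code
* StreamPartitioner<String, Long> partitioner =
*     (topic, key, value, numPartitions) -> (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
* Produced<String, Long> produced = Produced.with(Serdes.String(), Serdes.Long(), partitioner);
* }</pre>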
* @param keySerde Serde to use for serializing the key
* @param valueSerde Serde to use for serializing the value
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
* if not specified and {@code keySerde} provides a {@link WindowedSerializer} for the key
* {@link WindowedStreamPartitioner} will be used&mdash;otherwise {@link DefaultPartitioner}
* will be used
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with keySerde, valueSerde, and partitioner
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde,
final StreamPartitioner<? super K, ? super V> partitioner) {
return new Produced<>(keySerde, valueSerde, partitioner, null);
}
/**
* Create an instance of {@link Produced} with provided processor name.
*
* @param processorName the processor name to be used. If {@code null} a default processor name will be generated
* @param <K> key type
* @param <V> value type
* @return a new instance of {@link Produced}
*/
public static <K, V> Produced<K, V> as(final String processorName) {
return new Produced<>(null, null, null, processorName);
}
/**
* Create a Produced instance with provided keySerde.
* @param keySerde Serde to use for serializing the key
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with keySerde
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> keySerde(final Serde<K> keySerde) {
return new Produced<>(keySerde, null, null, null);
}
/**
* Create a Produced instance with provided valueSerde.
* @param valueSerde Serde to use for serializing the value
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with valueSerde
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> valueSerde(final Serde<V> valueSerde) {
return new Produced<>(null, valueSerde, null, null);
}
/**
* Create a Produced instance with provided partitioner.
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
* if not specified and the key serde provides a {@link WindowedSerializer} for the key
* {@link WindowedStreamPartitioner} will be used&mdash;otherwise {@link DefaultPartitioner} will be used
* @param <K> key type
* @param <V> value type
* @return A new {@link Produced} instance configured with partitioner
* @see KStream#through(String, Produced)
* @see KStream#to(String, Produced)
*/
public static <K, V> Produced<K, V> streamPartitioner(final StreamPartitioner<? super K, ? super V> partitioner) {
return new Produced<>(null, null, partitioner, null);
}
/**
* Produce records using the provided partitioner.
* @param partitioner the function used to determine how records are distributed among partitions of the topic,
* if not specified and the key serde provides a {@link WindowedSerializer} for the key
* {@link WindowedStreamPartitioner} will be used&mdash;otherwise {@link DefaultPartitioner} will be used
* @return this
*/
public Produced<K, V> withStreamPartitioner(final StreamPartitioner<? super K, ? super V> partitioner) {
this.partitioner = partitioner;
return this;
}
/**
* Produce records using the provided valueSerde.
* @param valueSerde Serde to use for serializing the value
* @return this
*/
public Produced<K, V> withValueSerde(final Serde<V> valueSerde) {
this.valueSerde = valueSerde;
return this;
}
/**
* Produce records using the provided keySerde.
* @param keySerde Serde to use for serializing the key
* @return this
*/
public Produced<K, V> withKeySerde(final Serde<K> keySerde) {
this.keySerde = keySerde;
return this;
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final Produced<?, ?> produced = (Produced<?, ?>) o;
return Objects.equals(keySerde, produced.keySerde) &&
Objects.equals(valueSerde, produced.valueSerde) &&
Objects.equals(partitioner, produced.partitioner);
}
@Override
public int hashCode() {
return Objects.hash(keySerde, valueSerde, partitioner);
}
@Override
public Produced<K, V> withName(final String name) {
this.processorName = name;
return this;
}
}

View File

@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.KeyValue;
/**
* The {@code Reducer} interface for combining two values of the same type into a new value.
* In contrast to {@link Aggregator} the result type must be the same as the input type.
* <p>
* The provided values can be either original values from input {@link KeyValue} pair records or be a previously
* computed result from {@link Reducer#apply(Object, Object)}.
* <p>
* {@code Reducer} can be used to implement aggregation functions like sum, min, or max.
*
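* For example, a sum reducer for {@code Long} values (an illustrative sketch; {@code groupedStream} is
* assumed to be a {@code KGroupedStream<String, Long>}):
* <pre>{@code
* Reducer<Long> sumReducer = (value1, value2) -> value1 + value2;
* KTable<String, Long> sums = groupedStream.reduce(sumReducer);
* }</pre>
*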
* @param <V> value type
* @see KGroupedStream#reduce(Reducer)
* @see KGroupedStream#reduce(Reducer, Materialized)
* @see TimeWindowedKStream#reduce(Reducer)
* @see TimeWindowedKStream#reduce(Reducer, Materialized)
* @see SessionWindowedKStream#reduce(Reducer)
* @see SessionWindowedKStream#reduce(Reducer, Materialized)
* @see Aggregator
*/
public interface Reducer<V> {
/**
* Aggregate the two given values into a single one.
*
* @param value1 the first value for the aggregation
* @param value2 the second value for the aggregation
* @return the aggregated value
*/
V apply(final V value1, final V value2);
}

View File

@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
/**
* The class that is used to capture the key and value {@link Serde}s used when performing
* {@link KStream#groupBy(KeyValueMapper, Serialized)} and {@link KStream#groupByKey(Serialized)} operations.
*
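* For example (an illustrative sketch; {@code stream} and the serdes are assumptions; note that
* {@link org.apache.kafka.streams.kstream.Grouped} is the non-deprecated replacement):
* <pre>{@code
* KGroupedStream<String, Long> grouped =
*     stream.groupByKey(Serialized.with(Serdes.String(), Serdes.Long()));
* }</pre>
*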
* @param <K> the key type
* @param <V> the value type
*
* @deprecated since 2.1. Use {@link org.apache.kafka.streams.kstream.Grouped} instead
*/
@Deprecated
public class Serialized<K, V> {
protected final Serde<K> keySerde;
protected final Serde<V> valueSerde;
private Serialized(final Serde<K> keySerde,
final Serde<V> valueSerde) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
}
protected Serialized(final Serialized<K, V> serialized) {
this(serialized.keySerde, serialized.valueSerde);
}
/**
* Construct a {@code Serialized} instance with the provided key and value {@link Serde}s.
* If the {@link Serde} params are {@code null} the default serdes defined in the configs will be used.
*
* @param keySerde keySerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @param valueSerde valueSerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @param <K> the key type
* @param <V> the value type
* @return a new instance of {@link Serialized} configured with the provided serdes
*/
public static <K, V> Serialized<K, V> with(final Serde<K> keySerde,
final Serde<V> valueSerde) {
return new Serialized<>(keySerde, valueSerde);
}
/**
* Construct a {@code Serialized} instance with the provided key {@link Serde}.
* If the {@link Serde} params are null the default serdes defined in the configs will be used.
*
* @param keySerde keySerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @return a new instance of {@link Serialized} configured with the provided key serde
*/
public Serialized<K, V> withKeySerde(final Serde<K> keySerde) {
return new Serialized<>(keySerde, null);
}
/**
* Construct a {@code Serialized} instance with the provided value {@link Serde}.
* If the {@link Serde} params are null the default serdes defined in the configs will be used.
*
* @param valueSerde valueSerde that will be used to materialize a stream
* if not specified the default serdes defined in the configs will be used
* @return a new instance of {@link Serialized} configured with the provided value serde
*/
public Serialized<K, V> withValueSerde(final Serde<V> valueSerde) {
return new Serialized<>(null, valueSerde);
}
}

View File

@@ -0,0 +1,265 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.SessionStore;
import java.time.Duration;
/**
* {@code SessionWindowedCogroupKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation of a {@link CogroupedKStream} in order to apply a windowed aggregation operation
* on the original {@link KGroupedStream} records resulting in a windowed {@link KTable} (a <em>windowed</em>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* {@link SessionWindows} are dynamic data driven windows.
* They have no fixed time boundaries, rather the size of the window is determined by the records.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to sessions until their grace period ends (see {@link SessionWindows#grace(Duration)}).
* <p>
* A {@code SessionWindowedCogroupedKStream} must be obtained from a {@link CogroupedKStream} via
* {@link CogroupedKStream#windowedBy(SessionWindows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
* @see SessionWindows
* @see CogroupedKStream
*/
public interface SessionWindowedCogroupedKStream<K, V> {
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
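* <p>
* An illustrative sketch of such a windowed cogroup-aggregation (the cogrouped stream, session gap,
* and value types are assumptions, not taken from this file):
* <pre>{@code
* KTable<Windowed<String>, Long> aggregated = cogroupedStream
*     .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
*     .aggregate(() -> 0L, (aggKey, aggOne, aggTwo) -> aggOne + aggTwo);
* }</pre>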
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Merger, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger);
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Merger, Named, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger,
final Named named);
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the session (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlySessionStore<String, Long> localSessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
*
* String key = "some-word";
* KeyValueIterator<Windowed<String>, Long> aggregateStore = localSessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in these streams by the grouped key and defined sessions.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the session (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // some windowed aggregation on value type double
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlySessionStore<String, Long> sessionStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Merger<? super K, V> sessionMerger,
final Named named,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
}

View File

@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.state.internals.SessionKeySchema;
import java.util.Map;
/**
* The inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
* if the inner serde class is not passed in directly, i.e., when the no-arg constructor is used.
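* <p>
* Alternatively, the inner deserializer can be passed explicitly (an illustrative sketch):
* <pre>{@code
* Deserializer<Windowed<String>> deserializer =
*     new SessionWindowedDeserializer<>(Serdes.String().deserializer());
* }</pre>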
*/
public class SessionWindowedDeserializer<T> implements Deserializer<Windowed<T>> {
private Deserializer<T> inner;
// Default constructor needed by Kafka
public SessionWindowedDeserializer() {}
public SessionWindowedDeserializer(final Deserializer<T> inner) {
this.inner = inner;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).deserializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public Windowed<T> deserialize(final String topic, final byte[] data) {
WindowedSerdes.verifyInnerDeserializerNotNull(inner, this);
if (data == null || data.length == 0) {
return null;
}
// for either key or value, their schema is the same hence we will just use session key schema
return SessionKeySchema.from(data, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
// Only for testing
Deserializer<T> innerDeserializer() {
return inner;
}
}

View File

@@ -0,0 +1,646 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.SessionStore;
import java.time.Duration;
/**
* {@code SessionWindowedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation after a grouping and windowing of a {@link KStream} before an aggregation is
* applied to the new (partitioned) windows resulting in a windowed {@link KTable} (a <em>windowed</em>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* {@link SessionWindows} are dynamic data driven windows.
* They have no fixed time boundaries, rather the size of the window is determined by the records.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to sessions until their grace period ends (see {@link SessionWindows#grace(Duration)}).
* <p>
* A {@code SessionWindowedKStream} must be obtained from a {@link KGroupedStream} via
* {@link KGroupedStream#windowedBy(SessionWindows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
* @see SessionWindows
*/
public interface SessionWindowedKStream<K, V> {
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same session and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
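* <p>
* An illustrative sketch of a session-windowed count (the source stream and session gap are
* assumptions, not taken from this file):
* <pre>{@code
* KTable<Windowed<String>, Long> counts = stream
*     .groupByKey()
*     .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
*     .count();
* }</pre>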
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count();
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Named, Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same session and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count(final Named named);
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provide store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count(final Materialized<K, Long, SessionStore<Bytes, byte[]>> materialized);
/**
* Count the number of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlySessionStore<String, Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore());
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key per session
*/
KTable<Windowed<K>, Long> count(final Named named,
final Materialized<K, Long, SessionStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
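 * <p>
 * For example, a minimal sketch (assuming this session-windowed stream carries {@code <String, String>} records;
 * all names are illustrative) that counts the records per session could look like:
 * <pre>{@code
 * Initializer<Long> initializer = () -> 0L;                                               // start every session at zero
 * Aggregator<String, String, Long> aggregator = (key, value, aggregate) -> aggregate + 1; // add one per record
 * Merger<String, Long> sessionMerger = (key, leftAgg, rightAgg) -> leftAgg + rightAgg;    // combine two merged sessions
 * }</pre>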
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Aggregator, Merger, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Aggregator, Merger, Named, Materialized)}.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
 * Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger,
final Named named);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
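 * <p>
 * For example, a sketch of how the state store for this aggregation could be configured (the store name and serdes
 * are illustrative only):
 * <pre>{@code
 * Materialized<String, Long, SessionStore<Bytes, byte[]>> materialized =
 *     Materialized.<String, Long, SessionStore<Bytes, byte[]>>as("aggregated-sessions-store") // queryable store name
 *                 .withKeySerde(Serdes.String())
 *                 .withValueSerde(Serdes.Long());
 * }</pre>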
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // some session-windowed aggregation on value type Long
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> sessionStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore()));
 * String key = "some-key";
 * KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger,
final Materialized<K, VR, SessionStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record per session is processed to
* provide an initial intermediate aggregation result that is used to process the first record per session.
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* The specified {@link Merger} is used to merge two existing sessions into one, i.e., when the windows overlap,
* they are merged into a single session and the old sessions are discarded.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
 * KafkaStreams streams = ... // some session-windowed aggregation on value type Long
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> sessionStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore()));
 * String key = "some-key";
 * KeyValueIterator<Windowed<String>, Long> aggForKeyForSession = sessionStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
 * provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param sessionMerger a {@link Merger} that combines two aggregation results. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Merger<? super K, VR> sessionMerger,
final Named named,
final Materialized<K, VR, SessionStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
 * If a different serde is required then you should use {@link #reduce(Reducer, Materialized)}.
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
 * If a different serde is required then you should use {@link #reduce(Reducer, Named, Materialized)}.
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer, final Named named);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> localWindowStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore()));
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined sessions.
* Note that sessions are generated on a per-key basis and records with different keys create independent sessions.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator, Merger)}).
* The result is written into a local {@link SessionStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per session initializes the session result.
* The specified {@link Reducer} is applied for each additional input record per session and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link SessionStore} it must be obtained via
 * {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // compute sum
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlySessionStore<String, Long> localWindowStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>sessionStore()));
* String key = "some-key";
* KeyValueIterator<Windowed<String>, Long> sumForKeyForWindows = localWindowStore.fetch(key); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key per session
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Named named,
final Materialized<K, V, SessionStore<Bytes, byte[]>> materialized);
}

View File

@@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
import org.apache.kafka.streams.state.internals.SessionKeySchema;
import java.util.Map;
/**
* The inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
 * if the no-arg constructor is used and hence the inner serde is not passed in at construction time.
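 * <p>
 * For example, a sketch of configuring the inner serde via the no-arg constructor (the chosen inner serde is
 * illustrative only):
 * <pre>{@code
 * Map<String, Object> configs = new HashMap<>();
 * configs.put(StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS, Serdes.StringSerde.class.getName());
 * SessionWindowedSerializer<String> serializer = new SessionWindowedSerializer<>();
 * serializer.configure(configs, true); // isKey = true, so the key inner serde property is read
 * }</pre>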
*/
public class SessionWindowedSerializer<T> implements WindowedSerializer<T> {
private Serializer<T> inner;
// Default constructor needed by Kafka
public SessionWindowedSerializer() {}
public SessionWindowedSerializer(final Serializer<T> inner) {
this.inner = inner;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).serializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public byte[] serialize(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
if (data == null) {
return null;
}
// for either key or value, their schema is the same hence we will just use session key schema
return SessionKeySchema.toBinary(data, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
@Override
public byte[] serializeBaseKey(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
return inner.serialize(topic, data.key());
}
// Only for testing
Serializer<T> innerSerializer() {
return inner;
}
}

View File

@@ -0,0 +1,221 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.state.SessionBytesStoreSupplier;
import java.time.Duration;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* A session based window specification used for aggregating events into sessions.
* <p>
* Sessions represent a period of activity separated by a defined gap of inactivity.
* Any events processed that fall within the inactivity gap of any existing sessions are merged into the existing sessions.
* If the event falls outside of the session gap then a new session will be created.
* <p>
* For example, if we have a session gap of 5 and the following data arrives:
* <pre>
* +--------------------------------------+
* | key | value | time |
* +-----------+-------------+------------+
* | A | 1 | 10 |
* +-----------+-------------+------------+
* | A | 2 | 12 |
* +-----------+-------------+------------+
* | A | 3 | 20 |
* +-----------+-------------+------------+
* </pre>
* We'd have 2 sessions for key A.
* One starting from time 10 and ending at time 12 and another starting and ending at time 20.
* The length of the session is driven by the timestamps of the data within the session.
 * Thus, session windows are not fixed-size windows (c.f. {@link TimeWindows} and {@link JoinWindows}).
* <p>
* If we then received another record:
* <pre>
* +--------------------------------------+
* | key | value | time |
* +-----------+-------------+------------+
* | A | 4 | 16 |
* +-----------+-------------+------------+
* </pre>
* The previous 2 sessions would be merged into a single session with start time 10 and end time 20.
* The aggregate value for this session would be the result of aggregating all 4 values.
* <p>
* For time semantics, see {@link TimestampExtractor}.
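 * <p>
 * For example, a sketch of a session window specification with a five-minute inactivity gap and a 30-second grace
 * period (both values are arbitrary):
 * <pre>{@code
 * SessionWindows sessionWindows = SessionWindows.with(Duration.ofMinutes(5)).grace(Duration.ofSeconds(30));
 * }</pre>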
*
* @see TimeWindows
* @see UnlimitedWindows
* @see JoinWindows
* @see KGroupedStream#windowedBy(SessionWindows)
* @see TimestampExtractor
*/
public final class SessionWindows {
private final long gapMs;
private final long maintainDurationMs;
private final long graceMs;
private SessionWindows(final long gapMs, final long maintainDurationMs, final long graceMs) {
this.gapMs = gapMs;
this.maintainDurationMs = maintainDurationMs;
this.graceMs = graceMs;
}
/**
* Create a new window specification with the specified inactivity gap in milliseconds.
*
* @param inactivityGapMs the gap of inactivity between sessions in milliseconds
* @return a new window specification with default maintain duration of 1 day
*
* @throws IllegalArgumentException if {@code inactivityGapMs} is zero or negative
* @deprecated Use {@link #with(Duration)} instead.
*/
@Deprecated
public static SessionWindows with(final long inactivityGapMs) {
if (inactivityGapMs <= 0) {
throw new IllegalArgumentException("Gap time (inactivityGapMs) cannot be zero or negative.");
}
return new SessionWindows(inactivityGapMs, DEFAULT_RETENTION_MS, -1);
}
/**
* Create a new window specification with the specified inactivity gap.
*
* @param inactivityGap the gap of inactivity between sessions
* @return a new window specification with default maintain duration of 1 day
*
* @throws IllegalArgumentException if {@code inactivityGap} is zero or negative or can't be represented as {@code long milliseconds}
*/
public static SessionWindows with(final Duration inactivityGap) {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(inactivityGap, "inactivityGap");
return with(ApiUtils.validateMillisecondDuration(inactivityGap, msgPrefix));
}
/**
* Set the window maintain duration (retention time) in milliseconds.
* This retention time is a guaranteed <i>lower bound</i> for how long a window will be maintained.
*
* @return itself
* @throws IllegalArgumentException if {@code durationMs} is smaller than window gap
*
* @deprecated since 2.1. Use {@link Materialized#retention}
* or directly configure the retention in a store supplier and use
* {@link Materialized#as(SessionBytesStoreSupplier)}.
*/
@Deprecated
public SessionWindows until(final long durationMs) throws IllegalArgumentException {
if (durationMs < gapMs) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than window gap.");
}
return new SessionWindows(gapMs, durationMs, graceMs);
}
/**
* Reject out-of-order events that arrive more than {@code afterWindowEnd}
* after the end of its window.
* <p>
* Note that new events may change the boundaries of session windows, so aggressive
* close times can lead to surprising results in which an out-of-order event is rejected and then
* a subsequent event moves the window boundary forward.
*
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
* @return this updated builder
 * @throws IllegalArgumentException if the {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
*/
public SessionWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
if (afterWindowEndMs < 0) {
throw new IllegalArgumentException("Grace period must not be negative.");
}
return new SessionWindows(
gapMs,
maintainDurationMs,
afterWindowEndMs
);
}
@SuppressWarnings("deprecation") // continuing to support Windows#maintainMs/segmentInterval in fallback mode
public long gracePeriodMs() {
// NOTE: in the future, when we remove maintainMs,
// we should default the grace period to 24h to maintain the default behavior,
// or we can default to (24h - gapMs) if you want to be super accurate.
return graceMs != -1 ? graceMs : maintainMs() - inactivityGap();
}
/**
* Return the specified gap for the session windows in milliseconds.
*
* @return the inactivity gap of the specified windows
*/
public long inactivityGap() {
return gapMs;
}
/**
* Return the window maintain duration (retention time) in milliseconds.
* <p>
 * For {@code SessionWindows} the maintain duration is at least as large as the window gap.
*
* @return the window maintain duration
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Deprecated
public long maintainMs() {
return Math.max(maintainDurationMs, gapMs);
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final SessionWindows that = (SessionWindows) o;
return gapMs == that.gapMs &&
maintainDurationMs == that.maintainDurationMs &&
graceMs == that.graceMs;
}
@Override
public int hashCode() {
return Objects.hash(gapMs, maintainDurationMs, graceMs);
}
@Override
public String toString() {
return "SessionWindows{" +
"gapMs=" + gapMs +
", maintainDurationMs=" + maintainDurationMs +
", graceMs=" + graceMs +
'}';
}
}

View File

@@ -0,0 +1,286 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
/**
* Class used to configure the name of the join processor, the repartition topic name,
* state stores or state store names in Stream-Stream join.
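 * <p>
 * For example, a sketch of configuring the serdes and names used by a join (all names and serde choices are
 * illustrative only):
 * <pre>{@code
 * StreamJoined<String, Long, Double> streamJoined =
 *     StreamJoined.with(Serdes.String(), Serdes.Long(), Serdes.Double()) // key, this-value, and other-value serdes
 *                 .withName("my-join")             // name for the join processor and repartition topics
 *                 .withStoreName("my-join-store"); // base name for the join state stores
 * }</pre>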
* @param <K> the key type
* @param <V1> this value type
* @param <V2> other value type
*/
public class StreamJoined<K, V1, V2> implements NamedOperation<StreamJoined<K, V1, V2>> {
protected final Serde<K> keySerde;
protected final Serde<V1> valueSerde;
protected final Serde<V2> otherValueSerde;
protected final WindowBytesStoreSupplier thisStoreSupplier;
protected final WindowBytesStoreSupplier otherStoreSupplier;
protected final String name;
protected final String storeName;
protected StreamJoined(final StreamJoined<K, V1, V2> streamJoined) {
this(streamJoined.keySerde,
streamJoined.valueSerde,
streamJoined.otherValueSerde,
streamJoined.thisStoreSupplier,
streamJoined.otherStoreSupplier,
streamJoined.name,
streamJoined.storeName);
}
private StreamJoined(final Serde<K> keySerde,
final Serde<V1> valueSerde,
final Serde<V2> otherValueSerde,
final WindowBytesStoreSupplier thisStoreSupplier,
final WindowBytesStoreSupplier otherStoreSupplier,
final String name,
final String storeName) {
this.keySerde = keySerde;
this.valueSerde = valueSerde;
this.otherValueSerde = otherValueSerde;
this.thisStoreSupplier = thisStoreSupplier;
this.otherStoreSupplier = otherStoreSupplier;
this.name = name;
this.storeName = storeName;
}
/**
* Creates a StreamJoined instance with the provided store suppliers. The store suppliers must implement
* the {@link WindowBytesStoreSupplier} interface. The store suppliers must provide unique names or a
* {@link org.apache.kafka.streams.errors.StreamsException} is thrown.
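 * <p>
 * For example, a sketch of providing explicit window store suppliers (store names and sizes are illustrative only;
 * for join stores {@code retainDuplicates} must be {@code true} and the window size and retention must match the
 * join window configuration):
 * <pre>{@code
 * WindowBytesStoreSupplier thisSupplier = Stores.persistentWindowStore("this-join-store", Duration.ofMinutes(15), Duration.ofMinutes(5), true);
 * WindowBytesStoreSupplier otherSupplier = Stores.persistentWindowStore("other-join-store", Duration.ofMinutes(15), Duration.ofMinutes(5), true);
 * StreamJoined<String, Long, Double> streamJoined = StreamJoined.with(thisSupplier, otherSupplier);
 * }</pre>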
*
* @param storeSupplier this store supplier
* @param otherStoreSupplier other store supplier
* @param <K> the key type
* @param <V1> this value type
* @param <V2> other value type
* @return {@link StreamJoined} instance
*/
public static <K, V1, V2> StreamJoined<K, V1, V2> with(final WindowBytesStoreSupplier storeSupplier,
final WindowBytesStoreSupplier otherStoreSupplier) {
return new StreamJoined<>(
null,
null,
null,
storeSupplier,
otherStoreSupplier,
null,
null
);
}
/**
 * Creates a {@link StreamJoined} instance using the provided name for the state stores and hence the changelog
 * topics of the join stores. The stores will be named ${applicationId}-&lt;storeName&gt;-this-join and ${applicationId}-&lt;storeName&gt;-other-join,
 * or ${applicationId}-&lt;storeName&gt;-outer-this-join and ${applicationId}-&lt;storeName&gt;-outer-other-join, depending on whether the join is an
 * inner join or an outer join. The changelog topics will have the -changelog suffix. Note that even though the join
 * stores have a user-specified name, they remain unavailable for querying.
*
* @param storeName The name to use for the store
* @param <K> The key type
* @param <V1> This value type
* @param <V2> Other value type
* @return {@link StreamJoined} instance
*/
public static <K, V1, V2> StreamJoined<K, V1, V2> as(final String storeName) {
return new StreamJoined<>(
null,
null,
null,
null,
null,
null,
storeName
);
}
/**
* Creates a {@link StreamJoined} instance with the provided serdes to configure the stores
* for the join.
* @param keySerde The key serde
* @param valueSerde This value serde
* @param otherValueSerde Other value serde
* @param <K> The key type
* @param <V1> This value type
* @param <V2> Other value type
* @return {@link StreamJoined} instance
*/
public static <K, V1, V2> StreamJoined<K, V1, V2> with(final Serde<K> keySerde,
final Serde<V1> valueSerde,
final Serde<V2> otherValueSerde
) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
null,
null,
null,
null
);
}
/**
* Set the name to use for the join processor and the repartition topic(s) if required.
* @param name the name to use
* @return a new {@link StreamJoined} instance
*/
@Override
public StreamJoined<K, V1, V2> withName(final String name) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
 * Sets the base store name to use for both sides of the join. This name is used for the state stores and hence the
 * changelog topics of the join stores. The stores will be named ${applicationId}-&lt;storeName&gt;-this-join and ${applicationId}-&lt;storeName&gt;-other-join,
 * or ${applicationId}-&lt;storeName&gt;-outer-this-join and ${applicationId}-&lt;storeName&gt;-outer-other-join, depending on whether the join is an
 * inner join or an outer join. The changelog topics will have the -changelog suffix. Note that even though the join
 * stores have a user-specified name, they remain unavailable for querying.
*
* @param storeName the storeName to use
* @return a new {@link StreamJoined} instance
*/
public StreamJoined<K, V1, V2> withStoreName(final String storeName) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link Serde Serde<K>} for the key
* @param keySerde the serde to use for the key
* @return a new {@link StreamJoined} configured with the keySerde
*/
public StreamJoined<K, V1, V2> withKeySerde(final Serde<K> keySerde) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link Serde Serde<V1>} for this value
* @param valueSerde the serde to use for this value (calling or left side of the join)
* @return a new {@link StreamJoined} configured with the valueSerde
*/
public StreamJoined<K, V1, V2> withValueSerde(final Serde<V1> valueSerde) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link Serde Serde<V2>} for the other value
* @param otherValueSerde the serde to use for the other value (other or right side of the join)
* @return a new {@link StreamJoined} configured with the otherValueSerde
*/
public StreamJoined<K, V1, V2> withOtherValueSerde(final Serde<V2> otherValueSerde) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link WindowBytesStoreSupplier} for this store supplier. Please note
* this method only provides the store supplier for the left side of the join. If you wish to also provide a
* store supplier for the right (i.e., other) side you must use the {@link StreamJoined#withOtherStoreSupplier(WindowBytesStoreSupplier)}
* method
* @param thisStoreSupplier the store supplier to use for this store supplier (calling or left side of the join)
* @return a new {@link StreamJoined} configured with thisStoreSupplier
*/
public StreamJoined<K, V1, V2> withThisStoreSupplier(final WindowBytesStoreSupplier thisStoreSupplier) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
/**
* Configure with the provided {@link WindowBytesStoreSupplier} for the other store supplier. Please note
* this method only provides the store supplier for the right side of the join. If you wish to also provide a
* store supplier for the left side you must use the {@link StreamJoined#withThisStoreSupplier(WindowBytesStoreSupplier)}
* method
* @param otherStoreSupplier the store supplier to use for the other store supplier (other or right side of the join)
* @return a new {@link StreamJoined} configured with otherStoreSupplier
*/
public StreamJoined<K, V1, V2> withOtherStoreSupplier(final WindowBytesStoreSupplier otherStoreSupplier) {
return new StreamJoined<>(
keySerde,
valueSerde,
otherValueSerde,
thisStoreSupplier,
otherStoreSupplier,
name,
storeName
);
}
@Override
public String toString() {
return "StreamJoin{" +
"keySerde=" + keySerde +
", valueSerde=" + valueSerde +
", otherValueSerde=" + otherValueSerde +
", thisStoreSupplier=" + thisStoreSupplier +
", otherStoreSupplier=" + otherStoreSupplier +
", name='" + name + '\'' +
", storeName='" + storeName + '\'' +
'}';
}
}

View File

@@ -0,0 +1,177 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.kstream.internals.suppress.EagerBufferConfigImpl;
import org.apache.kafka.streams.kstream.internals.suppress.FinalResultsSuppressionBuilder;
import org.apache.kafka.streams.kstream.internals.suppress.StrictBufferConfigImpl;
import org.apache.kafka.streams.kstream.internals.suppress.SuppressedInternal;
import java.time.Duration;
public interface Suppressed<K> extends NamedOperation<Suppressed<K>> {
/**
* Marker interface for a buffer configuration that is "strict" in the sense that it will strictly
* enforce the time bound and never emit early.
*/
interface StrictBufferConfig extends BufferConfig<StrictBufferConfig> {
}
/**
* Marker interface for a buffer configuration that will strictly enforce size constraints
* (bytes and/or number of records) on the buffer, so it is suitable for reducing duplicate
* results downstream, but does not promise to eliminate them entirely.
*/
interface EagerBufferConfig extends BufferConfig<EagerBufferConfig> {
}
interface BufferConfig<BC extends BufferConfig<BC>> {
/**
* Create a size-constrained buffer in terms of the maximum number of keys it will store.
*/
static EagerBufferConfig maxRecords(final long recordLimit) {
return new EagerBufferConfigImpl(recordLimit, Long.MAX_VALUE);
}
/**
* Set a size constraint on the buffer in terms of the maximum number of keys it will store.
*/
BC withMaxRecords(final long recordLimit);
/**
* Create a size-constrained buffer in terms of the maximum number of bytes it will use.
*/
static EagerBufferConfig maxBytes(final long byteLimit) {
return new EagerBufferConfigImpl(Long.MAX_VALUE, byteLimit);
}
/**
 * Set a size constraint on the buffer in terms of the maximum number of bytes it will use.
*/
BC withMaxBytes(final long byteLimit);
/**
* Create a buffer unconstrained by size (either keys or bytes).
*
* As a result, the buffer will consume as much memory as it needs, dictated by the time bound.
*
* If there isn't enough heap available to meet the demand, the application will encounter an
* {@link OutOfMemoryError} and shut down (not guaranteed to be a graceful exit). Also, note that
* JVM processes under extreme memory pressure may exhibit poor GC behavior.
*
* This is a convenient option if you doubt that your buffer will be that large, but also don't
* wish to pick particular constraints, such as in testing.
*
* This buffer is "strict" in the sense that it will enforce the time bound or crash.
* It will never emit early.
*/
static StrictBufferConfig unbounded() {
return new StrictBufferConfigImpl();
}
/**
* Set the buffer to be unconstrained by size (either keys or bytes).
*
* As a result, the buffer will consume as much memory as it needs, dictated by the time bound.
*
* If there isn't enough heap available to meet the demand, the application will encounter an
* {@link OutOfMemoryError} and shut down (not guaranteed to be a graceful exit). Also, note that
* JVM processes under extreme memory pressure may exhibit poor GC behavior.
*
* This is a convenient option if you doubt that your buffer will be that large, but also don't
* wish to pick particular constraints, such as in testing.
*
* This buffer is "strict" in the sense that it will enforce the time bound or crash.
* It will never emit early.
*/
StrictBufferConfig withNoBound();
/**
 * Set the buffer to gracefully shut down the application when any of its constraints are violated.
*
* This buffer is "strict" in the sense that it will enforce the time bound or shut down.
* It will never emit early.
*/
StrictBufferConfig shutDownWhenFull();
/**
* Set the buffer to just emit the oldest records when any of its constraints are violated.
*
* This buffer is "not strict" in the sense that it may emit early, so it is suitable for reducing
* duplicate results downstream, but does not promise to eliminate them.
*/
EagerBufferConfig emitEarlyWhenFull();
}
/**
* Configure the suppression to emit only the "final results" from the window.
*
* By default all Streams operators emit results whenever new results are available.
* This includes windowed operations.
*
* This configuration will instead emit just one result per key for each window, guaranteeing
* to deliver only the final result. This option is suitable for use cases in which the business logic
* requires a hard guarantee that only the final result is propagated. For example, sending alerts.
*
* To accomplish this, the operator will buffer events from the window until the window close (that is,
* until the end-time passes, and additionally until the grace period expires). Since windowed operators
* are required to reject out-of-order events for a window whose grace period has expired, there is an additional
* guarantee that the final results emitted from this suppression will match any queryable state upstream.
*
* @param bufferConfig A configuration specifying how much space to use for buffering intermediate results.
* This is required to be a "strict" config, since it would violate the "final results"
* property to emit early and then issue an update later.
* @return a "final results" mode suppression configuration
*/
static Suppressed<Windowed> untilWindowCloses(final StrictBufferConfig bufferConfig) {
return new FinalResultsSuppressionBuilder<>(null, bufferConfig);
}
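/*
 * A usage sketch of "final results" mode, assuming a windowed word-count topology (the
 * "words" and "final-counts" topic names, window size, and grace period are illustrative;
 * writing the windowed keys out requires an appropriately configured windowed key serde):
 *
 *   builder.<String, String>stream("words")
 *          .groupByKey()
 *          .windowedBy(TimeWindows.of(Duration.ofMinutes(5)).grace(Duration.ofMinutes(1)))
 *          .count()
 *          .suppress(Suppressed.untilWindowCloses(BufferConfig.unbounded()))
 *          .toStream()
 *          .to("final-counts");
 */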
/**
* Configure the suppression to wait {@code timeToWaitForMoreEvents} amount of time after receiving a record
* before emitting it further downstream. If another record for the same key arrives in the meantime, it replaces
* the first record in the buffer but does <em>not</em> re-start the timer.
*
* @param timeToWaitForMoreEvents The amount of time to wait, per record, for new events.
* @param bufferConfig A configuration specifying how much space to use for buffering intermediate results.
* @param <K> The key type for the KTable to apply this suppression to.
* @return a suppression configuration
*/
static <K> Suppressed<K> untilTimeLimit(final Duration timeToWaitForMoreEvents, final BufferConfig bufferConfig) {
return new SuppressedInternal<>(null, timeToWaitForMoreEvents, bufferConfig, null, false);
}
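/*
 * A usage sketch: rate-limit updates of a KTable to at most one per key every 30 seconds,
 * buffering at most 1,000 keys (the table variable and the limits are illustrative):
 *
 *   table.suppress(Suppressed.untilTimeLimit(Duration.ofSeconds(30), BufferConfig.maxRecords(1_000L)));
 */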
/**
* Use the specified name for the suppression node in the topology.
* <p>
* This can be used to insert a suppression without changing the rest of the topology names
* (and therefore not requiring an application reset).
* <p>
* Note however, that once a suppression has buffered some records, removing it from the topology would cause
* the loss of those records.
* <p>
* A suppression can be "disabled" with the configuration {@code untilTimeLimit(Duration.ZERO, ...)}.
*
* @param name The name to be used for the suppression node and changelog topic
* @return The same configuration with the addition of the given {@code name}.
*/
@Override
Suppressed<K> withName(final String name);
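/*
 * A usage sketch of naming the suppression node so it can be inserted into an existing
 * topology without renaming other nodes (the name "suppress-alerts" is illustrative):
 *
 *   table.suppress(Suppressed.untilTimeLimit(Duration.ofSeconds(30), BufferConfig.unbounded())
 *                            .withName("suppress-alerts"));
 */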
}

View File

@@ -0,0 +1,248 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.WindowStore;
import java.time.Duration;
/**
* {@code TimeWindowedCogroupedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation of a {@link CogroupedKStream} in order to apply a windowed aggregation operation
* on the original {@link KGroupedStream} records resulting in a windowed {@link KTable} (a <em>windowed</em>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* The specified {@code windows} define either hopping time windows that can be overlapping or tumbling (c.f.
* {@link TimeWindows}) or they define landmark windows (c.f. {@link UnlimitedWindows}).
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to windows until their grace period ends (see {@link TimeWindows#grace(Duration)}).
* <p>
* A {@code TimeWindowedCogroupedKStream} must be obtained from a {@link CogroupedKStream} via
* {@link CogroupedKStream#windowedBy(Windows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
* @see CogroupedKStream
*/
public interface TimeWindowedCogroupedKStream<K, V> {
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer);
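/*
 * A usage sketch of a windowed cogrouped aggregation, assuming two grouped streams
 * ("clicks" and "views") and a hypothetical ClickViewStats aggregate type whose
 * addClick/addView methods return the updated aggregate (all names are illustrative):
 *
 *   KTable<Windowed<String>, ClickViewStats> stats =
 *       clicks.cogroup((key, click, agg) -> agg.addClick(click))
 *             .cogroup(views, (key, view, agg) -> agg.addView(view))
 *             .windowedBy(TimeWindows.of(Duration.ofMinutes(1)))
 *             .aggregate(ClickViewStats::new);
 */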
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Named, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Named named);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} (as specified in {@link KGroupedStream#cogroup(Aggregator)} or
* {@link CogroupedKStream#cogroup(KGroupedStream, Aggregator)}) is applied for each input record and computes a new
* aggregate using the current aggregate (or for the very first record using the intermediate aggregation result
* provided via the {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count or sum etc.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> aggregate(final Initializer<V> initializer,
final Named named,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
}

View File

@@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.state.internals.WindowKeySchema;
import java.util.Map;
/**
* The inner serde class can be specified by setting the property
* {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
* {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}
* when the no-arg constructor is used and the inner deserializer is therefore not supplied at construction time.
*/
public class TimeWindowedDeserializer<T> implements Deserializer<Windowed<T>> {
private final Long windowSize;
private boolean isChangelogTopic;
private Deserializer<T> inner;
// Default constructor needed by Kafka
public TimeWindowedDeserializer() {
this(null, Long.MAX_VALUE);
}
// TODO: fix this part as last bits of KAFKA-4468
public TimeWindowedDeserializer(final Deserializer<T> inner) {
this(inner, Long.MAX_VALUE);
}
public TimeWindowedDeserializer(final Deserializer<T> inner, final long windowSize) {
this.inner = inner;
this.windowSize = windowSize;
this.isChangelogTopic = false;
}
public Long getWindowSize() {
return this.windowSize;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Serde.class.cast(Utils.newInstance(value, Serde.class)).deserializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public Windowed<T> deserialize(final String topic, final byte[] data) {
WindowedSerdes.verifyInnerDeserializerNotNull(inner, this);
if (data == null || data.length == 0) {
return null;
}
// toStoreKeyBinary was used to serialize the data.
if (this.isChangelogTopic) {
return WindowKeySchema.fromStoreKey(data, windowSize, inner, topic);
}
// toBinary was used to serialize the data
return WindowKeySchema.from(data, windowSize, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
public void setIsChangelogTopic(final boolean isChangelogTopic) {
this.isChangelogTopic = isChangelogTopic;
}
// Only for testing
Deserializer<T> innerDeserializer() {
return inner;
}
}
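/*
 * A construction sketch, assuming string keys and a five-minute window size (the window
 * size and the choice of inner deserializer are illustrative):
 *
 *   Deserializer<Windowed<String>> windowedKeyDeserializer =
 *       new TimeWindowedDeserializer<>(new StringDeserializer(), Duration.ofMinutes(5).toMillis());
 *
 * Alternatively, the no-arg constructor can be combined with the
 * StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS property, in which case configure()
 * instantiates the inner deserializer reflectively.
 */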

View File

@@ -0,0 +1,637 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StoreQueryParameters;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.state.WindowStore;
import java.time.Duration;
/**
* {@code TimeWindowedKStream} is an abstraction of a <i>windowed</i> record stream of {@link KeyValue} pairs.
* It is an intermediate representation after a grouping and windowing of a {@link KStream} before an aggregation is
* applied to the new (partitioned) windows resulting in a windowed {@link KTable} (a <em>windowed</em>
* {@code KTable} is a {@link KTable} with key type {@link Windowed Windowed<K>}).
* <p>
* The specified {@code windows} define either hopping time windows that can be overlapping or tumbling (c.f.
* {@link TimeWindows}) or they define landmark windows (c.f. {@link UnlimitedWindows}).
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating
* materialized view) that can be queried using the name provided in the {@link Materialized} instance.
* Furthermore, updates to the store are sent downstream into a windowed {@link KTable} changelog stream, where
* "windowed" implies that the {@link KTable} key is a combined key of the original record key and a window ID.
* New events are added to {@link TimeWindows} until their grace period ends (see {@link TimeWindows#grace(Duration)}).
* <p>
* A {@code TimeWindowedKStream} must be obtained from a {@link KGroupedStream} via
* {@link KGroupedStream#windowedBy(Windows)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KStream
* @see KGroupedStream
*/
public interface TimeWindowedKStream<K, V> {
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count();
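/*
 * A usage sketch of a tumbling-window count, assuming a KStream<String, String> named
 * "words" (the stream variable and window size are illustrative):
 *
 *   KTable<Windowed<String>, Long> counts =
 *       words.groupByKey()
 *            .windowedBy(TimeWindows.of(Duration.ofMinutes(1)))
 *            .count();
 */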
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* The default key serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #count(Named, Materialized)}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count(final Named named);
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> countForWordsForWindows = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count(final Materialized<K, Long, WindowStore<Bytes, byte[]>> materialized);
/**
* Count the number of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the name provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> countForWordsForWindows = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized an instance of {@link Materialized} used to materialize a state store. Cannot be {@code null}.
* Note: the valueSerde will be automatically set to {@link org.apache.kafka.common.serialization.Serdes#Long() Serdes#Long()}
* if there is no valueSerde provided
* @return a windowed {@link KTable} that contains "update" records with unmodified keys and {@link Long} values
* that represent the latest (rolling) count (i.e., number of records) for each key within a window
*/
KTable<Windowed<K>, Long> count(final Named named,
final Materialized<K, Long, WindowStore<Bytes, byte[]>> materialized);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #aggregate(Initializer, Aggregator, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use
* {@link #aggregate(Initializer, Aggregator, Named, Materialized)}.
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* Note that the internal store name may not be queryable through Interactive Queries.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Named named);
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Materialized<K, VR, WindowStore<Bytes, byte[]>> materialized);
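/*
 * A usage sketch of a materialized windowed aggregation, assuming a KStream<String, Long>
 * of purchase amounts named "purchases" (stream variable, store name, window size, and
 * serdes are illustrative):
 *
 *   KTable<Windowed<String>, Long> hourlyTotals =
 *       purchases.groupByKey()
 *                .windowedBy(TimeWindows.of(Duration.ofHours(1)))
 *                .aggregate(
 *                    () -> 0L,
 *                    (key, amount, total) -> total + amount,
 *                    Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("hourly-totals")
 *                                .withValueSerde(Serdes.Long()));
 */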
/**
* Aggregate the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Aggregating is a generalization of {@link #reduce(Reducer) combining via reduce(...)} as it, for example,
* allows the result to have a different type than the input values.
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
* The specified {@link Initializer} is applied directly before the first input record (per key) in each window is
* processed to provide an initial intermediate aggregation result that is used to process the first record for
* the window (per key).
* The specified {@link Aggregator} is applied for each input record and computes a new aggregate using the current
* aggregate (or for the very first record using the intermediate aggregation result provided via the
* {@link Initializer}) and the record's value.
* Thus, {@code aggregate()} can be used to compute aggregate functions like count (c.f. {@link #count()}).
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
* String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
* ReadOnlyWindowStore<String,Long> localWindowStore = streams.store(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore());
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
* WindowStoreIterator<Long> aggregateStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the {@link Materialized} instance must be a valid Kafka topic name and
* cannot contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the
* provided store name defined in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param initializer an {@link Initializer} that computes an initial intermediate aggregation result. Cannot be {@code null}.
* @param aggregator an {@link Aggregator} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @param <VR> the value type of the resulting {@link KTable}
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
<VR> KTable<Windowed<K>, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Named named,
final Materialized<K, VR, WindowStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value
* (c.f. {@link #aggregate(Initializer, Aggregator)}).
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #reduce(Reducer, Materialized)}.
* <p>
* The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer);
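/*
 * A usage sketch of a windowed reduce that keeps the per-key maximum, assuming a
 * KStream<String, Long> of sensor readings named "readings" (stream variable and window
 * size are illustrative):
 *
 *   KTable<Windowed<String>, Long> windowedMax =
 *       readings.groupByKey()
 *               .windowedBy(TimeWindows.of(Duration.ofMinutes(10)))
 *               .reduce((agg, value) -> Math.max(agg, value));
 */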
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view).
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* The default key and value serde from the config will be used for serializing the result.
* If a different serde is required then you should use {@link #reduce(Reducer, Named, Materialized)}.
* <p>
* The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
* // At the example of a Reducer<Long>
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache is used to deduplicate consecutive updates to
* the same window and key.
* The rate of propagated updates depends on your input data rate, the number of distinct keys, the number of
* parallel running Kafka Streams instances, and the {@link StreamsConfig configuration} parameters for
* {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
* {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-${internalStoreName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "internalStoreName" is an internal name
* and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer, final Named named);
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // Using a Reducer<Long> that sums values as an example
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
* {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String, Long> localWindowStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore()));
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
 * WindowStoreIterator<Long> reduceStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@code Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
/**
* Combine the values of records in this stream by the grouped key and defined windows.
* Records with {@code null} key or value are ignored.
* Combining implies that the type of the aggregate result is the same as the type of the input value.
* <p>
* The result is written into a local {@link WindowStore} (which is basically an ever-updating materialized view)
* that can be queried using the store name as provided with {@link Materialized}.
* Furthermore, updates to the store are sent downstream into a {@link KTable} changelog stream.
* <p>
 * The value of the first record per window initializes the aggregation result.
* The specified {@link Reducer} is applied for each additional input record per window and computes a new
* aggregate using the current aggregate (first argument) and the record's value (second argument):
* <pre>{@code
 * // Using a Reducer<Long> that sums values as an example
* new Reducer<Long>() {
* public Long apply(Long aggValue, Long currValue) {
* return aggValue + currValue;
* }
* }
* }</pre>
* Thus, {@code reduce()} can be used to compute aggregate functions like sum, min, or max.
* <p>
* Not all updates might get sent downstream, as an internal cache will be used to deduplicate consecutive updates
* to the same window and key if caching is enabled on the {@link Materialized} instance.
* When caching is enabled the rate of propagated updates depends on your input data rate, the number of distinct
* keys, the number of parallel running Kafka Streams instances, and the {@link StreamsConfig configuration}
* parameters for {@link StreamsConfig#CACHE_MAX_BYTES_BUFFERING_CONFIG cache size}, and
 * {@link StreamsConfig#COMMIT_INTERVAL_MS_CONFIG commit interval}.
* <p>
* To query the local {@link WindowStore} it must be obtained via
 * {@link KafkaStreams#store(StoreQueryParameters) KafkaStreams#store(...)}:
* <pre>{@code
* KafkaStreams streams = ... // counting words
 * String queryableStoreName = ... // the queryableStoreName should be the name of the store as defined by the Materialized instance
 * ReadOnlyWindowStore<String, Long> localWindowStore = streams.store(StoreQueryParameters.fromNameAndType(queryableStoreName, QueryableStoreTypes.<String, Long>windowStore()));
*
* String key = "some-word";
* long fromTime = ...;
* long toTime = ...;
 * WindowStoreIterator<Long> reduceStore = localWindowStore.fetch(key, fromTime, toTime); // key must be local (application state is shared over all running Kafka Streams instances)
* }</pre>
* For non-local keys, a custom RPC mechanism must be implemented using {@link KafkaStreams#allMetadata()} to
* query the value of the key on a parallel running instance of your Kafka Streams application.
* <p>
* For failure and recovery the store will be backed by an internal changelog topic that will be created in Kafka.
* Therefore, the store name defined by the Materialized instance must be a valid Kafka topic name and cannot
* contain characters other than ASCII alphanumerics, '.', '_' and '-'.
* The changelog topic will be named "${applicationId}-${storeName}-changelog", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
 * {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is the provided store name defined
* in {@link Materialized}, and "-changelog" is a fixed suffix.
* <p>
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param reducer a {@link Reducer} that computes a new aggregate result. Cannot be {@code null}.
* @param named a {@link Named} config used to name the processor in the topology. Cannot be {@code null}.
* @param materialized a {@link Materialized} config used to materialize a state store. Cannot be {@code null}.
* @return a windowed {@link KTable} that contains "update" records with unmodified keys, and values that represent
* the latest (rolling) aggregate for each key within a window
*/
KTable<Windowed<K>, V> reduce(final Reducer<V> reducer,
final Named named,
final Materialized<K, V, WindowStore<Bytes, byte[]>> materialized);
}
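A minimal sketch (not part of the file above) of how the reduce(Reducer, Named, Materialized) variant documented here could be wired into a topology and then queried interactively. The topic name "purchases", the store name "max-purchase-per-hour", the StreamsBuilder named builder, the running KafkaStreams instance named streams, and the usual org.apache.kafka.streams imports are assumptions for illustration.

StreamsBuilder builder = new StreamsBuilder();
KTable<Windowed<String>, Long> hourlyMax = builder
    .stream("purchases", Consumed.with(Serdes.String(), Serdes.Long()))
    .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
    .windowedBy(TimeWindows.of(Duration.ofHours(1)))
    .reduce(
        Long::max,                                        // Reducer<Long>: keep the larger of the two values
        Named.as("max-purchase-reducer"),
        Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("max-purchase-per-hour")
            .withValueSerde(Serdes.Long()));

// Interactive query of the materialized window store, following the Javadoc above.
ReadOnlyWindowStore<String, Long> store = streams.store(
    StoreQueryParameters.fromNameAndType("max-purchase-per-hour", QueryableStoreTypes.<String, Long>windowStore()));
try (WindowStoreIterator<Long> iter =
         store.fetch("some-key", Instant.now().minus(Duration.ofHours(6)), Instant.now())) {
    iter.forEachRemaining(kv -> System.out.println(kv.key + " -> " + kv.value)); // kv.key is the window start timestamp
}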

View File

@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.internals.WindowedSerializer;
import org.apache.kafka.streams.state.internals.WindowKeySchema;
import java.util.Map;
/**
 * If the no-arg constructor is used, and hence no inner serde is passed during construction, the inner serde
 * class can be specified by setting the property
 * {@link StreamsConfig#DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS} or
 * {@link StreamsConfig#DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS}.
*/
public class TimeWindowedSerializer<T> implements WindowedSerializer<T> {
private Serializer<T> inner;
// Default constructor needed by Kafka
@SuppressWarnings("WeakerAccess")
public TimeWindowedSerializer() {}
public TimeWindowedSerializer(final Serializer<T> inner) {
this.inner = inner;
}
@SuppressWarnings("unchecked")
@Override
public void configure(final Map<String, ?> configs, final boolean isKey) {
if (inner == null) {
final String propertyName = isKey ? StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS : StreamsConfig.DEFAULT_WINDOWED_VALUE_SERDE_INNER_CLASS;
final String value = (String) configs.get(propertyName);
try {
inner = Utils.newInstance(value, Serde.class).serializer();
inner.configure(configs, isKey);
} catch (final ClassNotFoundException e) {
throw new ConfigException(propertyName, value, "Serde class " + value + " could not be found.");
}
}
}
@Override
public byte[] serialize(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
if (data == null) {
return null;
}
return WindowKeySchema.toBinary(data, inner, topic);
}
@Override
public void close() {
if (inner != null) {
inner.close();
}
}
@Override
public byte[] serializeBaseKey(final String topic, final Windowed<T> data) {
WindowedSerdes.verifyInnerSerializerNotNull(inner, this);
return inner.serialize(topic, data.key());
}
// Only for testing
Serializer<T> innerSerializer() {
return inner;
}
}
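A minimal usage sketch (not part of the file above): the inner serde can be supplied directly via the constructor or the assumed WindowedSerdes.timeWindowedSerdeFrom(Class) factory, or indirectly via the config keys mentioned in the class Javadoc when the no-arg constructor is used. The table windowedCounts (a KTable<Windowed<String>, Long> from some windowed aggregation), the output topic name, and the serde choices are assumptions.

// Explicitly wrap an inner serde for the windowed key.
Serde<Windowed<String>> windowedKeySerde = WindowedSerdes.timeWindowedSerdeFrom(String.class);
windowedCounts.toStream().to("counts-by-window", Produced.with(windowedKeySerde, Serdes.Long()));

// Alternatively, configure the inner serde class so the no-arg constructor can resolve it at configure() time.
Properties props = new Properties();
props.put(StreamsConfig.DEFAULT_WINDOWED_KEY_SERDE_INNER_CLASS, Serdes.StringSerde.class.getName());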

View File

@@ -0,0 +1,285 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.kstream.internals.TimeWindow;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
import java.time.Duration;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* The fixed-size time-based window specifications used for aggregations.
* <p>
* The semantics of time-based aggregation windows are: Every T1 (advance) milliseconds, compute the aggregate total for
* T2 (size) milliseconds.
* <ul>
 * <li> If {@code advance < size} a hopping window is defined:<br />
 *      it discretizes a stream into overlapping windows, which implies that a record may be contained in one or
 *      more "adjacent" windows.</li>
 * <li> If {@code advance == size} a tumbling window is defined:<br />
 *      it discretizes a stream into non-overlapping windows, which implies that a record is only ever contained in
 *      one and only one tumbling window.</li>
* </ul>
* Thus, the specified {@link TimeWindow}s are aligned to the epoch.
 * Aligned to the epoch means that the first window starts at timestamp zero.
 * For example, hopping windows with a size of 5000ms and an advance of 3000ms have window boundaries
* [0;5000),[3000;8000),... and not [1000;6000),[4000;9000),... or even something "random" like [1452;6452),[4452;9452),...
* <p>
* For time semantics, see {@link TimestampExtractor}.
*
* @see SessionWindows
* @see UnlimitedWindows
* @see JoinWindows
* @see KGroupedStream#windowedBy(Windows)
* @see TimestampExtractor
*/
public final class TimeWindows extends Windows<TimeWindow> {
private final long maintainDurationMs;
/** The size of the windows in milliseconds. */
@SuppressWarnings("WeakerAccess")
public final long sizeMs;
/**
* The size of the window's advance interval in milliseconds, i.e., by how much a window moves forward relative to
* the previous one.
*/
@SuppressWarnings("WeakerAccess")
public final long advanceMs;
private final long graceMs;
private TimeWindows(final long sizeMs, final long advanceMs, final long graceMs, final long maintainDurationMs) {
this.sizeMs = sizeMs;
this.advanceMs = advanceMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
/** Private constructor for preserving segments. Can be removed along with Windows.segments. **/
@Deprecated
private TimeWindows(final long sizeMs,
final long advanceMs,
final long graceMs,
final long maintainDurationMs,
final int segments) {
super(segments);
this.sizeMs = sizeMs;
this.advanceMs = advanceMs;
this.graceMs = graceMs;
this.maintainDurationMs = maintainDurationMs;
}
/**
* Return a window definition with the given window size, and with the advance interval being equal to the window
* size.
* The time interval represented by the N-th window is: {@code [N * size, N * size + size)}.
* <p>
* This provides the semantics of tumbling windows, which are fixed-sized, gap-less, non-overlapping windows.
* Tumbling windows are a special case of hopping windows with {@code advance == size}.
*
* @param sizeMs The size of the window in milliseconds
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the specified window size is zero or negative
* @deprecated Use {@link #of(Duration)} instead
*/
@Deprecated
public static TimeWindows of(final long sizeMs) throws IllegalArgumentException {
if (sizeMs <= 0) {
throw new IllegalArgumentException("Window size (sizeMs) must be larger than zero.");
}
// This is a static factory method, so we initialize grace and retention to the defaults.
return new TimeWindows(sizeMs, sizeMs, -1, DEFAULT_RETENTION_MS);
}
/**
* Return a window definition with the given window size, and with the advance interval being equal to the window
* size.
* The time interval represented by the N-th window is: {@code [N * size, N * size + size)}.
* <p>
* This provides the semantics of tumbling windows, which are fixed-sized, gap-less, non-overlapping windows.
* Tumbling windows are a special case of hopping windows with {@code advance == size}.
*
* @param size The size of the window
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the specified window size is zero or negative or can't be represented as {@code long milliseconds}
*/
@SuppressWarnings("deprecation") // removing #of(final long sizeMs) will fix this
public static TimeWindows of(final Duration size) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(size, "size");
return of(ApiUtils.validateMillisecondDuration(size, msgPrefix));
}
/**
* Return a window definition with the original size, but advance ("hop") the window by the given interval, which
* specifies by how much a window moves forward relative to the previous one.
* The time interval represented by the N-th window is: {@code [N * advance, N * advance + size)}.
* <p>
* This provides the semantics of hopping windows, which are fixed-sized, overlapping windows.
*
* @param advanceMs The advance interval ("hop") in milliseconds of the window, with the requirement that {@code 0 < advanceMs <= sizeMs}.
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the advance interval is negative, zero, or larger than the window size
* @deprecated Use {@link #advanceBy(Duration)} instead
*/
@Deprecated
public TimeWindows advanceBy(final long advanceMs) {
if (advanceMs <= 0 || advanceMs > sizeMs) {
throw new IllegalArgumentException(String.format("Window advancement interval should be more than zero " +
"and less than window duration which is %d ms, but given advancement interval is: %d ms", sizeMs, advanceMs));
}
return new TimeWindows(sizeMs, advanceMs, graceMs, maintainDurationMs, segments);
}
/**
* Return a window definition with the original size, but advance ("hop") the window by the given interval, which
* specifies by how much a window moves forward relative to the previous one.
* The time interval represented by the N-th window is: {@code [N * advance, N * advance + size)}.
* <p>
* This provides the semantics of hopping windows, which are fixed-sized, overlapping windows.
*
* @param advance The advance interval ("hop") of the window, with the requirement that {@code 0 < advance.toMillis() <= sizeMs}.
* @return a new window definition with default maintain duration of 1 day
* @throws IllegalArgumentException if the advance interval is negative, zero, or larger than the window size
*/
@SuppressWarnings("deprecation") // removing #advanceBy(final long advanceMs) will fix this
public TimeWindows advanceBy(final Duration advance) {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(advance, "advance");
return advanceBy(ApiUtils.validateMillisecondDuration(advance, msgPrefix));
}
@Override
public Map<Long, TimeWindow> windowsFor(final long timestamp) {
long windowStart = (Math.max(0, timestamp - sizeMs + advanceMs) / advanceMs) * advanceMs;
final Map<Long, TimeWindow> windows = new LinkedHashMap<>();
while (windowStart <= timestamp) {
final TimeWindow window = new TimeWindow(windowStart, windowStart + sizeMs);
windows.put(windowStart, window);
windowStart += advanceMs;
}
return windows;
}
@Override
public long size() {
return sizeMs;
}
/**
     * Reject out-of-order events that arrive more than {@code afterWindowEnd}
     * after the end of their windows.
* <p>
* Delay is defined as (stream_time - record_timestamp).
*
* @param afterWindowEnd The grace period to admit out-of-order events to a window.
* @return this updated builder
* @throws IllegalArgumentException if {@code afterWindowEnd} is negative or can't be represented as {@code long milliseconds}
*/
@SuppressWarnings("deprecation") // will be fixed when we remove segments from Windows
public TimeWindows grace(final Duration afterWindowEnd) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(afterWindowEnd, "afterWindowEnd");
final long afterWindowEndMs = ApiUtils.validateMillisecondDuration(afterWindowEnd, msgPrefix);
if (afterWindowEndMs < 0) {
throw new IllegalArgumentException("Grace period must not be negative.");
}
return new TimeWindows(sizeMs, advanceMs, afterWindowEndMs, maintainDurationMs, segments);
}
@SuppressWarnings("deprecation") // continuing to support Windows#maintainMs/segmentInterval in fallback mode
@Override
public long gracePeriodMs() {
// NOTE: in the future, when we remove maintainMs,
// we should default the grace period to 24h to maintain the default behavior,
// or we can default to (24h - size) if you want to be super accurate.
return graceMs != -1 ? graceMs : maintainMs() - size();
}
/**
* @param durationMs the window retention time
* @return itself
     * @throws IllegalArgumentException if {@code durationMs} is smaller than the window size
*
* @deprecated since 2.1. Use {@link Materialized#retention} or directly configure the retention in a store supplier
* and use {@link Materialized#as(WindowBytesStoreSupplier)}.
*/
@Override
@Deprecated
public TimeWindows until(final long durationMs) throws IllegalArgumentException {
if (durationMs < sizeMs) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be smaller than the window size.");
}
return new TimeWindows(sizeMs, advanceMs, graceMs, durationMs, segments);
}
/**
* {@inheritDoc}
* <p>
     * For {@code TimeWindows} the maintain duration is at least as large as the window size.
*
* @return the window maintain duration
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Override
@Deprecated
public long maintainMs() {
return Math.max(maintainDurationMs, sizeMs);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final TimeWindows that = (TimeWindows) o;
return maintainDurationMs == that.maintainDurationMs &&
segments == that.segments &&
sizeMs == that.sizeMs &&
advanceMs == that.advanceMs &&
graceMs == that.graceMs;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public int hashCode() {
return Objects.hash(maintainDurationMs, segments, sizeMs, advanceMs, graceMs);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public String toString() {
return "TimeWindows{" +
"maintainDurationMs=" + maintainDurationMs +
", sizeMs=" + sizeMs +
", advanceMs=" + advanceMs +
", graceMs=" + graceMs +
", segments=" + segments +
'}';
}
}
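A minimal sketch of the two window shapes described above (tumbling vs. hopping). The topic name "clicks", the 30-second grace period, the StreamsBuilder named builder, and the default serdes are illustrative assumptions.

// Tumbling: 5-minute windows, advance == size (the default when only of() is called).
TimeWindows tumbling = TimeWindows.of(Duration.ofMinutes(5));

// Hopping: 5-minute windows advancing every minute, accepting records up to 30 seconds late.
TimeWindows hopping = TimeWindows.of(Duration.ofMinutes(5))
    .advanceBy(Duration.ofMinutes(1))
    .grace(Duration.ofSeconds(30));

KTable<Windowed<String>, Long> clicksPerWindow = builder
    .<String, String>stream("clicks")
    .groupByKey()
    .windowedBy(hopping)
    .count();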

View File

@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.Punctuator;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.To;
/**
* The {@code Transformer} interface is for stateful mapping of an input record to zero, one, or multiple new output
* records (both key and value type can be altered arbitrarily).
 * This is a stateful record-by-record operation, i.e., {@link #transform(Object, Object)} is invoked individually for
* each record of a stream and can access and modify a state that is available beyond a single call of
* {@link #transform(Object, Object)} (cf. {@link KeyValueMapper} for stateless record transformation).
* Additionally, this {@code Transformer} can {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule}
* a method to be {@link Punctuator#punctuate(long) called periodically} with the provided context.
* <p>
 * Use {@link TransformerSupplier} to provide new instances of {@code Transformer} to the Kafka Streams runtime.
* <p>
* If only a record's value should be modified {@link ValueTransformer} can be used.
*
* @param <K> key type
* @param <V> value type
* @param <R> {@link KeyValue} return type (both key and value type can be set
* arbitrarily)
* @see TransformerSupplier
* @see KStream#transform(TransformerSupplier, String...)
* @see ValueTransformer
* @see KStream#map(KeyValueMapper)
* @see KStream#flatMap(KeyValueMapper)
*/
public interface Transformer<K, V, R> {
/**
* Initialize this transformer.
* This is called once per instance when the topology gets initialized.
* When the framework is done with the transformer, {@link #close()} will be called on it; the
* framework may later re-use the transformer by calling {@link #init(ProcessorContext)} again.
* <p>
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
* <p>
     * Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object, Object)}.
*
* @param context the context
*/
void init(final ProcessorContext context);
/**
* Transform the record with the given key and value.
* Additionally, any {@link StateStore state} that is {@link KStream#transform(TransformerSupplier, String...)
* attached} to this operator can be accessed and modified
* arbitrarily (cf. {@link ProcessorContext#getStateStore(String)}).
* <p>
     * If only one record should be forwarded downstream, {@code transform} can return a new {@link KeyValue}. If
* more than one output record should be forwarded downstream, {@link ProcessorContext#forward(Object, Object)}
* and {@link ProcessorContext#forward(Object, Object, To)} can be used.
* If no record should be forwarded downstream, {@code transform} can return {@code null}.
*
* Note that returning a new {@link KeyValue} is merely for convenience. The same can be achieved by using
* {@link ProcessorContext#forward(Object, Object)} and returning {@code null}.
*
* @param key the key for the record
* @param value the value for the record
* @return new {@link KeyValue} pair&mdash;if {@code null} no key-value pair will
     *         be forwarded downstream
*/
R transform(final K key, final V value);
/**
* Close this transformer and clean up any resources. The framework may
* later re-use this transformer by calling {@link #init(ProcessorContext)} on it again.
* <p>
* To generate new {@link KeyValue} pairs {@link ProcessorContext#forward(Object, Object)} and
* {@link ProcessorContext#forward(Object, Object, To)} can be used.
*/
void close();
}
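A minimal sketch of a Transformer implementation, assuming a state store named "counts-store" is attached via the corresponding transform() call: it emits a running per-key count and uses a wall-clock punctuation only to log progress. The class and store names are hypothetical.

import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.kstream.Transformer;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.state.KeyValueStore;

public class CountingTransformer implements Transformer<String, String, KeyValue<String, Long>> {
    private KeyValueStore<String, Long> counts;

    @Override
    @SuppressWarnings("unchecked")
    public void init(final ProcessorContext context) {
        counts = (KeyValueStore<String, Long>) context.getStateStore("counts-store");
        // Log an approximate number of distinct keys once per minute of wall-clock time.
        context.schedule(Duration.ofMinutes(1), PunctuationType.WALL_CLOCK_TIME,
            timestamp -> System.out.println("distinct keys so far: " + counts.approximateNumEntries()));
    }

    @Override
    public KeyValue<String, Long> transform(final String key, final String value) {
        final Long previous = counts.get(key);
        final long updated = (previous == null ? 0L : previous) + 1L;
        counts.put(key, updated);
        return KeyValue.pair(key, updated); // forward exactly one record; return null to forward nothing
    }

    @Override
    public void close() { }
}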

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* A {@code TransformerSupplier} interface which can create one or more {@link Transformer} instances.
*
* @param <K> key type
* @param <V> value type
* @param <R> {@link org.apache.kafka.streams.KeyValue KeyValue} return type (both key and value type can be set
* arbitrarily)
* @see Transformer
* @see KStream#transform(TransformerSupplier, String...)
* @see ValueTransformer
* @see ValueTransformerSupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
*/
public interface TransformerSupplier<K, V, R> {
/**
* Return a new {@link Transformer} instance.
*
* @return a new {@link Transformer} instance
*/
Transformer<K, V, R> get();
}
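A minimal wiring sketch for a supplier, reusing the hypothetical CountingTransformer and "counts-store" from the sketch above. The key point is that the supplier must return a new Transformer instance on every get() call, which a constructor method reference guarantees; topic and store names are assumptions.

StreamsBuilder builder = new StreamsBuilder();
builder.addStateStore(Stores.keyValueStoreBuilder(
    Stores.persistentKeyValueStore("counts-store"),
    Serdes.String(),
    Serdes.Long()));

KStream<String, Long> runningCounts = builder
    .<String, String>stream("input-topic")
    .transform(CountingTransformer::new, "counts-store");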

View File

@@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.internals.ApiUtils;
import org.apache.kafka.streams.kstream.internals.UnlimitedWindow;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.time.Instant;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import static org.apache.kafka.streams.internals.ApiUtils.prepareMillisCheckFailMsgPrefix;
/**
* The unlimited window specifications used for aggregations.
* <p>
 * An unlimited time window is also called a landmark window.
 * It has a fixed starting point while its window end is defined as infinite.
 * In this regard, it is a fixed-size window with infinite window size.
* <p>
* For time semantics, see {@link TimestampExtractor}.
*
* @see TimeWindows
* @see SessionWindows
* @see JoinWindows
* @see KGroupedStream#windowedBy(Windows)
* @see TimestampExtractor
*/
public final class UnlimitedWindows extends Windows<UnlimitedWindow> {
private static final long DEFAULT_START_TIMESTAMP_MS = 0L;
/** The start timestamp of the window. */
@SuppressWarnings("WeakerAccess")
public final long startMs;
private UnlimitedWindows(final long startMs) {
this.startMs = startMs;
}
/**
* Return an unlimited window starting at timestamp zero.
*/
public static UnlimitedWindows of() {
return new UnlimitedWindows(DEFAULT_START_TIMESTAMP_MS);
}
/**
* Return a new unlimited window for the specified start timestamp.
*
* @param startMs the window start time
* @return a new unlimited window that starts at {@code startMs}
* @throws IllegalArgumentException if the start time is negative
* @deprecated Use {@link #startOn(Instant)} instead
*/
@Deprecated
public UnlimitedWindows startOn(final long startMs) throws IllegalArgumentException {
if (startMs < 0) {
throw new IllegalArgumentException("Window start time (startMs) cannot be negative.");
}
return new UnlimitedWindows(startMs);
}
/**
* Return a new unlimited window for the specified start timestamp.
*
* @param start the window start time
* @return a new unlimited window that starts at {@code start}
* @throws IllegalArgumentException if the start time is negative or can't be represented as {@code long milliseconds}
*/
public UnlimitedWindows startOn(final Instant start) throws IllegalArgumentException {
final String msgPrefix = prepareMillisCheckFailMsgPrefix(start, "start");
return startOn(ApiUtils.validateMillisecondInstant(start, msgPrefix));
}
@Override
public Map<Long, UnlimitedWindow> windowsFor(final long timestamp) {
// always return the single unlimited window
        // we cannot use Collections.singletonMap since it does not support remove()
final Map<Long, UnlimitedWindow> windows = new HashMap<>();
if (timestamp >= startMs) {
windows.put(startMs, new UnlimitedWindow(startMs));
}
return windows;
}
/**
* {@inheritDoc}
     * As unlimited windows have conceptually infinite size, this method just returns {@link Long#MAX_VALUE}.
*
* @return the size of the specified windows which is {@link Long#MAX_VALUE}
*/
@Override
public long size() {
return Long.MAX_VALUE;
}
/**
* Throws an {@link IllegalArgumentException} because the retention time for unlimited windows is always infinite
* and cannot be changed.
*
* @throws IllegalArgumentException on every invocation.
* @deprecated since 2.1.
*/
@Override
@Deprecated
public UnlimitedWindows until(final long durationMs) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be set for UnlimitedWindows.");
}
/**
* {@inheritDoc}
     * The retention time for unlimited windows is infinite and thus represented as {@link Long#MAX_VALUE}.
*
* @return the window retention time that is {@link Long#MAX_VALUE}
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Override
@Deprecated
public long maintainMs() {
return Long.MAX_VALUE;
}
@Override
public long gracePeriodMs() {
return 0L;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final UnlimitedWindows that = (UnlimitedWindows) o;
return startMs == that.startMs && segments == that.segments;
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public int hashCode() {
return Objects.hash(startMs, segments);
}
@SuppressWarnings("deprecation") // removing segments from Windows will fix this
@Override
public String toString() {
return "UnlimitedWindows{" +
"startMs=" + startMs +
", segments=" + segments +
'}';
}
}
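A minimal landmark-window sketch: count every event per key from a fixed start instant onward. The topic name "page-views", the start instant, the StreamsBuilder named builder, and the default serdes are assumptions.

KTable<Windowed<String>, Long> totalsSinceLaunch = builder
    .<String, String>stream("page-views")
    .groupByKey()
    .windowedBy(UnlimitedWindows.of().startOn(Instant.parse("2020-01-01T00:00:00Z")))
    .count();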

View File

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ValueJoiner} interface for joining two values into a new value of arbitrary type.
 * This is a stateless operation, i.e., {@link #apply(Object, Object)} is invoked individually for each joining
* record-pair of a {@link KStream}-{@link KStream}, {@link KStream}-{@link KTable}, or {@link KTable}-{@link KTable}
* join.
*
* @param <V1> first value type
* @param <V2> second value type
* @param <VR> joined value type
* @see KStream#join(KStream, ValueJoiner, JoinWindows)
* @see KStream#join(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows)
* @see KStream#outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see KStream#join(KTable, ValueJoiner)
* @see KStream#join(KTable, ValueJoiner, Joined)
* @see KStream#leftJoin(KTable, ValueJoiner)
* @see KStream#leftJoin(KTable, ValueJoiner, Joined)
* @see KTable#join(KTable, ValueJoiner)
* @see KTable#leftJoin(KTable, ValueJoiner)
* @see KTable#outerJoin(KTable, ValueJoiner)
*/
public interface ValueJoiner<V1, V2, VR> {
/**
* Return a joined value consisting of {@code value1} and {@code value2}.
*
* @param value1 the first value for joining
* @param value2 the second value for joining
* @return the joined value
*/
VR apply(final V1 value1, final V2 value2);
}
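A minimal stream-table join sketch; Order, Customer, and EnrichedOrder are hypothetical domain types, and the topic names and default serdes are assumptions.

KStream<String, Order> orders = builder.stream("orders");
KTable<String, Customer> customers = builder.table("customers");

ValueJoiner<Order, Customer, EnrichedOrder> enrich =
    (order, customer) -> new EnrichedOrder(order, customer);

// For each order, look up the customer with the same key and emit the joined value.
KStream<String, EnrichedOrder> enrichedOrders = orders.join(customers, enrich);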

View File

@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ValueMapper} interface for mapping a value to a new value of arbitrary type.
 * This is a stateless record-by-record operation, i.e., {@link #apply(Object)} is invoked individually for each record
* of a stream (cf. {@link ValueTransformer} for stateful value transformation).
* If {@code ValueMapper} is applied to a {@link org.apache.kafka.streams.KeyValue key-value pair} record the record's
* key is preserved.
* If a record's key and value should be modified {@link KeyValueMapper} can be used.
*
* @param <V> value type
* @param <VR> mapped value type
* @see KeyValueMapper
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see KStream#mapValues(ValueMapper)
* @see KStream#mapValues(ValueMapperWithKey)
* @see KStream#flatMapValues(ValueMapper)
* @see KStream#flatMapValues(ValueMapperWithKey)
* @see KTable#mapValues(ValueMapper)
* @see KTable#mapValues(ValueMapperWithKey)
*/
public interface ValueMapper<V, VR> {
/**
* Map the given value to a new value.
*
* @param value the value to be mapped
* @return the new value
*/
VR apply(final V value);
}
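A minimal sketch of value-only mapping: because the key is untouched, no repartitioning is triggered downstream. The topic name "text-lines" and the StreamsBuilder named builder are assumptions.

KStream<String, String> lines = builder.stream("text-lines");

// Method reference as a ValueMapper<String, Integer>.
KStream<String, Integer> lineLengths = lines.mapValues(String::length);

// Lambda as a ValueMapper<String, String>.
KStream<String, String> trimmed = lines.mapValues(value -> value.trim());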

View File

@@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The {@code ValueMapperWithKey} interface for mapping a value to a new value of arbitrary type.
 * This is a stateless record-by-record operation, i.e., {@link #apply(Object, Object)} is invoked individually for each
* record of a stream (cf. {@link ValueTransformer} for stateful value transformation).
* If {@code ValueMapperWithKey} is applied to a {@link org.apache.kafka.streams.KeyValue key-value pair} record the
* record's key is preserved.
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* If a record's key and value should be modified {@link KeyValueMapper} can be used.
*
* @param <K> key type
* @param <V> value type
* @param <VR> mapped value type
* @see KeyValueMapper
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see KStream#mapValues(ValueMapper)
* @see KStream#mapValues(ValueMapperWithKey)
* @see KStream#flatMapValues(ValueMapper)
* @see KStream#flatMapValues(ValueMapperWithKey)
* @see KTable#mapValues(ValueMapper)
* @see KTable#mapValues(ValueMapperWithKey)
*/
public interface ValueMapperWithKey<K, V, VR> {
/**
* Map the given [key and ]value to a new value.
*
* @param readOnlyKey the read-only key
* @param value the value to be mapped
* @return the new value
*/
VR apply(final K readOnlyKey, final V value);
}

View File

@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.Punctuator;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.To;
/**
* The {@code ValueTransformer} interface for stateful mapping of a value to a new value (with possible new type).
 * This is a stateful record-by-record operation, i.e., {@link #transform(Object)} is invoked individually for each
* record of a stream and can access and modify a state that is available beyond a single call of
* {@link #transform(Object)} (cf. {@link ValueMapper} for stateless value transformation).
* Additionally, this {@code ValueTransformer} can {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule}
* a method to be {@link Punctuator#punctuate(long) called periodically} with the provided context.
* If {@code ValueTransformer} is applied to a {@link KeyValue} pair record the record's key is preserved.
* <p>
 * Use {@link ValueTransformerSupplier} to provide new instances of {@code ValueTransformer} to the Kafka Streams runtime.
* <p>
* If a record's key and value should be modified {@link Transformer} can be used.
*
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformerSupplier
* @see ValueTransformerWithKeySupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
*/
public interface ValueTransformer<V, VR> {
/**
* Initialize this transformer.
* This is called once per instance when the topology gets initialized.
* When the framework is done with the transformer, {@link #close()} will be called on it; the
* framework may later re-use the transformer by calling {@link #init(ProcessorContext)} again.
* <p>
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
* <p>
* Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object)}.
* <p>
* Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within any method of
* {@code ValueTransformer} and will result in an {@link StreamsException exception}.
*
* @param context the context
* @throws IllegalStateException If store gets registered after initialization is already finished
* @throws StreamsException if the store's change log does not contain the partition
*/
void init(final ProcessorContext context);
/**
* Transform the given value to a new value.
* Additionally, any {@link StateStore} that is {@link KStream#transformValues(ValueTransformerSupplier, String...)
* attached} to this operator can be accessed and modified arbitrarily (cf.
* {@link ProcessorContext#getStateStore(String)}).
* <p>
     * Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within {@code transform} and
* will result in an {@link StreamsException exception}.
*
* @param value the value to be transformed
* @return the new value
*/
VR transform(final V value);
/**
* Close this transformer and clean up any resources. The framework may
* later re-use this transformer by calling {@link #init(ProcessorContext)} on it again.
* <p>
* It is not possible to return any new output records within {@code close()}.
* Using {@link ProcessorContext#forward(Object, Object)} or {@link ProcessorContext#forward(Object, Object, To)}
* will result in an {@link StreamsException exception}.
*/
void close();
}
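A minimal sketch of a ValueTransformer, assuming a state store named "seq-store" is attached via a transformValues() call such as stream.transformValues(SequencingValueTransformer::new, "seq-store"): it tags each value with a per-task sequence number. The key stays untouched, so no repartitioning is needed, and forward() is never used. Class and store names are hypothetical.

import org.apache.kafka.streams.kstream.ValueTransformer;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;

public class SequencingValueTransformer implements ValueTransformer<String, String> {
    private KeyValueStore<String, Long> store;

    @Override
    @SuppressWarnings("unchecked")
    public void init(final ProcessorContext context) {
        store = (KeyValueStore<String, Long>) context.getStateStore("seq-store");
    }

    @Override
    public String transform(final String value) {
        final Long previous = store.get("sequence");
        final long next = (previous == null ? 0L : previous) + 1L;
        store.put("sequence", next);
        return value + " (#" + next + ")"; // exactly one output value per input value
    }

    @Override
    public void close() { }
}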

View File

@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* A {@code ValueTransformerSupplier} interface which can create one or more {@link ValueTransformer} instances.
*
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see ValueTransformerWithKeySupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
* @see TransformerSupplier
* @see KStream#transform(TransformerSupplier, String...)
*/
public interface ValueTransformerSupplier<V, VR> {
/**
* Return a new {@link ValueTransformer} instance.
*
* @return a new {@link ValueTransformer} instance.
*/
ValueTransformer<V, VR> get();
}

View File

@@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.Punctuator;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.To;
/**
* The {@code ValueTransformerWithKey} interface for stateful mapping of a value to a new value (with possible new type).
 * This is a stateful record-by-record operation, i.e., {@link #transform(Object, Object)} is invoked individually for each
* record of a stream and can access and modify a state that is available beyond a single call of
* {@link #transform(Object, Object)} (cf. {@link ValueMapper} for stateless value transformation).
* Additionally, this {@code ValueTransformerWithKey} can
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} with the provided context.
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* If {@code ValueTransformerWithKey} is applied to a {@link KeyValue} pair record the record's key is preserved.
* <p>
* Use {@link ValueTransformerWithKeySupplier} to provide new instances of {@link ValueTransformerWithKey} to
 * the Kafka Streams runtime.
* <p>
* If a record's key and value should be modified {@link Transformer} can be used.
*
* @param <K> key type
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformer
* @see ValueTransformerWithKeySupplier
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
*/
public interface ValueTransformerWithKey<K, V, VR> {
/**
* Initialize this transformer.
* This is called once per instance when the topology gets initialized.
* <p>
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
* <p>
* Note that {@link ProcessorContext} is updated in the background with the current record's meta data.
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object, Object)}.
* <p>
* Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within any method of
* {@code ValueTransformerWithKey} and will result in an {@link StreamsException exception}.
*
* @param context the context
* @throws IllegalStateException If store gets registered after initialization is already finished
* @throws StreamsException if the store's change log does not contain the partition
*/
void init(final ProcessorContext context);
/**
* Transform the given [key and ]value to a new value.
* Additionally, any {@link StateStore} that is {@link KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* attached} to this operator can be accessed and modified arbitrarily (cf.
* {@link ProcessorContext#getStateStore(String)}).
* <p>
     * Note that using {@link ProcessorContext#forward(Object, Object)} or
* {@link ProcessorContext#forward(Object, Object, To)} is not allowed within {@code transform} and
* will result in an {@link StreamsException exception}.
*
* @param readOnlyKey the read-only key
* @param value the value to be transformed
* @return the new value
*/
VR transform(final K readOnlyKey, final V value);
/**
* Close this processor and clean up any resources.
* <p>
* It is not possible to return any new output records within {@code close()}.
* Using {@link ProcessorContext#forward(Object, Object)} or {@link ProcessorContext#forward(Object, Object, To)},
* will result in an {@link StreamsException exception}.
*/
void close();
}
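A minimal sketch of a key-aware value transformer, assuming a state store named "last-value-store" is attached via transformValues(): it marks consecutive duplicate values per key by returning null, which a downstream filter would typically drop. The read-only key is only used for the store lookup; class and store names are hypothetical.

import java.util.Objects;
import org.apache.kafka.streams.kstream.ValueTransformerWithKey;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.state.KeyValueStore;

public class DedupValueTransformer implements ValueTransformerWithKey<String, String, String> {
    private KeyValueStore<String, String> lastValues;

    @Override
    @SuppressWarnings("unchecked")
    public void init(final ProcessorContext context) {
        lastValues = (KeyValueStore<String, String>) context.getStateStore("last-value-store");
    }

    @Override
    public String transform(final String readOnlyKey, final String value) {
        final String previous = lastValues.get(readOnlyKey);
        lastValues.put(readOnlyKey, value);
        // A null return forwards a null value; drop it afterwards with filter((k, v) -> v != null).
        return Objects.equals(value, previous) ? null : value;
    }

    @Override
    public void close() { }
}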

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
 * A {@code ValueTransformerWithKeySupplier} interface which can create one or more {@link ValueTransformerWithKey} instances.
 *
* @param <K> key type
* @param <V> value type
* @param <VR> transformed value type
* @see ValueTransformer
* @see ValueTransformerWithKey
* @see KStream#transformValues(ValueTransformerSupplier, String...)
* @see KStream#transformValues(ValueTransformerWithKeySupplier, String...)
* @see Transformer
* @see TransformerSupplier
* @see KStream#transform(TransformerSupplier, String...)
*/
public interface ValueTransformerWithKeySupplier<K, V, VR> {
ValueTransformerWithKey<K, V, VR> get();
}

View File

@@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.processor.TimestampExtractor;
import java.time.Instant;
/**
* A single window instance, defined by its start and end timestamp.
 * {@code Window} is agnostic as to whether start/end boundaries are inclusive or exclusive; this is defined by concrete
* window implementations.
* <p>
* To specify how {@code Window} boundaries are defined use {@link Windows}.
* For time semantics, see {@link TimestampExtractor}.
*
* @see Windows
* @see org.apache.kafka.streams.kstream.internals.TimeWindow
* @see org.apache.kafka.streams.kstream.internals.SessionWindow
* @see org.apache.kafka.streams.kstream.internals.UnlimitedWindow
* @see TimestampExtractor
*/
public abstract class Window {
protected final long startMs;
protected final long endMs;
private final Instant startTime;
private final Instant endTime;
/**
* Create a new window for the given start and end time.
*
* @param startMs the start timestamp of the window
* @param endMs the end timestamp of the window
* @throws IllegalArgumentException if {@code startMs} is negative or if {@code endMs} is smaller than {@code startMs}
*/
public Window(final long startMs, final long endMs) throws IllegalArgumentException {
if (startMs < 0) {
throw new IllegalArgumentException("Window startMs time cannot be negative.");
}
if (endMs < startMs) {
throw new IllegalArgumentException("Window endMs time cannot be smaller than window startMs time.");
}
this.startMs = startMs;
this.endMs = endMs;
this.startTime = Instant.ofEpochMilli(startMs);
this.endTime = Instant.ofEpochMilli(endMs);
}
/**
* Return the start timestamp of this window.
*
* @return The start timestamp of this window.
*/
public long start() {
return startMs;
}
/**
* Return the end timestamp of this window.
*
* @return The end timestamp of this window.
*/
public long end() {
return endMs;
}
/**
* Return the start time of this window.
*
* @return The start time of this window.
*/
public Instant startTime() {
return startTime;
}
/**
* Return the end time of this window.
*
* @return The end time of this window.
*/
public Instant endTime() {
return endTime;
}
/**
* Check if the given window overlaps with this window.
* Should throw an {@link IllegalArgumentException} if the {@code other} window has a different type than {@code
* this} window.
*
* @param other another window of the same type
* @return {@code true} if {@code other} overlaps with this window&mdash;{@code false} otherwise
*/
public abstract boolean overlap(final Window other);
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
return false;
}
final Window other = (Window) obj;
return startMs == other.startMs && endMs == other.endMs;
}
@Override
public int hashCode() {
return (int) (((startMs << 32) | endMs) % 0xFFFFFFFFL);
}
@Override
public String toString() {
return "Window{" +
"startMs=" + startMs +
", endMs=" + endMs +
'}';
}
}

View File

@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
/**
* The result key type of a windowed stream aggregation.
* <p>
 * If a {@link KStream} gets grouped and aggregated using a window-aggregation, the resulting {@link KTable} is a
* so-called "windowed {@link KTable}" with a combined key type that encodes the corresponding aggregation window and
* the original record key.
* Thus, a windowed {@link KTable} has type {@code <Windowed<K>,V>}.
*
* @param <K> type of the key
* @see KGroupedStream#windowedBy(Windows)
* @see KGroupedStream#windowedBy(SessionWindows)
*/
public class Windowed<K> {
private final K key;
private final Window window;
public Windowed(final K key, final Window window) {
this.key = key;
this.window = window;
}
/**
* Return the key of the window.
*
* @return the key of the window
*/
public K key() {
return key;
}
/**
* Return the window containing the values associated with this key.
*
* @return the window containing the values
*/
public Window window() {
return window;
}
@Override
public String toString() {
return "[" + key + "@" + window.start() + "/" + window.end() + "]";
}
@Override
public boolean equals(final Object obj) {
if (obj == this) {
return true;
}
if (!(obj instanceof Windowed)) {
return false;
}
final Windowed<?> that = (Windowed) obj;
return window.equals(that.window) && key.equals(that.key);
}
@Override
public int hashCode() {
final long n = ((long) window.hashCode() << 32) | key.hashCode();
return (int) (n % 0xFFFFFFFFL);
}
}
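
// Illustrative sketch (not part of the original file): a windowed count produces keys of type
// Windowed<K>. The stream name and window size below are arbitrary example values.
class WindowedUsageSketch {
    static KTable<Windowed<String>, Long> countPerMinute(final KStream<String, String> clicks) {
        // downstream, each key prints as "[<originalKey>@<windowStart>/<windowEnd>]" via Windowed#toString
        return clicks
            .groupByKey()
            .windowedBy(TimeWindows.of(java.time.Duration.ofMinutes(1)))
            .count();
    }
}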

View File

@@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.serialization.Serializer;
public class WindowedSerdes {
static public class TimeWindowedSerde<T> extends Serdes.WrapperSerde<Windowed<T>> {
// Default constructor needed for reflection object creation
public TimeWindowedSerde() {
super(new TimeWindowedSerializer<>(), new TimeWindowedDeserializer<>());
}
public TimeWindowedSerde(final Serde<T> inner) {
super(new TimeWindowedSerializer<>(inner.serializer()), new TimeWindowedDeserializer<>(inner.deserializer()));
}
        // This constructor can be used to serialize/deserialize a windowed topic
public TimeWindowedSerde(final Serde<T> inner, final long windowSize) {
super(new TimeWindowedSerializer<>(inner.serializer()), new TimeWindowedDeserializer<>(inner.deserializer(), windowSize));
}
        // Helper method that lets users indicate whether the input topic is a changelog topic, so the key can be deserialized properly.
public TimeWindowedSerde<T> forChangelog(final boolean isChangelogTopic) {
final TimeWindowedDeserializer deserializer = (TimeWindowedDeserializer) this.deserializer();
deserializer.setIsChangelogTopic(isChangelogTopic);
return this;
}
}
static public class SessionWindowedSerde<T> extends Serdes.WrapperSerde<Windowed<T>> {
// Default constructor needed for reflection object creation
public SessionWindowedSerde() {
super(new SessionWindowedSerializer<>(), new SessionWindowedDeserializer<>());
}
public SessionWindowedSerde(final Serde<T> inner) {
super(new SessionWindowedSerializer<>(inner.serializer()), new SessionWindowedDeserializer<>(inner.deserializer()));
}
}
/**
* Construct a {@code TimeWindowedSerde} object for the specified inner class type.
*/
static public <T> Serde<Windowed<T>> timeWindowedSerdeFrom(final Class<T> type) {
return new TimeWindowedSerde<>(Serdes.serdeFrom(type));
}
/**
* Construct a {@code TimeWindowedSerde} object to deserialize changelog topic
* for the specified inner class type and window size.
*/
static public <T> Serde<Windowed<T>> timeWindowedSerdeFrom(final Class<T> type, final long windowSize) {
return new TimeWindowedSerde<>(Serdes.serdeFrom(type), windowSize);
}
/**
* Construct a {@code SessionWindowedSerde} object for the specified inner class type.
*/
static public <T> Serde<Windowed<T>> sessionWindowedSerdeFrom(final Class<T> type) {
return new SessionWindowedSerde<>(Serdes.serdeFrom(type));
}
static void verifyInnerSerializerNotNull(final Serializer inner,
final Serializer wrapper) {
if (inner == null) {
throw new NullPointerException("Inner serializer is `null`. " +
"User code must use constructor `" + wrapper.getClass().getSimpleName() + "(final Serializer<T> inner)` " +
"instead of the no-arg constructor.");
}
}
static void verifyInnerDeserializerNotNull(final Deserializer inner,
final Deserializer wrapper) {
if (inner == null) {
throw new NullPointerException("Inner deserializer is `null`. " +
"User code must use constructor `" + wrapper.getClass().getSimpleName() + "(final Deserializer<T> inner)` " +
"instead of the no-arg constructor.");
}
}
}
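
// Illustrative sketch (not part of the original file): constructing windowed-key serdes, e.g. to
// read a windowed aggregation's output topic with a plain consumer. The 60_000 ms window size is
// an arbitrary example value.
class WindowedSerdesUsageSketch {
    static Serde<Windowed<String>> timeWindowedStringSerde() {
        return WindowedSerdes.timeWindowedSerdeFrom(String.class, 60_000L);
    }

    static Serde<Windowed<String>> sessionWindowedStringSerde() {
        return WindowedSerdes.sessionWindowedSerdeFrom(String.class);
    }
}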

View File

@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.streams.processor.TimestampExtractor;
import org.apache.kafka.streams.state.WindowBytesStoreSupplier;
import java.time.Duration;
import java.util.Map;
import static org.apache.kafka.streams.kstream.internals.WindowingDefaults.DEFAULT_RETENTION_MS;
/**
* The window specification for fixed size windows that is used to define window boundaries and grace period.
* <p>
* Grace period defines how long to wait on out-of-order events. That is, windows will continue to accept new records until {@code stream_time >= window_end + grace_period}.
 * Records that arrive after the grace period has passed are considered <em>late</em> and are dropped rather than processed.
* <p>
* Warning: It may be unsafe to use objects of this class in set- or map-like collections,
* since the equals and hashCode methods depend on mutable fields.
*
* @param <W> type of the window instance
* @see TimeWindows
* @see UnlimitedWindows
* @see JoinWindows
* @see SessionWindows
* @see TimestampExtractor
*/
public abstract class Windows<W extends Window> {
private long maintainDurationMs = DEFAULT_RETENTION_MS;
@Deprecated public int segments = 3;
protected Windows() {}
@Deprecated // remove this constructor when we remove segments.
Windows(final int segments) {
this.segments = segments;
}
/**
* Set the window maintain duration (retention time) in milliseconds.
* This retention time is a guaranteed <i>lower bound</i> for how long a window will be maintained.
*
* @param durationMs the window retention time in milliseconds
* @return itself
* @throws IllegalArgumentException if {@code durationMs} is negative
* @deprecated since 2.1. Use {@link Materialized#withRetention(Duration)}
* or directly configure the retention in a store supplier and use {@link Materialized#as(WindowBytesStoreSupplier)}.
*/
@Deprecated
public Windows<W> until(final long durationMs) throws IllegalArgumentException {
if (durationMs < 0) {
throw new IllegalArgumentException("Window retention time (durationMs) cannot be negative.");
}
maintainDurationMs = durationMs;
return this;
}
/**
* Return the window maintain duration (retention time) in milliseconds.
*
* @return the window maintain duration
* @deprecated since 2.1. Use {@link Materialized#retention} instead.
*/
@Deprecated
public long maintainMs() {
return maintainDurationMs;
}
/**
* Set the number of segments to be used for rolling the window store.
* This function is not exposed to users but can be called by developers that extend this class.
*
* @param segments the number of segments to be used
* @return itself
     * @throws IllegalArgumentException if the specified number of segments is smaller than 2
     * @deprecated since 2.1. Override segmentInterval() instead.
*/
@Deprecated
protected Windows<W> segments(final int segments) throws IllegalArgumentException {
if (segments < 2) {
throw new IllegalArgumentException("Number of segments must be at least 2.");
}
this.segments = segments;
return this;
}
/**
* Create all windows that contain the provided timestamp, indexed by non-negative window start timestamps.
*
     * @param timestamp the timestamp for which windows should be created
* @return a map of {@code windowStartTimestamp -> Window} entries
*/
public abstract Map<Long, W> windowsFor(final long timestamp);
/**
* Return the size of the specified windows in milliseconds.
*
* @return the size of the specified windows
*/
public abstract long size();
/**
* Return the window grace period (the time to admit
     * out-of-order events after the end of the window).
*
* Delay is defined as (stream_time - record_timestamp).
*/
public abstract long gracePeriodMs();
}
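
// Illustrative sketch (not part of the original file): TimeWindows is the standard concrete
// Windows implementation for tumbling/hopping windows; the durations below are example values.
class WindowsUsageSketch {
    static TimeWindows fiveMinuteTumblingWithGrace() {
        return TimeWindows.of(Duration.ofMinutes(5)).grace(Duration.ofMinutes(1));
    }
}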

View File

@@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.ValueJoiner;
import org.apache.kafka.streams.kstream.ValueMapper;
import org.apache.kafka.streams.kstream.ValueMapperWithKey;
import org.apache.kafka.streams.kstream.ValueTransformer;
import org.apache.kafka.streams.kstream.ValueTransformerSupplier;
import org.apache.kafka.streams.kstream.ValueTransformerWithKey;
import org.apache.kafka.streams.kstream.ValueTransformerWithKeySupplier;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import java.util.Collection;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
/*
* Any classes (KTable, KStream, etc) extending this class should follow the serde specification precedence ordering as:
*
* 1) Overridden values via control objects (e.g. Materialized, Serialized, Consumed, etc)
* 2) Serdes that can be inferred from the operator itself (e.g. groupBy().count(), where value serde can default to `LongSerde`).
* 3) Serde inherited from parent operator if possible (note if the key / value types have been changed, then the corresponding serde cannot be inherited).
* 4) Default serde specified in the config.
*/
public abstract class AbstractStream<K, V> {
protected final String name;
protected final Serde<K> keySerde;
protected final Serde<V> valSerde;
protected final Set<String> subTopologySourceNodes;
protected final StreamsGraphNode streamsGraphNode;
protected final InternalStreamsBuilder builder;
    // This copy-constructor allows extending the KStream
    // and KTable APIs with new methods without impacting the public interface.
public AbstractStream(final AbstractStream<K, V> stream) {
this.name = stream.name;
this.builder = stream.builder;
this.keySerde = stream.keySerde;
this.valSerde = stream.valSerde;
this.subTopologySourceNodes = stream.subTopologySourceNodes;
this.streamsGraphNode = stream.streamsGraphNode;
}
AbstractStream(final String name,
final Serde<K> keySerde,
final Serde<V> valSerde,
final Set<String> subTopologySourceNodes,
final StreamsGraphNode streamsGraphNode,
final InternalStreamsBuilder builder) {
if (subTopologySourceNodes == null || subTopologySourceNodes.isEmpty()) {
throw new IllegalArgumentException("parameter <sourceNodes> must not be null or empty");
}
this.name = name;
this.builder = builder;
this.keySerde = keySerde;
this.valSerde = valSerde;
this.subTopologySourceNodes = subTopologySourceNodes;
this.streamsGraphNode = streamsGraphNode;
}
    // This method exposes the InternalTopologyBuilder instance
    // to subclasses that extend the AbstractStream class.
protected InternalTopologyBuilder internalTopologyBuilder() {
return builder.internalTopologyBuilder;
}
Set<String> ensureCopartitionWith(final Collection<? extends AbstractStream<K, ?>> otherStreams) {
final Set<String> allSourceNodes = new HashSet<>(subTopologySourceNodes);
for (final AbstractStream<K, ?> other: otherStreams) {
allSourceNodes.addAll(other.subTopologySourceNodes);
}
builder.internalTopologyBuilder.copartitionSources(allSourceNodes);
return allSourceNodes;
}
static <T2, T1, R> ValueJoiner<T2, T1, R> reverseJoiner(final ValueJoiner<T1, T2, R> joiner) {
return (value2, value1) -> joiner.apply(value1, value2);
}
static <K, V, VR> ValueMapperWithKey<K, V, VR> withKey(final ValueMapper<V, VR> valueMapper) {
Objects.requireNonNull(valueMapper, "valueMapper can't be null");
return (readOnlyKey, value) -> valueMapper.apply(value);
}
static <K, V, VR> ValueTransformerWithKeySupplier<K, V, VR> toValueTransformerWithKeySupplier(
final ValueTransformerSupplier<V, VR> valueTransformerSupplier) {
Objects.requireNonNull(valueTransformerSupplier, "valueTransformerSupplier can't be null");
return () -> {
final ValueTransformer<V, VR> valueTransformer = valueTransformerSupplier.get();
return new ValueTransformerWithKey<K, V, VR>() {
@Override
public void init(final ProcessorContext context) {
valueTransformer.init(context);
}
@Override
public VR transform(final K readOnlyKey, final V value) {
return valueTransformer.transform(value);
}
@Override
public void close() {
valueTransformer.close();
}
};
};
}
// for testing only
public Serde<K> keySerde() {
return keySerde;
}
public Serde<V> valueSerde() {
return valSerde;
}
}
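
// Illustrative sketch (not part of the original file) of the precedence described above: serdes
// passed explicitly through a control object such as Grouped (rule 1) win over the default serdes
// configured in StreamsConfig (rule 4), while count() infers its Long value serde itself (rule 2).
class SerdePrecedenceSketch {
    static org.apache.kafka.streams.kstream.KTable<String, Long> countClicks(
            final org.apache.kafka.streams.kstream.KStream<String, String> clicks) {
        return clicks
            .groupByKey(org.apache.kafka.streams.kstream.Grouped.with(
                org.apache.kafka.common.serialization.Serdes.String(),
                org.apache.kafka.common.serialization.Serdes.String()))
            .count();
    }
}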

View File

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import java.util.Objects;
public class Change<T> {
public final T newValue;
public final T oldValue;
public Change(final T newValue, final T oldValue) {
this.newValue = newValue;
this.oldValue = oldValue;
}
@Override
public String toString() {
return "(" + newValue + "<-" + oldValue + ")";
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final Change<?> change = (Change<?>) o;
return Objects.equals(newValue, change.newValue) &&
Objects.equals(oldValue, change.oldValue);
}
@Override
public int hashCode() {
return Objects.hash(newValue, oldValue);
}
}

View File

@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.header.Headers;
import org.apache.kafka.common.serialization.Deserializer;
import java.nio.ByteBuffer;
import java.util.Objects;
public class ChangedDeserializer<T> implements Deserializer<Change<T>>, WrappingNullableDeserializer<Change<T>, Void, T> {
private static final int NEWFLAG_SIZE = 1;
private Deserializer<T> inner;
public ChangedDeserializer(final Deserializer<T> inner) {
this.inner = inner;
}
public Deserializer<T> inner() {
return inner;
}
@Override
public void setIfUnset(final Deserializer<Void> defaultKeyDeserializer, final Deserializer<T> defaultValueDeserializer) {
if (inner == null) {
inner = Objects.requireNonNull(defaultValueDeserializer);
}
}
@Override
public Change<T> deserialize(final String topic, final Headers headers, final byte[] data) {
final byte[] bytes = new byte[data.length - NEWFLAG_SIZE];
System.arraycopy(data, 0, bytes, 0, bytes.length);
if (ByteBuffer.wrap(data).get(data.length - NEWFLAG_SIZE) != 0) {
return new Change<>(inner.deserialize(topic, headers, bytes), null);
} else {
return new Change<>(null, inner.deserialize(topic, headers, bytes));
}
}
@Override
public Change<T> deserialize(final String topic, final byte[] data) {
return deserialize(topic, null, data);
}
@Override
public void close() {
inner.close();
}
}

View File

@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.header.Headers;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.streams.errors.StreamsException;
import java.nio.ByteBuffer;
import java.util.Objects;
public class ChangedSerializer<T> implements Serializer<Change<T>>, WrappingNullableSerializer<Change<T>, Void, T> {
private static final int NEWFLAG_SIZE = 1;
private Serializer<T> inner;
public ChangedSerializer(final Serializer<T> inner) {
this.inner = inner;
}
public Serializer<T> inner() {
return inner;
}
@Override
public void setIfUnset(final Serializer<Void> defaultKeySerializer, final Serializer<T> defaultValueSerializer) {
if (inner == null) {
inner = Objects.requireNonNull(defaultValueSerializer);
}
}
/**
* @throws StreamsException if both old and new values of data are null, or if
* both values are not null
*/
@Override
public byte[] serialize(final String topic, final Headers headers, final Change<T> data) {
final byte[] serializedKey;
        // exactly one of the old / new values is expected to be non-null
if (data.newValue != null) {
if (data.oldValue != null) {
throw new StreamsException("Both old and new values are not null (" + data.oldValue
+ " : " + data.newValue + ") in ChangeSerializer, which is not allowed.");
}
serializedKey = inner.serialize(topic, headers, data.newValue);
} else {
if (data.oldValue == null) {
throw new StreamsException("Both old and new values are null in ChangeSerializer, which is not allowed.");
}
serializedKey = inner.serialize(topic, headers, data.oldValue);
}
final ByteBuffer buf = ByteBuffer.allocate(serializedKey.length + NEWFLAG_SIZE);
buf.put(serializedKey);
buf.put((byte) (data.newValue != null ? 1 : 0));
return buf.array();
}
@Override
public byte[] serialize(final String topic, final Change<T> data) {
return serialize(topic, null, data);
}
@Override
public void close() {
inner.close();
}
}
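
// Illustrative sketch (not part of the original file) of the wire format produced above: the
// serialized new or old value is followed by a single trailing flag byte (1 = newValue present,
// 0 = oldValue present). ChangedDeserializer reads the same layout back.
class ChangedSerializerFormatSketch {
    static boolean encodesNewValue(final byte[] serializedChange) {
        // the last byte is the new-value flag appended by ChangedSerializer#serialize
        return serializedChange[serializedChange.length - 1] != 0;
    }
}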

View File

@@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.CogroupedKStream;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Named;
import org.apache.kafka.streams.kstream.SessionWindowedCogroupedKStream;
import org.apache.kafka.streams.kstream.SessionWindows;
import org.apache.kafka.streams.kstream.TimeWindowedCogroupedKStream;
import org.apache.kafka.streams.kstream.Window;
import org.apache.kafka.streams.kstream.Windows;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.state.KeyValueStore;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
public class CogroupedKStreamImpl<K, VOut> extends AbstractStream<K, VOut> implements CogroupedKStream<K, VOut> {
static final String AGGREGATE_NAME = "COGROUPKSTREAM-AGGREGATE-";
static final String MERGE_NAME = "COGROUPKSTREAM-MERGE-";
final private Map<KGroupedStreamImpl<K, ?>, Aggregator<? super K, ? super Object, VOut>> groupPatterns;
final private CogroupedStreamAggregateBuilder<K, VOut> aggregateBuilder;
CogroupedKStreamImpl(final String name,
final Set<String> subTopologySourceNodes,
final StreamsGraphNode streamsGraphNode,
final InternalStreamsBuilder builder) {
super(name, null, null, subTopologySourceNodes, streamsGraphNode, builder);
groupPatterns = new LinkedHashMap<>();
aggregateBuilder = new CogroupedStreamAggregateBuilder<>(builder);
}
@SuppressWarnings("unchecked")
@Override
public <VIn> CogroupedKStream<K, VOut> cogroup(final KGroupedStream<K, VIn> groupedStream,
final Aggregator<? super K, ? super VIn, VOut> aggregator) {
Objects.requireNonNull(groupedStream, "groupedStream can't be null");
Objects.requireNonNull(aggregator, "aggregator can't be null");
groupPatterns.put((KGroupedStreamImpl<K, ?>) groupedStream,
(Aggregator<? super K, ? super Object, VOut>) aggregator);
return this;
}
@Override
public KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized) {
return aggregate(initializer, NamedInternal.empty(), materialized);
}
@Override
public KTable<K, VOut> aggregate(final Initializer<VOut> initializer, final Named named) {
return aggregate(initializer, named, Materialized.with(keySerde, null));
}
@Override
public KTable<K, VOut> aggregate(final Initializer<VOut> initializer,
final Named named,
final Materialized<K, VOut, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(initializer, "initializer can't be null");
Objects.requireNonNull(named, "named can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
return doAggregate(
initializer,
new NamedInternal(named),
new MaterializedInternal<>(materialized, builder, AGGREGATE_NAME));
}
@Override
public KTable<K, VOut> aggregate(final Initializer<VOut> initializer) {
return aggregate(initializer, Materialized.with(keySerde, null));
}
@Override
public <W extends Window> TimeWindowedCogroupedKStream<K, VOut> windowedBy(final Windows<W> windows) {
Objects.requireNonNull(windows, "windows can't be null");
return new TimeWindowedCogroupedKStreamImpl<>(
windows,
builder,
subTopologySourceNodes,
name,
aggregateBuilder,
streamsGraphNode,
groupPatterns);
}
@Override
public SessionWindowedCogroupedKStream<K, VOut> windowedBy(final SessionWindows sessionWindows) {
Objects.requireNonNull(sessionWindows, "sessionWindows can't be null");
return new SessionWindowedCogroupedKStreamImpl<>(sessionWindows,
builder,
subTopologySourceNodes,
name,
aggregateBuilder,
streamsGraphNode,
groupPatterns);
}
private KTable<K, VOut> doAggregate(final Initializer<VOut> initializer,
final NamedInternal named,
final MaterializedInternal<K, VOut, KeyValueStore<Bytes, byte[]>> materializedInternal) {
return aggregateBuilder.build(
groupPatterns,
initializer,
named,
new TimestampedKeyValueStoreMaterializer<>(materializedInternal).materialize(),
materializedInternal.keySerde(),
materializedInternal.valueSerde(),
materializedInternal.queryableStoreName(),
null,
null,
null);
}
}
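
// Illustrative sketch (not part of the original file, hypothetical stream names): cogrouping two
// already-grouped streams and aggregating them into one table, which is what this class implements.
class CogroupUsageSketch {
    static KTable<String, Integer> combinedEventCounts(final KGroupedStream<String, String> clicks,
                                                       final KGroupedStream<String, Long> purchases) {
        final Aggregator<String, String, Integer> countClicks = (key, value, aggregate) -> aggregate + 1;
        final Aggregator<String, Long, Integer> countPurchases = (key, value, aggregate) -> aggregate + 1;
        return clicks
            .cogroup(countClicks)
            .cogroup(purchases, countPurchases)
            .aggregate(() -> 0);
    }
}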

View File

@@ -0,0 +1,186 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import static org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode.optimizableRepartitionNodeBuilder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Merger;
import org.apache.kafka.streams.kstream.SessionWindows;
import org.apache.kafka.streams.kstream.Window;
import org.apache.kafka.streams.kstream.Windows;
import org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode.OptimizableRepartitionNodeBuilder;
import org.apache.kafka.streams.kstream.internals.graph.ProcessorGraphNode;
import org.apache.kafka.streams.kstream.internals.graph.ProcessorParameters;
import org.apache.kafka.streams.kstream.internals.graph.StatefulProcessorNode;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.state.StoreBuilder;
class CogroupedStreamAggregateBuilder<K, VOut> {
private final InternalStreamsBuilder builder;
private final Map<KGroupedStreamImpl<K, ?>, StreamsGraphNode> parentNodes = new LinkedHashMap<>();
CogroupedStreamAggregateBuilder(final InternalStreamsBuilder builder) {
this.builder = builder;
}
<KR, VIn, W extends Window> KTable<KR, VOut> build(final Map<KGroupedStreamImpl<K, ?>, Aggregator<? super K, ? super Object, VOut>> groupPatterns,
final Initializer<VOut> initializer,
final NamedInternal named,
final StoreBuilder<? extends StateStore> storeBuilder,
final Serde<KR> keySerde,
final Serde<VOut> valSerde,
final String queryableName,
final Windows<W> windows,
final SessionWindows sessionWindows,
final Merger<? super K, VOut> sessionMerger) {
for (final KGroupedStreamImpl<K, ?> repartitionReqs : groupPatterns.keySet()) {
if (repartitionReqs.repartitionRequired) {
final OptimizableRepartitionNodeBuilder<K, ?> repartitionNodeBuilder = optimizableRepartitionNodeBuilder();
                final String repartitionNamePrefix = repartitionReqs.userProvidedRepartitionTopicName != null ?
                    repartitionReqs.userProvidedRepartitionTopicName : storeBuilder.name();
                createRepartitionSource(repartitionNamePrefix, repartitionNodeBuilder, repartitionReqs.keySerde, repartitionReqs.valSerde);
if (!parentNodes.containsKey(repartitionReqs)) {
final StreamsGraphNode repartitionNode = repartitionNodeBuilder.build();
builder.addGraphNode(repartitionReqs.streamsGraphNode, repartitionNode);
parentNodes.put(repartitionReqs, repartitionNode);
}
} else {
parentNodes.put(repartitionReqs, repartitionReqs.streamsGraphNode);
}
}
final Collection<? extends AbstractStream<K, ?>> groupedStreams = new ArrayList<>(parentNodes.keySet());
final AbstractStream<K, ?> kGrouped = groupedStreams.iterator().next();
groupedStreams.remove(kGrouped);
kGrouped.ensureCopartitionWith(groupedStreams);
final Collection<StreamsGraphNode> processors = new ArrayList<>();
boolean stateCreated = false;
int counter = 0;
for (final Entry<KGroupedStreamImpl<K, ?>, Aggregator<? super K, ? super Object, VOut>> kGroupedStream : groupPatterns.entrySet()) {
final StatefulProcessorNode statefulProcessorNode = getStatefulProcessorNode(
kGroupedStream.getValue(),
initializer,
named.suffixWithOrElseGet(
"-cogroup-agg-" + counter++,
builder,
CogroupedKStreamImpl.AGGREGATE_NAME),
stateCreated,
storeBuilder,
windows,
sessionWindows,
sessionMerger);
stateCreated = true;
processors.add(statefulProcessorNode);
builder.addGraphNode(parentNodes.get(kGroupedStream.getKey()), statefulProcessorNode);
}
final String mergeProcessorName = named.suffixWithOrElseGet(
"-cogroup-merge",
builder,
CogroupedKStreamImpl.MERGE_NAME);
final ProcessorSupplier<K, VOut> passThrough = new PassThrough<>();
final ProcessorGraphNode<K, VOut> mergeNode =
new ProcessorGraphNode<>(mergeProcessorName, new ProcessorParameters<>(passThrough, mergeProcessorName));
builder.addGraphNode(processors, mergeNode);
return new KTableImpl<KR, VIn, VOut>(
mergeProcessorName,
keySerde,
valSerde,
Collections.singleton(mergeNode.nodeName()),
queryableName,
passThrough,
mergeNode,
builder);
}
private <W extends Window> StatefulProcessorNode getStatefulProcessorNode(final Aggregator<? super K, ? super Object, VOut> aggregator,
final Initializer<VOut> initializer,
final String processorName,
final boolean stateCreated,
final StoreBuilder<? extends StateStore> storeBuilder,
final Windows<W> windows,
final SessionWindows sessionWindows,
final Merger<? super K, VOut> sessionMerger) {
final ProcessorSupplier<K, ?> kStreamAggregate;
if (windows == null && sessionWindows == null) {
kStreamAggregate = new KStreamAggregate<>(storeBuilder.name(), initializer, aggregator);
} else if (windows != null && sessionWindows == null) {
kStreamAggregate = new KStreamWindowAggregate<>(windows, storeBuilder.name(), initializer, aggregator);
} else if (windows == null && sessionMerger != null) {
kStreamAggregate = new KStreamSessionWindowAggregate<>(sessionWindows, storeBuilder.name(), initializer, aggregator, sessionMerger);
} else {
throw new IllegalArgumentException("must include windows OR sessionWindows + sessionMerger OR all must be null");
}
final StatefulProcessorNode<K, ?> statefulProcessorNode;
if (!stateCreated) {
statefulProcessorNode =
new StatefulProcessorNode<>(
processorName,
new ProcessorParameters<>(kStreamAggregate, processorName),
storeBuilder
);
} else {
statefulProcessorNode =
new StatefulProcessorNode<>(
processorName,
new ProcessorParameters<>(kStreamAggregate, processorName),
new String[]{storeBuilder.name()}
);
}
return statefulProcessorNode;
}
/**
* @return the new sourceName of the repartitioned source
*/
@SuppressWarnings("unchecked")
private <VIn> String createRepartitionSource(final String repartitionTopicNamePrefix,
final OptimizableRepartitionNodeBuilder<K, ?> optimizableRepartitionNodeBuilder,
final Serde<K> keySerde,
final Serde<?> valueSerde) {
return KStreamImpl.createRepartitionedSource(builder,
keySerde,
(Serde<VIn>) valueSerde,
repartitionTopicNamePrefix,
(OptimizableRepartitionNodeBuilder<K, VIn>) optimizableRepartitionNodeBuilder);
}
}

View File

@@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.TimestampExtractor;
public class ConsumedInternal<K, V> extends Consumed<K, V> {
public ConsumedInternal(final Consumed<K, V> consumed) {
super(consumed);
}
public ConsumedInternal(final Serde<K> keySerde,
final Serde<V> valSerde,
final TimestampExtractor timestampExtractor,
final Topology.AutoOffsetReset offsetReset) {
this(Consumed.with(keySerde, valSerde, timestampExtractor, offsetReset));
}
public ConsumedInternal() {
this(Consumed.<K, V>with(null, null));
}
public Serde<K> keySerde() {
return keySerde;
}
public Deserializer<K> keyDeserializer() {
return keySerde == null ? null : keySerde.deserializer();
}
public Serde<V> valueSerde() {
return valueSerde;
}
public Deserializer<V> valueDeserializer() {
return valueSerde == null ? null : valueSerde.deserializer();
}
public TimestampExtractor timestampExtractor() {
return timestampExtractor;
}
public Topology.AutoOffsetReset offsetResetPolicy() {
return resetPolicy;
}
public String name() {
return processorName;
}
}
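
// Illustrative sketch (not part of the original file): the public Consumed options that this
// internal wrapper exposes as plain getters. Serdes is fully qualified because it is not imported here.
class ConsumedInternalUsageSketch {
    static ConsumedInternal<String, Long> earliestStringLong() {
        return new ConsumedInternal<>(
            Consumed.with(
                    org.apache.kafka.common.serialization.Serdes.String(),
                    org.apache.kafka.common.serialization.Serdes.Long())
                .withOffsetResetPolicy(Topology.AutoOffsetReset.EARLIEST));
    }
}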

View File

@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serializer;
import java.nio.ByteBuffer;
import static java.util.Objects.requireNonNull;
import static org.apache.kafka.common.utils.Utils.getNullableSizePrefixedArray;
public final class FullChangeSerde<T> {
private final Serde<T> inner;
public static <T> FullChangeSerde<T> wrap(final Serde<T> serde) {
if (serde == null) {
return null;
} else {
return new FullChangeSerde<>(serde);
}
}
private FullChangeSerde(final Serde<T> inner) {
this.inner = requireNonNull(inner);
}
public Serde<T> innerSerde() {
return inner;
}
public Change<byte[]> serializeParts(final String topic, final Change<T> data) {
if (data == null) {
return null;
}
final Serializer<T> innerSerializer = innerSerde().serializer();
final byte[] oldBytes = data.oldValue == null ? null : innerSerializer.serialize(topic, data.oldValue);
final byte[] newBytes = data.newValue == null ? null : innerSerializer.serialize(topic, data.newValue);
return new Change<>(newBytes, oldBytes);
}
public Change<T> deserializeParts(final String topic, final Change<byte[]> serialChange) {
if (serialChange == null) {
return null;
}
final Deserializer<T> innerDeserializer = innerSerde().deserializer();
final T oldValue =
serialChange.oldValue == null ? null : innerDeserializer.deserialize(topic, serialChange.oldValue);
final T newValue =
serialChange.newValue == null ? null : innerDeserializer.deserialize(topic, serialChange.newValue);
return new Change<>(newValue, oldValue);
}
/**
* We used to serialize a Change into a single byte[]. Now, we don't anymore, but we still
* need to be able to read it (so that we can load the state store from previously-written changelog records).
*/
public static Change<byte[]> decomposeLegacyFormattedArrayIntoChangeArrays(final byte[] data) {
if (data == null) {
return null;
}
final ByteBuffer buffer = ByteBuffer.wrap(data);
final byte[] oldBytes = getNullableSizePrefixedArray(buffer);
final byte[] newBytes = getNullableSizePrefixedArray(buffer);
return new Change<>(newBytes, oldBytes);
}
}
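
// Illustrative sketch (not part of the original file): wrapping a plain serde and round-tripping a
// Change through the part-wise serialization above. The topic name is a placeholder.
class FullChangeSerdeUsageSketch {
    static Change<String> roundTrip(final Change<String> change) {
        final FullChangeSerde<String> serde =
            FullChangeSerde.wrap(org.apache.kafka.common.serialization.Serdes.String());
        final Change<byte[]> parts = serde.serializeParts("example-topic", change);
        return serde.deserializeParts("example-topic", parts);
    }
}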

View File

@@ -0,0 +1,32 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.kstream.TimeWindowedDeserializer;
import org.apache.kafka.streams.kstream.TimeWindowedSerializer;
import org.apache.kafka.streams.kstream.Windowed;
class FullTimeWindowedSerde<T> extends Serdes.WrapperSerde<Windowed<T>> {
FullTimeWindowedSerde(final Serde<T> inner, final long windowSize) {
super(
new TimeWindowedSerializer<>(inner.serializer()),
new TimeWindowedDeserializer<>(inner.deserializer(), windowSize)
);
}
}

View File

@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.streams.kstream.GlobalKTable;
public class GlobalKTableImpl<K, V> implements GlobalKTable<K, V> {
private final KTableValueGetterSupplier<K, V> valueGetterSupplier;
private final String queryableStoreName;
GlobalKTableImpl(final KTableValueGetterSupplier<K, V> valueGetterSupplier,
final String queryableStoreName) {
this.valueGetterSupplier = valueGetterSupplier;
this.queryableStoreName = queryableStoreName;
}
KTableValueGetterSupplier<K, V> valueGetterSupplier() {
return valueGetterSupplier;
}
@Override
public String queryableStoreName() {
return queryableStoreName;
}
}

View File

@@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.Grouped;
public class GroupedInternal<K, V> extends Grouped<K, V> {
public GroupedInternal(final Grouped<K, V> grouped) {
super(grouped);
}
public Serde<K> keySerde() {
return keySerde;
}
public Serde<V> valueSerde() {
return valueSerde;
}
public String name() {
return name;
}
}

View File

@@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.internals.graph.ProcessorParameters;
import org.apache.kafka.streams.kstream.internals.graph.StatefulProcessorNode;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.state.StoreBuilder;
import java.util.Collections;
import java.util.Set;
import static org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode.OptimizableRepartitionNodeBuilder;
import static org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode.optimizableRepartitionNodeBuilder;
class GroupedStreamAggregateBuilder<K, V> {
private final InternalStreamsBuilder builder;
private final Serde<K> keySerde;
private final Serde<V> valueSerde;
private final boolean repartitionRequired;
private final String userProvidedRepartitionTopicName;
private final Set<String> subTopologySourceNodes;
private final String name;
private final StreamsGraphNode streamsGraphNode;
private StreamsGraphNode repartitionNode;
final Initializer<Long> countInitializer = () -> 0L;
final Aggregator<K, V, Long> countAggregator = (aggKey, value, aggregate) -> aggregate + 1;
final Initializer<V> reduceInitializer = () -> null;
GroupedStreamAggregateBuilder(final InternalStreamsBuilder builder,
final GroupedInternal<K, V> groupedInternal,
final boolean repartitionRequired,
final Set<String> subTopologySourceNodes,
final String name,
final StreamsGraphNode streamsGraphNode) {
this.builder = builder;
this.keySerde = groupedInternal.keySerde();
this.valueSerde = groupedInternal.valueSerde();
this.repartitionRequired = repartitionRequired;
this.subTopologySourceNodes = subTopologySourceNodes;
this.name = name;
this.streamsGraphNode = streamsGraphNode;
this.userProvidedRepartitionTopicName = groupedInternal.name();
}
<KR, VR> KTable<KR, VR> build(final NamedInternal functionName,
final StoreBuilder<? extends StateStore> storeBuilder,
final KStreamAggProcessorSupplier<K, KR, V, VR> aggregateSupplier,
final String queryableStoreName,
final Serde<KR> keySerde,
final Serde<VR> valSerde) {
assert queryableStoreName == null || queryableStoreName.equals(storeBuilder.name());
final String aggFunctionName = functionName.name();
String sourceName = this.name;
StreamsGraphNode parentNode = streamsGraphNode;
if (repartitionRequired) {
final OptimizableRepartitionNodeBuilder<K, V> repartitionNodeBuilder = optimizableRepartitionNodeBuilder();
final String repartitionTopicPrefix = userProvidedRepartitionTopicName != null ? userProvidedRepartitionTopicName : storeBuilder.name();
sourceName = createRepartitionSource(repartitionTopicPrefix, repartitionNodeBuilder);
            // The first time through we need to create a repartition node.
            // On any subsequent calls to GroupedStreamAggregateBuilder#build we check whether
            // the user has provided a name for the repartition topic; if so we re-use
            // the existing repartition node, otherwise we create a new one.
if (repartitionNode == null || userProvidedRepartitionTopicName == null) {
repartitionNode = repartitionNodeBuilder.build();
}
builder.addGraphNode(parentNode, repartitionNode);
parentNode = repartitionNode;
}
final StatefulProcessorNode<K, V> statefulProcessorNode =
new StatefulProcessorNode<>(
aggFunctionName,
new ProcessorParameters<>(aggregateSupplier, aggFunctionName),
storeBuilder
);
builder.addGraphNode(parentNode, statefulProcessorNode);
return new KTableImpl<>(aggFunctionName,
keySerde,
valSerde,
sourceName.equals(this.name) ? subTopologySourceNodes : Collections.singleton(sourceName),
queryableStoreName,
aggregateSupplier,
statefulProcessorNode,
builder);
}
/**
* @return the new sourceName of the repartitioned source
*/
private String createRepartitionSource(final String repartitionTopicNamePrefix,
final OptimizableRepartitionNodeBuilder<K, V> optimizableRepartitionNodeBuilder) {
return KStreamImpl.createRepartitionedSource(builder,
keySerde,
valueSerde,
repartitionTopicNamePrefix,
optimizableRepartitionNodeBuilder);
}
}

View File

@@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
public interface InternalNameProvider {
String newProcessorName(final String prefix);
String newStoreName(final String prefix);
}

View File

@@ -0,0 +1,506 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.errors.StreamsException;
import org.apache.kafka.streams.kstream.GlobalKTable;
import org.apache.kafka.streams.kstream.Grouped;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.internals.graph.GlobalStoreNode;
import org.apache.kafka.streams.kstream.internals.graph.OptimizableRepartitionNode;
import org.apache.kafka.streams.kstream.internals.graph.ProcessorParameters;
import org.apache.kafka.streams.kstream.internals.graph.StateStoreNode;
import org.apache.kafka.streams.kstream.internals.graph.StreamSourceNode;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.kstream.internals.graph.TableSourceNode;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.internals.InternalTopologyBuilder;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.regex.Pattern;
public class InternalStreamsBuilder implements InternalNameProvider {
private static final String TABLE_SOURCE_SUFFIX = "-source";
final InternalTopologyBuilder internalTopologyBuilder;
private final AtomicInteger index = new AtomicInteger(0);
private final AtomicInteger buildPriorityIndex = new AtomicInteger(0);
private final LinkedHashMap<StreamsGraphNode, LinkedHashSet<OptimizableRepartitionNode<?, ?>>> keyChangingOperationsToOptimizableRepartitionNodes = new LinkedHashMap<>();
private final LinkedHashSet<StreamsGraphNode> mergeNodes = new LinkedHashSet<>();
private final LinkedHashSet<StreamsGraphNode> tableSourceNodes = new LinkedHashSet<>();
private static final String TOPOLOGY_ROOT = "root";
private static final Logger LOG = LoggerFactory.getLogger(InternalStreamsBuilder.class);
protected final StreamsGraphNode root = new StreamsGraphNode(TOPOLOGY_ROOT) {
@Override
public void writeToTopology(final InternalTopologyBuilder topologyBuilder) {
// no-op for root node
}
};
public InternalStreamsBuilder(final InternalTopologyBuilder internalTopologyBuilder) {
this.internalTopologyBuilder = internalTopologyBuilder;
}
public <K, V> KStream<K, V> stream(final Collection<String> topics,
final ConsumedInternal<K, V> consumed) {
final String name = new NamedInternal(consumed.name()).orElseGenerateWithPrefix(this, KStreamImpl.SOURCE_NAME);
final StreamSourceNode<K, V> streamSourceNode = new StreamSourceNode<>(name, topics, consumed);
addGraphNode(root, streamSourceNode);
return new KStreamImpl<>(name,
consumed.keySerde(),
consumed.valueSerde(),
Collections.singleton(name),
false,
streamSourceNode,
this);
}
public <K, V> KStream<K, V> stream(final Pattern topicPattern,
final ConsumedInternal<K, V> consumed) {
final String name = newProcessorName(KStreamImpl.SOURCE_NAME);
final StreamSourceNode<K, V> streamPatternSourceNode = new StreamSourceNode<>(name, topicPattern, consumed);
addGraphNode(root, streamPatternSourceNode);
return new KStreamImpl<>(name,
consumed.keySerde(),
consumed.valueSerde(),
Collections.singleton(name),
false,
streamPatternSourceNode,
this);
}
public <K, V> KTable<K, V> table(final String topic,
final ConsumedInternal<K, V> consumed,
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
final NamedInternal named = new NamedInternal(consumed.name());
final String sourceName = named
.suffixWithOrElseGet(TABLE_SOURCE_SUFFIX, this, KStreamImpl.SOURCE_NAME);
final String tableSourceName = named
.orElseGenerateWithPrefix(this, KTableImpl.SOURCE_NAME);
final KTableSource<K, V> tableSource = new KTableSource<>(materialized.storeName(), materialized.queryableStoreName());
final ProcessorParameters<K, V> processorParameters = new ProcessorParameters<>(tableSource, tableSourceName);
final TableSourceNode<K, V> tableSourceNode = TableSourceNode.<K, V>tableSourceNodeBuilder()
.withTopic(topic)
.withSourceName(sourceName)
.withNodeName(tableSourceName)
.withConsumedInternal(consumed)
.withMaterializedInternal(materialized)
.withProcessorParameters(processorParameters)
.build();
addGraphNode(root, tableSourceNode);
return new KTableImpl<>(tableSourceName,
consumed.keySerde(),
consumed.valueSerde(),
Collections.singleton(sourceName),
materialized.queryableStoreName(),
tableSource,
tableSourceNode,
this);
}
public <K, V> GlobalKTable<K, V> globalTable(final String topic,
final ConsumedInternal<K, V> consumed,
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(consumed, "consumed can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
// explicitly disable logging for global stores
materialized.withLoggingDisabled();
final NamedInternal named = new NamedInternal(consumed.name());
final String sourceName = named
.suffixWithOrElseGet(TABLE_SOURCE_SUFFIX, this, KStreamImpl.SOURCE_NAME);
final String processorName = named
.orElseGenerateWithPrefix(this, KTableImpl.SOURCE_NAME);
// enforce store name as queryable name to always materialize global table stores
final String storeName = materialized.storeName();
final KTableSource<K, V> tableSource = new KTableSource<>(storeName, storeName);
final ProcessorParameters<K, V> processorParameters = new ProcessorParameters<>(tableSource, processorName);
final TableSourceNode<K, V> tableSourceNode = TableSourceNode.<K, V>tableSourceNodeBuilder()
.withTopic(topic)
.isGlobalKTable(true)
.withSourceName(sourceName)
.withConsumedInternal(consumed)
.withMaterializedInternal(materialized)
.withProcessorParameters(processorParameters)
.build();
addGraphNode(root, tableSourceNode);
return new GlobalKTableImpl<>(new KTableSourceValueGetterSupplier<>(storeName), materialized.queryableStoreName());
}
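// Generated names append a zero-padded, monotonically increasing index to the given prefix,
// e.g. "KSTREAM-SOURCE-0000000000", then "KSTREAM-SOURCE-0000000001", and so on (illustrative values).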
@Override
public String newProcessorName(final String prefix) {
return prefix + String.format("%010d", index.getAndIncrement());
}
@Override
public String newStoreName(final String prefix) {
return prefix + String.format(KTableImpl.STATE_STORE_NAME + "%010d", index.getAndIncrement());
}
public synchronized void addStateStore(final StoreBuilder builder) {
addGraphNode(root, new StateStoreNode(builder));
}
public synchronized void addGlobalStore(final StoreBuilder<KeyValueStore> storeBuilder,
final String sourceName,
final String topic,
final ConsumedInternal consumed,
final String processorName,
final ProcessorSupplier stateUpdateSupplier) {
final StreamsGraphNode globalStoreNode = new GlobalStoreNode(storeBuilder,
sourceName,
topic,
consumed,
processorName,
stateUpdateSupplier);
addGraphNode(root, globalStoreNode);
}
public synchronized void addGlobalStore(final StoreBuilder<KeyValueStore> storeBuilder,
final String topic,
final ConsumedInternal consumed,
final ProcessorSupplier stateUpdateSupplier) {
// explicitly disable logging for global stores
storeBuilder.withLoggingDisabled();
final String sourceName = newProcessorName(KStreamImpl.SOURCE_NAME);
final String processorName = newProcessorName(KTableImpl.SOURCE_NAME);
addGlobalStore(storeBuilder,
sourceName,
topic,
consumed,
processorName,
stateUpdateSupplier);
}
void addGraphNode(final StreamsGraphNode parent,
final StreamsGraphNode child) {
Objects.requireNonNull(parent, "parent node can't be null");
Objects.requireNonNull(child, "child node can't be null");
parent.addChild(child);
maybeAddNodeForOptimizationMetadata(child);
}
void addGraphNode(final Collection<StreamsGraphNode> parents,
final StreamsGraphNode child) {
Objects.requireNonNull(parents, "parent node can't be null");
Objects.requireNonNull(child, "child node can't be null");
if (parents.isEmpty()) {
throw new StreamsException("Parent node collection can't be empty");
}
for (final StreamsGraphNode parent : parents) {
addGraphNode(parent, child);
}
}
private void maybeAddNodeForOptimizationMetadata(final StreamsGraphNode node) {
node.setBuildPriority(buildPriorityIndex.getAndIncrement());
if (node.parentNodes().isEmpty() && !node.nodeName().equals(TOPOLOGY_ROOT)) {
throw new IllegalStateException(
"Nodes should not have a null parent node. Name: " + node.nodeName() + " Type: "
+ node.getClass().getSimpleName());
}
if (node.isKeyChangingOperation()) {
keyChangingOperationsToOptimizableRepartitionNodes.put(node, new LinkedHashSet<>());
} else if (node instanceof OptimizableRepartitionNode) {
final StreamsGraphNode parentNode = getKeyChangingParentNode(node);
if (parentNode != null) {
keyChangingOperationsToOptimizableRepartitionNodes.get(parentNode).add((OptimizableRepartitionNode) node);
}
} else if (node.isMergeNode()) {
mergeNodes.add(node);
} else if (node instanceof TableSourceNode) {
tableSourceNodes.add(node);
}
}
// use this method for testing only
public void buildAndOptimizeTopology() {
buildAndOptimizeTopology(null);
}
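// Writes nodes to the topology in build-priority order (the order in which they were added to the
// graph), and only once all of a node's parents have already been written.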
public void buildAndOptimizeTopology(final Properties props) {
maybePerformOptimizations(props);
final PriorityQueue<StreamsGraphNode> graphNodePriorityQueue = new PriorityQueue<>(5, Comparator.comparing(StreamsGraphNode::buildPriority));
graphNodePriorityQueue.offer(root);
while (!graphNodePriorityQueue.isEmpty()) {
final StreamsGraphNode streamGraphNode = graphNodePriorityQueue.remove();
if (LOG.isDebugEnabled()) {
LOG.debug("Adding nodes to topology {} child nodes {}", streamGraphNode, streamGraphNode.children());
}
if (streamGraphNode.allParentsWrittenToTopology() && !streamGraphNode.hasWrittenToTopology()) {
streamGraphNode.writeToTopology(internalTopologyBuilder);
streamGraphNode.setHasWrittenToTopology(true);
}
for (final StreamsGraphNode graphNode : streamGraphNode.children()) {
graphNodePriorityQueue.offer(graphNode);
}
}
}
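// Optimizations only run when the supplied properties opt in. A caller would typically enable them
// through the public config, roughly like this (illustrative snippet):
//   final Properties props = new Properties();
//   props.setProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION, StreamsConfig.OPTIMIZE);
//   new StreamsBuilder().build(props); // eventually calls buildAndOptimizeTopology(props)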
private void maybePerformOptimizations(final Properties props) {
if (props != null && StreamsConfig.OPTIMIZE.equals(props.getProperty(StreamsConfig.TOPOLOGY_OPTIMIZATION))) {
LOG.debug("Optimizing the Kafka Streams graph for repartition nodes");
optimizeKTableSourceTopics();
maybeOptimizeRepartitionOperations();
}
}
private void optimizeKTableSourceTopics() {
LOG.debug("Marking KTable source nodes to optimize using source topic for changelogs ");
tableSourceNodes.forEach(node -> ((TableSourceNode) node).reuseSourceTopicForChangeLog(true));
}
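// Roughly, this rewrites sub-graphs where several repartition nodes descend from one key-changing
// operation, e.g.
//   keyChanger -> childA -> repartition1
//   keyChanger -> childB -> repartition2
// into a single repartition fed directly by the key changer,
//   keyChanger -> optimizedRepartition -> childA / childB -> ...
// so records are repartitioned once instead of once per downstream operation.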
@SuppressWarnings("unchecked")
private void maybeOptimizeRepartitionOperations() {
maybeUpdateKeyChangingRepartitionNodeMap();
final Iterator<Entry<StreamsGraphNode, LinkedHashSet<OptimizableRepartitionNode<?, ?>>>> entryIterator = keyChangingOperationsToOptimizableRepartitionNodes.entrySet().iterator();
while (entryIterator.hasNext()) {
final Map.Entry<StreamsGraphNode, LinkedHashSet<OptimizableRepartitionNode<?, ?>>> entry = entryIterator.next();
final StreamsGraphNode keyChangingNode = entry.getKey();
if (entry.getValue().isEmpty()) {
continue;
}
final GroupedInternal groupedInternal = new GroupedInternal(getRepartitionSerdes(entry.getValue()));
final String repartitionTopicName = getFirstRepartitionTopicName(entry.getValue());
// passing in the name of the first repartition topic, re-used to create the optimized repartition topic
final StreamsGraphNode optimizedSingleRepartition = createRepartitionNode(repartitionTopicName,
groupedInternal.keySerde(),
groupedInternal.valueSerde());
// re-use parent buildPriority to make sure the single repartition graph node is evaluated before downstream nodes
optimizedSingleRepartition.setBuildPriority(keyChangingNode.buildPriority());
for (final OptimizableRepartitionNode repartitionNodeToBeReplaced : entry.getValue()) {
final StreamsGraphNode keyChangingNodeChild = findParentNodeMatching(repartitionNodeToBeReplaced, gn -> gn.parentNodes().contains(keyChangingNode));
if (keyChangingNodeChild == null) {
throw new StreamsException(String.format("Found a null keyChangingChild node for %s", repartitionNodeToBeReplaced));
}
LOG.debug("Found the child node of the key changer {} from the repartition {}.", keyChangingNodeChild, repartitionNodeToBeReplaced);
// need to add children of key-changing node as children of optimized repartition
// in order to process records from re-partitioning
optimizedSingleRepartition.addChild(keyChangingNodeChild);
LOG.debug("Removing {} from {} children {}", keyChangingNodeChild, keyChangingNode, keyChangingNode.children());
// now remove children from key-changing node
keyChangingNode.removeChild(keyChangingNodeChild);
// now need to get children of repartition node so we can remove repartition node
final Collection<StreamsGraphNode> repartitionNodeToBeReplacedChildren = repartitionNodeToBeReplaced.children();
final Collection<StreamsGraphNode> parentsOfRepartitionNodeToBeReplaced = repartitionNodeToBeReplaced.parentNodes();
for (final StreamsGraphNode repartitionNodeToBeReplacedChild : repartitionNodeToBeReplacedChildren) {
for (final StreamsGraphNode parentNode : parentsOfRepartitionNodeToBeReplaced) {
parentNode.addChild(repartitionNodeToBeReplacedChild);
}
}
for (final StreamsGraphNode parentNode : parentsOfRepartitionNodeToBeReplaced) {
parentNode.removeChild(repartitionNodeToBeReplaced);
}
repartitionNodeToBeReplaced.clearChildren();
LOG.debug("Updated node {} children {}", optimizedSingleRepartition, optimizedSingleRepartition.children());
}
keyChangingNode.addChild(optimizedSingleRepartition);
entryIterator.remove();
}
}
private void maybeUpdateKeyChangingRepartitionNodeMap() {
final Map<StreamsGraphNode, Set<StreamsGraphNode>> mergeNodesToKeyChangers = new HashMap<>();
final Set<StreamsGraphNode> mergeNodeKeyChangingParentsToRemove = new HashSet<>();
for (final StreamsGraphNode mergeNode : mergeNodes) {
mergeNodesToKeyChangers.put(mergeNode, new LinkedHashSet<>());
final Set<Map.Entry<StreamsGraphNode, LinkedHashSet<OptimizableRepartitionNode<?, ?>>>> entrySet = keyChangingOperationsToOptimizableRepartitionNodes.entrySet();
for (final Map.Entry<StreamsGraphNode, LinkedHashSet<OptimizableRepartitionNode<?, ?>>> entry : entrySet) {
if (mergeNodeHasRepartitionChildren(mergeNode, entry.getValue())) {
final StreamsGraphNode maybeParentKey = findParentNodeMatching(mergeNode, node -> node.parentNodes().contains(entry.getKey()));
if (maybeParentKey != null) {
mergeNodesToKeyChangers.get(mergeNode).add(entry.getKey());
}
}
}
}
for (final Map.Entry<StreamsGraphNode, Set<StreamsGraphNode>> entry : mergeNodesToKeyChangers.entrySet()) {
final StreamsGraphNode mergeKey = entry.getKey();
final Collection<StreamsGraphNode> keyChangingParents = entry.getValue();
final LinkedHashSet<OptimizableRepartitionNode<?, ?>> repartitionNodes = new LinkedHashSet<>();
for (final StreamsGraphNode keyChangingParent : keyChangingParents) {
repartitionNodes.addAll(keyChangingOperationsToOptimizableRepartitionNodes.get(keyChangingParent));
mergeNodeKeyChangingParentsToRemove.add(keyChangingParent);
}
keyChangingOperationsToOptimizableRepartitionNodes.put(mergeKey, repartitionNodes);
}
for (final StreamsGraphNode mergeNodeKeyChangingParent : mergeNodeKeyChangingParentsToRemove) {
keyChangingOperationsToOptimizableRepartitionNodes.remove(mergeNodeKeyChangingParent);
}
}
private boolean mergeNodeHasRepartitionChildren(final StreamsGraphNode mergeNode,
final LinkedHashSet<OptimizableRepartitionNode<?, ?>> repartitionNodes) {
return repartitionNodes.stream().allMatch(n -> findParentNodeMatching(n, gn -> gn.parentNodes().contains(mergeNode)) != null);
}
private <K, V> OptimizableRepartitionNode<K, V> createRepartitionNode(final String repartitionTopicName,
final Serde<K> keySerde,
final Serde<V> valueSerde) {
final OptimizableRepartitionNode.OptimizableRepartitionNodeBuilder<K, V> repartitionNodeBuilder = OptimizableRepartitionNode.optimizableRepartitionNodeBuilder();
KStreamImpl.createRepartitionedSource(this,
keySerde,
valueSerde,
repartitionTopicName,
repartitionNodeBuilder);
// ensures setting the repartition topic to the name of the
// first repartition topic to get merged
// this may be an auto-generated name or a user specified name
repartitionNodeBuilder.withRepartitionTopic(repartitionTopicName);
return repartitionNodeBuilder.build();
}
private StreamsGraphNode getKeyChangingParentNode(final StreamsGraphNode repartitionNode) {
final StreamsGraphNode shouldBeKeyChangingNode = findParentNodeMatching(repartitionNode, n -> n.isKeyChangingOperation() || n.isValueChangingOperation());
final StreamsGraphNode keyChangingNode = findParentNodeMatching(repartitionNode, StreamsGraphNode::isKeyChangingOperation);
if (shouldBeKeyChangingNode != null && shouldBeKeyChangingNode.equals(keyChangingNode)) {
return keyChangingNode;
}
return null;
}
private String getFirstRepartitionTopicName(final Collection<OptimizableRepartitionNode<?, ?>> repartitionNodes) {
return repartitionNodes.iterator().next().repartitionTopic();
}
@SuppressWarnings("unchecked")
private GroupedInternal getRepartitionSerdes(final Collection<OptimizableRepartitionNode<?, ?>> repartitionNodes) {
Serde keySerde = null;
Serde valueSerde = null;
for (final OptimizableRepartitionNode<?, ?> repartitionNode : repartitionNodes) {
if (keySerde == null && repartitionNode.keySerde() != null) {
keySerde = repartitionNode.keySerde();
}
if (valueSerde == null && repartitionNode.valueSerde() != null) {
valueSerde = repartitionNode.valueSerde();
}
if (keySerde != null && valueSerde != null) {
break;
}
}
return new GroupedInternal(Grouped.with(keySerde, valueSerde));
}
private StreamsGraphNode findParentNodeMatching(final StreamsGraphNode startSeekingNode,
final Predicate<StreamsGraphNode> parentNodePredicate) {
if (parentNodePredicate.test(startSeekingNode)) {
return startSeekingNode;
}
StreamsGraphNode foundParentNode = null;
for (final StreamsGraphNode parentNode : startSeekingNode.parentNodes()) {
if (parentNodePredicate.test(parentNode)) {
return parentNode;
}
foundParentNode = findParentNodeMatching(parentNode, parentNodePredicate);
}
return foundParentNode;
}
public StreamsGraphNode root() {
return root;
}
}
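/*
 * Illustrative usage: an instance of this class backs the public StreamsBuilder DSL, so a typical path
 * into the methods above looks roughly like the following (topic name and properties are hypothetical):
 *
 *   final StreamsBuilder builder = new StreamsBuilder();
 *   builder.stream("input-topic").groupByKey().count();
 *   final Topology topology = builder.build(new Properties()); // reaches buildAndOptimizeTopology(...)
 */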

View File

@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.streams.kstream.Joined;
public class JoinedInternal<K, V, VO> extends Joined<K, V, VO> {
JoinedInternal(final Joined<K, V, VO> joined) {
super(joined);
}
public Serde<K> keySerde() {
return keySerde;
}
public Serde<V> valueSerde() {
return valueSerde;
}
public Serde<VO> otherValueSerde() {
return otherValueSerde;
}
@Override // TODO remove annotation when super.name() is removed
@SuppressWarnings("deprecation") // this method should not be removed if super.name() is removed
public String name() {
return name;
}
}

View File

@@ -0,0 +1,238 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.CogroupedKStream;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.KGroupedStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Named;
import org.apache.kafka.streams.kstream.Reducer;
import org.apache.kafka.streams.kstream.SessionWindowedKStream;
import org.apache.kafka.streams.kstream.SessionWindows;
import org.apache.kafka.streams.kstream.TimeWindowedKStream;
import org.apache.kafka.streams.kstream.Window;
import org.apache.kafka.streams.kstream.Windows;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.state.KeyValueStore;
import java.util.Objects;
import java.util.Set;
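/**
 * The implementation class of {@link KGroupedStream}.
 * <p>
 * A sketch of typical usage through the DSL (topic and store names are illustrative):
 * <pre>{@code
 * KTable<String, Long> counts = builder
 *     .stream("words", Consumed.with(Serdes.String(), Serdes.String()))
 *     .groupByKey()
 *     .count(Materialized.as("counts-store"));
 * }</pre>
 *
 * @param <K> the key type
 * @param <V> the value type
 */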
class KGroupedStreamImpl<K, V> extends AbstractStream<K, V> implements KGroupedStream<K, V> {
static final String REDUCE_NAME = "KSTREAM-REDUCE-";
static final String AGGREGATE_NAME = "KSTREAM-AGGREGATE-";
private final GroupedStreamAggregateBuilder<K, V> aggregateBuilder;
final boolean repartitionRequired;
final String userProvidedRepartitionTopicName;
KGroupedStreamImpl(final String name,
final Set<String> subTopologySourceNodes,
final GroupedInternal<K, V> groupedInternal,
final boolean repartitionRequired,
final StreamsGraphNode streamsGraphNode,
final InternalStreamsBuilder builder) {
super(name, groupedInternal.keySerde(), groupedInternal.valueSerde(), subTopologySourceNodes, streamsGraphNode, builder);
this.repartitionRequired = repartitionRequired;
this.userProvidedRepartitionTopicName = groupedInternal.name();
this.aggregateBuilder = new GroupedStreamAggregateBuilder<>(
builder,
groupedInternal,
repartitionRequired,
subTopologySourceNodes,
name,
streamsGraphNode
);
}
@Override
public KTable<K, V> reduce(final Reducer<V> reducer) {
return reduce(reducer, Materialized.with(keySerde, valSerde));
}
@Override
public KTable<K, V> reduce(final Reducer<V> reducer,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
return reduce(reducer, NamedInternal.empty(), materialized);
}
@Override
public KTable<K, V> reduce(final Reducer<V> reducer,
final Named named,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(reducer, "reducer can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
Objects.requireNonNull(named, "name can't be null");
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, builder, REDUCE_NAME);
if (materializedInternal.keySerde() == null) {
materializedInternal.withKeySerde(keySerde);
}
if (materializedInternal.valueSerde() == null) {
materializedInternal.withValueSerde(valSerde);
}
final String name = new NamedInternal(named).orElseGenerateWithPrefix(builder, REDUCE_NAME);
return doAggregate(
new KStreamReduce<>(materializedInternal.storeName(), reducer),
name,
materializedInternal
);
}
@Override
public <VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized) {
return aggregate(initializer, aggregator, NamedInternal.empty(), materialized);
}
@Override
public <VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator,
final Named named,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(initializer, "initializer can't be null");
Objects.requireNonNull(aggregator, "aggregator can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
Objects.requireNonNull(named, "named can't be null");
final MaterializedInternal<K, VR, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, builder, AGGREGATE_NAME);
if (materializedInternal.keySerde() == null) {
materializedInternal.withKeySerde(keySerde);
}
final String name = new NamedInternal(named).orElseGenerateWithPrefix(builder, AGGREGATE_NAME);
return doAggregate(
new KStreamAggregate<>(materializedInternal.storeName(), initializer, aggregator),
name,
materializedInternal
);
}
@Override
public <VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> aggregator) {
return aggregate(initializer, aggregator, Materialized.with(keySerde, null));
}
@Override
public KTable<K, Long> count() {
return doCount(NamedInternal.empty(), Materialized.with(keySerde, Serdes.Long()));
}
@Override
public KTable<K, Long> count(final Named named) {
Objects.requireNonNull(named, "named can't be null");
return doCount(named, Materialized.with(keySerde, Serdes.Long()));
}
@Override
public KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized) {
return count(NamedInternal.empty(), materialized);
}
@Override
public KTable<K, Long> count(final Named named, final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(materialized, "materialized can't be null");
// TODO: remove this when we do a topology-incompatible release
// we used to burn a topology name here, so we have to keep doing it for compatibility
if (new MaterializedInternal<>(materialized).storeName() == null) {
builder.newStoreName(AGGREGATE_NAME);
}
return doCount(named, materialized);
}
private KTable<K, Long> doCount(final Named named, final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized) {
final MaterializedInternal<K, Long, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, builder, AGGREGATE_NAME);
if (materializedInternal.keySerde() == null) {
materializedInternal.withKeySerde(keySerde);
}
if (materializedInternal.valueSerde() == null) {
materializedInternal.withValueSerde(Serdes.Long());
}
final String name = new NamedInternal(named).orElseGenerateWithPrefix(builder, AGGREGATE_NAME);
return doAggregate(
new KStreamAggregate<>(materializedInternal.storeName(), aggregateBuilder.countInitializer, aggregateBuilder.countAggregator),
name,
materializedInternal);
}
@Override
public <W extends Window> TimeWindowedKStream<K, V> windowedBy(final Windows<W> windows) {
return new TimeWindowedKStreamImpl<>(
windows,
builder,
subTopologySourceNodes,
name,
keySerde,
valSerde,
aggregateBuilder,
streamsGraphNode
);
}
@Override
public SessionWindowedKStream<K, V> windowedBy(final SessionWindows windows) {
return new SessionWindowedKStreamImpl<>(
windows,
builder,
subTopologySourceNodes,
name,
keySerde,
valSerde,
aggregateBuilder,
streamsGraphNode
);
}
private <T> KTable<K, T> doAggregate(final KStreamAggProcessorSupplier<K, K, V, T> aggregateSupplier,
final String functionName,
final MaterializedInternal<K, T, KeyValueStore<Bytes, byte[]>> materializedInternal) {
return aggregateBuilder.build(
new NamedInternal(functionName),
new TimestampedKeyValueStoreMaterializer<>(materializedInternal).materialize(),
aggregateSupplier,
materializedInternal.queryableStoreName(),
materializedInternal.keySerde(),
materializedInternal.valueSerde());
}
@Override
public <Vout> CogroupedKStream<K, Vout> cogroup(final Aggregator<? super K, ? super V, Vout> aggregator) {
Objects.requireNonNull(aggregator, "aggregator can't be null");
return new CogroupedKStreamImpl<K, Vout>(name, subTopologySourceNodes, streamsGraphNode, builder)
.cogroup(this, aggregator);
}
}

View File

@@ -0,0 +1,247 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.kstream.KGroupedTable;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Named;
import org.apache.kafka.streams.kstream.Reducer;
import org.apache.kafka.streams.kstream.internals.graph.GroupedTableOperationRepartitionNode;
import org.apache.kafka.streams.kstream.internals.graph.ProcessorParameters;
import org.apache.kafka.streams.kstream.internals.graph.StatefulProcessorNode;
import org.apache.kafka.streams.kstream.internals.graph.StreamsGraphNode;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.state.KeyValueStore;
import java.util.Collections;
import java.util.Objects;
import java.util.Set;
/**
* The implementation class of {@link KGroupedTable}.
*
* @param <K> the key type
* @param <V> the value type
*/
public class KGroupedTableImpl<K, V> extends AbstractStream<K, V> implements KGroupedTable<K, V> {
private static final String AGGREGATE_NAME = "KTABLE-AGGREGATE-";
private static final String REDUCE_NAME = "KTABLE-REDUCE-";
private final String userProvidedRepartitionTopicName;
private final Initializer<Long> countInitializer = () -> 0L;
private final Aggregator<K, V, Long> countAdder = (aggKey, value, aggregate) -> aggregate + 1L;
private final Aggregator<K, V, Long> countSubtractor = (aggKey, value, aggregate) -> aggregate - 1L;
private StreamsGraphNode repartitionGraphNode;
KGroupedTableImpl(final InternalStreamsBuilder builder,
final String name,
final Set<String> subTopologySourceNodes,
final GroupedInternal<K, V> groupedInternal,
final StreamsGraphNode streamsGraphNode) {
super(name, groupedInternal.keySerde(), groupedInternal.valueSerde(), subTopologySourceNodes, streamsGraphNode, builder);
this.userProvidedRepartitionTopicName = groupedInternal.name();
}
private <T> KTable<K, T> doAggregate(final ProcessorSupplier<K, Change<V>> aggregateSupplier,
final NamedInternal named,
final String functionName,
final MaterializedInternal<K, T, KeyValueStore<Bytes, byte[]>> materialized) {
final String sinkName = named.suffixWithOrElseGet("-sink", builder, KStreamImpl.SINK_NAME);
final String sourceName = named.suffixWithOrElseGet("-source", builder, KStreamImpl.SOURCE_NAME);
final String funcName = named.orElseGenerateWithPrefix(builder, functionName);
final String repartitionTopic = (userProvidedRepartitionTopicName != null ? userProvidedRepartitionTopicName : materialized.storeName())
+ KStreamImpl.REPARTITION_TOPIC_SUFFIX;
if (repartitionGraphNode == null || userProvidedRepartitionTopicName == null) {
repartitionGraphNode = createRepartitionNode(sinkName, sourceName, repartitionTopic);
}
// the passed in StreamsGraphNode must be the parent of the repartition node
builder.addGraphNode(this.streamsGraphNode, repartitionGraphNode);
final StatefulProcessorNode statefulProcessorNode = new StatefulProcessorNode<>(
funcName,
new ProcessorParameters<>(aggregateSupplier, funcName),
new TimestampedKeyValueStoreMaterializer<>(materialized).materialize()
);
// now the repartition node must be the parent of the StateProcessorNode
builder.addGraphNode(repartitionGraphNode, statefulProcessorNode);
// return the KTable representation with the intermediate topic as the sources
return new KTableImpl<>(funcName,
materialized.keySerde(),
materialized.valueSerde(),
Collections.singleton(sourceName),
materialized.queryableStoreName(),
aggregateSupplier,
statefulProcessorNode,
builder);
}
private GroupedTableOperationRepartitionNode<K, V> createRepartitionNode(final String sinkName,
final String sourceName,
final String topic) {
return GroupedTableOperationRepartitionNode.<K, V>groupedTableOperationNodeBuilder()
.withRepartitionTopic(topic)
.withSinkName(sinkName)
.withSourceName(sourceName)
.withKeySerde(keySerde)
.withValueSerde(valSerde)
.withNodeName(sourceName).build();
}
@Override
public KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
return reduce(adder, subtractor, NamedInternal.empty(), materialized);
}
@Override
public KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor,
final Named named,
final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(adder, "adder can't be null");
Objects.requireNonNull(subtractor, "subtractor can't be null");
Objects.requireNonNull(named, "named can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final MaterializedInternal<K, V, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, builder, AGGREGATE_NAME);
if (materializedInternal.keySerde() == null) {
materializedInternal.withKeySerde(keySerde);
}
if (materializedInternal.valueSerde() == null) {
materializedInternal.withValueSerde(valSerde);
}
final ProcessorSupplier<K, Change<V>> aggregateSupplier = new KTableReduce<>(
materializedInternal.storeName(),
adder,
subtractor);
return doAggregate(aggregateSupplier, new NamedInternal(named), REDUCE_NAME, materializedInternal);
}
@Override
public KTable<K, V> reduce(final Reducer<V> adder,
final Reducer<V> subtractor) {
return reduce(adder, subtractor, Materialized.with(keySerde, valSerde));
}
@Override
public KTable<K, Long> count(final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized) {
return count(NamedInternal.empty(), materialized);
}
@Override
public KTable<K, Long> count(final Named named, final Materialized<K, Long, KeyValueStore<Bytes, byte[]>> materialized) {
final MaterializedInternal<K, Long, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, builder, AGGREGATE_NAME);
if (materializedInternal.keySerde() == null) {
materializedInternal.withKeySerde(keySerde);
}
if (materializedInternal.valueSerde() == null) {
materializedInternal.withValueSerde(Serdes.Long());
}
final ProcessorSupplier<K, Change<V>> aggregateSupplier = new KTableAggregate<>(
materializedInternal.storeName(),
countInitializer,
countAdder,
countSubtractor);
return doAggregate(aggregateSupplier, new NamedInternal(named), AGGREGATE_NAME, materializedInternal);
}
@Override
public KTable<K, Long> count() {
return count(Materialized.with(keySerde, Serdes.Long()));
}
@Override
public KTable<K, Long> count(final Named named) {
return count(named, Materialized.with(keySerde, Serdes.Long()));
}
@Override
public <VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized) {
return aggregate(initializer, adder, subtractor, NamedInternal.empty(), materialized);
}
@Override
public <VR> KTable<K, VR> aggregate(final Initializer<VR> initializer,
final Aggregator<? super K, ? super V, VR> adder,
final Aggregator<? super K, ? super V, VR> subtractor,
final Named named,
final Materialized<K, VR, KeyValueStore<Bytes, byte[]>> materialized) {
Objects.requireNonNull(initializer, "initializer can't be null");
Objects.requireNonNull(adder, "adder can't be null");
Objects.requireNonNull(subtractor, "subtractor can't be null");
Objects.requireNonNull(named, "named can't be null");
Objects.requireNonNull(materialized, "materialized can't be null");
final MaterializedInternal<K, VR, KeyValueStore<Bytes, byte[]>> materializedInternal =
new MaterializedInternal<>(materialized, builder, AGGREGATE_NAME);
if (materializedInternal.keySerde() == null) {
materializedInternal.withKeySerde(keySerde);
}
final ProcessorSupplier<K, Change<V>> aggregateSupplier = new KTableAggregate<>(
materializedInternal.storeName(),
initializer,
adder,
subtractor);
return doAggregate(aggregateSupplier, new NamedInternal(named), AGGREGATE_NAME, materializedInternal);
}
@Override
public <T> KTable<K, T> aggregate(final Initializer<T> initializer,
final Aggregator<? super K, ? super V, T> adder,
final Aggregator<? super K, ? super V, T> subtractor,
final Named named) {
return aggregate(initializer, adder, subtractor, named, Materialized.with(keySerde, null));
}
@Override
public <T> KTable<K, T> aggregate(final Initializer<T> initializer,
final Aggregator<? super K, ? super V, T> adder,
final Aggregator<? super K, ? super V, T> subtractor) {
return aggregate(initializer, adder, subtractor, Materialized.with(keySerde, null));
}
}

View File

@@ -0,0 +1,27 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.streams.processor.ProcessorSupplier;
public interface KStreamAggProcessorSupplier<K, RK, V, T> extends ProcessorSupplier<K, V> {
KTableValueGetterSupplier<RK, T> view();
void enableSendingOldValues();
}

View File

@@ -0,0 +1,145 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.streams.kstream.Aggregator;
import org.apache.kafka.streams.kstream.Initializer;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.internals.metrics.StreamsMetricsImpl;
import org.apache.kafka.streams.state.TimestampedKeyValueStore;
import org.apache.kafka.streams.state.ValueAndTimestamp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.kafka.streams.processor.internals.metrics.TaskMetrics.droppedRecordsSensorOrSkippedRecordsSensor;
import static org.apache.kafka.streams.state.ValueAndTimestamp.getValueOrNull;
public class KStreamAggregate<K, V, T> implements KStreamAggProcessorSupplier<K, K, V, T> {
private static final Logger LOG = LoggerFactory.getLogger(KStreamAggregate.class);
private final String storeName;
private final Initializer<T> initializer;
private final Aggregator<? super K, ? super V, T> aggregator;
private boolean sendOldValues = false;
KStreamAggregate(final String storeName, final Initializer<T> initializer, final Aggregator<? super K, ? super V, T> aggregator) {
this.storeName = storeName;
this.initializer = initializer;
this.aggregator = aggregator;
}
@Override
public Processor<K, V> get() {
return new KStreamAggregateProcessor();
}
@Override
public void enableSendingOldValues() {
sendOldValues = true;
}
private class KStreamAggregateProcessor extends AbstractProcessor<K, V> {
private TimestampedKeyValueStore<K, T> store;
private StreamsMetricsImpl metrics;
private Sensor droppedRecordsSensor;
private TimestampedTupleForwarder<K, T> tupleForwarder;
@SuppressWarnings("unchecked")
@Override
public void init(final ProcessorContext context) {
super.init(context);
metrics = (StreamsMetricsImpl) context.metrics();
droppedRecordsSensor = droppedRecordsSensorOrSkippedRecordsSensor(Thread.currentThread().getName(), context.taskId().toString(), metrics);
store = (TimestampedKeyValueStore<K, T>) context.getStateStore(storeName);
tupleForwarder = new TimestampedTupleForwarder<>(
store,
context,
new TimestampedCacheFlushListener<>(context),
sendOldValues);
}
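// For each non-null record: fetch the current aggregate (or run the initializer on first sight),
// apply the aggregator, then store and forward the result with a timestamp that is the max of the
// record timestamp and the previous aggregate's timestamp. For a count-style aggregator, an
// existing value of 3 plus one more record yields 4 (illustrative).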
@Override
public void process(final K key, final V value) {
// If the key or value is null we don't need to proceed
if (key == null || value == null) {
LOG.warn(
"Skipping record due to null key or value. key=[{}] value=[{}] topic=[{}] partition=[{}] offset=[{}]",
key, value, context().topic(), context().partition(), context().offset()
);
droppedRecordsSensor.record();
return;
}
final ValueAndTimestamp<T> oldAggAndTimestamp = store.get(key);
T oldAgg = getValueOrNull(oldAggAndTimestamp);
final T newAgg;
final long newTimestamp;
if (oldAgg == null) {
oldAgg = initializer.apply();
newTimestamp = context().timestamp();
} else {
oldAgg = oldAggAndTimestamp.value();
newTimestamp = Math.max(context().timestamp(), oldAggAndTimestamp.timestamp());
}
newAgg = aggregator.apply(key, value, oldAgg);
store.put(key, ValueAndTimestamp.make(newAgg, newTimestamp));
tupleForwarder.maybeForward(key, newAgg, sendOldValues ? oldAgg : null, newTimestamp);
}
}
@Override
public KTableValueGetterSupplier<K, T> view() {
return new KTableValueGetterSupplier<K, T>() {
public KTableValueGetter<K, T> get() {
return new KStreamAggregateValueGetter();
}
@Override
public String[] storeNames() {
return new String[]{storeName};
}
};
}
private class KStreamAggregateValueGetter implements KTableValueGetter<K, T> {
private TimestampedKeyValueStore<K, T> store;
@SuppressWarnings("unchecked")
@Override
public void init(final ProcessorContext context) {
store = (TimestampedKeyValueStore<K, T>) context.getStateStore(storeName);
}
@Override
public ValueAndTimestamp<T> get(final K key) {
return store.get(key);
}
@Override
public void close() {}
}
}

View File

@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.streams.kstream.Predicate;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;
import org.apache.kafka.streams.processor.To;
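/**
 * Forwards each record to the first child processor whose predicate matches; records that match no
 * predicate are dropped. Illustrative DSL usage (the predicates are hypothetical):
 * <pre>{@code
 * KStream<String, Integer>[] branches = stream.branch(
 *     (key, value) -> value % 2 == 0, // even values
 *     (key, value) -> true            // everything else
 * );
 * }</pre>
 */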
class KStreamBranch<K, V> implements ProcessorSupplier<K, V> {
private final Predicate<K, V>[] predicates;
private final String[] childNodes;
KStreamBranch(final Predicate<K, V>[] predicates,
final String[] childNodes) {
this.predicates = predicates;
this.childNodes = childNodes;
}
@Override
public Processor<K, V> get() {
return new KStreamBranchProcessor();
}
private class KStreamBranchProcessor extends AbstractProcessor<K, V> {
@Override
public void process(final K key, final V value) {
for (int i = 0; i < predicates.length; i++) {
if (predicates[i].test(key, value)) {
// use forward with child here and then break the loop
// so that no record is going to be piped to multiple streams
context().forward(key, value, To.child(childNodes[i]));
break;
}
}
}
}
}

View File

@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.kstream.Predicate;
import org.apache.kafka.streams.processor.ProcessorSupplier;
class KStreamFilter<K, V> implements ProcessorSupplier<K, V> {
private final Predicate<K, V> predicate;
private final boolean filterNot;
public KStreamFilter(final Predicate<K, V> predicate, final boolean filterNot) {
this.predicate = predicate;
this.filterNot = filterNot;
}
@Override
public Processor<K, V> get() {
return new KStreamFilterProcessor();
}
private class KStreamFilterProcessor extends AbstractProcessor<K, V> {
@Override
public void process(final K key, final V value) {
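// XOR flips the predicate result when this supplier implements filterNot():
//   filterNot == false -> forward records the predicate accepts,
//   filterNot == true  -> forward records the predicate rejects.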
if (filterNot ^ predicate.test(key, value)) {
context().forward(key, value);
}
}
}
}

View File

@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.kstream.KeyValueMapper;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;
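/**
 * Maps each input record to zero or more output records via the supplied {@link KeyValueMapper}.
 * Illustrative DSL usage, splitting a line into words (names are hypothetical):
 * <pre>{@code
 * KStream<String, String> words = lines.flatMap((key, line) ->
 *     Arrays.stream(line.split(" "))
 *         .map(word -> KeyValue.pair(key, word))
 *         .collect(Collectors.toList()));
 * }</pre>
 */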
class KStreamFlatMap<K, V, K1, V1> implements ProcessorSupplier<K, V> {
private final KeyValueMapper<? super K, ? super V, ? extends Iterable<? extends KeyValue<? extends K1, ? extends V1>>> mapper;
KStreamFlatMap(final KeyValueMapper<? super K, ? super V, ? extends Iterable<? extends KeyValue<? extends K1, ? extends V1>>> mapper) {
this.mapper = mapper;
}
@Override
public Processor<K, V> get() {
return new KStreamFlatMapProcessor();
}
private class KStreamFlatMapProcessor extends AbstractProcessor<K, V> {
@Override
public void process(final K key, final V value) {
for (final KeyValue<? extends K1, ? extends V1> newPair : mapper.apply(key, value)) {
context().forward(newPair.key, newPair.value);
}
}
}
}

View File

@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream.internals;
import org.apache.kafka.streams.kstream.ValueMapperWithKey;
import org.apache.kafka.streams.processor.AbstractProcessor;
import org.apache.kafka.streams.processor.Processor;
import org.apache.kafka.streams.processor.ProcessorSupplier;
class KStreamFlatMapValues<K, V, V1> implements ProcessorSupplier<K, V> {
private final ValueMapperWithKey<? super K, ? super V, ? extends Iterable<? extends V1>> mapper;
KStreamFlatMapValues(final ValueMapperWithKey<? super K, ? super V, ? extends Iterable<? extends V1>> mapper) {
this.mapper = mapper;
}
@Override
public Processor<K, V> get() {
return new KStreamFlatMapValuesProcessor();
}
private class KStreamFlatMapValuesProcessor extends AbstractProcessor<K, V> {
@Override
public void process(final K key, final V value) {
final Iterable<? extends V1> newValues = mapper.apply(key, value);
for (final V1 v : newValues) {
context().forward(key, v);
}
}
}
}

Some files were not shown because too many files have changed in this diff.