Add km module kafka

leewei
2023-02-14 16:27:47 +08:00
parent 229140f067
commit 0b8160a714
4039 changed files with 718112 additions and 46204 deletions

jmh-benchmarks/README.md Normal file

@@ -0,0 +1,61 @@
### JMH-Benchmark module

This module contains benchmarks written using [JMH](https://openjdk.java.net/projects/code-tools/jmh/) from OpenJDK.
Writing correct micro-benchmarks in Java (or another JVM language) is difficult, and there are many non-obvious pitfalls (many
due to compiler optimizations). JMH is a framework for running and analyzing benchmarks (micro or macro) written in Java (or
another JVM language).

For help in writing correct JMH tests, the best place to start is the [sample code](https://hg.openjdk.java.net/code-tools/jmh/file/tip/jmh-samples/src/main/java/org/openjdk/jmh/samples/) provided
by the JMH project.

Typically, JMH is expected to be run as a separate Maven project. The jmh-benchmarks module uses
the [gradle shadow jar](https://github.com/johnrengelman/shadow) plugin to emulate this behavior by creating the required
uber-jar file containing the benchmarking code and the required JMH classes.

JMH is highly configurable, and users are encouraged to look through the samples for suggestions
on what options are available. A good tutorial for using JMH can be found [here](http://tutorials.jenkov.com/java-performance/jmh.html#return-value-from-benchmark-method).
### Gradle Tasks / Running benchmarks in gradle

If no benchmark mode is specified, the default mode, throughput, is used. The gradle tasks are assumed to be
run with `./gradlew` from the root of the Kafka project.

* jmh-benchmarks:shadowJar - creates the uber jar required to run the benchmarks.
* jmh-benchmarks:jmh - runs the `clean` and `shadowJar` tasks followed by all the benchmarks.
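
For example, to build the uber jar and then run all of the benchmarks:

```text
./gradlew jmh-benchmarks:jmh
```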

### Using the jmh script

If you want to set specific JMH flags, or to run only certain tests, passing arguments via
gradle tasks is cumbersome. Instead you can use the `jmh.sh` script. NOTE: the script must be run
from either the root of the Kafka repository or the jmh-benchmarks directory.

* Run a specific test with the fork count (number of forks, not iterations) set to 2: `./jmh.sh -f 2 LRUCacheBenchmark`
* By default all JMH output goes to stdout. To run a benchmark and capture the results in a file:
`./jmh.sh -f 2 -o benchmarkResults.txt LRUCacheBenchmark`

### Running JMH outside of gradle

The JMH benchmarks can be run outside of gradle as you would with any executable jar file:

`java -jar <kafka-repo-dir>/jmh-benchmarks/build/libs/kafka-jmh-benchmarks-all.jar -f2 LRUCacheBenchmark`

### JMH Options

Some common JMH options are:

```text
   -e <regexp+>                Benchmarks to exclude from the run.

   -f <int>                    How many times to fork a single benchmark. Use 0 to
                               disable forking altogether. Warning: disabling
                               forking may have detrimental impact on benchmark
                               and infrastructure reliability, you might want
                               to use different warmup mode instead.

   -o <filename>               Redirect human-readable output to a given file.

   -v <mode>                   Verbosity mode. Available modes are: [SILENT, NORMAL,
                               EXTRA]
```

To view all options run jmh with the `-h` flag.
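
### Writing a new benchmark

Benchmarks in this module are plain JMH classes under `jmh-benchmarks/src/main/java`. The sketch below
shows the typical shape of one; the package and class names here are illustrative only, not files in this
module:

```java
package org.apache.kafka.jmh.example; // hypothetical package, for illustration only

import java.util.concurrent.TimeUnit;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

@State(Scope.Benchmark)          // fields below are shared benchmark state
@Fork(value = 1)                 // run in one forked JVM
@Warmup(iterations = 5)          // discard 5 iterations while the JIT warms up
@Measurement(iterations = 15)    // then measure 15 iterations
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class ExampleBenchmark {
    private String input = "12345";

    @Benchmark
    public int measureParse() {
        // return the result so JMH consumes it and dead-code elimination cannot remove the work
        return Integer.parseInt(input);
    }
}
```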

jmh-benchmarks/jmh.sh Executable file

@@ -0,0 +1,42 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

base_dir=$(dirname "$0")
jmh_project_name="jmh-benchmarks"

if [ "${base_dir}" = "." ]; then
    gradlew_dir=".."
elif [ "${base_dir##./}" = "${jmh_project_name}" ]; then
    gradlew_dir="."
else
    echo "JMH Benchmarks need to be run from the root of the kafka repository or the 'jmh-benchmarks' directory"
    exit 1
fi

gradleCmd="${gradlew_dir}/gradlew"
libDir="${base_dir}/build/libs"

echo "running gradlew :jmh-benchmarks:clean :jmh-benchmarks:shadowJar in quiet mode"
"$gradleCmd" -q :jmh-benchmarks:clean :jmh-benchmarks:shadowJar
echo "gradle build done"

echo "running JMH with args: $*"
java -jar "${libDir}/kafka-jmh-benchmarks-all.jar" "$@"
echo "JMH benchmarks done"

jmh-benchmarks/src/main/java/org/apache/kafka/jmh/cache/LRUCacheBenchmark.java Normal file

@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.cache;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.common.cache.LRUCache;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
/**
* This is a simple example of a JMH benchmark.
*
* The sample code provided by the JMH project is a great place to start learning how to write correct benchmarks:
* http://hg.openjdk.java.net/code-tools/jmh/file/tip/jmh-samples/src/main/java/org/openjdk/jmh/samples/
*/
@State(Scope.Thread)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class LRUCacheBenchmark {
private static final int DISTINCT_KEYS = 10_000;
private static final String KEY = "the_key_to_use";
private static final String VALUE = "the quick brown fox jumped over the lazy dog the olympics are about to start";
private final String[] keys = new String[DISTINCT_KEYS];
private final String[] values = new String[DISTINCT_KEYS];
private LRUCache<String, String> lruCache;
private long counter = 0;
@Setup(Level.Trial)
public void setUp() {
for (int i = 0; i < DISTINCT_KEYS; ++i) {
keys[i] = KEY + i;
values[i] = VALUE + i;
}
lruCache = new LRUCache<>(100);
}
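// put a rotating key, then read it straight back; returning the value lets JMH
// consume it so the lookup cannot be optimised away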
@Benchmark
public String testCachePerformance() {
counter++;
int index = (int) (counter % DISTINCT_KEYS);
String hashkey = keys[index];
lruCache.put(hashkey, values[index]);
return lruCache.get(hashkey);
}
public static void main(String[] args) throws RunnerException {
Options opt = new OptionsBuilder()
.include(LRUCacheBenchmark.class.getSimpleName())
.forks(2)
.build();
new Runner(opt).run();
}
}

jmh-benchmarks/src/main/java/org/apache/kafka/jmh/common/TopicBenchmark.java Normal file

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.common;
import org.apache.kafka.common.internals.Topic;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 5)
@Measurement(iterations = 15)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class TopicBenchmark {
@State(Scope.Thread)
public static class BenchState {
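// topic names of increasing length, so results show how validation cost scales with name length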
@Param({"topic", "longer-topic-name", "very-long-topic-name.with_more_text"})
public String topicName;
}
@Benchmark
public BenchState testValidate(BenchState state) {
// validate doesn't return anything, so return `state` to prevent the JVM from optimising the whole call away
Topic.validate(state.topicName);
return state;
}
}

jmh-benchmarks/src/main/java/org/apache/kafka/jmh/fetcher/ReplicaFetcherThreadBenchmark.java Normal file

@@ -0,0 +1,318 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.fetcher;
import kafka.api.ApiVersion$;
import kafka.cluster.BrokerEndPoint;
import kafka.cluster.DelayedOperations;
import kafka.cluster.Partition;
import kafka.cluster.PartitionStateStore;
import kafka.log.CleanerConfig;
import kafka.log.Defaults;
import kafka.log.LogAppendInfo;
import kafka.log.LogConfig;
import kafka.log.LogManager;
import kafka.server.BrokerState;
import kafka.server.BrokerTopicStats;
import kafka.server.FailedPartitions;
import kafka.server.KafkaConfig;
import kafka.server.LogDirFailureChannel;
import kafka.server.MetadataCache;
import kafka.server.OffsetAndEpoch;
import kafka.server.OffsetTruncationState;
import kafka.server.ReplicaFetcherThread;
import kafka.server.ReplicaManager;
import kafka.server.ReplicaQuota;
import kafka.server.checkpoints.OffsetCheckpoints;
import kafka.utils.KafkaScheduler;
import kafka.utils.Pool;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.message.LeaderAndIsrRequestData;
import org.apache.kafka.common.metrics.Metrics;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.record.BaseRecords;
import org.apache.kafka.common.record.Records;
import org.apache.kafka.common.record.RecordsSend;
import org.apache.kafka.common.requests.EpochEndOffset;
import org.apache.kafka.common.requests.FetchRequest;
import org.apache.kafka.common.requests.FetchResponse;
import org.apache.kafka.common.requests.OffsetsForLeaderEpochRequest;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.mockito.Mockito;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import scala.Option;
import scala.collection.Iterator;
import scala.collection.JavaConverters;
import scala.compat.java8.OptionConverters;
import scala.collection.Map;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 5)
@Measurement(iterations = 15)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class ReplicaFetcherThreadBenchmark {
@Param({"100", "500", "1000", "5000"})
private int partitionCount;
private ReplicaFetcherBenchThread fetcher;
private LogManager logManager;
private File logDir = new File(System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString());
private KafkaScheduler scheduler = new KafkaScheduler(1, "scheduler", true);
private Pool<TopicPartition, Partition> pool = new Pool<TopicPartition, Partition>(Option.empty());
@Setup(Level.Trial)
public void setup() throws IOException {
if (!logDir.mkdir())
throw new IOException("error creating test directory");
scheduler.startup();
Properties props = new Properties();
props.put("zookeeper.connect", "127.0.0.1:9999");
KafkaConfig config = new KafkaConfig(props);
LogConfig logConfig = createLogConfig();
List<File> logDirs = Collections.singletonList(logDir);
BrokerTopicStats brokerTopicStats = new BrokerTopicStats();
LogDirFailureChannel logDirFailureChannel = Mockito.mock(LogDirFailureChannel.class);
logManager = new LogManager(JavaConverters.asScalaIteratorConverter(logDirs.iterator()).asScala().toSeq(),
JavaConverters.asScalaIteratorConverter(new ArrayList<File>().iterator()).asScala().toSeq(),
new scala.collection.mutable.HashMap<>(),
logConfig,
new CleanerConfig(0, 0, 0, 0, 0, 0.0, 0, false, "MD5"),
1,
1000L,
10000L,
10000L,
1000L,
60000,
scheduler,
new BrokerState(),
brokerTopicStats,
logDirFailureChannel,
Time.SYSTEM);
LinkedHashMap<TopicPartition, FetchResponse.PartitionData<BaseRecords>> initialFetched = new LinkedHashMap<>();
scala.collection.mutable.Map<TopicPartition, OffsetAndEpoch> offsetAndEpochs = new scala.collection.mutable.HashMap<>();
for (int i = 0; i < partitionCount; i++) {
TopicPartition tp = new TopicPartition("topic", i);
List<Integer> replicas = Arrays.asList(0, 1, 2);
LeaderAndIsrRequestData.LeaderAndIsrPartitionState partitionState = new LeaderAndIsrRequestData.LeaderAndIsrPartitionState()
.setControllerEpoch(0)
.setLeader(0)
.setLeaderEpoch(0)
.setIsr(replicas)
.setZkVersion(1)
.setReplicas(replicas)
.setIsNew(true);
PartitionStateStore partitionStateStore = Mockito.mock(PartitionStateStore.class);
Mockito.when(partitionStateStore.fetchTopicConfig()).thenReturn(new Properties());
OffsetCheckpoints offsetCheckpoints = Mockito.mock(OffsetCheckpoints.class);
Mockito.when(offsetCheckpoints.fetch(logDir.getAbsolutePath(), tp)).thenReturn(Option.apply(0L));
Partition partition = new Partition(tp, 100, ApiVersion$.MODULE$.latestVersion(),
0, Time.SYSTEM, partitionStateStore, new DelayedOperationsMock(tp),
Mockito.mock(MetadataCache.class), logManager);
partition.makeFollower(0, partitionState, 0, offsetCheckpoints);
pool.put(tp, partition);
offsetAndEpochs.put(tp, new OffsetAndEpoch(0, 0));
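// a dummy empty records instance; the benchmark measures fetcher bookkeeping, not record processing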
BaseRecords fetched = new BaseRecords() {
@Override
public int sizeInBytes() {
return 0;
}
@Override
public RecordsSend toSend(String destination) {
return null;
}
};
initialFetched.put(tp, new FetchResponse.PartitionData<>(Errors.NONE, 0, 0, 0,
new LinkedList<>(), fetched));
}
ReplicaManager replicaManager = Mockito.mock(ReplicaManager.class);
Mockito.when(replicaManager.brokerTopicStats()).thenReturn(brokerTopicStats);
fetcher = new ReplicaFetcherBenchThread(config, replicaManager, pool);
fetcher.addPartitions(offsetAndEpochs);
// force a pass to move partitions to fetching state. We do this in the setup phase
// so that we do not measure this time as part of the steady state work
fetcher.doWork();
// handle response to engage the incremental fetch session handler
fetcher.fetchSessionHandler().handleResponse(new FetchResponse<>(Errors.NONE, initialFetched, 0, 999));
}
@TearDown(Level.Trial)
public void tearDown() throws IOException {
logManager.shutdown();
scheduler.shutdown();
Utils.delete(logDir);
}
@Benchmark
public long testFetcher() {
fetcher.doWork();
return fetcher.fetcherStats().requestRate().count();
}
// avoid mocked DelayedOperations to avoid mocked class affecting benchmark results
private static class DelayedOperationsMock extends DelayedOperations {
DelayedOperationsMock(TopicPartition topicPartition) {
super(topicPartition, null, null, null);
}
@Override
public int numDelayedDelete() {
return 0;
}
}
private static LogConfig createLogConfig() {
Properties logProps = new Properties();
logProps.put(LogConfig.SegmentMsProp(), Defaults.SegmentMs());
logProps.put(LogConfig.SegmentBytesProp(), Defaults.SegmentSize());
logProps.put(LogConfig.RetentionMsProp(), Defaults.RetentionMs());
logProps.put(LogConfig.RetentionBytesProp(), Defaults.RetentionSize());
logProps.put(LogConfig.SegmentJitterMsProp(), Defaults.SegmentJitterMs());
logProps.put(LogConfig.CleanupPolicyProp(), Defaults.CleanupPolicy());
logProps.put(LogConfig.MaxMessageBytesProp(), Defaults.MaxMessageSize());
logProps.put(LogConfig.IndexIntervalBytesProp(), Defaults.IndexInterval());
logProps.put(LogConfig.SegmentIndexBytesProp(), Defaults.MaxIndexSize());
logProps.put(LogConfig.MessageFormatVersionProp(), Defaults.MessageFormatVersion());
logProps.put(LogConfig.FileDeleteDelayMsProp(), Defaults.FileDeleteDelayMs());
return LogConfig.apply(logProps, new scala.collection.immutable.HashSet<>());
}
static class ReplicaFetcherBenchThread extends ReplicaFetcherThread {
private final Pool<TopicPartition, Partition> pool;
ReplicaFetcherBenchThread(KafkaConfig config,
ReplicaManager replicaManager,
Pool<TopicPartition,
Partition> partitions) {
super("name",
3,
new BrokerEndPoint(3, "host", 3000),
config,
new FailedPartitions(),
replicaManager,
new Metrics(),
Time.SYSTEM,
new ReplicaQuota() {
@Override
public boolean isQuotaExceeded() {
return false;
}
@Override
public void record(long value) {
}
@Override
public boolean isThrottled(TopicPartition topicPartition) {
return false;
}
},
Option.empty());
pool = partitions;
}
@Override
public Option<Object> latestEpoch(TopicPartition topicPartition) {
return Option.apply(0);
}
@Override
public long logStartOffset(TopicPartition topicPartition) {
return pool.get(topicPartition).localLogOrException().logStartOffset();
}
@Override
public long logEndOffset(TopicPartition topicPartition) {
return 0;
}
@Override
public void truncate(TopicPartition tp, OffsetTruncationState offsetTruncationState) {
// pretend to truncate to move to Fetching state
}
@Override
public Option<OffsetAndEpoch> endOffsetForEpoch(TopicPartition topicPartition, int epoch) {
return OptionConverters.toScala(Optional.of(new OffsetAndEpoch(0, 0)));
}
@Override
public Option<LogAppendInfo> processPartitionData(TopicPartition topicPartition, long fetchOffset, FetchResponse.PartitionData partitionData) {
return Option.empty();
}
@Override
public long fetchEarliestOffsetFromLeader(TopicPartition topicPartition, int currentLeaderEpoch) {
return 0;
}
@Override
public Map<TopicPartition, EpochEndOffset> fetchEpochEndOffsets(Map<TopicPartition, OffsetsForLeaderEpochRequest.PartitionData> partitions) {
scala.collection.mutable.Map<TopicPartition, EpochEndOffset> endOffsets = new scala.collection.mutable.HashMap<>();
Iterator<TopicPartition> iterator = partitions.keys().iterator();
while (iterator.hasNext()) {
endOffsets.put(iterator.next(), new EpochEndOffset(0, 100));
}
return endOffsets;
}
@Override
public Map<TopicPartition, FetchResponse.PartitionData<Records>> fetchFromLeader(FetchRequest.Builder fetchRequest) {
return new scala.collection.mutable.HashMap<>();
}
}
}

jmh-benchmarks/src/main/java/org/apache/kafka/jmh/fetchsession/FetchSessionBenchmark.java Normal file

@@ -0,0 +1,119 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.fetchsession;
import org.apache.kafka.clients.FetchSessionHandler;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.requests.FetchRequest;
import org.apache.kafka.common.requests.FetchResponse;
import org.apache.kafka.common.utils.LogContext;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 5)
@Measurement(iterations = 10)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class FetchSessionBenchmark {
private static final LogContext LOG_CONTEXT = new LogContext("[BenchFetchSessionHandler]=");
@Param(value = {"10", "100", "1000"})
private int partitionCount;
@Param(value = {"0", "10", "100", "1000"})
private int updatedPercentage;
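// whether to pass the expected partition count to newBuilder so its internal map is presized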
@Param(value = {"false", "true"})
private boolean presize;
private LinkedHashMap<TopicPartition, FetchRequest.PartitionData> fetches;
private FetchSessionHandler handler;
@Setup(Level.Trial)
public void setUp() {
fetches = new LinkedHashMap<>();
handler = new FetchSessionHandler(LOG_CONTEXT, 1);
FetchSessionHandler.Builder builder = handler.newBuilder();
LinkedHashMap<TopicPartition, FetchResponse.PartitionData<MemoryRecords>> respMap = new LinkedHashMap<>();
for (int i = 0; i < partitionCount; i++) {
TopicPartition tp = new TopicPartition("foo", i);
FetchRequest.PartitionData partitionData = new FetchRequest.PartitionData(0, 0, 200,
Optional.empty());
fetches.put(tp, partitionData);
builder.add(tp, partitionData);
respMap.put(tp, new FetchResponse.PartitionData<>(
Errors.NONE,
0L,
0L,
0,
null,
null));
}
builder.build();
// build and handle an initial response so that the next fetch will be incremental
handler.handleResponse(new FetchResponse<>(Errors.NONE, respMap, 0, 1));
int counter = 0;
for (TopicPartition topicPartition: new ArrayList<>(fetches.keySet())) {
if (updatedPercentage != 0 && counter % (100 / updatedPercentage) == 0) {
// reorder in fetch session, and update log start offset
fetches.remove(topicPartition);
fetches.put(topicPartition, new FetchRequest.PartitionData(50, 40, 200,
Optional.empty()));
}
counter++;
}
}
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public void incrementalFetchSessionBuild() {
FetchSessionHandler.Builder builder;
if (presize)
builder = handler.newBuilder(fetches.size(), true);
else
builder = handler.newBuilder();
for (Map.Entry<TopicPartition, FetchRequest.PartitionData> entry: fetches.entrySet()) {
builder.add(entry.getKey(), entry.getValue());
}
builder.build();
}
}

jmh-benchmarks/src/main/java/org/apache/kafka/jmh/partition/UpdateFollowerFetchStateBenchmark.java Normal file

@@ -0,0 +1,180 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.partition;
import kafka.api.ApiVersion$;
import kafka.cluster.DelayedOperations;
import kafka.cluster.Partition;
import kafka.cluster.PartitionStateStore;
import kafka.log.CleanerConfig;
import kafka.log.Defaults;
import kafka.log.LogConfig;
import kafka.log.LogManager;
import kafka.server.BrokerState;
import kafka.server.BrokerTopicStats;
import kafka.server.LogDirFailureChannel;
import kafka.server.LogOffsetMetadata;
import kafka.server.MetadataCache;
import kafka.server.checkpoints.OffsetCheckpoints;
import kafka.utils.KafkaScheduler;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.message.LeaderAndIsrRequestData.LeaderAndIsrPartitionState;
import org.apache.kafka.common.utils.Time;
import org.mockito.Mockito;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import scala.Option;
import scala.collection.JavaConverters;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 5)
@Measurement(iterations = 15)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class UpdateFollowerFetchStateBenchmark {
private TopicPartition topicPartition = new TopicPartition(UUID.randomUUID().toString(), 0);
private File logDir = new File(System.getProperty("java.io.tmpdir"), topicPartition.toString());
private KafkaScheduler scheduler = new KafkaScheduler(1, "scheduler", true);
private BrokerTopicStats brokerTopicStats = new BrokerTopicStats();
private LogDirFailureChannel logDirFailureChannel = Mockito.mock(LogDirFailureChannel.class);
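// offset reported as fetched by the simulated followers, advanced by the benchmark body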
private long nextOffset = 0;
private LogManager logManager;
private Partition partition;
@Setup(Level.Trial)
public void setUp() {
scheduler.startup();
LogConfig logConfig = createLogConfig();
List<File> logDirs = Collections.singletonList(logDir);
logManager = new LogManager(JavaConverters.asScalaIteratorConverter(logDirs.iterator()).asScala().toSeq(),
JavaConverters.asScalaIteratorConverter(new ArrayList<File>().iterator()).asScala().toSeq(),
new scala.collection.mutable.HashMap<>(),
logConfig,
new CleanerConfig(0, 0, 0, 0, 0, 0.0, 0, false, "MD5"),
1,
1000L,
10000L,
10000L,
1000L,
60000,
scheduler,
new BrokerState(),
brokerTopicStats,
logDirFailureChannel,
Time.SYSTEM);
OffsetCheckpoints offsetCheckpoints = Mockito.mock(OffsetCheckpoints.class);
Mockito.when(offsetCheckpoints.fetch(logDir.getAbsolutePath(), topicPartition)).thenReturn(Option.apply(0L));
DelayedOperations delayedOperations = new DelayedOperationsMock();
// one leader, plus two followers
List<Integer> replicas = new ArrayList<>();
replicas.add(0);
replicas.add(1);
replicas.add(2);
LeaderAndIsrPartitionState partitionState = new LeaderAndIsrPartitionState()
.setControllerEpoch(0)
.setLeader(0)
.setLeaderEpoch(0)
.setIsr(replicas)
.setZkVersion(1)
.setReplicas(replicas)
.setIsNew(true);
PartitionStateStore partitionStateStore = Mockito.mock(PartitionStateStore.class);
Mockito.when(partitionStateStore.fetchTopicConfig()).thenReturn(new Properties());
partition = new Partition(topicPartition, 100,
ApiVersion$.MODULE$.latestVersion(), 0, Time.SYSTEM,
partitionStateStore, delayedOperations,
Mockito.mock(MetadataCache.class), logManager);
partition.makeLeader(0, partitionState, 0, offsetCheckpoints);
}
// avoid mocked DelayedOperations to avoid mocked class affecting benchmark results
private class DelayedOperationsMock extends DelayedOperations {
DelayedOperationsMock() {
super(topicPartition, null, null, null);
}
@Override
public int numDelayedDelete() {
return 0;
}
}
@TearDown(Level.Trial)
public void tearDown() {
logManager.shutdown();
scheduler.shutdown();
}
private LogConfig createLogConfig() {
Properties logProps = new Properties();
logProps.put(LogConfig.SegmentMsProp(), Defaults.SegmentMs());
logProps.put(LogConfig.SegmentBytesProp(), Defaults.SegmentSize());
logProps.put(LogConfig.RetentionMsProp(), Defaults.RetentionMs());
logProps.put(LogConfig.RetentionBytesProp(), Defaults.RetentionSize());
logProps.put(LogConfig.SegmentJitterMsProp(), Defaults.SegmentJitterMs());
logProps.put(LogConfig.CleanupPolicyProp(), Defaults.CleanupPolicy());
logProps.put(LogConfig.MaxMessageBytesProp(), Defaults.MaxMessageSize());
logProps.put(LogConfig.IndexIntervalBytesProp(), Defaults.IndexInterval());
logProps.put(LogConfig.SegmentIndexBytesProp(), Defaults.MaxIndexSize());
logProps.put(LogConfig.MessageFormatVersionProp(), Defaults.MessageFormatVersion());
logProps.put(LogConfig.FileDeleteDelayMsProp(), Defaults.FileDeleteDelayMs());
return LogConfig.apply(logProps, new scala.collection.immutable.HashSet<>());
}
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public void updateFollowerFetchStateBench() {
// measure the impact of two follower fetches on the leader
partition.updateFollowerFetchState(1, new LogOffsetMetadata(nextOffset, nextOffset, 0),
0, 1, nextOffset, nextOffset);
partition.updateFollowerFetchState(2, new LogOffsetMetadata(nextOffset, nextOffset, 0),
0, 1, nextOffset, nextOffset);
nextOffset++;
}
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public void updateFollowerFetchStateBenchNoChange() {
// measure the impact of two follower fetches on the leader when the follower didn't
// end up fetching anything
partition.updateFollowerFetchState(1, new LogOffsetMetadata(nextOffset, nextOffset, 0),
0, 1, 100, nextOffset);
partition.updateFollowerFetchState(2, new LogOffsetMetadata(nextOffset, nextOffset, 0),
0, 1, 100, nextOffset);
}
}

jmh-benchmarks/src/main/java/org/apache/kafka/jmh/producer/ProducerRecordBenchmark.java Normal file

@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.util.concurrent.TimeUnit;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 5)
@Measurement(iterations = 15)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class ProducerRecordBenchmark {
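// measures the construction cost of a minimal record (null partition, key, timestamp, and headers)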
@Benchmark
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public ProducerRecord<String, String> constructorBenchmark() {
return new ProducerRecord<>("topic", "value");
}
}

jmh-benchmarks/src/main/java/org/apache/kafka/jmh/record/RecordBatchIterationBenchmark.java Normal file

@@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.jmh.record;
import org.apache.kafka.common.record.AbstractRecords;
import org.apache.kafka.common.record.BufferSupplier;
import org.apache.kafka.common.record.CompressionType;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.record.MemoryRecordsBuilder;
import org.apache.kafka.common.record.MutableRecordBatch;
import org.apache.kafka.common.record.Record;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.kafka.common.record.TimestampType;
import org.apache.kafka.common.utils.CloseableIterator;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Random;
import static org.apache.kafka.common.record.RecordBatch.CURRENT_MAGIC_VALUE;
@State(Scope.Benchmark)
@Fork(value = 1)
@Warmup(iterations = 5)
@Measurement(iterations = 15)
public class RecordBatchIterationBenchmark {
private final Random random = new Random(0);
private final int batchCount = 100;
public enum Bytes {
RANDOM, ONES
}
@Param(value = {"10", "50", "200", "500"})
private int maxBatchSize = 200;
@Param(value = {"LZ4", "SNAPPY", "GZIP", "ZSTD", "NONE"})
private CompressionType compressionType = CompressionType.NONE;
@Param(value = {"1", "2"})
private byte messageVersion = CURRENT_MAGIC_VALUE;
@Param(value = {"100", "1000", "10000", "100000"})
private int messageSize = 1000;
@Param(value = {"RANDOM", "ONES"})
private Bytes bytes = Bytes.RANDOM;
// zero starting offset is much faster for v1 batches, but that will almost never happen
private final int startingOffset = 42;
// Used by measureSingleMessage
private ByteBuffer singleBatchBuffer;
// Used by measureVariableBatchSize
private ByteBuffer[] batchBuffers;
private int[] batchSizes;
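// buffer supplier reused across invocations, as the broker does when decompressing batches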
private BufferSupplier bufferSupplier;
@Setup
public void init() {
bufferSupplier = BufferSupplier.create();
singleBatchBuffer = createBatch(1);
batchBuffers = new ByteBuffer[batchCount];
batchSizes = new int[batchCount];
for (int i = 0; i < batchCount; ++i) {
int size = random.nextInt(maxBatchSize) + 1;
batchBuffers[i] = createBatch(size);
batchSizes[i] = size;
}
}
private ByteBuffer createBatch(int batchSize) {
byte[] value = new byte[messageSize];
final ByteBuffer buf = ByteBuffer.allocate(
AbstractRecords.estimateSizeInBytesUpperBound(messageVersion, compressionType, new byte[0], value,
Record.EMPTY_HEADERS) * batchSize
);
final MemoryRecordsBuilder builder =
MemoryRecords.builder(buf, messageVersion, compressionType, TimestampType.CREATE_TIME, startingOffset);
for (int i = 0; i < batchSize; ++i) {
switch (bytes) {
case ONES:
Arrays.fill(value, (byte) 1);
break;
case RANDOM:
random.nextBytes(value);
break;
}
builder.append(0, null, value);
}
return builder.build().buffer();
}
@Benchmark
public void measureIteratorForBatchWithSingleMessage(Blackhole bh) throws IOException {
for (RecordBatch batch : MemoryRecords.readableRecords(singleBatchBuffer.duplicate()).batches()) {
try (CloseableIterator<Record> iterator = batch.streamingIterator(bufferSupplier)) {
while (iterator.hasNext())
bh.consume(iterator.next());
}
}
}
@OperationsPerInvocation(value = batchCount)
@Fork(jvmArgsAppend = "-Xmx8g")
@Benchmark
public void measureStreamingIteratorForVariableBatchSize(Blackhole bh) throws IOException {
for (int i = 0; i < batchCount; ++i) {
for (RecordBatch batch : MemoryRecords.readableRecords(batchBuffers[i].duplicate()).batches()) {
try (CloseableIterator<Record> iterator = batch.streamingIterator(bufferSupplier)) {
while (iterator.hasNext())
bh.consume(iterator.next());
}
}
}
}
@OperationsPerInvocation(value = batchCount)
@Fork(jvmArgsAppend = "-Xmx8g")
@Benchmark
public void measureSkipIteratorForVariableBatchSize(Blackhole bh) throws IOException {
for (int i = 0; i < batchCount; ++i) {
for (MutableRecordBatch batch : MemoryRecords.readableRecords(batchBuffers[i].duplicate()).batches()) {
try (CloseableIterator<Record> iterator = batch.skipKeyValueIterator(bufferSupplier)) {
while (iterator.hasNext())
bh.consume(iterator.next());
}
}
}
}
}