mirror of
https://github.com/didi/KnowStreaming.git
synced 2026-01-05 13:08:48 +08:00
Add km module kafka
This commit is contained in:
@@ -0,0 +1,164 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from ducktape.tests.test import Test
|
||||
from ducktape.mark.resource import cluster
|
||||
from ducktape.mark import parametrize, matrix
|
||||
from kafkatest.tests.kafka_test import KafkaTest
|
||||
|
||||
from kafkatest.services.performance.streams_performance import StreamsSimpleBenchmarkService
|
||||
from kafkatest.services.zookeeper import ZookeeperService
|
||||
from kafkatest.services.kafka import KafkaService
|
||||
from kafkatest.version import DEV_BRANCH
|
||||
|
||||
# Individual benchmark names, grouped by category. Each group can be run
# one-by-one, or all at once via the aggregate names defined below.
STREAMS_SIMPLE_TESTS = [
    "streamprocess",
    "streamprocesswithsink",
    "streamprocesswithstatestore",
    "streamprocesswithwindowstore",
]
STREAMS_COUNT_TESTS = [
    "streamcount",
    "streamcountwindowed",
]
STREAMS_JOIN_TESTS = [
    "streamtablejoin",
    "streamstreamjoin",
    "tabletablejoin",
]
# Baseline (non-Streams) consumer/producer benchmarks.
NON_STREAMS_TESTS = [
    "consume",
    "consumeproduce",
]

# Aggregate test names: each expands into every member of the matching
# group(s) above when passed to test_simple_benchmark.
ALL_TEST = "all"
STREAMS_SIMPLE_TEST = "streams-simple"
STREAMS_COUNT_TEST = "streams-count"
STREAMS_JOIN_TEST = "streams-join"
class StreamsSimpleBenchmarkTest(Test):
    """
    Simple benchmark of Kafka Streams.

    Brings up a ZooKeeper/Kafka cluster, pre-populates the source topics in
    a load phase, then runs one or more named benchmark drivers and returns
    the collected throughput and JMX metrics keyed by test name.
    """

    def __init__(self, test_context):
        super(StreamsSimpleBenchmarkTest, self).__init__(test_context)

        # these values could be updated in ad-hoc benchmarks
        self.key_skew = 0          # amount of key skew (0 = uniform keys)
        self.value_size = 1024     # record value size, in bytes
        # Plain int literal: Python 2 promotes to long automatically, and
        # the old "10000000L" suffix is a syntax error under Python 3.
        self.num_records = 10000000
        self.num_threads = 1

        self.replication = 1       # replication factor for all benchmark topics

    @cluster(num_nodes=12)
    @matrix(test=["consume", "consumeproduce",
                  "streamprocess", "streamprocesswithsink", "streamprocesswithstatestore", "streamprocesswithwindowstore",
                  "streamcount", "streamcountwindowed",
                  "streamtablejoin", "streamstreamjoin", "tabletablejoin"],
            scale=[1])
    def test_simple_benchmark(self, test, scale):
        """
        Run simple Kafka Streams benchmark.

        :param test: an individual benchmark name from the matrix above, or
                     one of the aggregate names (ALL_TEST, STREAMS_SIMPLE_TEST,
                     STREAMS_COUNT_TEST, STREAMS_JOIN_TEST) that fan out into
                     a whole group of benchmarks
        :param scale: number of Kafka brokers / parallel driver instances
        :return: dict of collected metrics, keyed by "<metric>-<instance>"
        """
        self.driver = [None] * (scale + 1)

        self.final = {}

        #############
        # SETUP PHASE
        #############
        self.zk = ZookeeperService(self.test_context, num_nodes=1)
        self.zk.start()
        self.kafka = KafkaService(self.test_context, num_nodes=scale, zk=self.zk, version=DEV_BRANCH, topics={
            'simpleBenchmarkSourceTopic1' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSourceTopic2' : { 'partitions': scale, 'replication-factor': self.replication },
            'simpleBenchmarkSinkTopic' : { 'partitions': scale, 'replication-factor': self.replication },
            'yahooCampaigns' : { 'partitions': 20, 'replication-factor': self.replication },
            'yahooEvents' : { 'partitions': 20, 'replication-factor': self.replication }
        })
        self.kafka.log_level = "INFO"
        self.kafka.start()

        # Pick the load generator: join benchmarks (and "all", which includes
        # them) need two populated source topics; everything else needs one.
        load_test = ""
        if test == ALL_TEST:
            load_test = "load-two"
        if test in STREAMS_JOIN_TESTS or test == STREAMS_JOIN_TEST:
            load_test = "load-two"
        if test in STREAMS_COUNT_TESTS or test == STREAMS_COUNT_TEST:
            load_test = "load-one"
        if test in STREAMS_SIMPLE_TESTS or test == STREAMS_SIMPLE_TEST:
            load_test = "load-one"
        if test in NON_STREAMS_TESTS:
            load_test = "load-one"

        ################
        # LOAD PHASE
        ################
        self.load_driver = StreamsSimpleBenchmarkService(self.test_context,
                                                         self.kafka,
                                                         load_test,
                                                         self.num_threads,
                                                         self.num_records,
                                                         self.key_skew,
                                                         self.value_size)

        self.load_driver.start()
        self.load_driver.wait(3600)  # wait at most 60 minutes (3600 seconds)
        self.load_driver.stop()

        # Aggregate names fan out into their member benchmarks; any other
        # name runs as a single benchmark.
        if test == ALL_TEST:
            for single_test in STREAMS_SIMPLE_TESTS + STREAMS_COUNT_TESTS + STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_SIMPLE_TEST:
            for single_test in STREAMS_SIMPLE_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_COUNT_TEST:
            for single_test in STREAMS_COUNT_TESTS:
                self.execute(single_test, scale)
        elif test == STREAMS_JOIN_TEST:
            for single_test in STREAMS_JOIN_TESTS:
                self.execute(single_test, scale)
        else:
            self.execute(test, scale)

        return self.final

    def execute(self, test, scale):
        """
        Run a single named benchmark across `scale` driver instances and
        merge its per-instance metrics into self.final.

        :param test: individual benchmark name understood by
                     StreamsSimpleBenchmarkService
        :param scale: number of parallel driver instances to launch
        """
        ################
        # RUN PHASE
        ################
        for num in range(0, scale):
            self.driver[num] = StreamsSimpleBenchmarkService(self.test_context,
                                                             self.kafka,
                                                             test,
                                                             self.num_threads,
                                                             self.num_records,
                                                             self.key_skew,
                                                             self.value_size)
            self.driver[num].start()

        #######################
        # STOP + COLLECT PHASE
        #######################
        data = [None] * (scale)

        for num in range(0, scale):
            self.driver[num].wait()
            self.driver[num].stop()
            # Fail fast if the driver never reported its performance summary.
            self.driver[num].node.account.ssh("grep Performance %s" % self.driver[num].STDOUT_FILE, allow_fail=False)
            data[num] = self.driver[num].collect_data(self.driver[num].node, "")
            self.driver[num].read_jmx_output_all_nodes()

        for num in range(0, scale):
            # Suffix each metric with the driver instance index so parallel
            # instances don't overwrite each other's results.
            for key in data[num]:
                self.final[key + "-" + str(num)] = data[num][key]

            for key in sorted(self.driver[num].jmx_stats[0]):
                self.logger.info("%s: %s" % (key, self.driver[num].jmx_stats[0][key]))

            self.final[test + "-jmx-avg-" + str(num)] = self.driver[num].average_jmx_value
            self.final[test + "-jmx-max-" + str(num)] = self.driver[num].maximum_jmx_value
Reference in New Issue
Block a user