Add km module kafka

leewei
2023-02-14 16:27:47 +08:00
parent 229140f067
commit 0b8160a714
4039 changed files with 718112 additions and 46204 deletions

View File

@@ -0,0 +1,14 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@@ -0,0 +1,519 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os.path
import random
import signal
import time
import requests
from ducktape.errors import DucktapeError
from ducktape.services.service import Service
from ducktape.utils.util import wait_until
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
class ConnectServiceBase(KafkaPathResolverMixin, Service):
"""Base class for Kafka Connect services providing some common settings and functionality"""
PERSISTENT_ROOT = "/mnt/connect"
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "connect.properties")
# The log file contains normal log4j logs written using a file appender. stdout and stderr are handled separately
# so they can be used for other output, e.g. verifiable source & sink.
LOG_FILE = os.path.join(PERSISTENT_ROOT, "connect.log")
STDOUT_FILE = os.path.join(PERSISTENT_ROOT, "connect.stdout")
STDERR_FILE = os.path.join(PERSISTENT_ROOT, "connect.stderr")
LOG4J_CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "connect-log4j.properties")
PID_FILE = os.path.join(PERSISTENT_ROOT, "connect.pid")
EXTERNAL_CONFIGS_FILE = os.path.join(PERSISTENT_ROOT, "connect-external-configs.properties")
CONNECT_REST_PORT = 8083
HEAP_DUMP_FILE = os.path.join(PERSISTENT_ROOT, "connect_heap_dump.bin")
# Currently the Connect worker supports four startup wait modes:
STARTUP_MODE_INSTANT = 'INSTANT'
"""STARTUP_MODE_INSTANT: Start Connect worker and return immediately"""
STARTUP_MODE_LOAD = 'LOAD'
"""STARTUP_MODE_LOAD: Start Connect worker and return after discovering and loading plugins"""
STARTUP_MODE_LISTEN = 'LISTEN'
"""STARTUP_MODE_LISTEN: Start Connect worker and return after opening the REST port."""
STARTUP_MODE_JOIN = 'JOIN'
"""STARTUP_MODE_JOIN: Start Connect worker and return after joining the group."""
logs = {
"connect_log": {
"path": LOG_FILE,
"collect_default": True},
"connect_stdout": {
"path": STDOUT_FILE,
"collect_default": False},
"connect_stderr": {
"path": STDERR_FILE,
"collect_default": True},
"connect_heap_dump_file": {
"path": HEAP_DUMP_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, kafka, files, startup_timeout_sec = 60):
super(ConnectServiceBase, self).__init__(context, num_nodes)
self.kafka = kafka
self.security_config = kafka.security_config.client_config()
self.files = files
self.startup_mode = self.STARTUP_MODE_LISTEN
self.startup_timeout_sec = startup_timeout_sec
self.environment = {}
self.external_config_template_func = None
def pids(self, node):
"""Return process ids for Kafka Connect processes."""
try:
return [pid for pid in node.account.ssh_capture("cat " + self.PID_FILE, callback=int)]
except Exception:
return []
def set_configs(self, config_template_func, connector_config_templates=None):
"""
Set configurations for the worker and the connector to run on
it. These are not provided in the constructor because the worker
config generally needs access to ZK/Kafka services to
create the configuration.
"""
self.config_template_func = config_template_func
self.connector_config_templates = connector_config_templates
def set_external_configs(self, external_config_template_func):
"""
Set the properties that will be written in the external file properties
as used by the org.apache.kafka.common.config.provider.FileConfigProvider.
When this is used, the worker configuration must also enable the FileConfigProvider.
This is not provided in the constructor in case the worker
config generally needs access to ZK/Kafka services to
create the configuration.
"""
self.external_config_template_func = external_config_template_func
def listening(self, node):
try:
self.list_connectors(node)
self.logger.debug("Connect worker started serving REST at: '%s:%s')", node.account.hostname,
self.CONNECT_REST_PORT)
return True
except requests.exceptions.ConnectionError:
self.logger.debug("REST resources are not loaded yet")
return False
def start(self, mode=None):
if mode:
self.startup_mode = mode
super(ConnectServiceBase, self).start()
def start_and_return_immediately(self, node, worker_type, remote_connector_configs):
cmd = self.start_cmd(node, remote_connector_configs)
self.logger.debug("Connect %s command: %s", worker_type, cmd)
node.account.ssh(cmd)
def start_and_wait_to_load_plugins(self, node, worker_type, remote_connector_configs):
with node.account.monitor_log(self.LOG_FILE) as monitor:
self.start_and_return_immediately(node, worker_type, remote_connector_configs)
monitor.wait_until('Kafka version', timeout_sec=self.startup_timeout_sec,
err_msg="Never saw message indicating Kafka Connect finished startup on node: " +
"%s in condition mode: %s" % (str(node.account), self.startup_mode))
def start_and_wait_to_start_listening(self, node, worker_type, remote_connector_configs):
self.start_and_return_immediately(node, worker_type, remote_connector_configs)
wait_until(lambda: self.listening(node), timeout_sec=self.startup_timeout_sec,
err_msg="Kafka Connect failed to start on node: %s in condition mode: %s" %
(str(node.account), self.startup_mode))
def start_and_wait_to_join_group(self, node, worker_type, remote_connector_configs):
if worker_type != 'distributed':
raise RuntimeError("Cannot wait for joined group message for %s" % worker_type)
with node.account.monitor_log(self.LOG_FILE) as monitor:
self.start_and_return_immediately(node, worker_type, remote_connector_configs)
monitor.wait_until('Joined group', timeout_sec=self.startup_timeout_sec,
err_msg="Never saw message indicating Kafka Connect joined group on node: " +
"%s in condition mode: %s" % (str(node.account), self.startup_mode))
def stop_node(self, node, clean_shutdown=True):
self.logger.info((clean_shutdown and "Cleanly" or "Forcibly") + " stopping Kafka Connect on " + str(node.account))
pids = self.pids(node)
sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL
for pid in pids:
node.account.signal(pid, sig, allow_fail=True)
if clean_shutdown:
for pid in pids:
wait_until(lambda: not node.account.alive(pid), timeout_sec=self.startup_timeout_sec, err_msg="Kafka Connect process on " + str(
node.account) + " took too long to exit")
node.account.ssh("rm -f " + self.PID_FILE, allow_fail=False)
def restart(self, clean_shutdown=True):
# We don't want to do any clean up here, just restart the process.
for node in self.nodes:
self.logger.info("Restarting Kafka Connect on " + str(node.account))
self.restart_node(node, clean_shutdown)
def restart_node(self, node, clean_shutdown=True):
self.stop_node(node, clean_shutdown)
self.start_node(node)
def clean_node(self, node):
node.account.kill_process("connect", clean_shutdown=False, allow_fail=True)
self.security_config.clean_node(node)
other_files = " ".join(self.config_filenames() + self.files)
node.account.ssh("rm -rf -- %s %s" % (ConnectServiceBase.PERSISTENT_ROOT, other_files), allow_fail=False)
def config_filenames(self):
return [os.path.join(self.PERSISTENT_ROOT, "connect-connector-" + str(idx) + ".properties") for idx, template in enumerate(self.connector_config_templates or [])]
def list_connectors(self, node=None, **kwargs):
return self._rest_with_retry('/connectors', node=node, **kwargs)
def create_connector(self, config, node=None, **kwargs):
create_request = {
'name': config['name'],
'config': config
}
return self._rest_with_retry('/connectors', create_request, node=node, method="POST", **kwargs)
def get_connector(self, name, node=None, **kwargs):
return self._rest_with_retry('/connectors/' + name, node=node, **kwargs)
def get_connector_config(self, name, node=None, **kwargs):
return self._rest_with_retry('/connectors/' + name + '/config', node=node, **kwargs)
def set_connector_config(self, name, config, node=None, **kwargs):
# Unlike many other calls, a 409 when setting a connector config is expected if the connector already exists.
# However, we also might see 409s for other reasons (e.g. rebalancing). So we still perform retries at the cost
# of tests possibly taking longer to ultimately fail. Tests that care about this can explicitly override the
# number of retries.
return self._rest_with_retry('/connectors/' + name + '/config', config, node=node, method="PUT", **kwargs)
def get_connector_tasks(self, name, node=None, **kwargs):
return self._rest_with_retry('/connectors/' + name + '/tasks', node=node, **kwargs)
def delete_connector(self, name, node=None, **kwargs):
return self._rest_with_retry('/connectors/' + name, node=node, method="DELETE", **kwargs)
def get_connector_status(self, name, node=None):
return self._rest('/connectors/' + name + '/status', node=node)
def restart_connector(self, name, node=None, **kwargs):
return self._rest_with_retry('/connectors/' + name + '/restart', node=node, method="POST", **kwargs)
def restart_task(self, connector_name, task_id, node=None):
return self._rest('/connectors/' + connector_name + '/tasks/' + str(task_id) + '/restart', node=node, method="POST")
def pause_connector(self, name, node=None):
return self._rest('/connectors/' + name + '/pause', node=node, method="PUT")
def resume_connector(self, name, node=None):
return self._rest('/connectors/' + name + '/resume', node=node, method="PUT")
def list_connector_plugins(self, node=None):
return self._rest('/connector-plugins/', node=node)
def validate_config(self, connector_type, validate_request, node=None):
return self._rest('/connector-plugins/' + connector_type + '/config/validate', validate_request, node=node, method="PUT")
def _rest(self, path, body=None, node=None, method="GET"):
if node is None:
node = random.choice(self.nodes)
meth = getattr(requests, method.lower())
url = self._base_url(node) + path
self.logger.debug("Kafka Connect REST request: %s %s %s %s", node.account.hostname, url, method, body)
resp = meth(url, json=body)
self.logger.debug("%s %s response: %d", url, method, resp.status_code)
if resp.status_code > 400:
self.logger.debug("Connect REST API error for %s: %d %s", resp.url, resp.status_code, resp.text)
raise ConnectRestError(resp.status_code, resp.text, resp.url)
if resp.status_code == 204 or resp.status_code == 202:
return None
else:
return resp.json()
def _rest_with_retry(self, path, body=None, node=None, method="GET", retries=40, retry_backoff=.25):
"""
Invokes a REST API with retries for errors that may occur during normal operation (notably 409 CONFLICT
responses that can occur due to rebalancing or 404 when the connect resources are not initialized yet).
"""
exception_to_throw = None
for i in range(0, retries + 1):
try:
return self._rest(path, body, node, method)
except ConnectRestError as e:
exception_to_throw = e
if e.status != 409 and e.status != 404:
break
time.sleep(retry_backoff)
raise exception_to_throw
def _base_url(self, node):
return 'http://' + node.account.externally_routable_ip + ':' + str(self.CONNECT_REST_PORT)
def append_to_environment_variable(self, envvar, value):
env_opts = self.environment[envvar]
if env_opts is None:
env_opts = "\"%s\"" % value
else:
env_opts = "\"%s %s\"" % (env_opts.strip('\"'), value)
self.environment[envvar] = env_opts
class ConnectStandaloneService(ConnectServiceBase):
"""Runs Kafka Connect in standalone mode."""
def __init__(self, context, kafka, files, startup_timeout_sec = 60):
super(ConnectStandaloneService, self).__init__(context, 1, kafka, files, startup_timeout_sec)
# For convenience since this service only makes sense with a single node
@property
def node(self):
return self.nodes[0]
def start_cmd(self, node, connector_configs):
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG_FILE
heap_kafka_opts = "-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=%s" % \
self.logs["connect_heap_dump_file"]["path"]
other_kafka_opts = self.security_config.kafka_opts.strip('\"')
cmd += "export KAFKA_OPTS=\"%s %s\"; " % (heap_kafka_opts, other_kafka_opts)
for envvar in self.environment:
cmd += "export %s=%s; " % (envvar, str(self.environment[envvar]))
cmd += "%s %s " % (self.path.script("connect-standalone.sh", node), self.CONFIG_FILE)
cmd += " ".join(connector_configs)
cmd += " & echo $! >&3 ) 1>> %s 2>> %s 3> %s" % (self.STDOUT_FILE, self.STDERR_FILE, self.PID_FILE)
return cmd
def start_node(self, node):
node.account.ssh("mkdir -p %s" % self.PERSISTENT_ROOT, allow_fail=False)
self.security_config.setup_node(node)
if self.external_config_template_func:
node.account.create_file(self.EXTERNAL_CONFIGS_FILE, self.external_config_template_func(node))
node.account.create_file(self.CONFIG_FILE, self.config_template_func(node))
node.account.create_file(self.LOG4J_CONFIG_FILE, self.render('connect_log4j.properties', log_file=self.LOG_FILE))
remote_connector_configs = []
for idx, template in enumerate(self.connector_config_templates):
target_file = os.path.join(self.PERSISTENT_ROOT, "connect-connector-" + str(idx) + ".properties")
node.account.create_file(target_file, template)
remote_connector_configs.append(target_file)
self.logger.info("Starting Kafka Connect standalone process on " + str(node.account))
if self.startup_mode == self.STARTUP_MODE_LOAD:
self.start_and_wait_to_load_plugins(node, 'standalone', remote_connector_configs)
elif self.startup_mode == self.STARTUP_MODE_INSTANT:
self.start_and_return_immediately(node, 'standalone', remote_connector_configs)
elif self.startup_mode == self.STARTUP_MODE_JOIN:
self.start_and_wait_to_join_group(node, 'standalone', remote_connector_configs)
else:
# The default mode is to wait until the complete startup of the worker
self.start_and_wait_to_start_listening(node, 'standalone', remote_connector_configs)
if len(self.pids(node)) == 0:
raise RuntimeError("No process ids recorded")
class ConnectDistributedService(ConnectServiceBase):
"""Runs Kafka Connect in distributed mode."""
def __init__(self, context, num_nodes, kafka, files, offsets_topic="connect-offsets",
configs_topic="connect-configs", status_topic="connect-status", startup_timeout_sec = 60):
super(ConnectDistributedService, self).__init__(context, num_nodes, kafka, files, startup_timeout_sec)
self.startup_mode = self.STARTUP_MODE_JOIN
self.offsets_topic = offsets_topic
self.configs_topic = configs_topic
self.status_topic = status_topic
# connector_configs argument is intentionally ignored in distributed service.
def start_cmd(self, node, connector_configs):
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG_FILE
heap_kafka_opts = "-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=%s" % \
self.logs["connect_heap_dump_file"]["path"]
other_kafka_opts = self.security_config.kafka_opts.strip('\"')
cmd += "export KAFKA_OPTS=\"%s %s\"; " % (heap_kafka_opts, other_kafka_opts)
for envvar in self.environment:
cmd += "export %s=%s; " % (envvar, str(self.environment[envvar]))
cmd += "%s %s " % (self.path.script("connect-distributed.sh", node), self.CONFIG_FILE)
cmd += " & echo $! >&3 ) 1>> %s 2>> %s 3> %s" % (self.STDOUT_FILE, self.STDERR_FILE, self.PID_FILE)
return cmd
def start_node(self, node):
node.account.ssh("mkdir -p %s" % self.PERSISTENT_ROOT, allow_fail=False)
self.security_config.setup_node(node)
if self.external_config_template_func:
node.account.create_file(self.EXTERNAL_CONFIGS_FILE, self.external_config_template_func(node))
node.account.create_file(self.CONFIG_FILE, self.config_template_func(node))
node.account.create_file(self.LOG4J_CONFIG_FILE, self.render('connect_log4j.properties', log_file=self.LOG_FILE))
if self.connector_config_templates:
raise DucktapeError("Config files are not valid in distributed mode, submit connectors via the REST API")
self.logger.info("Starting Kafka Connect distributed process on " + str(node.account))
if self.startup_mode == self.STARTUP_MODE_LOAD:
self.start_and_wait_to_load_plugins(node, 'distributed', '')
elif self.startup_mode == self.STARTUP_MODE_INSTANT:
self.start_and_return_immediately(node, 'distributed', '')
elif self.startup_mode == self.STARTUP_MODE_LISTEN:
self.start_and_wait_to_start_listening(node, 'distributed', '')
else:
# The default mode is to wait until the complete startup of the worker
self.start_and_wait_to_join_group(node, 'distributed', '')
if len(self.pids(node)) == 0:
raise RuntimeError("No process ids recorded")
class ErrorTolerance(object):
ALL = "all"
NONE = "none"
class ConnectRestError(RuntimeError):
def __init__(self, status, msg, url):
self.status = status
self.message = msg
self.url = url
def __unicode__(self):
return "Kafka Connect REST call failed: returned " + self.status + " for " + self.url + ". Response: " + self.message
class VerifiableConnector(object):
def messages(self):
"""
Collect and parse the logs from Kafka Connect nodes. Return a list containing all parsed JSON messages generated by
this source.
"""
self.logger.info("Collecting messages from log of %s %s", type(self).__name__, self.name)
records = []
for node in self.cc.nodes:
for line in node.account.ssh_capture('cat ' + self.cc.STDOUT_FILE):
try:
data = json.loads(line)
except ValueError:
self.logger.debug("Ignoring unparseable line: %s", line)
continue
# Filter to only ones matching our name to support multiple verifiable producers
if data['name'] != self.name:
continue
data['node'] = node
records.append(data)
return records
def stop(self):
self.logger.info("Destroying connector %s %s", type(self).__name__, self.name)
self.cc.delete_connector(self.name)
class VerifiableSource(VerifiableConnector):
"""
Helper class for running a verifiable source connector on a Kafka Connect cluster and analyzing the output.
"""
def __init__(self, cc, name="verifiable-source", tasks=1, topic="verifiable", throughput=1000):
self.cc = cc
self.logger = self.cc.logger
self.name = name
self.tasks = tasks
self.topic = topic
self.throughput = throughput
def committed_messages(self):
return filter(lambda m: 'committed' in m and m['committed'], self.messages())
def sent_messages(self):
return filter(lambda m: 'committed' not in m or not m['committed'], self.messages())
def start(self):
self.logger.info("Creating connector VerifiableSourceConnector %s", self.name)
self.cc.create_connector({
'name': self.name,
'connector.class': 'org.apache.kafka.connect.tools.VerifiableSourceConnector',
'tasks.max': self.tasks,
'topic': self.topic,
'throughput': self.throughput
})
class VerifiableSink(VerifiableConnector):
"""
Helper class for running a verifiable sink connector on a Kafka Connect cluster and analyzing the output.
"""
def __init__(self, cc, name="verifiable-sink", tasks=1, topics=["verifiable"]):
self.cc = cc
self.logger = self.cc.logger
self.name = name
self.tasks = tasks
self.topics = topics
def flushed_messages(self):
return filter(lambda m: 'flushed' in m and m['flushed'], self.messages())
def received_messages(self):
return filter(lambda m: 'flushed' not in m or not m['flushed'], self.messages())
def start(self):
self.logger.info("Creating connector VerifiableSinkConnector %s", self.name)
self.cc.create_connector({
'name': self.name,
'connector.class': 'org.apache.kafka.connect.tools.VerifiableSinkConnector',
'tasks.max': self.tasks,
'topics': ",".join(self.topics)
})
class MockSink(object):
def __init__(self, cc, topics, mode=None, delay_sec=10, name="mock-sink"):
self.cc = cc
self.logger = self.cc.logger
self.name = name
self.mode = mode
self.delay_sec = delay_sec
self.topics = topics
def start(self):
self.logger.info("Creating connector MockSinkConnector %s", self.name)
self.cc.create_connector({
'name': self.name,
'connector.class': 'org.apache.kafka.connect.tools.MockSinkConnector',
'tasks.max': 1,
'topics': ",".join(self.topics),
'mock_mode': self.mode,
'delay_ms': self.delay_sec * 1000
})
class MockSource(object):
def __init__(self, cc, mode=None, delay_sec=10, name="mock-source"):
self.cc = cc
self.logger = self.cc.logger
self.name = name
self.mode = mode
self.delay_sec = delay_sec
def start(self):
self.logger.info("Creating connector MockSourceConnector %s", self.name)
self.cc.create_connector({
'name': self.name,
'connector.class': 'org.apache.kafka.connect.tools.MockSourceConnector',
'tasks.max': 1,
'mock_mode': self.mode,
'delay_ms': self.delay_sec * 1000
})

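The classes above are normally driven from a ducktape test rather than run directly. The sketch below is illustrative only and not part of this commit: it assumes `self` is a kafkatest Test with a `self.test_context`, a running KafkaService in `self.kafka`, and a `connect-distributed.properties` template available to `self.render`.

# Hedged usage sketch for ConnectDistributedService with a VerifiableSource.
cc = ConnectDistributedService(self.test_context, num_nodes=3, kafka=self.kafka, files=[])
cc.set_configs(lambda node: self.render("connect-distributed.properties", node=node))
cc.start(mode=ConnectServiceBase.STARTUP_MODE_JOIN)  # block until every worker has joined the group

source = VerifiableSource(cc, topic="test-topic", tasks=2, throughput=1000)
source.start()  # submits the connector through the worker REST API
wait_until(lambda: len(list(source.sent_messages())) > 0, timeout_sec=60,
           err_msg="VerifiableSource never reported any sent records")
source.stop()   # deletes the connector via DELETE /connectors/<name>
cc.stop()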
View File

@@ -0,0 +1,315 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools
import os
from ducktape.cluster.remoteaccount import RemoteCommandError
from ducktape.services.background_thread import BackgroundThreadService
from ducktape.utils.util import wait_until
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.monitor.jmx import JmxMixin
from kafkatest.version import DEV_BRANCH, LATEST_0_8_2, LATEST_0_9, LATEST_0_10_0, V_0_9_0_0, V_0_10_0_0, V_0_11_0_0, V_2_0_0
"""
The console consumer is a tool that reads data from Kafka and outputs it to standard output.
"""
class ConsoleConsumer(KafkaPathResolverMixin, JmxMixin, BackgroundThreadService):
# Root directory for persistent output
PERSISTENT_ROOT = "/mnt/console_consumer"
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stdout")
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "console_consumer.stderr")
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
LOG_FILE = os.path.join(LOG_DIR, "console_consumer.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "console_consumer.properties")
JMX_TOOL_LOG = os.path.join(PERSISTENT_ROOT, "jmx_tool.log")
JMX_TOOL_ERROR_LOG = os.path.join(PERSISTENT_ROOT, "jmx_tool.err.log")
logs = {
"consumer_stdout": {
"path": STDOUT_CAPTURE,
"collect_default": False},
"consumer_stderr": {
"path": STDERR_CAPTURE,
"collect_default": False},
"consumer_log": {
"path": LOG_FILE,
"collect_default": True},
"jmx_log": {
"path" : JMX_TOOL_LOG,
"collect_default": False},
"jmx_err_log": {
"path": JMX_TOOL_ERROR_LOG,
"collect_default": False}
}
def __init__(self, context, num_nodes, kafka, topic, group_id="test-consumer-group", new_consumer=True,
message_validator=None, from_beginning=True, consumer_timeout_ms=None, version=DEV_BRANCH,
client_id="console-consumer", print_key=False, jmx_object_names=None, jmx_attributes=None,
enable_systest_events=False, stop_timeout_sec=35, print_timestamp=False, print_partition=False,
isolation_level="read_uncommitted", jaas_override_variables=None,
kafka_opts_override="", client_prop_file_override="", consumer_properties={}):
"""
Args:
context: standard context
num_nodes: number of nodes to use (this should be 1)
kafka: kafka service
topic: consume from this topic
new_consumer: use new Kafka consumer if True
message_validator: function which returns message or None
from_beginning: consume from beginning if True, else from the end
consumer_timeout_ms: corresponds to consumer.timeout.ms. consumer process ends if time between
successively consumed messages exceeds this timeout. Setting this and
waiting for the consumer to stop is a pretty good way to consume all messages
in a topic.
print_timestamp: if True, print each message's timestamp as well
print_key: if True, print each message's key as well
print_partition: if True, print each message's partition as well
enable_systest_events: if True, console consumer will print additional lifecycle-related information
only available in 0.10.0 and later.
stop_timeout_sec: After stopping a node, wait up to stop_timeout_sec for the node to stop,
and the corresponding background thread to finish successfully.
isolation_level: How to handle transactional messages.
jaas_override_variables: A dict of variables to be used in the jaas.conf template file
kafka_opts_override: Override parameters of the KAFKA_OPTS environment variable
client_prop_file_override: Override client.properties file used by the consumer
consumer_properties: A dict of values to pass in as --consumer-property key=value
"""
JmxMixin.__init__(self, num_nodes=num_nodes, jmx_object_names=jmx_object_names, jmx_attributes=(jmx_attributes or []),
root=ConsoleConsumer.PERSISTENT_ROOT)
BackgroundThreadService.__init__(self, context, num_nodes)
self.kafka = kafka
self.new_consumer = new_consumer
self.group_id = group_id
self.args = {
'topic': topic,
}
self.consumer_timeout_ms = consumer_timeout_ms
for node in self.nodes:
node.version = version
self.from_beginning = from_beginning
self.message_validator = message_validator
self.messages_consumed = {idx: [] for idx in range(1, num_nodes + 1)}
self.clean_shutdown_nodes = set()
self.client_id = client_id
self.print_key = print_key
self.print_partition = print_partition
self.log_level = "TRACE"
self.stop_timeout_sec = stop_timeout_sec
self.isolation_level = isolation_level
self.enable_systest_events = enable_systest_events
if self.enable_systest_events:
# Only available in 0.10.0 and up
assert version >= V_0_10_0_0
self.print_timestamp = print_timestamp
self.jaas_override_variables = jaas_override_variables or {}
self.kafka_opts_override = kafka_opts_override
self.client_prop_file_override = client_prop_file_override
self.consumer_properties = consumer_properties
def prop_file(self, node):
"""Return a string which can be used to create a configuration file appropriate for the given node."""
# Process client configuration
prop_file = self.render('console_consumer.properties')
if hasattr(node, "version") and node.version <= LATEST_0_8_2:
# in 0.8.2.X and earlier, console consumer does not have --timeout-ms option
# instead, we have to pass it through the config file
prop_file += "\nconsumer.timeout.ms=%s\n" % str(self.consumer_timeout_ms)
# Add security properties to the config. If security protocol is not specified,
# use the default in the template properties.
self.security_config = self.kafka.security_config.client_config(prop_file, node, self.jaas_override_variables)
self.security_config.setup_node(node)
prop_file += str(self.security_config)
return prop_file
def start_cmd(self, node):
"""Return the start command appropriate for the given node."""
args = self.args.copy()
args['zk_connect'] = self.kafka.zk_connect_setting()
args['stdout'] = ConsoleConsumer.STDOUT_CAPTURE
args['stderr'] = ConsoleConsumer.STDERR_CAPTURE
args['log_dir'] = ConsoleConsumer.LOG_DIR
args['log4j_config'] = ConsoleConsumer.LOG4J_CONFIG
args['config_file'] = ConsoleConsumer.CONFIG_FILE
args['stdout'] = ConsoleConsumer.STDOUT_CAPTURE
args['jmx_port'] = self.jmx_port
args['console_consumer'] = self.path.script("kafka-console-consumer.sh", node)
args['broker_list'] = self.kafka.bootstrap_servers(self.security_config.security_protocol)
if self.kafka_opts_override:
args['kafka_opts'] = "\"%s\"" % self.kafka_opts_override
else:
args['kafka_opts'] = self.security_config.kafka_opts
cmd = "export JMX_PORT=%(jmx_port)s; " \
"export LOG_DIR=%(log_dir)s; " \
"export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j_config)s\"; " \
"export KAFKA_OPTS=%(kafka_opts)s; " \
"%(console_consumer)s " \
"--topic %(topic)s " \
"--consumer.config %(config_file)s " % args
if self.new_consumer:
assert node.version >= V_0_9_0_0, \
"new_consumer is only supported if version >= 0.9.0.0, version %s" % str(node.version)
if node.version <= LATEST_0_10_0:
cmd += " --new-consumer"
cmd += " --bootstrap-server %(broker_list)s" % args
if node.version >= V_0_11_0_0:
cmd += " --isolation-level %s" % self.isolation_level
else:
assert node.version < V_2_0_0, \
"new_consumer==false is only supported if version < 2.0.0, version %s" % str(node.version)
cmd += " --zookeeper %(zk_connect)s" % args
if self.from_beginning:
cmd += " --from-beginning"
if self.consumer_timeout_ms is not None:
# version 0.8.X and below do not support --timeout-ms option
# This will be added in the properties file instead
if node.version > LATEST_0_8_2:
cmd += " --timeout-ms %s" % self.consumer_timeout_ms
if self.print_timestamp:
cmd += " --property print.timestamp=true"
if self.print_key:
cmd += " --property print.key=true"
if self.print_partition:
cmd += " --property print.partition=true"
# LoggingMessageFormatter was introduced after 0.9
if node.version > LATEST_0_9:
cmd += " --formatter kafka.tools.LoggingMessageFormatter"
if self.enable_systest_events:
# enable systest events is only available in 0.10.0 and later
# check the assertion here as well, in case node.version has been modified
assert node.version >= V_0_10_0_0
cmd += " --enable-systest-events"
if self.consumer_properties is not None:
for k, v in self.consumer_properties.items():
cmd += " --consumer-property %s=%s" % (k, v)
cmd += " 2>> %(stderr)s | tee -a %(stdout)s &" % args
return cmd
def pids(self, node):
return node.account.java_pids(self.java_class_name())
def alive(self, node):
return len(self.pids(node)) > 0
def _worker(self, idx, node):
node.account.ssh("mkdir -p %s" % ConsoleConsumer.PERSISTENT_ROOT, allow_fail=False)
# Create and upload config file
self.logger.info("console_consumer.properties:")
self.security_config = self.kafka.security_config.client_config(node=node,
jaas_override_variables=self.jaas_override_variables)
self.security_config.setup_node(node)
if self.client_prop_file_override:
prop_file = self.client_prop_file_override
else:
prop_file = self.prop_file(node)
self.logger.info(prop_file)
node.account.create_file(ConsoleConsumer.CONFIG_FILE, prop_file)
# Create and upload log properties
log_config = self.render('tools_log4j.properties', log_file=ConsoleConsumer.LOG_FILE)
node.account.create_file(ConsoleConsumer.LOG4J_CONFIG, log_config)
# Run and capture output
cmd = self.start_cmd(node)
self.logger.debug("Console consumer %d command: %s", idx, cmd)
consumer_output = node.account.ssh_capture(cmd, allow_fail=False)
with self.lock:
self.logger.debug("collecting following jmx objects: %s", self.jmx_object_names)
self.start_jmx_tool(idx, node)
for line in consumer_output:
msg = line.strip()
if msg == "shutdown_complete":
# Note that we can only rely on shutdown_complete message if running 0.10.0 or greater
if node in self.clean_shutdown_nodes:
raise Exception("Unexpected shutdown event from consumer, already shutdown. Consumer index: %d" % idx)
self.clean_shutdown_nodes.add(node)
else:
if self.message_validator is not None:
msg = self.message_validator(msg)
if msg is not None:
self.messages_consumed[idx].append(msg)
with self.lock:
self.read_jmx_output(idx, node)
def start_node(self, node):
BackgroundThreadService.start_node(self, node)
def stop_node(self, node):
self.logger.info("%s Stopping node %s" % (self.__class__.__name__, str(node.account)))
node.account.kill_java_processes(self.java_class_name(),
clean_shutdown=True, allow_fail=True)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def clean_node(self, node):
if self.alive(node):
self.logger.warn("%s %s was still alive at cleanup time. Killing forcefully..." %
(self.__class__.__name__, node.account))
JmxMixin.clean_node(self, node)
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=False, allow_fail=True)
node.account.ssh("rm -rf %s" % ConsoleConsumer.PERSISTENT_ROOT, allow_fail=False)
self.security_config.clean_node(node)
def java_class_name(self):
return "ConsoleConsumer"
def has_log_message(self, node, message):
try:
node.account.ssh("grep '%s' %s" % (message, ConsoleConsumer.LOG_FILE))
except RemoteCommandError:
return False
return True
def wait_for_offset_reset(self, node, topic, num_partitions):
for partition in range(num_partitions):
message = "Resetting offset for partition %s-%d" % (topic, partition)
wait_until(lambda: self.has_log_message(node, message),
timeout_sec=60,
err_msg="Offset not reset for partition %s-%d" % (topic, partition))

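A hedged sketch of how a test typically consumes with this service; it assumes `self` is a kafkatest Test with `self.test_context` and a running `self.kafka`, and the topic name is illustrative.

# Start a single console consumer node and wait until it has read at least one message.
consumer = ConsoleConsumer(self.test_context, num_nodes=1, kafka=self.kafka,
                           topic="test-topic", consumer_timeout_ms=60000,
                           message_validator=lambda msg: msg)  # keep every consumed line as-is
consumer.start()
wait_until(lambda: len(consumer.messages_consumed[1]) > 0, timeout_sec=60,
           err_msg="ConsoleConsumer did not consume any messages in time")
consumer.stop()  # clean shutdown; on 0.10.0+ the node is recorded in consumer.clean_shutdown_nodes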
View File

@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Define Consumer configuration property names here.
"""
GROUP_INSTANCE_ID = "group.instance.id"
SESSION_TIMEOUT_MS = "session.timeout.ms"

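These constants are meant to be passed through a consumer's `consumer_properties` argument, where each entry is rendered as a `--consumer-property key=value` flag. A minimal hedged sketch (the ConsoleConsumer usage is an example, not part of this file):

props = {GROUP_INSTANCE_ID: "consumer-instance-1",  # static group membership id
         SESSION_TIMEOUT_MS: 60000}
# ConsoleConsumer(..., consumer_properties=props) would then append
# --consumer-property group.instance.id=consumer-instance-1
# --consumer-property session.timeout.ms=60000
# to the kafka-console-consumer.sh command line.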
View File

@@ -0,0 +1,102 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
"""
DelegationTokens is a wrapper around the kafka-delegation-tokens.sh tool for managing the lifecycle of delegation tokens.
All commands are executed on a secured Kafka node reusing its generated jaas.conf and krb5.conf.
"""
class DelegationTokens(KafkaPathResolverMixin):
def __init__(self, kafka, context):
self.client_properties_content = """
security.protocol=SASL_PLAINTEXT
sasl.kerberos.service.name=kafka
"""
self.context = context
self.command_path = self.path.script("kafka-delegation-tokens.sh")
self.kafka_opts = "KAFKA_OPTS=\"-Djava.security.auth.login.config=/mnt/security/jaas.conf " \
"-Djava.security.krb5.conf=/mnt/security/krb5.conf\" "
self.kafka = kafka
self.bootstrap_server = " --bootstrap-server " + self.kafka.bootstrap_servers('SASL_PLAINTEXT')
self.base_cmd = self.kafka_opts + self.command_path + self.bootstrap_server
self.client_prop_path = os.path.join(self.kafka.PERSISTENT_ROOT, "client.properties")
self.jaas_deleg_conf_path = os.path.join(self.kafka.PERSISTENT_ROOT, "jaas_deleg.conf")
self.token_hmac_path = os.path.join(self.kafka.PERSISTENT_ROOT, "deleg_token_hmac.out")
self.delegation_token_out = os.path.join(self.kafka.PERSISTENT_ROOT, "delegation_token.out")
self.expire_delegation_token_out = os.path.join(self.kafka.PERSISTENT_ROOT, "expire_delegation_token.out")
self.renew_delegation_token_out = os.path.join(self.kafka.PERSISTENT_ROOT, "renew_delegation_token.out")
self.node = self.kafka.nodes[0]
def generate_delegation_token(self, maxlifetimeperiod=-1):
self.node.account.create_file(self.client_prop_path, self.client_properties_content)
cmd = self.base_cmd + " --create" \
" --max-life-time-period %s" \
" --command-config %s > %s" % (maxlifetimeperiod, self.client_prop_path, self.delegation_token_out)
self.node.account.ssh(cmd, allow_fail=False)
def expire_delegation_token(self, hmac):
cmd = self.base_cmd + " --expire" \
" --expiry-time-period -1" \
" --hmac %s" \
" --command-config %s > %s" % (hmac, self.client_prop_path, self.expire_delegation_token_out)
self.node.account.ssh(cmd, allow_fail=False)
def renew_delegation_token(self, hmac, renew_time_period=-1):
cmd = self.base_cmd + " --renew" \
" --renew-time-period %s" \
" --hmac %s" \
" --command-config %s > %s" \
% (renew_time_period, hmac, self.client_prop_path, self.renew_delegation_token_out)
return self.node.account.ssh_capture(cmd, allow_fail=False)
def create_jaas_conf_with_delegation_token(self):
dt = self.parse_delegation_token_out()
jaas_deleg_content = """
KafkaClient {
org.apache.kafka.common.security.scram.ScramLoginModule required
username="%s"
password="%s"
tokenauth=true;
};
""" % (dt["tokenid"], dt["hmac"])
self.node.account.create_file(self.jaas_deleg_conf_path, jaas_deleg_content)
return jaas_deleg_content
def token_hmac(self):
dt = self.parse_delegation_token_out()
return dt["hmac"]
def parse_delegation_token_out(self):
cmd = "tail -1 %s" % self.delegation_token_out
output_iter = self.node.account.ssh_capture(cmd, allow_fail=False)
output = ""
for line in output_iter:
output += line
tokenid, hmac, owner, renewers, issuedate, expirydate, maxdate = output.split()
return {"tokenid" : tokenid,
"hmac" : hmac,
"owner" : owner,
"renewers" : renewers,
"issuedate" : issuedate,
"expirydate" :expirydate,
"maxdate" : maxdate}

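A hedged sketch of the full token lifecycle the helper above wraps (create, read back, renew, expire); it assumes a SASL-enabled KafkaService in `self.kafka` and a ducktape `self.test_context`, and the values are illustrative.

tokens = DelegationTokens(self.kafka, self.test_context)
tokens.generate_delegation_token(maxlifetimeperiod=-1)          # writes delegation_token.out on the first broker
jaas_snippet = tokens.create_jaas_conf_with_delegation_token()  # KafkaClient section using tokenauth=true
hmac = tokens.token_hmac()
tokens.renew_delegation_token(hmac, renew_time_period=-1)
tokens.expire_delegation_token(hmac)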
View File

@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafka import KafkaService
from util import TopicPartition
from config import KafkaConfig

View File

@@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import config_property
class KafkaConfig(dict):
"""A dictionary-like container class which allows for definition of overridable default values,
which is also capable of "rendering" itself as a useable server.properties file.
"""
DEFAULTS = {
config_property.PORT: 9092,
config_property.SOCKET_RECEIVE_BUFFER_BYTES: 65536,
config_property.LOG_DIRS: "/mnt/kafka/kafka-data-logs-1,/mnt/kafka/kafka-data-logs-2",
config_property.ZOOKEEPER_CONNECTION_TIMEOUT_MS: 2000
}
def __init__(self, **kwargs):
super(KafkaConfig, self).__init__(**kwargs)
# Set defaults
for key, val in self.DEFAULTS.items():
if key not in self:
self[key] = val
def render(self):
"""Render self as a series of lines key=val\n, and do so in a consistent order. """
keys = [k for k in self.keys()]
keys.sort()
s = ""
for k in keys:
s += "%s=%s\n" % (k, str(self[k]))
return s

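For illustration, the rendered output for a config that overrides one default (the lines below follow directly from the DEFAULTS dict above; the broker id and port value are arbitrary):

cfg = KafkaConfig(**{config_property.BROKER_ID: 1, config_property.PORT: 9192})
print(cfg.render())
# broker.id=1
# log.dirs=/mnt/kafka/kafka-data-logs-1,/mnt/kafka/kafka-data-logs-2
# port=9192
# socket.receive.buffer.bytes=65536
# zookeeper.connection.timeout.ms=2000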
View File

@@ -0,0 +1,192 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Define Kafka configuration property names here.
"""
BROKER_ID = "broker.id"
PORT = "port"
ADVERTISED_HOSTNAME = "advertised.host.name"
NUM_NETWORK_THREADS = "num.network.threads"
NUM_IO_THREADS = "num.io.threads"
SOCKET_SEND_BUFFER_BYTES = "socket.send.buffer.bytes"
SOCKET_RECEIVE_BUFFER_BYTES = "socket.receive.buffer.bytes"
SOCKET_REQUEST_MAX_BYTES = "socket.request.max.bytes"
LOG_DIRS = "log.dirs"
NUM_PARTITIONS = "num.partitions"
NUM_RECOVERY_THREADS_PER_DATA_DIR = "num.recovery.threads.per.data.dir"
LOG_RETENTION_HOURS = "log.retention.hours"
LOG_SEGMENT_BYTES = "log.segment.bytes"
LOG_RETENTION_CHECK_INTERVAL_MS = "log.retention.check.interval.ms"
LOG_RETENTION_MS = "log.retention.ms"
LOG_CLEANER_ENABLE = "log.cleaner.enable"
AUTO_CREATE_TOPICS_ENABLE = "auto.create.topics.enable"
ZOOKEEPER_CONNECT = "zookeeper.connect"
ZOOKEEPER_SSL_CLIENT_ENABLE = "zookeeper.ssl.client.enable"
ZOOKEEPER_CLIENT_CNXN_SOCKET = "zookeeper.clientCnxnSocket"
ZOOKEEPER_CONNECTION_TIMEOUT_MS = "zookeeper.connection.timeout.ms"
INTER_BROKER_PROTOCOL_VERSION = "inter.broker.protocol.version"
MESSAGE_FORMAT_VERSION = "log.message.format.version"
MESSAGE_TIMESTAMP_TYPE = "message.timestamp.type"
THROTTLING_REPLICATION_RATE_LIMIT = "replication.quota.throttled.rate"
LOG_FLUSH_INTERVAL_MESSAGE = "log.flush.interval.messages"
REPLICA_HIGHWATERMARK_CHECKPOINT_INTERVAL_MS = "replica.high.watermark.checkpoint.interval.ms"
LOG_ROLL_TIME_MS = "log.roll.ms"
OFFSETS_TOPIC_NUM_PARTITIONS = "offsets.topic.num.partitions"
DELEGATION_TOKEN_MAX_LIFETIME_MS="delegation.token.max.lifetime.ms"
DELEGATION_TOKEN_EXPIRY_TIME_MS="delegation.token.expiry.time.ms"
DELEGATION_TOKEN_MASTER_KEY="delegation.token.master.key"
SASL_ENABLED_MECHANISMS="sasl.enabled.mechanisms"
"""
From KafkaConfig.scala
/** ********* General Configuration ***********/
val MaxReservedBrokerIdProp = "reserved.broker.max.id"
val MessageMaxBytesProp = "message.max.bytes"
val NumIoThreadsProp = "num.io.threads"
val BackgroundThreadsProp = "background.threads"
val QueuedMaxRequestsProp = "queued.max.requests"
/** ********* Socket Server Configuration ***********/
val PortProp = "port"
val HostNameProp = "host.name"
val ListenersProp = "listeners"
val AdvertisedPortProp = "advertised.port"
val AdvertisedListenersProp = "advertised.listeners"
val SocketSendBufferBytesProp = "socket.send.buffer.bytes"
val SocketReceiveBufferBytesProp = "socket.receive.buffer.bytes"
val SocketRequestMaxBytesProp = "socket.request.max.bytes"
val MaxConnectionsPerIpProp = "max.connections.per.ip"
val MaxConnectionsPerIpOverridesProp = "max.connections.per.ip.overrides"
val ConnectionsMaxIdleMsProp = "connections.max.idle.ms"
/** ********* Log Configuration ***********/
val NumPartitionsProp = "num.partitions"
val LogDirsProp = "log.dirs"
val LogDirProp = "log.dir"
val LogSegmentBytesProp = "log.segment.bytes"
val LogRollTimeMillisProp = "log.roll.ms"
val LogRollTimeHoursProp = "log.roll.hours"
val LogRollTimeJitterMillisProp = "log.roll.jitter.ms"
val LogRollTimeJitterHoursProp = "log.roll.jitter.hours"
val LogRetentionTimeMillisProp = "log.retention.ms"
val LogRetentionTimeMinutesProp = "log.retention.minutes"
val LogRetentionTimeHoursProp = "log.retention.hours"
val LogRetentionBytesProp = "log.retention.bytes"
val LogCleanupIntervalMsProp = "log.retention.check.interval.ms"
val LogCleanupPolicyProp = "log.cleanup.policy"
val LogCleanerThreadsProp = "log.cleaner.threads"
val LogCleanerIoMaxBytesPerSecondProp = "log.cleaner.io.max.bytes.per.second"
val LogCleanerDedupeBufferSizeProp = "log.cleaner.dedupe.buffer.size"
val LogCleanerIoBufferSizeProp = "log.cleaner.io.buffer.size"
val LogCleanerDedupeBufferLoadFactorProp = "log.cleaner.io.buffer.load.factor"
val LogCleanerBackoffMsProp = "log.cleaner.backoff.ms"
val LogCleanerMinCleanRatioProp = "log.cleaner.min.cleanable.ratio"
val LogCleanerEnableProp = "log.cleaner.enable"
val LogCleanerDeleteRetentionMsProp = "log.cleaner.delete.retention.ms"
val LogIndexSizeMaxBytesProp = "log.index.size.max.bytes"
val LogIndexIntervalBytesProp = "log.index.interval.bytes"
val LogFlushIntervalMessagesProp = "log.flush.interval.messages"
val LogDeleteDelayMsProp = "log.segment.delete.delay.ms"
val LogFlushSchedulerIntervalMsProp = "log.flush.scheduler.interval.ms"
val LogFlushIntervalMsProp = "log.flush.interval.ms"
val LogFlushOffsetCheckpointIntervalMsProp = "log.flush.offset.checkpoint.interval.ms"
val LogPreAllocateProp = "log.preallocate"
val NumRecoveryThreadsPerDataDirProp = "num.recovery.threads.per.data.dir"
val MinInSyncReplicasProp = "min.insync.replicas"
/** ********* Replication configuration ***********/
val ControllerSocketTimeoutMsProp = "controller.socket.timeout.ms"
val DefaultReplicationFactorProp = "default.replication.factor"
val ReplicaLagTimeMaxMsProp = "replica.lag.time.max.ms"
val ReplicaSocketTimeoutMsProp = "replica.socket.timeout.ms"
val ReplicaSocketReceiveBufferBytesProp = "replica.socket.receive.buffer.bytes"
val ReplicaFetchMaxBytesProp = "replica.fetch.max.bytes"
val ReplicaFetchWaitMaxMsProp = "replica.fetch.wait.max.ms"
val ReplicaFetchMinBytesProp = "replica.fetch.min.bytes"
val ReplicaFetchBackoffMsProp = "replica.fetch.backoff.ms"
val NumReplicaFetchersProp = "num.replica.fetchers"
val ReplicaHighWatermarkCheckpointIntervalMsProp = "replica.high.watermark.checkpoint.interval.ms"
val FetchPurgatoryPurgeIntervalRequestsProp = "fetch.purgatory.purge.interval.requests"
val ProducerPurgatoryPurgeIntervalRequestsProp = "producer.purgatory.purge.interval.requests"
val AutoLeaderRebalanceEnableProp = "auto.leader.rebalance.enable"
val LeaderImbalancePerBrokerPercentageProp = "leader.imbalance.per.broker.percentage"
val LeaderImbalanceCheckIntervalSecondsProp = "leader.imbalance.check.interval.seconds"
val UncleanLeaderElectionEnableProp = "unclean.leader.election.enable"
val InterBrokerSecurityProtocolProp = "security.inter.broker.protocol"
val InterBrokerProtocolVersionProp = "inter.broker.protocol.version"
/** ********* Controlled shutdown configuration ***********/
val ControlledShutdownMaxRetriesProp = "controlled.shutdown.max.retries"
val ControlledShutdownRetryBackoffMsProp = "controlled.shutdown.retry.backoff.ms"
val ControlledShutdownEnableProp = "controlled.shutdown.enable"
/** ********* Consumer coordinator configuration ***********/
val ConsumerMinSessionTimeoutMsProp = "consumer.min.session.timeout.ms"
val ConsumerMaxSessionTimeoutMsProp = "consumer.max.session.timeout.ms"
/** ********* Offset management configuration ***********/
val OffsetMetadataMaxSizeProp = "offset.metadata.max.bytes"
val OffsetsLoadBufferSizeProp = "offsets.load.buffer.size"
val OffsetsTopicReplicationFactorProp = "offsets.topic.replication.factor"
val OffsetsTopicPartitionsProp = "offsets.topic.num.partitions"
val OffsetsTopicSegmentBytesProp = "offsets.topic.segment.bytes"
val OffsetsTopicCompressionCodecProp = "offsets.topic.compression.codec"
val OffsetsRetentionMinutesProp = "offsets.retention.minutes"
val OffsetsRetentionCheckIntervalMsProp = "offsets.retention.check.interval.ms"
val OffsetCommitTimeoutMsProp = "offsets.commit.timeout.ms"
val OffsetCommitRequiredAcksProp = "offsets.commit.required.acks"
/** ********* Quota Configuration ***********/
val ProducerQuotaBytesPerSecondDefaultProp = "quota.producer.default"
val ConsumerQuotaBytesPerSecondDefaultProp = "quota.consumer.default"
val NumQuotaSamplesProp = "quota.window.num"
val QuotaWindowSizeSecondsProp = "quota.window.size.seconds"
val DeleteTopicEnableProp = "delete.topic.enable"
val CompressionTypeProp = "compression.type"
/** ********* Kafka Metrics Configuration ***********/
val MetricSampleWindowMsProp = CommonClientConfigs.METRICS_SAMPLE_WINDOW_MS_CONFIG
val MetricNumSamplesProp: String = CommonClientConfigs.METRICS_NUM_SAMPLES_CONFIG
val MetricReporterClassesProp: String = CommonClientConfigs.METRIC_REPORTER_CLASSES_CONFIG
/** ********* SSL Configuration ****************/
val PrincipalBuilderClassProp = SSLConfigs.PRINCIPAL_BUILDER_CLASS_CONFIG
val SSLProtocolProp = SSLConfigs.SSL_PROTOCOL_CONFIG
val SSLProviderProp = SSLConfigs.SSL_PROVIDER_CONFIG
val SSLCipherSuitesProp = SSLConfigs.SSL_CIPHER_SUITES_CONFIG
val SSLEnabledProtocolsProp = SSLConfigs.SSL_ENABLED_PROTOCOLS_CONFIG
val SSLKeystoreTypeProp = SSLConfigs.SSL_KEYSTORE_TYPE_CONFIG
val SSLKeystoreLocationProp = SSLConfigs.SSL_KEYSTORE_LOCATION_CONFIG
val SSLKeystorePasswordProp = SSLConfigs.SSL_KEYSTORE_PASSWORD_CONFIG
val SSLKeyPasswordProp = SSLConfigs.SSL_KEY_PASSWORD_CONFIG
val SSLTruststoreTypeProp = SSLConfigs.SSL_TRUSTSTORE_TYPE_CONFIG
val SSLTruststoreLocationProp = SSLConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG
val SSLTruststorePasswordProp = SSLConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG
val SSLKeyManagerAlgorithmProp = SSLConfigs.SSL_KEYMANAGER_ALGORITHM_CONFIG
val SSLTrustManagerAlgorithmProp = SSLConfigs.SSL_TRUSTMANAGER_ALGORITHM_CONFIG
val SSLEndpointIdentificationAlgorithmProp = SSLConfigs.SSL_ENDPOINT_IDENTIFICATION_ALGORITHM_CONFIG
val SSLSecureRandomImplementationProp = SSLConfigs.SSL_SECURE_RANDOM_IMPLEMENTATION_CONFIG
val SSLClientAuthProp = SSLConfigs.SSL_CLIENT_AUTH_CONFIG
"""

View File

@@ -0,0 +1,897 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import json
import os.path
import re
import signal
import time
from ducktape.services.service import Service
from ducktape.utils.util import wait_until
from ducktape.cluster.remoteaccount import RemoteCommandError
from config import KafkaConfig
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.kafka import config_property
from kafkatest.services.monitor.jmx import JmxMixin
from kafkatest.services.security.minikdc import MiniKdc
from kafkatest.services.security.listener_security_config import ListenerSecurityConfig
from kafkatest.services.security.security_config import SecurityConfig
from kafkatest.version import DEV_BRANCH, LATEST_0_10_0
class KafkaListener:
def __init__(self, name, port_number, security_protocol, open=False):
self.name = name
self.port_number = port_number
self.security_protocol = security_protocol
self.open = open
def listener(self):
return "%s://:%s" % (self.name, str(self.port_number))
def advertised_listener(self, node):
return "%s://%s:%s" % (self.name, node.account.hostname, str(self.port_number))
def listener_security_protocol(self):
return "%s:%s" % (self.name, self.security_protocol)
class KafkaService(KafkaPathResolverMixin, JmxMixin, Service):
PERSISTENT_ROOT = "/mnt/kafka"
STDOUT_STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "server-start-stdout-stderr.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "kafka-log4j.properties")
# Logs such as controller.log, server.log, etc all go here
OPERATIONAL_LOG_DIR = os.path.join(PERSISTENT_ROOT, "kafka-operational-logs")
OPERATIONAL_LOG_INFO_DIR = os.path.join(OPERATIONAL_LOG_DIR, "info")
OPERATIONAL_LOG_DEBUG_DIR = os.path.join(OPERATIONAL_LOG_DIR, "debug")
# Kafka log segments etc go here
DATA_LOG_DIR_PREFIX = os.path.join(PERSISTENT_ROOT, "kafka-data-logs")
DATA_LOG_DIR_1 = "%s-1" % (DATA_LOG_DIR_PREFIX)
DATA_LOG_DIR_2 = "%s-2" % (DATA_LOG_DIR_PREFIX)
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "kafka.properties")
# Kafka Authorizer
ACL_AUTHORIZER = "kafka.security.authorizer.AclAuthorizer"
# Old Kafka Authorizer. This is deprecated but still supported.
SIMPLE_AUTHORIZER = "kafka.security.auth.SimpleAclAuthorizer"
HEAP_DUMP_FILE = os.path.join(PERSISTENT_ROOT, "kafka_heap_dump.bin")
INTERBROKER_LISTENER_NAME = 'INTERNAL'
JAAS_CONF_PROPERTY = "java.security.auth.login.config=/mnt/security/jaas.conf"
KRB5_CONF = "java.security.krb5.conf=/mnt/security/krb5.conf"
logs = {
"kafka_server_start_stdout_stderr": {
"path": STDOUT_STDERR_CAPTURE,
"collect_default": True},
"kafka_operational_logs_info": {
"path": OPERATIONAL_LOG_INFO_DIR,
"collect_default": True},
"kafka_operational_logs_debug": {
"path": OPERATIONAL_LOG_DEBUG_DIR,
"collect_default": False},
"kafka_data_1": {
"path": DATA_LOG_DIR_1,
"collect_default": False},
"kafka_data_2": {
"path": DATA_LOG_DIR_2,
"collect_default": False},
"kafka_heap_dump_file": {
"path": HEAP_DUMP_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, zk, security_protocol=SecurityConfig.PLAINTEXT, interbroker_security_protocol=SecurityConfig.PLAINTEXT,
client_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI, interbroker_sasl_mechanism=SecurityConfig.SASL_MECHANISM_GSSAPI,
authorizer_class_name=None, topics=None, version=DEV_BRANCH, jmx_object_names=None,
jmx_attributes=None, zk_connect_timeout=5000, zk_session_timeout=6000, server_prop_overides=None, zk_chroot=None,
zk_client_secure=False,
listener_security_config=ListenerSecurityConfig(), per_node_server_prop_overrides=None, extra_kafka_opts=""):
"""
:param context: test context
:param ZookeeperService zk:
:param dict topics: which topics to create automatically
:param str security_protocol: security protocol for clients to use
:param str interbroker_security_protocol: security protocol to use for broker-to-broker communication
:param str client_sasl_mechanism: sasl mechanism for clients to use
:param str interbroker_sasl_mechanism: sasl mechanism to use for broker-to-broker communication
:param str authorizer_class_name: which authorizer class to use
:param str version: which kafka version to use. Defaults to "dev" branch
:param jmx_object_names:
:param jmx_attributes:
:param int zk_connect_timeout:
:param int zk_session_timeout:
:param dict server_prop_overides: overrides for kafka.properties file
:param zk_chroot:
:param bool zk_client_secure: connect to Zookeeper over secure client port (TLS) when True
:param ListenerSecurityConfig listener_security_config: listener config to use
:param dict per_node_server_prop_overrides:
:param str extra_kafka_opts: jvm args to add to KAFKA_OPTS variable
"""
Service.__init__(self, context, num_nodes)
JmxMixin.__init__(self, num_nodes=num_nodes, jmx_object_names=jmx_object_names, jmx_attributes=(jmx_attributes or []),
root=KafkaService.PERSISTENT_ROOT)
self.zk = zk
self.security_protocol = security_protocol
self.client_sasl_mechanism = client_sasl_mechanism
self.topics = topics
self.minikdc = None
self.authorizer_class_name = authorizer_class_name
self.zk_set_acl = False
if server_prop_overides is None:
self.server_prop_overides = []
else:
self.server_prop_overides = server_prop_overides
if per_node_server_prop_overrides is None:
self.per_node_server_prop_overrides = {}
else:
self.per_node_server_prop_overrides = per_node_server_prop_overrides
self.log_level = "DEBUG"
self.zk_chroot = zk_chroot
self.zk_client_secure = zk_client_secure
self.listener_security_config = listener_security_config
self.extra_kafka_opts = extra_kafka_opts
#
# In a heavily loaded and not very fast machine, it is
# sometimes necessary to give more time for the zk client
# to have its session established, especially if the client
# is authenticating and waiting for the SaslAuthenticated
# in addition to the SyncConnected event.
#
# The default value for zookeeper.connect.timeout.ms is
# 2 seconds and here we increase it to 5 seconds, but
# it can be overridden by setting the corresponding parameter
# for this constructor.
self.zk_connect_timeout = zk_connect_timeout
# Also allow the session timeout to be provided explicitly,
# primarily so that test cases can depend on it when waiting
# e.g. brokers to deregister after a hard kill.
self.zk_session_timeout = zk_session_timeout
self.port_mappings = {
'PLAINTEXT': KafkaListener('PLAINTEXT', 9092, 'PLAINTEXT', False),
'SSL': KafkaListener('SSL', 9093, 'SSL', False),
'SASL_PLAINTEXT': KafkaListener('SASL_PLAINTEXT', 9094, 'SASL_PLAINTEXT', False),
'SASL_SSL': KafkaListener('SASL_SSL', 9095, 'SASL_SSL', False),
KafkaService.INTERBROKER_LISTENER_NAME:
KafkaListener(KafkaService.INTERBROKER_LISTENER_NAME, 9099, None, False)
}
self.interbroker_listener = None
self.setup_interbroker_listener(interbroker_security_protocol, self.listener_security_config.use_separate_interbroker_listener)
self.interbroker_sasl_mechanism = interbroker_sasl_mechanism
for node in self.nodes:
node.version = version
node.config = KafkaConfig(**{config_property.BROKER_ID: self.idx(node)})
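# Illustrative sketch (not part of the service): how a ducktape test might typically construct and start
# this service. The ZookeeperService import path and the test_context fixture are assumptions for the
# example only.
#
#   from kafkatest.services.zookeeper import ZookeeperService
#
#   zk = ZookeeperService(test_context, num_nodes=1)
#   zk.start()
#   kafka = KafkaService(test_context, num_nodes=3, zk=zk,
#                        topics={"test_topic": {"partitions": 2, "replication-factor": 2}})
#   kafka.start()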
def set_version(self, version):
for node in self.nodes:
node.version = version
@property
def interbroker_security_protocol(self):
return self.interbroker_listener.security_protocol
# this is required for backwards compatibility - there are a lot of tests that set this property explicitly
# meaning 'use one of the existing listeners that match given security protocol, do not use custom listener'
@interbroker_security_protocol.setter
def interbroker_security_protocol(self, security_protocol):
self.setup_interbroker_listener(security_protocol, use_separate_listener=False)
def setup_interbroker_listener(self, security_protocol, use_separate_listener=False):
self.listener_security_config.use_separate_interbroker_listener = use_separate_listener
if self.listener_security_config.use_separate_interbroker_listener:
# do not close existing port here since it is not used exclusively for interbroker communication
self.interbroker_listener = self.port_mappings[KafkaService.INTERBROKER_LISTENER_NAME]
self.interbroker_listener.security_protocol = security_protocol
else:
# close dedicated interbroker port, so it's not dangling in 'listeners' and 'advertised.listeners'
self.close_port(KafkaService.INTERBROKER_LISTENER_NAME)
self.interbroker_listener = self.port_mappings[security_protocol]
@property
def security_config(self):
config = SecurityConfig(self.context, self.security_protocol, self.interbroker_listener.security_protocol,
zk_sasl=self.zk.zk_sasl, zk_tls=self.zk_client_secure,
client_sasl_mechanism=self.client_sasl_mechanism,
interbroker_sasl_mechanism=self.interbroker_sasl_mechanism,
listener_security_config=self.listener_security_config)
for port in self.port_mappings.values():
if port.open:
config.enable_security_protocol(port.security_protocol)
return config
def open_port(self, listener_name):
self.port_mappings[listener_name].open = True
def close_port(self, listener_name):
self.port_mappings[listener_name].open = False
def start_minikdc_if_necessary(self, add_principals=""):
if self.security_config.has_sasl:
if self.minikdc is None:
self.minikdc = MiniKdc(self.context, self.nodes, extra_principals = add_principals)
self.minikdc.start()
else:
self.minikdc = None
def alive(self, node):
return len(self.pids(node)) > 0
def start(self, add_principals="", use_zk_to_create_topic=True):
if self.zk_client_secure and not self.zk.zk_client_secure_port:
raise Exception("Unable to start Kafka: TLS to Zookeeper requested but Zookeeper secure port not enabled")
self.open_port(self.security_protocol)
self.interbroker_listener.open = True
self.start_minikdc_if_necessary(add_principals)
self._ensure_zk_chroot()
Service.start(self)
self.logger.info("Waiting for brokers to register at ZK")
retries = 30
expected_broker_ids = set(self.nodes)
wait_until(lambda: {node for node in self.nodes if self.is_registered(node)} == expected_broker_ids, 30, 1)
if retries == 0:
raise RuntimeError("Kafka servers didn't register at ZK within 30 seconds")
# Create topics if necessary
if self.topics is not None:
for topic, topic_cfg in self.topics.items():
if topic_cfg is None:
topic_cfg = {}
topic_cfg["topic"] = topic
self.create_topic(topic_cfg, use_zk_to_create_topic=use_zk_to_create_topic)
def _ensure_zk_chroot(self):
self.logger.info("Ensuring zk_chroot %s exists", self.zk_chroot)
if self.zk_chroot:
if not self.zk_chroot.startswith('/'):
raise Exception("Zookeeper chroot must start with '/' but found " + self.zk_chroot)
parts = self.zk_chroot.split('/')[1:]
for i in range(len(parts)):
self.zk.create('/' + '/'.join(parts[:i+1]))
def set_protocol_and_port(self, node):
listeners = []
advertised_listeners = []
protocol_map = []
for port in self.port_mappings.values():
if port.open:
listeners.append(port.listener())
advertised_listeners.append(port.advertised_listener(node))
protocol_map.append(port.listener_security_protocol())
self.listeners = ','.join(listeners)
self.advertised_listeners = ','.join(advertised_listeners)
self.listener_security_protocol_map = ','.join(protocol_map)
self.interbroker_bootstrap_servers = self.__bootstrap_servers(self.interbroker_listener, True)
def prop_file(self, node):
self.set_protocol_and_port(node)
#load template configs as dictionary
config_template = self.render('kafka.properties', node=node, broker_id=self.idx(node),
security_config=self.security_config, num_nodes=self.num_nodes,
listener_security_config=self.listener_security_config)
configs = dict( l.rstrip().split('=', 1) for l in config_template.split('\n')
if not l.startswith("#") and "=" in l )
#load specific test override configs
override_configs = KafkaConfig(**node.config)
override_configs[config_property.ADVERTISED_HOSTNAME] = node.account.hostname
override_configs[config_property.ZOOKEEPER_CONNECT] = self.zk_connect_setting()
if self.zk_client_secure:
override_configs[config_property.ZOOKEEPER_SSL_CLIENT_ENABLE] = 'true'
override_configs[config_property.ZOOKEEPER_CLIENT_CNXN_SOCKET] = 'org.apache.zookeeper.ClientCnxnSocketNetty'
else:
override_configs[config_property.ZOOKEEPER_SSL_CLIENT_ENABLE] = 'false'
for prop in self.server_prop_overides:
override_configs[prop[0]] = prop[1]
for prop in self.per_node_server_prop_overrides.get(self.idx(node), []):
override_configs[prop[0]] = prop[1]
#update template configs with test override configs
configs.update(override_configs)
prop_file = self.render_configs(configs)
return prop_file
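# Illustrative sketch: server_prop_overides and per_node_server_prop_overrides are lists of [key, value]
# pairs that become extra lines in the rendered kafka.properties (per_node keys are 1-based node indices).
# The property names below are standard broker configs, but the values are example assumptions only.
#
#   kafka.server_prop_overides = [["log.retention.ms", "30000"], ["num.io.threads", "4"]]
#   kafka.per_node_server_prop_overrides = {1: [["broker.rack", "rack-a"]]}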
def render_configs(self, configs):
"""Render self as a series of lines key=val\n, and do so in a consistent order. """
keys = [k for k in configs.keys()]
keys.sort()
s = ""
for k in keys:
s += "%s=%s\n" % (k, str(configs[k]))
return s
def start_cmd(self, node):
cmd = "export JMX_PORT=%d; " % self.jmx_port
cmd += "export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % self.LOG4J_CONFIG
heap_kafka_opts = "-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=%s" % \
self.logs["kafka_heap_dump_file"]["path"]
security_kafka_opts = self.security_config.kafka_opts.strip('\"')
cmd += "export KAFKA_OPTS=\"%s %s %s\"; " % (heap_kafka_opts, security_kafka_opts, self.extra_kafka_opts)
cmd += "%s %s 1>> %s 2>> %s &" % \
(self.path.script("kafka-server-start.sh", node),
KafkaService.CONFIG_FILE,
KafkaService.STDOUT_STDERR_CAPTURE,
KafkaService.STDOUT_STDERR_CAPTURE)
return cmd
def start_node(self, node, timeout_sec=60):
node.account.mkdirs(KafkaService.PERSISTENT_ROOT)
prop_file = self.prop_file(node)
self.logger.info("kafka.properties:")
self.logger.info(prop_file)
node.account.create_file(KafkaService.CONFIG_FILE, prop_file)
node.account.create_file(self.LOG4J_CONFIG, self.render('log4j.properties', log_dir=KafkaService.OPERATIONAL_LOG_DIR))
self.security_config.setup_node(node)
self.security_config.setup_credentials(node, self.path, self.zk_connect_setting(), broker=True)
cmd = self.start_cmd(node)
self.logger.debug("Attempting to start KafkaService on %s with command: %s" % (str(node.account), cmd))
with node.account.monitor_log(KafkaService.STDOUT_STDERR_CAPTURE) as monitor:
node.account.ssh(cmd)
# Kafka 1.0.0 and higher don't have a space between "Kafka" and "Server"
monitor.wait_until("Kafka\s*Server.*started", timeout_sec=timeout_sec, backoff_sec=.25,
err_msg="Kafka server didn't finish startup in %d seconds" % timeout_sec)
# Credentials for inter-broker communication are created before starting Kafka.
# Client credentials are created after starting Kafka so that both loading of
# existing credentials from ZK and dynamic update of credentials in Kafka are tested.
self.security_config.setup_credentials(node, self.path, self.zk_connect_setting(), broker=False)
self.start_jmx_tool(self.idx(node), node)
if len(self.pids(node)) == 0:
raise Exception("No process ids recorded on node %s" % node.account.hostname)
def pids(self, node):
"""Return process ids associated with running processes on the given node."""
try:
cmd = "jcmd | grep -e %s | awk '{print $1}'" % self.java_class_name()
pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)]
return pid_arr
except (RemoteCommandError, ValueError) as e:
return []
def signal_node(self, node, sig=signal.SIGTERM):
pids = self.pids(node)
for pid in pids:
node.account.signal(pid, sig)
def signal_leader(self, topic, partition=0, sig=signal.SIGTERM):
leader = self.leader(topic, partition)
self.signal_node(leader, sig)
def stop_node(self, node, clean_shutdown=True, timeout_sec=60):
pids = self.pids(node)
sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL
for pid in pids:
node.account.signal(pid, sig, allow_fail=False)
try:
wait_until(lambda: len(self.pids(node)) == 0, timeout_sec=timeout_sec,
err_msg="Kafka node failed to stop in %d seconds" % timeout_sec)
except Exception:
self.thread_dump(node)
raise
def thread_dump(self, node):
for pid in self.pids(node):
try:
node.account.signal(pid, signal.SIGQUIT, allow_fail=True)
except Exception:
self.logger.warn("Could not dump threads on node")
def clean_node(self, node):
JmxMixin.clean_node(self, node)
self.security_config.clean_node(node)
node.account.kill_java_processes(self.java_class_name(),
clean_shutdown=False, allow_fail=True)
node.account.ssh("sudo rm -rf -- %s" % KafkaService.PERSISTENT_ROOT, allow_fail=False)
def _kafka_topics_cmd(self, node, use_zk_connection=True):
"""
Returns the kafka-topics.sh command path with the JAAS configuration and krb5 settings in the
KAFKA_OPTS environment variable. If the Admin client is not going to be used, KAFKA_OPTS is not set.
"""
kafka_topic_script = self.path.script("kafka-topics.sh", node)
skip_security_settings = use_zk_connection or not node.version.topic_command_supports_bootstrap_server()
return kafka_topic_script if skip_security_settings else \
"KAFKA_OPTS='-D%s -D%s' %s" % (KafkaService.JAAS_CONF_PROPERTY, KafkaService.KRB5_CONF, kafka_topic_script)
def _kafka_topics_cmd_config(self, node, use_zk_connection=True):
"""
Return --command-config parameter to the kafka-topics.sh command. The config parameter specifies
the security settings that AdminClient uses to connect to a secure kafka server.
"""
skip_command_config = use_zk_connection or not node.version.topic_command_supports_bootstrap_server()
return "" if skip_command_config else " --command-config <(echo '%s')" % (self.security_config.client_config())
def create_topic(self, topic_cfg, node=None, use_zk_to_create_topic=True):
"""Run the admin tool create topic command.
Specifying node is optional, and may be done if different kafka nodes have different versions,
and we care where command gets run.
If the node is not specified, run the command from self.nodes[0]
"""
if node is None:
node = self.nodes[0]
self.logger.info("Creating topic %s with settings %s",
topic_cfg["topic"], topic_cfg)
use_zk_connection = topic_cfg.get('if-not-exists', False) or use_zk_to_create_topic
cmd = "%(kafka_topics_cmd)s %(connection_string)s --create --topic %(topic)s " % {
'kafka_topics_cmd': self._kafka_topics_cmd(node, use_zk_connection),
'connection_string': self._connect_setting(node, use_zk_connection),
'topic': topic_cfg.get("topic"),
}
if 'replica-assignment' in topic_cfg:
cmd += " --replica-assignment %(replica-assignment)s" % {
'replica-assignment': topic_cfg.get('replica-assignment')
}
else:
cmd += " --partitions %(partitions)d --replication-factor %(replication-factor)d" % {
'partitions': topic_cfg.get('partitions', 1),
'replication-factor': topic_cfg.get('replication-factor', 1)
}
if topic_cfg.get('if-not-exists', False):
cmd += ' --if-not-exists'
if "configs" in topic_cfg.keys() and topic_cfg["configs"] is not None:
for config_name, config_value in topic_cfg["configs"].items():
cmd += " --config %s=%s" % (config_name, str(config_value))
cmd += self._kafka_topics_cmd_config(node, use_zk_connection)
self.logger.info("Running topic creation command...\n%s" % cmd)
node.account.ssh(cmd)
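# Illustrative sketch of a topic_cfg dict accepted by create_topic(); the topic name and config values
# are example assumptions only:
#
#   kafka.create_topic({
#       "topic": "test_topic",
#       "partitions": 2,
#       "replication-factor": 2,
#       "if-not-exists": True,
#       "configs": {"min.insync.replicas": 2}
#   })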
def delete_topic(self, topic, node=None):
"""
Delete a topic with the topics command
:param topic:
:param node:
:return:
"""
if node is None:
node = self.nodes[0]
self.logger.info("Deleting topic %s" % topic)
kafka_topic_script = self.path.script("kafka-topics.sh", node)
cmd = kafka_topic_script + " "
cmd += "--bootstrap-server %(bootstrap_servers)s --delete --topic %(topic)s " % {
'bootstrap_servers': self.bootstrap_servers(self.security_protocol),
'topic': topic
}
self.logger.info("Running topic delete command...\n%s" % cmd)
node.account.ssh(cmd)
def describe_topic(self, topic, node=None, use_zk_to_describe_topic=True):
if node is None:
node = self.nodes[0]
cmd = "%s %s --topic %s --describe %s" % \
(self._kafka_topics_cmd(node=node, use_zk_connection=use_zk_to_describe_topic),
self._connect_setting(node=node, use_zk_connection=use_zk_to_describe_topic),
topic, self._kafka_topics_cmd_config(node=node, use_zk_connection=use_zk_to_describe_topic))
self.logger.info("Running topic describe command...\n%s" % cmd)
output = ""
for line in node.account.ssh_capture(cmd):
output += line
return output
def list_topics(self, node=None, use_zk_to_list_topic=True):
if node is None:
node = self.nodes[0]
cmd = "%s %s --list %s" % (self._kafka_topics_cmd(node, use_zk_to_list_topic),
self._connect_setting(node, use_zk_to_list_topic),
self._kafka_topics_cmd_config(node, use_zk_to_list_topic))
for line in node.account.ssh_capture(cmd):
if not line.startswith("SLF4J"):
yield line.rstrip()
def alter_message_format(self, topic, msg_format_version, node=None):
if node is None:
node = self.nodes[0]
self.logger.info("Altering message format version for topic %s with format %s", topic, msg_format_version)
cmd = "%s --zookeeper %s %s --entity-name %s --entity-type topics --alter --add-config message.format.version=%s" % \
(self.path.script("kafka-configs.sh", node), self.zk_connect_setting(), self.zk.zkTlsConfigFileOption(), topic, msg_format_version)
self.logger.info("Running alter message format command...\n%s" % cmd)
node.account.ssh(cmd)
def set_unclean_leader_election(self, topic, value=True, node=None):
if node is None:
node = self.nodes[0]
if value is True:
self.logger.info("Enabling unclean leader election for topic %s", topic)
else:
self.logger.info("Disabling unclean leader election for topic %s", topic)
cmd = "%s --zookeeper %s %s --entity-name %s --entity-type topics --alter --add-config unclean.leader.election.enable=%s" % \
(self.path.script("kafka-configs.sh", node), self.zk_connect_setting(), self.zk.zkTlsConfigFileOption(), topic, str(value).lower())
self.logger.info("Running alter unclean leader command...\n%s" % cmd)
node.account.ssh(cmd)
def parse_describe_topic(self, topic_description):
"""Parse output of kafka-topics.sh --describe (or describe_topic() method above), which is a string of form
PartitionCount:2\tReplicationFactor:2\tConfigs:
Topic: test_topic\tPartition: 0\tLeader: 3\tReplicas: 3,1\tIsr: 3,1
Topic: test_topic\tPartition: 1\tLeader: 1\tReplicas: 1,2\tIsr: 1,2
into a dictionary structure appropriate for use with reassign-partitions tool:
{
"partitions": [
{"topic": "test_topic", "partition": 0, "replicas": [3, 1]},
{"topic": "test_topic", "partition": 1, "replicas": [1, 2]}
]
}
"""
lines = map(lambda x: x.strip(), topic_description.split("\n"))
partitions = []
for line in lines:
m = re.match(".*Leader:.*", line)
if m is None:
continue
fields = line.split("\t")
# ["Partition: 4", "Leader: 0"] -> ["4", "0"]
fields = map(lambda x: x.split(" ")[1], fields)
partitions.append(
{"topic": fields[0],
"partition": int(fields[1]),
"replicas": map(int, fields[3].split(','))})
return {"partitions": partitions}
def verify_reassign_partitions(self, reassignment, node=None):
"""Run the reassign partitions admin tool in "verify" mode
"""
if node is None:
node = self.nodes[0]
json_file = "/tmp/%s_reassign.json" % str(time.time())
# reassignment to json
json_str = json.dumps(reassignment)
json_str = json.dumps(json_str)
# create command
cmd = "echo %s > %s && " % (json_str, json_file)
cmd += "%s " % self.path.script("kafka-reassign-partitions.sh", node)
cmd += "--zookeeper %s " % self.zk_connect_setting()
cmd += "--reassignment-json-file %s " % json_file
cmd += "--verify "
cmd += "&& sleep 1 && rm -f %s" % json_file
# send command
self.logger.info("Verifying partition reassignment...")
self.logger.debug(cmd)
output = ""
for line in node.account.ssh_capture(cmd):
output += line
self.logger.debug(output)
if re.match(".*Reassignment of partition.*failed.*",
output.replace('\n', '')) is not None:
return False
if re.match(".*is still in progress.*",
output.replace('\n', '')) is not None:
return False
return True
def execute_reassign_partitions(self, reassignment, node=None,
throttle=None):
"""Run the reassign partitions admin tool in "verify" mode
"""
if node is None:
node = self.nodes[0]
json_file = "/tmp/%s_reassign.json" % str(time.time())
# reassignment to json
json_str = json.dumps(reassignment)
json_str = json.dumps(json_str)
# create command
cmd = "echo %s > %s && " % (json_str, json_file)
cmd += "%s " % self.path.script( "kafka-reassign-partitions.sh", node)
cmd += "--zookeeper %s " % self.zk_connect_setting()
cmd += "--reassignment-json-file %s " % json_file
cmd += "--execute"
if throttle is not None:
cmd += " --throttle %d" % throttle
cmd += " && sleep 1 && rm -f %s" % json_file
# send command
self.logger.info("Executing parition reassignment...")
self.logger.debug(cmd)
output = ""
for line in node.account.ssh_capture(cmd):
output += line
self.logger.debug("Verify partition reassignment:")
self.logger.debug(output)
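# Illustrative sketch of the reassignment structure passed to verify_reassign_partitions() and
# execute_reassign_partitions(); it has the same shape as the value returned by parse_describe_topic().
# The topic name and broker ids are example values only.
#
#   reassignment = {
#       "partitions": [
#           {"topic": "test_topic", "partition": 0, "replicas": [1, 2]},
#           {"topic": "test_topic", "partition": 1, "replicas": [2, 3]}
#       ]
#   }
#   kafka.execute_reassign_partitions(reassignment, throttle=1000000)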
def search_data_files(self, topic, messages):
"""Check if a set of messages made it into the Kakfa data files. Note that
this method takes no account of replication. It simply looks for the
payload in all the partition files of the specified topic. 'messages' should be
an array of numbers. The list of missing messages is returned.
"""
payload_match = "payload: " + "$|payload: ".join(str(x) for x in messages) + "$"
found = set([])
self.logger.debug("number of unique missing messages we will search for: %d",
len(messages))
for node in self.nodes:
# Grab all .log files in directories prefixed with this topic
files = node.account.ssh_capture("find %s* -regex '.*/%s-.*/[^/]*.log'" % (KafkaService.DATA_LOG_DIR_PREFIX, topic))
# Check each data file to see if it contains the messages we want
for log in files:
cmd = "%s kafka.tools.DumpLogSegments --print-data-log --files %s | grep -E \"%s\"" % \
(self.path.script("kafka-run-class.sh", node), log.strip(), payload_match)
for line in node.account.ssh_capture(cmd, allow_fail=True):
for val in messages:
if line.strip().endswith("payload: "+str(val)):
self.logger.debug("Found %s in data-file [%s] in line: [%s]" % (val, log.strip(), line.strip()))
found.add(val)
self.logger.debug("Number of unique messages found in the log: %d",
len(found))
missing = list(set(messages) - found)
if len(missing) > 0:
self.logger.warn("The following values were not found in the data files: " + str(missing))
return missing
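# Illustrative sketch (message values are examples): verify that a set of acked integer payloads reached
# the on-disk data files of a topic.
#
#   missing = kafka.search_data_files("test_topic", [0, 1, 2, 3])
#   assert len(missing) == 0, "some payloads never reached the data files: %s" % missing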
def restart_cluster(self, clean_shutdown=True, timeout_sec=60, after_each_broker_restart=None, *args):
for node in self.nodes:
self.restart_node(node, clean_shutdown=clean_shutdown, timeout_sec=timeout_sec)
if after_each_broker_restart is not None:
after_each_broker_restart(*args)
def restart_node(self, node, clean_shutdown=True, timeout_sec=60):
"""Restart the given node."""
self.stop_node(node, clean_shutdown, timeout_sec)
self.start_node(node, timeout_sec)
def isr_idx_list(self, topic, partition=0):
""" Get in-sync replica list the given topic and partition.
"""
self.logger.debug("Querying zookeeper to find in-sync replicas for topic %s and partition %d" % (topic, partition))
zk_path = "/brokers/topics/%s/partitions/%d/state" % (topic, partition)
partition_state = self.zk.query(zk_path, chroot=self.zk_chroot)
if partition_state is None:
raise Exception("Error finding partition state for topic %s and partition %d." % (topic, partition))
partition_state = json.loads(partition_state)
self.logger.info(partition_state)
isr_idx_list = partition_state["isr"]
self.logger.info("Isr for topic %s and partition %d is now: %s" % (topic, partition, isr_idx_list))
return isr_idx_list
def replicas(self, topic, partition=0):
""" Get the assigned replicas for the given topic and partition.
"""
self.logger.debug("Querying zookeeper to find assigned replicas for topic %s and partition %d" % (topic, partition))
zk_path = "/brokers/topics/%s" % (topic)
assignment = self.zk.query(zk_path, chroot=self.zk_chroot)
if assignment is None:
raise Exception("Error finding partition state for topic %s and partition %d." % (topic, partition))
assignment = json.loads(assignment)
self.logger.info(assignment)
replicas = assignment["partitions"][str(partition)]
self.logger.info("Assigned replicas for topic %s and partition %d is now: %s" % (topic, partition, replicas))
return [self.get_node(replica) for replica in replicas]
def leader(self, topic, partition=0):
""" Get the leader replica for the given topic and partition.
"""
self.logger.debug("Querying zookeeper to find leader replica for topic %s and partition %d" % (topic, partition))
zk_path = "/brokers/topics/%s/partitions/%d/state" % (topic, partition)
partition_state = self.zk.query(zk_path, chroot=self.zk_chroot)
if partition_state is None:
raise Exception("Error finding partition state for topic %s and partition %d." % (topic, partition))
partition_state = json.loads(partition_state)
self.logger.info(partition_state)
leader_idx = int(partition_state["leader"])
self.logger.info("Leader for topic %s and partition %d is now: %d" % (topic, partition, leader_idx))
return self.get_node(leader_idx)
def cluster_id(self):
""" Get the current cluster id
"""
self.logger.debug("Querying ZooKeeper to retrieve cluster id")
cluster = self.zk.query("/cluster/id", chroot=self.zk_chroot)
try:
return json.loads(cluster)['id'] if cluster else None
except:
self.logger.debug("Data in /cluster/id znode could not be parsed. Data = %s" % cluster)
raise
def check_protocol_errors(self, node):
""" Checks for common protocol exceptions due to invalid inter broker protocol handling.
While such errors can and should be checked in other ways, checking the logs is a worthwhile failsafe.
"""
for node in self.nodes:
exit_code = node.account.ssh("grep -e 'java.lang.IllegalArgumentException: Invalid version' -e SchemaException %s/*"
% KafkaService.OPERATIONAL_LOG_DEBUG_DIR, allow_fail=True)
if exit_code != 1:
return False
return True
def list_consumer_groups(self, node=None, command_config=None):
""" Get list of consumer groups.
"""
if node is None:
node = self.nodes[0]
consumer_group_script = self.path.script("kafka-consumer-groups.sh", node)
if command_config is None:
command_config = ""
else:
command_config = "--command-config " + command_config
cmd = "%s --bootstrap-server %s %s --list" % \
(consumer_group_script,
self.bootstrap_servers(self.security_protocol),
command_config)
output = ""
self.logger.debug(cmd)
for line in node.account.ssh_capture(cmd):
if not line.startswith("SLF4J"):
output += line
self.logger.debug(output)
return output
def describe_consumer_group(self, group, node=None, command_config=None):
""" Describe a consumer group.
"""
if node is None:
node = self.nodes[0]
consumer_group_script = self.path.script("kafka-consumer-groups.sh", node)
if command_config is None:
command_config = ""
else:
command_config = "--command-config " + command_config
cmd = "%s --bootstrap-server %s %s --group %s --describe" % \
(consumer_group_script,
self.bootstrap_servers(self.security_protocol),
command_config, group)
output = ""
self.logger.debug(cmd)
for line in node.account.ssh_capture(cmd):
if not (line.startswith("SLF4J") or line.startswith("TOPIC") or line.startswith("Could not fetch offset")):
output += line
self.logger.debug(output)
return output
def zk_connect_setting(self):
return self.zk.connect_setting(self.zk_chroot, self.zk_client_secure)
def _connect_setting(self, node, use_zk_connection=True):
"""
Checks if --bootstrap-server config is supported, if yes then returns a string with
bootstrap server, otherwise returns zookeeper connection string.
"""
if node.version.topic_command_supports_bootstrap_server() and not use_zk_connection:
connection_setting = "--bootstrap-server %s" % (self.bootstrap_servers(self.security_protocol))
else:
connection_setting = "--zookeeper %s" % (self.zk_connect_setting())
return connection_setting
def __bootstrap_servers(self, port, validate=True, offline_nodes=[]):
if validate and not port.open:
raise ValueError("We are retrieving bootstrap servers for the port: %s which is not currently open. - " %
str(port.port_number))
return ','.join([node.account.hostname + ":" + str(port.port_number)
for node in self.nodes
if node not in offline_nodes])
def bootstrap_servers(self, protocol='PLAINTEXT', validate=True, offline_nodes=[]):
"""Return comma-delimited list of brokers in this cluster formatted as HOSTNAME1:PORT1,HOSTNAME:PORT2,...
This is the format expected by many config files.
"""
port_mapping = self.port_mappings[protocol]
self.logger.info("Bootstrap client port is: " + str(port_mapping.port_number))
return self.__bootstrap_servers(port_mapping, validate, offline_nodes)
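# Illustrative example of the returned string for a three-node cluster on the default PLAINTEXT
# listener (hostnames are example values):
#
#   "worker1:9092,worker2:9092,worker3:9092"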
def controller(self):
""" Get the controller node
"""
self.logger.debug("Querying zookeeper to find controller broker")
controller_info = self.zk.query("/controller", chroot=self.zk_chroot)
if controller_info is None:
raise Exception("Error finding controller info")
controller_info = json.loads(controller_info)
self.logger.debug(controller_info)
controller_idx = int(controller_info["brokerid"])
self.logger.info("Controller's ID: %d" % (controller_idx))
return self.get_node(controller_idx)
def is_registered(self, node):
"""
Check whether a broker is registered in Zookeeper
"""
self.logger.debug("Querying zookeeper to see if broker %s is registered", str(node))
broker_info = self.zk.query("/brokers/ids/%s" % self.idx(node), chroot=self.zk_chroot)
self.logger.debug("Broker info: %s", broker_info)
return broker_info is not None
def get_offset_shell(self, topic, partitions, max_wait_ms, offsets, time):
node = self.nodes[0]
cmd = self.path.script("kafka-run-class.sh", node)
cmd += " kafka.tools.GetOffsetShell"
cmd += " --topic %s --broker-list %s --max-wait-ms %s --offsets %s --time %s" % (topic, self.bootstrap_servers(self.security_protocol), max_wait_ms, offsets, time)
if partitions:
cmd += ' --partitions %s' % partitions
cmd += " 2>> %s/get_offset_shell.log" % KafkaService.PERSISTENT_ROOT
cmd += " | tee -a %s/get_offset_shell.log &" % KafkaService.PERSISTENT_ROOT
output = ""
self.logger.debug(cmd)
for line in node.account.ssh_capture(cmd):
output += line
self.logger.debug(output)
return output
def java_class_name(self):
return "kafka.Kafka"

View File

@@ -0,0 +1,91 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# see kafka.server.KafkaConfig for additional details and defaults
advertised.host.name={{ node.account.hostname }}
listeners={{ listeners }}
advertised.listeners={{ advertised_listeners }}
listener.security.protocol.map={{ listener_security_protocol_map }}
{% if node.version.supports_named_listeners() %}
inter.broker.listener.name={{ interbroker_listener.name }}
{% else %}
security.inter.broker.protocol={{ interbroker_listener.security_protocol }}
{% endif %}
{% for k, v in listener_security_config.client_listener_overrides.iteritems() %}
{% if listener_security_config.requires_sasl_mechanism_prefix(k) %}
listener.name.{{ security_protocol.lower() }}.{{ security_config.client_sasl_mechanism.lower() }}.{{ k }}={{ v }}
{% else %}
listener.name.{{ security_protocol.lower() }}.{{ k }}={{ v }}
{% endif %}
{% endfor %}
{% if interbroker_listener.name != security_protocol %}
{% for k, v in listener_security_config.interbroker_listener_overrides.iteritems() %}
{% if listener_security_config.requires_sasl_mechanism_prefix(k) %}
listener.name.{{ interbroker_listener.name.lower() }}.{{ security_config.interbroker_sasl_mechanism.lower() }}.{{ k }}={{ v }}
{% else %}
listener.name.{{ interbroker_listener.name.lower() }}.{{ k }}={{ v }}
{% endif %}
{% endfor %}
{% endif %}
ssl.keystore.location=/mnt/security/test.keystore.jks
ssl.keystore.password=test-ks-passwd
ssl.key.password=test-ks-passwd
ssl.keystore.type=JKS
ssl.truststore.location=/mnt/security/test.truststore.jks
ssl.truststore.password=test-ts-passwd
ssl.truststore.type=JKS
ssl.endpoint.identification.algorithm=HTTPS
# Zookeeper TLS settings
#
# Note that zookeeper.ssl.client.enable will be set to true or false elsewhere, as appropriate.
# If it is false then these ZK keystore/truststore settings will have no effect. If it is true then
# zookeeper.clientCnxnSocket will also be set elsewhere (to org.apache.zookeeper.ClientCnxnSocketNetty)
{% if not zk.zk_tls_encrypt_only %}
zookeeper.ssl.keystore.location=/mnt/security/test.keystore.jks
zookeeper.ssl.keystore.password=test-ks-passwd
{% endif %}
zookeeper.ssl.truststore.location=/mnt/security/test.truststore.jks
zookeeper.ssl.truststore.password=test-ts-passwd
#
sasl.mechanism.inter.broker.protocol={{ security_config.interbroker_sasl_mechanism }}
sasl.enabled.mechanisms={{ ",".join(security_config.enabled_sasl_mechanisms) }}
sasl.kerberos.service.name=kafka
{% if authorizer_class_name is not none %}
ssl.client.auth=required
authorizer.class.name={{ authorizer_class_name }}
{% endif %}
zookeeper.set.acl={{"true" if zk_set_acl else "false"}}
zookeeper.connection.timeout.ms={{ zk_connect_timeout }}
zookeeper.session.timeout.ms={{ zk_session_timeout }}
{% if replica_lag is defined %}
replica.lag.time.max.ms={{replica_lag}}
{% endif %}
{% if auto_create_topics_enable is defined and auto_create_topics_enable is not none %}
auto.create.topics.enable={{ auto_create_topics_enable }}
{% endif %}
offsets.topic.num.partitions={{ num_nodes }}
offsets.topic.replication.factor={{ 3 if num_nodes > 3 else num_nodes }}
# Set to a low, but non-zero value to exercise this path without making tests much slower
group.initial.rebalance.delay.ms=100

View File

@@ -0,0 +1,136 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
log4j.rootLogger={{ log_level|default("DEBUG") }}, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
# INFO level appenders
log4j.appender.kafkaInfoAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.kafkaInfoAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.kafkaInfoAppender.File={{ log_dir }}/info/server.log
log4j.appender.kafkaInfoAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.kafkaInfoAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.kafkaInfoAppender.Threshold=INFO
log4j.appender.stateChangeInfoAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.stateChangeInfoAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.stateChangeInfoAppender.File={{ log_dir }}/info/state-change.log
log4j.appender.stateChangeInfoAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.stateChangeInfoAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.stateChangeInfoAppender.Threshold=INFO
log4j.appender.requestInfoAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.requestInfoAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.requestInfoAppender.File={{ log_dir }}/info/kafka-request.log
log4j.appender.requestInfoAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.requestInfoAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.requestInfoAppender.Threshold=INFO
log4j.appender.cleanerInfoAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.cleanerInfoAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.cleanerInfoAppender.File={{ log_dir }}/info/log-cleaner.log
log4j.appender.cleanerInfoAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.cleanerInfoAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.cleanerInfoAppender.Threshold=INFO
log4j.appender.controllerInfoAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.controllerInfoAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.controllerInfoAppender.File={{ log_dir }}/info/controller.log
log4j.appender.controllerInfoAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.controllerInfoAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.controllerInfoAppender.Threshold=INFO
log4j.appender.authorizerInfoAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.authorizerInfoAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.authorizerInfoAppender.File={{ log_dir }}/info/kafka-authorizer.log
log4j.appender.authorizerInfoAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.authorizerInfoAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.authorizerInfoAppender.Threshold=INFO
# DEBUG level appenders
log4j.appender.kafkaDebugAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.kafkaDebugAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.kafkaDebugAppender.File={{ log_dir }}/debug/server.log
log4j.appender.kafkaDebugAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.kafkaDebugAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.kafkaDebugAppender.Threshold=DEBUG
log4j.appender.stateChangeDebugAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.stateChangeDebugAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.stateChangeDebugAppender.File={{ log_dir }}/debug/state-change.log
log4j.appender.stateChangeDebugAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.stateChangeDebugAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.stateChangeDebugAppender.Threshold=DEBUG
log4j.appender.requestDebugAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.requestDebugAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.requestDebugAppender.File={{ log_dir }}/debug/kafka-request.log
log4j.appender.requestDebugAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.requestDebugAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.requestDebugAppender.Threshold=DEBUG
log4j.appender.cleanerDebugAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.cleanerDebugAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.cleanerDebugAppender.File={{ log_dir }}/debug/log-cleaner.log
log4j.appender.cleanerDebugAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.cleanerDebugAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.cleanerDebugAppender.Threshold=DEBUG
log4j.appender.controllerDebugAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.controllerDebugAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.controllerDebugAppender.File={{ log_dir }}/debug/controller.log
log4j.appender.controllerDebugAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.controllerDebugAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.controllerDebugAppender.Threshold=DEBUG
log4j.appender.authorizerDebugAppender=org.apache.log4j.DailyRollingFileAppender
log4j.appender.authorizerDebugAppender.DatePattern='.'yyyy-MM-dd-HH
log4j.appender.authorizerDebugAppender.File={{ log_dir }}/debug/kafka-authorizer.log
log4j.appender.authorizerDebugAppender.layout=org.apache.log4j.PatternLayout
log4j.appender.authorizerDebugAppender.layout.ConversionPattern=[%d] %p %m (%c)%n
log4j.appender.authorizerDebugAppender.Threshold=DEBUG
# Turn on all our debugging info
log4j.logger.kafka.producer.async.DefaultEventHandler={{ log_level|default("DEBUG") }}, kafkaInfoAppender, kafkaDebugAppender
log4j.logger.kafka.client.ClientUtils={{ log_level|default("DEBUG") }}, kafkaInfoAppender, kafkaDebugAppender
log4j.logger.kafka.perf={{ log_level|default("DEBUG") }}, kafkaInfoAppender, kafkaDebugAppender
log4j.logger.kafka.perf.ProducerPerformance$ProducerThread={{ log_level|default("DEBUG") }}, kafkaInfoAppender, kafkaDebugAppender
log4j.logger.kafka={{ log_level|default("DEBUG") }}, kafkaInfoAppender, kafkaDebugAppender
log4j.logger.kafka.network.RequestChannel$={{ log_level|default("DEBUG") }}, requestInfoAppender, requestDebugAppender
log4j.additivity.kafka.network.RequestChannel$=false
log4j.logger.kafka.network.Processor={{ log_level|default("DEBUG") }}, requestInfoAppender, requestDebugAppender
log4j.logger.kafka.server.KafkaApis={{ log_level|default("DEBUG") }}, requestInfoAppender, requestDebugAppender
log4j.additivity.kafka.server.KafkaApis=false
log4j.logger.kafka.request.logger={{ log_level|default("DEBUG") }}, requestInfoAppender, requestDebugAppender
log4j.additivity.kafka.request.logger=false
log4j.logger.kafka.controller={{ log_level|default("DEBUG") }}, controllerInfoAppender, controllerDebugAppender
log4j.additivity.kafka.controller=false
log4j.logger.kafka.log.LogCleaner={{ log_level|default("DEBUG") }}, cleanerInfoAppender, cleanerDebugAppender
log4j.additivity.kafka.log.LogCleaner=false
log4j.logger.state.change.logger={{ log_level|default("DEBUG") }}, stateChangeInfoAppender, stateChangeDebugAppender
log4j.additivity.state.change.logger=false
#Change this to debug to get the actual audit log for authorizer.
log4j.logger.kafka.authorizer.logger={{ log_level|default("DEBUG") }}, authorizerInfoAppender, authorizerDebugAppender
log4j.additivity.kafka.authorizer.logger=false

View File

@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import namedtuple
TopicPartition = namedtuple('TopicPartition', ['topic', 'partition'])
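# Illustrative usage (example values only):
#
#   tp = TopicPartition(topic="test_topic", partition=0)
#   tp.topic      # "test_topic"
#   tp.partition  # 0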

View File

@@ -0,0 +1,83 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.services.background_thread import BackgroundThreadService
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.security.security_config import SecurityConfig
class KafkaLog4jAppender(KafkaPathResolverMixin, BackgroundThreadService):
logs = {
"producer_log": {
"path": "/mnt/kafka_log4j_appender.log",
"collect_default": False}
}
def __init__(self, context, num_nodes, kafka, topic, max_messages=-1, security_protocol="PLAINTEXT"):
super(KafkaLog4jAppender, self).__init__(context, num_nodes)
self.kafka = kafka
self.topic = topic
self.max_messages = max_messages
self.security_protocol = security_protocol
self.security_config = SecurityConfig(self.context, security_protocol)
self.stop_timeout_sec = 30
def _worker(self, idx, node):
cmd = self.start_cmd(node)
self.logger.debug("VerifiableLog4jAppender %d command: %s" % (idx, cmd))
self.security_config.setup_node(node)
node.account.ssh(cmd)
def start_cmd(self, node):
cmd = self.path.script("kafka-run-class.sh", node)
cmd += " "
cmd += self.java_class_name()
cmd += " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers(self.security_protocol))
if self.max_messages > 0:
cmd += " --max-messages %s" % str(self.max_messages)
if self.security_protocol != SecurityConfig.PLAINTEXT:
cmd += " --security-protocol %s" % str(self.security_protocol)
if self.security_protocol == SecurityConfig.SSL or self.security_protocol == SecurityConfig.SASL_SSL:
cmd += " --ssl-truststore-location %s" % str(SecurityConfig.TRUSTSTORE_PATH)
cmd += " --ssl-truststore-password %s" % str(SecurityConfig.ssl_stores.truststore_passwd)
if self.security_protocol == SecurityConfig.SASL_PLAINTEXT or \
self.security_protocol == SecurityConfig.SASL_SSL or \
self.security_protocol == SecurityConfig.SASL_MECHANISM_GSSAPI or \
self.security_protocol == SecurityConfig.SASL_MECHANISM_PLAIN:
cmd += " --sasl-kerberos-service-name %s" % str('kafka')
cmd += " --client-jaas-conf-path %s" % str(SecurityConfig.JAAS_CONF_PATH)
cmd += " --kerb5-conf-path %s" % str(SecurityConfig.KRB5CONF_PATH)
cmd += " 2>> /mnt/kafka_log4j_appender.log | tee -a /mnt/kafka_log4j_appender.log &"
return cmd
def stop_node(self, node):
node.account.kill_java_processes(self.java_class_name(), allow_fail=False)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def clean_node(self, node):
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=False,
allow_fail=False)
node.account.ssh("rm -rf /mnt/kafka_log4j_appender.log", allow_fail=False)
def java_class_name(self):
return "org.apache.kafka.tools.VerifiableLog4jAppender"

View File

@@ -0,0 +1,88 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from ducktape.services.background_thread import BackgroundThreadService
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin, CORE_LIBS_JAR_NAME, CORE_DEPENDANT_TEST_LIBS_JAR_NAME
from kafkatest.services.security.security_config import SecurityConfig
from kafkatest.version import DEV_BRANCH
class LogCompactionTester(KafkaPathResolverMixin, BackgroundThreadService):
OUTPUT_DIR = "/mnt/logcompaction_tester"
LOG_PATH = os.path.join(OUTPUT_DIR, "logcompaction_tester_stdout.log")
VERIFICATION_STRING = "Data verification is completed"
logs = {
"tool_logs": {
"path": LOG_PATH,
"collect_default": True}
}
def __init__(self, context, kafka, security_protocol="PLAINTEXT", stop_timeout_sec=30):
super(LogCompactionTester, self).__init__(context, 1)
self.kafka = kafka
self.security_protocol = security_protocol
self.security_config = SecurityConfig(self.context, security_protocol)
self.stop_timeout_sec = stop_timeout_sec
self.log_compaction_completed = False
def _worker(self, idx, node):
node.account.ssh("mkdir -p %s" % LogCompactionTester.OUTPUT_DIR)
cmd = self.start_cmd(node)
self.logger.info("LogCompactionTester %d command: %s" % (idx, cmd))
self.security_config.setup_node(node)
for line in node.account.ssh_capture(cmd):
self.logger.debug("Checking line:{}".format(line))
if line.startswith(LogCompactionTester.VERIFICATION_STRING):
self.log_compaction_completed = True
def start_cmd(self, node):
core_libs_jar = self.path.jar(CORE_LIBS_JAR_NAME, DEV_BRANCH)
core_dependant_test_libs_jar = self.path.jar(CORE_DEPENDANT_TEST_LIBS_JAR_NAME, DEV_BRANCH)
cmd = "for file in %s; do CLASSPATH=$CLASSPATH:$file; done;" % core_libs_jar
cmd += " for file in %s; do CLASSPATH=$CLASSPATH:$file; done;" % core_dependant_test_libs_jar
cmd += " export CLASSPATH;"
cmd += self.path.script("kafka-run-class.sh", node)
cmd += " %s" % self.java_class_name()
cmd += " --bootstrap-server %s --messages 1000000 --sleep 20 --duplicates 10 --percent-deletes 10" % (self.kafka.bootstrap_servers(self.security_protocol))
cmd += " 2>> %s | tee -a %s &" % (self.logs["tool_logs"]["path"], self.logs["tool_logs"]["path"])
return cmd
def stop_node(self, node):
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=True,
allow_fail=True)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def clean_node(self, node):
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=False,
allow_fail=True)
node.account.ssh("rm -rf %s" % LogCompactionTester.OUTPUT_DIR, allow_fail=False)
def java_class_name(self):
return "kafka.tools.LogCompactionTester"
@property
def is_done(self):
return self.log_compaction_completed

View File

@@ -0,0 +1,164 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from ducktape.services.service import Service
from ducktape.utils.util import wait_until
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
"""
MirrorMaker is a tool for mirroring data between two Kafka clusters.
"""
class MirrorMaker(KafkaPathResolverMixin, Service):
# Root directory for persistent output
PERSISTENT_ROOT = "/mnt/mirror_maker"
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
LOG_FILE = os.path.join(LOG_DIR, "mirror_maker.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
PRODUCER_CONFIG = os.path.join(PERSISTENT_ROOT, "producer.properties")
CONSUMER_CONFIG = os.path.join(PERSISTENT_ROOT, "consumer.properties")
logs = {
"mirror_maker_log": {
"path": LOG_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, source, target, whitelist=None, num_streams=1,
consumer_timeout_ms=None, offsets_storage="kafka",
offset_commit_interval_ms=60000, log_level="DEBUG", producer_interceptor_classes=None):
"""
MirrorMaker mirrors messages from one or more source clusters to a single destination cluster.
Args:
context: standard context
source: source Kafka cluster
target: target Kafka cluster to which data will be mirrored
whitelist: whitelist regex for topics to mirror
num_streams: number of consumer threads to create; can be a single int, or a list with
one value per node, allowing num_streams to be the same for each node,
or configured independently per-node
consumer_timeout_ms: consumer stops if t > consumer_timeout_ms elapses between consecutive messages
offsets_storage: used for consumer offsets.storage property
offset_commit_interval_ms: how frequently the mirror maker consumer commits offsets
"""
super(MirrorMaker, self).__init__(context, num_nodes=num_nodes)
self.log_level = log_level
self.consumer_timeout_ms = consumer_timeout_ms
self.num_streams = num_streams
if not isinstance(num_streams, int):
# if not an integer, num_streams should be configured per-node
assert len(num_streams) == num_nodes
self.whitelist = whitelist
self.source = source
self.target = target
self.offsets_storage = offsets_storage.lower()
if not (self.offsets_storage in ["kafka", "zookeeper"]):
raise Exception("offsets_storage should be 'kafka' or 'zookeeper'. Instead found %s" % self.offsets_storage)
self.offset_commit_interval_ms = offset_commit_interval_ms
self.producer_interceptor_classes = producer_interceptor_classes
self.external_jars = None
# These properties are potentially used by third-party tests.
self.source_auto_offset_reset = None
self.partition_assignment_strategy = None
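# Illustrative sketch (example values only): mirror every topic matching the whitelist from one running
# KafkaService ("source_kafka") to another ("target_kafka"); both names are assumptions for the example.
#
#   mm = MirrorMaker(test_context, num_nodes=2, source=source_kafka, target=target_kafka,
#                    whitelist=".*", num_streams=1, offsets_storage="kafka")
#   mm.start()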
def start_cmd(self, node):
cmd = "export LOG_DIR=%s;" % MirrorMaker.LOG_DIR
cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\";" % MirrorMaker.LOG4J_CONFIG
cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts
# add external dependencies, for instance for interceptors
if self.external_jars is not None:
cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % self.external_jars
cmd += "export CLASSPATH; "
cmd += " %s %s" % (self.path.script("kafka-run-class.sh", node),
self.java_class_name())
cmd += " --consumer.config %s" % MirrorMaker.CONSUMER_CONFIG
cmd += " --producer.config %s" % MirrorMaker.PRODUCER_CONFIG
cmd += " --offset.commit.interval.ms %s" % str(self.offset_commit_interval_ms)
if isinstance(self.num_streams, int):
cmd += " --num.streams %d" % self.num_streams
else:
# config num_streams separately on each node
cmd += " --num.streams %d" % self.num_streams[self.idx(node) - 1]
if self.whitelist is not None:
cmd += " --whitelist=\"%s\"" % self.whitelist
cmd += " 1>> %s 2>> %s &" % (MirrorMaker.LOG_FILE, MirrorMaker.LOG_FILE)
return cmd
def pids(self, node):
return node.account.java_pids(self.java_class_name())
def alive(self, node):
return len(self.pids(node)) > 0
def start_node(self, node):
node.account.ssh("mkdir -p %s" % MirrorMaker.PERSISTENT_ROOT, allow_fail=False)
node.account.ssh("mkdir -p %s" % MirrorMaker.LOG_DIR, allow_fail=False)
self.security_config = self.source.security_config.client_config()
self.security_config.setup_node(node)
# Create, upload one consumer config file for source cluster
consumer_props = self.render("mirror_maker_consumer.properties")
consumer_props += str(self.security_config)
node.account.create_file(MirrorMaker.CONSUMER_CONFIG, consumer_props)
self.logger.info("Mirrormaker consumer props:\n" + consumer_props)
# Create, upload producer properties file for target cluster
producer_props = self.render('mirror_maker_producer.properties')
producer_props += str(self.security_config)
self.logger.info("Mirrormaker producer props:\n" + producer_props)
node.account.create_file(MirrorMaker.PRODUCER_CONFIG, producer_props)
# Create and upload log properties
log_config = self.render('tools_log4j.properties', log_file=MirrorMaker.LOG_FILE)
node.account.create_file(MirrorMaker.LOG4J_CONFIG, log_config)
# Run mirror maker
cmd = self.start_cmd(node)
self.logger.debug("Mirror maker command: %s", cmd)
node.account.ssh(cmd, allow_fail=False)
wait_until(lambda: self.alive(node), timeout_sec=30, backoff_sec=.5,
err_msg="Mirror maker took to long to start.")
self.logger.debug("Mirror maker is alive")
def stop_node(self, node, clean_shutdown=True):
node.account.kill_java_processes(self.java_class_name(), allow_fail=True,
clean_shutdown=clean_shutdown)
wait_until(lambda: not self.alive(node), timeout_sec=30, backoff_sec=.5,
err_msg="Mirror maker took to long to stop.")
def clean_node(self, node):
if self.alive(node):
self.logger.warn("%s %s was still alive at cleanup time. Killing forcefully..." %
(self.__class__.__name__, node.account))
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=False,
allow_fail=True)
node.account.ssh("rm -rf %s" % MirrorMaker.PERSISTENT_ROOT, allow_fail=False)
self.security_config.clean_node(node)
def java_class_name(self):
return "kafka.tools.MirrorMaker"

View File

@@ -0,0 +1,14 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@@ -0,0 +1,228 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
from collections import defaultdict, namedtuple
import json
from threading import Thread
from select import select
import socket
MetricKey = namedtuple('MetricKey', ['host', 'client_id', 'name', 'group', 'tags'])
MetricValue = namedtuple('MetricValue', ['time', 'value'])
# Python's logging library doesn't define anything more detailed than DEBUG, but we'd like a finer-grained setting
# for highly detailed messages, e.g. logging every single incoming request.
TRACE = 5
class HttpMetricsCollector(object):
"""
HttpMetricsCollector enables collection of metrics from various Kafka clients instrumented with the
PushHttpMetricsReporter. It starts a web server locally and provides the necessary configuration for clients
to automatically report metrics data to this server. It also provides basic functionality for querying the
recorded metrics. This class can be used either as a mixin or standalone object.
"""
# The port opened on each worker node; connections to it are reverse-forwarded to the HTTP server's local port on this driver node
REMOTE_PORT = 6789
def __init__(self, **kwargs):
"""
Create a new HttpMetricsCollector
:param period: the period, in seconds, between metric updates that the generated reporter configuration requests.
Defaults to reporting once per second.
:param args:
:param kwargs:
"""
self._http_metrics_period = kwargs.pop('period', 1)
super(HttpMetricsCollector, self).__init__(**kwargs)
# TODO: currently we maintain just a simple map from all key info -> value. However, some key fields are far
# more common to filter on, so we'd want to index by them, e.g. host, client.id, metric name.
self._http_metrics = defaultdict(list)
self._httpd = HTTPServer(('', 0), _MetricsReceiver)
self._httpd.parent = self
self._httpd.metrics = self._http_metrics
self._http_metrics_thread = Thread(target=self._run_http_metrics_httpd,
name='http-metrics-thread[%s]' % str(self))
self._http_metrics_thread.start()
self._forwarders = {}
@property
def http_metrics_url(self):
"""
:return: the URL to use when reporting metrics
"""
return "http://%s:%d" % ("localhost", self.REMOTE_PORT)
@property
def http_metrics_client_configs(self):
"""
Get client configurations that can be used to report data to this collector. Put these in a properties file for
clients (e.g. console producer or consumer) to have them push metrics to this driver. Note that in some cases
(e.g. streams, connect) these settings may need to be prefixed.
:return: a dictionary of client configurations that will direct a client to report metrics to this collector
"""
return {
"metric.reporters": "org.apache.kafka.tools.PushHttpMetricsReporter",
"metrics.url": self.http_metrics_url,
"metrics.period": self._http_metrics_period,
}
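# Illustrative usage sketch (not part of this class; `collector` and `node` are assumed to be an
# HttpMetricsCollector instance and a ducktape node): render these configs into a properties file
# that a client such as the console producer can pick up.
#
#   client_props = "\n".join(["%s=%s" % (k, v) for k, v in collector.http_metrics_client_configs.items()])
#   node.account.create_file("/mnt/client.properties", client_props)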
def start_node(self, node):
local_port = self._httpd.socket.getsockname()[1]
self.logger.debug('HttpMetricsCollector listening on %s', local_port)
self._forwarders[self.idx(node)] = _ReverseForwarder(self.logger, node, self.REMOTE_PORT, local_port)
super(HttpMetricsCollector, self).start_node(node)
def stop(self):
super(HttpMetricsCollector, self).stop()
if self._http_metrics_thread:
self.logger.debug("Shutting down metrics httpd")
self._httpd.shutdown()
self._http_metrics_thread.join()
self.logger.debug("Finished shutting down metrics httpd")
def stop_node(self, node):
super(HttpMetricsCollector, self).stop_node(node)
idx = self.idx(node)
self._forwarders[idx].stop()
del self._forwarders[idx]
def metrics(self, host=None, client_id=None, name=None, group=None, tags=None):
"""
Get any collected metrics that match the specified parameters, yielding each as a tuple of
(key, [<timestamp, value>, ...]) values.
"""
for k, values in self._http_metrics.iteritems():
if ((host is None or host == k.host) and
(client_id is None or client_id == k.client_id) and
(name is None or name == k.name) and
(group is None or group == k.group) and
(tags is None or tags == k.tags)):
yield (k, values)
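# Illustrative usage sketch (not part of this class; `collector` and the metric name/group are
# assumptions for the example): query whatever was reported after a workload has run.
#
#   for key, values in collector.metrics(group='producer-metrics', name='record-send-rate'):
#       latest_time, latest_value = values[-1]
#       collector.logger.info("%s last reported %s at %s", key.name, latest_value, latest_time)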
def _run_http_metrics_httpd(self):
self._httpd.serve_forever()
class _MetricsReceiver(BaseHTTPRequestHandler):
"""
HTTP request handler that accepts requests from the PushHttpMetricsReporter and stores them back into the parent
HttpMetricsCollector
"""
def log_message(self, format, *args, **kwargs):
# Don't do any logging here so we get rid of the mostly useless per-request Apache log-style info that spams
# the debug log
pass
def do_POST(self):
data = self.rfile.read(int(self.headers['Content-Length']))
data = json.loads(data)
self.server.parent.logger.log(TRACE, "POST %s\n\n%s\n%s", self.path, self.headers,
json.dumps(data, indent=4, separators=(',', ': ')))
self.send_response(204)
self.end_headers()
client = data['client']
host = client['host']
client_id = client['client_id']
ts = client['time']
metrics = data['metrics']
for raw_metric in metrics:
name = raw_metric['name']
group = raw_metric['group']
# Convert to tuple of pairs because dicts & lists are unhashable
tags = tuple([(k, v) for k, v in raw_metric['tags'].iteritems()])
value = raw_metric['value']
key = MetricKey(host=host, client_id=client_id, name=name, group=group, tags=tags)
metric_value = MetricValue(time=ts, value=value)
self.server.metrics[key].append(metric_value)
class _ReverseForwarder(object):
"""
Runs reverse forwarding of a port on a node to a local port. This allows you to set up a server on the test driver
while assuming only the basic SSH access that ducktape guarantees is available for worker nodes.
"""
def __init__(self, logger, node, remote_port, local_port):
self.logger = logger
self._node = node
self._local_port = local_port
self._remote_port = remote_port
self.logger.debug('Forwarding %s port %d to driver port %d', node, remote_port, local_port)
self._stopping = False
self._transport = node.account.ssh_client.get_transport()
self._transport.request_port_forward('', remote_port)
self._accept_thread = Thread(target=self._accept)
self._accept_thread.start()
def stop(self):
self._stopping = True
self._accept_thread.join(30)
if self._accept_thread.isAlive():
raise RuntimeError("Failed to stop reverse forwarder on %s", self._node)
self._transport.cancel_port_forward('', self._remote_port)
def _accept(self):
while not self._stopping:
chan = self._transport.accept(1)
if chan is None:
continue
thr = Thread(target=self._handler, args=(chan,))
thr.setDaemon(True)
thr.start()
def _handler(self, chan):
sock = socket.socket()
try:
sock.connect(("localhost", self._local_port))
except Exception as e:
self.logger.error('Forwarding request to port %d failed: %r', self._local_port, e)
return
self.logger.log(TRACE, 'Connected! Tunnel open %r -> %r -> %d', chan.origin_addr, chan.getpeername(),
self._local_port)
while True:
r, w, x = select([sock, chan], [], [])
if sock in r:
data = sock.recv(1024)
if len(data) == 0:
break
chan.send(data)
if chan in r:
data = chan.recv(1024)
if len(data) == 0:
break
sock.send(data)
chan.close()
sock.close()
self.logger.log(TRACE, 'Tunnel closed from %r', chan.origin_addr)
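# Illustrative usage sketch (not part of this class): this mirrors roughly what
# HttpMetricsCollector.start_node/stop_node above do; `logger`, `node` and `local_port` are
# assumptions for the example.
#
#   forwarder = _ReverseForwarder(logger, node, remote_port=6789, local_port=local_port)
#   # ... run the workload that reports to http://localhost:6789 on the worker ...
#   forwarder.stop()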

View File

@@ -0,0 +1,141 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from ducktape.cluster.remoteaccount import RemoteCommandError
from ducktape.utils.util import wait_until
from kafkatest.version import get_version, V_0_11_0_0, DEV_BRANCH
class JmxMixin(object):
"""This mixin helps existing service subclasses start JmxTool on their worker nodes and collect jmx stats.
A couple things worth noting:
- this is not a service in its own right.
- we assume the service using JmxMixin also uses KafkaPathResolverMixin
- this uses the --wait option for JmxTool, so the list of object names must be explicit; no patterns are permitted
"""
def __init__(self, num_nodes, jmx_object_names=None, jmx_attributes=None, jmx_poll_ms=1000, root="/mnt"):
self.jmx_object_names = jmx_object_names
self.jmx_attributes = jmx_attributes or []
self.jmx_poll_ms = jmx_poll_ms
self.jmx_port = 9192
self.started = [False] * num_nodes
self.jmx_stats = [{} for x in range(num_nodes)]
self.maximum_jmx_value = {} # map from object_attribute_name to maximum value observed over time
self.average_jmx_value = {} # map from object_attribute_name to average value observed over time
self.jmx_tool_log = os.path.join(root, "jmx_tool.log")
self.jmx_tool_err_log = os.path.join(root, "jmx_tool.err.log")
def clean_node(self, node):
node.account.kill_java_processes(self.jmx_class_name(), clean_shutdown=False,
allow_fail=True)
idx = self.idx(node)
self.started[idx-1] = False
node.account.ssh("rm -f -- %s %s" % (self.jmx_tool_log, self.jmx_tool_err_log), allow_fail=False)
def start_jmx_tool(self, idx, node):
if self.jmx_object_names is None:
self.logger.debug("%s: Not starting jmx tool because no jmx objects are defined" % node.account)
return
if self.started[idx-1]:
self.logger.debug("%s: jmx tool has been started already on this node" % node.account)
return
# JmxTool is not particularly robust to slow-starting processes. In order to ensure JmxTool doesn't fail if the
# process we're trying to monitor takes a while before listening on the JMX port, wait until we can see that port
# listening before even launching JmxTool.
def check_jmx_port_listening():
return 0 == node.account.ssh("nc -z 127.0.0.1 %d" % self.jmx_port, allow_fail=True)
wait_until(check_jmx_port_listening, timeout_sec=30, backoff_sec=.1,
err_msg="%s: Never saw JMX port for %s start listening" % (node.account, self))
# To correctly wait for requested JMX metrics to be added we need the --wait option for JmxTool. This option was
# not added until 0.11.0.1, so any earlier versions need to use JmxTool from a newer version.
use_jmxtool_version = get_version(node)
if use_jmxtool_version <= V_0_11_0_0:
use_jmxtool_version = DEV_BRANCH
cmd = "%s %s " % (self.path.script("kafka-run-class.sh", use_jmxtool_version), self.jmx_class_name())
cmd += "--reporting-interval %d --jmx-url service:jmx:rmi:///jndi/rmi://127.0.0.1:%d/jmxrmi" % (self.jmx_poll_ms, self.jmx_port)
cmd += " --wait"
for jmx_object_name in self.jmx_object_names:
cmd += " --object-name %s" % jmx_object_name
cmd += " --attributes "
for jmx_attribute in self.jmx_attributes:
cmd += "%s," % jmx_attribute
cmd += " 1>> %s" % self.jmx_tool_log
cmd += " 2>> %s &" % self.jmx_tool_err_log
self.logger.debug("%s: Start JmxTool %d command: %s" % (node.account, idx, cmd))
node.account.ssh(cmd, allow_fail=False)
wait_until(lambda: self._jmx_has_output(node), timeout_sec=30, backoff_sec=.5, err_msg="%s: Jmx tool took too long to start" % node.account)
self.started[idx-1] = True
def _jmx_has_output(self, node):
"""Helper used as a proxy to determine whether jmx is running by that jmx_tool_log contains output."""
try:
node.account.ssh("test -s %s" % self.jmx_tool_log, allow_fail=False)
return True
except RemoteCommandError:
return False
def read_jmx_output(self, idx, node):
if not self.started[idx-1]:
return
object_attribute_names = []
cmd = "cat %s" % self.jmx_tool_log
self.logger.debug("Read jmx output %d command: %s", idx, cmd)
lines = [line for line in node.account.ssh_capture(cmd, allow_fail=False)]
assert len(lines) > 1, "There don't appear to be any samples in the jmx tool log: %s" % lines
for line in lines:
if "time" in line:
object_attribute_names = line.strip()[1:-1].split("\",\"")[1:]
continue
stats = [float(field) for field in line.split(',')]
time_sec = int(stats[0]/1000)
self.jmx_stats[idx-1][time_sec] = {name: stats[i+1] for i, name in enumerate(object_attribute_names)}
# Do not calculate the average and maximum of jmx stats until we have read output from all nodes.
# For a multi-node service, this means the results are only aggregated once the output of the last
# node has been read.
if any(len(time_to_stats) == 0 for time_to_stats in self.jmx_stats):
return
start_time_sec = min([min(time_to_stats.keys()) for time_to_stats in self.jmx_stats])
end_time_sec = max([max(time_to_stats.keys()) for time_to_stats in self.jmx_stats])
for name in object_attribute_names:
aggregates_per_time = []
for time_sec in xrange(start_time_sec, end_time_sec + 1):
# assume that value is 0 if it is not read by jmx tool at the given time. This is appropriate for metrics such as bandwidth
values_per_node = [time_to_stats.get(time_sec, {}).get(name, 0) for time_to_stats in self.jmx_stats]
# assume that value is aggregated across nodes by sum. This is appropriate for metrics such as bandwidth
aggregates_per_time.append(sum(values_per_node))
self.average_jmx_value[name] = sum(aggregates_per_time) / len(aggregates_per_time)
self.maximum_jmx_value[name] = max(aggregates_per_time)
def read_jmx_output_all_nodes(self):
for node in self.nodes:
self.read_jmx_output(self.idx(node), node)
def jmx_class_name(self):
return "kafka.tools.JmxTool"

View File

@@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from performance import PerformanceService, throughput, latency, compute_aggregate_throughput
from end_to_end_latency import EndToEndLatencyService
from producer_performance import ProducerPerformanceService
from consumer_performance import ConsumerPerformanceService

View File

@@ -0,0 +1,187 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from kafkatest.services.performance import PerformanceService
from kafkatest.services.security.security_config import SecurityConfig
from kafkatest.version import DEV_BRANCH, V_0_9_0_0, V_2_0_0, LATEST_0_10_0
class ConsumerPerformanceService(PerformanceService):
"""
See ConsumerPerformance.scala as the source of truth on these settings, but for reference:
"zookeeper" "The connection string for the zookeeper connection in the form host:port. Multiple URLS can
be given to allow fail-over. This option is only used with the old consumer."
"broker-list", "A broker list to use for connecting if using the new consumer."
"topic", "REQUIRED: The topic to consume from."
"group", "The group id to consume on."
"fetch-size", "The amount of data to fetch in a single request."
"from-latest", "If the consumer does not already have an establishedoffset to consume from,
start with the latest message present in the log rather than the earliest message."
"socket-buffer-size", "The size of the tcp RECV size."
"threads", "Number of processing threads."
"num-fetch-threads", "Number of fetcher threads. Defaults to 1"
"new-consumer", "Use the new consumer implementation."
"consumer.config", "Consumer config properties file."
"""
# Root directory for persistent output
PERSISTENT_ROOT = "/mnt/consumer_performance"
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "consumer_performance.stdout")
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "consumer_performance.stderr")
LOG_FILE = os.path.join(LOG_DIR, "consumer_performance.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "consumer.properties")
logs = {
"consumer_performance_output": {
"path": STDOUT_CAPTURE,
"collect_default": True},
"consumer_performance_stderr": {
"path": STDERR_CAPTURE,
"collect_default": True},
"consumer_performance_log": {
"path": LOG_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, kafka, topic, messages, version=DEV_BRANCH, new_consumer=True, settings={}):
super(ConsumerPerformanceService, self).__init__(context, num_nodes)
self.kafka = kafka
self.security_config = kafka.security_config.client_config()
self.topic = topic
self.messages = messages
self.new_consumer = new_consumer
self.settings = settings
assert version >= V_0_9_0_0 or (not new_consumer), \
"new_consumer is only supported if version >= 0.9.0.0, version %s" % str(version)
assert version < V_2_0_0 or new_consumer, \
"new_consumer==false is only supported if version < 2.0.0, version %s" % str(version)
security_protocol = self.security_config.security_protocol
assert version >= V_0_9_0_0 or security_protocol == SecurityConfig.PLAINTEXT, \
"Security protocol %s is only supported if version >= 0.9.0.0, version %s" % (self.security_config, str(version))
# These less-frequently used settings can be updated manually after instantiation
self.fetch_size = None
self.socket_buffer_size = None
self.threads = None
self.num_fetch_threads = None
self.group = None
self.from_latest = None
for node in self.nodes:
node.version = version
def args(self, version):
"""Dictionary of arguments used to start the Consumer Performance script."""
args = {
'topic': self.topic,
'messages': self.messages,
}
if self.new_consumer:
if version <= LATEST_0_10_0:
args['new-consumer'] = ""
args['broker-list'] = self.kafka.bootstrap_servers(self.security_config.security_protocol)
else:
args['zookeeper'] = self.kafka.zk_connect_setting()
if self.fetch_size is not None:
args['fetch-size'] = self.fetch_size
if self.socket_buffer_size is not None:
args['socket-buffer-size'] = self.socket_buffer_size
if self.threads is not None:
args['threads'] = self.threads
if self.num_fetch_threads is not None:
args['num-fetch-threads'] = self.num_fetch_threads
if self.group is not None:
args['group'] = self.group
if self.from_latest:
args['from-latest'] = ""
return args
def start_cmd(self, node):
cmd = "export LOG_DIR=%s;" % ConsumerPerformanceService.LOG_DIR
cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts
cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\";" % ConsumerPerformanceService.LOG4J_CONFIG
cmd += " %s" % self.path.script("kafka-consumer-perf-test.sh", node)
for key, value in self.args(node.version).items():
cmd += " --%s %s" % (key, value)
if node.version >= V_0_9_0_0:
# This is only used for security settings
cmd += " --consumer.config %s" % ConsumerPerformanceService.CONFIG_FILE
for key, value in self.settings.items():
cmd += " %s=%s" % (str(key), str(value))
cmd += " 2>> %(stderr)s | tee -a %(stdout)s" % {'stdout': ConsumerPerformanceService.STDOUT_CAPTURE,
'stderr': ConsumerPerformanceService.STDERR_CAPTURE}
return cmd
def parse_results(self, line, version):
parts = line.split(',')
if version >= V_0_9_0_0:
result = {
'total_mb': float(parts[2]),
'mbps': float(parts[3]),
'records_per_sec': float(parts[5]),
}
else:
result = {
'total_mb': float(parts[3]),
'mbps': float(parts[4]),
'records_per_sec': float(parts[6]),
}
return result
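# Illustrative note (not part of this class; values are made up for the example): for
# version >= 0.9.0.0 the tool's final CSV line is expected to look roughly like
#
#   2019-01-01 00:00:00, 2019-01-01 00:00:10, 95.37, 9.54, 1000000, 100000.0
#
# i.e. start time, end time, total MB consumed, MB/sec, total records, records/sec, which is
# where the indices 2, 3 and 5 above come from.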
def _worker(self, idx, node):
node.account.ssh("mkdir -p %s" % ConsumerPerformanceService.PERSISTENT_ROOT, allow_fail=False)
log_config = self.render('tools_log4j.properties', log_file=ConsumerPerformanceService.LOG_FILE)
node.account.create_file(ConsumerPerformanceService.LOG4J_CONFIG, log_config)
node.account.create_file(ConsumerPerformanceService.CONFIG_FILE, str(self.security_config))
self.security_config.setup_node(node)
cmd = self.start_cmd(node)
self.logger.debug("Consumer performance %d command: %s", idx, cmd)
last = None
for line in node.account.ssh_capture(cmd):
last = line
# Parse and save the last line's information
self.results[idx-1] = self.parse_results(last, node.version)

View File

@@ -0,0 +1,124 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from kafkatest.services.performance import PerformanceService
from kafkatest.services.security.security_config import SecurityConfig
from kafkatest.version import DEV_BRANCH, V_0_9_0_0
class EndToEndLatencyService(PerformanceService):
MESSAGE_BYTES = 21 # 0.8.X messages are fixed at 21 bytes, so we'll match that for other versions
# Root directory for persistent output
PERSISTENT_ROOT = "/mnt/end_to_end_latency"
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "end_to_end_latency.stdout")
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "end_to_end_latency.stderr")
LOG_FILE = os.path.join(LOG_DIR, "end_to_end_latency.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "client.properties")
logs = {
"end_to_end_latency_output": {
"path": STDOUT_CAPTURE,
"collect_default": True},
"end_to_end_latency_stderr": {
"path": STDERR_CAPTURE,
"collect_default": True},
"end_to_end_latency_log": {
"path": LOG_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, kafka, topic, num_records, compression_type="none", version=DEV_BRANCH, acks=1):
super(EndToEndLatencyService, self).__init__(context, num_nodes,
root=EndToEndLatencyService.PERSISTENT_ROOT)
self.kafka = kafka
self.security_config = kafka.security_config.client_config()
security_protocol = self.security_config.security_protocol
if version < V_0_9_0_0:
assert security_protocol == SecurityConfig.PLAINTEXT, \
"Security protocol %s is only supported if version >= 0.9.0.0, version %s" % (self.security_config, str(version))
assert compression_type == "none", \
"Compression type %s is only supported if version >= 0.9.0.0, version %s" % (compression_type, str(version))
self.args = {
'topic': topic,
'num_records': num_records,
'acks': acks,
'compression_type': compression_type,
'kafka_opts': self.security_config.kafka_opts,
'message_bytes': EndToEndLatencyService.MESSAGE_BYTES
}
for node in self.nodes:
node.version = version
def start_cmd(self, node):
args = self.args.copy()
args.update({
'zk_connect': self.kafka.zk_connect_setting(),
'bootstrap_servers': self.kafka.bootstrap_servers(self.security_config.security_protocol),
'config_file': EndToEndLatencyService.CONFIG_FILE,
'kafka_run_class': self.path.script("kafka-run-class.sh", node),
'java_class_name': self.java_class_name()
})
cmd = "export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % EndToEndLatencyService.LOG4J_CONFIG
if node.version >= V_0_9_0_0:
cmd += "KAFKA_OPTS=%(kafka_opts)s %(kafka_run_class)s %(java_class_name)s " % args
cmd += "%(bootstrap_servers)s %(topic)s %(num_records)d %(acks)d %(message_bytes)d %(config_file)s" % args
else:
# Set fetch max wait to 0 to match behavior in later versions
cmd += "KAFKA_OPTS=%(kafka_opts)s %(kafka_run_class)s kafka.tools.TestEndToEndLatency " % args
cmd += "%(bootstrap_servers)s %(zk_connect)s %(topic)s %(num_records)d 0 %(acks)d" % args
cmd += " 2>> %(stderr)s | tee -a %(stdout)s" % {'stdout': EndToEndLatencyService.STDOUT_CAPTURE,
'stderr': EndToEndLatencyService.STDERR_CAPTURE}
return cmd
def _worker(self, idx, node):
node.account.ssh("mkdir -p %s" % EndToEndLatencyService.PERSISTENT_ROOT, allow_fail=False)
log_config = self.render('tools_log4j.properties', log_file=EndToEndLatencyService.LOG_FILE)
node.account.create_file(EndToEndLatencyService.LOG4J_CONFIG, log_config)
client_config = str(self.security_config)
if node.version >= V_0_9_0_0:
client_config += "compression_type=%(compression_type)s" % self.args
node.account.create_file(EndToEndLatencyService.CONFIG_FILE, client_config)
self.security_config.setup_node(node)
cmd = self.start_cmd(node)
self.logger.debug("End-to-end latency %d command: %s", idx, cmd)
results = {}
for line in node.account.ssh_capture(cmd):
if line.startswith("Avg latency:"):
results['latency_avg_ms'] = float(line.split()[2])
if line.startswith("Percentiles"):
results['latency_50th_ms'] = float(line.split()[3][:-1])
results['latency_99th_ms'] = float(line.split()[6][:-1])
results['latency_999th_ms'] = float(line.split()[9])
self.results[idx-1] = results
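# Illustrative note (not part of this class; values are made up for the example): the lines
# parsed above are expected to look roughly like
#
#   Avg latency: 2.2334 ms
#   Percentiles: 50th = 2, 99th = 5, 99.9th = 10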
def java_class_name(self):
return "kafka.tools.EndToEndLatency"

View File

@@ -0,0 +1,72 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.services.background_thread import BackgroundThreadService
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
class PerformanceService(KafkaPathResolverMixin, BackgroundThreadService):
def __init__(self, context=None, num_nodes=0, root="/mnt/*", stop_timeout_sec=30):
super(PerformanceService, self).__init__(context, num_nodes)
self.results = [None] * self.num_nodes
self.stats = [[] for x in range(self.num_nodes)]
self.stop_timeout_sec = stop_timeout_sec
self.root = root
def java_class_name(self):
"""
Returns the name of the Java class which this service creates. Subclasses should override
this method, so that we know the name of the java process to stop. If it is not
overridden, we will kill all java processes in PerformanceService#stop_node (for backwards
compatibility).
"""
return ""
def stop_node(self, node):
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=True, allow_fail=True)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def clean_node(self, node):
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=False, allow_fail=True)
node.account.ssh("rm -rf -- %s" % self.root, allow_fail=False)
def throughput(records_per_sec, mb_per_sec):
"""Helper method to ensure uniform representation of throughput data"""
return {
"records_per_sec": records_per_sec,
"mb_per_sec": mb_per_sec
}
def latency(latency_50th_ms, latency_99th_ms, latency_999th_ms):
"""Helper method to ensure uniform representation of latency data"""
return {
"latency_50th_ms": latency_50th_ms,
"latency_99th_ms": latency_99th_ms,
"latency_999th_ms": latency_999th_ms
}
def compute_aggregate_throughput(perf):
"""Helper method for computing throughput after running a performance service."""
aggregate_rate = sum([r['records_per_sec'] for r in perf.results])
aggregate_mbps = sum([r['mbps'] for r in perf.results])
return throughput(aggregate_rate, aggregate_mbps)
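# Illustrative usage sketch (not part of this module; `perf` is an assumed performance service
# instance whose per-node results contain 'records_per_sec' and 'mbps'):
#
#   perf.run()
#   summary = compute_aggregate_throughput(perf)
#   # summary == {"records_per_sec": <sum across nodes>, "mb_per_sec": <sum across nodes>}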

View File

@@ -0,0 +1,174 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
from ducktape.utils.util import wait_until
from ducktape.cluster.remoteaccount import RemoteCommandError
from kafkatest.directory_layout.kafka_path import TOOLS_JAR_NAME, TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME
from kafkatest.services.monitor.http import HttpMetricsCollector
from kafkatest.services.performance import PerformanceService
from kafkatest.services.security.security_config import SecurityConfig
from kafkatest.version import DEV_BRANCH, V_0_9_0_0
class ProducerPerformanceService(HttpMetricsCollector, PerformanceService):
PERSISTENT_ROOT = "/mnt/producer_performance"
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "producer_performance.stdout")
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "producer_performance.stderr")
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
LOG_FILE = os.path.join(LOG_DIR, "producer_performance.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
def __init__(self, context, num_nodes, kafka, topic, num_records, record_size, throughput, version=DEV_BRANCH, settings=None,
intermediate_stats=False, client_id="producer-performance"):
super(ProducerPerformanceService, self).__init__(context=context, num_nodes=num_nodes)
self.logs = {
"producer_performance_stdout": {
"path": ProducerPerformanceService.STDOUT_CAPTURE,
"collect_default": True},
"producer_performance_stderr": {
"path": ProducerPerformanceService.STDERR_CAPTURE,
"collect_default": True},
"producer_performance_log": {
"path": ProducerPerformanceService.LOG_FILE,
"collect_default": True}
}
self.kafka = kafka
self.security_config = kafka.security_config.client_config()
security_protocol = self.security_config.security_protocol
assert version >= V_0_9_0_0 or security_protocol == SecurityConfig.PLAINTEXT, \
"Security protocol %s is only supported if version >= 0.9.0.0, version %s" % (self.security_config, str(version))
self.args = {
'topic': topic,
'kafka_opts': self.security_config.kafka_opts,
'num_records': num_records,
'record_size': record_size,
'throughput': throughput
}
self.settings = settings or {}
self.intermediate_stats = intermediate_stats
self.client_id = client_id
for node in self.nodes:
node.version = version
def start_cmd(self, node):
args = self.args.copy()
args.update({
'bootstrap_servers': self.kafka.bootstrap_servers(self.security_config.security_protocol),
'client_id': self.client_id,
'kafka_run_class': self.path.script("kafka-run-class.sh", node),
'metrics_props': ' '.join(["%s=%s" % (k, v) for k, v in self.http_metrics_client_configs.iteritems()])
})
cmd = ""
if node.version < DEV_BRANCH:
# In order to ensure more consistent configuration between versions, always use the ProducerPerformance
# tool from the development branch
tools_jar = self.path.jar(TOOLS_JAR_NAME, DEV_BRANCH)
tools_dependant_libs_jar = self.path.jar(TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME, DEV_BRANCH)
for jar in (tools_jar, tools_dependant_libs_jar):
cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % jar
cmd += "export CLASSPATH; "
cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % ProducerPerformanceService.LOG4J_CONFIG
cmd += "KAFKA_OPTS=%(kafka_opts)s KAFKA_HEAP_OPTS=\"-XX:+HeapDumpOnOutOfMemoryError\" %(kafka_run_class)s org.apache.kafka.tools.ProducerPerformance " \
"--topic %(topic)s --num-records %(num_records)d --record-size %(record_size)d --throughput %(throughput)d --producer-props bootstrap.servers=%(bootstrap_servers)s client.id=%(client_id)s %(metrics_props)s" % args
self.security_config.setup_node(node)
if self.security_config.security_protocol != SecurityConfig.PLAINTEXT:
self.settings.update(self.security_config.properties)
for key, value in self.settings.items():
cmd += " %s=%s" % (str(key), str(value))
cmd += " 2>>%s | tee %s" % (ProducerPerformanceService.STDERR_CAPTURE, ProducerPerformanceService.STDOUT_CAPTURE)
return cmd
def pids(self, node):
try:
cmd = "jps | grep -i ProducerPerformance | awk '{print $1}'"
pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)]
return pid_arr
except (RemoteCommandError, ValueError) as e:
return []
def alive(self, node):
return len(self.pids(node)) > 0
def _worker(self, idx, node):
node.account.ssh("mkdir -p %s" % ProducerPerformanceService.PERSISTENT_ROOT, allow_fail=False)
# Create and upload log properties
log_config = self.render('tools_log4j.properties', log_file=ProducerPerformanceService.LOG_FILE)
node.account.create_file(ProducerPerformanceService.LOG4J_CONFIG, log_config)
cmd = self.start_cmd(node)
self.logger.debug("Producer performance %d command: %s", idx, cmd)
# start ProducerPerformance process
start = time.time()
producer_output = node.account.ssh_capture(cmd)
wait_until(lambda: self.alive(node), timeout_sec=20, err_msg="ProducerPerformance failed to start")
# block until there is at least one line of output
first_line = next(producer_output, None)
if first_line is None:
raise Exception("No output from ProducerPerformance")
wait_until(lambda: not self.alive(node), timeout_sec=1200, backoff_sec=2, err_msg="ProducerPerformance failed to finish")
elapsed = time.time() - start
self.logger.debug("ProducerPerformance process ran for %s seconds" % elapsed)
# parse producer output from file
last = None
producer_output = node.account.ssh_capture("cat %s" % ProducerPerformanceService.STDOUT_CAPTURE)
for line in producer_output:
if self.intermediate_stats:
try:
self.stats[idx-1].append(self.parse_stats(line))
except:
# Sometimes there are extraneous log messages
pass
last = line
try:
self.results[idx-1] = self.parse_stats(last)
except:
raise Exception("Unable to parse aggregate performance statistics on node %d: %s" % (idx, last))
def parse_stats(self, line):
parts = line.split(',')
return {
'records': int(parts[0].split()[0]),
'records_per_sec': float(parts[1].split()[0]),
'mbps': float(parts[1].split('(')[1].split()[0]),
'latency_avg_ms': float(parts[2].split()[0]),
'latency_max_ms': float(parts[3].split()[0]),
'latency_50th_ms': float(parts[4].split()[0]),
'latency_95th_ms': float(parts[5].split()[0]),
'latency_99th_ms': float(parts[6].split()[0]),
'latency_999th_ms': float(parts[7].split()[0]),
}
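# Illustrative note (not part of this class; values are made up for the example): the summary
# line parsed above is expected to look roughly like
#
#   100000 records sent, 9999.0 records/sec (9.54 MB/sec), 1.50 ms avg latency, 30.00 ms max latency, 1 ms 50th, 2 ms 95th, 5 ms 99th, 10 ms 99.9th.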

View File

@@ -0,0 +1,108 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.services.monitor.jmx import JmxMixin
from kafkatest.services.streams import StreamsTestBaseService
from kafkatest.services.kafka import KafkaConfig
from kafkatest.services import streams_property
#
# Class used to start the simple Kafka Streams benchmark
#
class StreamsSimpleBenchmarkService(StreamsTestBaseService):
"""Base class for simple Kafka Streams benchmark"""
def __init__(self, test_context, kafka, test_name, num_threads, num_recs_or_wait_ms, key_skew, value_size):
super(StreamsSimpleBenchmarkService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.perf.SimpleBenchmark",
test_name,
num_recs_or_wait_ms,
key_skew,
value_size)
self.jmx_option = ""
if test_name.startswith('stream') or test_name.startswith('table'):
self.jmx_option = "stream-jmx"
JmxMixin.__init__(self,
num_nodes=1,
jmx_object_names=['kafka.streams:type=stream-thread-metrics,thread-id=simple-benchmark-StreamThread-%d' %(i+1) for i in range(num_threads)],
jmx_attributes=['process-latency-avg',
'process-rate',
'commit-latency-avg',
'commit-rate',
'poll-latency-avg',
'poll-rate'],
root=StreamsTestBaseService.PERSISTENT_ROOT)
if test_name.startswith('consume'):
self.jmx_option = "consumer-jmx"
JmxMixin.__init__(self,
num_nodes=1,
jmx_object_names=['kafka.consumer:type=consumer-fetch-manager-metrics,client-id=simple-benchmark-consumer'],
jmx_attributes=['records-consumed-rate'],
root=StreamsTestBaseService.PERSISTENT_ROOT)
self.num_threads = num_threads
def prop_file(self):
cfg = KafkaConfig(**{streams_property.STATE_DIR: self.PERSISTENT_ROOT,
streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers(),
streams_property.NUM_THREADS: self.num_threads})
return cfg.render()
def start_cmd(self, node):
if self.jmx_option != "":
args = self.args.copy()
args['jmx_port'] = self.jmx_port
args['config_file'] = self.CONFIG_FILE
args['stdout'] = self.STDOUT_FILE
args['stderr'] = self.STDERR_FILE
args['pidfile'] = self.PID_FILE
args['log4j'] = self.LOG4J_CONFIG_FILE
args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node)
cmd = "( export JMX_PORT=%(jmx_port)s; export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \
"INCLUDE_TEST_JARS=true %(kafka_run_class)s %(streams_class_name)s " \
" %(config_file)s %(user_test_args1)s %(user_test_args2)s %(user_test_args3)s" \
" %(user_test_args4)s & echo $! >&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args
else:
cmd = super(StreamsSimpleBenchmarkService, self).start_cmd(node)
return cmd
def start_node(self, node):
super(StreamsSimpleBenchmarkService, self).start_node(node)
if self.jmx_option != "":
self.start_jmx_tool(1, node)
def clean_node(self, node):
if self.jmx_option != "":
JmxMixin.clean_node(self, node)
super(StreamsSimpleBenchmarkService, self).clean_node(node)
def collect_data(self, node, tag=None):
# Collect the data and return it to the framework
output = node.account.ssh_capture("grep Performance %s" % self.STDOUT_FILE)
data = {}
for line in output:
parts = line.split(':')
data[tag + parts[0]] = parts[1]
return data
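# Illustrative usage sketch (not part of this class; `benchmark` and the tag are assumptions for
# the example): run the benchmark and gather the reported numbers from its node.
#
#   benchmark.run()
#   data = benchmark.collect_data(benchmark.nodes[0], tag="streams-")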

View File

@@ -0,0 +1,25 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Define the root logger with appender file
log4j.rootLogger = {{ log_level|default("INFO") }}, FILE
log4j.appender.FILE=org.apache.log4j.FileAppender
log4j.appender.FILE.File={{ log_file }}
log4j.appender.FILE.ImmediateFlush=true
# Set append to false so the log file is overwritten on each run
log4j.appender.FILE.Append=false
log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
log4j.appender.FILE.layout.conversionPattern=[%d] %p %m (%c)%n

View File

@@ -0,0 +1,93 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.services.background_thread import BackgroundThreadService
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.security.security_config import SecurityConfig
import re
class ReplicaVerificationTool(KafkaPathResolverMixin, BackgroundThreadService):
logs = {
"producer_log": {
"path": "/mnt/replica_verification_tool.log",
"collect_default": False}
}
def __init__(self, context, num_nodes, kafka, topic, report_interval_ms, security_protocol="PLAINTEXT", stop_timeout_sec=30):
super(ReplicaVerificationTool, self).__init__(context, num_nodes)
self.kafka = kafka
self.topic = topic
self.report_interval_ms = report_interval_ms
self.security_protocol = security_protocol
self.security_config = SecurityConfig(self.context, security_protocol)
self.partition_lag = {}
self.stop_timeout_sec = stop_timeout_sec
def _worker(self, idx, node):
cmd = self.start_cmd(node)
self.logger.debug("ReplicaVerificationTool %d command: %s" % (idx, cmd))
self.security_config.setup_node(node)
for line in node.account.ssh_capture(cmd):
self.logger.debug("Parsing line:{}".format(line))
parsed = re.search('.*max lag is (.+?) for partition ([a-zA-Z0-9._-]+-[0-9]+) at', line)
if parsed:
lag = int(parsed.group(1))
topic_partition = parsed.group(2)
self.logger.debug("Setting max lag for {} as {}".format(topic_partition, lag))
self.partition_lag[topic_partition] = lag
def get_lag_for_partition(self, topic, partition):
"""
Get latest lag for given topic-partition
Args:
topic: a topic
partition: a partition of the topic
"""
topic_partition = topic + '-' + str(partition)
lag = self.partition_lag.get(topic_partition, -1)
self.logger.debug("Returning lag for {} as {}".format(topic_partition, lag))
return lag
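# Illustrative usage sketch (not part of this class; the topic name and timeout are assumptions
# for the example, and wait_until comes from ducktape.utils.util): a test would usually poll this
# until the replicas catch up.
#
#   wait_until(lambda: tool.get_lag_for_partition("test-topic", 0) == 0,
#              timeout_sec=60, err_msg="Replicas never caught up")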
def start_cmd(self, node):
cmd = self.path.script("kafka-run-class.sh", node)
cmd += " %s" % self.java_class_name()
cmd += " --broker-list %s --topic-white-list %s --time -2 --report-interval-ms %s" % (self.kafka.bootstrap_servers(self.security_protocol), self.topic, self.report_interval_ms)
cmd += " 2>> /mnt/replica_verification_tool.log | tee -a /mnt/replica_verification_tool.log &"
return cmd
def stop_node(self, node):
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=True,
allow_fail=True)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def clean_node(self, node):
node.account.kill_java_processes(self.java_class_name(), clean_shutdown=False,
allow_fail=True)
node.account.ssh("rm -rf /mnt/replica_verification_tool.log", allow_fail=False)
def java_class_name(self):
return "kafka.tools.ReplicaVerificationTool"

View File

@@ -0,0 +1,15 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@@ -0,0 +1,75 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
class ACLs(KafkaPathResolverMixin):
def __init__(self, context):
self.context = context
def set_acls(self, protocol, kafka, topic, group):
node = kafka.nodes[0]
setting = kafka.zk_connect_setting()
# Set server ACLs
kafka_principal = "User:CN=systemtest" if protocol == "SSL" else "User:kafka"
self.acls_command(node, ACLs.add_cluster_acl(setting, kafka_principal))
self.acls_command(node, ACLs.broker_read_acl(setting, "*", kafka_principal))
# Set client ACLs
client_principal = "User:CN=systemtest" if protocol == "SSL" else "User:client"
self.acls_command(node, ACLs.produce_acl(setting, topic, client_principal))
self.acls_command(node, ACLs.consume_acl(setting, topic, group, client_principal))
def acls_command(self, node, properties):
cmd = "%s %s" % (self.path.script("kafka-acls.sh", node), properties)
node.account.ssh(cmd)
@staticmethod
def add_cluster_acl(zk_connect, principal="User:kafka"):
return "--authorizer-properties zookeeper.connect=%(zk_connect)s --add --cluster " \
"--operation=ClusterAction --allow-principal=%(principal)s " % {
'zk_connect': zk_connect,
'principal': principal
}
@staticmethod
def broker_read_acl(zk_connect, topic, principal="User:kafka"):
return "--authorizer-properties zookeeper.connect=%(zk_connect)s --add --topic=%(topic)s " \
"--operation=Read --allow-principal=%(principal)s " % {
'zk_connect': zk_connect,
'topic': topic,
'principal': principal
}
@staticmethod
def produce_acl(zk_connect, topic, principal="User:client"):
return "--authorizer-properties zookeeper.connect=%(zk_connect)s --add --topic=%(topic)s " \
"--producer --allow-principal=%(principal)s " % {
'zk_connect': zk_connect,
'topic': topic,
'principal': principal
}
@staticmethod
def consume_acl(zk_connect, topic, group, principal="User:client"):
return "--authorizer-properties zookeeper.connect=%(zk_connect)s --add --topic=%(topic)s " \
"--group=%(group)s --consumer --allow-principal=%(principal)s " % {
'zk_connect': zk_connect,
'topic': topic,
'group': group,
'principal': principal
}
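# Illustrative note (not part of this class; the ZooKeeper connect string is made up and the
# script's path prefix is elided): for a non-SSL cluster, the first acls_command call in
# set_acls above expands to roughly
#
#   kafka-acls.sh --authorizer-properties zookeeper.connect=worker1:2181 --add --cluster \
#       --operation=ClusterAction --allow-principal=User:kafka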

View File

@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class ListenerSecurityConfig:
SASL_MECHANISM_PREFIXED_CONFIGS = ["connections.max.reauth.ms", "sasl.jaas.config",
"sasl.login.callback.handler.class", "sasl.login.class",
"sasl.server.callback.handler.class"]
def __init__(self, use_separate_interbroker_listener=False,
client_listener_overrides={}, interbroker_listener_overrides={}):
"""
:param bool use_separate_interbroker_listener - if set, will use a separate interbroker listener,
with security protocol set to interbroker_security_protocol value. If set, requires
interbroker_security_protocol to be provided.
Normally port name is the same as its security protocol, so setting security_protocol and
interbroker_security_protocol to the same value will lead to a single port being open and both client
and broker-to-broker communication will go over that port. This parameter allows
you to add an interbroker listener with the same security protocol as a client listener, but running on a
separate port.
:param dict client_listener_overrides - non-prefixed listener config overrides for named client listener
(for example 'sasl.jaas.config', 'ssl.keystore.location', 'sasl.login.callback.handler.class', etc).
:param dict interbroker_listener_overrides - non-prefixed listener config overrides for named interbroker
listener (for example 'sasl.jaas.config', 'ssl.keystore.location', 'sasl.login.callback.handler.class', etc).
"""
self.use_separate_interbroker_listener = use_separate_interbroker_listener
self.client_listener_overrides = client_listener_overrides
self.interbroker_listener_overrides = interbroker_listener_overrides
def requires_sasl_mechanism_prefix(self, config):
return config in ListenerSecurityConfig.SASL_MECHANISM_PREFIXED_CONFIGS
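# Illustrative usage sketch (not part of this class; the JAAS value is a placeholder for the
# example): enabling a dedicated interbroker listener with a SASL override.
#
#   listener_security_config = ListenerSecurityConfig(
#       use_separate_interbroker_listener=True,
#       interbroker_listener_overrides={'sasl.jaas.config': '<jaas config here>'})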

View File

@@ -0,0 +1,136 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import uuid
from io import open
from os import remove, close
from shutil import move
from tempfile import mkstemp
from ducktape.services.service import Service
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin, CORE_LIBS_JAR_NAME, CORE_DEPENDANT_TEST_LIBS_JAR_NAME
from kafkatest.version import DEV_BRANCH
class MiniKdc(KafkaPathResolverMixin, Service):
logs = {
"minikdc_log": {
"path": "/mnt/minikdc/minikdc.log",
"collect_default": True}
}
WORK_DIR = "/mnt/minikdc"
PROPS_FILE = "/mnt/minikdc/minikdc.properties"
KEYTAB_FILE = "/mnt/minikdc/keytab"
KRB5CONF_FILE = "/mnt/minikdc/krb5.conf"
LOG_FILE = "/mnt/minikdc/minikdc.log"
LOCAL_KEYTAB_FILE = None
LOCAL_KRB5CONF_FILE = None
@staticmethod
def _set_local_keytab_file(local_scratch_dir):
"""Set MiniKdc.LOCAL_KEYTAB_FILE exactly once per test.
LOCAL_KEYTAB_FILE is currently used like a global variable to provide a mechanism to share the
location of the local keytab file among all services which might need it.
Since individual ducktape tests are each run in a subprocess forked from the ducktape main process,
class variables set at class load time are duplicated between test processes. This leads to collisions
if test subprocesses are run in parallel, so we defer setting these class variables until after the test itself
begins to run.
"""
if MiniKdc.LOCAL_KEYTAB_FILE is None:
MiniKdc.LOCAL_KEYTAB_FILE = os.path.join(local_scratch_dir, "keytab")
return MiniKdc.LOCAL_KEYTAB_FILE
@staticmethod
def _set_local_krb5conf_file(local_scratch_dir):
"""Set MiniKdc.LOCAL_KRB5CONF_FILE exactly once per test.
See _set_local_keytab_file for details why we do this.
"""
if MiniKdc.LOCAL_KRB5CONF_FILE is None:
MiniKdc.LOCAL_KRB5CONF_FILE = os.path.join(local_scratch_dir, "krb5conf")
return MiniKdc.LOCAL_KRB5CONF_FILE
def __init__(self, context, kafka_nodes, extra_principals=""):
super(MiniKdc, self).__init__(context, 1)
self.kafka_nodes = kafka_nodes
self.extra_principals = extra_principals
# context.local_scratch_dir uses a ducktape feature:
# each test_context object has a unique local scratch directory which is available for the duration of the test
# and is automatically garbage collected after the test finishes
MiniKdc._set_local_keytab_file(context.local_scratch_dir)
MiniKdc._set_local_krb5conf_file(context.local_scratch_dir)
def replace_in_file(self, file_path, pattern, subst):
fh, abs_path = mkstemp()
with open(abs_path, 'w') as new_file:
with open(file_path) as old_file:
for line in old_file:
new_file.write(line.replace(pattern, subst))
close(fh)
remove(file_path)
move(abs_path, file_path)
def start_node(self, node):
node.account.ssh("mkdir -p %s" % MiniKdc.WORK_DIR, allow_fail=False)
props_file = self.render('minikdc.properties', node=node)
node.account.create_file(MiniKdc.PROPS_FILE, props_file)
self.logger.info("minikdc.properties")
self.logger.info(props_file)
kafka_principals = ' '.join(['kafka/' + kafka_node.account.hostname for kafka_node in self.kafka_nodes])
principals = 'client ' + kafka_principals + ' ' + self.extra_principals
self.logger.info("Starting MiniKdc with principals " + principals)
core_libs_jar = self.path.jar(CORE_LIBS_JAR_NAME, DEV_BRANCH)
core_dependant_test_libs_jar = self.path.jar(CORE_DEPENDANT_TEST_LIBS_JAR_NAME, DEV_BRANCH)
cmd = "for file in %s; do CLASSPATH=$CLASSPATH:$file; done;" % core_libs_jar
cmd += " for file in %s; do CLASSPATH=$CLASSPATH:$file; done;" % core_dependant_test_libs_jar
cmd += " export CLASSPATH;"
cmd += " %s kafka.security.minikdc.MiniKdc %s %s %s %s 1>> %s 2>> %s &" % (self.path.script("kafka-run-class.sh", node), MiniKdc.WORK_DIR, MiniKdc.PROPS_FILE, MiniKdc.KEYTAB_FILE, principals, MiniKdc.LOG_FILE, MiniKdc.LOG_FILE)
self.logger.debug("Attempting to start MiniKdc on %s with command: %s" % (str(node.account), cmd))
with node.account.monitor_log(MiniKdc.LOG_FILE) as monitor:
node.account.ssh(cmd)
monitor.wait_until("MiniKdc Running", timeout_sec=60, backoff_sec=1, err_msg="MiniKdc didn't finish startup")
node.account.copy_from(MiniKdc.KEYTAB_FILE, MiniKdc.LOCAL_KEYTAB_FILE)
node.account.copy_from(MiniKdc.KRB5CONF_FILE, MiniKdc.LOCAL_KRB5CONF_FILE)
# KDC is set to bind openly (via 0.0.0.0). Change krb5.conf to hold the specific KDC address
self.replace_in_file(MiniKdc.LOCAL_KRB5CONF_FILE, '0.0.0.0', node.account.hostname)
def stop_node(self, node):
self.logger.info("Stopping %s on %s" % (type(self).__name__, node.account.hostname))
node.account.kill_java_processes("MiniKdc", clean_shutdown=True, allow_fail=False)
def clean_node(self, node):
node.account.kill_java_processes("MiniKdc", clean_shutdown=False, allow_fail=True)
node.account.ssh("rm -rf " + MiniKdc.WORK_DIR, allow_fail=False)
if os.path.exists(MiniKdc.LOCAL_KEYTAB_FILE):
os.remove(MiniKdc.LOCAL_KEYTAB_FILE)
if os.path.exists(MiniKdc.LOCAL_KRB5CONF_FILE):
os.remove(MiniKdc.LOCAL_KRB5CONF_FILE)
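# Illustrative sketch only (not part of the original service): how a test might start a
# MiniKdc by hand before bringing up SASL-enabled brokers. `test_context` and
# `kafka_service` are assumed to be provided by the surrounding ducktape test, and
# `extra_principals` is an optional space-separated list of additional principal names.
def _example_start_minikdc(test_context, kafka_service, extra_principals=""):
    mini_kdc = MiniKdc(test_context, kafka_service.nodes, extra_principals=extra_principals)
    # start() invokes start_node() above: it renders minikdc.properties, waits for the
    # "MiniKdc Running" log line, and copies the keytab and krb5.conf to the local scratch dir.
    mini_kdc.start()
    return mini_kdc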

View File

@@ -0,0 +1,352 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import subprocess
from tempfile import mkdtemp
from shutil import rmtree
from ducktape.template import TemplateRenderer
from kafkatest.services.security.minikdc import MiniKdc
from kafkatest.services.security.listener_security_config import ListenerSecurityConfig
import itertools
class SslStores(object):
def __init__(self, local_scratch_dir, logger=None):
self.logger = logger
self.ca_crt_path = os.path.join(local_scratch_dir, "test.ca.crt")
self.ca_jks_path = os.path.join(local_scratch_dir, "test.ca.jks")
self.ca_passwd = "test-ca-passwd"
self.truststore_path = os.path.join(local_scratch_dir, "test.truststore.jks")
self.truststore_passwd = "test-ts-passwd"
self.keystore_passwd = "test-ks-passwd"
# Zookeeper TLS (as of v3.5.6) does not support a key password different from the keystore password
self.key_passwd = self.keystore_passwd
# Allow up to one hour of clock skew between host and VMs
self.startdate = "-1H"
for file in [self.ca_crt_path, self.ca_jks_path, self.truststore_path]:
if os.path.exists(file):
os.remove(file)
def generate_ca(self):
"""
Generate CA private key and certificate.
"""
self.runcmd("keytool -genkeypair -alias ca -keyalg RSA -keysize 2048 -keystore %s -storetype JKS -storepass %s -keypass %s -dname CN=SystemTestCA -startdate %s --ext bc=ca:true" % (self.ca_jks_path, self.ca_passwd, self.ca_passwd, self.startdate))
self.runcmd("keytool -export -alias ca -keystore %s -storepass %s -storetype JKS -rfc -file %s" % (self.ca_jks_path, self.ca_passwd, self.ca_crt_path))
def generate_truststore(self):
"""
Generate JKS truststore containing CA certificate.
"""
self.runcmd("keytool -importcert -alias ca -file %s -keystore %s -storepass %s -storetype JKS -noprompt" % (self.ca_crt_path, self.truststore_path, self.truststore_passwd))
def generate_and_copy_keystore(self, node):
"""
Generate JKS keystore with certificate signed by the test CA.
The generated certificate has the node's hostname as a DNS SubjectAlternativeName.
"""
ks_dir = mkdtemp(dir="/tmp")
ks_path = os.path.join(ks_dir, "test.keystore.jks")
csr_path = os.path.join(ks_dir, "test.kafka.csr")
crt_path = os.path.join(ks_dir, "test.kafka.crt")
self.runcmd("keytool -genkeypair -alias kafka -keyalg RSA -keysize 2048 -keystore %s -storepass %s -storetype JKS -keypass %s -dname CN=systemtest -ext SAN=DNS:%s -startdate %s" % (ks_path, self.keystore_passwd, self.key_passwd, self.hostname(node), self.startdate))
self.runcmd("keytool -certreq -keystore %s -storepass %s -storetype JKS -keypass %s -alias kafka -file %s" % (ks_path, self.keystore_passwd, self.key_passwd, csr_path))
self.runcmd("keytool -gencert -keystore %s -storepass %s -storetype JKS -alias ca -infile %s -outfile %s -dname CN=systemtest -ext SAN=DNS:%s -startdate %s" % (self.ca_jks_path, self.ca_passwd, csr_path, crt_path, self.hostname(node), self.startdate))
self.runcmd("keytool -importcert -keystore %s -storepass %s -storetype JKS -alias ca -file %s -noprompt" % (ks_path, self.keystore_passwd, self.ca_crt_path))
self.runcmd("keytool -importcert -keystore %s -storepass %s -storetype JKS -keypass %s -alias kafka -file %s -noprompt" % (ks_path, self.keystore_passwd, self.key_passwd, crt_path))
node.account.copy_to(ks_path, SecurityConfig.KEYSTORE_PATH)
# generate ZooKeeper client TLS config file for encryption-only (no client cert) use case
str = """zookeeper.clientCnxnSocket=org.apache.zookeeper.ClientCnxnSocketNetty
zookeeper.ssl.client.enable=true
zookeeper.ssl.truststore.location=%s
zookeeper.ssl.truststore.password=%s
""" % (SecurityConfig.TRUSTSTORE_PATH, self.truststore_passwd)
node.account.create_file(SecurityConfig.ZK_CLIENT_TLS_ENCRYPT_ONLY_CONFIG_PATH, str)
# also generate ZooKeeper client TLS config file for mutual authentication use case
str = """zookeeper.clientCnxnSocket=org.apache.zookeeper.ClientCnxnSocketNetty
zookeeper.ssl.client.enable=true
zookeeper.ssl.truststore.location=%s
zookeeper.ssl.truststore.password=%s
zookeeper.ssl.keystore.location=%s
zookeeper.ssl.keystore.password=%s
""" % (SecurityConfig.TRUSTSTORE_PATH, self.truststore_passwd, SecurityConfig.KEYSTORE_PATH, self.keystore_passwd)
node.account.create_file(SecurityConfig.ZK_CLIENT_MUTUAL_AUTH_CONFIG_PATH, str)
rmtree(ks_dir)
def hostname(self, node):
""" Hostname which may be overridden for testing validation failures
"""
return node.account.hostname
def runcmd(self, cmd):
if self.logger:
self.logger.log(logging.DEBUG, cmd)
proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
stdout, stderr = proc.communicate()
if proc.returncode != 0:
raise RuntimeError("Command '%s' returned non-zero exit status %d: %s" % (cmd, proc.returncode, stdout))
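# Illustrative sketch only (not shipped with this module): the call order a test is
# expected to follow when provisioning TLS stores with SslStores -- generate the CA and
# truststore once per test, then a signed keystore per node. `local_scratch_dir`,
# `nodes` and `logger` are assumed to be provided by the caller.
def _example_provision_ssl_stores(local_scratch_dir, nodes, logger=None):
    stores = SslStores(local_scratch_dir, logger)
    stores.generate_ca()           # CA key pair in test.ca.jks, certificate exported to test.ca.crt
    stores.generate_truststore()   # truststore holding the CA certificate
    for node in nodes:
        # per-host keystore signed by the CA (SAN=DNS:<hostname>), copied to the node
        # together with the ZooKeeper client TLS config files
        stores.generate_and_copy_keystore(node)
    return stores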
class SecurityConfig(TemplateRenderer):
PLAINTEXT = 'PLAINTEXT'
SSL = 'SSL'
SASL_PLAINTEXT = 'SASL_PLAINTEXT'
SASL_SSL = 'SASL_SSL'
SASL_MECHANISM_GSSAPI = 'GSSAPI'
SASL_MECHANISM_PLAIN = 'PLAIN'
SASL_MECHANISM_SCRAM_SHA_256 = 'SCRAM-SHA-256'
SASL_MECHANISM_SCRAM_SHA_512 = 'SCRAM-SHA-512'
SCRAM_CLIENT_USER = "kafka-client"
SCRAM_CLIENT_PASSWORD = "client-secret"
SCRAM_BROKER_USER = "kafka-broker"
SCRAM_BROKER_PASSWORD = "broker-secret"
CONFIG_DIR = "/mnt/security"
KEYSTORE_PATH = "/mnt/security/test.keystore.jks"
TRUSTSTORE_PATH = "/mnt/security/test.truststore.jks"
ZK_CLIENT_TLS_ENCRYPT_ONLY_CONFIG_PATH = "/mnt/security/zk_client_tls_encrypt_only_config.properties"
ZK_CLIENT_MUTUAL_AUTH_CONFIG_PATH = "/mnt/security/zk_client_mutual_auth_config.properties"
JAAS_CONF_PATH = "/mnt/security/jaas.conf"
KRB5CONF_PATH = "/mnt/security/krb5.conf"
KEYTAB_PATH = "/mnt/security/keytab"
# This is initialized only when the first instance of SecurityConfig is created
ssl_stores = None
def __init__(self, context, security_protocol=None, interbroker_security_protocol=None,
client_sasl_mechanism=SASL_MECHANISM_GSSAPI, interbroker_sasl_mechanism=SASL_MECHANISM_GSSAPI,
zk_sasl=False, zk_tls=False, template_props="", static_jaas_conf=True, jaas_override_variables=None,
listener_security_config=ListenerSecurityConfig()):
"""
Initialize the security properties for the node and copy
keystore and truststore to the remote node if the transport protocol
is SSL. If security_protocol is None, the protocol specified in the
template properties file is used. If no protocol is specified in the
template properties either, PLAINTEXT is used as default.
"""
self.context = context
if not SecurityConfig.ssl_stores:
# This generates keystore/truststore files in a local scratch directory which gets
# automatically destroyed after the test is run
# Creating within the scratch directory allows us to run tests in parallel without fear of collision
SecurityConfig.ssl_stores = SslStores(context.local_scratch_dir, context.logger)
SecurityConfig.ssl_stores.generate_ca()
SecurityConfig.ssl_stores.generate_truststore()
if security_protocol is None:
security_protocol = self.get_property('security.protocol', template_props)
if security_protocol is None:
security_protocol = SecurityConfig.PLAINTEXT
elif security_protocol not in [SecurityConfig.PLAINTEXT, SecurityConfig.SSL, SecurityConfig.SASL_PLAINTEXT, SecurityConfig.SASL_SSL]:
raise Exception("Invalid security.protocol in template properties: " + security_protocol)
if interbroker_security_protocol is None:
interbroker_security_protocol = security_protocol
self.interbroker_security_protocol = interbroker_security_protocol
self.has_sasl = self.is_sasl(security_protocol) or self.is_sasl(interbroker_security_protocol) or zk_sasl
self.has_ssl = self.is_ssl(security_protocol) or self.is_ssl(interbroker_security_protocol) or zk_tls
self.zk_sasl = zk_sasl
self.zk_tls = zk_tls
self.static_jaas_conf = static_jaas_conf
self.listener_security_config = listener_security_config
self.properties = {
'security.protocol' : security_protocol,
'ssl.keystore.location' : SecurityConfig.KEYSTORE_PATH,
'ssl.keystore.password' : SecurityConfig.ssl_stores.keystore_passwd,
'ssl.key.password' : SecurityConfig.ssl_stores.key_passwd,
'ssl.truststore.location' : SecurityConfig.TRUSTSTORE_PATH,
'ssl.truststore.password' : SecurityConfig.ssl_stores.truststore_passwd,
'ssl.endpoint.identification.algorithm' : 'HTTPS',
'sasl.mechanism' : client_sasl_mechanism,
'sasl.mechanism.inter.broker.protocol' : interbroker_sasl_mechanism,
'sasl.kerberos.service.name' : 'kafka'
}
self.properties.update(self.listener_security_config.client_listener_overrides)
self.jaas_override_variables = jaas_override_variables or {}
def client_config(self, template_props="", node=None, jaas_override_variables=None):
# If node is not specified, use static jaas config which will be created later.
# Otherwise use static JAAS configuration files with SASL_SSL and sasl.jaas.config
# property with SASL_PLAINTEXT so that both code paths are tested by existing tests.
# Note that this is an arbitrary choice and it is possible to run all tests with
# either static or dynamic jaas config files if required.
static_jaas_conf = node is None or (self.has_sasl and self.has_ssl)
return SecurityConfig(self.context, self.security_protocol,
client_sasl_mechanism=self.client_sasl_mechanism,
template_props=template_props,
static_jaas_conf=static_jaas_conf,
jaas_override_variables=jaas_override_variables,
listener_security_config=self.listener_security_config)
def enable_security_protocol(self, security_protocol):
self.has_sasl = self.has_sasl or self.is_sasl(security_protocol)
self.has_ssl = self.has_ssl or self.is_ssl(security_protocol)
def setup_ssl(self, node):
node.account.ssh("mkdir -p %s" % SecurityConfig.CONFIG_DIR, allow_fail=False)
node.account.copy_to(SecurityConfig.ssl_stores.truststore_path, SecurityConfig.TRUSTSTORE_PATH)
SecurityConfig.ssl_stores.generate_and_copy_keystore(node)
def setup_sasl(self, node):
node.account.ssh("mkdir -p %s" % SecurityConfig.CONFIG_DIR, allow_fail=False)
jaas_conf_file = "jaas.conf"
java_version = node.account.ssh_capture("java -version")
jaas_conf = None
if 'sasl.jaas.config' not in self.properties:
jaas_conf = self.render_jaas_config(
jaas_conf_file,
{
'node': node,
'is_ibm_jdk': any('IBM' in line for line in java_version),
'SecurityConfig': SecurityConfig,
'client_sasl_mechanism': self.client_sasl_mechanism,
'enabled_sasl_mechanisms': self.enabled_sasl_mechanisms
}
)
else:
jaas_conf = self.properties['sasl.jaas.config']
if self.static_jaas_conf:
node.account.create_file(SecurityConfig.JAAS_CONF_PATH, jaas_conf)
elif 'sasl.jaas.config' not in self.properties:
self.properties['sasl.jaas.config'] = jaas_conf.replace("\n", " \\\n")
if self.has_sasl_kerberos:
node.account.copy_to(MiniKdc.LOCAL_KEYTAB_FILE, SecurityConfig.KEYTAB_PATH)
node.account.copy_to(MiniKdc.LOCAL_KRB5CONF_FILE, SecurityConfig.KRB5CONF_PATH)
def render_jaas_config(self, jaas_conf_file, config_variables):
"""
Renders the JAAS config file contents
:param jaas_conf_file: name of the JAAS config template file
:param config_variables: dict of variables used in the template
:return: the rendered template string
"""
variables = config_variables.copy()
variables.update(self.jaas_override_variables) # override variables
return self.render(jaas_conf_file, **variables)
def setup_node(self, node):
if self.has_ssl:
self.setup_ssl(node)
if self.has_sasl:
self.setup_sasl(node)
def setup_credentials(self, node, path, zk_connect, broker):
if broker:
self.maybe_create_scram_credentials(node, zk_connect, path, self.interbroker_sasl_mechanism,
SecurityConfig.SCRAM_BROKER_USER, SecurityConfig.SCRAM_BROKER_PASSWORD)
else:
self.maybe_create_scram_credentials(node, zk_connect, path, self.client_sasl_mechanism,
SecurityConfig.SCRAM_CLIENT_USER, SecurityConfig.SCRAM_CLIENT_PASSWORD)
def maybe_create_scram_credentials(self, node, zk_connect, path, mechanism, user_name, password):
if self.has_sasl and self.is_sasl_scram(mechanism):
cmd = "%s --zookeeper %s --entity-name %s --entity-type users --alter --add-config %s=[password=%s]" % \
(path.script("kafka-configs.sh", node), zk_connect,
user_name, mechanism, password)
node.account.ssh(cmd)
def clean_node(self, node):
if self.security_protocol != SecurityConfig.PLAINTEXT:
node.account.ssh("rm -rf %s" % SecurityConfig.CONFIG_DIR, allow_fail=False)
def get_property(self, prop_name, template_props=""):
"""
Get property value from the string representation of
a properties file.
"""
value = None
for line in template_props.split("\n"):
items = line.split("=")
if len(items) == 2 and items[0].strip() == prop_name:
value = str(items[1].strip())
return value
def is_ssl(self, security_protocol):
return security_protocol == SecurityConfig.SSL or security_protocol == SecurityConfig.SASL_SSL
def is_sasl(self, security_protocol):
return security_protocol == SecurityConfig.SASL_PLAINTEXT or security_protocol == SecurityConfig.SASL_SSL
def is_sasl_scram(self, sasl_mechanism):
return sasl_mechanism == SecurityConfig.SASL_MECHANISM_SCRAM_SHA_256 or sasl_mechanism == SecurityConfig.SASL_MECHANISM_SCRAM_SHA_512
@property
def security_protocol(self):
return self.properties['security.protocol']
@property
def client_sasl_mechanism(self):
return self.properties['sasl.mechanism']
@property
def interbroker_sasl_mechanism(self):
return self.properties['sasl.mechanism.inter.broker.protocol']
@property
def enabled_sasl_mechanisms(self):
return set([self.client_sasl_mechanism, self.interbroker_sasl_mechanism])
@property
def has_sasl_kerberos(self):
return self.has_sasl and (SecurityConfig.SASL_MECHANISM_GSSAPI in self.enabled_sasl_mechanisms)
@property
def kafka_opts(self):
if self.has_sasl:
if self.static_jaas_conf:
return "\"-Djava.security.auth.login.config=%s -Djava.security.krb5.conf=%s\"" % (SecurityConfig.JAAS_CONF_PATH, SecurityConfig.KRB5CONF_PATH)
else:
return "\"-Djava.security.krb5.conf=%s\"" % SecurityConfig.KRB5CONF_PATH
else:
return ""
def props(self, prefix=''):
"""
Return properties as string with line separators, optionally with a prefix.
This is used to append security config properties to
a properties file.
:param prefix: prefix to add to each property
:return: a string containing line-separated properties
"""
if self.security_protocol == SecurityConfig.PLAINTEXT:
return ""
if self.has_sasl and not self.static_jaas_conf and 'sasl.jaas.config' not in self.properties:
raise Exception("JAAS configuration property has not yet been initialized")
config_lines = (prefix + key + "=" + value for key, value in self.properties.iteritems())
# Extra blank lines ensure this can be appended/prepended safely
return "\n".join(itertools.chain([""], config_lines, [""]))
def __str__(self):
"""
Return properties as a string with line separators.
"""
return self.props()
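# Illustrative sketch only (not part of the original module): the typical way a service
# consumes SecurityConfig -- construct it from the test context, provision the node, then
# append the rendered properties to its own config file. `context`, `node` and
# `base_properties` are assumed to come from the surrounding ducktape service.
def _example_apply_security_config(context, node, base_properties):
    security_config = SecurityConfig(context, SecurityConfig.SASL_SSL,
                                     client_sasl_mechanism=SecurityConfig.SASL_MECHANISM_SCRAM_SHA_256)
    # copies the truststore/keystore to the node and writes the JAAS configuration
    security_config.setup_node(node)
    # props() is line-separated and padded with blank lines, so it is safe to append
    return base_properties + security_config.props()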

View File

@@ -0,0 +1,108 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE
* file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file
* to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/
{% if static_jaas_conf %}
KafkaClient {
{% endif %}
{% if "GSSAPI" in client_sasl_mechanism %}
{% if is_ibm_jdk %}
com.ibm.security.auth.module.Krb5LoginModule required debug=false
credsType=both
useKeytab="file:/mnt/security/keytab"
principal="client@EXAMPLE.COM";
{% else %}
com.sun.security.auth.module.Krb5LoginModule required debug=false
doNotPrompt=true
useKeyTab=true
storeKey=true
keyTab="/mnt/security/keytab"
principal="client@EXAMPLE.COM";
{% endif %}
{% elif client_sasl_mechanism == "PLAIN" %}
org.apache.kafka.common.security.plain.PlainLoginModule required
username="client"
password="client-secret";
{% elif "SCRAM-SHA-256" in client_sasl_mechanism or "SCRAM-SHA-512" in client_sasl_mechanism %}
org.apache.kafka.common.security.scram.ScramLoginModule required
username="{{ SecurityConfig.SCRAM_CLIENT_USER }}"
password="{{ SecurityConfig.SCRAM_CLIENT_PASSWORD }}";
{% endif %}
{% if static_jaas_conf %}
};
KafkaServer {
{% if "GSSAPI" in enabled_sasl_mechanisms %}
{% if is_ibm_jdk %}
com.ibm.security.auth.module.Krb5LoginModule required debug=false
credsType=both
useKeytab="file:/mnt/security/keytab"
principal="kafka/{{ node.account.hostname }}@EXAMPLE.COM";
{% else %}
com.sun.security.auth.module.Krb5LoginModule required debug=false
doNotPrompt=true
useKeyTab=true
storeKey=true
keyTab="/mnt/security/keytab"
principal="kafka/{{ node.account.hostname }}@EXAMPLE.COM";
{% endif %}
{% endif %}
{% if "PLAIN" in enabled_sasl_mechanisms %}
org.apache.kafka.common.security.plain.PlainLoginModule required
username="kafka"
password="kafka-secret"
user_client="client-secret"
user_kafka="kafka-secret";
{% endif %}
{% if "SCRAM-SHA-256" in client_sasl_mechanism or "SCRAM-SHA-512" in client_sasl_mechanism %}
org.apache.kafka.common.security.scram.ScramLoginModule required
username="{{ SecurityConfig.SCRAM_BROKER_USER }}"
password="{{ SecurityConfig.SCRAM_BROKER_PASSWORD }}";
{% endif %}
};
{% if zk_sasl %}
Client {
{% if is_ibm_jdk %}
com.ibm.security.auth.module.Krb5LoginModule required debug=false
credsType=both
useKeytab="file:/mnt/security/keytab"
principal="zkclient@EXAMPLE.COM";
{% else %}
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
keyTab="/mnt/security/keytab"
storeKey=true
useTicketCache=false
principal="zkclient@EXAMPLE.COM";
{% endif %}
};
Server {
{% if is_ibm_jdk %}
com.ibm.security.auth.module.Krb5LoginModule required debug=false
credsType=both
useKeytab="file:/mnt/security/keytab"
principal="zookeeper/{{ node.account.hostname }}@EXAMPLE.COM";
{% else %}
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
keyTab="/mnt/security/keytab"
storeKey=true
useTicketCache=false
principal="zookeeper/{{ node.account.hostname }}@EXAMPLE.COM";
{% endif %}
};
{% endif %}
{% endif %}

View File

@@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
kdc.bind.address=0.0.0.0

View File

@@ -0,0 +1,701 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os.path
import signal
import streams_property
import consumer_property
from ducktape.services.service import Service
from ducktape.utils.util import wait_until
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.kafka import KafkaConfig
from kafkatest.services.monitor.jmx import JmxMixin
from kafkatest.version import LATEST_0_10_0, LATEST_0_10_1
STATE_DIR = "state.dir"
class StreamsTestBaseService(KafkaPathResolverMixin, JmxMixin, Service):
"""Base class for Streams Test services providing some common settings and functionality"""
PERSISTENT_ROOT = "/mnt/streams"
# The log file contains normal log4j logs written using a file appender. stdout and stderr are handled separately
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "streams.properties")
LOG_FILE = os.path.join(PERSISTENT_ROOT, "streams.log")
STDOUT_FILE = os.path.join(PERSISTENT_ROOT, "streams.stdout")
STDERR_FILE = os.path.join(PERSISTENT_ROOT, "streams.stderr")
JMX_LOG_FILE = os.path.join(PERSISTENT_ROOT, "jmx_tool.log")
JMX_ERR_FILE = os.path.join(PERSISTENT_ROOT, "jmx_tool.err.log")
LOG4J_CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
PID_FILE = os.path.join(PERSISTENT_ROOT, "streams.pid")
CLEAN_NODE_ENABLED = True
logs = {
"streams_config": {
"path": CONFIG_FILE,
"collect_default": True},
"streams_config.1": {
"path": CONFIG_FILE + ".1",
"collect_default": True},
"streams_config.0-1": {
"path": CONFIG_FILE + ".0-1",
"collect_default": True},
"streams_config.1-1": {
"path": CONFIG_FILE + ".1-1",
"collect_default": True},
"streams_log": {
"path": LOG_FILE,
"collect_default": True},
"streams_stdout": {
"path": STDOUT_FILE,
"collect_default": True},
"streams_stderr": {
"path": STDERR_FILE,
"collect_default": True},
"streams_log.1": {
"path": LOG_FILE + ".1",
"collect_default": True},
"streams_stdout.1": {
"path": STDOUT_FILE + ".1",
"collect_default": True},
"streams_stderr.1": {
"path": STDERR_FILE + ".1",
"collect_default": True},
"streams_log.2": {
"path": LOG_FILE + ".2",
"collect_default": True},
"streams_stdout.2": {
"path": STDOUT_FILE + ".2",
"collect_default": True},
"streams_stderr.2": {
"path": STDERR_FILE + ".2",
"collect_default": True},
"streams_log.3": {
"path": LOG_FILE + ".3",
"collect_default": True},
"streams_stdout.3": {
"path": STDOUT_FILE + ".3",
"collect_default": True},
"streams_stderr.3": {
"path": STDERR_FILE + ".3",
"collect_default": True},
"streams_log.0-1": {
"path": LOG_FILE + ".0-1",
"collect_default": True},
"streams_stdout.0-1": {
"path": STDOUT_FILE + ".0-1",
"collect_default": True},
"streams_stderr.0-1": {
"path": STDERR_FILE + ".0-1",
"collect_default": True},
"streams_log.0-2": {
"path": LOG_FILE + ".0-2",
"collect_default": True},
"streams_stdout.0-2": {
"path": STDOUT_FILE + ".0-2",
"collect_default": True},
"streams_stderr.0-2": {
"path": STDERR_FILE + ".0-2",
"collect_default": True},
"streams_log.0-3": {
"path": LOG_FILE + ".0-3",
"collect_default": True},
"streams_stdout.0-3": {
"path": STDOUT_FILE + ".0-3",
"collect_default": True},
"streams_stderr.0-3": {
"path": STDERR_FILE + ".0-3",
"collect_default": True},
"streams_log.0-4": {
"path": LOG_FILE + ".0-4",
"collect_default": True},
"streams_stdout.0-4": {
"path": STDOUT_FILE + ".0-4",
"collect_default": True},
"streams_stderr.0-4": {
"path": STDERR_FILE + ".0-4",
"collect_default": True},
"streams_log.0-5": {
"path": LOG_FILE + ".0-5",
"collect_default": True},
"streams_stdout.0-5": {
"path": STDOUT_FILE + ".0-5",
"collect_default": True},
"streams_stderr.0-5": {
"path": STDERR_FILE + ".0-5",
"collect_default": True},
"streams_log.0-6": {
"path": LOG_FILE + ".0-6",
"collect_default": True},
"streams_stdout.0-6": {
"path": STDOUT_FILE + ".0-6",
"collect_default": True},
"streams_stderr.0-6": {
"path": STDERR_FILE + ".0-6",
"collect_default": True},
"streams_log.1-1": {
"path": LOG_FILE + ".1-1",
"collect_default": True},
"streams_stdout.1-1": {
"path": STDOUT_FILE + ".1-1",
"collect_default": True},
"streams_stderr.1-1": {
"path": STDERR_FILE + ".1-1",
"collect_default": True},
"streams_log.1-2": {
"path": LOG_FILE + ".1-2",
"collect_default": True},
"streams_stdout.1-2": {
"path": STDOUT_FILE + ".1-2",
"collect_default": True},
"streams_stderr.1-2": {
"path": STDERR_FILE + ".1-2",
"collect_default": True},
"streams_log.1-3": {
"path": LOG_FILE + ".1-3",
"collect_default": True},
"streams_stdout.1-3": {
"path": STDOUT_FILE + ".1-3",
"collect_default": True},
"streams_stderr.1-3": {
"path": STDERR_FILE + ".1-3",
"collect_default": True},
"streams_log.1-4": {
"path": LOG_FILE + ".1-4",
"collect_default": True},
"streams_stdout.1-4": {
"path": STDOUT_FILE + ".1-4",
"collect_default": True},
"streams_stderr.1-4": {
"path": STDERR_FILE + ".1-4",
"collect_default": True},
"streams_log.1-5": {
"path": LOG_FILE + ".1-5",
"collect_default": True},
"streams_stdout.1-5": {
"path": STDOUT_FILE + ".1-5",
"collect_default": True},
"streams_stderr.1-5": {
"path": STDERR_FILE + ".1-5",
"collect_default": True},
"streams_log.1-6": {
"path": LOG_FILE + ".1-6",
"collect_default": True},
"streams_stdout.1-6": {
"path": STDOUT_FILE + ".1-6",
"collect_default": True},
"streams_stderr.1-6": {
"path": STDERR_FILE + ".1-6",
"collect_default": True},
"jmx_log": {
"path": JMX_LOG_FILE,
"collect_default": True},
"jmx_err": {
"path": JMX_ERR_FILE,
"collect_default": True},
}
def __init__(self, test_context, kafka, streams_class_name, user_test_args1, user_test_args2=None, user_test_args3=None, user_test_args4=None):
Service.__init__(self, test_context, num_nodes=1)
self.kafka = kafka
self.args = {'streams_class_name': streams_class_name,
'user_test_args1': user_test_args1,
'user_test_args2': user_test_args2,
'user_test_args3': user_test_args3,
'user_test_args4': user_test_args4}
self.log_level = "DEBUG"
@property
def node(self):
return self.nodes[0]
def pids(self, node):
try:
pids = [pid for pid in node.account.ssh_capture("cat " + self.PID_FILE, callback=str)]
return [int(pid) for pid in pids]
except Exception as exception:
self.logger.debug(str(exception))
return []
def stop_nodes(self, clean_shutdown=True):
for node in self.nodes:
self.stop_node(node, clean_shutdown)
def stop_node(self, node, clean_shutdown=True):
self.logger.info((clean_shutdown and "Cleanly" or "Forcibly") + " stopping Streams Test on " + str(node.account))
pids = self.pids(node)
sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL
for pid in pids:
node.account.signal(pid, sig, allow_fail=True)
if clean_shutdown:
for pid in pids:
wait_until(lambda: not node.account.alive(pid), timeout_sec=120, err_msg="Streams Test process on " + str(node.account) + " took too long to exit")
node.account.ssh("rm -f " + self.PID_FILE, allow_fail=False)
def restart(self):
# We don't want to do any clean up here, just restart the process.
for node in self.nodes:
self.logger.info("Restarting Kafka Streams on " + str(node.account))
self.stop_node(node)
self.start_node(node)
def abortThenRestart(self):
# We don't want to do any clean up here, just abort then restart the process. The running service is killed immediately.
for node in self.nodes:
self.logger.info("Aborting Kafka Streams on " + str(node.account))
self.stop_node(node, False)
self.logger.info("Restarting Kafka Streams on " + str(node.account))
self.start_node(node)
def wait(self, timeout_sec=1440):
for node in self.nodes:
self.wait_node(node, timeout_sec)
def wait_node(self, node, timeout_sec=None):
for pid in self.pids(node):
wait_until(lambda: not node.account.alive(pid), timeout_sec=timeout_sec, err_msg="Streams Test process on " + str(node.account) + " took too long to exit")
def clean_node(self, node):
node.account.kill_process("streams", clean_shutdown=False, allow_fail=True)
if self.CLEAN_NODE_ENABLED:
node.account.ssh("rm -rf " + self.PERSISTENT_ROOT, allow_fail=False)
def start_cmd(self, node):
args = self.args.copy()
args['config_file'] = self.CONFIG_FILE
args['stdout'] = self.STDOUT_FILE
args['stderr'] = self.STDERR_FILE
args['pidfile'] = self.PID_FILE
args['log4j'] = self.LOG4J_CONFIG_FILE
args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node)
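# The command below backgrounds the streams class inside a subshell, writes its pid to
# the pid file via file descriptor 3, and appends stdout/stderr to the capture files.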
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \
"INCLUDE_TEST_JARS=true %(kafka_run_class)s %(streams_class_name)s " \
" %(config_file)s %(user_test_args1)s %(user_test_args2)s %(user_test_args3)s" \
" %(user_test_args4)s & echo $! >&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args
self.logger.info("Executing streams cmd: " + cmd)
return cmd
def prop_file(self):
cfg = KafkaConfig(**{streams_property.STATE_DIR: self.PERSISTENT_ROOT, streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers()})
return cfg.render()
def start_node(self, node):
node.account.mkdirs(self.PERSISTENT_ROOT)
prop_file = self.prop_file()
node.account.create_file(self.CONFIG_FILE, prop_file)
node.account.create_file(self.LOG4J_CONFIG_FILE, self.render('tools_log4j.properties', log_file=self.LOG_FILE))
self.logger.info("Starting StreamsTest process on " + str(node.account))
with node.account.monitor_log(self.STDOUT_FILE) as monitor:
node.account.ssh(self.start_cmd(node))
monitor.wait_until('StreamsTest instance started', timeout_sec=60, err_msg="Never saw message indicating StreamsTest finished startup on " + str(node.account))
if len(self.pids(node)) == 0:
raise RuntimeError("No process ids recorded")
class StreamsSmokeTestBaseService(StreamsTestBaseService):
"""Base class for Streams Smoke Test services providing some common settings and functionality"""
def __init__(self, test_context, kafka, command, processing_guarantee = 'at_least_once', num_threads = 3, replication_factor = 3):
super(StreamsSmokeTestBaseService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsSmokeTest",
command)
self.NUM_THREADS = num_threads
self.PROCESSING_GUARANTEE = processing_guarantee
self.KAFKA_STREAMS_VERSION = ""
self.UPGRADE_FROM = None
self.REPLICATION_FACTOR = replication_factor
def set_version(self, kafka_streams_version):
self.KAFKA_STREAMS_VERSION = kafka_streams_version
def set_upgrade_from(self, upgrade_from):
self.UPGRADE_FROM = upgrade_from
def prop_file(self):
properties = {streams_property.STATE_DIR: self.PERSISTENT_ROOT,
streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers(),
"processing.guarantee": self.PROCESSING_GUARANTEE,
streams_property.NUM_THREADS: self.NUM_THREADS,
"replication.factor": self.REPLICATION_FACTOR,
"num.standby.replicas": 2,
"buffered.records.per.partition": 100,
"commit.interval.ms": 1000,
"auto.offset.reset": "earliest",
"acks": "all"}
if self.UPGRADE_FROM is not None:
properties['upgrade.from'] = self.UPGRADE_FROM
cfg = KafkaConfig(**properties)
return cfg.render()
def start_cmd(self, node):
args = self.args.copy()
args['config_file'] = self.CONFIG_FILE
args['stdout'] = self.STDOUT_FILE
args['stderr'] = self.STDERR_FILE
args['pidfile'] = self.PID_FILE
args['log4j'] = self.LOG4J_CONFIG_FILE
args['version'] = self.KAFKA_STREAMS_VERSION
args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node)
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\";" \
" INCLUDE_TEST_JARS=true UPGRADE_KAFKA_STREAMS_TEST_VERSION=%(version)s" \
" bash -x %(kafka_run_class)s %(streams_class_name)s" \
" %(config_file)s %(user_test_args1)s" \
" & echo $! >&3 ) " \
"1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args
self.logger.info("Executing streams cmd: " + cmd)
return cmd
class StreamsEosTestBaseService(StreamsTestBaseService):
"""Base class for Streams EOS Test services providing some common settings and functionality"""
clean_node_enabled = True
def __init__(self, test_context, kafka, command):
super(StreamsEosTestBaseService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsEosTest",
command)
def clean_node(self, node):
if self.clean_node_enabled:
super(StreamsEosTestBaseService, self).clean_node(node)
class StreamsSmokeTestDriverService(StreamsSmokeTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsSmokeTestDriverService, self).__init__(test_context, kafka, "run")
self.DISABLE_AUTO_TERMINATE = ""
def disable_auto_terminate(self):
self.DISABLE_AUTO_TERMINATE = "disableAutoTerminate"
def start_cmd(self, node):
args = self.args.copy()
args['config_file'] = self.CONFIG_FILE
args['stdout'] = self.STDOUT_FILE
args['stderr'] = self.STDERR_FILE
args['pidfile'] = self.PID_FILE
args['log4j'] = self.LOG4J_CONFIG_FILE
args['disable_auto_terminate'] = self.DISABLE_AUTO_TERMINATE
args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node)
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \
"INCLUDE_TEST_JARS=true %(kafka_run_class)s %(streams_class_name)s " \
" %(config_file)s %(user_test_args1)s %(disable_auto_terminate)s" \
" & echo $! >&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args
return cmd
class StreamsSmokeTestJobRunnerService(StreamsSmokeTestBaseService):
def __init__(self, test_context, kafka, processing_guarantee = 'at_least_once', num_threads = 3, replication_factor = 3):
super(StreamsSmokeTestJobRunnerService, self).__init__(test_context, kafka, "process", processing_guarantee = processing_guarantee, num_threads = num_threads, replication_factor = replication_factor)
class StreamsSmokeTestEOSJobRunnerService(StreamsSmokeTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsSmokeTestEOSJobRunnerService, self).__init__(test_context, kafka, "process-eos")
class StreamsEosTestDriverService(StreamsEosTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsEosTestDriverService, self).__init__(test_context, kafka, "run")
class StreamsEosTestJobRunnerService(StreamsEosTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsEosTestJobRunnerService, self).__init__(test_context, kafka, "process")
class StreamsComplexEosTestJobRunnerService(StreamsEosTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsComplexEosTestJobRunnerService, self).__init__(test_context, kafka, "process-complex")
class StreamsEosTestVerifyRunnerService(StreamsEosTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsEosTestVerifyRunnerService, self).__init__(test_context, kafka, "verify")
class StreamsComplexEosTestVerifyRunnerService(StreamsEosTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsComplexEosTestVerifyRunnerService, self).__init__(test_context, kafka, "verify-complex")
class StreamsSmokeTestShutdownDeadlockService(StreamsSmokeTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsSmokeTestShutdownDeadlockService, self).__init__(test_context, kafka, "close-deadlock-test")
class StreamsBrokerCompatibilityService(StreamsTestBaseService):
def __init__(self, test_context, kafka, eosEnabled):
super(StreamsBrokerCompatibilityService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.BrokerCompatibilityTest",
eosEnabled)
class StreamsBrokerDownResilienceService(StreamsTestBaseService):
def __init__(self, test_context, kafka, configs):
super(StreamsBrokerDownResilienceService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsBrokerDownResilienceTest",
configs)
def start_cmd(self, node):
args = self.args.copy()
args['config_file'] = self.CONFIG_FILE
args['stdout'] = self.STDOUT_FILE
args['stderr'] = self.STDERR_FILE
args['pidfile'] = self.PID_FILE
args['log4j'] = self.LOG4J_CONFIG_FILE
args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node)
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \
"INCLUDE_TEST_JARS=true %(kafka_run_class)s %(streams_class_name)s " \
" %(config_file)s %(user_test_args1)s %(user_test_args2)s %(user_test_args3)s" \
" %(user_test_args4)s & echo $! >&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args
self.logger.info("Executing: " + cmd)
return cmd
class StreamsStandbyTaskService(StreamsTestBaseService):
def __init__(self, test_context, kafka, configs):
super(StreamsStandbyTaskService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsStandByReplicaTest",
configs)
class StreamsOptimizedUpgradeTestService(StreamsTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsOptimizedUpgradeTestService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsOptimizedTest",
"")
self.OPTIMIZED_CONFIG = 'none'
self.INPUT_TOPIC = None
self.AGGREGATION_TOPIC = None
self.REDUCE_TOPIC = None
self.JOIN_TOPIC = None
def prop_file(self):
properties = {streams_property.STATE_DIR: self.PERSISTENT_ROOT,
streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers()}
properties['topology.optimization'] = self.OPTIMIZED_CONFIG
properties['input.topic'] = self.INPUT_TOPIC
properties['aggregation.topic'] = self.AGGREGATION_TOPIC
properties['reduce.topic'] = self.REDUCE_TOPIC
properties['join.topic'] = self.JOIN_TOPIC
cfg = KafkaConfig(**properties)
return cfg.render()
class StreamsUpgradeTestJobRunnerService(StreamsTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsUpgradeTestJobRunnerService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsUpgradeTest",
"")
self.UPGRADE_FROM = None
self.UPGRADE_TO = None
self.extra_properties = {}
def set_config(self, key, value):
self.extra_properties[key] = value
def set_version(self, kafka_streams_version):
self.KAFKA_STREAMS_VERSION = kafka_streams_version
def set_upgrade_from(self, upgrade_from):
self.UPGRADE_FROM = upgrade_from
def set_upgrade_to(self, upgrade_to):
self.UPGRADE_TO = upgrade_to
def prop_file(self):
properties = self.extra_properties.copy()
properties[streams_property.STATE_DIR] = self.PERSISTENT_ROOT
properties[streams_property.KAFKA_SERVERS] = self.kafka.bootstrap_servers()
if self.UPGRADE_FROM is not None:
properties['upgrade.from'] = self.UPGRADE_FROM
if self.UPGRADE_TO == "future_version":
properties['test.future.metadata'] = "any_value"
cfg = KafkaConfig(**properties)
return cfg.render()
def start_cmd(self, node):
args = self.args.copy()
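# Streams test clients from 0.10.0/0.10.1 still take a ZooKeeper connect string as an
# extra command-line argument; for newer versions an empty string is substituted so the
# argument is effectively omitted.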
if self.KAFKA_STREAMS_VERSION == str(LATEST_0_10_0) or self.KAFKA_STREAMS_VERSION == str(LATEST_0_10_1):
args['zk'] = self.kafka.zk.connect_setting()
else:
args['zk'] = ""
args['config_file'] = self.CONFIG_FILE
args['stdout'] = self.STDOUT_FILE
args['stderr'] = self.STDERR_FILE
args['pidfile'] = self.PID_FILE
args['log4j'] = self.LOG4J_CONFIG_FILE
args['version'] = self.KAFKA_STREAMS_VERSION
args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node)
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \
"INCLUDE_TEST_JARS=true UPGRADE_KAFKA_STREAMS_TEST_VERSION=%(version)s " \
" %(kafka_run_class)s %(streams_class_name)s %(zk)s %(config_file)s " \
" & echo $! >&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args
self.logger.info("Executing: " + cmd)
return cmd
class StreamsNamedRepartitionTopicService(StreamsTestBaseService):
def __init__(self, test_context, kafka):
super(StreamsNamedRepartitionTopicService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsNamedRepartitionTest",
"")
self.ADD_ADDITIONAL_OPS = 'false'
self.INPUT_TOPIC = None
self.AGGREGATION_TOPIC = None
def prop_file(self):
properties = {streams_property.STATE_DIR: self.PERSISTENT_ROOT,
streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers()}
properties['input.topic'] = self.INPUT_TOPIC
properties['aggregation.topic'] = self.AGGREGATION_TOPIC
properties['add.operations'] = self.ADD_ADDITIONAL_OPS
cfg = KafkaConfig(**properties)
return cfg.render()
class StaticMemberTestService(StreamsTestBaseService):
def __init__(self, test_context, kafka, group_instance_id, num_threads):
super(StaticMemberTestService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StaticMemberTestClient",
"")
self.INPUT_TOPIC = None
self.GROUP_INSTANCE_ID = group_instance_id
self.NUM_THREADS = num_threads
def prop_file(self):
properties = {streams_property.STATE_DIR: self.PERSISTENT_ROOT,
streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers(),
streams_property.NUM_THREADS: self.NUM_THREADS,
consumer_property.GROUP_INSTANCE_ID: self.GROUP_INSTANCE_ID,
consumer_property.SESSION_TIMEOUT_MS: 60000}
properties['input.topic'] = self.INPUT_TOPIC
cfg = KafkaConfig(**properties)
return cfg.render()
class CooperativeRebalanceUpgradeService(StreamsTestBaseService):
def __init__(self, test_context, kafka):
super(CooperativeRebalanceUpgradeService, self).__init__(test_context,
kafka,
"org.apache.kafka.streams.tests.StreamsUpgradeToCooperativeRebalanceTest",
"")
self.UPGRADE_FROM = None
# these properties will be overridden in test
self.SOURCE_TOPIC = None
self.SINK_TOPIC = None
self.TASK_DELIMITER = "#"
self.REPORT_INTERVAL = None
self.standby_tasks = None
self.active_tasks = None
self.upgrade_phase = None
def set_tasks(self, task_string):
label = "TASK-ASSIGNMENTS:"
task_string_substr = task_string[len(label):]
all_tasks = task_string_substr.split(self.TASK_DELIMITER)
self.active_tasks = set(all_tasks[0].split(","))
if len(all_tasks) > 1:
self.standby_tasks = set(all_tasks[1].split(","))
def set_version(self, kafka_streams_version):
self.KAFKA_STREAMS_VERSION = kafka_streams_version
def set_upgrade_phase(self, upgrade_phase):
self.upgrade_phase = upgrade_phase
def start_cmd(self, node):
args = self.args.copy()
if self.KAFKA_STREAMS_VERSION == str(LATEST_0_10_0) or self.KAFKA_STREAMS_VERSION == str(LATEST_0_10_1):
args['zk'] = self.kafka.zk.connect_setting()
else:
args['zk'] = ""
args['config_file'] = self.CONFIG_FILE
args['stdout'] = self.STDOUT_FILE
args['stderr'] = self.STDERR_FILE
args['pidfile'] = self.PID_FILE
args['log4j'] = self.LOG4J_CONFIG_FILE
args['version'] = self.KAFKA_STREAMS_VERSION
args['kafka_run_class'] = self.path.script("kafka-run-class.sh", node)
cmd = "( export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%(log4j)s\"; " \
"INCLUDE_TEST_JARS=true UPGRADE_KAFKA_STREAMS_TEST_VERSION=%(version)s " \
" %(kafka_run_class)s %(streams_class_name)s %(zk)s %(config_file)s " \
" & echo $! >&3 ) 1>> %(stdout)s 2>> %(stderr)s 3> %(pidfile)s" % args
self.logger.info("Executing: " + cmd)
return cmd
def prop_file(self):
properties = {streams_property.STATE_DIR: self.PERSISTENT_ROOT,
streams_property.KAFKA_SERVERS: self.kafka.bootstrap_servers()}
if self.UPGRADE_FROM is not None:
properties['upgrade.from'] = self.UPGRADE_FROM
else:
try:
del properties['upgrade.from']
except KeyError:
self.logger.info("Key 'upgrade.from' not there, better safe than sorry")
if self.upgrade_phase is not None:
properties['upgrade.phase'] = self.upgrade_phase
properties['source.topic'] = self.SOURCE_TOPIC
properties['sink.topic'] = self.SINK_TOPIC
properties['task.delimiter'] = self.TASK_DELIMITER
properties['report.interval'] = self.REPORT_INTERVAL
cfg = KafkaConfig(**properties)
return cfg.render()
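# Illustrative sketch only (not part of the original module): one possible pairing of the
# smoke-test services, loosely following how the system tests use them -- a driver node in
# "run" mode alongside a job runner in "process" mode. `test_context` and `kafka` are
# assumed to be supplied by the surrounding ducktape test.
def _example_run_smoke_test(test_context, kafka):
    driver = StreamsSmokeTestDriverService(test_context, kafka)
    processor = StreamsSmokeTestJobRunnerService(test_context, kafka)
    driver.start()
    processor.start()
    # wait() blocks until the recorded pids exit (see wait_node above)
    driver.wait()
    processor.stop()
    return driver, processor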

View File

@@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Define Streams configuration property names here.
"""
STATE_DIR = "state.dir"
KAFKA_SERVERS = "bootstrap.servers"
NUM_THREADS = "num.stream.threads"
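# Illustrative sketch only: these constants are used as keys when rendering a Streams
# properties file (mirroring prop_file() in streams.py); the argument values here are
# placeholders supplied by the caller.
def _example_streams_props(state_dir, bootstrap_servers, num_threads):
    from kafkatest.services.kafka import KafkaConfig
    return KafkaConfig(**{STATE_DIR: state_dir,
                          KAFKA_SERVERS: bootstrap_servers,
                          NUM_THREADS: num_threads}).render()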

View File

@@ -0,0 +1,29 @@
##
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##
# Define the root logger with appender file
log4j.rootLogger = {{ log_level|default("INFO") }}, FILE
log4j.appender.FILE=org.apache.log4j.FileAppender
log4j.appender.FILE.File={{ log_file }}
log4j.appender.FILE.ImmediateFlush=true
log4j.appender.FILE.Append=true
log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
log4j.appender.FILE.layout.conversionPattern=[%d] %p %m (%c)%n
log4j.logger.org.apache.zookeeper=ERROR
log4j.logger.org.reflections=ERROR

View File

@@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
group.id={{ group_id|default('test-consumer-group') }}
{% if client_id is defined and client_id is not none %}
client.id={{ client_id }}
{% endif %}
{% if consumer_metadata_max_age_ms is defined and consumer_metadata_max_age_ms is not none %}
metadata.max.age.ms={{ consumer_metadata_max_age_ms }}
{% endif %}

View File

@@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# see kafka.consumer.ConsumerConfig for more details
bootstrap.servers={{ source.bootstrap_servers(security_config.security_protocol) }}
{% if source_auto_offset_reset is defined and source_auto_offset_reset is not none %}
auto.offset.reset={{ source_auto_offset_reset|default('latest') }}
{% endif %}
group.id={{ group_id|default('test-consumer-group') }}
{% if partition_assignment_strategy is defined and partition_assignment_strategy is not none %}
partition.assignment.strategy={{ partition_assignment_strategy }}
{% endif %}

View File

@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
bootstrap.servers = {{ target.bootstrap_servers(security_config.security_protocol) }}
{% if producer_interceptor_classes is defined and producer_interceptor_classes is not none %}
interceptor.classes={{ producer_interceptor_classes }}
{% endif %}

View File

@@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# see kafka.producer.ProducerConfig for more details
request.timeout.ms={{ request_timeout_ms }}

View File

@@ -0,0 +1,31 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Define the root logger with appender file
log4j.rootLogger = {{ log_level|default("INFO") }}, FILE
{% if loggers is defined %}
{% for logger, log_level in loggers.iteritems() %}
log4j.logger.{{ logger }}={{ log_level }}
{% endfor %}
{% endif %}
log4j.appender.FILE=org.apache.log4j.FileAppender
log4j.appender.FILE.File={{ log_file }}
log4j.appender.FILE.ImmediateFlush=true
# Set the append to true
log4j.appender.FILE.Append=true
log4j.appender.FILE.layout=org.apache.log4j.PatternLayout
log4j.appender.FILE.layout.conversionPattern=[%d] %p %m (%c)%n

View File

@@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
dataDir=/mnt/zookeeper/data
{% if zk_client_port %}
clientPort=2181
{% endif %}
{% if zk_client_secure_port %}
secureClientPort=2182
serverCnxnFactory=org.apache.zookeeper.server.NettyServerCnxnFactory
authProvider.x509=org.apache.zookeeper.server.auth.X509AuthenticationProvider
ssl.keyStore.location=/mnt/security/test.keystore.jks
ssl.keyStore.password=test-ks-passwd
ssl.keyStore.type=JKS
ssl.trustStore.location=/mnt/security/test.truststore.jks
ssl.trustStore.password=test-ts-passwd
ssl.trustStore.type=JKS
{% if zk_tls_encrypt_only %}
ssl.clientAuth=none
{% endif %}
{% endif %}
maxClientCnxns=0
initLimit=5
syncLimit=2
quorumListenOnAllIPs=true
{% for node in nodes %}
server.{{ loop.index }}={{ node.account.hostname }}:2888:3888
{% endfor %}

View File

@@ -0,0 +1,204 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import json
import signal
from ducktape.utils.util import wait_until
from ducktape.services.background_thread import BackgroundThreadService
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from ducktape.cluster.remoteaccount import RemoteCommandError
class TransactionalMessageCopier(KafkaPathResolverMixin, BackgroundThreadService):
"""This service wraps org.apache.kafka.tools.TransactionalMessageCopier for
use in system testing.
"""
PERSISTENT_ROOT = "/mnt/transactional_message_copier"
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "transactional_message_copier.stdout")
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "transactional_message_copier.stderr")
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
LOG_FILE = os.path.join(LOG_DIR, "transactional_message_copier.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
logs = {
"transactional_message_copier_stdout": {
"path": STDOUT_CAPTURE,
"collect_default": True},
"transactional_message_copier_stderr": {
"path": STDERR_CAPTURE,
"collect_default": True},
"transactional_message_copier_log": {
"path": LOG_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, kafka, transactional_id, consumer_group,
input_topic, input_partition, output_topic, max_messages=-1,
transaction_size=1000, transaction_timeout=None, enable_random_aborts=True,
use_group_metadata=False, group_mode=False):
super(TransactionalMessageCopier, self).__init__(context, num_nodes)
self.kafka = kafka
self.transactional_id = transactional_id
self.consumer_group = consumer_group
self.transaction_size = transaction_size
self.transaction_timeout = transaction_timeout
self.input_topic = input_topic
self.input_partition = input_partition
self.output_topic = output_topic
self.max_messages = max_messages
self.message_copy_finished = False
self.consumed = -1
self.remaining = -1
self.stop_timeout_sec = 60
self.enable_random_aborts = enable_random_aborts
self.use_group_metadata = use_group_metadata
self.group_mode = group_mode
self.loggers = {
"org.apache.kafka.clients.producer": "TRACE",
"org.apache.kafka.clients.consumer": "TRACE"
}
def _worker(self, idx, node):
node.account.ssh("mkdir -p %s" % TransactionalMessageCopier.PERSISTENT_ROOT,
allow_fail=False)
# Create and upload log properties
log_config = self.render('tools_log4j.properties',
log_file=TransactionalMessageCopier.LOG_FILE)
node.account.create_file(TransactionalMessageCopier.LOG4J_CONFIG, log_config)
# Configure security
self.security_config = self.kafka.security_config.client_config(node=node)
self.security_config.setup_node(node)
cmd = self.start_cmd(node, idx)
self.logger.debug("TransactionalMessageCopier %d command: %s" % (idx, cmd))
try:
for line in node.account.ssh_capture(cmd):
line = line.strip()
data = self.try_parse_json(line)
if data is not None:
with self.lock:
self.remaining = int(data["remaining"])
self.consumed = int(data["consumed"])
self.logger.info("%s: consumed %d, remaining %d" %
(self.transactional_id, self.consumed, self.remaining))
if "shutdown_complete" in data:
if self.remaining == 0:
# We are only finished if the remaining
# messages at the time of shutdown is 0.
#
# Otherwise a clean shutdown would still print
# a 'shutdown complete' message even though
# there are unprocessed messages, causing
# tests to fail.
self.logger.info("%s : Finished message copy" % self.transactional_id)
self.message_copy_finished = True
else:
self.logger.info("%s : Shut down without finishing message copy." %\
self.transactional_id)
except RemoteCommandError as e:
self.logger.debug("Got exception while reading output from copier, \
probably because it was SIGKILL'd (exit code 137): %s" % str(e))
def start_cmd(self, node, idx):
cmd = "export LOG_DIR=%s;" % TransactionalMessageCopier.LOG_DIR
cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts
cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % TransactionalMessageCopier.LOG4J_CONFIG
cmd += self.path.script("kafka-run-class.sh", node) + " org.apache.kafka.tools." + "TransactionalMessageCopier"
cmd += " --broker-list %s" % self.kafka.bootstrap_servers(self.security_config.security_protocol)
cmd += " --transactional-id %s" % self.transactional_id
cmd += " --consumer-group %s" % self.consumer_group
cmd += " --input-topic %s" % self.input_topic
cmd += " --output-topic %s" % self.output_topic
cmd += " --input-partition %s" % str(self.input_partition)
cmd += " --transaction-size %s" % str(self.transaction_size)
if self.transaction_timeout is not None:
cmd += " --transaction-timeout %s" % str(self.transaction_timeout)
if self.enable_random_aborts:
cmd += " --enable-random-aborts"
if self.use_group_metadata:
cmd += " --use-group-metadata"
if self.group_mode:
cmd += " --group-mode"
if self.max_messages > 0:
cmd += " --max-messages %s" % str(self.max_messages)
cmd += " 2>> %s | tee -a %s &" % (TransactionalMessageCopier.STDERR_CAPTURE, TransactionalMessageCopier.STDOUT_CAPTURE)
return cmd
def clean_node(self, node):
self.kill_node(node, clean_shutdown=False)
node.account.ssh("rm -rf " + self.PERSISTENT_ROOT, allow_fail=False)
self.security_config.clean_node(node)
def pids(self, node):
try:
cmd = "jps | grep -i TransactionalMessageCopier | awk '{print $1}'"
pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)]
return pid_arr
except (RemoteCommandError, ValueError) as e:
self.logger.error("Could not list pids: %s" % str(e))
return []
def alive(self, node):
return len(self.pids(node)) > 0
def kill_node(self, node, clean_shutdown=True):
pids = self.pids(node)
sig = signal.SIGTERM if clean_shutdown else signal.SIGKILL
for pid in pids:
node.account.signal(pid, sig)
wait_until(lambda: len(self.pids(node)) == 0, timeout_sec=60, err_msg="Message Copier failed to stop")
def stop_node(self, node, clean_shutdown=True):
self.kill_node(node, clean_shutdown)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def restart(self, clean_shutdown):
if self.is_done:
return
node = self.nodes[0]
with self.lock:
self.consumed = -1
self.remaining = -1
self.stop_node(node, clean_shutdown)
self.start_node(node)
def try_parse_json(self, string):
"""Try to parse a string as json. Return None if not parseable."""
try:
record = json.loads(string)
return record
except ValueError:
self.logger.debug("Could not parse as json: %s" % str(string))
return None
@property
def is_done(self):
return self.message_copy_finished
def progress_percent(self):
with self.lock:
if self.remaining < 0:
return 0
if self.consumed + self.remaining == 0:
return 100
return (float(self.consumed)/float(self.consumed + self.remaining)) * 100
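# Worked example (not part of the original file): with consumed=750 and
# remaining=250, progress_percent() returns 750 / (750 + 250) * 100 = 75.0.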

View File

@@ -0,0 +1,14 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

View File

@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.services.service import Service
from kafkatest.services.trogdor.task_spec import TaskSpec
class ConsumeBenchWorkloadSpec(TaskSpec):
def __init__(self, start_ms, duration_ms, consumer_node, bootstrap_servers,
target_messages_per_sec, max_messages, active_topics,
consumer_conf, common_client_conf, admin_client_conf, consumer_group=None, threads_per_worker=1):
super(ConsumeBenchWorkloadSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.workload.ConsumeBenchSpec"
self.message["consumerNode"] = consumer_node
self.message["bootstrapServers"] = bootstrap_servers
self.message["targetMessagesPerSec"] = target_messages_per_sec
self.message["maxMessages"] = max_messages
self.message["consumerConf"] = consumer_conf
self.message["adminClientConf"] = admin_client_conf
self.message["commonClientConf"] = common_client_conf
self.message["activeTopics"] = active_topics
self.message["threadsPerWorker"] = threads_per_worker
if consumer_group is not None:
self.message["consumerGroup"] = consumer_group
class ConsumeBenchWorkloadService(Service):
def __init__(self, context, kafka):
Service.__init__(self, context, num_nodes=1)
self.bootstrap_servers = kafka.bootstrap_servers(validate=False)
self.consumer_node = self.nodes[0].account.hostname
def free(self):
Service.free(self)
def wait_node(self, node, timeout_sec=None):
pass
def stop_node(self, node):
pass
def clean_node(self, node):
pass

View File

@@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.services.trogdor.task_spec import TaskSpec
class DegradedNetworkFaultSpec(TaskSpec):
"""
The specification for a network degradation fault.
Degrades the network so that traffic on a subset of nodes has higher latency
"""
def __init__(self, start_ms, duration_ms):
"""
Create a new DegradedNetworkFaultSpec.
:param start_ms: The start time, as described in task_spec.py
:param duration_ms: The duration in milliseconds.
"""
super(DegradedNetworkFaultSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.fault.DegradedNetworkFaultSpec"
self.message["nodeSpecs"] = {}
def add_node_spec(self, node, networkDevice, latencyMs=0, rateLimitKbit=0):
"""
Add a node spec to this fault spec
:param node: The node name which is to be degraded
:param networkDevice: The network device name (e.g., eth0) to apply the degradation to
:param latencyMs: Optional. How much latency to add to each packet
:param rateLimitKbit: Optional. Maximum throughput in kilobits per second to allow
:return:
"""
self.message["nodeSpecs"][node] = {
"rateLimitKbit": rateLimitKbit, "latencyMs": latencyMs, "networkDevice": networkDevice
}
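# Illustrative usage sketch (not part of the original file); the node name,
# device and numbers below are hypothetical examples.
#
#   spec = DegradedNetworkFaultSpec(start_ms=0, duration_ms=60000)
#   spec.add_node_spec("knode01", "eth0", latencyMs=100, rateLimitKbit=1000)
#   # spec.message now holds the payload later submitted via TrogdorService.create_task()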

View File

@@ -0,0 +1,46 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.services.trogdor.task_spec import TaskSpec
class FilesUnreadableFaultSpec(TaskSpec):
"""
The specification for a fault which makes files unreadable.
"""
def __init__(self, start_ms, duration_ms, node_names, mount_path,
prefix, error_code):
"""
Create a new FilesUnreadableFaultSpec.
:param start_ms: The start time, as described in task_spec.py
:param duration_ms: The duration in milliseconds.
:param node_names: The names of the node(s) to create the fault on.
:param mount_path: The mount path.
:param prefix: The prefix within the mount point to make unreadable.
:param error_code: The error code to use.
"""
super(FilesUnreadableFaultSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.fault.FilesUnreadableFaultSpec"
self.message["nodeNames"] = node_names
self.message["mountPath"] = mount_path
self.message["prefix"] = prefix
self.message["errorCode"] = error_code
self.kibosh_message = {}
self.kibosh_message["type"] = "unreadable"
self.kibosh_message["prefix"] = prefix
self.kibosh_message["code"] = error_code

View File

@@ -0,0 +1,156 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os.path
from ducktape.errors import TimeoutError  # wait_until raises ducktape's TimeoutError, not the builtin
from ducktape.services.service import Service
from ducktape.utils import util
class KiboshService(Service):
"""
Kibosh is a fault-injecting FUSE filesystem.
Attributes:
INSTALL_ROOT The path of where Kibosh is installed.
BINARY_NAME The Kibosh binary name.
BINARY_PATH The path to the kibosh binary.
"""
INSTALL_ROOT = "/opt/kibosh/build"
BINARY_NAME = "kibosh"
BINARY_PATH = os.path.join(INSTALL_ROOT, BINARY_NAME)
def __init__(self, context, nodes, target, mirror, persist="/mnt/kibosh"):
"""
Create a Kibosh service.
:param context: The TestContext object.
:param nodes: The nodes to put the Kibosh FS on. Kibosh allocates no
nodes of its own.
:param target: The target directory, which Kibosh exports a view of.
:param mirror: The mirror directory, where Kibosh injects faults.
:param persist: Where the log files and pid files will be created.
"""
Service.__init__(self, context, num_nodes=0)
if (len(nodes) == 0):
raise RuntimeError("You must supply at least one node to run the service on.")
for node in nodes:
self.nodes.append(node)
self.target = target
self.mirror = mirror
self.persist = persist
self.control_path = os.path.join(self.mirror, "kibosh_control")
self.pidfile_path = os.path.join(self.persist, "pidfile")
self.stdout_stderr_path = os.path.join(self.persist, "kibosh-stdout-stderr.log")
self.log_path = os.path.join(self.persist, "kibosh.log")
self.logs = {
"kibosh-stdout-stderr.log": {
"path": self.stdout_stderr_path,
"collect_default": True},
"kibosh.log": {
"path": self.log_path,
"collect_default": True}
}
def free(self):
"""Clear the nodes list."""
# Because the filesystem runs on nodes which have been allocated by other services, those nodes
# are not deallocated here.
self.nodes = []
Service.free(self)
def kibosh_running(self, node):
return 0 == node.account.ssh("test -e '%s'" % self.control_path, allow_fail=True)
def start_node(self, node):
node.account.mkdirs(self.persist)
cmd = "sudo -E "
cmd += " %s" % KiboshService.BINARY_PATH
cmd += " --target %s" % self.target
cmd += " --pidfile %s" % self.pidfile_path
cmd += " --log %s" % self.log_path
cmd += " --control-mode 666"
cmd += " --verbose"
cmd += " %s" % self.mirror
cmd += " &> %s" % self.stdout_stderr_path
node.account.ssh(cmd)
util.wait_until(lambda: self.kibosh_running(node), 20, backoff_sec=.1,
err_msg="Timed out waiting for kibosh to start on %s" % node.account.hostname)
def pids(self, node):
return [pid for pid in node.account.ssh_capture("test -e '%s' && test -e /proc/$(cat '%s')" %
(self.pidfile_path, self.pidfile_path), allow_fail=True)]
def wait_node(self, node, timeout_sec=None):
return len(self.pids(node)) == 0
def kibosh_process_running(self, node):
# Note: despite its name, this returns True once no kibosh pids remain, i.e. the
# process has stopped; the wait_until calls in stop_node poll for exactly that.
return len(self.pids(node)) == 0
def stop_node(self, node):
"""Halt kibosh process(es) on this node."""
node.account.logger.debug("stop_node(%s): unmounting %s" % (node.name, self.mirror))
node.account.ssh("sudo fusermount -u %s" % self.mirror, allow_fail=True)
# Wait for the kibosh process to terminate.
try:
util.wait_until(lambda: self.kibosh_process_running(node), 20, backoff_sec=.1,
err_msg="Timed out waiting for kibosh to stop on %s" % node.account.hostname)
except TimeoutError:
# If the process won't terminate, use kill -9 to shut it down.
node.account.logger.debug("stop_node(%s): killing the kibosh process managing %s" % (node.name, self.mirror))
node.account.ssh("sudo kill -9 %s" % (" ".join(self.pids(node))), allow_fail=True)
node.account.ssh("sudo fusermount -u %s" % self.mirror)
util.wait_until(lambda: self.kibosh_process_running(node), 20, backoff_sec=.1,
err_msg="Timed out waiting for kibosh to stop on %s" % node.account.hostname)
def clean_node(self, node):
"""Clean up persistent state on this node - e.g. service logs, configuration files etc."""
self.stop_node(node)
node.account.ssh("rm -rf -- %s" % self.persist)
def set_faults(self, node, specs):
"""
Set the currently active faults.
:param node: The node.
:param specs: An array of FaultSpec objects describing the faults.
"""
if len(specs) == 0:
obj_json = "{}"
else:
fault_array = [spec.kibosh_message for spec in specs]
obj = { 'faults': fault_array }
obj_json = json.dumps(obj)
node.account.create_file(self.control_path, obj_json)
def get_fault_json(self, node):
"""
Return a JSON string which contains the currently active faults.
:param node: The node.
:returns: The fault JSON describing the faults.
"""
iter = node.account.ssh_capture("cat '%s'" % self.control_path)
text = ""
for line in iter:
text = "%s%s" % (text, line.rstrip("\r\n"))
return text
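# Illustrative usage sketch (not part of the original file), pairing this service
# with FilesUnreadableFaultSpec from files_unreadable_fault_spec.py; the node name,
# prefix and errno value (5, i.e. EIO) are hypothetical examples.
#
#   fault = FilesUnreadableFaultSpec(start_ms=0, duration_ms=30000,
#                                    node_names=["knode01"], mount_path=kibosh.mirror,
#                                    prefix="/logs", error_code=5)
#   kibosh.set_faults(node, [fault])  # writes {"faults": [fault.kibosh_message]} to the control file
#   kibosh.set_faults(node, [])       # clears the active faults again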

View File

@@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.services.trogdor.task_spec import TaskSpec
class NetworkPartitionFaultSpec(TaskSpec):
"""
The specification for a network partition fault.
Network partition faults fracture the network into different partitions
that cannot communicate with each other.
"""
def __init__(self, start_ms, duration_ms, partitions):
"""
Create a new NetworkPartitionFaultSpec.
:param start_ms: The start time, as described in task_spec.py
:param duration_ms: The duration in milliseconds.
:param partitions: An array of arrays describing the partitions.
The inner arrays may contain either node names,
or ClusterNode objects.
"""
super(NetworkPartitionFaultSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.fault.NetworkPartitionFaultSpec"
self.message["partitions"] = [TaskSpec.to_node_names(p) for p in partitions]

View File

@@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.services.trogdor.task_spec import TaskSpec
class NoOpTaskSpec(TaskSpec):
"""
The specification for a no-op task.
No-op faults are used to test Trogdor. They don't do anything,
but must be propagated to all Trogdor agents.
"""
def __init__(self, start_ms, duration_ms):
"""
Create a new NoOpFault.
:param start_ms: The start time, as described in task_spec.py
:param duration_ms: The duration in milliseconds.
"""
super(NoOpTaskSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.task.NoOpTaskSpec";

View File

@@ -0,0 +1,38 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.services.trogdor.task_spec import TaskSpec
class ProcessStopFaultSpec(TaskSpec):
"""
The specification for a process stop fault.
"""
def __init__(self, start_ms, duration_ms, nodes, java_process_name):
"""
Create a new ProcessStopFaultSpec.
:param start_ms: The start time, as described in task_spec.py
:param duration_ms: The duration in milliseconds.
:param nodes: An array describing the nodes to stop processes on. The array
may contain either node names, or ClusterNode objects.
:param java_process_name: The name of the java process to stop. This is the name which
is reported by jps, etc., not the OS-level process name.
"""
super(ProcessStopFaultSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.fault.ProcessStopFaultSpec"
self.message["nodeNames"] = TaskSpec.to_node_names(nodes)
self.message["javaProcessName"] = java_process_name

View File

@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.services.service import Service
from kafkatest.services.trogdor.task_spec import TaskSpec
class ProduceBenchWorkloadSpec(TaskSpec):
def __init__(self, start_ms, duration_ms, producer_node, bootstrap_servers,
target_messages_per_sec, max_messages, producer_conf, admin_client_conf,
common_client_conf, inactive_topics, active_topics,
transaction_generator=None):
super(ProduceBenchWorkloadSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.workload.ProduceBenchSpec"
self.message["producerNode"] = producer_node
self.message["bootstrapServers"] = bootstrap_servers
self.message["targetMessagesPerSec"] = target_messages_per_sec
self.message["maxMessages"] = max_messages
self.message["producerConf"] = producer_conf
self.message["transactionGenerator"] = transaction_generator
self.message["adminClientConf"] = admin_client_conf
self.message["commonClientConf"] = common_client_conf
self.message["inactiveTopics"] = inactive_topics
self.message["activeTopics"] = active_topics
class ProduceBenchWorkloadService(Service):
def __init__(self, context, kafka):
Service.__init__(self, context, num_nodes=1)
self.bootstrap_servers = kafka.bootstrap_servers(validate=False)
self.producer_node = self.nodes[0].account.hostname
def free(self):
Service.free(self)
def wait_node(self, node, timeout_sec=None):
pass
def stop_node(self, node):
pass
def clean_node(self, node):
pass

View File

@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ducktape.services.service import Service
from kafkatest.services.trogdor.task_spec import TaskSpec
class RoundTripWorkloadSpec(TaskSpec):
def __init__(self, start_ms, duration_ms, client_node, bootstrap_servers,
target_messages_per_sec, max_messages, active_topics):
super(RoundTripWorkloadSpec, self).__init__(start_ms, duration_ms)
self.message["class"] = "org.apache.kafka.trogdor.workload.RoundTripWorkloadSpec"
self.message["clientNode"] = client_node
self.message["bootstrapServers"] = bootstrap_servers
self.message["targetMessagesPerSec"] = target_messages_per_sec
self.message["maxMessages"] = max_messages
self.message["activeTopics"] = active_topics
class RoundTripWorkloadService(Service):
def __init__(self, context, kafka):
Service.__init__(self, context, num_nodes=1)
self.bootstrap_servers = kafka.bootstrap_servers(validate=False)
self.client_node = self.nodes[0].account.hostname
def free(self):
Service.free(self)
def wait_node(self, node, timeout_sec=None):
pass
def stop_node(self, node):
pass
def clean_node(self, node):
pass

View File

@@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
class TaskSpec(object):
"""
The base class for a task specification.
MAX_DURATION_MS The longest duration we should use for a task specification.
"""
MAX_DURATION_MS=10000000
def __init__(self, start_ms, duration_ms):
"""
Create a new task specification.
:param start_ms: The target start time in milliseconds since the epoch.
:param duration_ms: The duration in milliseconds.
"""
self.message = {
'startMs': start_ms,
'durationMs': duration_ms
}
@staticmethod
def to_node_names(nodes):
"""
Convert an array of nodes or node names to an array of node names.
"""
node_names = []
for obj in nodes:
if isinstance(obj, basestring):
node_names.append(obj)
else:
node_names.append(obj.name)
return node_names
def __str__(self):
return json.dumps(self.message)
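# Illustrative example (not part of the original file): what str() of a spec looks
# like, using NoOpTaskSpec from no_op_task_spec.py; key order may differ by Python version.
#
#   str(NoOpTaskSpec(start_ms=0, duration_ms=10000))
#   # -> '{"startMs": 0, "durationMs": 10000, "class": "org.apache.kafka.trogdor.task.NoOpTaskSpec"}'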

View File

@@ -0,0 +1,23 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
log4j.rootLogger=DEBUG, mylogger
log4j.logger.kafka=DEBUG
log4j.logger.org.apache.kafka=DEBUG
log4j.logger.org.eclipse=INFO
log4j.appender.mylogger=org.apache.log4j.FileAppender
log4j.appender.mylogger.File={{ log_path }}
log4j.appender.mylogger.layout=org.apache.log4j.PatternLayout
log4j.appender.mylogger.layout.ConversionPattern=[%d] %p %m (%c)%n

View File

@@ -0,0 +1,354 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os.path
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3 import Retry
from ducktape.services.service import Service
from ducktape.utils.util import wait_until
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
class TrogdorService(KafkaPathResolverMixin, Service):
"""
A ducktape service for running the trogdor fault injection daemons.
Attributes:
PERSISTENT_ROOT The root filesystem path to store service files under.
COORDINATOR_STDOUT_STDERR The path where we store the coordinator's stdout/stderr output.
AGENT_STDOUT_STDERR The path where we store the agent's stdout/stderr output.
COORDINATOR_LOG The path where we store the coordinator's log4j output.
AGENT_LOG The path where we store the agent's log4j output.
AGENT_LOG4J_PROPERTIES The path to the agent log4j.properties file for log config.
COORDINATOR_LOG4J_PROPERTIES The path to the coordinator log4j.properties file for log config.
CONFIG_PATH The path to the trogdor configuration file.
DEFAULT_AGENT_PORT The default port to use for trogdor_agent daemons.
DEFAULT_COORDINATOR_PORT The default port to use for trogdor_coordinator daemons.
REQUEST_TIMEOUT The request timeout in seconds to use for REST requests.
REQUEST_HEADERS The request headers to use when communicating with trogdor.
"""
PERSISTENT_ROOT="/mnt/trogdor"
COORDINATOR_STDOUT_STDERR = os.path.join(PERSISTENT_ROOT, "trogdor-coordinator-stdout-stderr.log")
AGENT_STDOUT_STDERR = os.path.join(PERSISTENT_ROOT, "trogdor-agent-stdout-stderr.log")
COORDINATOR_LOG = os.path.join(PERSISTENT_ROOT, "trogdor-coordinator.log")
AGENT_LOG = os.path.join(PERSISTENT_ROOT, "trogdor-agent.log")
COORDINATOR_LOG4J_PROPERTIES = os.path.join(PERSISTENT_ROOT, "trogdor-coordinator-log4j.properties")
AGENT_LOG4J_PROPERTIES = os.path.join(PERSISTENT_ROOT, "trogdor-agent-log4j.properties")
CONFIG_PATH = os.path.join(PERSISTENT_ROOT, "trogdor.conf")
DEFAULT_AGENT_PORT=8888
DEFAULT_COORDINATOR_PORT=8889
REQUEST_TIMEOUT=5
REQUEST_HEADERS = {"Content-type": "application/json"}
logs = {
"trogdor_coordinator_stdout_stderr": {
"path": COORDINATOR_STDOUT_STDERR,
"collect_default": True},
"trogdor_agent_stdout_stderr": {
"path": AGENT_STDOUT_STDERR,
"collect_default": True},
"trogdor_coordinator_log": {
"path": COORDINATOR_LOG,
"collect_default": True},
"trogdor_agent_log": {
"path": AGENT_LOG,
"collect_default": True},
}
def __init__(self, context, agent_nodes=None, client_services=None,
agent_port=DEFAULT_AGENT_PORT, coordinator_port=DEFAULT_COORDINATOR_PORT):
"""
Create a Trogdor service.
:param context: The test context.
:param agent_nodes: The nodes to run the agents on.
:param client_services: Services whose nodes we should run agents on.
:param agent_port: The port to use for the trogdor_agent daemons.
:param coordinator_port: The port to use for the trogdor_coordinator daemons.
"""
Service.__init__(self, context, num_nodes=1)
self.coordinator_node = self.nodes[0]
if client_services is not None:
for client_service in client_services:
for node in client_service.nodes:
self.nodes.append(node)
if agent_nodes is not None:
for agent_node in agent_nodes:
self.nodes.append(agent_node)
if (len(self.nodes) == 1):
raise RuntimeError("You must supply at least one agent node to run the service on.")
self.agent_port = agent_port
self.coordinator_port = coordinator_port
def free(self):
# We only want to deallocate the coordinator node, not the agent nodes. So we
# change self.nodes to include only the coordinator node, and then invoke
# the base class' free method.
if self.coordinator_node is not None:
self.nodes = [self.coordinator_node]
self.coordinator_node = None
Service.free(self)
def _create_config_dict(self):
"""
Create a dictionary with the Trogdor configuration.
:return: The configuration dictionary.
"""
dict_nodes = {}
for node in self.nodes:
dict_nodes[node.name] = {
"hostname": node.account.ssh_hostname,
}
if node.name == self.coordinator_node.name:
dict_nodes[node.name]["trogdor.coordinator.port"] = self.coordinator_port
else:
dict_nodes[node.name]["trogdor.agent.port"] = self.agent_port
return {
"platform": "org.apache.kafka.trogdor.basic.BasicPlatform",
"nodes": dict_nodes,
}
def start_node(self, node):
node.account.mkdirs(TrogdorService.PERSISTENT_ROOT)
# Create the configuration file on the node.
config_str = json.dumps(self._create_config_dict(), indent=2)
self.logger.info("Creating configuration file %s with %s" % (TrogdorService.CONFIG_PATH, config_str))
node.account.create_file(TrogdorService.CONFIG_PATH, config_str)
if self.is_coordinator(node):
self._start_coordinator_node(node)
else:
self._start_agent_node(node)
def _start_coordinator_node(self, node):
node.account.create_file(TrogdorService.COORDINATOR_LOG4J_PROPERTIES,
self.render('log4j.properties',
log_path=TrogdorService.COORDINATOR_LOG))
self._start_trogdor_daemon("coordinator", TrogdorService.COORDINATOR_STDOUT_STDERR,
TrogdorService.COORDINATOR_LOG4J_PROPERTIES,
TrogdorService.COORDINATOR_LOG, node)
self.logger.info("Started trogdor coordinator on %s." % node.name)
def _start_agent_node(self, node):
node.account.create_file(TrogdorService.AGENT_LOG4J_PROPERTIES,
self.render('log4j.properties',
log_path=TrogdorService.AGENT_LOG))
self._start_trogdor_daemon("agent", TrogdorService.AGENT_STDOUT_STDERR,
TrogdorService.AGENT_LOG4J_PROPERTIES,
TrogdorService.AGENT_LOG, node)
self.logger.info("Started trogdor agent on %s." % node.name)
def _start_trogdor_daemon(self, daemon_name, stdout_stderr_capture_path,
log4j_properties_path, log_path, node):
cmd = "export KAFKA_LOG4J_OPTS='-Dlog4j.configuration=file:%s'; " % log4j_properties_path
cmd += "%s %s --%s.config %s --node-name %s 1>> %s 2>> %s &" % \
(self.path.script("trogdor.sh", node),
daemon_name,
daemon_name,
TrogdorService.CONFIG_PATH,
node.name,
stdout_stderr_capture_path,
stdout_stderr_capture_path)
node.account.ssh(cmd)
with node.account.monitor_log(log_path) as monitor:
monitor.wait_until("Starting %s process." % daemon_name, timeout_sec=60, backoff_sec=.10,
err_msg=("%s on %s didn't finish startup" % (daemon_name, node.name)))
def wait_node(self, node, timeout_sec=None):
if self.is_coordinator(node):
return len(node.account.java_pids(self.coordinator_class_name())) == 0
else:
return len(node.account.java_pids(self.agent_class_name())) == 0
def stop_node(self, node):
"""Halt trogdor processes on this node."""
if self.is_coordinator(node):
node.account.kill_java_processes(self.coordinator_class_name())
else:
node.account.kill_java_processes(self.agent_class_name())
def clean_node(self, node):
"""Clean up persistent state on this node - e.g. service logs, configuration files etc."""
self.stop_node(node)
node.account.ssh("rm -rf -- %s" % TrogdorService.PERSISTENT_ROOT)
def _coordinator_url(self, path):
return "http://%s:%d/coordinator/%s" % \
(self.coordinator_node.account.ssh_hostname, self.coordinator_port, path)
def request_session(self):
"""
Creates a new request session which will retry for a while.
"""
session = requests.Session()
session.mount('http://',
HTTPAdapter(max_retries=Retry(total=5, backoff_factor=0.3)))
return session
def _coordinator_post(self, path, message):
"""
Make a POST request to the Trogdor coordinator.
:param path: The URL path to use.
:param message: The message object to send.
:return: The response as an object.
"""
url = self._coordinator_url(path)
self.logger.info("POST %s %s" % (url, message))
response = self.request_session().post(url, json=message,
timeout=TrogdorService.REQUEST_TIMEOUT,
headers=TrogdorService.REQUEST_HEADERS)
response.raise_for_status()
return response.json()
def _coordinator_put(self, path, message):
"""
Make a PUT request to the Trogdor coordinator.
:param path: The URL path to use.
:param message: The message object to send.
:return: The response as an object.
"""
url = self._coordinator_url(path)
self.logger.info("PUT %s %s" % (url, message))
response = self.request_session().put(url, json=message,
timeout=TrogdorService.REQUEST_TIMEOUT,
headers=TrogdorService.REQUEST_HEADERS)
response.raise_for_status()
return response.json()
def _coordinator_get(self, path, message):
"""
Make a GET request to the Trogdor coordinator.
:param path: The URL path to use.
:param message: The message object to send.
:return: The response as an object.
"""
url = self._coordinator_url(path)
self.logger.info("GET %s %s" % (url, message))
response = self.request_session().get(url, json=message,
timeout=TrogdorService.REQUEST_TIMEOUT,
headers=TrogdorService.REQUEST_HEADERS)
response.raise_for_status()
return response.json()
def create_task(self, id, spec):
"""
Create a new task.
:param id: The task id.
:param spec: The task spec.
"""
self._coordinator_post("task/create", { "id": id, "spec": spec.message})
return TrogdorTask(id, self)
def stop_task(self, id):
"""
Stop a task.
:param id: The task id.
"""
self._coordinator_put("task/stop", { "id": id })
def tasks(self):
"""
Get the tasks which are on the coordinator.
:returns: A map of task id strings to task state objects.
Task state objects contain a 'spec' field with the spec
and a 'state' field with the state.
"""
return self._coordinator_get("tasks", {})
def is_coordinator(self, node):
return node == self.coordinator_node
def agent_class_name(self):
return "org.apache.kafka.trogdor.agent.Agent"
def coordinator_class_name(self):
return "org.apache.kafka.trogdor.coordinator.Coordinator"
class TrogdorTask(object):
PENDING_STATE = "PENDING"
RUNNING_STATE = "RUNNING"
STOPPING_STATE = "STOPPING"
DONE_STATE = "DONE"
def __init__(self, id, trogdor):
self.id = id
self.trogdor = trogdor
def task_state_or_error(self):
task_state = self.trogdor.tasks()["tasks"][self.id]
if task_state is None:
raise RuntimeError("Coordinator did not know about %s." % self.id)
error = task_state.get("error")
if error is None or error == "":
return task_state["state"], None
else:
return None, error
def done(self):
"""
Check if this task is done.
:raises RuntimeError: If the task encountered an error.
:returns: True if the task is in DONE_STATE;
False if it is in a different state.
"""
(task_state, error) = self.task_state_or_error()
if task_state is not None:
return task_state == TrogdorTask.DONE_STATE
else:
raise RuntimeError("Failed to gracefully stop %s: got task error: %s" % (self.id, error))
def running(self):
"""
Check if this task is running.
:raises RuntimeError: If the task encountered an error.
:returns: True if the task is in RUNNING_STATE;
False if it is in a different state.
"""
(task_state, error) = self.task_state_or_error()
if task_state is not None:
return task_state == TrogdorTask.RUNNING_STATE
else:
raise RuntimeError("Failed to start %s: got task error: %s" % (self.id, error))
def stop(self):
"""
Stop this task.
:raises RuntimeError: If the task encountered an error.
"""
if self.done():
return
self.trogdor.stop_task(self.id)
def wait_for_done(self, timeout_sec=360):
wait_until(lambda: self.done(),
timeout_sec=timeout_sec,
err_msg="%s failed to finish in the expected amount of time." % self.id)

View File

@@ -0,0 +1,330 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from kafkatest.directory_layout.kafka_path import TOOLS_JAR_NAME, TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME
from kafkatest.version import DEV_BRANCH, LATEST_0_8_2
from ducktape.cluster.remoteaccount import RemoteCommandError
import importlib
import os
import subprocess
import signal
"""This module abstracts the implementation of a verifiable client, allowing
client developers to plug in their own client for all kafkatests that make
use of either the VerifiableConsumer or VerifiableProducer classes.
A verifiable client class must implement exec_cmd() and pids().
This file provides:
* VerifiableClientMixin class: to be used for creating new verifiable client classes
* VerifiableClientJava class: the default Java verifiable clients
* VerifiableClientApp class: uses global configuration to specify
the command to execute and optional "pids" command, deploy script, etc.
Config syntax (pass as --global <json_or_jsonfile>):
{"Verifiable(Producer|Consumer|Client)": {
"class": "kafkatest.services.verifiable_client.VerifiableClientApp",
"exec_cmd": "/vagrant/x/myclient --some --standard --args",
"pids": "pgrep -f ...", // optional
"deploy": "/vagrant/x/mydeploy.sh", // optional
"kill_signal": 2 // optional clean_shutdown kill signal (SIGINT in this case)
}}
* VerifiableClientDummy class: testing dummy
==============================
Verifiable client requirements
==============================
There are currently two verifiable client specifications:
* VerifiableConsumer
* VerifiableProducer
Common requirements for both:
* One-way communication (client -> tests) through new-line delimited
JSON objects on stdout (details below).
* Log/debug to stderr
Common communication for both:
* `{ "name": "startup_complete" }` - Client succesfully started
* `{ "name": "shutdown_complete" }` - Client succesfully terminated (after receiving SIGINT/SIGTERM)
==================
VerifiableConsumer
==================
Command line arguments:
* `--group-id <group-id>`
* `--topic <topic>`
* `--broker-list <brokers>`
* `--session-timeout <n>`
* `--enable-autocommit`
* `--max-messages <n>`
* `--assignment-strategy <s>`
* `--consumer.config <config-file>` - consumer config properties (typically empty)
Environment variables:
* `LOG_DIR` - log output directory. Typically not needed if logs are written to stderr.
* `KAFKA_OPTS` - Security config properties (Java client syntax)
* `KAFKA_LOG4J_OPTS` - Java log4j options (can be ignored)
Client communication:
* `{ "name": "offsets_committed", "success": bool, "error": "<errstr>", "offsets": [ { "topic": "<t>", "partition": <p>, "offset": <o> } ] }` - offset commit results, should be emitted for each committed offset. Emit prior to partitions_revoked.
* `{ "name": "records_consumed", "partitions": [ { "topic": "<t>", "partition": <p>, "minOffset": <o>, "maxOffset": <o> } ], "count": <total_consumed> }` - per-partition delta stats from last records_consumed. Emit every 1000 messages, or 1s. Emit prior to partitions_assigned, partitions_revoked and offsets_committed.
* `{ "name": "partitions_revoked", "partitions": [ { "topic": "<t>", "partition": <p> } ] }` - rebalance: revoked partitions
* `{ "name": "partitions_assigned", "partitions": [ { "topic": "<t>", "partition": <p> } ] }` - rebalance: assigned partitions
==================
VerifiableProducer
==================
Command line arguments:
* `--topic <topic>`
* `--broker-list <brokers>`
* `--max-messages <n>`
* `--throughput <msgs/s>`
* `--producer.config <config-file>` - producer config properties (typically empty)
Environment variables:
* `LOG_DIR` - log output directory. Typically not needed if logs are written to stderr.
* `KAFKA_OPTS` - Security config properties (Java client syntax)
* `KAFKA_LOG4J_OPTS` - Java log4j options (can be ignored)
Client communication:
* `{ "name": "producer_send_error", "message": "<error msg>", "topic": "<t>", "key": "<msg key>", "value": "<msg value>" }` - emit on produce error.
* `{ "name": "producer_send_success", "topic": "<t>", "partition": <p>, "offset": <o>, "key": "<msg key>", "value": "<msg value>" }` - emit on produce success.
===========
Development
===========
**Logs:**
During development of kafkatest clients it is generally a good idea to
enable collection of the client's stdout and stderr logs for troubleshooting.
Do this by setting "collect_default" to True for verifiable_consumer_stdout
and .._stderr in verifiable_consumer.py and verifiable_producer.py
**Deployment:**
There's currently no automatic way of deploying 3rd party kafkatest clients
on the VM instance so this needs to be done (at least partially) manually for
now.
One way to do this is logging in to a worker (`vagrant ssh worker1`), downloading
and building the kafkatest client under /vagrant (which maps to the kafka root
directory on the host and is shared with all VM instances).
Also make sure to install any system-level dependencies on each instance.
Then use /vagrant/..../yourkafkatestclient as your run-time path since it will
now be available on all instances.
The VerifiableClientApp automates the per-worker deployment with the optional
"deploy": "/vagrant/../deploy_script.sh" globals configuration property, this
script will be called on the VM just prior to executing the client.
"""
def create_verifiable_client_implementation(context, parent):
"""Factory for generating a verifiable client implementation class instance
:param parent: parent class instance, either VerifiableConsumer or VerifiableProducer
This will first check for a fully qualified client implementation class name
in context.globals as "Verifiable<type>" where <type> is "Producer" or "Consumer",
followed by "VerifiableClient" (which should implement both).
The global object layout is: {"class": "<full class name>", "..anything..": ..}.
If present, construct a new instance, else defaults to VerifiableClientJava
"""
# Default class
obj = {"class": "kafkatest.services.verifiable_client.VerifiableClientJava"}
parent_name = parent.__class__.__name__.rsplit('.', 1)[-1]
for k in [parent_name, "VerifiableClient"]:
if k in context.globals:
obj = context.globals[k]
break
if "class" not in obj:
raise SyntaxError('%s (or VerifiableClient) expected object format: {"class": "full.class.path", ..}' % parent_name)
clname = obj["class"]
# Using the fully qualified classname, import the implementation class
if clname.find('.') == -1:
raise SyntaxError("%s (or VerifiableClient) must specify full class path (including module)" % parent_name)
(module_name, clname) = clname.rsplit('.', 1)
cluster_mod = importlib.import_module(module_name)
impl_class = getattr(cluster_mod, clname)
return impl_class(parent, obj)
class VerifiableClientMixin (object):
"""
Verifiable client mixin class
"""
@property
def impl (self):
"""
:return: Return (and create if necessary) the Verifiable client implementation object.
"""
# Add _impl attribute to parent Verifiable(Consumer|Producer) object.
if not hasattr(self, "_impl"):
setattr(self, "_impl", create_verifiable_client_implementation(self.context, self))
if hasattr(self.context, "logger") and self.context.logger is not None:
self.context.logger.debug("Using client implementation %s for %s" % (self._impl.__class__.__name__, self.__class__.__name__))
return self._impl
def exec_cmd (self, node):
"""
:return: command string to execute client.
Environment variables will be prepended and command line arguments
appended to this string later by start_cmd().
This method should also take care of deploying the client on the instance, if necessary.
"""
raise NotImplementedError()
def pids (self, node):
""" :return: list of pids for this client instance on node """
raise NotImplementedError()
def kill_signal (self, clean_shutdown=True):
""" :return: the kill signal to terminate the application. """
if not clean_shutdown:
return signal.SIGKILL
return self.conf.get("kill_signal", signal.SIGTERM)
class VerifiableClientJava (VerifiableClientMixin):
"""
Verifiable Consumer and Producer using the official Java client.
"""
def __init__(self, parent, conf=None):
"""
:param parent: The parent instance, either VerifiableConsumer or VerifiableProducer
:param conf: Optional conf object (the --globals VerifiableX object)
"""
super(VerifiableClientJava, self).__init__()
self.parent = parent
self.java_class_name = parent.java_class_name()
self.conf = conf
def exec_cmd (self, node):
""" :return: command to execute to start instance
Translates Verifiable* to the corresponding Java client class name """
cmd = ""
if self.java_class_name == 'VerifiableProducer' and node.version <= LATEST_0_8_2:
# 0.8.2.X releases do not have VerifiableProducer.java, so cheat and add
# the tools jar from trunk to the classpath
tools_jar = self.parent.path.jar(TOOLS_JAR_NAME, DEV_BRANCH)
tools_dependant_libs_jar = self.parent.path.jar(TOOLS_DEPENDANT_TEST_LIBS_JAR_NAME, DEV_BRANCH)
cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % tools_jar
cmd += "for file in %s; do CLASSPATH=$CLASSPATH:$file; done; " % tools_dependant_libs_jar
cmd += "export CLASSPATH; "
cmd += self.parent.path.script("kafka-run-class.sh", node) + " org.apache.kafka.tools." + self.java_class_name
return cmd
def pids (self, node):
""" :return: pid(s) for this client intstance on node """
try:
cmd = "jps | grep -i " + self.java_class_name + " | awk '{print $1}'"
pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)]
return pid_arr
except (RemoteCommandError, ValueError) as e:
return []
class VerifiableClientDummy (VerifiableClientMixin):
"""
Dummy class for testing the pluggable framework
"""
def __init__(self, parent, conf=None):
"""
:param parent: The parent instance, either VerifiableConsumer or VerifiableProducer
:param conf: Optional conf object (the --globals VerifiableX object)
"""
super(VerifiableClientDummy, self).__init__()
self.parent = parent
self.conf = conf
def exec_cmd (self, node):
""" :return: command to execute to start instance """
return 'echo -e \'{"name": "shutdown_complete" }\n\' ; echo ARGS:'
def pids (self, node):
""" :return: pid(s) for this client intstance on node """
return []
class VerifiableClientApp (VerifiableClientMixin):
"""
VerifiableClient using --global settings for exec_cmd, pids and deploy.
By using this a verifiable client application can be used through simple
--globals configuration rather than implementing a Python class.
"""
def __init__(self, parent, conf):
"""
:param parent: The parent instance, either VerifiableConsumer or VerifiableProducer
:param conf: Optional conf object (the --globals VerifiableX object)
"""
super(VerifiableClientApp, self).__init__()
self.parent = parent
# "VerifiableConsumer" or "VerifiableProducer"
self.name = self.parent.__class__.__name__
self.conf = conf
if "exec_cmd" not in self.conf:
raise SyntaxError("%s requires \"exec_cmd\": .. to be set in --globals %s object" % \
(self.__class__.__name__, self.name))
def exec_cmd (self, node):
""" :return: command to execute to start instance """
self.deploy(node)
return self.conf["exec_cmd"]
def pids (self, node):
""" :return: pid(s) for this client intstance on node """
cmd = self.conf.get("pids", "pgrep -f '" + self.conf["exec_cmd"] + "'")
try:
pid_arr = [pid for pid in node.account.ssh_capture(cmd, allow_fail=True, callback=int)]
self.parent.context.logger.info("%s pids are: %s" % (str(node.account), pid_arr))
return pid_arr
except (subprocess.CalledProcessError, ValueError) as e:
return []
def deploy (self, node):
""" Call deploy script specified by "deploy" --global key
This optional script is run on the VM instance just prior to
executing `exec_cmd` to deploy the kafkatest client.
The script path must be as seen by the VM instance, e.g. /vagrant/.... """
if "deploy" not in self.conf:
return
script_cmd = self.conf["deploy"]
self.parent.context.logger.debug("Deploying %s: %s" % (self, script_cmd))
r = node.account.ssh(script_cmd)

View File

@@ -0,0 +1,418 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
from ducktape.services.background_thread import BackgroundThreadService
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.kafka import TopicPartition
from kafkatest.services.verifiable_client import VerifiableClientMixin
from kafkatest.version import DEV_BRANCH, V_2_3_0, V_2_3_1, V_0_10_0_0
class ConsumerState:
Started = 1
Dead = 2
Rebalancing = 3
Joined = 4
class ConsumerEventHandler(object):
def __init__(self, node, verify_offsets, idx):
self.node = node
self.idx = idx
self.state = ConsumerState.Dead
self.revoked_count = 0
self.assigned_count = 0
self.assignment = []
self.position = {}
self.committed = {}
self.total_consumed = 0
self.verify_offsets = verify_offsets
def handle_shutdown_complete(self):
self.state = ConsumerState.Dead
self.assignment = []
self.position = {}
def handle_startup_complete(self):
self.state = ConsumerState.Started
def handle_offsets_committed(self, event, node, logger):
if event["success"]:
for offset_commit in event["offsets"]:
if offset_commit.get("error", "") != "":
logger.debug("%s: Offset commit failed for: %s" % (str(node.account), offset_commit))
continue
topic = offset_commit["topic"]
partition = offset_commit["partition"]
tp = TopicPartition(topic, partition)
offset = offset_commit["offset"]
assert tp in self.assignment, \
"Committed offsets for partition %s not assigned (current assignment: %s)" % \
(str(tp), str(self.assignment))
assert tp in self.position, "No previous position for %s: %s" % (str(tp), event)
assert self.position[tp] >= offset, \
"The committed offset %d was greater than the current position %d for partition %s" % \
(offset, self.position[tp], str(tp))
self.committed[tp] = offset
def handle_records_consumed(self, event, logger):
assert self.state == ConsumerState.Joined, \
"Consumed records should only be received when joined (current state: %s)" % str(self.state)
for record_batch in event["partitions"]:
tp = TopicPartition(topic=record_batch["topic"],
partition=record_batch["partition"])
min_offset = record_batch["minOffset"]
max_offset = record_batch["maxOffset"]
assert tp in self.assignment, \
"Consumed records for partition %s which is not assigned (current assignment: %s)" % \
(str(tp), str(self.assignment))
if tp not in self.position or self.position[tp] == min_offset:
self.position[tp] = max_offset + 1
else:
msg = "Consumed from an unexpected offset (%d, %d) for partition %s" % \
(self.position.get(tp), min_offset, str(tp))
if self.verify_offsets:
raise AssertionError(msg)
else:
if tp in self.position:
self.position[tp] = max_offset + 1
logger.warn(msg)
self.total_consumed += event["count"]
def handle_partitions_revoked(self, event):
self.revoked_count += 1
self.state = ConsumerState.Rebalancing
self.position = {}
def handle_partitions_assigned(self, event):
self.assigned_count += 1
self.state = ConsumerState.Joined
assignment = []
for topic_partition in event["partitions"]:
topic = topic_partition["topic"]
partition = topic_partition["partition"]
assignment.append(TopicPartition(topic, partition))
self.assignment = assignment
def handle_kill_process(self, clean_shutdown):
# if the shutdown was clean, then we expect the explicit
# shutdown event from the consumer
if not clean_shutdown:
self.handle_shutdown_complete()
def current_assignment(self):
return list(self.assignment)
def current_position(self, tp):
if tp in self.position:
return self.position[tp]
else:
return None
def last_commit(self, tp):
if tp in self.committed:
return self.committed[tp]
else:
return None
class VerifiableConsumer(KafkaPathResolverMixin, VerifiableClientMixin, BackgroundThreadService):
"""This service wraps org.apache.kafka.tools.VerifiableConsumer for use in
system testing.
NOTE: this class should be treated as a PUBLIC API. Downstream users use
this service both directly and through class extension, so care must be
taken to ensure compatibility.
"""
PERSISTENT_ROOT = "/mnt/verifiable_consumer"
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_consumer.stdout")
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_consumer.stderr")
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
LOG_FILE = os.path.join(LOG_DIR, "verifiable_consumer.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "verifiable_consumer.properties")
logs = {
"verifiable_consumer_stdout": {
"path": STDOUT_CAPTURE,
"collect_default": False},
"verifiable_consumer_stderr": {
"path": STDERR_CAPTURE,
"collect_default": False},
"verifiable_consumer_log": {
"path": LOG_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, kafka, topic, group_id,
static_membership=False, max_messages=-1, session_timeout_sec=30, enable_autocommit=False,
assignment_strategy=None,
version=DEV_BRANCH, stop_timeout_sec=30, log_level="INFO", jaas_override_variables=None,
on_record_consumed=None, reset_policy="earliest", verify_offsets=True):
"""
:param jaas_override_variables: A dict of variables to be used in the jaas.conf template file
"""
super(VerifiableConsumer, self).__init__(context, num_nodes)
self.log_level = log_level
self.kafka = kafka
self.topic = topic
self.group_id = group_id
self.reset_policy = reset_policy
self.static_membership = static_membership
self.max_messages = max_messages
self.session_timeout_sec = session_timeout_sec
self.enable_autocommit = enable_autocommit
self.assignment_strategy = assignment_strategy
self.prop_file = ""
self.stop_timeout_sec = stop_timeout_sec
self.on_record_consumed = on_record_consumed
self.verify_offsets = verify_offsets
self.event_handlers = {}
self.global_position = {}
self.global_committed = {}
self.jaas_override_variables = jaas_override_variables or {}
for node in self.nodes:
node.version = version
def java_class_name(self):
return "VerifiableConsumer"
def _worker(self, idx, node):
with self.lock:
if node not in self.event_handlers:
self.event_handlers[node] = ConsumerEventHandler(node, self.verify_offsets, idx)
handler = self.event_handlers[node]
node.account.ssh("mkdir -p %s" % VerifiableConsumer.PERSISTENT_ROOT, allow_fail=False)
# Create and upload log properties
log_config = self.render('tools_log4j.properties', log_file=VerifiableConsumer.LOG_FILE)
node.account.create_file(VerifiableConsumer.LOG4J_CONFIG, log_config)
# Create and upload config file
self.security_config = self.kafka.security_config.client_config(self.prop_file, node,
self.jaas_override_variables)
self.security_config.setup_node(node)
self.prop_file += str(self.security_config)
self.logger.info("verifiable_consumer.properties:")
self.logger.info(self.prop_file)
node.account.create_file(VerifiableConsumer.CONFIG_FILE, self.prop_file)
self.security_config.setup_node(node)
# apply group.instance.id to the node for static membership validation
node.group_instance_id = None
if self.static_membership:
assert node.version >= V_2_3_0, \
"Version %s does not support static membership (must be 2.3 or higher)" % str(node.version)
node.group_instance_id = self.group_id + "-instance-" + str(idx)
if self.assignment_strategy:
assert node.version >= V_0_10_0_0, \
"Version %s does not setting an assignment strategy (must be 0.10.0 or higher)" % str(node.version)
cmd = self.start_cmd(node)
self.logger.debug("VerifiableConsumer %d command: %s" % (idx, cmd))
for line in node.account.ssh_capture(cmd):
event = self.try_parse_json(node, line.strip())
if event is not None:
with self.lock:
name = event["name"]
if name == "shutdown_complete":
handler.handle_shutdown_complete()
elif name == "startup_complete":
handler.handle_startup_complete()
elif name == "offsets_committed":
handler.handle_offsets_committed(event, node, self.logger)
self._update_global_committed(event)
elif name == "records_consumed":
handler.handle_records_consumed(event, self.logger)
self._update_global_position(event, node)
elif name == "record_data" and self.on_record_consumed:
self.on_record_consumed(event, node)
elif name == "partitions_revoked":
handler.handle_partitions_revoked(event)
elif name == "partitions_assigned":
handler.handle_partitions_assigned(event)
else:
self.logger.debug("%s: ignoring unknown event: %s" % (str(node.account), event))
def _update_global_position(self, consumed_event, node):
for consumed_partition in consumed_event["partitions"]:
tp = TopicPartition(consumed_partition["topic"], consumed_partition["partition"])
if tp in self.global_committed:
# verify that the position never gets behind the current commit.
if self.global_committed[tp] > consumed_partition["minOffset"]:
msg = "Consumed position %d is behind the current committed offset %d for partition %s" % \
(consumed_partition["minOffset"], self.global_committed[tp], str(tp))
if self.verify_offsets:
raise AssertionError(msg)
else:
self.logger.warn(msg)
# the consumer cannot generally guarantee that the position increases monotonically
# without gaps in the face of hard failures, so we only log a warning when this happens
if tp in self.global_position and self.global_position[tp] != consumed_partition["minOffset"]:
self.logger.warn("%s: Expected next consumed offset of %d for partition %s, but instead saw %d" %
(str(node.account), self.global_position[tp], str(tp), consumed_partition["minOffset"]))
self.global_position[tp] = consumed_partition["maxOffset"] + 1
def _update_global_committed(self, commit_event):
if commit_event["success"]:
for offset_commit in commit_event["offsets"]:
tp = TopicPartition(offset_commit["topic"], offset_commit["partition"])
offset = offset_commit["offset"]
assert self.global_position[tp] >= offset, \
"Committed offset %d for partition %s is ahead of the current position %d" % \
(offset, str(tp), self.global_position[tp])
self.global_committed[tp] = offset
def start_cmd(self, node):
cmd = ""
cmd += "export LOG_DIR=%s;" % VerifiableConsumer.LOG_DIR
cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts
cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % VerifiableConsumer.LOG4J_CONFIG
cmd += self.impl.exec_cmd(node)
if self.on_record_consumed:
cmd += " --verbose"
if node.group_instance_id:
cmd += " --group-instance-id %s" % node.group_instance_id
elif node.version == V_2_3_0 or node.version == V_2_3_1:
# In 2.3, --group-instance-id was required, but would be left empty
# if `None` is passed as the argument value
cmd += " --group-instance-id None"
if self.assignment_strategy:
cmd += " --assignment-strategy %s" % self.assignment_strategy
if self.enable_autocommit:
cmd += " --enable-autocommit "
cmd += " --reset-policy %s --group-id %s --topic %s --broker-list %s --session-timeout %s" % \
(self.reset_policy, self.group_id, self.topic,
self.kafka.bootstrap_servers(self.security_config.security_protocol),
self.session_timeout_sec*1000)
if self.max_messages > 0:
cmd += " --max-messages %s" % str(self.max_messages)
cmd += " --consumer.config %s" % VerifiableConsumer.CONFIG_FILE
cmd += " 2>> %s | tee -a %s &" % (VerifiableConsumer.STDOUT_CAPTURE, VerifiableConsumer.STDOUT_CAPTURE)
return cmd
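# For orientation, a hedged example of what the assembled command above might look
# like on a worker node (hostnames, ports, and the exec wrapper are illustrative
# assumptions; the real values come from the cluster and self.impl.exec_cmd):
#
#   export LOG_DIR=/mnt/verifiable_consumer/logs; export KAFKA_OPTS=...; \
#   export KAFKA_LOG4J_OPTS="-Dlog4j.configuration=file:/mnt/verifiable_consumer/tools-log4j.properties"; \
#   <exec_cmd> --reset-policy earliest --group-id test_group --topic test_topic \
#     --broker-list worker1:9092 --session-timeout 30000 \
#     --consumer.config /mnt/verifiable_consumer/verifiable_consumer.properties \
#     2>> /mnt/verifiable_consumer/verifiable_consumer.stdout | tee -a /mnt/verifiable_consumer/verifiable_consumer.stdout &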
def pids(self, node):
return self.impl.pids(node)
def try_parse_json(self, node, string):
"""Try to parse a string as json. Return None if not parseable."""
try:
return json.loads(string)
except ValueError:
self.logger.debug("%s: Could not parse as json: %s" % (str(node.account), str(string)))
return None
def stop_all(self):
for node in self.nodes:
self.stop_node(node)
def kill_node(self, node, clean_shutdown=True, allow_fail=False):
sig = self.impl.kill_signal(clean_shutdown)
for pid in self.pids(node):
node.account.signal(pid, sig, allow_fail)
with self.lock:
self.event_handlers[node].handle_kill_process(clean_shutdown)
def stop_node(self, node, clean_shutdown=True):
self.kill_node(node, clean_shutdown=clean_shutdown)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def clean_node(self, node):
self.kill_node(node, clean_shutdown=False)
node.account.ssh("rm -rf " + self.PERSISTENT_ROOT, allow_fail=False)
self.security_config.clean_node(node)
def current_assignment(self):
with self.lock:
return { handler.node: handler.current_assignment() for handler in self.event_handlers.itervalues() }
def current_position(self, tp):
with self.lock:
if tp in self.global_position:
return self.global_position[tp]
else:
return None
def owner(self, tp):
with self.lock:
for handler in self.event_handlers.itervalues():
if tp in handler.current_assignment():
return handler.node
return None
def last_commit(self, tp):
with self.lock:
if tp in self.global_committed:
return self.global_committed[tp]
else:
return None
def total_consumed(self):
with self.lock:
return sum(handler.total_consumed for handler in self.event_handlers.itervalues())
def num_rebalances(self):
with self.lock:
return max(handler.assigned_count for handler in self.event_handlers.itervalues())
def num_revokes_for_alive(self, keep_alive=1):
with self.lock:
return max([handler.revoked_count for handler in self.event_handlers.itervalues()
if handler.idx <= keep_alive])
def joined_nodes(self):
with self.lock:
return [handler.node for handler in self.event_handlers.itervalues()
if handler.state == ConsumerState.Joined]
def rebalancing_nodes(self):
with self.lock:
return [handler.node for handler in self.event_handlers.itervalues()
if handler.state == ConsumerState.Rebalancing]
def dead_nodes(self):
with self.lock:
return [handler.node for handler in self.event_handlers.itervalues()
if handler.state == ConsumerState.Dead]
def alive_nodes(self):
with self.lock:
return [handler.node for handler in self.event_handlers.itervalues()
if handler.state != ConsumerState.Dead]
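# Hedged usage sketch: how a ducktape test might drive this service end to end.
# The `test_context` and `kafka` objects are assumptions supplied by the enclosing
# test; only methods defined on VerifiableConsumer above are used.
def _example_run_verifiable_consumer(test_context, kafka):
    from ducktape.utils.util import wait_until
    consumer = VerifiableConsumer(test_context, num_nodes=1, kafka=kafka,
                                  topic="test_topic", group_id="test_group_id",
                                  max_messages=1000)
    consumer.start()
    wait_until(lambda: consumer.total_consumed() >= 1000, timeout_sec=60,
               err_msg="Timed out waiting for the consumer to consume 1000 messages")
    consumer.stop_all()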

View File

@@ -0,0 +1,315 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import time
from ducktape.cluster.remoteaccount import RemoteCommandError
from ducktape.services.background_thread import BackgroundThreadService
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.kafka import TopicPartition
from kafkatest.services.verifiable_client import VerifiableClientMixin
from kafkatest.utils import is_int, is_int_with_prefix
from kafkatest.version import DEV_BRANCH
class VerifiableProducer(KafkaPathResolverMixin, VerifiableClientMixin, BackgroundThreadService):
"""This service wraps org.apache.kafka.tools.VerifiableProducer for use in
system testing.
NOTE: this class should be treated as a PUBLIC API. Downstream users use
this service both directly and through class extension, so care must be
taken to ensure compatibility.
"""
PERSISTENT_ROOT = "/mnt/verifiable_producer"
STDOUT_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_producer.stdout")
STDERR_CAPTURE = os.path.join(PERSISTENT_ROOT, "verifiable_producer.stderr")
LOG_DIR = os.path.join(PERSISTENT_ROOT, "logs")
LOG_FILE = os.path.join(LOG_DIR, "verifiable_producer.log")
LOG4J_CONFIG = os.path.join(PERSISTENT_ROOT, "tools-log4j.properties")
CONFIG_FILE = os.path.join(PERSISTENT_ROOT, "verifiable_producer.properties")
logs = {
"verifiable_producer_stdout": {
"path": STDOUT_CAPTURE,
"collect_default": False},
"verifiable_producer_stderr": {
"path": STDERR_CAPTURE,
"collect_default": False},
"verifiable_producer_log": {
"path": LOG_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, kafka, topic, max_messages=-1, throughput=100000,
message_validator=is_int, compression_types=None, version=DEV_BRANCH, acks=None,
stop_timeout_sec=150, request_timeout_sec=30, log_level="INFO",
enable_idempotence=False, offline_nodes=[], create_time=-1, repeating_keys=None,
jaas_override_variables=None, kafka_opts_override="", client_prop_file_override="",
retries=None):
"""
Args:
:param max_messages number of messages to be produced per producer
:param message_validator checks for an expected format of messages produced. There are
currently two:
* is_int is an integer format; this is the default and expected to be used if
num_nodes = 1
* is_int_with_prefix is recommended if num_nodes > 1, because otherwise each producer
will produce exactly the same messages, and validation may not detect missing messages.
:param compression_types If None, all producers will not use compression; or a list of compression types,
one per producer (could be "none").
:param jaas_override_variables A dict of variables to be used in the jaas.conf template file
:param kafka_opts_override Override parameters of the KAFKA_OPTS environment variable
:param client_prop_file_override Override client.properties file used by the consumer
"""
super(VerifiableProducer, self).__init__(context, num_nodes)
self.log_level = log_level
self.kafka = kafka
self.topic = topic
self.max_messages = max_messages
self.throughput = throughput
self.message_validator = message_validator
self.compression_types = compression_types
if self.compression_types is not None:
assert len(self.compression_types) == num_nodes, "Specify one compression type per node"
for node in self.nodes:
node.version = version
self.acked_values = []
self.acked_values_by_partition = {}
self._last_acked_offsets = {}
self.not_acked_values = []
self.produced_count = {}
self.clean_shutdown_nodes = set()
self.acks = acks
self.stop_timeout_sec = stop_timeout_sec
self.request_timeout_sec = request_timeout_sec
self.enable_idempotence = enable_idempotence
self.offline_nodes = offline_nodes
self.create_time = create_time
self.repeating_keys = repeating_keys
self.jaas_override_variables = jaas_override_variables or {}
self.kafka_opts_override = kafka_opts_override
self.client_prop_file_override = client_prop_file_override
self.retries = retries
def java_class_name(self):
return "VerifiableProducer"
def prop_file(self, node):
idx = self.idx(node)
prop_file = self.render('producer.properties', request_timeout_ms=(self.request_timeout_sec * 1000))
prop_file += "\n{}".format(str(self.security_config))
if self.compression_types is not None:
compression_index = idx - 1
self.logger.info("VerifiableProducer (index = %d) will use compression type = %s", idx,
self.compression_types[compression_index])
prop_file += "\ncompression.type=%s\n" % self.compression_types[compression_index]
return prop_file
def _worker(self, idx, node):
node.account.ssh("mkdir -p %s" % VerifiableProducer.PERSISTENT_ROOT, allow_fail=False)
# Create and upload log properties
log_config = self.render('tools_log4j.properties', log_file=VerifiableProducer.LOG_FILE)
node.account.create_file(VerifiableProducer.LOG4J_CONFIG, log_config)
# Configure security
self.security_config = self.kafka.security_config.client_config(node=node,
jaas_override_variables=self.jaas_override_variables)
self.security_config.setup_node(node)
# Create and upload config file
if self.client_prop_file_override:
producer_prop_file = self.client_prop_file_override
else:
producer_prop_file = self.prop_file(node)
if self.acks is not None:
self.logger.info("VerifiableProducer (index = %d) will use acks = %s", idx, self.acks)
producer_prop_file += "\nacks=%s\n" % self.acks
if self.enable_idempotence:
self.logger.info("Setting up an idempotent producer")
producer_prop_file += "\nmax.in.flight.requests.per.connection=5\n"
producer_prop_file += "\nretries=1000000\n"
producer_prop_file += "\nenable.idempotence=true\n"
elif self.retries is not None:
self.logger.info("VerifiableProducer (index = %d) will use retries = %s", idx, self.retries)
producer_prop_file += "\nretries=%s\n" % self.retries
producer_prop_file += "\ndelivery.timeout.ms=%s\n" % (self.request_timeout_sec * 1000 * self.retries)
self.logger.info("verifiable_producer.properties:")
self.logger.info(producer_prop_file)
node.account.create_file(VerifiableProducer.CONFIG_FILE, producer_prop_file)
cmd = self.start_cmd(node, idx)
self.logger.debug("VerifiableProducer %d command: %s" % (idx, cmd))
self.produced_count[idx] = 0
last_produced_time = time.time()
prev_msg = None
for line in node.account.ssh_capture(cmd):
line = line.strip()
data = self.try_parse_json(line)
if data is not None:
with self.lock:
if data["name"] == "producer_send_error":
data["node"] = idx
self.not_acked_values.append(self.message_validator(data["value"]))
self.produced_count[idx] += 1
elif data["name"] == "producer_send_success":
partition = TopicPartition(data["topic"], data["partition"])
value = self.message_validator(data["value"])
self.acked_values.append(value)
if partition not in self.acked_values_by_partition:
self.acked_values_by_partition[partition] = []
self.acked_values_by_partition[partition].append(value)
self._last_acked_offsets[partition] = data["offset"]
self.produced_count[idx] += 1
# Log information if there is a large gap between successively acknowledged messages
t = time.time()
time_delta_sec = t - last_produced_time
if time_delta_sec > 2 and prev_msg is not None:
self.logger.debug(
"Time delta between successively acked messages is large: " +
"delta_t_sec: %s, prev_message: %s, current_message: %s" % (str(time_delta_sec), str(prev_msg), str(data)))
last_produced_time = t
prev_msg = data
elif data["name"] == "shutdown_complete":
if node in self.clean_shutdown_nodes:
raise Exception("Unexpected shutdown event from producer, already shutdown. Producer index: %d" % idx)
self.clean_shutdown_nodes.add(node)
def _has_output(self, node):
"""Helper used as a proxy to determine whether jmx is running by that jmx_tool_log contains output."""
try:
node.account.ssh("test -z \"$(cat %s)\"" % VerifiableProducer.STDOUT_CAPTURE, allow_fail=False)
return False
except RemoteCommandError:
return True
def start_cmd(self, node, idx):
cmd = "export LOG_DIR=%s;" % VerifiableProducer.LOG_DIR
if self.kafka_opts_override:
cmd += " export KAFKA_OPTS=\"%s\";" % self.kafka_opts_override
else:
cmd += " export KAFKA_OPTS=%s;" % self.security_config.kafka_opts
cmd += " export KAFKA_LOG4J_OPTS=\"-Dlog4j.configuration=file:%s\"; " % VerifiableProducer.LOG4J_CONFIG
cmd += self.impl.exec_cmd(node)
cmd += " --topic %s --broker-list %s" % (self.topic, self.kafka.bootstrap_servers(self.security_config.security_protocol, True, self.offline_nodes))
if self.max_messages > 0:
cmd += " --max-messages %s" % str(self.max_messages)
if self.throughput > 0:
cmd += " --throughput %s" % str(self.throughput)
if self.message_validator == is_int_with_prefix:
cmd += " --value-prefix %s" % str(idx)
if self.acks is not None:
cmd += " --acks %s " % str(self.acks)
if self.create_time > -1:
cmd += " --message-create-time %s " % str(self.create_time)
if self.repeating_keys is not None:
cmd += " --repeating-keys %s " % str(self.repeating_keys)
cmd += " --producer.config %s" % VerifiableProducer.CONFIG_FILE
cmd += " 2>> %s | tee -a %s &" % (VerifiableProducer.STDOUT_CAPTURE, VerifiableProducer.STDOUT_CAPTURE)
return cmd
def kill_node(self, node, clean_shutdown=True, allow_fail=False):
sig = self.impl.kill_signal(clean_shutdown)
for pid in self.pids(node):
node.account.signal(pid, sig, allow_fail)
def pids(self, node):
return self.impl.pids(node)
def alive(self, node):
return len(self.pids(node)) > 0
@property
def last_acked_offsets(self):
with self.lock:
return self._last_acked_offsets
@property
def acked(self):
with self.lock:
return self.acked_values
@property
def acked_by_partition(self):
with self.lock:
return self.acked_values_by_partition
@property
def not_acked(self):
with self.lock:
return self.not_acked_values
@property
def num_acked(self):
with self.lock:
return len(self.acked_values)
@property
def num_not_acked(self):
with self.lock:
return len(self.not_acked_values)
def each_produced_at_least(self, count):
with self.lock:
for idx in range(1, self.num_nodes + 1):
if self.produced_count.get(idx) is None or self.produced_count[idx] < count:
return False
return True
def stop_node(self, node):
# There is a race condition on shutdown if using `max_messages` since the
# VerifiableProducer will shut down automatically when all messages have been
# written. In this case, the process will be gone and the signal will fail.
allow_fail = self.max_messages > 0
self.kill_node(node, clean_shutdown=True, allow_fail=allow_fail)
stopped = self.wait_node(node, timeout_sec=self.stop_timeout_sec)
assert stopped, "Node %s: did not stop within the specified timeout of %s seconds" % \
(str(node.account), str(self.stop_timeout_sec))
def clean_node(self, node):
self.kill_node(node, clean_shutdown=False, allow_fail=False)
node.account.ssh("rm -rf " + self.PERSISTENT_ROOT, allow_fail=False)
self.security_config.clean_node(node)
def try_parse_json(self, string):
"""Try to parse a string as json. Return None if not parseable."""
try:
record = json.loads(string)
return record
except ValueError:
self.logger.debug("Could not parse as json: %s" % str(string))
return None
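# Hedged usage sketch: driving the producer from a ducktape test. The `test_context`
# and `kafka` objects are assumptions provided by the test; everything else uses
# methods and properties defined on VerifiableProducer above.
def _example_run_verifiable_producer(test_context, kafka):
    from ducktape.utils.util import wait_until
    producer = VerifiableProducer(test_context, num_nodes=1, kafka=kafka,
                                  topic="test_topic", max_messages=1000, throughput=100)
    producer.start()
    wait_until(lambda: producer.each_produced_at_least(1000), timeout_sec=60,
               err_msg="Timed out waiting for the producer to send 1000 messages")
    producer.stop_node(producer.nodes[0])
    # Every send attempt is either acked or recorded as a send error
    assert producer.num_acked + producer.num_not_acked >= 1000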

View File

@@ -0,0 +1,251 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import time
from ducktape.services.service import Service
from ducktape.utils.util import wait_until
from ducktape.cluster.remoteaccount import RemoteCommandError
from kafkatest.directory_layout.kafka_path import KafkaPathResolverMixin
from kafkatest.services.security.security_config import SecurityConfig
from kafkatest.version import DEV_BRANCH
class ZookeeperService(KafkaPathResolverMixin, Service):
ROOT = "/mnt/zookeeper"
DATA = os.path.join(ROOT, "data")
HEAP_DUMP_FILE = os.path.join(ROOT, "zk_heap_dump.bin")
logs = {
"zk_log": {
"path": "%s/zk.log" % ROOT,
"collect_default": True},
"zk_data": {
"path": DATA,
"collect_default": False},
"zk_heap_dump_file": {
"path": HEAP_DUMP_FILE,
"collect_default": True}
}
def __init__(self, context, num_nodes, zk_sasl = False, zk_client_port = True, zk_client_secure_port = False,
zk_tls_encrypt_only = False):
"""
:type context
"""
self.kafka_opts = ""
self.zk_sasl = zk_sasl
if not zk_client_port and not zk_client_secure_port:
raise Exception("Cannot disable both ZK clientPort and clientSecurePort")
self.zk_client_port = zk_client_port
self.zk_client_secure_port = zk_client_secure_port
self.zk_tls_encrypt_only = zk_tls_encrypt_only
super(ZookeeperService, self).__init__(context, num_nodes)
@property
def security_config(self):
return SecurityConfig(self.context, zk_sasl=self.zk_sasl, zk_tls=self.zk_client_secure_port)
@property
def security_system_properties(self):
return "-Dzookeeper.authProvider.sasl=org.apache.zookeeper.server.auth.SASLAuthenticationProvider " \
"-DjaasLoginRenew=3600000 " \
"-Djava.security.auth.login.config=%s " \
"-Djava.security.krb5.conf=%s " % (self.security_config.JAAS_CONF_PATH, self.security_config.KRB5CONF_PATH)
@property
def zk_principals(self):
return " zkclient " + ' '.join(['zookeeper/' + zk_node.account.hostname for zk_node in self.nodes])
def restart_cluster(self):
for node in self.nodes:
self.restart_node(node)
def restart_node(self, node):
"""Restart the given node."""
self.stop_node(node)
self.start_node(node)
def start_node(self, node):
idx = self.idx(node)
self.logger.info("Starting ZK node %d on %s", idx, node.account.hostname)
node.account.ssh("mkdir -p %s" % ZookeeperService.DATA)
node.account.ssh("echo %d > %s/myid" % (idx, ZookeeperService.DATA))
self.security_config.setup_node(node)
config_file = self.render('zookeeper.properties')
self.logger.info("zookeeper.properties:")
self.logger.info(config_file)
node.account.create_file("%s/zookeeper.properties" % ZookeeperService.ROOT, config_file)
heap_kafka_opts = "-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=%s" % self.logs["zk_heap_dump_file"]["path"]
other_kafka_opts = self.kafka_opts + ' ' + self.security_system_properties \
if self.security_config.zk_sasl else self.kafka_opts
start_cmd = "export KAFKA_OPTS=\"%s %s\";" % (heap_kafka_opts, other_kafka_opts)
start_cmd += "%s " % self.path.script("zookeeper-server-start.sh", node)
start_cmd += "%s/zookeeper.properties &>> %s &" % (ZookeeperService.ROOT, self.logs["zk_log"]["path"])
node.account.ssh(start_cmd)
wait_until(lambda: self.listening(node), timeout_sec=30, err_msg="Zookeeper node failed to start")
def listening(self, node):
try:
port = 2181 if self.zk_client_port else 2182
cmd = "nc -z %s %s" % (node.account.hostname, port)
node.account.ssh_output(cmd, allow_fail=False)
self.logger.debug("Zookeeper started accepting connections at: '%s:%s')", node.account.hostname, port)
return True
except (RemoteCommandError, ValueError) as e:
return False
def pids(self, node):
return node.account.java_pids(self.java_class_name())
def alive(self, node):
return len(self.pids(node)) > 0
def stop_node(self, node):
idx = self.idx(node)
self.logger.info("Stopping %s node %d on %s" % (type(self).__name__, idx, node.account.hostname))
node.account.kill_java_processes(self.java_class_name(), allow_fail=False)
node.account.kill_java_processes(self.java_cli_class_name(), allow_fail=False)
wait_until(lambda: not self.alive(node), timeout_sec=5, err_msg="Timed out waiting for zookeeper to stop.")
def clean_node(self, node):
self.logger.info("Cleaning ZK node %d on %s", self.idx(node), node.account.hostname)
if self.alive(node):
self.logger.warn("%s %s was still alive at cleanup time. Killing forcefully..." %
(self.__class__.__name__, node.account))
node.account.kill_java_processes(self.java_class_name(),
clean_shutdown=False, allow_fail=True)
node.account.kill_java_processes(self.java_cli_class_name(),
clean_shutdown=False, allow_fail=False)
node.account.ssh("rm -rf -- %s" % ZookeeperService.ROOT, allow_fail=False)
# force_tls is a necessary option for the case where we define both encrypted and non-encrypted ports
def connect_setting(self, chroot=None, force_tls=False):
if chroot and not chroot.startswith("/"):
raise Exception("ZK chroot must start with '/', invalid chroot: %s" % chroot)
chroot = '' if chroot is None else chroot
return ','.join([node.account.hostname + (':2182' if not self.zk_client_port or force_tls else ':2181') + chroot
for node in self.nodes])
def zkTlsConfigFileOption(self, forZooKeeperMain=False):
if not self.zk_client_secure_port:
return ""
return ("-zk-tls-config-file " if forZooKeeperMain else "--zk-tls-config-file ") + \
(SecurityConfig.ZK_CLIENT_TLS_ENCRYPT_ONLY_CONFIG_PATH if self.zk_tls_encrypt_only else SecurityConfig.ZK_CLIENT_MUTUAL_AUTH_CONFIG_PATH)
#
# This call is used to simulate a rolling upgrade to enable/disable
# the use of ZooKeeper ACLs.
#
def zookeeper_migration(self, node, zk_acl):
la_migra_cmd = "export KAFKA_OPTS=\"%s\";" % \
self.security_system_properties if self.security_config.zk_sasl else ""
la_migra_cmd += "%s --zookeeper.acl=%s --zookeeper.connect=%s %s" % \
(self.path.script("zookeeper-security-migration.sh", node), zk_acl,
self.connect_setting(force_tls=self.zk_client_secure_port),
self.zkTlsConfigFileOption())
node.account.ssh(la_migra_cmd)
def _check_chroot(self, chroot):
if chroot and not chroot.startswith("/"):
raise Exception("ZK chroot must start with '/', invalid chroot: %s" % chroot)
def query(self, path, chroot=None):
"""
Queries zookeeper for data associated with 'path' and returns all fields in the schema
"""
self._check_chroot(chroot)
chroot_path = ('' if chroot is None else chroot) + path
kafka_run_class = self.path.script("kafka-run-class.sh", DEV_BRANCH)
cmd = "%s %s -server %s %s get %s" % \
(kafka_run_class, self.java_cli_class_name(), self.connect_setting(force_tls=self.zk_client_secure_port),
self.zkTlsConfigFileOption(True),
chroot_path)
self.logger.debug(cmd)
node = self.nodes[0]
result = None
for line in node.account.ssh_capture(cmd, allow_fail=True):
# loop through all lines in the output, but only hold on to the first match
if result is None:
match = re.match("^({.+})$", line)
if match is not None:
result = match.groups()[0]
return result
def create(self, path, chroot=None, value=""):
"""
Create an znode at the given path
"""
self._check_chroot(chroot)
chroot_path = ('' if chroot is None else chroot) + path
kafka_run_class = self.path.script("kafka-run-class.sh", DEV_BRANCH)
cmd = "%s %s -server %s %s create %s '%s'" % \
(kafka_run_class, self.java_cli_class_name(), self.connect_setting(force_tls=self.zk_client_secure_port),
self.zkTlsConfigFileOption(True),
chroot_path, value)
self.logger.debug(cmd)
output = self.nodes[0].account.ssh_output(cmd)
self.logger.debug(output)
def describe(self, topic):
"""
Describe the given topic using the ConfigCommand CLI
"""
kafka_run_class = self.path.script("kafka-run-class.sh", DEV_BRANCH)
cmd = "%s kafka.admin.ConfigCommand --zookeeper %s %s --describe --topic %s" % \
(kafka_run_class, self.connect_setting(force_tls=self.zk_client_secure_port),
self.zkTlsConfigFileOption(),
topic)
self.logger.debug(cmd)
output = self.nodes[0].account.ssh_output(cmd)
self.logger.debug(output)
def list_acls(self, topic):
"""
List ACLs for the given topic using the AclCommand CLI
"""
kafka_run_class = self.path.script("kafka-run-class.sh", DEV_BRANCH)
cmd = "%s kafka.admin.AclCommand --authorizer-properties zookeeper.connect=%s %s --list --topic %s" % \
(kafka_run_class, self.connect_setting(force_tls=self.zk_client_secure_port),
self.zkTlsConfigFileOption(),
topic)
self.logger.debug(cmd)
output = self.nodes[0].account.ssh_output(cmd)
self.logger.debug(output)
def java_class_name(self):
""" The class name of the Zookeeper quorum peers. """
return "org.apache.zookeeper.server.quorum.QuorumPeerMain"
def java_cli_class_name(self):
""" The class name of the Zookeeper tool within Kafka. """
return "org.apache.zookeeper.ZooKeeperMainWithTlsSupportForKafka"