Add km module kafka

This commit is contained in:
leewei
2023-02-14 14:57:39 +08:00
parent 229140f067
commit 469baad65b
4310 changed files with 736354 additions and 46204 deletions


@@ -0,0 +1,175 @@
![kafka-manager-logo](../../assets/images/common/logo_name.png)
**A one-stop platform for `Apache Kafka` cluster metrics monitoring and operations management**
---
# 3. Kafka C Example
## 1. Apply for Resources
Before starting, make sure you have the following resources; if you already have them, skip this step!
- Topic: [Topic Application](../5. Kafka 云平台——资源申请/Topic 申请.md)
- appId and password: [Application Registration](../5. Kafka 云平台——资源申请/应用申请.md)
- clusterId: shown on the Topic detail page
**Before starting the program, make sure the AppID has permission to use the Topic.**
## 2. Examples
The examples use `librdkafka`.
### 2.1 Producer Example
```cpp
#include <stdio.h>
#include <signal.h>
#include <string.h>
#include "rdkafka.h"
static int run = 1;
static void stop(int sig){
run = 0;
fclose(stdin);
}
/*
This delivery report callback is called once per message to indicate whether the message
was delivered successfully (rkmessage->err == RD_KAFKA_RESP_ERR_NO_ERROR)
or failed (rkmessage->err != RD_KAFKA_RESP_ERR_NO_ERROR).
The callback is triggered by rd_kafka_poll() and runs on the application's thread.
*/
static void dr_msg_cb(rd_kafka_t *rk,
const rd_kafka_message_t *rkmessage, void *opaque){
if(rkmessage->err)
fprintf(stderr, "%% Message delivery failed: %s\n",
rd_kafka_err2str(rkmessage->err));
else
fprintf(stderr,
"%% Message delivered (%zd bytes, "
"partition %"PRId32")\n",
rkmessage->len, rkmessage->partition);
/* rkmessage is destroyed automatically by librdkafka */
}
int main(int argc, char **argv){
rd_kafka_t *rk; /*Producer instance handle*/
rd_kafka_topic_t *rkt; /* Topic object */
rd_kafka_conf_t *conf; /* Temporary configuration object */
char errstr[512];
char buf[512];
const char *brokers;
const char *topic;
if(argc != 5){
fprintf(stderr, "%% Usage: %s <broker> <topic>\n", argv[0]);
return 1;
}
brokers = argv[1];
topic = argv[2];
char* username = argv[3];
char* password = argv[4];
/* Create a Kafka configuration object */
conf = rd_kafka_conf_new();
/* Configure the broker list */
if (rd_kafka_conf_set(conf, "bootstrap.servers", brokers, errstr,
sizeof(errstr)) != RD_KAFKA_CONF_OK){
fprintf(stderr, "%s\n", errstr);
return 1;
}
rd_kafka_conf_set(conf, "security.protocol", "SASL_PLAINTEXT", errstr, sizeof(errstr));
rd_kafka_conf_set(conf, "sasl.mechanisms", "PLAIN", errstr, sizeof(errstr));
rd_kafka_conf_set(conf, "sasl.username", username, errstr, sizeof(errstr));
rd_kafka_conf_set(conf, "sasl.password", password, errstr, sizeof(errstr));
rd_kafka_conf_set(conf, "api.version.request", "true", errstr, sizeof(errstr));
// Enable kafka debug logging
rd_kafka_conf_set(conf, "debug", "topic", errstr, sizeof(errstr));
/* Set the delivery report callback. It is called once for every message accepted by rd_kafka_produce().
 * The application must call rd_kafka_poll() regularly to serve the queued delivery report callbacks. */
rd_kafka_conf_set_dr_msg_cb(conf, dr_msg_cb);
/* Create the producer instance.
rd_kafka_new() takes ownership of conf; the application must not reference it again after this call. */
rk = rd_kafka_new(RD_KAFKA_PRODUCER, conf, errstr, sizeof(errstr));
if(!rk){
fprintf(stderr, "%% Failed to create new producer:%s\n", errstr);
return 1;
}
/* Instantiate one or more topics (rd_kafka_topic_t) to produce to or consume from.
The topic object holds topic-specific configuration and is internally populated with all available partitions and leader brokers. */
rkt = rd_kafka_topic_new(rk, topic, NULL);
if (!rkt){
fprintf(stderr, "%% Failed to create topic object: %s\n",
rd_kafka_err2str(rd_kafka_last_error()));
rd_kafka_destroy(rk);
return 1;
}
/* Signal handler used for clean shutdown */
signal(SIGINT, stop);
fprintf(stderr,
"%% Type some text and hit enter to produce message\n"
"%% Or just hit enter to only serve delivery reports\n"
"%% Press Ctrl-C or Ctrl-D to exit\n");
while(run && fgets(buf, sizeof(buf), stdin)){
size_t len = strlen(buf);
if(buf[len-1] == '\n')
buf[--len] = '\0';
if(len == 0){
/* Poll the kafka handle for events.
Events trigger the application-provided callbacks.
The second argument is the maximum blocking time; 0 makes this a non-blocking call. */
rd_kafka_poll(rk, 0);
continue;
}
retry:
/* Send/Produce message.
This is an asynchronous call; on success it only enqueues the message in the internal producer queue.
Actual delivery to the broker is handled by background threads.
The previously registered delivery callback (dr_msg_cb) signals the application when delivery succeeds or fails. */
if (rd_kafka_produce(
/* Topic object */
rkt,
/* Use the built-in partitioner to choose the partition */
RD_KAFKA_PARTITION_UA,
/* Make a copy of the payload */
RD_KAFKA_MSG_F_COPY,
/* Message payload and length */
buf, len,
/* Optional key and its length */
NULL, 0,
NULL) == -1){
fprintf(stderr,
"%% Failed to produce to topic %s: %s\n",
rd_kafka_topic_name(rkt),
rd_kafka_err2str(rd_kafka_last_error()));
if (rd_kafka_last_error() == RD_KAFKA_RESP_ERR__QUEUE_FULL){
/* If the internal queue is full, wait for messages to be delivered and then retry.
The internal queue holds both messages waiting to be sent and messages that have been sent or have failed.
It is bounded by the queue.buffering.max.messages configuration property. */
rd_kafka_poll(rk, 1000);
goto retry;
}
}else{
fprintf(stderr, "%% Enqueued message (%zd bytes) for topic %s\n",
len, rd_kafka_topic_name(rkt));
}
/* A producer application should keep serving the delivery report queue by calling
rd_kafka_poll() at frequent intervals. Even while no messages are being produced,
make sure rd_kafka_poll() is still called so that previously produced messages have
their delivery report callbacks (and any other registered callbacks) served. */
rd_kafka_poll(rk, 0);
}
fprintf(stderr, "%% Flushing final message.. \n");
/* rd_kafka_flush() is an abstraction over rd_kafka_poll() that waits for all outstanding
produce requests to complete. It is typically called before destroying the producer instance,
to make sure all queued and in-flight produce requests finish before destruction. */
rd_kafka_flush(rk, 10*1000);
/* Destroy topic object */
rd_kafka_topic_destroy(rkt);
/* Destroy the producer instance */
rd_kafka_destroy(rk);
return 0;
}
// Build example
//gcc producer.c ~/librdkafka/src/librdkafka.a -I ~/librdkafka/src -lm -lsasl2 -lssl -lcrypto -lcrypto -lz -ldl -lpthread -lrt
// Run example
//./a.out 10.179.24.223:9093 test2 95.kafka 12345
```
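### 2.2 Consumer Example (sketch)
The platform docs above only show the producer. For reference, a minimal standalone consumer using `librdkafka`'s high-level API with the same SASL settings might look like the sketch below; the group id `cg_example` and the argument handling are illustrative assumptions, not platform requirements.
```cpp
#include <stdio.h>
#include <signal.h>
#include "rdkafka.h"

static volatile int run = 1;
static void stop(int sig){ run = 0; }

int main(int argc, char **argv){
    rd_kafka_t *rk;
    rd_kafka_conf_t *conf = rd_kafka_conf_new();
    rd_kafka_topic_partition_list_t *topics;
    char errstr[512];

    if (argc != 5){
        fprintf(stderr, "%% Usage: %s <broker> <topic> <username> <password>\n", argv[0]);
        return 1;
    }

    rd_kafka_conf_set(conf, "bootstrap.servers", argv[1], errstr, sizeof(errstr));
    /* Same SASL settings as in the producer example */
    rd_kafka_conf_set(conf, "security.protocol", "SASL_PLAINTEXT", errstr, sizeof(errstr));
    rd_kafka_conf_set(conf, "sasl.mechanisms", "PLAIN", errstr, sizeof(errstr));
    rd_kafka_conf_set(conf, "sasl.username", argv[3], errstr, sizeof(errstr));
    rd_kafka_conf_set(conf, "sasl.password", argv[4], errstr, sizeof(errstr));
    /* Consumer group id (placeholder) and starting offset */
    rd_kafka_conf_set(conf, "group.id", "cg_example", errstr, sizeof(errstr));
    rd_kafka_conf_set(conf, "auto.offset.reset", "earliest", errstr, sizeof(errstr));

    rk = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr, sizeof(errstr));
    if (!rk){
        fprintf(stderr, "%% Failed to create consumer: %s\n", errstr);
        return 1;
    }
    /* Route all partition queues to the consumer queue served by rd_kafka_consumer_poll() */
    rd_kafka_poll_set_consumer(rk);

    topics = rd_kafka_topic_partition_list_new(1);
    rd_kafka_topic_partition_list_add(topics, argv[2], RD_KAFKA_PARTITION_UA);
    rd_kafka_subscribe(rk, topics);
    rd_kafka_topic_partition_list_destroy(topics);

    signal(SIGINT, stop);
    while (run){
        rd_kafka_message_t *msg = rd_kafka_consumer_poll(rk, 1000);
        if (!msg)
            continue; /* timeout: no message within 1000 ms */
        if (msg->err)
            fprintf(stderr, "%% Consume error: %s\n", rd_kafka_message_errstr(msg));
        else
            printf("%.*s\n", (int)msg->len, (const char *)msg->payload);
        rd_kafka_message_destroy(msg);
    }

    rd_kafka_consumer_close(rk);
    rd_kafka_destroy(rk);
    return 0;
}
```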


@@ -0,0 +1,346 @@
# Connecting a C++ Client to Kafka
- Uses `librdkafka`
## Producer Example
```cpp
#include <iostream>
#include <string>
#include <cstdlib>
#include <cstdio>
#include <csignal>
#include <cstring>
#include <getopt.h>
#include "rdkafkacpp.h"
static bool run = true;
static void sigterm (int sig) {
run = false;
}
class ExampleDeliveryReportCb : public RdKafka::DeliveryReportCb {
public:
void dr_cb (RdKafka::Message &message) {
std::cout << "Message delivery for (" << message.len() << " bytes): " <<
message.errstr() << std::endl;
if (message.key())
std::cout << "Key: " << *(message.key()) << ";" << std::endl;
}
};
class ExampleEventCb : public RdKafka::EventCb {
public:
void event_cb (RdKafka::Event &event) {
switch (event.type())
{
case RdKafka::Event::EVENT_ERROR:
std::cerr << "ERROR (" << RdKafka::err2str(event.err()) << "): " <<
event.str() << std::endl;
if (event.err() == RdKafka::ERR__ALL_BROKERS_DOWN)
run = false;
break;
case RdKafka::Event::EVENT_STATS:
std::cerr << "\"STATS\": " << event.str() << std::endl;
break;
case RdKafka::Event::EVENT_LOG:
fprintf(stderr, "LOG-%i-%s: %s\n",
event.severity(), event.fac().c_str(), event.str().c_str());
break;
default:
std::cerr << "EVENT " << event.type() <<
" (" << RdKafka::err2str(event.err()) << "): " <<
event.str() << std::endl;
break;
}
}
};
int main (int argc, char **argv)
{
if (argc != 5) {
std::cerr << "missing args. usage: " << argv[0] << " <bootstrap> <topic> <username> <password>" << std::endl;
return 1;
}
std::string brokers = argv[1];
std::string topic_str = argv[2];
std::string username = argv[3];
std::string password = argv[4];
std::string errstr;
int32_t partition = RdKafka::Topic::PARTITION_UA;
RdKafka::Conf *conf = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL);
RdKafka::Conf *tconf = RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC);
conf->set("bootstrap.servers", brokers, errstr);
conf->set("security.protocol", "SASL_PLAINTEXT", errstr);
conf->set("sasl.mechanisms", "PLAIN", errstr);
conf->set("sasl.username", username, errstr);
conf->set("sasl.password", password, errstr);
conf->set("batch.num.messages", 40000, errstr);
conf->set("linger.ms", 2000, errstr);
conf->set("sasl.debug", "topic", errstr);
ExampleEventCb ex_event_cb;
conf->set("event_cb", &ex_event_cb, errstr);
signal(SIGINT, sigterm);
signal(SIGTERM, sigterm);
ExampleDeliveryReportCb ex_dr_cb;
conf->set("dr_cb", &ex_dr_cb, errstr);
RdKafka::Producer *producer = RdKafka::Producer::create(conf, errstr);
if (!producer) {
std::cerr << "Failed to create producer: " << errstr << std::endl;
exit(1);
}
std::cout << "% Created producer " << producer->name() << std::endl;
RdKafka::Topic *topic = RdKafka::Topic::create(producer, topic_str,
tconf, errstr);
if (!topic) {
std::cerr << "Failed to create topic: " << errstr << std::endl;
exit(1);
}
for (std::string line; run && std::getline(std::cin, line);) {
if (line.empty()) {
producer->poll(0);
continue;
}
RdKafka::ErrorCode resp =
producer->produce(topic, partition,
RdKafka::Producer::RK_MSG_COPY /* Copy payload */,
const_cast<char *>(line.c_str()), line.size(),
NULL, NULL);
if (resp != RdKafka::ERR_NO_ERROR)
std::cerr << "% Produce failed: " <<
RdKafka::err2str(resp) << std::endl;
else
std::cerr << "% Produced message (" << line.size() << " bytes)" <<
std::endl;
producer->poll(0);
}
run = true;
// Drain the output queue before exiting
while (run && producer->outq_len() > 0) {
std::cerr << "Waiting for " << producer->outq_len() << std::endl;
producer->poll(1000);
}
delete conf;
delete tconf;
delete topic;
delete producer;
RdKafka::wait_destroyed(5000);
return 0;
}
//g++ producer.cpp ~/git/librdkafka/src-cpp/librdkafka++.a ~/git/librdkafka/src/librdkafka.a -I ~/git/librdkafka/src-cpp -lm -lsasl2 -lssl -lcrypto -lcrypto -lz -ldl -lpthread -lrt
//./a.out 127.0.0.1:9093 test2 95.test0 12345
```
## Consumer Example
```cpp
#include <iostream>
#include <string>
#include <cstdlib>
#include <cstdio>
#include <csignal>
#include <cstring>
#include <string>
#include <sys/time.h>
#include <getopt.h>
#include <unistd.h>
#include "rdkafkacpp.h"
static bool run = true;
static bool exit_eof = true;
static int eof_cnt = 0;
static int partition_cnt = 0;
static int verbosity = 1;
static long msg_cnt = 0;
static int64_t msg_bytes = 0;
static void sigterm (int sig) {
run = false;
}
class ExampleEventCb : public RdKafka::EventCb {
public:
void event_cb (RdKafka::Event &event) {
switch (event.type())
{
case RdKafka::Event::EVENT_ERROR:
std::cerr << "ERROR (" << RdKafka::err2str(event.err()) << "): " <<
event.str() << std::endl;
//if (event.err() == RdKafka::ERR__ALL_BROKERS_DOWN)
// run = false;
break;
case RdKafka::Event::EVENT_STATS:
std::cerr << "\"STATS\": " << event.str() << std::endl;
break;
case RdKafka::Event::EVENT_LOG:
fprintf(stderr, "LOG-%i-%s: %s\n",
event.severity(), event.fac().c_str(), event.str().c_str());
break;
case RdKafka::Event::EVENT_THROTTLE:
std::cerr << "THROTTLED: " << event.throttle_time() << "ms by " <<
event.broker_name() << " id " << (int)event.broker_id() << std::endl;
break;
default:
std::cerr << "EVENT " << event.type() <<
" (" << RdKafka::err2str(event.err()) << "): " <<
event.str() << std::endl;
break;
}
}
};
void msg_consume(RdKafka::Message* message, void* opaque) {
switch (message->err()) {
case RdKafka::ERR__TIMED_OUT:
//std::cerr << "RdKafka::ERR__TIMED_OUT"<<std::endl;
break;
case RdKafka::ERR_NO_ERROR:
/* Real message */
msg_cnt++;
msg_bytes += message->len();
if (verbosity >= 3)
std::cerr << "Read msg at offset " << message->offset() << std::endl;
RdKafka::MessageTimestamp ts;
ts = message->timestamp();
if (verbosity >= 2 &&
ts.type != RdKafka::MessageTimestamp::MSG_TIMESTAMP_NOT_AVAILABLE) {
std::string tsname = "?";
if (ts.type == RdKafka::MessageTimestamp::MSG_TIMESTAMP_CREATE_TIME)
tsname = "create time";
else if (ts.type == RdKafka::MessageTimestamp::MSG_TIMESTAMP_LOG_APPEND_TIME)
tsname = "log append time";
std::cout << "Timestamp: " << tsname << " " << ts.timestamp << std::endl;
}
if (verbosity >= 2 && message->key()) {
std::cout << "Key: " << *message->key() << std::endl;
}
if (verbosity >= 1) {
printf("%.*s\n",
static_cast<int>(message->len()),
static_cast<const char *>(message->payload()));
}
break;
case RdKafka::ERR__PARTITION_EOF:
/* Last message */
if (exit_eof && ++eof_cnt == partition_cnt) {
std::cerr << "%% EOF reached for all " << partition_cnt <<
" partition(s)" << std::endl;
run = false;
}
break;
case RdKafka::ERR__UNKNOWN_TOPIC:
case RdKafka::ERR__UNKNOWN_PARTITION:
std::cerr << "Consume failed: " << message->errstr() << std::endl;
run = false;
break;
default:
/* Errors */
std::cerr << "Consume failed: " << message->errstr() << std::endl;
run = false;
}
}
class ExampleConsumeCb : public RdKafka::ConsumeCb {
public:
void consume_cb (RdKafka::Message &msg, void *opaque) {
msg_consume(&msg, opaque);
}
};
int main (int argc, char **argv) {
if (argc != 5) {
std::cout << "missing args. ex: ./consume.out bootstrap topic user password" << std::endl;
return 0;
}
std::string brokers = argv[1];
std::string topic_str = argv[2];
std::string username = argv[3];
std::string password = argv[4];
std::string errstr;
std::vector<std::string> topics;
std::string group_id="cg_104" + std::to_string(time(NULL));
RdKafka::Conf *conf = RdKafka::Conf::create(RdKafka::Conf::CONF_GLOBAL);
RdKafka::Conf *tconf = RdKafka::Conf::create(RdKafka::Conf::CONF_TOPIC);
// group.id must be set
if (conf->set("group.id", group_id, errstr) != RdKafka::Conf::CONF_OK) {
std::cerr << errstr << std::endl;
exit(1);
}
topics.push_back(topic_str);
// bootstrap.servers can also be specified as metadata.broker.list
conf->set("bootstrap.servers", brokers, errstr);
conf->set("security.protocol", "SASL_PLAINTEXT", errstr);
conf->set("sasl.mechanisms", "PLAIN", errstr);
conf->set("sasl.username", username, errstr);
conf->set("sasl.password", password, errstr);
conf->set("debug", "topic,broker", errstr);
conf->set("api.version.request", "true", errstr);
//ExampleConsumeCb ex_consume_cb;
//conf->set("consume_cb", &ex_consume_cb, errstr);
//ExampleEventCb ex_event_cb;
//conf->set("event_cb", &ex_event_cb, errstr);
tconf->set("auto.offset.reset", "smallest", errstr);
conf->set("default_topic_conf", tconf, errstr);
signal(SIGINT, sigterm);
signal(SIGTERM, sigterm);
RdKafka::KafkaConsumer *consumer = RdKafka::KafkaConsumer::create(conf, errstr);
if (!consumer) {
std::cerr << "Failed to create consumer: " << errstr << std::endl;
exit(1);
}
std::cout << "% Created consumer " << consumer->name() << std::endl;
RdKafka::ErrorCode err = consumer->subscribe(topics);
if (err) {
std::cerr << "Failed to subscribe to " << topics.size() << " topics: "
<< RdKafka::err2str(err) << std::endl;
exit(1);
}
while (run) {
// If no message arrives within 5000 ms, consume() returns RdKafka::ERR__TIMED_OUT
RdKafka::Message *msg = consumer->consume(5000);
msg_consume(msg, NULL);
delete msg;
}
consumer->close();
delete conf;
delete tconf;
delete consumer;
std::cerr << "% Consumed " << msg_cnt << " messages ("
<< msg_bytes << " bytes)" << std::endl;
// Wait for rdkafka to release its resources before the application exits
RdKafka::wait_destroyed(5000);
return 0;
}
//g++ consumer.cpp ~/git/librdkafka/src-cpp/librdkafka++.a ~/git/librdkafka/src/librdkafka.a -I ~/git/librdkafka/src-cpp -lm -lsasl2 -lssl -lcrypto -lcrypto -lz -ldl -lpthread -lrt -o consume.out
//./consume.out 127.0.0.1:9093 test2 95.test0 12345
```


@@ -0,0 +1,25 @@
![kafka-manager-logo](../../assets/images/common/logo_name.png)
**A one-stop platform for `Apache Kafka` cluster metrics monitoring and operations management**
# Connecting Filebeat to Kafka
## Configuration
```shell
output.kafka:
  hosts: ["192.168.0.1:9093"]
  username: {clusterId}.{appId} # example 8.appId_000855_cn
  password: {password} # example wzJ80zSL3xv4
  topic: "my_topic"
  partition.round_robin:
    reachable_only: false
  required_acks: 1
  compression: lz4
  max_message_bytes: 100000
```
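The snippet above covers only the Kafka output. A runnable `filebeat.yml` also needs at least one input; a minimal sketch (the log path is a placeholder, and `filebeat.inputs` assumes Filebeat 6.3 or later):
```shell
filebeat.inputs:
  - type: log
    enabled: true
    paths:
      - /var/log/app/*.log   # placeholder: point this at your real log files
```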


@@ -0,0 +1,42 @@
![kafka-manager-logo](../../assets/images/common/logo_name.png)
**A one-stop platform for `Apache Kafka` cluster metrics monitoring and operations management**
# Connecting Flume to Kafka
## Configuration
```shell
a1.sources = avro-source
a1.sinks = kafka-sink
a1.channels = memory-channel
a1.sources.avro-source.type = netcat
a1.sources.avro-source.bind = localhost
a1.sources.avro-source.port = 44444
a1.sinks.kafka-sink.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.kafka-sink.kafka.bootstrap.servers = 192.168.0.1:9093
a1.sinks.kafka-sink.kafka.topic = my_topic
# If the cluster is not security-managed, remove the three lines below. For a consumer, change 'producer' to 'consumer'.
a1.sinks.kafka-sink.kafka.producer.security.protocol = SASL_PLAINTEXT
a1.sinks.kafka-sink.kafka.producer.sasl.mechanism = PLAIN
a1.sinks.kafka-sink.kafka.producer.sasl.jaas.config = org.apache.kafka.common.security.plain.PlainLoginModule required \
username="{clusterId}.{appId}" \
password="{password}";
# sasl.jaas.config example
# a1.sinks.kafka-sink.kafka.producer.sasl.jaas.config = org.apache.kafka.common.security.plain.PlainLoginModule required \
# username="8.appId_000855_cn" \
# password="wzJ80zSL3xv4";
a1.channels.memory-channel.type = memory
a1.sources.avro-source.channels = memory-channel
a1.sinks.kafka-sink.channel = memory-channel
```
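To sanity-check the pipeline, start the agent and push a test line into the netcat source defined above. A sketch, assuming the configuration is saved as `a1.conf` (a placeholder filename):
```shell
# The agent name must match the a1.* prefix used in the configuration
flume-ng agent --conf conf --conf-file a1.conf --name a1 -Dflume.root.logger=INFO,console
# In another terminal, send a test message through the netcat source
echo "hello kafka" | nc localhost 44444
```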
## References
- [Flume 用户手册](http://flume.apache.org/releases/content/1.9.0/FlumeUserGuide.html)


@@ -0,0 +1,168 @@
![kafka-manager-logo](../../assets/images/common/logo_name.png)
**A one-stop platform for `Apache Kafka` cluster metrics monitoring and operations management**
---
# 2. Kafka Go Example
## 1. Introduction
Our `golang kafka` client uses `sarama` + `sarama-cluster`.
Documentation:
- https://godoc.org/github.com/Shopify/sarama
- https://github.com/bsm/sarama-cluster
Didi maintains internal forks of `sarama` and `sarama-cluster` with some improvements. When producing and consuming with the `go` client, use these internal versions; otherwise unpredictable problems such as being unable to consume may occur.
## 2. Apply for Resources
Before starting, make sure you have the following resources; if you already have them, skip this step!
- Topic: [Topic Application](../5. Kafka 云平台——资源申请/Topic 申请.md)
- appId and password: [Application Registration](../5. Kafka 云平台——资源申请/应用申请.md)
- clusterId: shown on the Topic detail page
**Before starting the program, make sure the AppID has permission to use the Topic.**
## 3. Import Dependencies
Your project code must include the following `import`:
**Note: you must use the sarama and sarama-cluster packages we provide.**
```go
import (
"git.xiaojukeji.com/bigdata-databus/sarama-cluster"
"git.xiaojukeji.com/bigdata-databus/sarama" //support automatic consumer-group rebalancing and offset tracking
)
```
## 4. Examples
To enable `kafka` logging, add the following to your code:
```go
// assumes the standard-library "log" and "os" packages are imported
var logger = log.New(os.Stdout, "kafka", log.LstdFlags)
sarama.Logger = logger
```
### 4.1 Producer Example
The asynchronous producer is recommended.
```go
import (
"git.xiaojukeji.com/bigdata-databus/sarama-cluster"
"git.xiaojukeji.com/bigdata-databus/sarama" // supports automatic consumer-group rebalancing and offset tracking
// the example below also assumes fmt, os, strconv, strings, time, math/rand and github.com/golang/glog are imported
)
// asyncProducer: asynchronous producer.
// With high concurrency, this approach is required.
func asyncProducer() {
// Fill in the correct bootstrap server address; see section 3 of this document for how to find it
bootstrap := "****bootstrap.servers****"
config := sarama.NewConfig()
config.Version = sarama.V0_10_2_0 // note the version
config.Producer.Compression = sarama.CompressionSnappy // note the compression type
config.Producer.Return.Successes = true // this option is required
config.Producer.Timeout = 5 * time.Second
// For async sending, the recommended settings are config.Producer.Flush.Bytes = 100*1024 and config.Producer.Flush.Frequency = 1000*time.Millisecond
config.Producer.Flush.Bytes = 100 * 1024 // flush the batch once it reaches 100 KB
config.Producer.Flush.Frequency = 1000 * time.Millisecond // or after 1 second, even if 100 KB has not been reached
// Fill in the correct clusterId, appId and password
config.Net.SASL.User = "****clusterId****.****AppId****" // see the table above for the clusterId mapping, e.g. 44.appId_000
config.Net.SASL.Password = "****password****"
config.Net.SASL.Handshake = true
config.Net.SASL.Enable = true
topic := "xxx" //topic
p, err := sarama.NewAsyncProducer(strings.Split(bootstrap, ","), config)
if err != nil {
return
}
defer p.Close()
// This goroutine is required: it drains the Errors and Successes channels
go func(p sarama.AsyncProducer) {
errors := p.Errors()
success := p.Successes()
for {
select {
case err := <-errors:
if err != nil {
glog.Errorln(err)
}
case <-success:
}
}
}(p)
v := "async: " + strconv.Itoa(rand.New(rand.NewSource(time.Now().UnixNano())).Intn(10000))
fmt.Fprintln(os.Stdout, v)
msg := &sarama.ProducerMessage{
Topic: topic,
Value: sarama.ByteEncoder(v),
}
p.Input() <- msg
}
```
### 4.2 Consumer Example
**The consumer uses sarama-cluster, which provides consumer rebalancing and offset tracking.**
Do not use the same consumer group to consume multiple `topic`s; otherwise, deleting one of the `topic`s will affect consumption of the others.
```go
import (
"git.xiaojukeji.com/bigdata-databus/sarama-cluster"
"git.xiaojukeji.com/bigdata-databus/sarama" // supports automatic consumer-group rebalancing and offset tracking
// the example below also assumes fmt, os, strings, time and github.com/golang/glog are imported
)
// consumer
func consumer() {
// Fill in the correct bootstrap server address; see section 3 of this document for how to find it
bootstrap := "****bootstrap.servers****"
config := cluster.NewConfig()
config.Config.Version = sarama.V0_10_2_0;
config.Group.Return.Notifications = true
config.Consumer.Offsets.CommitInterval = 1 * time.Second
config.Consumer.Offsets.Initial = sarama.OffsetNewest // start from the newest offset
// Fill in the correct clusterId, appId and password
config.Net.SASL.User = "****clusterId****.****AppId****" // e.g. 44.appId_000
config.Net.SASL.Password = "****password****"
config.Net.SASL.Handshake = true
config.Net.SASL.Enable = true
topics := "xxx,xxx" //topic 列表
groupID := "cg-xxx" //必须以cg开头
c, err := cluster.NewConsumer(strings.Split(bootstrap, ","), groupID, strings.Split(topics, ","), config)
if err != nil {
glog.Errorf("Failed open consumer: %v", err)
return
}
defer c.Close()
go func(c *cluster.Consumer) {
errors := c.Errors()
noti := c.Notifications()
for {
select {
case err := <-errors:
glog.Errorln(err)
case <-noti:
}
}
}(c);
for msg := range c.Messages() {
fmt.Fprintf(os.Stdout, "%s/%d/%d\t%s\n", msg.Topic, msg.Partition, msg.Offset, msg.Value)
c.MarkOffset(msg, "") // MarkOffset does not write to kafka in real time; uncommitted offsets may be lost if the program crashes
}
}
```


@@ -0,0 +1,90 @@
# Connecting a Java Client to Kafka
## Producer
```java
import java.util.Properties;
import org.apache.kafka.clients.producer.*;

public class ProducerTest {
private static String topicName;
private static int msgNum;
private static int key;
public static void main(String[] args) {
Properties props = new Properties();
// Fill in the correct bootstrap server address
props.put("bootstrap.servers", "****bootstrap.servers****"); // service discovery address
props.put("compression.type", "lz4"); // compression type; lz4 is recommended as a balance between storage and CPU usage
props.put("linger.ms", 500); // 500 is recommended; be sure to change the default
props.put("batch.size", 100000); // batch size per request
props.put("max.in.flight.requests.per.connection", 1); // set to 1 if message ordering must be guaranteed (default is 5)
props.put("security.protocol", "SASL_PLAINTEXT"); // security protocol required by the security-managed cluster
props.put("sasl.mechanism", "PLAIN"); // SASL mechanism required by the security-managed cluster
// Fill in the correct clusterId, appId and password; see the table above for the clusterId mapping
String format = "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"%s.%s\" password=\"%s\";";
String jaas_config = String.format(format, "{clusterId}", "{appId}", "{password}");
props.put("sasl.jaas.config", jaas_config);
// Choose serializer classes according to your use case
props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
topicName = "test";
msgNum = *; // number of messages to send
Producer<String, String> producer = new KafkaProducer<>(props);
for (int i = 0; i < msgNum; i++) {
String msg = i + " This is producer test.";
producer.send(new ProducerRecord<String, String>(topicName, msg));
}
producer.close();
}
}
// PS: compared with the previous version, the gateway version requires three additional properties:
props.put("security.protocol", "SASL_PLAINTEXT");
props.put("sasl.mechanism", "PLAIN");
props.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required " +
"username=\"clusterId.appId\" password=\"password\";");
```
## Consumer
```java
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.*;

public class ConsumerTest {
private static String topicName;
private static String group;
public static void main(String[] args) {
Properties props = new Properties();
// Fill in the correct bootstrap server address
props.put("bootstrap.servers", "****bootstrap.servers****"); // service discovery address
props.put("group.id", group); // the group does not need to be applied for; name it according to your business
props.put("auto.offset.reset", "latest"); // earliest/latest: where to start consuming; earliest consumes historical data, latest consumes only new data
props.put("enable.auto.commit", "true"); // auto commit
props.put("auto.commit.interval.ms", "1000"); // auto commit interval
props.put("session.timeout.ms", "30000");
props.put("security.protocol", "SASL_PLAINTEXT");
props.put("sasl.mechanism", "PLAIN");
// Fill in the correct clusterId, appId and password
String format = "org.apache.kafka.common.security.plain.PlainLoginModule required username=\"%s.%s\" password=\"%s\";";
String jaas_config = String.format(format, "{clusterId}", "{appId}", "{password}");
props.put("sasl.jaas.config", jaas_config);
// Choose deserializer classes according to your use case
props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList(topicName)); // multiple topics can be consumed by passing them as a list
while (true) {
try {
ConsumerRecords<String, String> records = consumer.poll(1000);
for (ConsumerRecord<String, String> record : records) {
System.out.println("offset = " + record.offset() + ", key = " + record.key() + ", value = " + record.value());
}
}catch (Throwable e){
// TODO print your error. Note that poll() here may throw because of network problems etc.; do not close the KafkaConsumer instance after catching an exception, otherwise consumption cannot continue
}
}
}
}
// PS: compared with the previous version, the gateway version requires three additional properties:
props.put("security.protocol", "SASL_PLAINTEXT");
props.put("sasl.mechanism", "PLAIN");
props.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required " +
"username=\"clusterId.appId\" password=\"password\";");
```
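The consumer above relies on auto-commit. If you need to commit offsets only after records have been processed, a minimal variation — a sketch reusing the same `props`, not part of the platform docs — disables auto-commit and commits synchronously after each batch:
```java
props.put("enable.auto.commit", "false"); // take over offset management

KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
consumer.subscribe(Arrays.asList(topicName));
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(1000);
    for (ConsumerRecord<String, String> record : records) {
        // process the record before committing its offset
        System.out.println("offset = " + record.offset() + ", value = " + record.value());
    }
    if (!records.isEmpty()) {
        consumer.commitSync(); // commit only after the whole batch has been processed
    }
}
```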


@@ -0,0 +1,108 @@
# Java Client: Writing Messages to Kafka in Order
## Producer Example
In this example, the most important client-side setting for ordered sending is `max.in.flight.requests.per.connection=1`.
```java
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.kafka.clients.producer.*;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
public class SequentialProducer {
private static Properties createProperties() {
Properties properties = new Properties();
properties.put("bootstrap.servers", "**kafka集群服务地址***");
properties.put("acks", "all");
properties.put("max.in.flight.requests.per.connection", 1); // 顺序发送必须加上
properties.put("retries", 30);
properties.put("request.timeout.ms", 12000);
properties.put("linger.ms", 10);
properties.put("batch.size", 65536);
properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
// Security settings; if the cluster is not security-managed, remove the three properties below
properties.put("security.protocol", "SASL_PLAINTEXT");
properties.put("sasl.mechanism", "PLAIN");
properties.put("sasl.jaas.config", "org.apache.kafka.common.security.plain.PlainLoginModule required " +
"username=\"{clusterId}.{username}\" password=\"{password}\";");
return properties;
}
private static boolean sendRecords(KafkaProducer<String, String> producer, List<ProducerRecord<String, String>> records) {
final CountDownLatch countDownLatch = new CountDownLatch(records.size());
final AtomicBoolean failedFlag = new AtomicBoolean(Boolean.FALSE);
for (ProducerRecord<String, String> record : records) {
System.out.println(record.value());
producer.send(new ProducerRecord<String, String>(record.topic(), record.key(), record.value()), new Callback() {
@Override
public void onCompletion(RecordMetadata metadata, Exception exception) {
// Callback invoked when the send completes; exception != null means the send failed
if (exception != null) {
// Send failed -> set the failure flag
failedFlag.set(Boolean.TRUE);
}
countDownLatch.countDown();
}
});
}
boolean success = false;
try {
// Wait up to 60 seconds
success = countDownLatch.await(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
if (failedFlag.get()) {
success = false;
}
return success;
}
public static void main(String[] args){
String mysqlTableName = "DB_Table_Name";
// kafka producer
KafkaProducer<String, String> producer = new KafkaProducer<String, String>(createProperties());
// Records to be sent
List<ProducerRecord<String, String>> records = new ArrayList<ProducerRecord<String, String>>();
// Result of the previous send
boolean status = true;
long mysqlTableId = 1;
while (true) {
if (status) {
// On success, clear the previous batch and then fetch new data
records.clear();
for (int i = 0; i < 5; ++i) {
// Specify a key when building records; using the mysql table name as the key sends all data from the same table to the same partition
records.add(new ProducerRecord<String, String>("{topicName}", mysqlTableName, "hello kafka, id=" + mysqlTableId++));
}
} else {
// On failure, do nothing: keep retrying the previous batch
System.out.println("send failed, ------- records:" + records.get(0));
}
status = sendRecords(producer, records);
}
}
}
```
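The send loop above never exits on its own. In a real service you would typically close the producer (which flushes buffered records) on shutdown; one option — an illustrative addition, not part of the original example — is a JVM shutdown hook registered right after the producer is created:
```java
// Flush and release the producer when the JVM shuts down
Runtime.getRuntime().addShutdownHook(new Thread(() -> producer.close()));
```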


@@ -0,0 +1,40 @@
![kafka-manager-logo](../../assets/images/common/logo_name.png)
**A one-stop platform for `Apache Kafka` cluster metrics monitoring and operations management**
# Connecting Kafka Connect to Kafka
## Configuration
Kafka Connect can be configured with either the ZooKeeper address or the kafka-broker address.
Using the kafka-broker address is recommended, because the broker addresses obtained from zk may not be directly reachable.
```shell
# Add the following configuration
bootstrap.servers=192.168.0.1:9093
sasl.mechanism=PLAIN
security.protocol=SASL_PLAINTEXT
sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required \
username="{clusterId}.{appId}" \
password="{password}";
producer.sasl.mechanism=PLAIN
producer.security.protocol=SASL_PLAINTEXT
producer.sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required \
username="{clusterId}.{appId}" \
password="{password}";
consumer.sasl.mechanism=PLAIN
consumer.security.protocol=SASL_PLAINTEXT
consumer.sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required \
username="{clusterId}.{appId}" \
password="{password}";
```
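These properties go into the Connect worker configuration (for example `connect-standalone.properties` or `connect-distributed.properties`). A typical standalone launch — with `my-connector.properties` as a placeholder connector config — looks like:
```shell
bin/connect-standalone.sh config/connect-standalone.properties config/my-connector.properties
```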


@@ -0,0 +1,67 @@
# Connecting a Python Client to Kafka
## Installation
```shell
# The python kafka client uses kafka-python 1.4.6 or later
pip install kafka-python
# required for lz4 compression
pip install lz4
# required for snappy compression
pip install snappy
pip install python-snappy
```
## Producer Example
```python
from kafka import KafkaConsumer,KafkaProducer
import logging
import time
import json
#logging.basicConfig(level=logging.DEBUG)
BOOTSTRAP_SERVERS='127.0.0.1:9093'
TOPIC='test0'
producer=KafkaProducer(bootstrap_servers=BOOTSTRAP_SERVERS,
compression_type="lz4",
security_protocol="SASL_PLAINTEXT",
sasl_mechanism="PLAIN",
sasl_plain_username='95.appId_000001',  # clusterId.appId
sasl_plain_password='12345'
)
for i in range(10):
    producer.send(TOPIC, "Hello World".encode('utf-8'))
producer.flush()
```
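If related messages must land on the same partition, pass a `key` to `send()`. A short sketch reusing the `producer` and `TOPIC` defined above (the key value is a placeholder):
```python
# Messages with the same key are routed to the same partition
for i in range(10):
    producer.send(TOPIC,
                  key=b'order-123',  # placeholder key, e.g. a business id
                  value=('event %d' % i).encode('utf-8'))
producer.flush()
```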
## Consumer Example
```python
from kafka import KafkaConsumer,KafkaProducer
import logging
import time
import json
#logging.basicConfig(level=logging.DEBUG)
BOOTSTRAP_SERVERS='127.0.0.1:9093'
TOPIC='test0'
consumer = KafkaConsumer(TOPIC,
group_id='test_group',
bootstrap_servers=BOOTSTRAP_SERVERS,
auto_offset_reset='earliest',
security_protocol='SASL_PLAINTEXT',
sasl_mechanism='PLAIN',
sasl_plain_username='95.appId_000001',  # clusterId.appId
sasl_plain_password='12345',
receive_buffer_bytes=1024,
enable_auto_commit=False)
for msg in consumer:
    print(msg)
    consumer.commit()
```


@@ -0,0 +1,28 @@
![kafka-manager-logo](../../assets/images/common/logo_name.png)
**A one-stop platform for `Apache Kafka` cluster metrics monitoring and operations management**
# Connecting Confluent Schema Registry to Kafka
## Configuration
The schema-registry can be configured with either the ZooKeeper address or the kafka-broker address.
Using the kafka-broker address is recommended, because the broker addresses obtained from zk may not be directly reachable.
```shell
# Add the following configuration
kafkastore.bootstrap.servers=SASL_PLAINTEXT://127.0.0.1:9093
kafkastore.sasl.mechanism=PLAIN
kafkastore.security.protocol=SASL_PLAINTEXT
kafkastore.sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required \
username="{clusterId}.{appId}" \
password="{password}";
# example
# kafkastore.sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required \
# username="8.appId_000855_cn" \
# password="12345678";
```