[Bugfix]修复正常情况下,集群状态统计错误的问题(#865)

This commit is contained in:
zengqiao
2023-02-09 16:43:02 +08:00
committed by EricZeng
parent f95be2c1b3
commit 5bd93aa478
5 changed files with 61 additions and 40 deletions

View File

@@ -4,8 +4,12 @@ import com.xiaojukeji.know.streaming.km.common.bean.entity.cluster.ClusterPhysHe
import com.xiaojukeji.know.streaming.km.common.bean.entity.cluster.ClusterPhysState;
import com.xiaojukeji.know.streaming.km.common.bean.dto.cluster.MultiClusterDashboardDTO;
import com.xiaojukeji.know.streaming.km.common.bean.entity.result.PaginationResult;
import com.xiaojukeji.know.streaming.km.common.bean.entity.result.Result;
import com.xiaojukeji.know.streaming.km.common.bean.vo.cluster.ClusterPhyBaseVO;
import com.xiaojukeji.know.streaming.km.common.bean.vo.cluster.ClusterPhyDashboardVO;
import java.util.List;
/**
* 多集群总体状态
*/
@@ -24,4 +28,6 @@ public interface MultiClusterPhyManager {
* @return
*/
PaginationResult<ClusterPhyDashboardVO> getClusterPhysDashboard(MultiClusterDashboardDTO dto);
Result<List<ClusterPhyBaseVO>> getClusterPhysBasic();
}

View File

@@ -9,13 +9,12 @@ import com.xiaojukeji.know.streaming.km.common.bean.entity.cluster.ClusterPhysHe
import com.xiaojukeji.know.streaming.km.common.bean.entity.cluster.ClusterPhysState;
import com.xiaojukeji.know.streaming.km.common.bean.entity.cluster.ClusterPhy;
import com.xiaojukeji.know.streaming.km.common.bean.dto.cluster.MultiClusterDashboardDTO;
import com.xiaojukeji.know.streaming.km.common.bean.entity.kafkacontroller.KafkaController;
import com.xiaojukeji.know.streaming.km.common.bean.entity.metrics.ClusterMetrics;
import com.xiaojukeji.know.streaming.km.common.bean.entity.result.PaginationResult;
import com.xiaojukeji.know.streaming.km.common.bean.entity.result.Result;
import com.xiaojukeji.know.streaming.km.common.bean.vo.cluster.ClusterPhyBaseVO;
import com.xiaojukeji.know.streaming.km.common.bean.vo.cluster.ClusterPhyDashboardVO;
import com.xiaojukeji.know.streaming.km.common.bean.vo.metrics.line.MetricMultiLinesVO;
import com.xiaojukeji.know.streaming.km.common.constant.Constant;
import com.xiaojukeji.know.streaming.km.common.converter.ClusterVOConverter;
import com.xiaojukeji.know.streaming.km.common.enums.health.HealthStateEnum;
import com.xiaojukeji.know.streaming.km.common.utils.ConvertUtil;
@@ -24,7 +23,6 @@ import com.xiaojukeji.know.streaming.km.common.utils.PaginationMetricsUtil;
import com.xiaojukeji.know.streaming.km.common.utils.ValidateUtils;
import com.xiaojukeji.know.streaming.km.core.service.cluster.ClusterMetricService;
import com.xiaojukeji.know.streaming.km.core.service.cluster.ClusterPhyService;
import com.xiaojukeji.know.streaming.km.core.service.kafkacontroller.KafkaControllerService;
import com.xiaojukeji.know.streaming.km.core.service.version.metrics.kafka.ClusterMetricVersionItems;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
@@ -42,37 +40,26 @@ public class MultiClusterPhyManagerImpl implements MultiClusterPhyManager {
@Autowired
private ClusterMetricService clusterMetricService;
@Autowired
private KafkaControllerService kafkaControllerService;
@Override
public ClusterPhysState getClusterPhysState() {
List<ClusterPhy> clusterPhyList = clusterPhyService.listAllClusters();
ClusterPhysState physState = new ClusterPhysState(0, 0, 0, clusterPhyList.size());
Map<Long, KafkaController> controllerMap = kafkaControllerService.getKafkaControllersFromDB(
clusterPhyList.stream().map(elem -> elem.getId()).collect(Collectors.toList()),
false
);
ClusterPhysState physState = new ClusterPhysState(0, 0, clusterPhyList.size());
for (ClusterPhy clusterPhy: clusterPhyList) {
KafkaController kafkaController = controllerMap.get(clusterPhy.getId());
if (kafkaController != null && !kafkaController.alive()) {
// 存在明确的信息表示controller挂了
physState.setDownCount(physState.getDownCount() + 1);
} else if ((System.currentTimeMillis() - clusterPhy.getCreateTime().getTime() >= 5 * 60 * 1000) && kafkaController == null) {
// 集群接入时间是在近5分钟内同时kafkaController信息不存在则设置为down
for (ClusterPhy clusterPhy : clusterPhyList) {
ClusterMetrics metrics = clusterMetricService.getLatestMetricsFromCache(clusterPhy.getId());
Float state = metrics.getMetric(ClusterMetricVersionItems.CLUSTER_METRIC_HEALTH_STATE);
if (state == null) {
physState.setUnknownCount(physState.getUnknownCount() + 1);
} else if (state.intValue() == HealthStateEnum.DEAD.getDimension()) {
physState.setDownCount(physState.getDownCount() + 1);
} else {
// 其他情况都设置为alive
physState.setLiveCount(physState.getLiveCount() + 1);
}
}
return physState;
}
@Override
public ClusterPhysHealthState getClusterPhysHealthState() {
List<ClusterPhy> clusterPhyList = clusterPhyService.listAllClusters();
@@ -107,23 +94,6 @@ public class MultiClusterPhyManagerImpl implements MultiClusterPhyManager {
// 转为vo格式方便后续进行分页筛选等
List<ClusterPhyDashboardVO> voList = ConvertUtil.list2List(clusterPhyList, ClusterPhyDashboardVO.class);
// 获取集群controller信息并补充到vo中,
Map<Long, KafkaController> controllerMap = kafkaControllerService.getKafkaControllersFromDB(clusterPhyList.stream().map(elem -> elem.getId()).collect(Collectors.toList()), false);
for (ClusterPhyDashboardVO vo: voList) {
KafkaController kafkaController = controllerMap.get(vo.getId());
if (kafkaController != null && !kafkaController.alive()) {
// 存在明确的信息表示controller挂了
vo.setAlive(Constant.DOWN);
} else if ((System.currentTimeMillis() - vo.getCreateTime().getTime() >= 5 * 60L * 1000L) && kafkaController == null) {
// 集群接入时间是在近5分钟内同时kafkaController信息不存在则设置为down
vo.setAlive(Constant.DOWN);
} else {
// 其他情况都设置为alive
vo.setAlive(Constant.ALIVE);
}
}
// 本地分页过滤
voList = this.getAndPagingDataInLocal(voList, dto);
@@ -148,6 +118,15 @@ public class MultiClusterPhyManagerImpl implements MultiClusterPhyManager {
);
}
@Override
public Result<List<ClusterPhyBaseVO>> getClusterPhysBasic() {
// 获取集群
List<ClusterPhy> clusterPhyList = clusterPhyService.listAllClusters();
// 转为vo格式方便后续进行分页筛选等
return Result.buildSuc(ConvertUtil.list2List(clusterPhyList, ClusterPhyBaseVO.class));
}
/**************************************************** private method ****************************************************/