网站数据采集应用管理 (Website Data Collection Application Management)

package com.bfd.task.process;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import com.alibaba.fastjson.JSONObject;
import com.bfd.task.cache.ConfigCache;
import com.bfd.task.entity.Constants;
import com.bfd.task.utils.QueueUtil;
import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;

/**
 * @author jian.mao
 * @date 2023-09-21
 * @description Consumes crawl results from Kafka, filters them against the cached task configuration, and pushes the assembled results onto the send queue
 */
@Slf4j
@Component
public class DataConsumptionProcess implements Runnable {

    @Value("${spring.kafka.bootstrap-servers}")
    private String bootstrapServers;
    @Value("${spring.kafka.consumer.group-id}")
    private String groupId;
    @Value("${customize-kafka.consumer.topic}")
    private String topic;

    @SuppressWarnings("unchecked")
    @Override
    public void run() {
        // Build the Kafka consumer configuration
        Map<String, Object> consumerProps = new HashMap<>(16);
        consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
        consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        // Tail the topic: with no committed offset, start from the latest messages
        consumerProps.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG,
                "org.apache.kafka.common.serialization.StringDeserializer");
        Consumer<String, String> consumer = new KafkaConsumer<>(consumerProps);
        try {
            // Subscribe to the topic
            consumer.subscribe(Collections.singletonList(topic));
            // Consume messages
            while (true) {
                // poll() returns an empty batch if nothing arrives within the timeout
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(1000));
                for (ConsumerRecord<String, String> record : records) {
                    Map<String, Object> resultData = new HashMap<>(32);
                    try {
                        // Process the message; add business handling here as needed
                        Map<String, Object> resultEs = JSONObject.parseObject(record.value());
                        log.info("Received message: {}", record.value());
                        if (!resultEs.containsKey(Constants.TASKID)) {
                            log.warn("Message body is missing taskId");
                            continue;
                        }
                        String taskId = resultEs.get(Constants.TASKID).toString();
                        if (!ConfigCache.taskCache.containsKey(taskId)) {
                            log.warn("Data was not produced by a 有知 task, taskId: {}", taskId);
                            continue;
                        }
                        Map<String, Object> task = (Map<String, Object>) ConfigCache.taskCache.get(taskId);
                        // Business token bound to this task
                        String token = (String) task.get(Constants.BUSINESSKEY);
                        Map<String, Object> input = (Map<String, Object>) task.get(Constants.INPUT);
                        Integer hasVideo = (Integer) input.get(Constants.HASVIDEO);
                        if (resultEs.get(Constants.HASVIDEO).equals(hasVideo)) {
                            Map<String, Object> crawlResults = JSONObject.parseObject(record.value());
                            // Post-process the result, e.g. convert videopath[] into a String
                            buildResult(crawlResults);
                            // Assemble the result set
                            Map<String, Object> result = new HashMap<>(16);
                            // Result payload
                            Map<String, Object> data = new HashMap<>(16);
                            // Fetch the configured output fields
                            Map<String, Object> output = (Map<String, Object>) task.get(Constants.OUTPUT);
                            for (String key : output.keySet()) {
                                if (crawlResults.containsKey(key)) {
                                    data.put(key, crawlResults.get(key));
                                }
                            }
                            result.put(Constants.RESULTS, JSONObject.toJSONString(data));
                            for (String key : task.keySet()) {
                                resultData.put(key, task.get(key));
                            }
                            result.put(Constants.STATUS, 1);
                            result.put(Constants.MESSAGE, "成功");
                            resultData.put(Constants.RESULT, result);
                            QueueUtil.sendQueue.put(JSONObject.toJSONString(resultData));
                        } else {
                            log.info("Data does not match the task requirements");
                        }
                    } catch (Exception e) {
                        log.error("Malformed message data: {}", record.value(), e);
                        // Failure result set for this record
                        Map<String, Object> result = new HashMap<>(16);
                        result.put(Constants.RESULTS, e.getMessage());
                        result.put(Constants.MESSAGE, "异常");
                        result.put(Constants.STATUS, 2);
                        resultData.put(Constants.RESULT, result);
                        // Send to Kafka via the send queue
                        QueueUtil.sendQueue.put(JSONObject.toJSONString(resultData));
                    }
                }
            }
        } catch (Exception e) {
            log.error("Kafka consumption failed", e);
        } finally {
            consumer.close();
        }
    }
    /**
     * Post-process a crawl result
     * @param result the parsed crawl result, modified in place
     */
    @SuppressWarnings("unchecked")
    private void buildResult(Map<String, Object> result) {
        // Process the video gofast path: keep only the first URL as a String
        List<String> videoPath = (List<String>) result.get(Constants.VIDEOPATH);
        if (videoPath != null && !videoPath.isEmpty()) {
            String videoUrl = videoPath.get(0);
            result.put(Constants.VIDEOPATH, videoUrl);
        }
    }
}
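
The consumer above hands every assembled result to QueueUtil.sendQueue, whose definition is not part of this file. The sketch below shows one plausible shape for it, assuming sendQueue is a static java.util.concurrent.BlockingQueue<String> that a separate sender thread drains and forwards to Kafka; the bounded capacity and the private constructor are illustrative assumptions, not the project's actual definition.

package com.bfd.task.utils;

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

/**
 * Hypothetical sketch of QueueUtil: a shared buffer between the consumer loop
 * and whatever thread forwards the JSON results to Kafka.
 */
public class QueueUtil {

    /** Assumed capacity; DataConsumptionProcess blocks on put() when the buffer is full. */
    public static final BlockingQueue<String> sendQueue = new LinkedBlockingQueue<>(10000);

    private QueueUtil() {
        // static holder, not meant to be instantiated
    }
}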
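
Nothing in this file starts the poll loop; DataConsumptionProcess only implements Runnable. One way to launch it in a Spring Boot application is sketched below. The DataConsumptionStarter class, the ApplicationRunner hook, and the single-thread executor are assumptions for illustration; the three @Value properties (spring.kafka.bootstrap-servers, spring.kafka.consumer.group-id, customize-kafka.consumer.topic) would still need to be present in the application configuration.

package com.bfd.task.process;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;

/**
 * Hypothetical startup hook that gives the blocking consumer loop its own thread.
 */
@Component
public class DataConsumptionStarter implements ApplicationRunner {

    private final DataConsumptionProcess dataConsumptionProcess;

    public DataConsumptionStarter(DataConsumptionProcess dataConsumptionProcess) {
        this.dataConsumptionProcess = dataConsumptionProcess;
    }

    @Override
    public void run(ApplicationArguments args) {
        // run() never returns, so it must not execute on the application startup thread
        ExecutorService executor = Executors.newSingleThreadExecutor();
        executor.execute(dataConsumptionProcess);
    }
}

Submitting the task from ApplicationRunner rather than calling run() directly keeps the Spring context startup from blocking on the infinite poll loop.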