commit badb0bbee9b040c0dba191e19cfd343e9bf1d008 Author: 55007 <55007@maojian> Date: Tue Jan 7 17:13:01 2025 +0800 脚本分析应用 diff --git a/.classpath b/.classpath new file mode 100644 index 0000000..f7e4a1d --- /dev/null +++ b/.classpath @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b83d222 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target/ diff --git a/.project b/.project new file mode 100644 index 0000000..36c22db --- /dev/null +++ b/.project @@ -0,0 +1,23 @@ + + + data_statistical + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.m2e.core.maven2Builder + + + + + + org.eclipse.jdt.core.javanature + org.eclipse.m2e.core.maven2Nature + + diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000..abdea9a --- /dev/null +++ b/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +encoding//src/main/java=UTF-8 +encoding//src/main/resources=UTF-8 +encoding/=UTF-8 diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000..2f5cc74 --- /dev/null +++ b/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,8 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled +org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning +org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore +org.eclipse.jdt.core.compiler.release=disabled +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs new file mode 100644 index 0000000..f897a7f --- /dev/null +++ b/.settings/org.eclipse.m2e.core.prefs @@ -0,0 +1,4 @@ +activeProfiles= +eclipse.preferences.version=1 +resolveWorkspaceProjects=true +version=1 diff --git a/README.md b/README.md new file mode 100644 index 0000000..1667875 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +脚本分析应用 +部署位置:/opt/analyze/apps/data_Statistical + diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..701609d --- /dev/null +++ b/pom.xml @@ -0,0 +1,165 @@ + + + 4.0.0 + com.bfd + data_Statistical + 0.0.1-SNAPSHOT + data_Statistical + data_Statistical + + 1.8 + UTF-8 + UTF-8 + 2.2.4.RELEASE + + + + org.springframework.boot + spring-boot-starter-web + + + + org.projectlombok + lombok + true + + + org.springframework.boot + spring-boot-starter-test + test + + + org.apache.commons + commons-math3 + 3.6.1 + + + com.alibaba.fastjson2 + fastjson2 + 2.0.12 + + + cn.hutool + hutool-all + 5.8.27 + + + org.apache.kafka + kafka-clients + 2.7.1 + + + com.squareup.okhttp3 + okhttp + 3.11.0 + + + org.thymeleaf + thymeleaf + 3.0.12.RELEASE + + + de.codecentric + spring-boot-admin-client + 2.2.4 + + + + + + org.springframework.boot + spring-boot-dependencies + ${spring-boot.version} + pom + import + + + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + *.properties + *.yml + *.yaml + + + + + com.bfd.data_statistical.DataStatisticalApplication + + true + + lib/ + + false + + + + config/ + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy + package + + copy-dependencies + + + ${project.build.directory}/lib/ + + + + + + + maven-resources-plugin + + + copy-resources + package + + copy-resources + + + + + + src/main/resources/ + + *.properties + *.yml + *.yaml + + + + ${project.build.directory}/config + + + + + + org.apache.maven.plugins + maven-compiler-plugin + + 8 + 8 + + + + + + diff --git a/src/main/java/com/bfd/data_statistical/DataStatisticalApplication.java b/src/main/java/com/bfd/data_statistical/DataStatisticalApplication.java new file mode 100644 index 0000000..548313c --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/DataStatisticalApplication.java @@ -0,0 +1,90 @@ +package com.bfd.data_statistical; + +import com.alibaba.fastjson2.JSONObject; +import com.bfd.data_statistical.service.impl.VarAnaServiceImpl; +import com.bfd.data_statistical.util.Config; +import com.bfd.data_statistical.util.varPojo; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import lombok.extern.slf4j.Slf4j; +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.context.ConfigurableApplicationContext; +import org.thymeleaf.TemplateEngine; +import org.thymeleaf.context.Context; + +import javax.annotation.Resource; +import java.util.Date; +import java.util.Map; + + +@SpringBootApplication +@Slf4j +public class DataStatisticalApplication { + @Resource + VarAnaServiceImpl varAnaService; + + public static void main(String[] args) { + ConfigurableApplicationContext run = SpringApplication.run(DataStatisticalApplication.class, args); + DataStatisticalApplication bean = run.getBean(DataStatisticalApplication.class); + bean.run(); + + + } + + public void run() { + new Thread(new Runnable() { + @Override + public void run() { + while (true) { + try { + if (Config.taskQueue.size() == 0) { + log.info("没有需要计算的"); + Thread.sleep(1000 * 10); + } else { + Map take = Config.taskQueue.take(); + Long time = (Long) take.get("time"); + Long time2 = System.currentTimeMillis(); + // 将时间戳转换为Date对象 + Date date1 = new Date(time); + Date date2 = new Date(time2); + // 计算时间差(毫秒数) + long diffMillis = Math.abs(date2.getTime() - date1.getTime()); + // 转换为分钟数 + long diffMinutes = diffMillis / (60 * 1000); + // 检查是否大于两分钟 + if (diffMinutes < 1) { + Thread.sleep(1000 * 60); + } else { +// Config.taskQueue.put(take); + } + + System.out.println("时间差大于1分钟"); + String type = (String) take.get("type"); + log.info(type + "数据组ID --> " + take.get("businessKey") + ",结束调用,开始调用计算"); + switch (type) { + case "var": + varAnaService.singleFactor((String) take.get("businessKey"), (JSONObject) take.get("data")); + break; + case "rel": + varAnaService.correlationAna((String) take.get("businessKey"), (JSONObject) take.get("data")); + break; + case "main": + varAnaService.mainAna((String) take.get("businessKey"), (JSONObject) take.get("data")); + break; + + } + Config.dataMap_time.remove((String) take.get("businessKey")); + + } + + } catch (Exception e) { + log.error("队列失败",e); + } + } + } + }).start(); + } + + +} diff --git a/src/main/java/com/bfd/data_statistical/controller/ApiController.java b/src/main/java/com/bfd/data_statistical/controller/ApiController.java new file mode 100644 index 0000000..3b43767 --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/controller/ApiController.java @@ -0,0 +1,115 @@ +package com.bfd.data_statistical.controller; + +import com.alibaba.fastjson2.JSONObject; +import com.bfd.data_statistical.service.DataProcessService; +import com.bfd.data_statistical.util.Config; +import lombok.extern.slf4j.Slf4j; +import org.springframework.web.bind.annotation.*; + +import javax.annotation.Resource; +import java.util.HashMap; +import java.util.Map; + +/** + * @author guowei + */ +@RestController +@Slf4j +@RequestMapping(value = "/anaApi") +@CrossOrigin(origins = "*", maxAge = 3600) +public class ApiController { + @Resource + DataProcessService dataProcessService; + + /** + * 方差分析 Api + * @param jsonObject + * @return + */ + @RequestMapping(value = "/varAna", method = RequestMethod.POST, produces = "application/json") + @ResponseBody + public String varAna(@RequestBody JSONObject jsonObject) { + log.info("方差分析参数:"+jsonObject); + JSONObject data = jsonObject.getJSONObject("data"); + String businessKey = data.getString("businessKey"); + + synchronized (Config.lock) { + if (!Config.dataMap_time.containsKey(businessKey)) { + Config.dataMap_time.put(businessKey, jsonObject); + Map map = new HashMap<>(); + try { + map.put("businessKey", businessKey); + map.put("data", jsonObject); + map.put("time", System.currentTimeMillis()); + map.put("type", "var"); + Config.taskQueue.put(map); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + dataProcessService.varAna(jsonObject); + return "success"; + } + + /** + * 相关性分析 Api + * @param jsonObject + * @return + */ + @RequestMapping(value = "/relativityAna", method = RequestMethod.POST, produces = "application/json") + @ResponseBody + public String relativityAna(@RequestBody JSONObject jsonObject) { + log.info("相关性分析参数:"+jsonObject); + JSONObject data = jsonObject.getJSONObject("data"); + String businessKey = data.getString("businessKey"); + synchronized (Config.lock) { + if (!Config.dataMap_time.containsKey(businessKey)) { + Config.dataMap_time.put(businessKey, jsonObject); + Map map = new HashMap<>(); + try { + map.put("businessKey", businessKey); + map.put("data", jsonObject); + map.put("time", System.currentTimeMillis()); + map.put("type", "rel"); + Config.taskQueue.put(map); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + dataProcessService.relativityAna(jsonObject); + return "success"; + } + + + /** + * 主成分分析 Api + * @param jsonObject + * @return + */ + @RequestMapping(value = "/mainAna", method = RequestMethod.POST, produces = "application/json") + @ResponseBody + public String mainAna(@RequestBody JSONObject jsonObject) { + log.info("主成分分析参数:"+jsonObject); + JSONObject data = jsonObject.getJSONObject("data"); + String businessKey = data.getString("businessKey"); + synchronized (Config.lock) { + if (!Config.dataMap_time.containsKey(businessKey)) { + Config.dataMap_time.put(businessKey, jsonObject); + Map map = new HashMap<>(); + try { + map.put("businessKey", businessKey); + map.put("data", jsonObject); + map.put("time", System.currentTimeMillis()); + map.put("type", "main"); + Config.taskQueue.put(map); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + dataProcessService.mainAna(jsonObject); + return "success"; + } +} diff --git a/src/main/java/com/bfd/data_statistical/service/DataProcessService.java b/src/main/java/com/bfd/data_statistical/service/DataProcessService.java new file mode 100644 index 0000000..0d43b05 --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/service/DataProcessService.java @@ -0,0 +1,19 @@ +package com.bfd.data_statistical.service; + +import com.alibaba.fastjson2.JSONObject; +import org.springframework.stereotype.Service; + +/** + * @author guowei + */ +@Service +public interface DataProcessService { + + void varAna(JSONObject businessKey); + + void relativityAna(JSONObject businessKey); + + void mainAna(JSONObject businessKey); + + +} diff --git a/src/main/java/com/bfd/data_statistical/service/VarAnaService.java b/src/main/java/com/bfd/data_statistical/service/VarAnaService.java new file mode 100644 index 0000000..d0f5a1f --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/service/VarAnaService.java @@ -0,0 +1,30 @@ +package com.bfd.data_statistical.service; + +import com.alibaba.fastjson2.JSONObject; +import org.springframework.stereotype.Service; + +/** + * @author guowei + */ +@Service +public interface VarAnaService { + + /** + * 单因素 + */ + void singleFactor(String businessKey,JSONObject jsonObject); + + /** + * 双因素 + */ + void doubleFactor(JSONObject jsonObject); + + /** + * 多因素 + */ + void multipleFactor(JSONObject jsonObject); + + void correlationAna(String businessKey,JSONObject jsonObject); + + void mainAna(String businessKey,JSONObject jsonObject); +} diff --git a/src/main/java/com/bfd/data_statistical/service/impl/DataProcessServiceImpl.java b/src/main/java/com/bfd/data_statistical/service/impl/DataProcessServiceImpl.java new file mode 100644 index 0000000..088b089 --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/service/impl/DataProcessServiceImpl.java @@ -0,0 +1,209 @@ +package com.bfd.data_statistical.service.impl; + +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import com.bfd.data_statistical.service.DataProcessService; +import com.bfd.data_statistical.util.Config; +import com.bfd.data_statistical.util.Utils; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import javax.annotation.Resource; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * @author guowei + */ +@Service +@Slf4j +public class DataProcessServiceImpl implements DataProcessService { + + @Resource + VarAnaServiceImpl varAnaService; + + @Override + public void varAna(JSONObject jsonObject) { + JSONObject data = jsonObject.getJSONObject("data"); + String businessKey = data.getString("businessKey"); + //输入 + JSONObject input = jsonObject.getJSONObject("input"); + log.info("数据组ID --> " + businessKey + ";data -->" + jsonObject); + + JSONArray ind_var = input.getJSONArray("ind_var"); + //因变量 + String dep_var = input.getString("dep_var"); + //显著性水平 + Double significance_level = input.getDouble("significance_level"); + //缺失值处理 + String miss_value = input.getString("miss_value"); + //置信区间 + String confidence_interval = input.getString("confidence_interval"); +// String percentageStr = confidence_interval.replace("%", ""); +// double percentage = Double.parseDouble(percentageStr); // 解析为double类型数字 +// double decimal = percentage / 100.0; // 转换为十进制小数 + synchronized (Config.lock) { + if (!Config.dataMap.containsKey(businessKey)) { + Map dataMap = new HashMap<>(); + Map vData = new HashMap<>(); + ind_var.forEach(value -> { + JSONObject item = JSONObject.parseObject(JSON.toJSONString(value)); + vData.put(item.getString("key"), new ArrayList<>()); + }); + vData.put("因变量", new ArrayList<>()); + dataMap.put("data", vData); + Config.dataMap.put(businessKey, dataMap); + } + + Map dataMap = (Map) Config.dataMap.get(businessKey); + Map vData = (Map) dataMap.get("data"); + + ind_var.forEach(value -> { + JSONObject item = JSONObject.parseObject(JSON.toJSONString(value)); + List indList = (List) vData.get(item.getString("key")); + String indValue = (String) Utils.jsonParse(item.getString("value"), data); + indList.add(indValue); + vData.put(item.getString("key"), indList); + }); + List dep = (List) vData.get("因变量"); + String depValue = (String) Utils.jsonParse(dep_var, data); + Double value; + if (depValue.isEmpty()) { + value = Double.NaN; + } else { + value = Double.valueOf(depValue); + } + dep.add(value); + + vData.put("因变量", dep); + dataMap.put("data", vData); + dataMap.put("significance_level", significance_level); + dataMap.put("miss_value", miss_value); + dataMap.put("confidence_interval", confidence_interval); + Config.dataMap.put(businessKey, dataMap); + } + + + //如果是结束标识,这组数据可以进行计算 +// if (data.containsKey("isLast") ) { +// log.info("数据组ID --> " + businessKey + ",结束调用,开始关联处理"); +// varAnaService.singleFactor(businessKey, jsonObject); +// } + + } + + @Override + public void relativityAna(JSONObject jsonObject) { + JSONObject data = jsonObject.getJSONObject("data"); + String businessKey = data.getString("businessKey"); + //输入 + JSONObject input = jsonObject.getJSONObject("input"); + log.info("(相关性分析)数据组ID --> " + businessKey + ";data -->" + jsonObject); + + //数据 + JSONArray variables = input.getJSONArray("variables"); + synchronized (Config.lock) { + if (!Config.dataMap_rel.containsKey(businessKey)) { + Map dataMap = new HashMap<>(); + Map vData = new HashMap<>(); + variables.forEach(value -> { + JSONObject item = JSONObject.parseObject(JSON.toJSONString(value)); + vData.put(item.getString("key"), new ArrayList<>()); + + }); + dataMap.put("data", vData); + Config.dataMap_rel.put(businessKey, dataMap); + } + + + Map dataMap = (Map) Config.dataMap_rel.get(businessKey); + Map vData = (Map) dataMap.get("data"); + variables.forEach(value -> { + JSONObject item = JSONObject.parseObject(JSON.toJSONString(value)); + List vList = (List) vData.get(item.getString("key")); + String itemvalue = item.getString("value"); + String vValue = (String) Utils.jsonParse(itemvalue, data); + Double dvalue; + if (vValue.isEmpty()) { + dvalue = Double.NaN; + } else { + dvalue = Double.valueOf(vValue); + } + vList.add(dvalue); + vData.put(item.getString("key"), vList); + }); + + dataMap.put("data", vData); + dataMap.put("ana_way", input.getString("ana_way")); + dataMap.put("ace_level", input.getDouble("ace_level")); + dataMap.put("miss_value", input.getString("miss_value")); + + Config.dataMap_rel.put(businessKey, dataMap); + } + //如果是结束标识,这组数据可以进行计算 +// if (data.containsKey("isLast")) { +// log.info("数据组ID --> " + businessKey + ",结束调用,开始计算处理"); +// varAnaService.correlationAna(businessKey,jsonObject); +// } + + + } + + @Override + public void mainAna(JSONObject jsonObject) { + JSONObject data = jsonObject.getJSONObject("data"); + String businessKey = data.getString("businessKey"); + //输入 + JSONObject input = jsonObject.getJSONObject("input"); + log.info("(主成分分析)数据组ID --> " + businessKey + ";data -->" + jsonObject); + //数据 + JSONArray variables = input.getJSONArray("variables"); + synchronized (Config.lock) { + if (!Config.dataMap_main.containsKey(businessKey)) { + Map dataMap = new HashMap<>(); + Map vData = new HashMap<>(); + variables.forEach(value -> { + JSONObject item = JSONObject.parseObject(JSON.toJSONString(value)); + vData.put(item.getString("key"), new ArrayList<>()); + + }); + dataMap.put("data", vData); + Config.dataMap_main.put(businessKey, dataMap); + } + + Map dataMap = (Map) Config.dataMap_main.get(businessKey); + Map vData = (Map) dataMap.get("data"); + variables.forEach(value -> { + JSONObject item = JSONObject.parseObject(JSON.toJSONString(value)); + List vList = (List) vData.get(item.getString("key")); + String itemvalue = item.getString("value"); + String vValue = (String) Utils.jsonParse(itemvalue, data); + Double dvalue; + if (vValue.isEmpty()) { + dvalue = Double.NaN; + } else { + dvalue = Double.valueOf(vValue); + } + vList.add(dvalue); + vData.put(item.getString("key"), vList); + }); + dataMap.put("data", vData); + dataMap.put("draw_method", input.getString("draw_method")); + dataMap.put("miss_value", input.getString("miss_value")); + dataMap.put("factor_num", input.getIntValue("factor_num")); + int cumulative_variance = input.getIntValue("cumulative_variance"); +// Double cum = Double.valueOf(cumulative_variance); + dataMap.put("cumulative_variance", cumulative_variance); + + Config.dataMap_main.put(businessKey, dataMap); + } + //如果是结束标识,这组数据可以进行计算 +// if (data.containsKey("isLast")) { +// log.info("数据组ID --> " + businessKey + ",结束调用,开始计算处理"); +// varAnaService.mainAna(businessKey,jsonObject); +// } + } +} diff --git a/src/main/java/com/bfd/data_statistical/service/impl/VarAnaServiceImpl.java b/src/main/java/com/bfd/data_statistical/service/impl/VarAnaServiceImpl.java new file mode 100644 index 0000000..8552890 --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/service/impl/VarAnaServiceImpl.java @@ -0,0 +1,171 @@ +package com.bfd.data_statistical.service.impl; + +import cn.hutool.core.util.IdUtil; +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONObject; +import com.bfd.data_statistical.service.VarAnaService; +import com.bfd.data_statistical.util.Config; +import com.bfd.data_statistical.util.KfkUtil; +import com.bfd.data_statistical.util.Utils; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Service; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URLEncoder; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * @author guowei + */ +@Service +@Slf4j +public class VarAnaServiceImpl implements VarAnaService { + + @Override + public void singleFactor(String businessKey, JSONObject jsonObject) { + Map resultMap = new HashMap<>(32); + Map results = new HashMap<>(); + try { + //data + Map dataMap = (Map) Config.dataMap.get(businessKey); + Map callMap = new HashMap<>(); + callMap.put("data", dataMap.get("data")); + callMap.put("alpha", dataMap.get("significance_level")); + callMap.put("miss_value", dataMap.get("miss_value")); + + String confidence_interval = (String) dataMap.get("confidence_interval"); + String percentageStr = confidence_interval.replace("%", ""); + double percentage = Double.parseDouble(percentageStr); // 解析为double类型数字 + double decimal = percentage / 100.0; // 转换为十进制小数 + callMap.put("confidence", decimal); + + log.info("调用方差分析应用: request->" + JSON.toJSONString(callMap)); + String html = Utils.callApiVar(JSON.toJSONString(callMap)); + log.info("调用方差分析应用: result->" + html); + +// result.getJSONObject("ANOVA结果") + String assembleHtml = Utils.assembleHtml_var(html.replace("NaN", "\"NaN\""), confidence_interval); + JSONObject output = jsonObject.getJSONObject("output"); + if (output.containsKey("id")) { + resultMap.put("id", IdUtil.randomUUID()); + } + resultMap.put("content", assembleHtml); + results.put("status",1); + results.put("message","成功"); + } catch (Exception e) { + log.error("方差分析应用调用失败", e); + resultMap.put("content", "方差分析应用调用失败"); + results.put("status",2); + results.put("message","失败"); + } + resultMap.put("isLast", 1); + results.put("results", JSON.toJSONString(resultMap)); + + jsonObject.put("result", results); + KfkUtil.sendKafka(JSON.toJSONString(jsonObject)); + log.info("处理完成,result:" + JSON.toJSONString(results)); + Config.dataMap.remove(businessKey); + +// Utils.callScript(JSON.toJSONString(callMap),significance_level,miss_value,confidence_interval); + + } + + @Override + public void doubleFactor(JSONObject jsonObject) { + + } + + @Override + public void multipleFactor(JSONObject jsonObject) { + + } + + @Override + public void correlationAna(String businessKey, JSONObject jsonObject) { + Map results = new HashMap<>(); + Map resultMap = new HashMap<>(32); + try { + Map dataMap_rel = (Map) Config.dataMap_rel.get(businessKey); + log.info("调用相关性分析应用: request->" + JSON.toJSONString(dataMap_rel)); + String html = Utils.callApiRel(JSON.toJSONString(dataMap_rel)); + log.info("调用相关性分析应用: result->" + html); + + String assembleHtml = Utils.assembleHtml_rel(html.replace("NaN", "\"NaN\""), (Double) dataMap_rel.get("ace_level")); + JSONObject output = jsonObject.getJSONObject("output"); + if (output.containsKey("id")) { + resultMap.put("id", IdUtil.randomUUID()); + } + resultMap.put("content", assembleHtml); + results.put("status",1); + results.put("message","成功"); + } catch (Exception e) { + log.error("相关性分析应用调用失败", e); + resultMap.put("content", "相关性分析应用调用失败"); + results.put("status",2); + results.put("message","失败"); + } + resultMap.put("isLast", 1); + results.put("results", JSON.toJSONString(resultMap)); + jsonObject.put("result", results); + KfkUtil.sendKafka(JSON.toJSONString(jsonObject)); + log.info("处理完成,result:" + JSON.toJSONString(results)); + Config.dataMap_rel.remove(businessKey); + } + + @Override + public void mainAna(String businessKey, JSONObject jsonObject) { + Map results = new HashMap<>(); + Map resultMap = new HashMap<>(32); + try { + Map dataMap_rel = (Map) Config.dataMap_main.get(businessKey); + log.info("调用主成分分析应用: request->" + JSON.toJSONString(dataMap_rel)); + String html = Utils.callApiMain(JSON.toJSONString(dataMap_rel)); + log.info("调用主成分分析应用: result->" + html); + + String assembleHtml = Utils.assembleHtml_main(html.replace("NaN", "\"NaN\"")); + JSONObject output = jsonObject.getJSONObject("output"); + if (output.containsKey("id")) { + resultMap.put("id", IdUtil.randomUUID()); + } + resultMap.put("content", assembleHtml); + results.put("status",1); + results.put("message","成功"); + + } catch (Exception e) { + log.error("主成分分析应用调用失败", e); + resultMap.put("content", "主成分分析应用调用失败"); + results.put("status",2); + results.put("message","失败"); + } + resultMap.put("isLast", 1); + results.put("results", JSON.toJSONString(resultMap)); + jsonObject.put("result", results); + KfkUtil.sendKafka(JSON.toJSONString(jsonObject)); + log.info("处理完成,result:" + JSON.toJSONString(results)); + Config.dataMap_main.remove(businessKey); + } + + public static void main(String[] args) { + try { + Runtime rt = Runtime.getRuntime(); + Process pr = rt.exec("python D:\\PycharmProjects\\projects\\应用\\demo1.py"); + + BufferedReader input = new BufferedReader(new InputStreamReader(pr.getInputStream())); + + String line = null; + while ((line = input.readLine()) != null) { + System.out.println(line); + } + + int exitVal = pr.waitFor(); + System.out.println("Exited with error code " + exitVal); + } catch (IOException | InterruptedException e) { + e.printStackTrace(); + } + } +} diff --git a/src/main/java/com/bfd/data_statistical/util/Config.java b/src/main/java/com/bfd/data_statistical/util/Config.java new file mode 100644 index 0000000..a34b7c2 --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/util/Config.java @@ -0,0 +1,59 @@ +package com.bfd.data_statistical.util; + +import org.springframework.beans.BeansException; +import org.springframework.context.ApplicationContext; +import org.springframework.context.ApplicationContextAware; +import org.springframework.stereotype.Component; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.LinkedBlockingQueue; + +/** + * @author guowei + */ +@Component +public class Config implements ApplicationContextAware { + public static final Object lock = new Object(); + + /** + * 方差分析 + */ + public static Map dataMap = new HashMap<>(); + /** + * 相关性分析 + */ + public static Map dataMap_rel = new HashMap<>(); + + /** + * 主成分分析 + */ + public static Map dataMap_main = new HashMap<>(); + + public static Map dataMap_time = new HashMap<>(); + public static LinkedBlockingQueue taskQueue = new LinkedBlockingQueue(1000); + public static LinkedBlockingQueue taskQueue_rel = new LinkedBlockingQueue(1000); + public static LinkedBlockingQueue taskQueue_main = new LinkedBlockingQueue(1000); + public static Boolean loop = false; + + private static ApplicationContext app; + + @Override + public void setApplicationContext(ApplicationContext applicationContext) throws BeansException { + app = applicationContext; + } + + public static String getMainUrl(){ + return app.getEnvironment().getProperty("api.main"); + } + + public static String getRelUrl(){ + return app.getEnvironment().getProperty("api.rel"); + } + + public static String getVarUrl(){ + return app.getEnvironment().getProperty("api.var"); + } + + +} diff --git a/src/main/java/com/bfd/data_statistical/util/KfkUtil.java b/src/main/java/com/bfd/data_statistical/util/KfkUtil.java new file mode 100644 index 0000000..f429866 --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/util/KfkUtil.java @@ -0,0 +1,81 @@ +package com.bfd.data_statistical.util; + +import lombok.extern.slf4j.Slf4j; +import org.apache.kafka.clients.producer.KafkaProducer; +import org.apache.kafka.clients.producer.ProducerRecord; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; + +import java.util.Properties; + +/** + * @author guowei + * kfk工具类 + */ +@Component +@Slf4j +public class KfkUtil { + private static String topic; + + private static String brokerList; + + @Value("${crawl.kafka.topic}") + public void setTopic(String topic) { + KfkUtil.topic = topic; + } + + @Value("${crawl.kafka.brokers}") + public void setBrokerList(String brokerList) { + KfkUtil.brokerList = brokerList; + } + private static KafkaProducer kafkaProducer; + + public static int num = 0; + + /** + * 获取KafkaProducer实例 + */ + public static KafkaProducer getProducer() { + if (kafkaProducer == null) { + Properties props = new Properties(); + //xxx服务器ip + props.put("bootstrap.servers", brokerList); + //所有follower都响应了才认为消息提交成功,即"committed" + props.put("acks", "all"); + //retries = MAX 无限重试,直到你意识到出现了问题:) + props.put("retries", 3); + //producer将试图批处理消息记录,以减少请求次数.默认的批量处理消息字节数 + props.put("batch.size", 16384); + //batch.size当批量的数据大小达到设定值后,就会立即发送,不顾下面的linger.ms + //延迟1ms发送,这项设置将通过增加小的延迟来完成--即,不是立即发送一条记录,producer将会等待给定的延迟时间以允许其他消息记录发送,这些消息记录可以批量处理 + props.put("linger.ms", 1); + //producer可以用来缓存数据的内存大小。 + props.put("buffer.memory", 33554432); + props.put("key.serializer", + "org.apache.kafka.common.serialization.StringSerializer"); + props.put("value.serializer", + "org.apache.kafka.common.serialization.StringSerializer"); + kafkaProducer = new KafkaProducer(props); + } + return kafkaProducer; + } + + /** + * 关闭KafkaProducer实例 + */ + public static void closeProducer() { + if (kafkaProducer != null) { + log.info("----------close producer----------"); + kafkaProducer.close(); + kafkaProducer = null; + } + } + + public static void sendKafka(String resultData) { + KafkaProducer producer = getProducer(); + ProducerRecord se = new ProducerRecord(topic, resultData); + producer.send(se); + log.info("发送kafka成功"); +// num++; + } +} diff --git a/src/main/java/com/bfd/data_statistical/util/Utils.java b/src/main/java/com/bfd/data_statistical/util/Utils.java new file mode 100644 index 0000000..e696f86 --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/util/Utils.java @@ -0,0 +1,485 @@ +package com.bfd.data_statistical.util; + +import com.alibaba.fastjson2.JSON; +import com.alibaba.fastjson2.JSONArray; +import com.alibaba.fastjson2.JSONObject; +import com.alibaba.fastjson2.JSONPath; +import okhttp3.*; +import org.springframework.stereotype.Component; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.*; + +/** + * @author guowei + */ +@Component +public class Utils { + + public static Object jsonParse(String key, Map data) { + String[] keySplit = key.split(":"); + String jsonPath = keySplit[1]; + String dataJson = (String) data.get(keySplit[0]); + JSONObject dataJsonObject = JSON.parseObject(dataJson); + Object dataValue = JSONPath.eval(dataJsonObject, jsonPath); + return dataValue; + } + + public static String callApiVar(String data) throws IOException { + OkHttpClient client = new OkHttpClient().newBuilder() + .build(); + MediaType mediaType = MediaType.parse("application/json"); + RequestBody body = RequestBody.create(mediaType, data); +// RequestBody body = RequestBody.create(mediaType, "{\"data\": { \"因变量\": [ 200,210,190,220,230,225,210,215,205],\"广告类型\": [\"A\", \"A\", \"A\", \"B\", \"B\", \"B\", \"C\", \"C\",\"C\"]},\"alpha\": \"0.01\",\"confidence\": \"0.05\"}"); + Request request = new Request.Builder() + .url(Config.getVarUrl()) + .method("POST", body) + .addHeader("User-Agent", "Apifox/1.0.0 (https://apifox.com)") + .addHeader("Content-Type", "application/json") + .build(); + Response response = client.newCall(request).execute(); + return response.body().string(); + } + + public static String callApiRel(String data) throws IOException { + OkHttpClient client = new OkHttpClient().newBuilder() + .build(); + MediaType mediaType = MediaType.parse("application/json"); + RequestBody body = RequestBody.create(mediaType, data); +// RequestBody body = RequestBody.create(mediaType, "{\"data\":{\"GDP增长率\":[3.5,3.0,2.9,3.2,2.8,-5.0,4.0],\"失业率\":[5.0,4.9,5.2,4.5,4.2,7.8,6.1],\"年份\":[2015,2016,2017,2018,2019,2020,2021],\"通货膨胀率\":[1.5,1.7,2.0,2.1,2.5,1.2,2.9]},\"method\":\"pearson\"}"); + Request request = new Request.Builder() + .url(Config.getRelUrl()) + .method("POST", body) + .addHeader("User-Agent", "Apifox/1.0.0 (https://apifox.com)") + .addHeader("Content-Type", "application/json") + .build(); + Response response = client.newCall(request).execute(); + return response.body().string(); + } + + public static String callApiMain(String data) throws IOException { + OkHttpClient client = new OkHttpClient().newBuilder() + .build(); + MediaType mediaType = MediaType.parse("application/json"); + RequestBody body = RequestBody.create(mediaType, data); +// RequestBody body = RequestBody.create(mediaType, "{\"data\":{\"利润率\":[10.2,10.5,10.8,11.1,11.4],\"员工满意度指数\":[82,83,85,86,87],\"客户满意度指数\":[85,88,87,90,91],\"市场份额增长\":[5.1,5.3,5.6,5.9,6.2],\"营业收入增长率\":[5.5,5.7,5.4,5.9,6.2]}}"); + Request request = new Request.Builder() + .url(Config.getMainUrl()) + .method("POST", body) + .addHeader("User-Agent", "Apifox/1.0.0 (https://apifox.com)") + .addHeader("Content-Type", "application/json") + .build(); + Response response = client.newCall(request).execute(); + return response.body().string(); + } + + public static String assembleHtml(String json) { + JSONObject jsonObject = JSONObject.parseObject(json); + StringBuilder htmlTable = new StringBuilder(); + htmlTable.append(""); + + // Iterate over JSON fields and create table rows + for (String key : jsonObject.keySet()) { + Object value = jsonObject.get(key); + htmlTable.append(""); + htmlTable.append(""); + if (value instanceof JSONObject) { + htmlTable.append(""); + } else { + htmlTable.append(""); + } + htmlTable.append(""); + } + + htmlTable.append("
").append(key).append("").append(convertJsonToHtmlTable((JSONObject) value)).append("").append(value).append("
"); + + return String.valueOf(htmlTable); + } + + private static String convertJsonToHtmlTable(JSONObject jsonObject) { + StringBuilder htmlTable = new StringBuilder(); + htmlTable.append(""); + + // Iterate over JSON fields and create table rows + for (String key : jsonObject.keySet()) { + Object value = jsonObject.get(key); + htmlTable.append(""); + htmlTable.append(""); + if (value instanceof JSONObject) { + htmlTable.append(""); + } else { + htmlTable.append(""); + } + htmlTable.append(""); + } + + htmlTable.append("
").append(key).append("").append(convertJsonToHtmlTable((JSONObject) value)).append("").append(value).append("
"); + return htmlTable.toString(); + } + + public static String assembleHtml_var(String data, String confidence_interval) { +// String jsonData = "{\"ANOVA结果\":{\"F\":{\"C(广告类型)\":7.784745762711876,\"Residual\":\"NaN\"},\"PR(>F)\":{\"C(广告类型)\":0.02152454745613121,\"Residual\":\"NaN\"},\"df\":{\"C(广告类型)\":2.0,\"Residual\":6.0},\"sum_sq\":{\"C(广告类型)\":520.7482993197281,\"Residual\":200.68027210884333}},\"描述性统计\":{\"广告类型\":{\"均值\":{\"A\":208.09523809523807,\"B\":225.0,\"C\":209.76190476190473},\"标准差\":{\"A\":7.3308591992098275,\"B\":5.0,\"C\":4.647433641891219}}},\"相关统计量\":{\"F值\":7.784745762711876,\"p值\":0.02152454745613121,\"总平方和 (SST)\":721.4285714285713},\"置信区间\":{\"0\":{\"C(广告类型)[T.B]\":5.350308374503625,\"C(广告类型)[T.C]\":-9.887786863591625,\"Intercept\":199.9250056510876},\"1\":{\"C(广告类型)[T.B]\":28.45921543502017,\"C(广告类型)[T.C]\":13.221120196924929,\"Intercept\":216.2654705393885}}}"; +// String html = assembleHtml(jsonData); + JSONObject jsonObject = JSONObject.parseObject(data); + StringBuilder htmlTable = new StringBuilder(); +// htmlTable.append(""); + htmlTable.append("
"); + + htmlTable.append(""); + htmlTable.append(""); + JSONObject descriptiveStats = jsonObject.getJSONObject("描述性统计"); + Set desKeys = descriptiveStats.keySet(); + for (String key : desKeys) { + JSONObject item = descriptiveStats.getJSONObject(key); + JSONObject means = item.getJSONObject("均值"); + JSONObject stdDevs = item.getJSONObject("标准差"); + + htmlTable.append(""); + htmlTable.append(""); + Set itemKeys = means.keySet(); + for (String itemkey : itemKeys) { + htmlTable.append("") + .append("") + .append("") + .append("") + .append(""); + } + htmlTable.append(""); + } + htmlTable.append("
各组均值和标准差
" + key + "均值标准差
").append(itemkey).append("").append(String.format("%.2f", means.getDouble(itemkey))).append("").append(String.format("%.2f", stdDevs.getDouble(itemkey))).append("
"); + + JSONObject anovaResults = jsonObject.getJSONObject("ANOVA结果"); + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + // Process C(广告类型) + Set keys = anovaResults.getJSONObject("sum_sq").keySet(); + for (String key : keys) { + htmlTable.append("") + .append("") + .append("") + .append("") + .append("") + .append("") + .append(""); + } + htmlTable.append(""); + htmlTable.append("
ANOVA结果
变量sum_sqdfF值p值
").append(key).append("").append(anovaResults.getJSONObject("sum_sq").getDouble(key)).append("").append(anovaResults.getJSONObject("df").getDouble(key)).append("").append(anovaResults.getJSONObject("F").getString(key)).append("").append(anovaResults.getJSONObject("PR(>F)").getString(key)).append("
"); + + JSONObject relatedStats = jsonObject.getJSONObject("相关统计量"); + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + Set tongjikeys = relatedStats.keySet(); + for (String key : tongjikeys) { + htmlTable.append("") + .append("") + .append("") + .append(""); + } + htmlTable.append(""); + htmlTable.append("
相关统计量
统计量
").append(key).append("").append(relatedStats.get(key)).append("
"); + + + JSONObject ciLower = jsonObject.getJSONObject("置信区间").getJSONObject("0"); + JSONObject ciUpper = jsonObject.getJSONObject("置信区间").getJSONObject("1"); + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + // Generate table headers dynamically + htmlTable.append(""); + htmlTable.append(""); + // Get all variable names (keys) + Set variableNames = ciLower.keySet(); + // Generate table rows dynamically + for (String variable : variableNames) { + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + htmlTable.append(""); + } + htmlTable.append("
"+confidence_interval + "置信区间
变量下限上限
").append(variable).append("").append(ciLower.getDouble(variable)).append("").append(ciUpper.getDouble(variable)).append("
"); + + +// System.out.println(htmlTable.toString()); + return htmlTable.toString(); +// System.out.println(html); + } + + public static String assembleHtml_rel(String data, Double ace_level) { + // 解析 JSON 数据 + JSONObject jsonObject = JSON.parseObject(data); + JSONObject descriptiveStatistics = jsonObject.getJSONObject("描述性统计"); + + // 获取动态列名 + Set keys = descriptiveStatistics.keySet(); + List columns = new ArrayList<>(); + for (String key : keys) { + columns.add(key); + } + + // 生成 HTML 表格 + StringBuilder htmlTable = new StringBuilder(); +// htmlTable.append(""); + htmlTable.append("
"); + htmlTable.append("") + .append("") + .append("") + .append("") + .append(""); + // 添加动态列名 + for (int i = 0; i < columns.size(); i++) { + htmlTable.append(""); + } + htmlTable.append("") + .append("") + .append(""); + // 获取所有统计量的键 + String[] stats = {"count", "mean", "std", "min", "25%", "50%", "75%", "max"}; + // 填充表格数据 + for (String stat : stats) { + htmlTable.append("") + .append(""); + for (int i = 0; i < columns.size(); i++) { + String column = (String) columns.get(i); + if (descriptiveStatistics.getJSONObject(column).containsKey(stat)) { + htmlTable.append(""); + } else { + htmlTable.append(""); // 如果没有这个统计量,则填充N/A + } + } + htmlTable.append(""); + } + htmlTable.append(""); + htmlTable.append("
描述性统计
统计量").append(columns.get(i)).append("
").append(stat).append("").append(descriptiveStatistics.getJSONObject(column).get(stat)).append("").append("N/A").append("
"); + + JSONObject correlationMatrix = jsonObject.getJSONObject("相关系数矩阵"); + + // 获取动态列名 + Set jzkeys = correlationMatrix.keySet(); + List jzcolumns = new ArrayList<>(); + for (String key : jzkeys) { + jzcolumns.add(key); + } + htmlTable.append("") + .append("") + .append("") + .append("") + .append(""); + // 添加动态列名 + for (int i = 0; i < jzcolumns.size(); i++) { + htmlTable.append(""); + } + htmlTable.append("") + .append("") + .append(""); + + // 填充表格数据 + for (int i = 0; i < jzcolumns.size(); i++) { + String rowName = (String) jzcolumns.get(i); + htmlTable.append("") + .append(""); + for (int j = 0; j < jzcolumns.size(); j++) { + String colName = (String) jzcolumns.get(j); + if (correlationMatrix.getJSONObject(rowName).containsKey(colName)) { + htmlTable.append(""); + } else { + htmlTable.append(""); // 如果没有这个统计量,则填充N/A + } + } + htmlTable.append(""); + } + htmlTable.append(""); + htmlTable.append("
相关系数矩阵
").append(jzcolumns.get(i)).append("
").append(rowName).append("").append(correlationMatrix.getJSONObject(rowName).get(colName)).append("").append("N/A").append("
"); + + JSONObject pValueTable = jsonObject.getJSONObject("P值表 (显著性水平=" + ace_level + ")"); + + // 获取动态列名 + Set pkeys = pValueTable.keySet(); + List pcolumns = new ArrayList<>(); + for (String key : pkeys) { + pcolumns.add(key); + } + htmlTable.append("") + .append("") + .append("") + .append("") + .append(""); + for (int i = 0; i < pcolumns.size(); i++) { + htmlTable.append(""); + } + htmlTable.append("") + .append("") + .append(""); + // 填充表格数据 + for (int i = 0; i < pcolumns.size(); i++) { + String rowName = (String) pcolumns.get(i); + htmlTable.append("") + .append(""); + for (int j = 0; j < columns.size(); j++) { + String colName = (String) pcolumns.get(j); + if (pValueTable.getJSONObject(rowName).containsKey(colName)) { + htmlTable.append(""); + } else { + htmlTable.append(""); // 如果没有这个统计量,则填充N/A + } + } + htmlTable.append(""); + } + htmlTable.append(""); + htmlTable.append("
P值表 (显著性水平=" + ace_level + ")
").append(pcolumns.get(i)).append("
").append(rowName).append("").append(pValueTable.getJSONObject(rowName).get(colName)).append("").append("N/A").append("
"); + htmlTable.append("
"); + + + // 打印 HTML 表格 +// System.out.println(htmlTable.toString()); + return htmlTable.toString(); + } + + public static String assembleHtml_main(String data) { + // 解析 JSON 数据 + JSONObject jsonObject = JSON.parseObject(data); + JSONObject descriptiveStatistics = jsonObject.getJSONObject("主成分得分"); + // 获取动态列名 + Set keys = descriptiveStatistics.keySet(); + List columns = new ArrayList<>(); + for (String key : keys) { + columns.add(key); + } + // 生成 HTML 表格 + StringBuilder htmlTable = new StringBuilder(); +// htmlTable.append(""); + htmlTable.append("
"); + + htmlTable.append("") + .append("") + .append("") + .append("") + .append(""); + // 添加动态列名 + for (int i = 0; i < columns.size(); i++) { + htmlTable.append(""); + } + htmlTable.append("") + .append(""); + htmlTable.append(""); + Set stats = descriptiveStatistics.getJSONObject((String) columns.get(0)).keySet(); + // 获取所有统计量的键 +// String[] stats = {"0", "1", "2", "3", "4"}; + // 填充表格数据 + for (String stat : stats) { + htmlTable.append("") + .append(""); + for (int i = 0; i < columns.size(); i++) { + String column = (String) columns.get(i); + if (descriptiveStatistics.getJSONObject(column).containsKey(stat)) { + htmlTable.append(""); + } else { + htmlTable.append(""); // 如果没有这个统计量,则填充N/A + } + } + htmlTable.append(""); + } + htmlTable.append(""); + htmlTable.append("
主成分得分
").append(columns.get(i)).append("
").append(stat).append("").append(descriptiveStatistics.getJSONObject(column).get(stat)).append("").append("N/A").append("
"); + + JSONObject correlationMatrix = jsonObject.getJSONObject("主成分载荷"); + + // 获取动态列名 + Set jzkeys = correlationMatrix.keySet(); + List jzcolumns = new ArrayList<>(); + for (String key : jzkeys) { + jzcolumns.add(key); + } + htmlTable.append("") + .append("") + .append("") + .append("") + .append(""); + // 添加动态列名 + for (int i = 0; i < jzcolumns.size(); i++) { + htmlTable.append(""); + } + htmlTable.append("") + .append("") + .append(""); + // 填充表格数据 + // 获取所有统计量的键 + String[] stats_zcf = {"PC1", "PC2", "PC3"}; + // 填充表格数据 + for (String stat : stats_zcf) { + htmlTable.append("") + .append(""); + for (int i = 0; i < jzcolumns.size(); i++) { + String column = (String) jzcolumns.get(i); + if (correlationMatrix.getJSONObject(column).containsKey(stat)) { + htmlTable.append(""); + } else { + htmlTable.append(""); // 如果没有这个统计量,则填充N/A + } + } + htmlTable.append(""); + } + htmlTable.append("").append("
主成分载荷
").append(jzcolumns.get(i)).append("
").append(stat).append("").append(correlationMatrix.getJSONObject(column).get(stat)).append("").append("N/A").append("
"); + + JSONArray pValueTable = jsonObject.getJSONArray("累积方差解释"); + + // 获取动态列名 + htmlTable.append("") + .append("") + .append("") + .append("") + .append("") + .append("") + .append("") + .append("") + .append(""); + + int pNum = pValueTable.size(); + List pcolumns = new ArrayList<>(); + for (int i = 1; i <= pNum; i++) { + + htmlTable.append("") + .append("") + .append("") + .append(""); + } + + htmlTable.append("").append("
累积方差解释
主成分解释的方差比例
").append("PC" + i).append("").append(pValueTable.get(i - 1)).append("
"); + htmlTable.append("
"); + + + // 打印 HTML 表格 +// System.out.println(htmlTable.toString()); + return htmlTable.toString(); + } + + public static void main(String[] args) { +// String data = "{\"P值表 (显著性水平=0.01)\":{\"GDP增长率\":{\"GDP增长率\":1.0,\"失业率\":0.025969486575263344,\"年份\":0.42924735072718156,\"通货膨胀率\":0.133583580599569},\"失业率\":{\"GDP增长率\":0.025969486575263344,\"失业率\":1.0,\"年份\":0.240403199576762,\"通货膨胀率\":0.39905124966692257},\"年份\":{\"GDP增长率\":0.42924735072718156,\"失业率\":0.240403199576762,\"年份\":1.0,\"通货膨胀率\":0.2659542971106307},\"通货膨胀率\":{\"GDP增长率\":0.133583580599569,\"失业率\":0.39905124966692257,\"年份\":0.2659542971106307,\"通货膨胀率\":1.0}},\"描述性统计\":{\"GDP增长率\":{\"25%\":2.8499999999999996,\"50%\":3.0,\"75%\":3.35,\"count\":7.0,\"max\":4.0,\"mean\":2.0571428571428574,\"min\":-5.0,\"std\":3.138926112997428},\"失业率\":{\"25%\":4.7,\"50%\":5.0,\"75%\":5.65,\"count\":7.0,\"max\":7.8,\"mean\":5.385714285714286,\"min\":4.2,\"std\":1.2212405870378578},\"年份\":{\"25%\":2016.5,\"50%\":2018.0,\"75%\":2019.5,\"count\":7.0,\"max\":2021.0,\"mean\":2018.0,\"min\":2015.0,\"std\":2.160246899469287},\"通货膨胀率\":{\"25%\":1.6,\"50%\":2.0,\"75%\":2.3,\"count\":7.0,\"max\":2.9,\"mean\":1.9857142857142858,\"min\":1.2,\"std\":0.5843188953205017}},\"相关系数矩阵\":{\"GDP增长率\":{\"GDP增长率\":1.0,\"失业率\":-0.8136538484920446,\"年份\":-0.35885344674380804,\"通货膨胀率\":0.6247921262264898},\"失业率\":{\"GDP增长率\":-0.8136538484920446,\"失业率\":1.0,\"年份\":0.5117161794157596,\"通货膨胀率\":-0.3810356879053683},\"年份\":{\"GDP增长率\":-0.35885344674380804,\"失业率\":0.5117161794157596,\"年份\":1.0,\"通货膨胀率\":0.48853665304335764},\"通货膨胀率\":{\"GDP增长率\":0.6247921262264898,\"失业率\":-0.3810356879053683,\"年份\":0.48853665304335764,\"通货膨胀率\":1.0}}}"; +// String s = assembleHtml_rel(data, 0.01); + String data = "{\"主成分得分\":{\"PC1\":{\"0\":-0.5284343087964162,\"1\":-2.315443264439099,\"2\":2.843877573235515},\"PC2\":{\"0\":0.9066548428545789,\"1\":-0.5926211987309484,\"2\":-0.3140336441236311},\"PC3\":{\"0\":1.0467283057891841e-16,\"1\":1.0467283057891829e-16,\"2\":1.0467283057891834e-16}},\"主成分载荷\":{\"利润率\":{\"PC1\":0.4668537355103065,\"PC2\":-0.0788849066439597,\"PC3\":-0.32761502595211395},\"员工满意指数\":{\"PC1\":0.4674212894188098,\"PC2\":-0.022345654557889807,\"PC3\":0.7943786052557248},\"客户满意指数\":{\"PC1\":0.4013979730000272,\"PC2\":-0.7871878091015423,\"PC3\":-0.08757932327782991},\"市场份额增长\":{\"PC1\":0.4365584476761835,\"PC2\":0.5492208058930436,\"PC3\":0.10090990545647134},\"营业收入增长率\":{\"PC1\":0.4602839524382316,\"PC2\":0.2682716958649626,\"PC3\":-0.4937388777454417}},\"累积方差解释\":[0.9152106654067643,0.08478933459323566,2.1912802922805874e-33]}"; + + String s = assembleHtml_main(data); +// String data = "{\"ANOVA结果\":{\"F\":{\"C(广告类型)\":7.784745762711876,\"Residual\":\"NaN\"},\"PR(>F)\":{\"C(广告类型)\":0.02152454745613121,\"Residual\":\"NaN\"},\"df\":{\"C(广告类型)\":2.0,\"Residual\":6.0},\"sum_sq\":{\"C(广告类型)\":520.7482993197281,\"Residual\":200.68027210884333}},\"描述性统计\":{\"广告类型\":{\"均值\":{\"A\":208.09523809523807,\"B\":225.0,\"C\":209.76190476190473},\"标准差\":{\"A\":7.3308591992098275,\"B\":5.0,\"C\":4.647433641891219}}},\"相关统计量\":{\"F值\":7.784745762711876,\"p值\":0.02152454745613121,\"总平方和 (SST)\":721.4285714285713},\"置信区间\":{\"0\":{\"C(广告类型)[T.B]\":5.350308385615985,\"C(广告类型)[T.C]\":-9.887786852479266,\"Intercept\":199.9250056589452},\"1\":{\"C(广告类型)[T.B]\":28.45921542390781,\"C(广告类型)[T.C]\":13.22112018581257,\"Intercept\":216.2654705315309}}}"; +// String s = assembleHtml_var(data,"95%"); + System.out.println(s); + } +} diff --git a/src/main/java/com/bfd/data_statistical/util/varPojo.java b/src/main/java/com/bfd/data_statistical/util/varPojo.java new file mode 100644 index 0000000..61107ee --- /dev/null +++ b/src/main/java/com/bfd/data_statistical/util/varPojo.java @@ -0,0 +1,22 @@ +package com.bfd.data_statistical.util; + +import lombok.Data; + +import java.util.Map; + +/** + * @author guowei + */ +@Data +public class varPojo { + private Map ANOVA结果; + private Map PR; + private Map df; + private Map sum_sq; + private double F值; + private double p值; + private double MSE; + private double SST; + private Map>> 描述性统计; + private Map> 置信区间; +} diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml new file mode 100644 index 0000000..30a69b9 --- /dev/null +++ b/src/main/resources/application.yml @@ -0,0 +1,38 @@ +server: + port: 9900 +crawl: + kafka: + topic: produce_analyze + brokers: 172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092 +api: + main: http://172.18.1.147:9903/mainAna/ + rel: http://172.18.1.147:9902/relAna/ + var: http://172.18.1.147:9901/varAna/ +#日志级别 +logging: + level: + com: + bfd: INFO + #日志路径 + log: + path: ./logs +#spring: +# boot: +# admin: +# client: +# url: http://172.18.1.147:8001 +# instance: +# service-base-url: http://172.18.1.147:9999 +# application: +# name: 分析 +#management: +# endpoints: +# web: +# exposure: +# include: "*" +# endpoint: +# health: +# show-details: always +# health: +# elasticsearch: +# enabled: false \ No newline at end of file diff --git a/src/main/resources/logback-spring.xml b/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..0c59240 --- /dev/null +++ b/src/main/resources/logback-spring.xml @@ -0,0 +1,38 @@ + + + + + + + + + true + + ${logging.level} + + + ${logging.path}/crawlSchedule.log + + + + ${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd} + + 7 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n + UTF-8 + + + + + + + + diff --git a/src/main/resources/python/mainAna.py b/src/main/resources/python/mainAna.py new file mode 100644 index 0000000..e669d30 --- /dev/null +++ b/src/main/resources/python/mainAna.py @@ -0,0 +1,104 @@ +import pandas as pd +from sklearn.decomposition import PCA +from sklearn.preprocessing import StandardScaler +from flask import Flask, request, jsonify +app = Flask(__name__) + +def run(data,n_components,miss_value): + + # 将JSON数据转换为DataFrame + df = pd.DataFrame(data) + if miss_value == '用均值替代': + # 处理缺失值:用均值替代 + df.fillna(df.mean(), inplace=True) + elif miss_value == '用中位数替代': + # 处理缺失值 - 用中位数替代 + df.fillna(df.median(), inplace=True) + elif miss_value == '忽略缺失值': + # 处理缺失值 - 忽略缺失值 + df.dropna(inplace=True) + # 数据标准化 + scaler = StandardScaler() + scaled_df = scaler.fit_transform(df) + + # 执行PCA + decimal = n_components / 100 + print(decimal) + pca = PCA(n_components= decimal) # 累积方差解释率至少95% + principal_components = pca.fit_transform(scaled_df) + explained_variance = pca.explained_variance_ratio_ + + # 创建成分得分DataFrame + score_labels = [f"PC{i + 1}" for i in range(len(principal_components[0]))] + scores = pd.DataFrame(principal_components, columns=score_labels) + + # 创建载荷DataFrame + loadings = pd.DataFrame(pca.components_, columns=df.columns, index=score_labels) + + # 构建返回的结果字典 + result = { + "主成分得分": scores.to_dict(), + "主成分载荷": loadings.to_dict(), + "累积方差解释": explained_variance.tolist() + } + + # 将结果返回为JSON格式 + return jsonify(result) + + +def factor(data, n_components, miss_value): + df = pd.DataFrame(data) + + if miss_value == '用均值替代': + # 处理缺失值:用均值替代 + df.fillna(df.mean(), inplace=True) + elif miss_value == '用中位数替代': + # 处理缺失值 - 用中位数替代 + df.fillna(df.median(), inplace=True) + elif miss_value == '忽略缺失值': + # 处理缺失值 - 忽略缺失值 + df.dropna(inplace=True) + # 数据标准化 + scaler = StandardScaler() + scaled_df = scaler.fit_transform(df) + # 执行PCA,提取3个主成分 + pca = PCA(n_components=n_components) + principal_components = pca.fit_transform(scaled_df) + explained_variance = pca.explained_variance_ratio_ + # 创建成分得分DataFrame + score_labels = [f"PC{i + 1}" for i in range(len(principal_components[0]))] + scores = pd.DataFrame(principal_components, columns=score_labels) + # 创建载荷DataFrame + loadings = pd.DataFrame(pca.components_, columns=df.columns, index=score_labels) + # 打印结果 +# print("主成分得分:\n", scores) +# print("主成分载荷:\n", loadings) +# print("累积方差解释:\n", explained_variance) + # 构建返回的结果字典 + result = { + "主成分得分": scores.round(6).to_dict(), + "主成分载荷": loadings.to_dict(), + "累积方差解释": explained_variance.tolist() + } + + # 将结果返回为JSON格式 + return jsonify(result) + + +@app.route("/mainAna/", methods=["POST"]) +def get_cookie(): + # 获取 POST 请求中的 JSON 数据 + global data + new_data = request.json + print(new_data) + draw_method = new_data['draw_method'] + if draw_method == '因子数': + data = factor(new_data['data'], new_data['factor_num'], new_data['miss_value']) + elif draw_method == '累积方差解释率': + data = run(new_data['data'], new_data['cumulative_variance'],new_data['miss_value']) + + return data + + +if __name__ == '__main__': + app.run(port=9903, debug=False, host='0.0.0.0') # 启动服务 diff --git a/src/main/resources/python/relAna.py b/src/main/resources/python/relAna.py new file mode 100644 index 0000000..98f0293 --- /dev/null +++ b/src/main/resources/python/relAna.py @@ -0,0 +1,92 @@ +import pandas as pd +from scipy.stats import pearsonr, spearmanr +from flask import Flask, request, jsonify + +app = Flask(__name__) + + +## 方差分析 + + +def run_Pearson(data, alpha, miss_value): + # 将JSON数据转换为DataFrame + df = pd.DataFrame(data) + if miss_value == '用均值替代': + # 处理缺失值:用均值替代 + df.fillna(df.mean(), inplace=True) + elif miss_value == '用中位数替代': + # 处理缺失值 - 用中位数替代 + df.fillna(df.median(), inplace=True) + elif miss_value == '忽略缺失值': + # 处理缺失值 - 忽略缺失值 + df.dropna(inplace=True) + # 计算描述性统计 + desc_stats = df.describe() + # 计算Pearson相关系数矩阵和p值表 + corr_matrix = df.corr(method='pearson') + p_values = df.corr(method=lambda x, y: pearsonr(x, y)[1]) # 使用lambda函数计算p值 + # 显著性水平 + alpha = alpha + + # 构建返回的结果字典 + result = { + "描述性统计": desc_stats.to_dict(), + "相关系数矩阵": corr_matrix.to_dict(), + "P值表 (显著性水平={})".format(alpha): p_values.to_dict() + } + + # 将结果返回为JSON格式 + return jsonify(result) + + +def run_Spearman(data, alpha, miss_value): + # 将JSON数据转换为DataFrame + df = pd.DataFrame(data) + if miss_value == '用均值替代': + # 处理缺失值:用均值替代 + df.fillna(df.mean(), inplace=True) + elif miss_value == '用中位数替代': + # 处理缺失值 - 用中位数替代 + df.fillna(df.median(), inplace=True) + elif miss_value == '忽略缺失值': + # 处理缺失值 - 忽略缺失值 + df.dropna(inplace=True) + # 计算描述性统计 + desc_stats = df.describe() + # 计算Spearman相关系数矩阵和p值表 + corr_matrix, p_values = spearmanr(df, nan_policy='omit') + # 将相关系数矩阵和p值表转换为数据框 + corr_df = pd.DataFrame(corr_matrix, columns=df.columns, + index=df.columns) + p_values_df = pd.DataFrame(p_values, columns=df.columns, + index=df.columns) + # 显著性水平 + # alpha = alpha + # 构建返回的结果字典 + result = { + "描述性统计": desc_stats.to_dict(), + "相关系数矩阵": corr_df.to_dict(), + "P值表 (显著性水平={})".format(alpha): p_values_df.to_dict() + } + + # 将结果返回为JSON格式 + return jsonify(result) + + +@app.route("/relAna/", methods=["POST"]) +def get_cookie(): + # 获取 POST 请求中的 JSON 数据 + global data + new_data = request.json + print(new_data) + ana_way = new_data['ana_way'] + if ana_way == 'Pearson': + data = run_Pearson(new_data['data'], new_data['ace_level'], new_data['miss_value']) + elif ana_way == 'Spearman': + data = run_Spearman(new_data['data'], new_data['ace_level'], new_data['miss_value']) + + return data + + +if __name__ == '__main__': + app.run(port=9902, debug=False, host='0.0.0.0') # 启动服务 diff --git a/src/main/resources/python/varAna.py b/src/main/resources/python/varAna.py new file mode 100644 index 0000000..db9df68 --- /dev/null +++ b/src/main/resources/python/varAna.py @@ -0,0 +1,78 @@ +from decimal import Decimal + +import pandas as pd +import statsmodels.api as sm +from statsmodels.formula.api import ols +from flask import Flask, request, jsonify + +app = Flask(__name__) + + +def run(data, alphaid, miss_value, confidence): + df = pd.DataFrame(data) + if miss_value == '用均值替代': + # 处理缺失值:用均值替代 + df['因变量'].fillna(df['因变量'].mean(), inplace=True) + elif miss_value == '用中位数替代': + # 处理缺失值 - 用中位数替代 + df['因变量'].fillna(df['因变量'].median(), inplace=True) + elif miss_value == '忽略缺失值': + # 处理缺失值 - 忽略缺失值 + df.dropna(inplace=True) + # 获取自变量的键 + independent_variables = list(df.columns.drop('因变量')) + statistics = {} + result = {} + for var in independent_variables: + grouped = df.groupby(var)['因变量'] + group_means = grouped.mean() + group_stds = grouped.std() + print(f"{var}的各组均值:\n", group_means) + print(f"{var}的各组标准差:\n", group_stds) + # 将每个变量的均值和标准差存储到字典中 + statistics[var] = {'均值': group_means.to_dict(), '标准差': group_stds.to_dict()} + # 将统计数据添加到一个最外层的字典中 + result['描述性统计'] = statistics + # 单因素方差分析 + # 构建模型公式 + formula = '因变量 ~ ' + ' + '.join(['C({})'.format(var) for var in independent_variables]) + model = ols(formula, data=df).fit() + anova_results = sm.stats.anova_lm(model, typ=2, alpha=alphaid) # 显著性水平为0.01 + anova_results_dict = anova_results.to_dict() + + # 输出ANOVA结果 + + print("ANOVA结果:\n", anova_results_dict) + result['ANOVA结果'] = anova_results_dict + # 95% 置信区间 + # conf_int = model.conf_int(0.05) + c = Decimal('1.0') - Decimal(confidence) + conf_int = model.conf_int(float(c)) + conf_int_dict = conf_int.to_dict() + print("\n95% 置信区间:\n", conf_int_dict) + result['置信区间'] = conf_int_dict + # 提取相关统计量 + f_value = anova_results['F'].iloc[0] + p_value = anova_results['PR(>F)'].iloc[0] + # mse = anova_results['sum_sq'].iloc[1] / anova_results['df'].iloc[1] + sst = sum((df['因变量'] - df['因变量'].mean()) ** 2) + result['相关统计量'] = { + "F值": f_value, + "p值": p_value, + "总平方和 (SST)": sst + } + return jsonify(result) + + +@app.route("/varAna/", methods=["POST"]) +def get_cookie(): + # 获取 POST 请求中的 JSON 数据 + new_data = request.json + print(new_data) + + data = run(new_data['data'], new_data['alpha'], new_data['miss_value'], new_data['confidence']) + return data + + +if __name__ == '__main__': + app.run(port=9901, debug=False, host='0.0.0.0') # 启动服务