
Script analysis application

master · commit badb0bbee9 by 55007, 6 months ago
  1. .classpath (+40)
  2. .gitignore (+1)
  3. .project (+23)
  4. .settings/org.eclipse.core.resources.prefs (+4)
  5. .settings/org.eclipse.jdt.core.prefs (+8)
  6. .settings/org.eclipse.m2e.core.prefs (+4)
  7. README.md (+3)
  8. pom.xml (+165)
  9. src/main/java/com/bfd/data_statistical/DataStatisticalApplication.java (+90)
  10. src/main/java/com/bfd/data_statistical/controller/ApiController.java (+115)
  11. src/main/java/com/bfd/data_statistical/service/DataProcessService.java (+19)
  12. src/main/java/com/bfd/data_statistical/service/VarAnaService.java (+30)
  13. src/main/java/com/bfd/data_statistical/service/impl/DataProcessServiceImpl.java (+209)
  14. src/main/java/com/bfd/data_statistical/service/impl/VarAnaServiceImpl.java (+171)
  15. src/main/java/com/bfd/data_statistical/util/Config.java (+59)
  16. src/main/java/com/bfd/data_statistical/util/KfkUtil.java (+81)
  17. src/main/java/com/bfd/data_statistical/util/Utils.java (+485)
  18. src/main/java/com/bfd/data_statistical/util/varPojo.java (+22)
  19. src/main/resources/application.yml (+38)
  20. src/main/resources/logback-spring.xml (+38)
  21. src/main/resources/python/mainAna.py (+104)
  22. src/main/resources/python/relAna.py (+92)
  23. src/main/resources/python/varAna.py (+78)

40
.classpath

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
<attribute name="test" value="true"/>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

1
.gitignore

@ -0,0 +1 @@
/target/

23
.project

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>data_statistical</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

4
.settings/org.eclipse.core.resources.prefs

@ -0,0 +1,4 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding/<project>=UTF-8

8
.settings/org.eclipse.jdt.core.prefs

@ -0,0 +1,8 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.problem.enablePreviewFeatures=disabled
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.problem.reportPreviewFeatures=ignore
org.eclipse.jdt.core.compiler.release=disabled
org.eclipse.jdt.core.compiler.source=1.8

4
.settings/org.eclipse.m2e.core.prefs

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

3
README.md

@ -0,0 +1,3 @@
Script analysis application
Deployment location: /opt/analyze/apps/data_Statistical

165
pom.xml

@ -0,0 +1,165 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.bfd</groupId>
<artifactId>data_Statistical</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>data_Statistical</name>
<description>data_Statistical</description>
<properties>
<java.version>1.8</java.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<spring-boot.version>2.2.4.RELEASE</spring-boot.version>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version> <!-- pick the version as needed -->
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>2.0.12</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.8.27</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>2.7.1</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.11.0</version>
</dependency>
<dependency>
<groupId>org.thymeleaf</groupId>
<artifactId>thymeleaf</artifactId>
<version>3.0.12.RELEASE</version> <!-- pick the version you need -->
</dependency>
<dependency>
<groupId>de.codecentric</groupId>
<artifactId>spring-boot-admin-client</artifactId>
<version>2.2.4</version>
</dependency>
</dependencies>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>${spring-boot.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<!-- File types or paths that are not packaged into the jar -->
<excludes>
<exclude>*.properties</exclude>
<exclude>*.yml</exclude>
<exclude>*.yaml</exclude>
</excludes>
<archive>
<manifest>
<!-- Main class to execute -->
<mainClass>com.bfd.data_statistical.DataStatisticalApplication</mainClass>
<!-- Whether to add third-party jars to the manifest Class-Path -->
<addClasspath>true</addClasspath>
<!-- Class-Path prefix in the generated manifest; third-party jars are copied to lib/, so the prefix is lib/ -->
<classpathPrefix>lib/</classpathPrefix>
<!-- Do not record timestamped SNAPSHOT versions in MANIFEST.MF -->
<useUniqueVersions>false</useUniqueVersions>
</manifest>
<manifestEntries>
<!-- Add the config directory to the Class-Path -->
<Class-Path>config/</Class-Path>
</manifestEntries>
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib/</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<executions>
<execution>
<id>copy-resources</id>
<phase>package</phase>
<goals>
<goal>copy-resources</goal>
</goals>
<configuration>
<resources>
<!-- Copy the config files to a dedicated directory -->
<resource>
<directory>src/main/resources/</directory>
<includes>
<include>*.properties</include>
<include>*.yml</include>
<include>*.yaml</include>
</includes>
</resource>
</resources>
<outputDirectory>${project.build.directory}/config</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
</plugins>
</build>
</project>

90
src/main/java/com/bfd/data_statistical/DataStatisticalApplication.java

@ -0,0 +1,90 @@
package com.bfd.data_statistical;
import com.alibaba.fastjson2.JSONObject;
import com.bfd.data_statistical.service.impl.VarAnaServiceImpl;
import com.bfd.data_statistical.util.Config;
import com.bfd.data_statistical.util.varPojo;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.ConfigurableApplicationContext;
import org.thymeleaf.TemplateEngine;
import org.thymeleaf.context.Context;
import javax.annotation.Resource;
import java.util.Date;
import java.util.Map;
@SpringBootApplication
@Slf4j
public class DataStatisticalApplication {
@Resource
VarAnaServiceImpl varAnaService;
public static void main(String[] args) {
ConfigurableApplicationContext run = SpringApplication.run(DataStatisticalApplication.class, args);
DataStatisticalApplication bean = run.getBean(DataStatisticalApplication.class);
bean.run();
}
public void run() {
new Thread(new Runnable() {
@Override
public void run() {
while (true) {
try {
if (Config.taskQueue.size() == 0) {
log.info("没有需要计算的");
Thread.sleep(1000 * 10);
} else {
Map take = Config.taskQueue.take();
Long time = (Long) take.get("time");
Long time2 = System.currentTimeMillis();
// Convert the timestamps to Date objects
Date date1 = new Date(time);
Date date2 = new Date(time2);
// Difference in milliseconds
long diffMillis = Math.abs(date2.getTime() - date1.getTime());
// Convert to minutes
long diffMinutes = diffMillis / (60 * 1000);
// Check whether at least one minute has passed since the task was queued; if not, wait it out
if (diffMinutes < 1) {
Thread.sleep(1000 * 60);
} else {
// Config.taskQueue.put(take);
}
log.info("At least one minute has elapsed since the task was queued");
String type = (String) take.get("type");
log.info(type + " data group ID --> " + take.get("businessKey") + ", input finished, starting computation");
switch (type) {
case "var":
varAnaService.singleFactor((String) take.get("businessKey"), (JSONObject) take.get("data"));
break;
case "rel":
varAnaService.correlationAna((String) take.get("businessKey"), (JSONObject) take.get("data"));
break;
case "main":
varAnaService.mainAna((String) take.get("businessKey"), (JSONObject) take.get("data"));
break;
}
Config.dataMap_time.remove((String) take.get("businessKey"));
}
} catch (Exception e) {
log.error("队列失败",e);
}
}
}
}).start();
}
}

115
src/main/java/com/bfd/data_statistical/controller/ApiController.java

@ -0,0 +1,115 @@
package com.bfd.data_statistical.controller;
import com.alibaba.fastjson2.JSONObject;
import com.bfd.data_statistical.service.DataProcessService;
import com.bfd.data_statistical.util.Config;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.*;
import javax.annotation.Resource;
import java.util.HashMap;
import java.util.Map;
/**
* @author guowei
*/
@RestController
@Slf4j
@RequestMapping(value = "/anaApi")
@CrossOrigin(origins = "*", maxAge = 3600)
public class ApiController {
@Resource
DataProcessService dataProcessService;
/**
* ANOVA (analysis of variance) API
* @param jsonObject
* @return
*/
@RequestMapping(value = "/varAna", method = RequestMethod.POST, produces = "application/json")
@ResponseBody
public String varAna(@RequestBody JSONObject jsonObject) {
log.info("方差分析参数:"+jsonObject);
JSONObject data = jsonObject.getJSONObject("data");
String businessKey = data.getString("businessKey");
synchronized (Config.lock) {
if (!Config.dataMap_time.containsKey(businessKey)) {
Config.dataMap_time.put(businessKey, jsonObject);
Map map = new HashMap<>();
try {
map.put("businessKey", businessKey);
map.put("data", jsonObject);
map.put("time", System.currentTimeMillis());
map.put("type", "var");
Config.taskQueue.put(map);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
dataProcessService.varAna(jsonObject);
return "success";
}
/**
* Correlation analysis API
* @param jsonObject
* @return
*/
@RequestMapping(value = "/relativityAna", method = RequestMethod.POST, produces = "application/json")
@ResponseBody
public String relativityAna(@RequestBody JSONObject jsonObject) {
log.info("相关性分析参数:"+jsonObject);
JSONObject data = jsonObject.getJSONObject("data");
String businessKey = data.getString("businessKey");
synchronized (Config.lock) {
if (!Config.dataMap_time.containsKey(businessKey)) {
Config.dataMap_time.put(businessKey, jsonObject);
Map map = new HashMap<>();
try {
map.put("businessKey", businessKey);
map.put("data", jsonObject);
map.put("time", System.currentTimeMillis());
map.put("type", "rel");
Config.taskQueue.put(map);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
dataProcessService.relativityAna(jsonObject);
return "success";
}
/**
* Principal component analysis API
* @param jsonObject
* @return
*/
@RequestMapping(value = "/mainAna", method = RequestMethod.POST, produces = "application/json")
@ResponseBody
public String mainAna(@RequestBody JSONObject jsonObject) {
log.info("主成分分析参数:"+jsonObject);
JSONObject data = jsonObject.getJSONObject("data");
String businessKey = data.getString("businessKey");
synchronized (Config.lock) {
if (!Config.dataMap_time.containsKey(businessKey)) {
Config.dataMap_time.put(businessKey, jsonObject);
Map map = new HashMap<>();
try {
map.put("businessKey", businessKey);
map.put("data", jsonObject);
map.put("time", System.currentTimeMillis());
map.put("type", "main");
Config.taskQueue.put(map);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
dataProcessService.mainAna(jsonObject);
return "success";
}
}
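
For reference, a minimal client sketch for the /anaApi/varAna endpoint, pieced together from the fields that ApiController and DataProcessServiceImpl actually read (businessKey under data; ind_var, dep_var, significance_level, miss_value and confidence_interval under input). Host, port, the "record" key and all concrete values are illustrative assumptions, not taken from this commit.

import com.alibaba.fastjson2.JSONObject;
import okhttp3.*;

public class VarAnaClientSketch {
    public static void main(String[] args) throws Exception {
        // "data" carries the businessKey plus the raw record that the JSONPath expressions below read from
        JSONObject data = new JSONObject();
        data.put("businessKey", "demo-key-001");                   // hypothetical id
        data.put("record", "{\"group\":\"A\",\"sales\":\"200\"}"); // JSON string addressed as record:$.xxx

        // "input" mirrors the fields read in DataProcessServiceImpl.varAna
        JSONObject indVar = new JSONObject();
        indVar.put("key", "分组");                 // independent-variable column name (illustrative)
        indVar.put("value", "record:$.group");     // dataKey:jsonPath convention handled by Utils.jsonParse
        JSONObject input = new JSONObject();
        input.put("ind_var", java.util.Collections.singletonList(indVar));
        input.put("dep_var", "record:$.sales");
        input.put("significance_level", 0.05);
        input.put("miss_value", "忽略缺失值");
        input.put("confidence_interval", "95%");

        JSONObject body = new JSONObject();
        body.put("data", data);
        body.put("input", input);

        OkHttpClient client = new OkHttpClient();
        Request request = new Request.Builder()
                .url("http://localhost:9900/anaApi/varAna")        // port from application.yml; host assumed
                .post(RequestBody.create(MediaType.parse("application/json"), body.toJSONString()))
                .build();
        try (Response response = client.newCall(request).execute()) {
            System.out.println(response.body().string());          // the controller replies with "success"
        }
    }
}

The relativityAna and mainAna endpoints follow the same shape, except that input carries a variables array plus ana_way/ace_level (correlation) or draw_method/factor_num/cumulative_variance (PCA).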

19
src/main/java/com/bfd/data_statistical/service/DataProcessService.java

@ -0,0 +1,19 @@
package com.bfd.data_statistical.service;
import com.alibaba.fastjson2.JSONObject;
import org.springframework.stereotype.Service;
/**
* @author guowei
*/
@Service
public interface DataProcessService {
void varAna(JSONObject jsonObject);
void relativityAna(JSONObject jsonObject);
void mainAna(JSONObject jsonObject);
}

30
src/main/java/com/bfd/data_statistical/service/VarAnaService.java

@ -0,0 +1,30 @@
package com.bfd.data_statistical.service;
import com.alibaba.fastjson2.JSONObject;
import org.springframework.stereotype.Service;
/**
* @author guowei
*/
@Service
public interface VarAnaService {
/**
* Single-factor ANOVA
*/
void singleFactor(String businessKey,JSONObject jsonObject);
/**
* Two-factor ANOVA
*/
void doubleFactor(JSONObject jsonObject);
/**
* Multi-factor ANOVA
*/
void multipleFactor(JSONObject jsonObject);
void correlationAna(String businessKey,JSONObject jsonObject);
void mainAna(String businessKey,JSONObject jsonObject);
}

209
src/main/java/com/bfd/data_statistical/service/impl/DataProcessServiceImpl.java

@ -0,0 +1,209 @@
package com.bfd.data_statistical.service.impl;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import com.bfd.data_statistical.service.DataProcessService;
import com.bfd.data_statistical.util.Config;
import com.bfd.data_statistical.util.Utils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @author guowei
*/
@Service
@Slf4j
public class DataProcessServiceImpl implements DataProcessService {
@Resource
VarAnaServiceImpl varAnaService;
@Override
public void varAna(JSONObject jsonObject) {
JSONObject data = jsonObject.getJSONObject("data");
String businessKey = data.getString("businessKey");
// Input parameters
JSONObject input = jsonObject.getJSONObject("input");
log.info("Data group ID --> " + businessKey + "; data --> " + jsonObject);
JSONArray ind_var = input.getJSONArray("ind_var");
// Dependent variable
String dep_var = input.getString("dep_var");
// Significance level
Double significance_level = input.getDouble("significance_level");
// Missing-value handling
String miss_value = input.getString("miss_value");
// Confidence interval
String confidence_interval = input.getString("confidence_interval");
// String percentageStr = confidence_interval.replace("%", "");
// double percentage = Double.parseDouble(percentageStr); // 解析为double类型数字
// double decimal = percentage / 100.0; // 转换为十进制小数
synchronized (Config.lock) {
if (!Config.dataMap.containsKey(businessKey)) {
Map dataMap = new HashMap<>();
Map vData = new HashMap<>();
ind_var.forEach(value -> {
JSONObject item = JSONObject.parseObject(JSON.toJSONString(value));
vData.put(item.getString("key"), new ArrayList<>());
});
vData.put("因变量", new ArrayList<>());
dataMap.put("data", vData);
Config.dataMap.put(businessKey, dataMap);
}
Map dataMap = (Map) Config.dataMap.get(businessKey);
Map vData = (Map) dataMap.get("data");
ind_var.forEach(value -> {
JSONObject item = JSONObject.parseObject(JSON.toJSONString(value));
List indList = (List) vData.get(item.getString("key"));
String indValue = (String) Utils.jsonParse(item.getString("value"), data);
indList.add(indValue);
vData.put(item.getString("key"), indList);
});
List dep = (List) vData.get("因变量");
String depValue = (String) Utils.jsonParse(dep_var, data);
Double value;
if (depValue.isEmpty()) {
value = Double.NaN;
} else {
value = Double.valueOf(depValue);
}
dep.add(value);
vData.put("因变量", dep);
dataMap.put("data", vData);
dataMap.put("significance_level", significance_level);
dataMap.put("miss_value", miss_value);
dataMap.put("confidence_interval", confidence_interval);
Config.dataMap.put(businessKey, dataMap);
}
// If the end marker is present, this data group is complete and can be computed
// if (data.containsKey("isLast") ) {
// log.info("Data group ID --> " + businessKey + ", input finished, starting computation");
// varAnaService.singleFactor(businessKey, jsonObject);
// }
}
@Override
public void relativityAna(JSONObject jsonObject) {
JSONObject data = jsonObject.getJSONObject("data");
String businessKey = data.getString("businessKey");
// Input parameters
JSONObject input = jsonObject.getJSONObject("input");
log.info("(Correlation analysis) data group ID --> " + businessKey + "; data --> " + jsonObject);
// Variables
JSONArray variables = input.getJSONArray("variables");
synchronized (Config.lock) {
if (!Config.dataMap_rel.containsKey(businessKey)) {
Map dataMap = new HashMap<>();
Map vData = new HashMap<>();
variables.forEach(value -> {
JSONObject item = JSONObject.parseObject(JSON.toJSONString(value));
vData.put(item.getString("key"), new ArrayList<>());
});
dataMap.put("data", vData);
Config.dataMap_rel.put(businessKey, dataMap);
}
Map dataMap = (Map) Config.dataMap_rel.get(businessKey);
Map vData = (Map) dataMap.get("data");
variables.forEach(value -> {
JSONObject item = JSONObject.parseObject(JSON.toJSONString(value));
List vList = (List) vData.get(item.getString("key"));
String itemvalue = item.getString("value");
String vValue = (String) Utils.jsonParse(itemvalue, data);
Double dvalue;
if (vValue.isEmpty()) {
dvalue = Double.NaN;
} else {
dvalue = Double.valueOf(vValue);
}
vList.add(dvalue);
vData.put(item.getString("key"), vList);
});
dataMap.put("data", vData);
dataMap.put("ana_way", input.getString("ana_way"));
dataMap.put("ace_level", input.getDouble("ace_level"));
dataMap.put("miss_value", input.getString("miss_value"));
Config.dataMap_rel.put(businessKey, dataMap);
}
// If the end marker is present, this data group is complete and can be computed
// if (data.containsKey("isLast")) {
// log.info("Data group ID --> " + businessKey + ", input finished, starting computation");
// varAnaService.correlationAna(businessKey,jsonObject);
// }
}
@Override
public void mainAna(JSONObject jsonObject) {
JSONObject data = jsonObject.getJSONObject("data");
String businessKey = data.getString("businessKey");
// Input parameters
JSONObject input = jsonObject.getJSONObject("input");
log.info("(Principal component analysis) data group ID --> " + businessKey + "; data --> " + jsonObject);
// Variables
JSONArray variables = input.getJSONArray("variables");
synchronized (Config.lock) {
if (!Config.dataMap_main.containsKey(businessKey)) {
Map dataMap = new HashMap<>();
Map vData = new HashMap<>();
variables.forEach(value -> {
JSONObject item = JSONObject.parseObject(JSON.toJSONString(value));
vData.put(item.getString("key"), new ArrayList<>());
});
dataMap.put("data", vData);
Config.dataMap_main.put(businessKey, dataMap);
}
Map dataMap = (Map) Config.dataMap_main.get(businessKey);
Map vData = (Map) dataMap.get("data");
variables.forEach(value -> {
JSONObject item = JSONObject.parseObject(JSON.toJSONString(value));
List vList = (List) vData.get(item.getString("key"));
String itemvalue = item.getString("value");
String vValue = (String) Utils.jsonParse(itemvalue, data);
Double dvalue;
if (vValue.isEmpty()) {
dvalue = Double.NaN;
} else {
dvalue = Double.valueOf(vValue);
}
vList.add(dvalue);
vData.put(item.getString("key"), vList);
});
dataMap.put("data", vData);
dataMap.put("draw_method", input.getString("draw_method"));
dataMap.put("miss_value", input.getString("miss_value"));
dataMap.put("factor_num", input.getIntValue("factor_num"));
int cumulative_variance = input.getIntValue("cumulative_variance");
// Double cum = Double.valueOf(cumulative_variance);
dataMap.put("cumulative_variance", cumulative_variance);
Config.dataMap_main.put(businessKey, dataMap);
}
// If the end marker is present, this data group is complete and can be computed
// if (data.containsKey("isLast")) {
// log.info("Data group ID --> " + businessKey + ", input finished, starting computation");
// varAnaService.mainAna(businessKey,jsonObject);
// }
}
}

171
src/main/java/com/bfd/data_statistical/service/impl/VarAnaServiceImpl.java

@ -0,0 +1,171 @@
package com.bfd.data_statistical.service.impl;
import cn.hutool.core.util.IdUtil;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.bfd.data_statistical.service.VarAnaService;
import com.bfd.data_statistical.util.Config;
import com.bfd.data_statistical.util.KfkUtil;
import com.bfd.data_statistical.util.Utils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* @author guowei
*/
@Service
@Slf4j
public class VarAnaServiceImpl implements VarAnaService {
@Override
public void singleFactor(String businessKey, JSONObject jsonObject) {
Map resultMap = new HashMap<>(32);
Map results = new HashMap<>();
try {
//data
Map dataMap = (Map) Config.dataMap.get(businessKey);
Map callMap = new HashMap<>();
callMap.put("data", dataMap.get("data"));
callMap.put("alpha", dataMap.get("significance_level"));
callMap.put("miss_value", dataMap.get("miss_value"));
String confidence_interval = (String) dataMap.get("confidence_interval");
String percentageStr = confidence_interval.replace("%", "");
double percentage = Double.parseDouble(percentageStr); // parse the percentage as a double
double decimal = percentage / 100.0; // convert it to a decimal fraction
callMap.put("confidence", decimal);
log.info("Calling ANOVA service: request -> " + JSON.toJSONString(callMap));
String html = Utils.callApiVar(JSON.toJSONString(callMap));
log.info("Calling ANOVA service: result -> " + html);
// result.getJSONObject("ANOVA结果")
String assembleHtml = Utils.assembleHtml_var(html.replace("NaN", "\"NaN\""), confidence_interval);
JSONObject output = jsonObject.getJSONObject("output");
if (output.containsKey("id")) {
resultMap.put("id", IdUtil.randomUUID());
}
resultMap.put("content", assembleHtml);
results.put("status",1);
results.put("message","成功");
} catch (Exception e) {
log.error("方差分析应用调用失败", e);
resultMap.put("content", "方差分析应用调用失败");
results.put("status",2);
results.put("message","失败");
}
resultMap.put("isLast", 1);
results.put("results", JSON.toJSONString(resultMap));
jsonObject.put("result", results);
KfkUtil.sendKafka(JSON.toJSONString(jsonObject));
log.info("处理完成,result:" + JSON.toJSONString(results));
Config.dataMap.remove(businessKey);
// Utils.callScript(JSON.toJSONString(callMap),significance_level,miss_value,confidence_interval);
}
@Override
public void doubleFactor(JSONObject jsonObject) {
}
@Override
public void multipleFactor(JSONObject jsonObject) {
}
@Override
public void correlationAna(String businessKey, JSONObject jsonObject) {
Map results = new HashMap<>();
Map resultMap = new HashMap<>(32);
try {
Map dataMap_rel = (Map) Config.dataMap_rel.get(businessKey);
log.info("调用相关性分析应用: request->" + JSON.toJSONString(dataMap_rel));
String html = Utils.callApiRel(JSON.toJSONString(dataMap_rel));
log.info("调用相关性分析应用: result->" + html);
String assembleHtml = Utils.assembleHtml_rel(html.replace("NaN", "\"NaN\""), (Double) dataMap_rel.get("ace_level"));
JSONObject output = jsonObject.getJSONObject("output");
if (output.containsKey("id")) {
resultMap.put("id", IdUtil.randomUUID());
}
resultMap.put("content", assembleHtml);
results.put("status",1);
results.put("message","成功");
} catch (Exception e) {
log.error("相关性分析应用调用失败", e);
resultMap.put("content", "相关性分析应用调用失败");
results.put("status",2);
results.put("message","失败");
}
resultMap.put("isLast", 1);
results.put("results", JSON.toJSONString(resultMap));
jsonObject.put("result", results);
KfkUtil.sendKafka(JSON.toJSONString(jsonObject));
log.info("处理完成,result:" + JSON.toJSONString(results));
Config.dataMap_rel.remove(businessKey);
}
@Override
public void mainAna(String businessKey, JSONObject jsonObject) {
Map results = new HashMap<>();
Map resultMap = new HashMap<>(32);
try {
Map dataMap_rel = (Map) Config.dataMap_main.get(businessKey);
log.info("调用主成分分析应用: request->" + JSON.toJSONString(dataMap_rel));
String html = Utils.callApiMain(JSON.toJSONString(dataMap_rel));
log.info("调用主成分分析应用: result->" + html);
String assembleHtml = Utils.assembleHtml_main(html.replace("NaN", "\"NaN\""));
JSONObject output = jsonObject.getJSONObject("output");
if (output.containsKey("id")) {
resultMap.put("id", IdUtil.randomUUID());
}
resultMap.put("content", assembleHtml);
results.put("status",1);
results.put("message","成功");
} catch (Exception e) {
log.error("主成分分析应用调用失败", e);
resultMap.put("content", "主成分分析应用调用失败");
results.put("status",2);
results.put("message","失败");
}
resultMap.put("isLast", 1);
results.put("results", JSON.toJSONString(resultMap));
jsonObject.put("result", results);
KfkUtil.sendKafka(JSON.toJSONString(jsonObject));
log.info("处理完成,result:" + JSON.toJSONString(results));
Config.dataMap_main.remove(businessKey);
}
public static void main(String[] args) {
try {
Runtime rt = Runtime.getRuntime();
Process pr = rt.exec("python D:\\PycharmProjects\\projects\\应用\\demo1.py");
BufferedReader input = new BufferedReader(new InputStreamReader(pr.getInputStream()));
String line = null;
while ((line = input.readLine()) != null) {
System.out.println(line);
}
int exitVal = pr.waitFor();
System.out.println("Exited with error code " + exitVal);
} catch (IOException | InterruptedException e) {
e.printStackTrace();
}
}
}

59
src/main/java/com/bfd/data_statistical/util/Config.java

@ -0,0 +1,59 @@
package com.bfd.data_statistical.util;
import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue;
/**
* @author guowei
*/
@Component
public class Config implements ApplicationContextAware {
public static final Object lock = new Object();
/**
* ANOVA (analysis of variance) data
*/
public static Map dataMap = new HashMap<>();
/**
* Correlation analysis data
*/
public static Map dataMap_rel = new HashMap<>();
/**
* Principal component analysis data
*/
public static Map dataMap_main = new HashMap<>();
public static Map dataMap_time = new HashMap<>();
public static LinkedBlockingQueue<Map> taskQueue = new LinkedBlockingQueue<Map>(1000);
public static LinkedBlockingQueue<Map> taskQueue_rel = new LinkedBlockingQueue<Map>(1000);
public static LinkedBlockingQueue<Map> taskQueue_main = new LinkedBlockingQueue<Map>(1000);
public static Boolean loop = false;
private static ApplicationContext app;
@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
app = applicationContext;
}
public static String getMainUrl(){
return app.getEnvironment().getProperty("api.main");
}
public static String getRelUrl(){
return app.getEnvironment().getProperty("api.rel");
}
public static String getVarUrl(){
return app.getEnvironment().getProperty("api.var");
}
}

81
src/main/java/com/bfd/data_statistical/util/KfkUtil.java

@ -0,0 +1,81 @@
package com.bfd.data_statistical.util;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.util.Properties;
/**
* @author guowei
* Kafka utility class
*/
@Component
@Slf4j
public class KfkUtil {
private static String topic;
private static String brokerList;
@Value("${crawl.kafka.topic}")
public void setTopic(String topic) {
KfkUtil.topic = topic;
}
@Value("${crawl.kafka.brokers}")
public void setBrokerList(String brokerList) {
KfkUtil.brokerList = brokerList;
}
private static KafkaProducer<String, String> kafkaProducer;
public static int num = 0;
/**
* Get the KafkaProducer instance
*/
public static KafkaProducer<String, String> getProducer() {
if (kafkaProducer == null) {
Properties props = new Properties();
// Kafka broker addresses
props.put("bootstrap.servers", brokerList);
// A message counts as committed only once all followers have acknowledged it
props.put("acks", "all");
// Number of retries on send failure
props.put("retries", 3);
// Default batch size in bytes; the producer batches records to reduce the number of requests
props.put("batch.size", 16384);
// Once a batch reaches batch.size it is sent immediately, regardless of linger.ms below
// linger.ms adds a small delay: instead of sending each record at once, the producer waits up to this long so more records can be batched together
props.put("linger.ms", 1);
// Total memory the producer may use to buffer records
props.put("buffer.memory", 33554432);
props.put("key.serializer",
"org.apache.kafka.common.serialization.StringSerializer");
props.put("value.serializer",
"org.apache.kafka.common.serialization.StringSerializer");
kafkaProducer = new KafkaProducer<String, String>(props);
}
return kafkaProducer;
}
/**
* Close the KafkaProducer instance
*/
public static void closeProducer() {
if (kafkaProducer != null) {
log.info("----------close producer----------");
kafkaProducer.close();
kafkaProducer = null;
}
}
public static void sendKafka(String resultData) {
KafkaProducer<String, String> producer = getProducer();
ProducerRecord<String, String> se = new ProducerRecord<String, String>(topic, resultData);
producer.send(se);
log.info("发送kafka成功");
// num++;
}
}
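
KafkaProducer.send is asynchronous, so the success log in sendKafka only confirms that the record was handed to the producer's buffer, not that the broker accepted it. If delivery failures need to be surfaced, one option is to send with a callback; a minimal sketch of such a variant, reusing the existing getProducer() (the method name sendKafkaWithCallback is hypothetical, not part of this commit):

import org.apache.kafka.clients.producer.Callback;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

public static void sendKafkaWithCallback(String resultData) {
    KafkaProducer<String, String> producer = getProducer();
    ProducerRecord<String, String> record = new ProducerRecord<>(topic, resultData);
    producer.send(record, new Callback() {
        @Override
        public void onCompletion(RecordMetadata metadata, Exception exception) {
            if (exception != null) {
                // the broker rejected the record or the send timed out
                log.error("Kafka send failed", exception);
            } else {
                log.info("Kafka send acknowledged, partition=" + metadata.partition() + ", offset=" + metadata.offset());
            }
        }
    });
}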

485
src/main/java/com/bfd/data_statistical/util/Utils.java

@ -0,0 +1,485 @@
package com.bfd.data_statistical.util;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONObject;
import com.alibaba.fastjson2.JSONPath;
import okhttp3.*;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;
/**
* @author guowei
*/
@Component
public class Utils {
public static Object jsonParse(String key, Map data) {
String[] keySplit = key.split(":");
String jsonPath = keySplit[1];
String dataJson = (String) data.get(keySplit[0]);
JSONObject dataJsonObject = JSON.parseObject(dataJson);
Object dataValue = JSONPath.eval(dataJsonObject, jsonPath);
return dataValue;
}
public static String callApiVar(String data) throws IOException {
OkHttpClient client = new OkHttpClient().newBuilder()
.build();
MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, data);
// RequestBody body = RequestBody.create(mediaType, "{\"data\": { \"因变量\": [ 200,210,190,220,230,225,210,215,205],\"广告类型\": [\"A\", \"A\", \"A\", \"B\", \"B\", \"B\", \"C\", \"C\",\"C\"]},\"alpha\": \"0.01\",\"confidence\": \"0.05\"}");
Request request = new Request.Builder()
.url(Config.getVarUrl())
.method("POST", body)
.addHeader("User-Agent", "Apifox/1.0.0 (https://apifox.com)")
.addHeader("Content-Type", "application/json")
.build();
Response response = client.newCall(request).execute();
return response.body().string();
}
public static String callApiRel(String data) throws IOException {
OkHttpClient client = new OkHttpClient().newBuilder()
.build();
MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, data);
// RequestBody body = RequestBody.create(mediaType, "{\"data\":{\"GDP增长率\":[3.5,3.0,2.9,3.2,2.8,-5.0,4.0],\"失业率\":[5.0,4.9,5.2,4.5,4.2,7.8,6.1],\"年份\":[2015,2016,2017,2018,2019,2020,2021],\"通货膨胀率\":[1.5,1.7,2.0,2.1,2.5,1.2,2.9]},\"method\":\"pearson\"}");
Request request = new Request.Builder()
.url(Config.getRelUrl())
.method("POST", body)
.addHeader("User-Agent", "Apifox/1.0.0 (https://apifox.com)")
.addHeader("Content-Type", "application/json")
.build();
Response response = client.newCall(request).execute();
return response.body().string();
}
public static String callApiMain(String data) throws IOException {
OkHttpClient client = new OkHttpClient().newBuilder()
.build();
MediaType mediaType = MediaType.parse("application/json");
RequestBody body = RequestBody.create(mediaType, data);
// RequestBody body = RequestBody.create(mediaType, "{\"data\":{\"利润率\":[10.2,10.5,10.8,11.1,11.4],\"员工满意度指数\":[82,83,85,86,87],\"客户满意度指数\":[85,88,87,90,91],\"市场份额增长\":[5.1,5.3,5.6,5.9,6.2],\"营业收入增长率\":[5.5,5.7,5.4,5.9,6.2]}}");
Request request = new Request.Builder()
.url(Config.getMainUrl())
.method("POST", body)
.addHeader("User-Agent", "Apifox/1.0.0 (https://apifox.com)")
.addHeader("Content-Type", "application/json")
.build();
Response response = client.newCall(request).execute();
return response.body().string();
}
public static String assembleHtml(String json) {
JSONObject jsonObject = JSONObject.parseObject(json);
StringBuilder htmlTable = new StringBuilder();
htmlTable.append("<table>");
// Iterate over JSON fields and create table rows
for (String key : jsonObject.keySet()) {
Object value = jsonObject.get(key);
htmlTable.append("<tr>");
htmlTable.append("<th>").append(key).append("</th>");
if (value instanceof JSONObject) {
htmlTable.append("<td>").append(convertJsonToHtmlTable((JSONObject) value)).append("</td>");
} else {
htmlTable.append("<td>").append(value).append("</td>");
}
htmlTable.append("</tr>");
}
htmlTable.append("</table>");
return String.valueOf(htmlTable);
}
private static String convertJsonToHtmlTable(JSONObject jsonObject) {
StringBuilder htmlTable = new StringBuilder();
htmlTable.append("<table>");
// Iterate over JSON fields and create table rows
for (String key : jsonObject.keySet()) {
Object value = jsonObject.get(key);
htmlTable.append("<tr>");
htmlTable.append("<th>").append(key).append("</th>");
if (value instanceof JSONObject) {
htmlTable.append("<td>").append(convertJsonToHtmlTable((JSONObject) value)).append("</td>");
} else {
htmlTable.append("<td>").append(value).append("</td>");
}
htmlTable.append("</tr>");
}
htmlTable.append("</table>");
return htmlTable.toString();
}
public static String assembleHtml_var(String data, String confidence_interval) {
// String jsonData = "{\"ANOVA结果\":{\"F\":{\"C(广告类型)\":7.784745762711876,\"Residual\":\"NaN\"},\"PR(>F)\":{\"C(广告类型)\":0.02152454745613121,\"Residual\":\"NaN\"},\"df\":{\"C(广告类型)\":2.0,\"Residual\":6.0},\"sum_sq\":{\"C(广告类型)\":520.7482993197281,\"Residual\":200.68027210884333}},\"描述性统计\":{\"广告类型\":{\"均值\":{\"A\":208.09523809523807,\"B\":225.0,\"C\":209.76190476190473},\"标准差\":{\"A\":7.3308591992098275,\"B\":5.0,\"C\":4.647433641891219}}},\"相关统计量\":{\"F值\":7.784745762711876,\"p值\":0.02152454745613121,\"总平方和 (SST)\":721.4285714285713},\"置信区间\":{\"0\":{\"C(广告类型)[T.B]\":5.350308374503625,\"C(广告类型)[T.C]\":-9.887786863591625,\"Intercept\":199.9250056510876},\"1\":{\"C(广告类型)[T.B]\":28.45921543502017,\"C(广告类型)[T.C]\":13.221120196924929,\"Intercept\":216.2654705393885}}}";
// String html = assembleHtml(jsonData);
JSONObject jsonObject = JSONObject.parseObject(data);
StringBuilder htmlTable = new StringBuilder();
// htmlTable.append("<style>")
// .append("table { border-collapse: collapse; width: 100%; }")
// .append("th, td { border: 1px solid black; padding: 8px; text-align: left; }")
// .append("th { background-color: #f2f2f2; }")
// .append("</style>");
htmlTable.append("<div>");
htmlTable.append("<table class=\"data-table-class\">");
htmlTable.append("<caption>各组均值和标准差</caption>");
JSONObject descriptiveStats = jsonObject.getJSONObject("描述性统计");
Set<String> desKeys = descriptiveStats.keySet();
for (String key : desKeys) {
JSONObject item = descriptiveStats.getJSONObject(key);
JSONObject means = item.getJSONObject("均值");
JSONObject stdDevs = item.getJSONObject("标准差");
htmlTable.append("<thead><tr><th>" + key + "</th><th>均值</th><th>标准差</th></tr></thead>");
htmlTable.append("<tbody>");
Set<String> itemKeys = means.keySet();
for (String itemkey : itemKeys) {
htmlTable.append("<tr>")
.append("<th>").append(itemkey).append("</th>")
.append("<td>").append(String.format("%.2f", means.getDouble(itemkey))).append("</td>")
.append("<td>").append(String.format("%.2f", stdDevs.getDouble(itemkey))).append("</td>")
.append("</tr>");
}
htmlTable.append("</tbody>");
}
htmlTable.append("</table>");
JSONObject anovaResults = jsonObject.getJSONObject("ANOVA结果");
htmlTable.append("<table class=\"data-table-class\">");
htmlTable.append("<caption>ANOVA结果</caption>");
htmlTable.append("<thead><tr><th>变量</th><th>sum_sq</th><th>df</th><th>F值</th><th>p值</th></tr></thead>");
htmlTable.append("<tbody>");
// Process C(广告类型)
Set<String> keys = anovaResults.getJSONObject("sum_sq").keySet();
for (String key : keys) {
htmlTable.append("<tr>")
.append("<th>").append(key).append("</th>")
.append("<td>").append(anovaResults.getJSONObject("sum_sq").getDouble(key)).append("</td>")
.append("<td>").append(anovaResults.getJSONObject("df").getDouble(key)).append("</td>")
.append("<td>").append(anovaResults.getJSONObject("F").getString(key)).append("</td>")
.append("<td>").append(anovaResults.getJSONObject("PR(>F)").getString(key)).append("</td>")
.append("</tr>");
}
htmlTable.append("</tbody>");
htmlTable.append("</table>");
JSONObject relatedStats = jsonObject.getJSONObject("相关统计量");
htmlTable.append("<table class=\"data-table-class\">");
htmlTable.append("<caption>相关统计量</caption>");
htmlTable.append("<thead><tr><th>统计量</th><th>值</th></tr></thead>");
htmlTable.append("<tbody>");
Set<String> tongjikeys = relatedStats.keySet();
for (String key : tongjikeys) {
htmlTable.append("<tr>")
.append("<td>").append(key).append("</td>")
.append("<td>").append(relatedStats.get(key)).append("</td>")
.append("</tr>");
}
htmlTable.append("</tbody>");
htmlTable.append("</table>");
JSONObject ciLower = jsonObject.getJSONObject("置信区间").getJSONObject("0");
JSONObject ciUpper = jsonObject.getJSONObject("置信区间").getJSONObject("1");
htmlTable.append("<table class=\"data-table-class\">");
htmlTable.append("<caption>"+confidence_interval + "置信区间</caption>");
htmlTable.append("<thead><tr>");
// Generate table headers dynamically
htmlTable.append("<th>变量</th><th>下限</th><th>上限</th>");
htmlTable.append("</tr></thead><tbody>");
// Get all variable names (keys)
Set<String> variableNames = ciLower.keySet();
// Generate table rows dynamically
for (String variable : variableNames) {
htmlTable.append("<tr>");
htmlTable.append("<td>").append(variable).append("</td>");
htmlTable.append("<td>").append(ciLower.getDouble(variable)).append("</td>");
htmlTable.append("<td>").append(ciUpper.getDouble(variable)).append("</td>");
htmlTable.append("</tr>");
}
htmlTable.append("</tbody></table></div>");
// System.out.println(htmlTable.toString());
return htmlTable.toString();
// System.out.println(html);
}
public static String assembleHtml_rel(String data, Double ace_level) {
// Parse the JSON data
JSONObject jsonObject = JSON.parseObject(data);
JSONObject descriptiveStatistics = jsonObject.getJSONObject("描述性统计");
// Collect the dynamic column names
Set<String> keys = descriptiveStatistics.keySet();
List columns = new ArrayList<>();
for (String key : keys) {
columns.add(key);
}
// Build the HTML table
StringBuilder htmlTable = new StringBuilder();
// htmlTable.append("<style>")
// .append("table { border-collapse: collapse; width: 100%; }")
// .append("th, td { border: 1px solid black; padding: 8px; text-align: left; }")
// .append("th { background-color: #f2f2f2; }")
// .append("</style>");
htmlTable.append("<div>");
htmlTable.append("<table class=\"data-table-class\">")
.append("<caption>描述性统计</caption>")
.append("<thead>")
.append("<tr>")
.append("<th>统计量</th>");
// Add the dynamic column headers
for (int i = 0; i < columns.size(); i++) {
htmlTable.append("<th>").append(columns.get(i)).append("</th>");
}
htmlTable.append("</tr>")
.append("</thead>")
.append("<tbody>");
// Keys of the summary statistics
String[] stats = {"count", "mean", "std", "min", "25%", "50%", "75%", "max"};
// Fill in the table body
for (String stat : stats) {
htmlTable.append("<tr>")
.append("<th>").append(stat).append("</th>");
for (int i = 0; i < columns.size(); i++) {
String column = (String) columns.get(i);
if (descriptiveStatistics.getJSONObject(column).containsKey(stat)) {
htmlTable.append("<td>").append(descriptiveStatistics.getJSONObject(column).get(stat)).append("</td>");
} else {
htmlTable.append("<td>").append("N/A").append("</td>"); // 如果没有这个统计量则填充N/A
}
}
htmlTable.append("</tr>");
}
htmlTable.append("</tbody>");
htmlTable.append("</table>");
JSONObject correlationMatrix = jsonObject.getJSONObject("相关系数矩阵");
// Collect the dynamic column names
Set<String> jzkeys = correlationMatrix.keySet();
List jzcolumns = new ArrayList<>();
for (String key : jzkeys) {
jzcolumns.add(key);
}
htmlTable.append("<table class=\"data-table-class\">")
.append("<caption>相关系数矩阵</caption>")
.append("<thead>")
.append("<tr>")
.append("<th></th>");
// Add the dynamic column headers
for (int i = 0; i < jzcolumns.size(); i++) {
htmlTable.append("<th>").append(jzcolumns.get(i)).append("</th>");
}
htmlTable.append("</tr>")
.append("</thead>")
.append("<tbody>");
// Fill in the table body
for (int i = 0; i < jzcolumns.size(); i++) {
String rowName = (String) jzcolumns.get(i);
htmlTable.append("<tr>")
.append("<th>").append(rowName).append("</th>");
for (int j = 0; j < jzcolumns.size(); j++) {
String colName = (String) jzcolumns.get(j);
if (correlationMatrix.getJSONObject(rowName).containsKey(colName)) {
htmlTable.append("<td>").append(correlationMatrix.getJSONObject(rowName).get(colName)).append("</td>");
} else {
htmlTable.append("<td>").append("N/A").append("</td>"); // 如果没有这个统计量则填充N/A
}
}
htmlTable.append("</tr>");
}
htmlTable.append("</tbody>");
htmlTable.append("</table>");
JSONObject pValueTable = jsonObject.getJSONObject("P值表 (显著性水平=" + ace_level + ")");
// Collect the dynamic column names
Set<String> pkeys = pValueTable.keySet();
List pcolumns = new ArrayList<>();
for (String key : pkeys) {
pcolumns.add(key);
}
htmlTable.append("<table class=\"data-table-class\">")
.append("<caption>P值表 (显著性水平=" + ace_level + ")</caption>")
.append("<thead>")
.append("<tr>")
.append("<th></th>");
for (int i = 0; i < pcolumns.size(); i++) {
htmlTable.append("<th>").append(pcolumns.get(i)).append("</th>");
}
htmlTable.append("</tr>")
.append("</thead>")
.append("<tbody>");
// Fill in the table body
for (int i = 0; i < pcolumns.size(); i++) {
String rowName = (String) pcolumns.get(i);
htmlTable.append("<tr>")
.append("<th>").append(rowName).append("</th>");
for (int j = 0; j < pcolumns.size(); j++) {
String colName = (String) pcolumns.get(j);
if (pValueTable.getJSONObject(rowName).containsKey(colName)) {
htmlTable.append("<td>").append(pValueTable.getJSONObject(rowName).get(colName)).append("</td>");
} else {
htmlTable.append("<td>").append("N/A").append("</td>"); // 如果没有这个统计量则填充N/A
}
}
htmlTable.append("</tr>");
}
htmlTable.append("</tbody>");
htmlTable.append("</table>");
htmlTable.append("</div>");
// (debug) print the HTML table
// System.out.println(htmlTable.toString());
return htmlTable.toString();
}
public static String assembleHtml_main(String data) {
// Parse the JSON data
JSONObject jsonObject = JSON.parseObject(data);
JSONObject descriptiveStatistics = jsonObject.getJSONObject("主成分得分");
// Collect the dynamic column names
Set<String> keys = descriptiveStatistics.keySet();
List columns = new ArrayList<>();
for (String key : keys) {
columns.add(key);
}
// Build the HTML table
StringBuilder htmlTable = new StringBuilder();
// htmlTable.append("<style>")
// .append("table { border-collapse: collapse; width: 100%; }")
// .append("th, td { border: 1px solid black; padding: 8px; text-align: left; }")
// .append("th { background-color: #f2f2f2; }")
// .append("</style>");
htmlTable.append("<div>");
htmlTable.append("<table class=\"data-table-class\">")
.append("<caption>主成分得分</caption>")
.append("<thead>")
.append("<tr>")
.append("<th></th>");
// Add the dynamic column headers
for (int i = 0; i < columns.size(); i++) {
htmlTable.append("<th>").append(columns.get(i)).append("</th>");
}
htmlTable.append("</tr>")
.append("</thead>");
htmlTable.append("<tbody>");
Set<String> stats = descriptiveStatistics.getJSONObject((String) columns.get(0)).keySet();
// Row keys of the component-score table (taken from the first column)
// String[] stats = {"0", "1", "2", "3", "4"};
// Fill in the table body
for (String stat : stats) {
htmlTable.append("<tr>")
.append("<th>").append(stat).append("</th>");
for (int i = 0; i < columns.size(); i++) {
String column = (String) columns.get(i);
if (descriptiveStatistics.getJSONObject(column).containsKey(stat)) {
htmlTable.append("<td>").append(descriptiveStatistics.getJSONObject(column).get(stat)).append("</td>");
} else {
htmlTable.append("<td>").append("N/A").append("</td>"); // 如果没有这个统计量则填充N/A
}
}
htmlTable.append("</tr>");
}
htmlTable.append("</tbody>");
htmlTable.append("</table>");
JSONObject correlationMatrix = jsonObject.getJSONObject("主成分载荷");
// Collect the dynamic column names
Set<String> jzkeys = correlationMatrix.keySet();
List jzcolumns = new ArrayList<>();
for (String key : jzkeys) {
jzcolumns.add(key);
}
htmlTable.append("<table class=\"data-table-class\">")
.append("<caption>主成分载荷</caption>")
.append("<thead>")
.append("<tr>")
.append("<th></th>");
// Add the dynamic column headers
for (int i = 0; i < jzcolumns.size(); i++) {
htmlTable.append("<th>").append(jzcolumns.get(i)).append("</th>");
}
htmlTable.append("</tr>")
.append("</thead>")
.append("<tbody>");
// Fill in the table body; derive the principal-component row labels from the first loading column instead of hard-coding PC1..PC3
Set<String> stats_zcf = correlationMatrix.getJSONObject((String) jzcolumns.get(0)).keySet();
for (String stat : stats_zcf) {
htmlTable.append("<tr>")
.append("<th>").append(stat).append("</th>");
for (int i = 0; i < jzcolumns.size(); i++) {
String column = (String) jzcolumns.get(i);
if (correlationMatrix.getJSONObject(column).containsKey(stat)) {
htmlTable.append("<td>").append(correlationMatrix.getJSONObject(column).get(stat)).append("</td>");
} else {
htmlTable.append("<td>").append("N/A").append("</td>"); // 如果没有这个统计量则填充N/A
}
}
htmlTable.append("</tr>");
}
htmlTable.append("</tbody>").append("</table>");
JSONArray pValueTable = jsonObject.getJSONArray("累积方差解释");
// Cumulative explained-variance table
htmlTable.append("<table class=\"data-table-class\">")
.append("<caption>累积方差解释</caption>")
.append("<thead>")
.append("<tr>")
.append("<th>主成分</th>")
.append("<th>解释的方差比例</th>")
.append("</tr>")
.append("</thead>")
.append("<tbody>");
int pNum = pValueTable.size();
for (int i = 1; i <= pNum; i++) {
htmlTable.append("<tr>")
.append("<th>").append("PC" + i).append("</th>")
.append("<td>").append(pValueTable.get(i - 1)).append("</td>")
.append("</tr>");
}
htmlTable.append("</tbody>").append("</table>");
htmlTable.append("</div>");
// (debug) print the HTML table
// System.out.println(htmlTable.toString());
return htmlTable.toString();
}
public static void main(String[] args) {
// String data = "{\"P值表 (显著性水平=0.01)\":{\"GDP增长率\":{\"GDP增长率\":1.0,\"失业率\":0.025969486575263344,\"年份\":0.42924735072718156,\"通货膨胀率\":0.133583580599569},\"失业率\":{\"GDP增长率\":0.025969486575263344,\"失业率\":1.0,\"年份\":0.240403199576762,\"通货膨胀率\":0.39905124966692257},\"年份\":{\"GDP增长率\":0.42924735072718156,\"失业率\":0.240403199576762,\"年份\":1.0,\"通货膨胀率\":0.2659542971106307},\"通货膨胀率\":{\"GDP增长率\":0.133583580599569,\"失业率\":0.39905124966692257,\"年份\":0.2659542971106307,\"通货膨胀率\":1.0}},\"描述性统计\":{\"GDP增长率\":{\"25%\":2.8499999999999996,\"50%\":3.0,\"75%\":3.35,\"count\":7.0,\"max\":4.0,\"mean\":2.0571428571428574,\"min\":-5.0,\"std\":3.138926112997428},\"失业率\":{\"25%\":4.7,\"50%\":5.0,\"75%\":5.65,\"count\":7.0,\"max\":7.8,\"mean\":5.385714285714286,\"min\":4.2,\"std\":1.2212405870378578},\"年份\":{\"25%\":2016.5,\"50%\":2018.0,\"75%\":2019.5,\"count\":7.0,\"max\":2021.0,\"mean\":2018.0,\"min\":2015.0,\"std\":2.160246899469287},\"通货膨胀率\":{\"25%\":1.6,\"50%\":2.0,\"75%\":2.3,\"count\":7.0,\"max\":2.9,\"mean\":1.9857142857142858,\"min\":1.2,\"std\":0.5843188953205017}},\"相关系数矩阵\":{\"GDP增长率\":{\"GDP增长率\":1.0,\"失业率\":-0.8136538484920446,\"年份\":-0.35885344674380804,\"通货膨胀率\":0.6247921262264898},\"失业率\":{\"GDP增长率\":-0.8136538484920446,\"失业率\":1.0,\"年份\":0.5117161794157596,\"通货膨胀率\":-0.3810356879053683},\"年份\":{\"GDP增长率\":-0.35885344674380804,\"失业率\":0.5117161794157596,\"年份\":1.0,\"通货膨胀率\":0.48853665304335764},\"通货膨胀率\":{\"GDP增长率\":0.6247921262264898,\"失业率\":-0.3810356879053683,\"年份\":0.48853665304335764,\"通货膨胀率\":1.0}}}";
// String s = assembleHtml_rel(data, 0.01);
String data = "{\"主成分得分\":{\"PC1\":{\"0\":-0.5284343087964162,\"1\":-2.315443264439099,\"2\":2.843877573235515},\"PC2\":{\"0\":0.9066548428545789,\"1\":-0.5926211987309484,\"2\":-0.3140336441236311},\"PC3\":{\"0\":1.0467283057891841e-16,\"1\":1.0467283057891829e-16,\"2\":1.0467283057891834e-16}},\"主成分载荷\":{\"利润率\":{\"PC1\":0.4668537355103065,\"PC2\":-0.0788849066439597,\"PC3\":-0.32761502595211395},\"员工满意指数\":{\"PC1\":0.4674212894188098,\"PC2\":-0.022345654557889807,\"PC3\":0.7943786052557248},\"客户满意指数\":{\"PC1\":0.4013979730000272,\"PC2\":-0.7871878091015423,\"PC3\":-0.08757932327782991},\"市场份额增长\":{\"PC1\":0.4365584476761835,\"PC2\":0.5492208058930436,\"PC3\":0.10090990545647134},\"营业收入增长率\":{\"PC1\":0.4602839524382316,\"PC2\":0.2682716958649626,\"PC3\":-0.4937388777454417}},\"累积方差解释\":[0.9152106654067643,0.08478933459323566,2.1912802922805874e-33]}";
String s = assembleHtml_main(data);
// String data = "{\"ANOVA结果\":{\"F\":{\"C(广告类型)\":7.784745762711876,\"Residual\":\"NaN\"},\"PR(>F)\":{\"C(广告类型)\":0.02152454745613121,\"Residual\":\"NaN\"},\"df\":{\"C(广告类型)\":2.0,\"Residual\":6.0},\"sum_sq\":{\"C(广告类型)\":520.7482993197281,\"Residual\":200.68027210884333}},\"描述性统计\":{\"广告类型\":{\"均值\":{\"A\":208.09523809523807,\"B\":225.0,\"C\":209.76190476190473},\"标准差\":{\"A\":7.3308591992098275,\"B\":5.0,\"C\":4.647433641891219}}},\"相关统计量\":{\"F值\":7.784745762711876,\"p值\":0.02152454745613121,\"总平方和 (SST)\":721.4285714285713},\"置信区间\":{\"0\":{\"C(广告类型)[T.B]\":5.350308385615985,\"C(广告类型)[T.C]\":-9.887786852479266,\"Intercept\":199.9250056589452},\"1\":{\"C(广告类型)[T.B]\":28.45921542390781,\"C(广告类型)[T.C]\":13.22112018581257,\"Intercept\":216.2654705315309}}}";
// String s = assembleHtml_var(data,"95%");
System.out.println(s);
}
}
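
A small usage sketch for the jsonParse convention used throughout the service layer: the part of the key before the ':' names an entry in the data map whose value is a JSON string, and the part after it is a JSONPath evaluated against that JSON. The package line and the sample key/path are illustrative assumptions only.

package com.bfd.data_statistical.util;

import java.util.HashMap;
import java.util.Map;

public class JsonParseSketch {
    public static void main(String[] args) {
        // the data map holds the raw record as a JSON string under an arbitrary key (here "record")
        Map<String, Object> data = new HashMap<>();
        data.put("record", "{\"group\":\"A\",\"sales\":\"200\"}");

        // "record:$.sales" means: take data.get("record"), parse it as JSON, then evaluate JSONPath $.sales on it
        Object sales = Utils.jsonParse("record:$.sales", data);
        System.out.println(sales); // prints 200
    }
}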

22
src/main/java/com/bfd/data_statistical/util/varPojo.java

@ -0,0 +1,22 @@
package com.bfd.data_statistical.util;
import lombok.Data;
import java.util.Map;
/**
* @author guowei
*/
@Data
public class varPojo {
private Map<String, Object> ANOVA结果;
private Map<String, Object> PR;
private Map<String, Object> df;
private Map<String, Object> sum_sq;
private double F值;
private double p值;
private double MSE;
private double SST;
private Map<String, Map<String, Map<String, Double>>> 描述性统计;
private Map<String, Map<String, Double>> 置信区间;
}

38
src/main/resources/application.yml

@ -0,0 +1,38 @@
server:
port: 9900
crawl:
kafka:
topic: produce_analyze
brokers: 172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092
api:
main: http://172.18.1.147:9903/mainAna/
rel: http://172.18.1.147:9902/relAna/
var: http://172.18.1.147:9901/varAna/
# Log level
logging:
level:
com:
bfd: INFO
# Log path
log:
path: ./logs
#spring:
# boot:
# admin:
# client:
# url: http://172.18.1.147:8001
# instance:
# service-base-url: http://172.18.1.147:9999
# application:
# name: 分析
#management:
# endpoints:
# web:
# exposure:
# include: "*"
# endpoint:
# health:
# show-details: always
# health:
# elasticsearch:
# enabled: false

38
src/main/resources/logback-spring.xml

@ -0,0 +1,38 @@
<configuration>
<!-- Properties: resolved from the corresponding entries in the application configuration -->
<springProperty scope="context" name="logging.path" source="logging.log.path"/>
<springProperty scope="context" name="logging.level" source="logging.level.com.bfd"/>
<!-- Default console appender; of little use in production, where the app runs in the background -->
<!-- <appender name="STDOUT"
class="ch.qos.logback.core.ConsoleAppender">
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
<Pattern>%d{HH:mm:ss.SSS} %-5level %logger{80} - %msg%n</Pattern>
</encoder>
</appender> -->
<appender name="GLMAPPER-LOGGERONE"
class="ch.qos.logback.core.rolling.RollingFileAppender">
<append>true</append>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>${logging.level}</level>
</filter>
<file>
${logging.path}/crawlSchedule.log
<!-- ${logging.path}/sendKafka.log -->
</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<FileNamePattern>${logging.path}/crawlSchedule.log.%d{yyyy-MM-dd}</FileNamePattern>
<!-- <FileNamePattern>${logging.path}/sendKafka.log.%d{yyyy-MM-dd}</FileNamePattern> -->
<MaxHistory>7</MaxHistory>
</rollingPolicy>
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %line %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<root level="info">
<appender-ref ref="GLMAPPER-LOGGERONE"/>
<!-- <appender-ref ref="STDOUT"/> -->
</root>
</configuration>

104
src/main/resources/python/mainAna.py

@ -0,0 +1,104 @@
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from flask import Flask, request, jsonify
app = Flask(__name__)
def run(data, n_components, miss_value):
    # Convert the JSON data into a DataFrame
    df = pd.DataFrame(data)
    if miss_value == '用均值替代':
        # Missing values: replace with the column mean
        df.fillna(df.mean(), inplace=True)
    elif miss_value == '用中位数替代':
        # Missing values: replace with the column median
        df.fillna(df.median(), inplace=True)
    elif miss_value == '忽略缺失值':
        # Missing values: drop incomplete rows
        df.dropna(inplace=True)
    # Standardize the data
    scaler = StandardScaler()
    scaled_df = scaler.fit_transform(df)
    # Run PCA; n_components is passed in as a percentage of explained variance
    decimal = n_components / 100
    print(decimal)
    pca = PCA(n_components=decimal)  # keep components until this share of variance is explained
    principal_components = pca.fit_transform(scaled_df)
    explained_variance = pca.explained_variance_ratio_
    # Component-score DataFrame
    score_labels = [f"PC{i + 1}" for i in range(len(principal_components[0]))]
    scores = pd.DataFrame(principal_components, columns=score_labels)
    # Loadings DataFrame
    loadings = pd.DataFrame(pca.components_, columns=df.columns, index=score_labels)
    # Assemble the result dictionary
    result = {
        "主成分得分": scores.to_dict(),
        "主成分载荷": loadings.to_dict(),
        "累积方差解释": explained_variance.tolist()
    }
    # Return the result as JSON
    return jsonify(result)
def factor(data, n_components, miss_value):
    df = pd.DataFrame(data)
    if miss_value == '用均值替代':
        # Missing values: replace with the column mean
        df.fillna(df.mean(), inplace=True)
    elif miss_value == '用中位数替代':
        # Missing values: replace with the column median
        df.fillna(df.median(), inplace=True)
    elif miss_value == '忽略缺失值':
        # Missing values: drop incomplete rows
        df.dropna(inplace=True)
    # Standardize the data
    scaler = StandardScaler()
    scaled_df = scaler.fit_transform(df)
    # Run PCA, extracting the requested number of components
    pca = PCA(n_components=n_components)
    principal_components = pca.fit_transform(scaled_df)
    explained_variance = pca.explained_variance_ratio_
    # Component-score DataFrame
    score_labels = [f"PC{i + 1}" for i in range(len(principal_components[0]))]
    scores = pd.DataFrame(principal_components, columns=score_labels)
    # Loadings DataFrame
    loadings = pd.DataFrame(pca.components_, columns=df.columns, index=score_labels)
    # Debug output
    # print("主成分得分:\n", scores)
    # print("主成分载荷:\n", loadings)
    # print("累积方差解释:\n", explained_variance)
    # Assemble the result dictionary
    result = {
        "主成分得分": scores.round(6).to_dict(),
        "主成分载荷": loadings.to_dict(),
        "累积方差解释": explained_variance.tolist()
    }
    # Return the result as JSON
    return jsonify(result)
@app.route("/mainAna/", methods=["POST"])
def get_cookie():
# 获取 POST 请求中的 JSON 数据
global data
new_data = request.json
print(new_data)
draw_method = new_data['draw_method']
if draw_method == '因子数':
data = factor(new_data['data'], new_data['factor_num'], new_data['miss_value'])
elif draw_method == '累积方差解释率':
data = run(new_data['data'], new_data['cumulative_variance'],new_data['miss_value'])
return data
if __name__ == '__main__':
app.run(port=9903, debug=False, host='0.0.0.0') # 启动服务

92
src/main/resources/python/relAna.py

@ -0,0 +1,92 @@
import pandas as pd
from scipy.stats import pearsonr, spearmanr
from flask import Flask, request, jsonify
app = Flask(__name__)
# Correlation analysis
def run_Pearson(data, alpha, miss_value):
    # Convert the JSON data into a DataFrame
    df = pd.DataFrame(data)
    if miss_value == '用均值替代':
        # Missing values: replace with the column mean
        df.fillna(df.mean(), inplace=True)
    elif miss_value == '用中位数替代':
        # Missing values: replace with the column median
        df.fillna(df.median(), inplace=True)
    elif miss_value == '忽略缺失值':
        # Missing values: drop incomplete rows
        df.dropna(inplace=True)
    # Descriptive statistics
    desc_stats = df.describe()
    # Pearson correlation matrix and p-value table
    corr_matrix = df.corr(method='pearson')
    p_values = df.corr(method=lambda x, y: pearsonr(x, y)[1])  # p-values via a lambda passed to corr
    # The significance level is only used to label the p-value table
    # Assemble the result dictionary
    result = {
        "描述性统计": desc_stats.to_dict(),
        "相关系数矩阵": corr_matrix.to_dict(),
        "P值表 (显著性水平={})".format(alpha): p_values.to_dict()
    }
    # Return the result as JSON
    return jsonify(result)
def run_Spearman(data, alpha, miss_value):
    # Convert the JSON data into a DataFrame
    df = pd.DataFrame(data)
    if miss_value == '用均值替代':
        # Missing values: replace with the column mean
        df.fillna(df.mean(), inplace=True)
    elif miss_value == '用中位数替代':
        # Missing values: replace with the column median
        df.fillna(df.median(), inplace=True)
    elif miss_value == '忽略缺失值':
        # Missing values: drop incomplete rows
        df.dropna(inplace=True)
    # Descriptive statistics
    desc_stats = df.describe()
    # Spearman correlation matrix and p-value table
    corr_matrix, p_values = spearmanr(df, nan_policy='omit')
    # Convert the correlation matrix and p-value table to DataFrames
    corr_df = pd.DataFrame(corr_matrix, columns=df.columns, index=df.columns)
    p_values_df = pd.DataFrame(p_values, columns=df.columns, index=df.columns)
    # The significance level is only used to label the p-value table
    # Assemble the result dictionary
    result = {
        "描述性统计": desc_stats.to_dict(),
        "相关系数矩阵": corr_df.to_dict(),
        "P值表 (显著性水平={})".format(alpha): p_values_df.to_dict()
    }
    # Return the result as JSON
    return jsonify(result)
@app.route("/relAna/", methods=["POST"])
def get_cookie():
# 获取 POST 请求中的 JSON 数据
global data
new_data = request.json
print(new_data)
ana_way = new_data['ana_way']
if ana_way == 'Pearson':
data = run_Pearson(new_data['data'], new_data['ace_level'], new_data['miss_value'])
elif ana_way == 'Spearman':
data = run_Spearman(new_data['data'], new_data['ace_level'], new_data['miss_value'])
return data
if __name__ == '__main__':
app.run(port=9902, debug=False, host='0.0.0.0') # 启动服务

78
src/main/resources/python/varAna.py

@ -0,0 +1,78 @@
from decimal import Decimal
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from flask import Flask, request, jsonify
app = Flask(__name__)
def run(data, alphaid, miss_value, confidence):
    df = pd.DataFrame(data)
    if miss_value == '用均值替代':
        # Missing values: replace the dependent variable with its mean
        df['因变量'].fillna(df['因变量'].mean(), inplace=True)
    elif miss_value == '用中位数替代':
        # Missing values: replace the dependent variable with its median
        df['因变量'].fillna(df['因变量'].median(), inplace=True)
    elif miss_value == '忽略缺失值':
        # Missing values: drop incomplete rows
        df.dropna(inplace=True)
    # Keys of the independent variables (every column except the dependent variable)
    independent_variables = list(df.columns.drop('因变量'))
    statistics = {}
    result = {}
    for var in independent_variables:
        grouped = df.groupby(var)['因变量']
        group_means = grouped.mean()
        group_stds = grouped.std()
        print(f"Group means of {var}:\n", group_means)
        print(f"Group standard deviations of {var}:\n", group_stds)
        # Store each variable's group means and standard deviations
        statistics[var] = {'均值': group_means.to_dict(), '标准差': group_stds.to_dict()}
    # Add the descriptive statistics to the outermost result dictionary
    result['描述性统计'] = statistics
    # One-way ANOVA
    # Build the model formula
    formula = '因变量 ~ ' + ' + '.join(['C({})'.format(var) for var in independent_variables])
    model = ols(formula, data=df).fit()
    anova_results = sm.stats.anova_lm(model, typ=2, alpha=alphaid)  # significance level passed in as alphaid
    anova_results_dict = anova_results.to_dict()
    # ANOVA results
    print("ANOVA结果:\n", anova_results_dict)
    result['ANOVA结果'] = anova_results_dict
    # Confidence intervals at the requested level
    # conf_int = model.conf_int(0.05)
    c = Decimal('1.0') - Decimal(confidence)
    conf_int = model.conf_int(float(c))
    conf_int_dict = conf_int.to_dict()
    print("\nConfidence intervals:\n", conf_int_dict)
    result['置信区间'] = conf_int_dict
    # Extract the related statistics
    f_value = anova_results['F'].iloc[0]
    p_value = anova_results['PR(>F)'].iloc[0]
    # mse = anova_results['sum_sq'].iloc[1] / anova_results['df'].iloc[1]
    sst = sum((df['因变量'] - df['因变量'].mean()) ** 2)
    result['相关统计量'] = {
        "F值": f_value,
        "p值": p_value,
        "总平方和 (SST)": sst
    }
    return jsonify(result)
@app.route("/varAna/", methods=["POST"])
def get_cookie():
# 获取 POST 请求中的 JSON 数据
new_data = request.json
print(new_data)
data = run(new_data['data'], new_data['alpha'], new_data['miss_value'], new_data['confidence'])
return data
if __name__ == '__main__':
app.run(port=9901, debug=False, host='0.0.0.0') # 启动服务