Browse Source

20210607-v1

崔老师项目版本代码
release-1.0
杜静 4 years ago
parent
commit
3e532bca9f
  1. 1
      .idea/.name
  2. 2
      .idea/compiler.xml
  3. 8
      .idea/libraries/Maven__com_alibaba_fastjson_1_2_60.xml
  4. 8
      .idea/libraries/Maven__com_fasterxml_classmate_1_4_0.xml
  5. 8
      .idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_6.xml
  6. 8
      .idea/libraries/Maven__com_github_virtuald_curvesapi_1_06.xml
  7. 13
      .idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml
  8. 13
      .idea/libraries/Maven__com_google_errorprone_error_prone_annotations_2_1_3.xml
  9. 13
      .idea/libraries/Maven__com_google_guava_guava_25_0_jre.xml
  10. 13
      .idea/libraries/Maven__com_google_j2objc_j2objc_annotations_1_1.xml
  11. 13
      .idea/libraries/Maven__com_ibm_icu_icu4j_4_6.xml
  12. 13
      .idea/libraries/Maven__com_monitorjbl_xlsx_streamer_2_1_0.xml
  13. 13
      .idea/libraries/Maven__com_rackspace_apache_xerces2_xsd11_2_11_1.xml
  14. 13
      .idea/libraries/Maven__com_rackspace_eclipse_webtools_sourceediting_org_eclipse_wst_xml_xpath2_processor_2_1_100.xml
  15. 8
      .idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_6_0.xml
  16. 8
      .idea/libraries/Maven__com_squareup_okio_okio_1_11_0.xml
  17. 13
      .idea/libraries/Maven__com_sun_mail_javax_mail_1_6_2.xml
  18. 13
      .idea/libraries/Maven__com_swagger_ui_swagger_bootstrap_ui_1_8_8.xml
  19. 8
      .idea/libraries/Maven__commons_codec_commons_codec_1_12.xml
  20. 13
      .idea/libraries/Maven__edu_princeton_cup_java_cup_10k.xml
  21. 8
      .idea/libraries/Maven__io_springfox_springfox_core_2_9_2.xml
  22. 8
      .idea/libraries/Maven__io_springfox_springfox_schema_2_9_2.xml
  23. 8
      .idea/libraries/Maven__io_springfox_springfox_spi_2_9_2.xml
  24. 8
      .idea/libraries/Maven__io_springfox_springfox_spring_web_2_9_2.xml
  25. 8
      .idea/libraries/Maven__io_springfox_springfox_swagger2_2_9_2.xml
  26. 8
      .idea/libraries/Maven__io_springfox_springfox_swagger_common_2_9_2.xml
  27. 13
      .idea/libraries/Maven__io_springfox_springfox_swagger_ui_2_9_2.xml
  28. 8
      .idea/libraries/Maven__io_swagger_swagger_annotations_1_5_20.xml
  29. 8
      .idea/libraries/Maven__io_swagger_swagger_models_1_5_20.xml
  30. 13
      .idea/libraries/Maven__it_sauronsoftware_jave_1_0_2.xml
  31. 13
      .idea/libraries/Maven__javax_activation_activation_1_1.xml
  32. 13
      .idea/libraries/Maven__javax_mail_javax_mail_api_1_6_2.xml
  33. 8
      .idea/libraries/Maven__org_apache_commons_commons_collections4_4_3.xml
  34. 13
      .idea/libraries/Maven__org_apache_commons_commons_compress_1_18.xml
  35. 13
      .idea/libraries/Maven__org_apache_commons_commons_math3_3_6_1.xml
  36. 13
      .idea/libraries/Maven__org_apache_poi_poi_3_15.xml
  37. 13
      .idea/libraries/Maven__org_apache_poi_poi_4_1_0.xml
  38. 8
      .idea/libraries/Maven__org_apache_poi_poi_ooxml_4_1_0.xml
  39. 8
      .idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_4_1_0.xml
  40. 8
      .idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_3_1_0.xml
  41. 13
      .idea/libraries/Maven__org_checkerframework_checker_compat_qual_2_0_0.xml
  42. 13
      .idea/libraries/Maven__org_codehaus_mojo_animal_sniffer_annotations_1_14.xml
  43. 8
      .idea/libraries/Maven__org_mapstruct_mapstruct_1_2_0_Final.xml
  44. 13
      .idea/libraries/Maven__org_slf4j_slf4j_api_1_7_12.xml
  45. 13
      .idea/libraries/Maven__stax_stax_api_1_0_1.xml
  46. 13
      .idea/libraries/Maven__xml_apis_xml_apis_1_4_01.xml
  47. 13
      .idea/libraries/Maven__xml_resolver_xml_resolver_1_2.xml
  48. 2
      .idea/modules.xml
  49. 2
      .idea/vcs.xml
  50. 51
      cl_query_data_job/cl_query_data_job.iml
  51. 78
      cl_query_data_job/pom.xml
  52. 20
      cl_query_data_job/src/main/java/com/bfd/mf/job/Application.java
  53. 162
      cl_query_data_job/src/main/java/com/bfd/mf/job/config/AllKeys.java
  54. 264
      cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java
  55. 1116
      cl_query_data_job/src/main/java/com/bfd/mf/job/config/BFDApiConfig.java
  56. 81
      cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java
  57. 27
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/EmailGroup.java
  58. 101
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/ServiceLoad.java
  59. 20
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Subject.java
  60. 9
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/SubjectCount.java
  61. 111
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Task.java
  62. 36
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/TaskCount.java
  63. 90
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/UploadTask.java
  64. 14
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/EmailGroupRepository.java
  65. 1
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ResultDetailRepository.java
  66. 17
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ServiceLoadRepository.java
  67. 11
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java
  68. 5
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectRepository.java
  69. 7
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskCountRepository.java
  70. 61
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java
  71. 100
      cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/UploadTaskRepository.java
  72. 41
      cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java
  73. 1
      cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java
  74. 1315
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/BacktraceService.java
  75. 207
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryMiniService.java
  76. 7
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/WriterTXTService.java
  77. 241
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java
  78. 230
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/backtrace/BacktraceService.java
  79. 203
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java
  80. 2
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryNormalService.java
  81. 354
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java
  82. 6
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java
  83. 90
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java
  84. 21
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java
  85. 311
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java
  86. 545
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadExcelService.java
  87. 214
      cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadService.java
  88. 321
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/DataCheckUtil.java
  89. 365
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/DateUtil.java
  90. 286
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java
  91. 57
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils.java
  92. 451
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils2.java
  93. 239
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/ReadLine.java
  94. 119
      cl_query_data_job/src/main/java/com/bfd/mf/job/util/ZipUtils.java
  95. 38
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java
  96. 3
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/BacktraceProducer.java
  97. 14
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/QueryProducer.java
  98. 1
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/ReadWriterOlyDataProducer.java
  99. 40
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/SQOutPutProducer.java
  100. 4
      cl_query_data_job/src/main/java/com/bfd/mf/job/worker/StatisticsProducer.java

1
.idea/.name

@ -1 +0,0 @@
cl_stream_30

2
.idea/compiler.xml

@ -13,7 +13,7 @@
<bytecodeTargetLevel> <bytecodeTargetLevel>
<module name="cl_query_data_job" target="1.8" /> <module name="cl_query_data_job" target="1.8" />
<module name="cl_search_api" target="1.8" /> <module name="cl_search_api" target="1.8" />
<module name="cl_stream_3.0" target="1.8" />
<module name="cl_stream_3.1" target="1.8" />
</bytecodeTargetLevel> </bytecodeTargetLevel>
</component> </component>
</project> </project>

8
.idea/libraries/Maven__com_alibaba_fastjson_1_2_6.xml → .idea/libraries/Maven__com_alibaba_fastjson_1_2_60.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: com.alibaba:fastjson:1.2.6">
<library name="Maven: com.alibaba:fastjson:1.2.60">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/alibaba/fastjson/1.2.6/fastjson-1.2.6.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/alibaba/fastjson/1.2.60/fastjson-1.2.60.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/alibaba/fastjson/1.2.6/fastjson-1.2.6-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/alibaba/fastjson/1.2.60/fastjson-1.2.60-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/alibaba/fastjson/1.2.6/fastjson-1.2.6-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/alibaba/fastjson/1.2.60/fastjson-1.2.60-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__com_fasterxml_classmate_1_3_1.xml → .idea/libraries/Maven__com_fasterxml_classmate_1_4_0.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: com.fasterxml:classmate:1.3.1">
<library name="Maven: com.fasterxml:classmate:1.4.0">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/classmate/1.3.1/classmate-1.3.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/classmate/1.4.0/classmate-1.4.0.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/classmate/1.3.1/classmate-1.3.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/classmate/1.4.0/classmate-1.4.0-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/classmate/1.3.1/classmate-1.3.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/classmate/1.4.0/classmate-1.4.0-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_5.xml → .idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_6.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: com.fasterxml.jackson.core:jackson-databind:2.9.5">
<library name="Maven: com.fasterxml.jackson.core:jackson-databind:2.9.6">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/jackson/core/jackson-databind/2.9.5/jackson-databind-2.9.5.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/jackson/core/jackson-databind/2.9.6/jackson-databind-2.9.6.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/jackson/core/jackson-databind/2.9.5/jackson-databind-2.9.5-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/jackson/core/jackson-databind/2.9.6/jackson-databind-2.9.6-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/jackson/core/jackson-databind/2.9.5/jackson-databind-2.9.5-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/fasterxml/jackson/core/jackson-databind/2.9.6/jackson-databind-2.9.6-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__com_github_virtuald_curvesapi_1_04.xml → .idea/libraries/Maven__com_github_virtuald_curvesapi_1_06.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: com.github.virtuald:curvesapi:1.04">
<library name="Maven: com.github.virtuald:curvesapi:1.06">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/github/virtuald/curvesapi/1.04/curvesapi-1.04.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/github/virtuald/curvesapi/1.06/curvesapi-1.06.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/github/virtuald/curvesapi/1.04/curvesapi-1.04-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/github/virtuald/curvesapi/1.06/curvesapi-1.06-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/github/virtuald/curvesapi/1.04/curvesapi-1.04-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/github/virtuald/curvesapi/1.06/curvesapi-1.06-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.google.code.findbugs:jsr305:1.3.9">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_google_errorprone_error_prone_annotations_2_1_3.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.google.errorprone:error_prone_annotations:2.1.3">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/errorprone/error_prone_annotations/2.1.3/error_prone_annotations-2.1.3.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/google/errorprone/error_prone_annotations/2.1.3/error_prone_annotations-2.1.3-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/errorprone/error_prone_annotations/2.1.3/error_prone_annotations-2.1.3-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_google_guava_guava_25_0_jre.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.google.guava:guava:25.0-jre">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/25.0-jre/guava-25.0-jre.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/25.0-jre/guava-25.0-jre-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/guava/guava/25.0-jre/guava-25.0-jre-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_google_j2objc_j2objc_annotations_1_1.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.google.j2objc:j2objc-annotations:1.1">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/j2objc/j2objc-annotations/1.1/j2objc-annotations-1.1.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/google/j2objc/j2objc-annotations/1.1/j2objc-annotations-1.1-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/google/j2objc/j2objc-annotations/1.1/j2objc-annotations-1.1-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_ibm_icu_icu4j_4_6.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.ibm.icu:icu4j:4.6">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/ibm/icu/icu4j/4.6/icu4j-4.6.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/ibm/icu/icu4j/4.6/icu4j-4.6-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/ibm/icu/icu4j/4.6/icu4j-4.6-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_monitorjbl_xlsx_streamer_2_1_0.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.monitorjbl:xlsx-streamer:2.1.0">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/monitorjbl/xlsx-streamer/2.1.0/xlsx-streamer-2.1.0.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/monitorjbl/xlsx-streamer/2.1.0/xlsx-streamer-2.1.0-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/monitorjbl/xlsx-streamer/2.1.0/xlsx-streamer-2.1.0-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_rackspace_apache_xerces2_xsd11_2_11_1.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.rackspace.apache:xerces2-xsd11:2.11.1">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/rackspace/apache/xerces2-xsd11/2.11.1/xerces2-xsd11-2.11.1.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/rackspace/apache/xerces2-xsd11/2.11.1/xerces2-xsd11-2.11.1-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/rackspace/apache/xerces2-xsd11/2.11.1/xerces2-xsd11-2.11.1-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_rackspace_eclipse_webtools_sourceediting_org_eclipse_wst_xml_xpath2_processor_2_1_100.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.rackspace.eclipse.webtools.sourceediting:org.eclipse.wst.xml.xpath2.processor:2.1.100">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/rackspace/eclipse/webtools/sourceediting/org.eclipse.wst.xml.xpath2.processor/2.1.100/org.eclipse.wst.xml.xpath2.processor-2.1.100.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/rackspace/eclipse/webtools/sourceediting/org.eclipse.wst.xml.xpath2.processor/2.1.100/org.eclipse.wst.xml.xpath2.processor-2.1.100-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/rackspace/eclipse/webtools/sourceediting/org.eclipse.wst.xml.xpath2.processor/2.1.100/org.eclipse.wst.xml.xpath2.processor-2.1.100-sources.jar!/" />
</SOURCES>
</library>
</component>

8
.idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_9_1.xml → .idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_6_0.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: com.squareup.okhttp3:okhttp:3.9.1">
<library name="Maven: com.squareup.okhttp3:okhttp:3.6.0">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okhttp3/okhttp/3.9.1/okhttp-3.9.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okhttp3/okhttp/3.6.0/okhttp-3.6.0.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okhttp3/okhttp/3.9.1/okhttp-3.9.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okhttp3/okhttp/3.6.0/okhttp-3.6.0-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okhttp3/okhttp/3.9.1/okhttp-3.9.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okhttp3/okhttp/3.6.0/okhttp-3.6.0-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__com_squareup_okio_okio_1_13_0.xml → .idea/libraries/Maven__com_squareup_okio_okio_1_11_0.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: com.squareup.okio:okio:1.13.0">
<library name="Maven: com.squareup.okio:okio:1.11.0">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okio/okio/1.13.0/okio-1.13.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okio/okio/1.11.0/okio-1.11.0.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okio/okio/1.13.0/okio-1.13.0-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okio/okio/1.11.0/okio-1.11.0-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okio/okio/1.13.0/okio-1.13.0-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/com/squareup/okio/okio/1.11.0/okio-1.11.0-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__com_sun_mail_javax_mail_1_6_2.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: com.sun.mail:javax.mail:1.6.2">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/sun/mail/javax.mail/1.6.2/javax.mail-1.6.2.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/sun/mail/javax.mail/1.6.2/javax.mail-1.6.2-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/sun/mail/javax.mail/1.6.2/javax.mail-1.6.2-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__com_swagger_ui_swagger_bootstrap_ui_1_8_8.xml

@ -1,13 +0,0 @@
<component name="libraryTable">
<library name="Maven: com.swagger.ui:swagger-bootstrap-ui:1.8.8">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/com/swagger/ui/swagger-bootstrap-ui/1.8.8/swagger-bootstrap-ui-1.8.8.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/com/swagger/ui/swagger-bootstrap-ui/1.8.8/swagger-bootstrap-ui-1.8.8-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/com/swagger/ui/swagger-bootstrap-ui/1.8.8/swagger-bootstrap-ui-1.8.8-sources.jar!/" />
</SOURCES>
</library>
</component>

8
.idea/libraries/Maven__commons_codec_commons_codec_1_10.xml → .idea/libraries/Maven__commons_codec_commons_codec_1_12.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: commons-codec:commons-codec:1.10">
<library name="Maven: commons-codec:commons-codec:1.12">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.10/commons-codec-1.10.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.12/commons-codec-1.12.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.10/commons-codec-1.10-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.12/commons-codec-1.12-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.10/commons-codec-1.10-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/commons-codec/commons-codec/1.12/commons-codec-1.12-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__edu_princeton_cup_java_cup_10k.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: edu.princeton.cup:java-cup:10k">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/edu/princeton/cup/java-cup/10k/java-cup-10k.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/edu/princeton/cup/java-cup/10k/java-cup-10k-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/edu/princeton/cup/java-cup/10k/java-cup-10k-sources.jar!/" />
</SOURCES>
</library>
</component>

8
.idea/libraries/Maven__io_springfox_springfox_core_2_6_1.xml → .idea/libraries/Maven__io_springfox_springfox_core_2_9_2.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.springfox:springfox-core:2.6.1">
<library name="Maven: io.springfox:springfox-core:2.9.2">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-core/2.6.1/springfox-core-2.6.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-core/2.9.2/springfox-core-2.9.2.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-core/2.6.1/springfox-core-2.6.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-core/2.9.2/springfox-core-2.9.2-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-core/2.6.1/springfox-core-2.6.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-core/2.9.2/springfox-core-2.9.2-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__io_springfox_springfox_schema_2_6_1.xml → .idea/libraries/Maven__io_springfox_springfox_schema_2_9_2.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.springfox:springfox-schema:2.6.1">
<library name="Maven: io.springfox:springfox-schema:2.9.2">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-schema/2.6.1/springfox-schema-2.6.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-schema/2.9.2/springfox-schema-2.9.2.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-schema/2.6.1/springfox-schema-2.6.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-schema/2.9.2/springfox-schema-2.9.2-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-schema/2.6.1/springfox-schema-2.6.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-schema/2.9.2/springfox-schema-2.9.2-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__io_springfox_springfox_spi_2_6_1.xml → .idea/libraries/Maven__io_springfox_springfox_spi_2_9_2.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.springfox:springfox-spi:2.6.1">
<library name="Maven: io.springfox:springfox-spi:2.9.2">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spi/2.6.1/springfox-spi-2.6.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spi/2.9.2/springfox-spi-2.9.2.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spi/2.6.1/springfox-spi-2.6.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spi/2.9.2/springfox-spi-2.9.2-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spi/2.6.1/springfox-spi-2.6.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spi/2.9.2/springfox-spi-2.9.2-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__io_springfox_springfox_spring_web_2_6_1.xml → .idea/libraries/Maven__io_springfox_springfox_spring_web_2_9_2.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.springfox:springfox-spring-web:2.6.1">
<library name="Maven: io.springfox:springfox-spring-web:2.9.2">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spring-web/2.6.1/springfox-spring-web-2.6.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spring-web/2.9.2/springfox-spring-web-2.9.2.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spring-web/2.6.1/springfox-spring-web-2.6.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spring-web/2.9.2/springfox-spring-web-2.9.2-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spring-web/2.6.1/springfox-spring-web-2.6.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-spring-web/2.9.2/springfox-spring-web-2.9.2-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__io_springfox_springfox_swagger2_2_6_1.xml → .idea/libraries/Maven__io_springfox_springfox_swagger2_2_9_2.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.springfox:springfox-swagger2:2.6.1">
<library name="Maven: io.springfox:springfox-swagger2:2.9.2">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger2/2.6.1/springfox-swagger2-2.6.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger2/2.9.2/springfox-swagger2-2.9.2.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger2/2.6.1/springfox-swagger2-2.6.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger2/2.9.2/springfox-swagger2-2.9.2-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger2/2.6.1/springfox-swagger2-2.6.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger2/2.9.2/springfox-swagger2-2.9.2-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__io_springfox_springfox_swagger_common_2_6_1.xml → .idea/libraries/Maven__io_springfox_springfox_swagger_common_2_9_2.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.springfox:springfox-swagger-common:2.6.1">
<library name="Maven: io.springfox:springfox-swagger-common:2.9.2">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-common/2.6.1/springfox-swagger-common-2.6.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-common/2.9.2/springfox-swagger-common-2.9.2.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-common/2.6.1/springfox-swagger-common-2.6.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-common/2.9.2/springfox-swagger-common-2.9.2-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-common/2.6.1/springfox-swagger-common-2.6.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-common/2.9.2/springfox-swagger-common-2.9.2-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__io_springfox_springfox_swagger_ui_2_9_2.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: io.springfox:springfox-swagger-ui:2.9.2">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-ui/2.9.2/springfox-swagger-ui-2.9.2.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-ui/2.9.2/springfox-swagger-ui-2.9.2-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/springfox/springfox-swagger-ui/2.9.2/springfox-swagger-ui-2.9.2-sources.jar!/" />
</SOURCES>
</library>
</component>

8
.idea/libraries/Maven__io_swagger_swagger_annotations_1_5_10.xml → .idea/libraries/Maven__io_swagger_swagger_annotations_1_5_20.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.swagger:swagger-annotations:1.5.10">
<library name="Maven: io.swagger:swagger-annotations:1.5.20">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-annotations/1.5.10/swagger-annotations-1.5.10.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-annotations/1.5.20/swagger-annotations-1.5.20.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-annotations/1.5.10/swagger-annotations-1.5.10-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-annotations/1.5.20/swagger-annotations-1.5.20-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-annotations/1.5.10/swagger-annotations-1.5.10-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-annotations/1.5.20/swagger-annotations-1.5.20-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__io_swagger_swagger_models_1_5_10.xml → .idea/libraries/Maven__io_swagger_swagger_models_1_5_20.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: io.swagger:swagger-models:1.5.10">
<library name="Maven: io.swagger:swagger-models:1.5.20">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-models/1.5.10/swagger-models-1.5.10.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-models/1.5.20/swagger-models-1.5.20.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-models/1.5.10/swagger-models-1.5.10-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-models/1.5.20/swagger-models-1.5.20-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-models/1.5.10/swagger-models-1.5.10-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/io/swagger/swagger-models/1.5.20/swagger-models-1.5.20-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__it_sauronsoftware_jave_1_0_2.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: it.sauronsoftware:jave:1.0.2">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/it/sauronsoftware/jave/1.0.2/jave-1.0.2.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/it/sauronsoftware/jave/1.0.2/jave-1.0.2-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/it/sauronsoftware/jave/1.0.2/jave-1.0.2-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__javax_activation_activation_1_1.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: javax.activation:activation:1.1">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/javax/activation/activation/1.1/activation-1.1.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/javax/activation/activation/1.1/activation-1.1-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/javax/activation/activation/1.1/activation-1.1-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__javax_mail_javax_mail_api_1_6_2.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: javax.mail:javax.mail-api:1.6.2">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/javax/mail/javax.mail-api/1.6.2/javax.mail-api-1.6.2.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/javax/mail/javax.mail-api/1.6.2/javax.mail-api-1.6.2-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/javax/mail/javax.mail-api/1.6.2/javax.mail-api-1.6.2-sources.jar!/" />
</SOURCES>
</library>
</component>

8
.idea/libraries/Maven__org_apache_commons_commons_collections4_4_1.xml → .idea/libraries/Maven__org_apache_commons_commons_collections4_4_3.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: org.apache.commons:commons-collections4:4.1">
<library name="Maven: org.apache.commons:commons-collections4:4.3">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-collections4/4.1/commons-collections4-4.1.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-collections4/4.1/commons-collections4-4.1-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-collections4/4.1/commons-collections4-4.1-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-collections4/4.3/commons-collections4-4.3-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__org_apache_commons_commons_compress_1_18.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: org.apache.commons:commons-compress:1.18">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-compress/1.18/commons-compress-1.18.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-compress/1.18/commons-compress-1.18-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-compress/1.18/commons-compress-1.18-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__org_apache_commons_commons_math3_3_6_1.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: org.apache.commons:commons-math3:3.6.1">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-math3/3.6.1/commons-math3-3.6.1-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__org_apache_poi_poi_3_15.xml

@ -1,13 +0,0 @@
<component name="libraryTable">
<library name="Maven: org.apache.poi:poi:3.15">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi/3.15/poi-3.15.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi/3.15/poi-3.15-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi/3.15/poi-3.15-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__org_apache_poi_poi_4_1_0.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: org.apache.poi:poi:4.1.0">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi/4.1.0/poi-4.1.0.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi/4.1.0/poi-4.1.0-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi/4.1.0/poi-4.1.0-sources.jar!/" />
</SOURCES>
</library>
</component>

8
.idea/libraries/Maven__org_apache_poi_poi_ooxml_3_15.xml → .idea/libraries/Maven__org_apache_poi_poi_ooxml_4_1_0.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: org.apache.poi:poi-ooxml:3.15">
<library name="Maven: org.apache.poi:poi-ooxml:4.1.0">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml/3.15/poi-ooxml-3.15.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml/3.15/poi-ooxml-3.15-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml/3.15/poi-ooxml-3.15-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml/4.1.0/poi-ooxml-4.1.0-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_3_15.xml → .idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_4_1_0.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: org.apache.poi:poi-ooxml-schemas:3.15">
<library name="Maven: org.apache.poi:poi-ooxml-schemas:4.1.0">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml-schemas/3.15/poi-ooxml-schemas-3.15.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml-schemas/4.1.0/poi-ooxml-schemas-4.1.0.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml-schemas/3.15/poi-ooxml-schemas-3.15-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml-schemas/4.1.0/poi-ooxml-schemas-4.1.0-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml-schemas/3.15/poi-ooxml-schemas-3.15-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/poi/poi-ooxml-schemas/4.1.0/poi-ooxml-schemas-4.1.0-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

8
.idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_2_6_0.xml → .idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_3_1_0.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: org.apache.xmlbeans:xmlbeans:2.6.0">
<library name="Maven: org.apache.xmlbeans:xmlbeans:3.1.0">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/xmlbeans/xmlbeans/2.6.0/xmlbeans-2.6.0.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/xmlbeans/xmlbeans/2.6.0/xmlbeans-2.6.0-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/apache/xmlbeans/xmlbeans/2.6.0/xmlbeans-2.6.0-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__org_checkerframework_checker_compat_qual_2_0_0.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: org.checkerframework:checker-compat-qual:2.0.0">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/checkerframework/checker-compat-qual/2.0.0/checker-compat-qual-2.0.0.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/checkerframework/checker-compat-qual/2.0.0/checker-compat-qual-2.0.0-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/checkerframework/checker-compat-qual/2.0.0/checker-compat-qual-2.0.0-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__org_codehaus_mojo_animal_sniffer_annotations_1_14.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: org.codehaus.mojo:animal-sniffer-annotations:1.14">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/codehaus/mojo/animal-sniffer-annotations/1.14/animal-sniffer-annotations-1.14.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/codehaus/mojo/animal-sniffer-annotations/1.14/animal-sniffer-annotations-1.14-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/codehaus/mojo/animal-sniffer-annotations/1.14/animal-sniffer-annotations-1.14-sources.jar!/" />
</SOURCES>
</library>
</component>

8
.idea/libraries/Maven__org_mapstruct_mapstruct_1_0_0_Final.xml → .idea/libraries/Maven__org_mapstruct_mapstruct_1_2_0_Final.xml

@ -1,13 +1,13 @@
<component name="libraryTable"> <component name="libraryTable">
<library name="Maven: org.mapstruct:mapstruct:1.0.0.Final">
<library name="Maven: org.mapstruct:mapstruct:1.2.0.Final">
<CLASSES> <CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/mapstruct/mapstruct/1.0.0.Final/mapstruct-1.0.0.Final.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/mapstruct/mapstruct/1.2.0.Final/mapstruct-1.2.0.Final.jar!/" />
</CLASSES> </CLASSES>
<JAVADOC> <JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/mapstruct/mapstruct/1.0.0.Final/mapstruct-1.0.0.Final-javadoc.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/mapstruct/mapstruct/1.2.0.Final/mapstruct-1.2.0.Final-javadoc.jar!/" />
</JAVADOC> </JAVADOC>
<SOURCES> <SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/mapstruct/mapstruct/1.0.0.Final/mapstruct-1.0.0.Final-sources.jar!/" />
<root url="jar://$MAVEN_REPOSITORY$/org/mapstruct/mapstruct/1.2.0.Final/mapstruct-1.2.0.Final-sources.jar!/" />
</SOURCES> </SOURCES>
</library> </library>
</component> </component>

13
.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_12.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: org.slf4j:slf4j-api:1.7.12">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.12/slf4j-api-1.7.12.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.12/slf4j-api-1.7.12-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.12/slf4j-api-1.7.12-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__stax_stax_api_1_0_1.xml

@ -1,13 +0,0 @@
<component name="libraryTable">
<library name="Maven: stax:stax-api:1.0.1">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/stax/stax-api/1.0.1/stax-api-1.0.1.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/stax/stax-api/1.0.1/stax-api-1.0.1-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/stax/stax-api/1.0.1/stax-api-1.0.1-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__xml_apis_xml_apis_1_4_01.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: xml-apis:xml-apis:1.4.01">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01-sources.jar!/" />
</SOURCES>
</library>
</component>

13
.idea/libraries/Maven__xml_resolver_xml_resolver_1_2.xml

@ -0,0 +1,13 @@
<component name="libraryTable">
<library name="Maven: xml-resolver:xml-resolver:1.2">
<CLASSES>
<root url="jar://$MAVEN_REPOSITORY$/xml-resolver/xml-resolver/1.2/xml-resolver-1.2.jar!/" />
</CLASSES>
<JAVADOC>
<root url="jar://$MAVEN_REPOSITORY$/xml-resolver/xml-resolver/1.2/xml-resolver-1.2-javadoc.jar!/" />
</JAVADOC>
<SOURCES>
<root url="jar://$MAVEN_REPOSITORY$/xml-resolver/xml-resolver/1.2/xml-resolver-1.2-sources.jar!/" />
</SOURCES>
</library>
</component>

2
.idea/modules.xml

@ -4,7 +4,7 @@
<modules> <modules>
<module fileurl="file://$PROJECT_DIR$/cl_query_data_job/cl_query_data_job.iml" filepath="$PROJECT_DIR$/cl_query_data_job/cl_query_data_job.iml" /> <module fileurl="file://$PROJECT_DIR$/cl_query_data_job/cl_query_data_job.iml" filepath="$PROJECT_DIR$/cl_query_data_job/cl_query_data_job.iml" />
<module fileurl="file://$PROJECT_DIR$/cl_search_api/cl_search_api.iml" filepath="$PROJECT_DIR$/cl_search_api/cl_search_api.iml" /> <module fileurl="file://$PROJECT_DIR$/cl_search_api/cl_search_api.iml" filepath="$PROJECT_DIR$/cl_search_api/cl_search_api.iml" />
<module fileurl="file://$PROJECT_DIR$/cl_stream_3.0.iml" filepath="$PROJECT_DIR$/cl_stream_3.0.iml" />
<module fileurl="file://$PROJECT_DIR$/cl_stream_3.1.iml" filepath="$PROJECT_DIR$/cl_stream_3.1.iml" />
</modules> </modules>
</component> </component>
</project> </project>

2
.idea/vcs.xml

@ -2,7 +2,5 @@
<project version="4"> <project version="4">
<component name="VcsDirectoryMappings"> <component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" /> <mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="E:\work\workspace2021\socialx\SocialX_Stream_3.0\cl_stream_3.0\IntelligentCrawlSearch" vcs="Git" />
<mapping directory="E:\work\workspace2021\socialx\SocialX_Stream_3.0\cl_stream_3.0\cl_stream_3.0" vcs="Git" />
</component> </component>
</project> </project>

51
cl_query_data_job/cl_query_data_job.iml

@ -79,7 +79,7 @@
<orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" /> <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.6" level="project" />
<orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" /> <orderEntry type="library" name="Maven: commons-io:commons-io:2.4" level="project" />
<orderEntry type="library" name="Maven: com.google.guava:guava:19.0" level="project" /> <orderEntry type="library" name="Maven: com.google.guava:guava:19.0" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.6" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:fastjson:1.2.60" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch:6.0.0" level="project" /> <orderEntry type="library" name="Maven: org.elasticsearch:elasticsearch:6.0.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-core:7.0.1" level="project" /> <orderEntry type="library" name="Maven: org.apache.lucene:lucene-core:7.0.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.lucene:lucene-analyzers-common:7.0.1" level="project" /> <orderEntry type="library" name="Maven: org.apache.lucene:lucene-analyzers-common:7.0.1" level="project" />
@ -128,11 +128,8 @@
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.5" level="project" /> <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore:4.4.5" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.2" level="project" /> <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpasyncclient:4.1.2" level="project" />
<orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.5" level="project" /> <orderEntry type="library" name="Maven: org.apache.httpcomponents:httpcore-nio:4.4.5" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.10" level="project" />
<orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" /> <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1.3" level="project" />
<orderEntry type="library" name="Maven: org.elasticsearch.plugin:aggs-matrix-stats-client:6.0.0" level="project" /> <orderEntry type="library" name="Maven: org.elasticsearch.plugin:aggs-matrix-stats-client:6.0.0" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:3.9.1" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:1.13.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.kafka:kafka-clients:0.10.1.0" level="project" /> <orderEntry type="library" name="Maven: org.apache.kafka:kafka-clients:0.10.1.0" level="project" />
<orderEntry type="library" name="Maven: net.jpountz.lz4:lz4:1.3.0" level="project" /> <orderEntry type="library" name="Maven: net.jpountz.lz4:lz4:1.3.0" level="project" />
<orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.1.2.6" level="project" /> <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.1.2.6" level="project" />
@ -157,5 +154,51 @@
<orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.10.6" level="project" /> <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.10.6" level="project" />
<orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.21" level="project" /> <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.21" level="project" />
<orderEntry type="library" name="Maven: com.101tec:zkclient:0.10" level="project" /> <orderEntry type="library" name="Maven: com.101tec:zkclient:0.10" level="project" />
<orderEntry type="library" name="Maven: org.apache.poi:poi:4.1.0" level="project" />
<orderEntry type="library" name="Maven: commons-codec:commons-codec:1.12" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-collections4:4.3" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-math3:3.6.1" level="project" />
<orderEntry type="library" name="Maven: org.apache.poi:poi-ooxml:4.1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.poi:poi-ooxml-schemas:4.1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.xmlbeans:xmlbeans:3.1.0" level="project" />
<orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.18" level="project" />
<orderEntry type="library" name="Maven: com.github.virtuald:curvesapi:1.06" level="project" />
<orderEntry type="library" name="Maven: com.monitorjbl:xlsx-streamer:2.1.0" level="project" />
<orderEntry type="library" name="Maven: com.rackspace.apache:xerces2-xsd11:2.11.1" level="project" />
<orderEntry type="library" name="Maven: com.rackspace.eclipse.webtools.sourceediting:org.eclipse.wst.xml.xpath2.processor:2.1.100" level="project" />
<orderEntry type="library" name="Maven: edu.princeton.cup:java-cup:10k" level="project" />
<orderEntry type="library" name="Maven: com.ibm.icu:icu4j:4.6" level="project" />
<orderEntry type="library" name="Maven: xml-resolver:xml-resolver:1.2" level="project" />
<orderEntry type="library" name="Maven: xml-apis:xml-apis:1.4.01" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okhttp3:okhttp:3.6.0" level="project" />
<orderEntry type="library" name="Maven: com.squareup.okio:okio:1.11.0" level="project" />
<orderEntry type="library" name="Maven: it.sauronsoftware:jave:1.0.2" level="project" />
<orderEntry type="library" name="Maven: com.bfd.nlp:nlp_common_util:1.1" level="project" />
<orderEntry type="library" name="Maven: com.alibaba:druid:1.0.11" level="project" />
<orderEntry type="module-library">
<library name="Maven: com.alibaba:jconsole:1.8.0">
<CLASSES>
<root url="jar://C:/Program Files/Java/jdk1.8.0_65/lib/jconsole.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="module-library">
<library name="Maven: com.alibaba:tools:1.8.0">
<CLASSES>
<root url="jar://C:/Program Files/Java/jdk1.8.0_65/lib/tools.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
<orderEntry type="library" name="Maven: org.jolokia:jolokia-core:1.3.3" level="project" />
<orderEntry type="library" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" />
<orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.9.2" level="project" />
<orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.1" level="project" />
<orderEntry type="library" name="Maven: javax.mail:javax.mail-api:1.6.2" level="project" />
<orderEntry type="library" name="Maven: com.sun.mail:javax.mail:1.6.2" level="project" />
<orderEntry type="library" name="Maven: javax.activation:activation:1.1" level="project" />
</component> </component>
</module> </module>

78
cl_query_data_job/pom.xml

@ -4,9 +4,9 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<parent> <parent>
<artifactId>cl_stream_3.0</artifactId>
<artifactId>cl_stream_4.0</artifactId>
<groupId>com.bfd.mf</groupId> <groupId>com.bfd.mf</groupId>
<version>3.0-SNAPSHOT</version>
<version>4.0-SNAPSHOT</version>
</parent> </parent>
<artifactId>cl_query_data_job</artifactId> <artifactId>cl_query_data_job</artifactId>
@ -72,10 +72,15 @@
<version>19.0</version> <version>19.0</version>
</dependency> </dependency>
<!--<dependency>-->
<!--<groupId>com.alibaba</groupId>-->
<!--<artifactId>fastjson</artifactId>-->
<!--<version>1.2.6</version>-->
<!--</dependency>-->
<dependency> <dependency>
<groupId>com.alibaba</groupId> <groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId> <artifactId>fastjson</artifactId>
<version>1.2.6</version>
<version>1.2.60</version>
</dependency> </dependency>
@ -100,13 +105,6 @@
<version>6.0.0</version> <version>6.0.0</version>
</dependency> </dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.9.1</version>
</dependency>
<dependency> <dependency>
<groupId>org.apache.kafka</groupId> <groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId> <artifactId>kafka-clients</artifactId>
@ -138,6 +136,66 @@
<artifactId>kafka</artifactId> <artifactId>kafka</artifactId>
<version>0.10</version> <version>0.10</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>com.monitorjbl</groupId>
<artifactId>xlsx-streamer</artifactId>
<version>2.1.0</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.6.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/it.sauronsoftware/jave -->
<dependency>
<groupId>it.sauronsoftware</groupId>
<artifactId>jave</artifactId>
<version>1.0.2</version>
</dependency>
<dependency>
<groupId>com.bfd.nlp</groupId>
<artifactId>nlp_common_util</artifactId>
<version>1.1</version>
<exclusions>
<exclusion>
<artifactId>httpclient</artifactId>
<groupId>org.apache.httpcomponents</groupId>
</exclusion>
<!--<exclusion>-->
<!--<artifactId>slf4j-log4j12</artifactId>-->
<!--<groupId>org.slf4j</groupId>-->
<!--</exclusion>-->
<exclusion>
<artifactId>logback-classic</artifactId>
<groupId>ch.qos.logback</groupId>
</exclusion>
</exclusions>
</dependency>
<!-- 有关邮件发送的包 -->
<!-- https://mvnrepository.com/artifact/javax.mail/javax.mail-api -->
<dependency>
<groupId>javax.mail</groupId>
<artifactId>javax.mail-api</artifactId>
<version>1.6.2</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.sun.mail/javax.mail -->
<dependency>
<groupId>com.sun.mail</groupId>
<artifactId>javax.mail</artifactId>
<version>1.6.2</version>
</dependency>
</dependencies> </dependencies>

20
cl_query_data_job/src/main/java/com/bfd/mf/job/Application.java

@ -23,6 +23,12 @@ public class Application {
private QueryProducer queryProducer; private QueryProducer queryProducer;
@Autowired @Autowired
private StatisticsProducer statisticsProducer; private StatisticsProducer statisticsProducer;
@Autowired
private UpLoadProducer upLoadProducer;
@Autowired
private TaskCountProducer taskCountProducer;
@Autowired
private AlarmProducer alarmProducer;
public static void main(String[] args) { public static void main(String[] args) {
ConfigurableApplicationContext context = SpringApplication.run(Application.class, args); ConfigurableApplicationContext context = SpringApplication.run(Application.class, args);
@ -42,7 +48,19 @@ public class Application {
} }
if(config.getEnableBacktraceProducer()){ if(config.getEnableBacktraceProducer()){
backtraceProducer.start(); backtraceProducer.start();
LOGGER.info("---- Statistics producer started successfully. ----");
LOGGER.info("---- Backtrace producer started successfully. ----");
}
if(config.getEnableUpLoadProducer()){
upLoadProducer.start();
LOGGER.info("---- Backtrace producer started successfully. ----");
}
if(config.getEnableTaskcountProducer()){
taskCountProducer.start();
LOGGER.info("---- TaskCount producer started successfully. ----");
}
if(config.getEnableAlarmProducer()){
alarmProducer.start();
LOGGER.info("---- Alarm producer started successfully. ----");
} }
LOGGER.info("---- Application started successfully. ----"); LOGGER.info("---- Application started successfully. ----");

162
cl_query_data_job/src/main/java/com/bfd/mf/job/config/AllKeys.java

@ -0,0 +1,162 @@
package com.bfd.mf.job.config;
import com.bfd.mf.job.util.DateUtil;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
public class AllKeys {
private static Map<String,Object> map = new HashMap();
public static Map<String, Object> getMap() {
return map;
}
public static void setMap(Map<String, Object> map) {
AllKeys.map = map;
}
static {
map.put("_id_","");
map.put("age","");
map.put("area","");
map.put("attitudesCount","");
map.put("attr","");
map.put("author","");
map.put("authorId","");
map.put("authorLevel","");
map.put("authornickname","");
map.put("availability",0);
map.put("avatar","");
map.put("brand","");
map.put("brandId","");
map.put("cate","");
map.put("channel","");
map.put("city","");
map.put("collectCount",0);
map.put("commentId","");
map.put("commentScore",0);
map.put("commentsCount",0);
map.put("commentUrl","");
map.put("content","");
map.put("contentLength",0);
map.put("contentSimHash","");
map.put("contentTag","");
map.put("country","");
map.put("crawlDataFlag","");
map.put("crawlDate",new Date ());
map.put("crawlDay",0L);
map.put("crawlTime",0L);
map.put("crawlTimeStr","");
map.put("createDate",new Date ());
map.put("createDay",0L);
map.put("createTime",0L);
map.put("createTimeStr","");
map.put("dataCount",0);
map.put("dataId","");
map.put("docId","");
map.put("docType","");
map.put("downCnt",0);
map.put("egc",0);
map.put("enSource","");
map.put("expression",new ArrayList<>());
map.put("extension","");
map.put("fansCount","");
map.put("favorCnt",0);
map.put("filePath",new ArrayList<>());
map.put("imagePath",new ArrayList<>());
map.put("videoPath",new ArrayList<>());
map.put("filePathSize",new ArrayList<>());
map.put("imagePathSize",new ArrayList<>());
map.put("videoPathSize",new ArrayList<>());
map.put("finalPhrase","");
map.put("firstListBrand","");
map.put("fiveListBrand","");
map.put("forumScore","");
map.put("forwardAttitudesCount",0);
map.put("forwardAuthor","");
map.put("forwardAvatar","");
map.put("forwardCommentsCount",0);
map.put("forwardContent","");
map.put("forwardImgs","");
map.put("forwardPostSource","");
map.put("forwardPubTime",0L);
map.put("forwardQuoteCount",0);
map.put("forwardUrl","");
map.put("forwardUserId","");
map.put("forwardUserType",0);
map.put("forwardUserUrl","");
map.put("fourListBrand","");
map.put("friendsCount","");
map.put("getSource","");
map.put("hashTag",new ArrayList<>());
map.put("hlKeywords",new ArrayList<>());
map.put("impression","");
map.put("isDownload",false);
map.put("isVip",0);
map.put("language","");
map.put("lastModifiedTime",0L);
map.put("listBrand","");
map.put("location","");
map.put("nomorprice",0);
map.put("opinions",new ArrayList<>());
map.put("originalPhrase","");
map.put("otherSourceJson","");
map.put("pageCommentCount",0);
map.put("pageTranspondCount",0);
map.put("pageType","");
map.put("pgc",0);
map.put("pictureList","");
map.put("places",new ArrayList<>());
map.put("postCount","");
map.put("postId","");
map.put("postSource","");
map.put("price",0);
map.put("primary",1);
map.put("productParameter","");
map.put("projectName","");
map.put("promotionInfo","");
map.put("province","");
map.put("pubDate",new Date());
map.put("pubDay", DateUtil.getcurr());
map.put("pubTime",DateUtil.getcurr());
map.put("pubTimeStr", DateUtil.getDateTime());
map.put("quoteCount",0);
map.put("readCount",0);
map.put("resolution","");
map.put("secondListBrand","");
map.put("sex","");
map.put("sign","");
map.put("siteId","");
map.put("skuProperties","");
map.put("smallImgs","");
map.put("source","");
map.put("sysAbstract","");
map.put("sysKeywords","");
map.put("sysSentiment",0.0);
map.put("threeListBrand","");
map.put("thumbnails","");
map.put("title","");
map.put("titleLength",0);
map.put("titleSimHash","");
map.put("translateContent","");
map.put("translateTitle","");
map.put("ugc",0);
map.put("url","");
map.put("urlHash","");
map.put("userType","");
map.put("userUrl","");
map.put("videoTime","");
map.put("videoUrl","");
map.put("avatarPath","");
map.put("viewCnt",0);
map.put("channelNum","");
map.put("crawlDataFlagType","");
map.put("primaryPost","");
map.put("dns","");
map.put("asrText","");
map.put("ocrText",new ArrayList<>());
}
}

264
cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java

@ -1,8 +1,5 @@
package com.bfd.mf.job.config; package com.bfd.mf.job.config;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.joda.time.Instant; import org.joda.time.Instant;
import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Configuration;
@ -26,6 +23,7 @@ public class AppConfig {
public static final String SEPARATOR_UP = "丄"; public static final String SEPARATOR_UP = "丄";
public static final String SEPARATOR_DOWN = "丅"; public static final String SEPARATOR_DOWN = "丅";
public static final String CL_INDEX= "cl_index"; public static final String CL_INDEX= "cl_index";
public static final String SEPARATOR = "_";
// 从配置文件中读的参数 // 从配置文件中读的参数
private static final String CONFIG_ES_CLUSTER_NAME = "name"; private static final String CONFIG_ES_CLUSTER_NAME = "name";
@ -41,19 +39,34 @@ public class AppConfig {
private Integer testThreadCount; private Integer testThreadCount;
private Long testTaskId; private Long testTaskId;
private String brokerList; private String brokerList;
private String sendTopic;
private List<String> analysisTopic; private List<String> analysisTopic;
private String analysisGroup; private String analysisGroup;
// private Boolean enableAnalysisProducer;
// private Boolean enableAnalysisConsumer;
// private Integer analysisProducerThreadCount;
// private Integer analysisConsumerThreadCount;
private Boolean enableStatisticsProducer; // 离线查询统计服务的状态 private Boolean enableStatisticsProducer; // 离线查询统计服务的状态
private Boolean enableQueryProducer; // 离线查询数据服务的状态 private Boolean enableQueryProducer; // 离线查询数据服务的状态
private Boolean enableBacktraceProducer; // 离线拉取数据服务的状态欧莱雅 private Boolean enableBacktraceProducer; // 离线拉取数据服务的状态欧莱雅
private Boolean enableUpLoadProducer;
private Boolean enableOutputProducer;
private Boolean enableTaskcountProducer;
private Boolean enableAlarmProducer;
private Integer statisticsProducerThreadCount; // 离线查询统计服务的线程数 private Integer statisticsProducerThreadCount; // 离线查询统计服务的线程数
private Integer queryProducerThreadCount; private Integer queryProducerThreadCount;
private Integer backtraceProducerThreadCount; private Integer backtraceProducerThreadCount;
// private Boolean enableCompany;
private Integer upLoadProducerThreadCount;
private Integer outputProducerThreadCount;
private Integer taskcountProducerThreadCount;
private Integer alarmProducerThreadCount;
private String goFastPostUrl;
private String goFastDomain;
private String uploadOLYExcelPath;
private String uploadZipPath;
private String indexNamePre;
// private Boolean enableAnalysisProducer;
// private Boolean enableAnalysisConsumer;
// private Integer analysisProducerThreadCount;
// private Integer analysisConsumerThreadCount;
// private Boolean enableCompany;
// private Integer companyThreadCount; // private Integer companyThreadCount;
// private Boolean enableCompanyProducer; // private Boolean enableCompanyProducer;
// private Boolean enableCompanyConsumer; // private Boolean enableCompanyConsumer;
@ -61,6 +74,8 @@ public class AppConfig {
// private Integer companyConsumerThreadCount; // private Integer companyConsumerThreadCount;
// private Boolean enableZombie; // private Boolean enableZombie;
private Integer periodS; private Integer periodS;
private Long intervalTime;
private Long queryDataYearStarttime;
private String ruleRest; private String ruleRest;
private String commentRest; private String commentRest;
private Integer ruleRestConcurrency; private Integer ruleRestConcurrency;
@ -68,8 +83,25 @@ public class AppConfig {
private Integer failureUpper; private Integer failureUpper;
private Map<String, Object> esNormal; private Map<String, Object> esNormal;
private Map<String, Object> esMini; private Map<String, Object> esMini;
private Map<String, Object> esLogstash;
public Boolean getEnableAlarmProducer() {
return enableAlarmProducer;
}
public void setEnableAlarmProducer(Boolean enableAlarmProducer) {
this.enableAlarmProducer = enableAlarmProducer;
}
public Integer getAlarmProducerThreadCount() {
return alarmProducerThreadCount;
}
public void setAlarmProducerThreadCount(Integer alarmProducerThreadCount) {
this.alarmProducerThreadCount = alarmProducerThreadCount;
}
public Integer getPeriodS() { public Integer getPeriodS() {
return periodS; return periodS;
} }
@ -78,6 +110,22 @@ public class AppConfig {
this.periodS = periodS; this.periodS = periodS;
} }
public Long getIntervalTime() {
return intervalTime;
}
public void setIntervalTime(Long intervalTime) {
this.intervalTime = intervalTime;
}
public Long getQueryDataYearStarttime() {
return queryDataYearStarttime;
}
public void setQueryDataYearStarttime(Long queryDataYearStarttime) {
this.queryDataYearStarttime = queryDataYearStarttime;
}
public Integer getQueryProducerThreadCount() { public Integer getQueryProducerThreadCount() {
return queryProducerThreadCount; return queryProducerThreadCount;
} }
@ -118,6 +166,22 @@ public class AppConfig {
this.enableStatisticsProducer = enableStatisticsProducer; this.enableStatisticsProducer = enableStatisticsProducer;
} }
public Boolean getEnableTaskcountProducer() {
return enableTaskcountProducer;
}
public void setEnableTaskcountProducer(Boolean enableTaskcountProducer) {
this.enableTaskcountProducer = enableTaskcountProducer;
}
public Integer getTaskcountProducerThreadCount() {
return taskcountProducerThreadCount;
}
public void setTaskcountProducerThreadCount(Integer taskcountProducerThreadCount) {
this.taskcountProducerThreadCount = taskcountProducerThreadCount;
}
public Integer getStatisticsProducerThreadCount() { public Integer getStatisticsProducerThreadCount() {
return statisticsProducerThreadCount; return statisticsProducerThreadCount;
} }
@ -126,6 +190,38 @@ public class AppConfig {
this.statisticsProducerThreadCount = statisticsProducerThreadCount; this.statisticsProducerThreadCount = statisticsProducerThreadCount;
} }
public Boolean getEnableUpLoadProducer() {
return enableUpLoadProducer;
}
public void setEnableUpLoadProducer(Boolean enableUpLoadProducer) {
this.enableUpLoadProducer = enableUpLoadProducer;
}
public Integer getUpLoadProducerThreadCount() {
return upLoadProducerThreadCount;
}
public void setUpLoadProducerThreadCount(Integer upLoadProducerThreadCount) {
this.upLoadProducerThreadCount = upLoadProducerThreadCount;
}
public Boolean getEnableOutputProducer() {
return enableOutputProducer;
}
public void setEnableOutputProducer(Boolean enableOutputProducer) {
this.enableOutputProducer = enableOutputProducer;
}
public Integer getOutputProducerThreadCount() {
return outputProducerThreadCount;
}
public void setOutputProducerThreadCount(Integer outputProducerThreadCount) {
this.outputProducerThreadCount = outputProducerThreadCount;
}
public String getVersion() { public String getVersion() {
return version; return version;
} }
@ -166,6 +262,14 @@ public class AppConfig {
this.brokerList = brokerList; this.brokerList = brokerList;
} }
public String getSendTopic() {
return sendTopic;
}
public void setSendTopic(String sendTopic) {
this.sendTopic = sendTopic;
}
public List<String> getAnalysisTopic() { public List<String> getAnalysisTopic() {
return analysisTopic; return analysisTopic;
} }
@ -182,7 +286,6 @@ public class AppConfig {
this.analysisGroup = analysisGroup; this.analysisGroup = analysisGroup;
} }
public String getRuleRest() { public String getRuleRest() {
return ruleRest; return ruleRest;
} }
@ -215,6 +318,31 @@ public class AppConfig {
this.failureUpper = failureUpper; this.failureUpper = failureUpper;
} }
public Map<String, Object> getEsLogstash() {
return esLogstash;
}
public void setEsLogstash(Map<String, Object> esLogstash) {
this.esLogstash = esLogstash;
}
public String esLogstashClusterName() {
return (String) esLogstash.get(CONFIG_ES_CLUSTER_NAME);
}
public String[] esLogstashAddress() {
return ((String) esLogstash.get(CONFIG_ES_ADDRESS)).split(",");
}
public Long esLogstashUpper() {
String upper = (String) esLogstash.get(CONFIG_ES_SOURCE_UPPER);
return Instant.parse(upper).getMillis();
}
public String esLogstashStandby() {
return (String) esLogstash.get(CONFIG_ES_SOURCE_STANDBY);
}
public Map<String, Object> getEsNormal() { public Map<String, Object> getEsNormal() {
return esNormal; return esNormal;
} }
@ -271,6 +399,55 @@ public class AppConfig {
return size; return size;
} }
public String getCommentRest() {
return commentRest;
}
public void setCommentRest(String commentRest) {
this.commentRest = commentRest;
}
public String getGoFastPostUrl() {
return goFastPostUrl;
}
public void setGoFastPostUrl(String goFastPostUrl) {
this.goFastPostUrl = goFastPostUrl;
}
public String getGoFastDomain() {
return goFastDomain;
}
public void setGoFastDomain(String goFastDomain) {
this.goFastDomain = goFastDomain;
}
public String getUploadOLYExcelPath() {
return uploadOLYExcelPath;
}
public void setUploadOLYExcelPath(String uploadOLYExcelPath) {
this.uploadOLYExcelPath = uploadOLYExcelPath;
}
public String getUploadZipPath() {
return uploadZipPath;
}
public void setUploadZipPath(String uploadZipPath) {
this.uploadZipPath = uploadZipPath;
}
public String getIndexNamePre() {
return indexNamePre;
}
public void setIndexNamePre(String indexNamePre) {
this.indexNamePre = indexNamePre;
}
public void verify() { public void verify() {
@ -280,34 +457,24 @@ public class AppConfig {
Assert.isTrue(testTaskId > 0, "Config testTaskId must gt 0"); Assert.isTrue(testTaskId > 0, "Config testTaskId must gt 0");
} }
Assert.hasLength(brokerList, "Config brokerList must not be empty"); Assert.hasLength(brokerList, "Config brokerList must not be empty");
if(enableStatisticsProducer){
if(enableStatisticsProducer){ // 统计
Assert.isTrue(statisticsProducerThreadCount > 0, "Config statisticsProducerThreadCount must gt 0"); Assert.isTrue(statisticsProducerThreadCount > 0, "Config statisticsProducerThreadCount must gt 0");
} }
if(enableQueryProducer){
Assert.isTrue(queryProducerThreadCount > 0, "Config statisticsProducerThreadCount must gt 0");
if(enableQueryProducer){ // 离线拉数
Assert.isTrue(queryProducerThreadCount > 0, "Config queryProducerThreadCount must gt 0");
}
if(enableBacktraceProducer){ // 欧莱雅离线拉数
Assert.isTrue(backtraceProducerThreadCount > 0, "Config backtraceProducerThreadCount must gt 0");
}
if(enableUpLoadProducer){ // 数据上传
Assert.isTrue(upLoadProducerThreadCount > 0, "Config upLoadProducerThreadCount must gt 0");
}
if(enableTaskcountProducer){ // 驾驶舱-每天的平均任务量统计
Assert.isTrue(taskcountProducerThreadCount > 0, "Config taskcountProducerThreadCount must gt 0");
} }
if(enableStatisticsProducer){
Assert.isTrue(backtraceProducerThreadCount > 0, "Config statisticsProducerThreadCount must gt 0");
if(enableAlarmProducer){ // 告警服务 (comment was copy-pasted from the taskcount check above — verify intended meaning)
Assert.isTrue(alarmProducerThreadCount > 0, "Config alarmProducerThreadCount must gt 0");
} }
// if (enableAnalysisProducer) {
// Assert.isTrue(analysisProducerThreadCount > 0, "Config analysisProducerThreadCount must gt 0");
// Assert.notEmpty(analysisTopic, "Config analysisTopic must not be empty.");
// }
// if (enableAnalysisConsumer) {
// Assert.isTrue(analysisConsumerThreadCount > 0, "Config analysisConsumerThreadCount must gt 0");
// Assert.hasLength(analysisGroup, "Config analysisGroup must not be empty.");
// }
// if (enableCompany) {
// Assert.isTrue(companyThreadCount > 0, "Config companyThreadCount must gt 0");
// }
// if (enableCompanyProducer) {
// Assert.isTrue(companyProducerThreadCount > 0, "Config companyProducerThreadCount must gt 0");
//
// }
// if (enableCompanyConsumer) {
// Assert.isTrue(companyConsumerThreadCount > 0, "Config companyConsumerThreadCount must gt 0");
//
// }
Assert.isTrue(periodS > 0, "Config periodS must gt 0"); Assert.isTrue(periodS > 0, "Config periodS must gt 0");
Assert.hasLength(ruleRest, "Config ruleRest must not be empty"); Assert.hasLength(ruleRest, "Config ruleRest must not be empty");
Assert.isTrue(ruleRestConcurrency > 0, "Config ruleRestConcurrency must gt 0"); Assert.isTrue(ruleRestConcurrency > 0, "Config ruleRestConcurrency must gt 0");
@ -317,38 +484,7 @@ public class AppConfig {
Assert.notEmpty(esMini, "Config esTarget must not be empty"); Assert.notEmpty(esMini, "Config esTarget must not be empty");
} }
public String getCommentRest() {
return commentRest;
}
public void setCommentRest(String commentRest) {
this.commentRest = commentRest;
}
// public Map<String, Object> getRedis() {
// return redis;
// }
// public void setRedis(Map<String, Object> redis) {
// this.redis = redis;
// }
// public String redisModel() {
// return (String) redis.get(REDIS_MODEL);
// }
// public String redisZkadress() {
// return (String) redis.get(REDIS_ZKADRESS);
// }
// public String redisZksessiontimeoutms() {
// return (String) redis.get(REDIS_ZKSESSIONTIMEOUTMS);
// }
// public String redisProxypath() {
// return (String) redis.get(REDIS_PORT);
// }
// public String redisAddress() {
// return (String) redis.get(REDIS_ADDRESS);
// }
// public String redisPort() {
// return (String) redis.get(REDIS_PORT);
// }
} }

1116
cl_query_data_job/src/main/java/com/bfd/mf/job/config/BFDApiConfig.java
File diff suppressed because it is too large
View File

81
cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java

@ -6,6 +6,19 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class ESConstants { public class ESConstants {
//0:社交媒体1:新闻资讯2:博客智库3:论坛贴吧4:网络视频5:电商网站6:搜索引擎7:生活方式
public static final String DOCTYPESOCIAL = "0";
public static final String DOCTYPENEWS = "1";
public static final String DOCTYPEBLOG = "2";
public static final String DOCTYPEBBS = "3";
public static final String DOCTYPEVIDEO = "4";
public static final String DOCTYPEITEM = "5";
public static final String DOCTYPESEARDH = "6";
public static final String DOCTYPELIFE = "7";
public static final String LOGSTASH = "logstash-";
/** /**
* 是否是主贴 * 是否是主贴
*/ */
@ -465,9 +478,6 @@ public class ESConstants {
*/ */
public static final String DOC_TYPE_WEIXIN = "weixin"; public static final String DOC_TYPE_WEIXIN = "weixin";
public static final String MEDIA_AREA_KEY = "dict"; public static final String MEDIA_AREA_KEY = "dict";
public static final float BOOST_WEIGHT = 10f; public static final float BOOST_WEIGHT = 10f;
@ -495,7 +505,6 @@ public class ESConstants {
public static final String SUMMARY = "summary"; public static final String SUMMARY = "summary";
// public static final String FILEPATH = "filePath"; // public static final String FILEPATH = "filePath";
public static final String VIDEOURL = "videoUrl"; public static final String VIDEOURL = "videoUrl";
@ -650,8 +659,8 @@ public class ESConstants {
/* /*
* docType(news\bbs...):int 类型常量 * docType(news\bbs...):int 类型常量
*/ */
public static final int DOCTYPENEWS = 0;
public static final int DOCTYPEBBS = 1;
// public static final int DOCTYPENEWS = 0;
// public static final int DOCTYPEBBS = 1;
/* /*
* 组合查询语句key值类型 * 组合查询语句key值类型
*/ */
@ -771,19 +780,33 @@ public class ESConstants {
*define es mapping fields *define es mapping fields
*/ */
public static String PUBTIME = "pubTime"; public static String PUBTIME = "pubTime";
public static String CHANNEL_HYLANDA = "hylanda";
public static String PUBTIMESTR = "pubTimeStr";
public static String PUBDAY = "pubDay";
public static String PUBDATE = "pubDate";
public static String PUBTIME_STR = "pubTimeStr";
public static String CRAWLTIME = "crawlTime";
public static String CRAWLTIMESTR = "crawlTimeStr";
public static String CRAWLDAY = "crawlDay";
public static String CRAWLDATE = "crawlDate";
public static String CREATETIME = "createTime"; public static String CREATETIME = "createTime";
public static String CREATETIMESTR = "createTimeStr"; public static String CREATETIMESTR = "createTimeStr";
public static String CREATETIMEDAY = "createDay";
public static String CREATEDAY = "createDay";
public static String CREATEDATE = "createDate";
public static String OCRTEXT = "ocrText";
public static String ASRTEXT = "asrText";
public static String PUTTIME = "putTime";
public static String CRAWLTIME = "crawlTime";
public static String ORIGINAL_SOURCE = "originalSource"; public static String ORIGINAL_SOURCE = "originalSource";
public static String CONTENT_SIMHASH = "contentSimHash"; public static String CONTENT_SIMHASH = "contentSimHash";
public static String QUOTE_COUNT = "quoteCount"; public static String QUOTE_COUNT = "quoteCount";
public static String PUTTIME = "putTime";
public static String CHANNEL_HYLANDA = "hylanda";
/** /**
* 内容 * 内容
*/ */
@ -802,7 +825,13 @@ public class ESConstants {
public static final String TITLE = "title"; public static final String TITLE = "title";
public static final String USER_LEVEL_NAME = "userLevelName"; public static final String USER_LEVEL_NAME = "userLevelName";
public static String SOURCE = "source"; public static String SOURCE = "source";
public static String CRAWLDATAFLAG= "crawlDataFlag";
public static String TRANSLATETITLE = "translateTitle";
public static String TRANSLATECONTENT = "translateContent";
public static String AUTHORID = "authorId";
public static String PRICE = "price";
public static String PRODUCTPARAMETER = "productParameter";
public static String CRAWLDATAFLAG = "crawlDataFlag";
public static String CRAWLDATAFLAGTYPE = "crawlDataFlagType";
//微信专属字段内容固定 value="微信" //微信专属字段内容固定 value="微信"
public static String WEIXIN_SOURCE = "weixinSource"; public static String WEIXIN_SOURCE = "weixinSource";
@ -821,7 +850,7 @@ public class ESConstants {
public static String KEYWORD = "keyword"; public static String KEYWORD = "keyword";
public static String ACCOUNT = "account"; public static String ACCOUNT = "account";
// public static String URL = "url";
// public static String URL = "url";
public static String SYS_ABSTRACT = "sysAbstract"; public static String SYS_ABSTRACT = "sysAbstract";
public static String SYS_KEYWORDS = "sysKeywords"; public static String SYS_KEYWORDS = "sysKeywords";
@ -905,15 +934,13 @@ public class ESConstants {
public static final String APP = "app"; public static final String APP = "app";
public static String WEIBO_DEFAULT_AVATOR = "http://tva1.sinaimg.cn/default/images/default_avatar_male_180.gif"; public static String WEIBO_DEFAULT_AVATOR = "http://tva1.sinaimg.cn/default/images/default_avatar_male_180.gif";
public static final String BAIDUKOUBEI = "baidukoubei";
public static final String DIANPING = "dianping";
public static final String FANGTIANXIA = "fangtianxia";
public static final String BAIDUTIEBA = "baidutieba";
public static final String SOUHU = "souhu";
public static final String XIECHENG = "xiecheng";
public static final String AUTOHOME = "autohome";
public static final String TMALL = "tmall"; public static final String TMALL = "tmall";
public static final String YILONG = "yilong";
public static final String TAOBAO = "taobao";
public static final String EJINGDONG = "ejingdong";
public static final String SUNING = "suning";
public static final String WEIPINHUI = "weipinhui";
public static final String EJUMEI = "ejumei";
public static final String SEPHORA = "sephora";
// 电商 // 电商
public static final String ITEM = "item"; public static final String ITEM = "item";
public static final String ITEM_NAME = "itemName"; public static final String ITEM_NAME = "itemName";
@ -991,21 +1018,18 @@ public class ESConstants {
public static final String FORWARD_USER_TYPE = "forwardUserType"; public static final String FORWARD_USER_TYPE = "forwardUserType";
public static final String FORWARD_PUBTIME = "forwardPubTime"; public static final String FORWARD_PUBTIME = "forwardPubTime";
public static final String FORWARD_FLAG = "forwardFlag"; // 转发回帖标志 1(转发回帖) 0(原贴) public static final String FORWARD_FLAG = "forwardFlag"; // 转发回帖标志 1(转发回帖) 0(原贴)
public static final int REGULAR_PUBTIME_QUERY = 0; public static final int REGULAR_PUBTIME_QUERY = 0;
public static final int REGULAR_PUBTIME_AND_CREATETIME_QUERY = 1; public static final int REGULAR_PUBTIME_AND_CREATETIME_QUERY = 1;
public static final List<String> TYPE_LIST = new ArrayList<>(); public static final List<String> TYPE_LIST = new ArrayList<>();
public static final List<Long> BACKEND_ANALYSIS_LIST = new ArrayList<>(); public static final List<Long> BACKEND_ANALYSIS_LIST = new ArrayList<>();
public static final List<Long> BACKEND_AVAILABILITY_LIST = new ArrayList<>(); public static final List<Long> BACKEND_AVAILABILITY_LIST = new ArrayList<>();
public static final List<String> APP_TYPE_LIST = new ArrayList<String>(); public static final List<String> APP_TYPE_LIST = new ArrayList<String>();
@ -1035,15 +1059,22 @@ public class ESConstants {
public static final String IMAGEPATHSIZE = "imagePathSize"; public static final String IMAGEPATHSIZE = "imagePathSize";
public static final String FILEPATHSIZE = "filePathSize"; public static final String FILEPATHSIZE = "filePathSize";
public static final String VIDEOPATHSIZE = "videoPathSize"; public static final String VIDEOPATHSIZE = "videoPathSize";
public static final String SRCIMAGEPATH = "srcimagePath";
public static final String SRCVIDEOPATH = "srcvideoPath";
public static final String SRCFILEPATH = "srcfilePath";
public static final String PGC = "pgc"; public static final String PGC = "pgc";
public static final String UGC = "ugc"; public static final String UGC = "ugc";
public static final String EGC = "egc"; public static final String EGC = "egc";
public static final String URL = "url"; public static final String URL = "url";
public static final String SIZE = "size"; public static final String SIZE = "size";
public static final String RESOLUTION= "resolution"; public static final String RESOLUTION= "resolution";
public static final String VIDEOTIME = "videoTime"; public static final String VIDEOTIME = "videoTime";
public static final String GOFASTURL = "gofastUrl";
public static final String ORIGINALURL = "originalUrl";
public static final String PATHSIZELIST = "pathSizeList";
public static final String PATH = "path";
} }

27
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/EmailGroup.java

@ -0,0 +1,27 @@
package com.bfd.mf.job.domain.entity;
import javax.persistence.Entity;
import javax.persistence.Table;
@Entity
@Table(name = "cl_email_group")
public class EmailGroup extends AbstractEntity {

    /** Recipient e-mail address stored for this group entry. */
    private String email;

    /** Display name of the e-mail group this address belongs to. */
    private String groupName;

    /** @return the group's display name */
    public String getGroupName() {
        return groupName;
    }

    /** @param groupName the group's display name */
    public void setGroupName(String groupName) {
        this.groupName = groupName;
    }

    /** @return the recipient e-mail address */
    public String getEmail() {
        return email;
    }

    /** @param email the recipient e-mail address */
    public void setEmail(String email) {
        this.email = email;
    }
}

101
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/ServiceLoad.java

@ -0,0 +1,101 @@
package com.bfd.mf.job.domain.entity;
import javax.persistence.Entity;
import javax.persistence.Table;
import java.util.Date;
/**
 * JPA entity mapped to table {@code cl_service_load}: one row per monitored
 * service endpoint, carrying its cluster/identity info and last-known status.
 */
@Entity
@Table(name = "cl_service_load")
public class ServiceLoad extends AbstractEntity {

    // Identity / routing
    private String channelId;
    private String clientId;
    private String serviceUrl;
    private String serviceNames;
    private String serviceCluster;
    private String token;

    // State
    private Integer status;          // row-level status flag
    private Float serviceStatus;     // measured load/health value

    // Audit
    private Date updateTime;
    private String updateUser;

    public String getChannelId() {
        return channelId;
    }

    public void setChannelId(String channelId) {
        this.channelId = channelId;
    }

    public String getClientId() {
        return clientId;
    }

    public void setClientId(String clientId) {
        this.clientId = clientId;
    }

    public String getServiceUrl() {
        return serviceUrl;
    }

    public void setServiceUrl(String serviceUrl) {
        this.serviceUrl = serviceUrl;
    }

    public String getServiceNames() {
        return serviceNames;
    }

    public void setServiceNames(String serviceNames) {
        this.serviceNames = serviceNames;
    }

    public String getServiceCluster() {
        return serviceCluster;
    }

    public void setServiceCluster(String serviceCluster) {
        this.serviceCluster = serviceCluster;
    }

    public String getToken() {
        return token;
    }

    public void setToken(String token) {
        this.token = token;
    }

    public Integer getStatus() {
        return status;
    }

    public void setStatus(Integer status) {
        this.status = status;
    }

    public Float getServiceStatus() {
        return serviceStatus;
    }

    public void setServiceStatus(Float serviceStatus) {
        this.serviceStatus = serviceStatus;
    }

    public Date getUpdateTime() {
        return updateTime;
    }

    public void setUpdateTime(Date updateTime) {
        this.updateTime = updateTime;
    }

    public String getUpdateUser() {
        return updateUser;
    }

    public void setUpdateUser(String updateUser) {
        this.updateUser = updateUser;
    }
}

20
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Subject.java

@ -11,7 +11,7 @@ public class Subject extends AbstractEntity {
private Integer top; //0:未置顶1:置顶 private Integer top; //0:未置顶1:置顶
private String name; // 专题名称 private String name; // 专题名称
private String description; // 话题描述 private String description; // 话题描述
private Integer status; //专题状态 0使用中 1已暂停
// private Integer status; //专题状态 0使用中 1已暂停
private Integer subjectType; //话题类型1分类,2话题,3帖子 private Integer subjectType; //话题类型1分类,2话题,3帖子
private Integer keywordsType; //关键词类型1简单型,2关联型,3专家型 private Integer keywordsType; //关键词类型1简单型,2关联型,3专家型
private String simpleContent; // 简单型原始内容逗号分割 private String simpleContent; // 简单型原始内容逗号分割
@ -68,14 +68,14 @@ public class Subject extends AbstractEntity {
public void setDescription(String description) { public void setDescription(String description) {
this.description = description; this.description = description;
} }
public Integer getStatus() {
return status;
}
public void setStatus(Integer status) {
this.status = status;
}
//
// public Integer getStatus() {
// return status;
// }
//
// public void setStatus(Integer status) {
// this.status = status;
// }
public Integer getSubjectType() { public Integer getSubjectType() {
return subjectType; return subjectType;
@ -339,7 +339,7 @@ public class Subject extends AbstractEntity {
"top=" + top + "top=" + top +
", name='" + name + '\'' + ", name='" + name + '\'' +
", description='" + description + '\'' + ", description='" + description + '\'' +
", status=" + status +
// ", status=" + status +
", subjectType=" + subjectType + ", subjectType=" + subjectType +
", keywordsType=" + keywordsType + ", keywordsType=" + keywordsType +
", simpleContent='" + simpleContent + '\'' + ", simpleContent='" + simpleContent + '\'' +

9
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/SubjectCount.java

@ -10,6 +10,7 @@ import java.util.Date;
public class SubjectCount extends AbstractEntity { public class SubjectCount extends AbstractEntity {
private BigInteger subjectId; private BigInteger subjectId;
private Date createTime; private Date createTime;
private Date updateTime;
private BigInteger totalCount; private BigInteger totalCount;
private BigInteger todayTotalCount; private BigInteger todayTotalCount;
private BigInteger socialTotalCount; private BigInteger socialTotalCount;
@ -51,6 +52,14 @@ public class SubjectCount extends AbstractEntity {
this.createTime = createTime; this.createTime = createTime;
} }
public Date getUpdateTime() {
return updateTime;
}
public void setUpdateTime(Date updateTime) {
this.updateTime = updateTime;
}
public BigInteger getTotalCount() { public BigInteger getTotalCount() {
return totalCount; return totalCount;
} }

111
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Task.java

@ -2,53 +2,49 @@ package com.bfd.mf.job.domain.entity;
import javax.persistence.*; import javax.persistence.*;
import java.math.BigInteger; import java.math.BigInteger;
import java.sql.Timestamp;
import java.util.Date; import java.util.Date;
@Entity @Entity
@Table(name = "cl_task") @Table(name = "cl_task")
public class Task extends AbstractEntity { public class Task extends AbstractEntity {
// @Id
// @GeneratedValue(strategy = GenerationType.IDENTITY)
// private BigInteger id;
// private Integer top;
//id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark
// private long top;
private BigInteger subjectId; private BigInteger subjectId;
// private String appId;
private String externalId; private String externalId;
// private Integer crawlId;
// private long crawlId;
private Integer siteType; private Integer siteType;
private Integer taskType;
//private Integer siteId;
private Integer taskType; // u
// private long siteId;
private String cid; private String cid;
// private String attachTag;
// private Integer crawlStatus;
// private String crawlKeyword;
// private String crawlPageTypes;
// private String crawlContentKey;
// private Integer crawlMode;
// private Integer crawlCyclicityTime;
//private Integer crawlPeriodHour;
// private BigInteger maxPageNum;
// private BigInteger secondaryDataMaxPage;
// private String attachTag;
private Integer crawlStatus; // u
// private String crawlKeyword;
// private String crawlPageTypes;
private String crawlContentKey;
// private long crawlMode;
// private int crawlCyclicityTime; //crawl_cyclicity_time
// private long crawlPeriodHour;
// private long maxPageNum;
// private long secondaryDataMaxPage;
private BigInteger crawlStartTime; private BigInteger crawlStartTime;
private BigInteger crawlEndTime; private BigInteger crawlEndTime;
private String crawlDataFlag; private String crawlDataFlag;
private BigInteger dataTotal; private BigInteger dataTotal;
private BigInteger todayDataTotal; private BigInteger todayDataTotal;
private Integer cacheNum; private Integer cacheNum;
// private Date createTime;
//private String createUser;
// private String createUserId;
private Date updateTime;
// private String updateUser;
// private String updateUserId;
// private Integer del;
// public BigInteger getId() {
// return id;
// }
//
// public void setId(BigInteger id) {
// this.id = id;
// }
// private java.sql.Timestamp createTime;
// private String createUser;
// private String createUserId;
private java.sql.Timestamp updateTime;
// private String updateUser;
// private String updateUserId;
private Integer del;
// private String fileName;
// private String fileRemark;
public BigInteger getSubjectId() { public BigInteger getSubjectId() {
return subjectId; return subjectId;
@ -66,14 +62,6 @@ public class Task extends AbstractEntity {
this.externalId = externalId; this.externalId = externalId;
} }
public Integer getTaskType() {
return taskType;
}
public void setTaskType(Integer taskType) {
this.taskType = taskType;
}
public Integer getSiteType() { public Integer getSiteType() {
return siteType; return siteType;
} }
@ -81,13 +69,14 @@ public class Task extends AbstractEntity {
public void setSiteType(Integer siteType) { public void setSiteType(Integer siteType) {
this.siteType = siteType; this.siteType = siteType;
} }
// public Integer getSiteId() {
// return siteId;
// }
//
// public void setSiteId(Integer siteId) {
// this.siteId = siteId;
// }
public Integer getTaskType() {
return taskType;
}
public void setTaskType(Integer taskType) {
this.taskType = taskType;
}
public String getCid() { public String getCid() {
return cid; return cid;
@ -97,6 +86,22 @@ public class Task extends AbstractEntity {
this.cid = cid; this.cid = cid;
} }
public Integer getCrawlStatus() {
return crawlStatus;
}
public String getCrawlContentKey() {
return crawlContentKey;
}
public void setCrawlContentKey(String crawlContentKey) {
this.crawlContentKey = crawlContentKey;
}
public void setCrawlStatus(Integer crawlStatus) {
this.crawlStatus = crawlStatus;
}
public BigInteger getCrawlStartTime() { public BigInteger getCrawlStartTime() {
return crawlStartTime; return crawlStartTime;
} }
@ -145,11 +150,19 @@ public class Task extends AbstractEntity {
this.cacheNum = cacheNum; this.cacheNum = cacheNum;
} }
public Date getUpdateTime() {
public Timestamp getUpdateTime() {
return updateTime; return updateTime;
} }
public void setUpdateTime(Date updateTime) {
public void setUpdateTime(Timestamp updateTime) {
this.updateTime = updateTime; this.updateTime = updateTime;
} }
public Integer getDel() {
return del;
}
public void setDel(Integer del) {
this.del = del;
}
} }

36
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/TaskCount.java

@ -0,0 +1,36 @@
package com.bfd.mf.job.domain.entity;
import javax.persistence.Entity;
import javax.persistence.Table;
@Entity
@Table(name = "cl_task_count")
public class TaskCount extends AbstractEntity {
private String countDate;
private float avgCount;
private float avgSpeed;
public String getCountDate() {
return countDate;
}
public void setCountDate(String countDate) {
this.countDate = countDate;
}
public float getAvgCount() {
return avgCount;
}
public void setAvgCount(float avgCount) {
this.avgCount = avgCount;
}
public float getAvgSpeed() {
return avgSpeed;
}
public void setAvgSpeed(float avgSpeed) {
this.avgSpeed = avgSpeed;
}
}

90
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/UploadTask.java

@ -0,0 +1,90 @@
package com.bfd.mf.job.domain.entity;
import javax.persistence.*;
import java.math.BigInteger;
@Entity
@Table(name = "cl_task")
public class UploadTask extends AbstractEntity {
// private long top;
private BigInteger subjectId;
private Integer taskType; // u
private Integer crawlStatus; // u
private String fileName;
private String crawlDataFlag;
public String getCrawlDataFlag() {
return crawlDataFlag;
}
public void setCrawlDataFlag(String crawlDataFlag) {
this.crawlDataFlag = crawlDataFlag;
}
public BigInteger getSubjectId() {
return subjectId;
}
public void setSubjectId(BigInteger subjectId) {
this.subjectId = subjectId;
}
public int getTaskType() {
return taskType;
}
public void setTaskType(int taskType) {
this.taskType = taskType;
}
public int getCrawlStatus() {
return crawlStatus;
}
public void setCrawlStatus(int crawlStatus) {
this.crawlStatus = crawlStatus;
}
public String getFileName() {
return fileName;
}
public void setFileName(String fileName) {
this.fileName = fileName;
}
// private int del;
// private String fileRemark;
// private String appId;
// private String externalId;
// private long crawlId;
// private int siteType;
// private long siteId;
// private String cid;
// private String attachTag;
// private String crawlKeyword;
// private String crawlPageTypes;
// private String crawlContentKey;
// private long crawlMode;
// private int crawlCyclicityTime; //crawl_cyclicity_time
// private long crawlPeriodHour;
// private long maxPageNum;
// private long secondaryDataMaxPage;
// private BigInteger crawlStartTime;
// private BigInteger crawlEndTime;
// private String crawlDataFlag;
// private BigInteger dataTotal;
// private BigInteger todayDataTotal;
// private Integer cacheNum;
// private java.sql.Timestamp createTime;
// private String createUser;
// private String createUserId;
// private Timestamp updateTime;
// private String updateUser;
// private String updateUserId;
}

14
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/EmailGroupRepository.java

@ -0,0 +1,14 @@
package com.bfd.mf.job.domain.repository;
import com.bfd.mf.job.domain.entity.EmailGroup;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.CrudRepository;
import java.util.List;
public interface EmailGroupRepository extends CrudRepository<EmailGroup, Long> {
@Query(value = "SELECT * FROM cl_email_group WHERE id IN (SELECT email_group_id from cl_site_email_res WHERE site_id = (SELECT id FROM `cl_site` WHERE is_usable = 0 AND cid = ?1));", nativeQuery = true)
List<EmailGroup> getEmailGroupsByCid(String cid);
}

1
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ResultDetailRepository.java

@ -1,7 +1,6 @@
package com.bfd.mf.job.domain.repository; package com.bfd.mf.job.domain.repository;
import com.bfd.mf.job.domain.entity.ResultDetail; import com.bfd.mf.job.domain.entity.ResultDetail;
import com.bfd.mf.job.domain.entity.Task;
import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query; import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.CrudRepository; import org.springframework.data.repository.CrudRepository;

17
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ServiceLoadRepository.java

@ -0,0 +1,17 @@
package com.bfd.mf.job.domain.repository;
import com.bfd.mf.job.domain.entity.ServiceLoad;
import com.bfd.mf.job.domain.entity.TaskCount;
import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.CrudRepository;
import org.springframework.transaction.annotation.Transactional;
public interface ServiceLoadRepository extends CrudRepository<ServiceLoad, Long> {
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_service_load set service_status =?2 ,update_time = now() where id =?1 ", nativeQuery = true)
void updateTaskCount(Integer id, Float serviceStatus);
}

11
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java

@ -7,7 +7,7 @@ import org.springframework.data.repository.CrudRepository;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.List;
import java.util.Date;
public interface SubjectCountRepository extends CrudRepository<SubjectCount, Long> { public interface SubjectCountRepository extends CrudRepository<SubjectCount, Long> {
@ -20,8 +20,8 @@ public interface SubjectCountRepository extends CrudRepository<SubjectCount, Lon
@Query(value = "SELECT id FROM cl_subject_count WHERE subject_id is NULL AND create_time = ?1", nativeQuery = true) @Query(value = "SELECT id FROM cl_subject_count WHERE subject_id is NULL AND create_time = ?1", nativeQuery = true)
BigInteger findIdBySubjectDate(String today); BigInteger findIdBySubjectDate(String today);
@Query(value = "SELECT ct.id,ct.subject_id,ct.cache_num,ct.cid,ct.external_id,ct.crawl_data_flag,ct.crawl_start_time,ct.crawl_end_time,cs.kafka_switch,cs.kafka_addr,cs.kafka_topic,cs.go_fast_addr,cs.`status`,ct.del as delt,cs.del as dels FROM cl_task ct JOIN cl_subject cs ON (ct.subject_id = cs.id) WHERE cs.status = 0 AND ct.del = 0 AND cs.del = 0",nativeQuery = true)
List<Object> findAllSubject();
// @Query(value = "SELECT ct.id,ct.subject_id,ct.cache_num,ct.cid,ct.external_id,ct.crawl_data_flag,ct.crawl_start_time,ct.crawl_end_time,cs.kafka_switch,cs.kafka_addr,cs.kafka_topic,cs.go_fast_addr,cs.`status`,ct.del as delt,cs.del as dels FROM cl_task ct JOIN cl_subject cs ON (ct.subject_id = cs.id) WHERE cs.status = 0 AND ct.del = 0 AND cs.del = 0",nativeQuery = true)
// List<Object> findAllSubject();
/** /**
@ -41,7 +41,7 @@ public interface SubjectCountRepository extends CrudRepository<SubjectCount, Lon
@Modifying @Modifying
@Transactional(rollbackFor = Exception.class) @Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_subject_count set update_time = now(),total_count=?3,today_total_count=?2," +
@Query(value = "update cl_subject_count set total_count=?3,today_total_count=?2," +
"url_type_total_count =?4,keyword_type_total_count=?5,account_type_total_count=?6," + "url_type_total_count =?4,keyword_type_total_count=?5,account_type_total_count=?6," +
"url_type_count=?7,keyword_type_count=?8,account_type_count=?9," + "url_type_count=?7,keyword_type_count=?8,account_type_count=?9," +
"social_total_count=?10,social_count=?11,bbs_total_count=?12,bbs_count=?13,blog_total_count=?14,blog_count =?15,news_total_count=?16,news_count=?17," + "social_total_count=?10,social_count=?11,bbs_total_count=?12,bbs_count=?13,blog_total_count=?14,blog_count =?15,news_total_count=?16,news_count=?17," +
@ -57,6 +57,9 @@ public interface SubjectCountRepository extends CrudRepository<SubjectCount, Lon
BigInteger video_total_count, BigInteger video_count, BigInteger life_total_count, BigInteger life_count); BigInteger video_total_count, BigInteger video_count, BigInteger life_total_count, BigInteger life_count);
@Query(value = "SELECT update_time FROM cl_subject_count WHERE subject_id = ?1 AND create_time = ?2", nativeQuery = true)
Date getUpdateTimeBySubjectId(BigInteger subjectId, String today);
// //
// /** // /**

5
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectRepository.java

@ -48,6 +48,11 @@ public interface SubjectRepository extends CrudRepository<Subject, Long> {
@Query(value = "update cl_subject set cache_recalculate_status=?2 where id=?1", nativeQuery = true) @Query(value = "update cl_subject set cache_recalculate_status=?2 where id=?1", nativeQuery = true)
void updateCacheRecalculateStatus(Long subjectId,int reCacheRecalculateStatus); void updateCacheRecalculateStatus(Long subjectId,int reCacheRecalculateStatus);
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set crawl_status=?2,end_time=now() where subject_id=?1", nativeQuery = true)
void updateTaskStatus(Long subjectId,int status);
// //
// /** // /**
// * 更新进度 // * 更新进度

7
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskCountRepository.java

@ -0,0 +1,7 @@
package com.bfd.mf.job.domain.repository;
import com.bfd.mf.job.domain.entity.TaskCount;
import org.springframework.data.repository.CrudRepository;
public interface TaskCountRepository extends CrudRepository<TaskCount, Long> {
}

61
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java

@ -8,33 +8,59 @@ import org.springframework.transaction.annotation.Transactional;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.List; import java.util.List;
import java.util.Map;
public interface TaskRepository extends CrudRepository<Task, Long> { public interface TaskRepository extends CrudRepository<Task, Long> {
@Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true)
List<Task> findAllTask();
// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true)
// List<Task> findAllTask();
@Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE cache_num = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)",nativeQuery = true)
@Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM cl_task WHERE task_type <> 3 AND crawl_status = 1 AND cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true)
List<Task> findAllNewTask(); List<Task> findAllNewTask();
@Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0",nativeQuery = true)
// 统计服务查询 要统计的任务之前由于所有任务都要半小时统计任务太多会把E搞挂就只统计 update_time 近一天的吧
// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark FROM cl_task WHERE del = 0 AND crawl_status <> 3",nativeQuery = true) // AND crawl_status <> 3
// 每天只统计两种情况的任务
//1当天完成的任务crawl_status=3 and end_time > 前天
//2状态为采集中或者 暂停的任务 crawl_status=0 or crawl_tatus=1
// 其他的任务就不用每天都统计了
@Query(value = " SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM `cl_task` WHERE del = 0 AND ((crawl_status = 1 OR crawl_status = 0) OR (crawl_status = 3 AND end_time > date_sub(curdate(),interval 2 day))) ;",nativeQuery = true)
List<Task> findAllBydel0(); List<Task> findAllBydel0();
//
// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true)
// List<Task> findAllNewTask();
//
// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE del=0 ",nativeQuery = true)
// List<Task> findAllBydel0();
@Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND subject_id=?1",nativeQuery = true)
List<Task> findTasksBySbujectIdAndDel0(BigInteger subjectId);
// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND subject_id=?1",nativeQuery = true)
// List<Task> findTasksBySbujectIdAndDel0(BigInteger subjectId);
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2",nativeQuery = true)
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true)
Long findDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); Long findDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType);
@Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2",nativeQuery = true)
@Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true)
Long findTodayDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); Long findTodayDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType);
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true)
Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
@Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
@Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true)
Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
// 统计任务的抓取量 任务质量 任务状态为已完成 今天入库的总数据量 / 总任务数
@Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time <?2 AND crawl_status = 3 AND task_type <>3 ",nativeQuery = true)
Long findTodayDataTotal(String taskStartTime ,String taskEndTime);
// @Query(value = " SELECT count(*) FROM cl_task WHERE today_data_total > 0 AND task_level < 2 AND crawl_status = 3 ",nativeQuery = true)
// Long findTodayDataTotalTaskNum();
// @Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true)
// List<UploadTask> getTaskNeedUpLoad();
// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
/** /**
@ -61,7 +87,22 @@ public interface TaskRepository extends CrudRepository<Task, Long> {
@Query(value = "update cl_task set data_total =?2 , today_data_total =?3 where id =?1 ", nativeQuery = true) @Query(value = "update cl_task set data_total =?2 , today_data_total =?3 where id =?1 ", nativeQuery = true)
void updateTaskCount(Long id, Long totalCount, Long todayCount); void updateTaskCount(Long id, Long totalCount, Long todayCount);
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set crawl_status =?4 where id =?1 ", nativeQuery = true)
void updateCrawlStatus(long taskId);
@Query(value = " SELECT id,start_time,end_time FROM cl_task WHERE del=0 AND crawl_status = 3 AND start_time >?1 AND end_time <?2 ",nativeQuery = true)
List<Map<String,String>> findByCrawlTime(String taskStartTime, String taskEndTime);
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "UPDATE cl_task SET today_data_total=0 WHERE end_time <?1 AND crawl_status = 3 AND today_data_total >0 ", nativeQuery = true)
void updateTodayTotalCount(String updateTime);
// 获得前一天完成的任务的时间差除欧莱雅的任务和上传的任务
@Query(value = "SELECT TIMESTAMPDIFF(MINUTE, start_time,end_time) FROM cl_task WHERE del = 0 AND task_type <>3 AND crawl_status = 3 AND data_total > 0 AND end_time > ?1 AND end_time < ?2 ",nativeQuery = true)
List<BigInteger> findTaskByCrawlTime(String taskStartTime, String taskEndTime);
// //
// /** // /**

100
cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/UploadTaskRepository.java

@ -0,0 +1,100 @@
package com.bfd.mf.job.domain.repository;
import com.bfd.mf.job.domain.entity.UploadTask;
import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.CrudRepository;
import org.springframework.transaction.annotation.Transactional;
import java.util.List;
public interface UploadTaskRepository extends CrudRepository<UploadTask, Long> {
@Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,crawl_data_flag,del " +
"from cl_task " +
"WHERE del = 0 AND task_type = 3 AND crawl_status=0 limit 1",nativeQuery = true)
List<UploadTask> getTaskNeedUpLoad();
// @Query(value = "SELECT id,subject_id,task_type,crawl_status,crawl_data_flag,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true)
// List<UploadTask> getTaskNeedUpLoad();
// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
//
// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true)
// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType);
/**
* 更新每个任务 拉数据次数
*/
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set cache_num=?1 where id=?2", nativeQuery = true)
Integer updateStatus(int cache_num, long id);
/**
* 乐观锁
*/
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set cache_num=?1 where id=?2 and cache_num=?3", nativeQuery = true)
Integer tryLock(Integer newStatus, long id, Integer oldStatus);
/**
* 修改每个任务的统计结果
*/
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set data_total =?2 , today_data_total =?3 where id =?1 ", nativeQuery = true)
void updateTaskCount(Long id, Long totalCount, Long todayCount);
@Modifying
@Transactional(rollbackFor = Exception.class)
@Query(value = "update cl_task set crawl_status =?2 ,data_total = ?3 ,crawl_start_time = ?4 , crawl_end_time = ?5 where id =?1 ", nativeQuery = true)
void updateCrawlStatus(long taskId,int crawlStatus,int dataTotal,long crawlStartTime,long crawlEndTime);
//
// /**
// * 更新进度
// */
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_label_backtrace_task set processed=processed+:processed,satisfied=satisfied+:satisfied,updated_time=:updatedTime where id=:id", nativeQuery = true)
// Integer increaseStat(@Param("processed") long processed, @Param("satisfied") long satisfied, @Param("updatedTime") long updatedTime, @Param("id") long id);
//
// /**
// * 设置进度
// */
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_label_backtrace_task set progress=:progress,updated_time=:updatedTime where id=:id and progress <= :progress", nativeQuery = true)
// Integer setProgress(@Param("progress") double progress, @Param("updatedTime") long updatedTime, @Param("id") long id);
//
// /**
// * 增加进度
// *
// * @Query(value = "update cl_label_backtrace_task set progress=progress+:progress,updated_time=:updatedTime where id=:id", nativeQuery = true)
// */
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_label_backtrace_task set progress=progress+:progress,updated_time=:updatedTime where id=:id", nativeQuery = true)
// Integer increaseProgress(@Param("progress") double progress, @Param("updatedTime") long updatedTime, @Param("id") long id);
//
//
// /**
// * 重置状态
// * 如果status=2并且updated_time<给定lastUpdatedTime并且retry_times<=max_retry_times重置status为1
// */
// @Modifying
// @Transactional(rollbackFor = Exception.class)
// @Query(value = "update cl_label_backtrace_task set status=1 where updated_time<=?1 and status=2 and retry_times<=max_retry_times", nativeQuery = true)
// Integer resetStatus(Long lastUpdatedTime);
}

41
cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java

@ -5,6 +5,7 @@ import okhttp3.*;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import java.awt.image.BufferedImage; import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.net.URL; import java.net.URL;
@ -12,8 +13,9 @@ import java.util.HashMap;
import java.util.Map; import java.util.Map;
public class DownLoadFile { public class DownLoadFile {
public static Map<String,String> downloadAndSaveFile(String getUrl,String putUrl){ public static Map<String,String> downloadAndSaveFile(String getUrl,String putUrl){
String realUrl = "";
String realUrl = "";double size;
Map<String,String> realresult= new HashMap<>(); Map<String,String> realresult= new HashMap<>();
try{ try{
String files [] = getUrl.split("/"); String files [] = getUrl.split("/");
@ -23,16 +25,17 @@ public class DownLoadFile {
header.put("Connection","keep-alive"); header.put("Connection","keep-alive");
try { try {
Map<String,Object> downloadresult = OkHttpUtils.doGetBytes(getUrl,header); Map<String,Object> downloadresult = OkHttpUtils.doGetBytes(getUrl,header);
double size= (double) downloadresult.get("size");
if (downloadresult.containsKey("content") && size > 0){
size= (double) downloadresult.get("size");
if (downloadresult.containsKey("content")&&size>0){
byte[] content = (byte[]) downloadresult.get("content"); byte[] content = (byte[]) downloadresult.get("content");
size= (double) downloadresult.get("size"); size= (double) downloadresult.get("size");
size = Double.valueOf(String.format("%.3f", size));
Thread.sleep(4000); Thread.sleep(4000);
String result = DownLoadFile.upload(putUrl,fileName,content); String result = DownLoadFile.upload(putUrl,fileName,content);
Thread.sleep(4000); Thread.sleep(4000);
realUrl = JSONObject.parseObject(result).getString("url"); realUrl = JSONObject.parseObject(result).getString("url");
realresult.put("realUrl",realUrl); realresult.put("realUrl",realUrl);
realresult.put("size",String.format("%.2f", size));
realresult.put("size",size+"");
} }
} catch (IOException e) { } catch (IOException e) {
@ -72,7 +75,7 @@ public class DownLoadFile {
return result; return result;
} }
public static String imagesize(String getUrl ) throws IOException{
public static String imagesize(String getUrl) throws IOException{
String realUrl = "";Integer size; String realUrl = "";Integer size;
String realresult=""; String realresult="";
try{ try{
@ -88,5 +91,33 @@ public class DownLoadFile {
return realresult; return realresult;
} }
public static Map<String,Object> upload(String uploadUrl,String fileName,File file) {
Map<String,Object> resultMap = new HashMap<>();
try {
OkHttpClient httpClient = new OkHttpClient();
MultipartBody multipartBody = new MultipartBody.Builder().
setType(MultipartBody.FORM)
.addFormDataPart("file", fileName,
RequestBody.create(MediaType.parse("multipart/form-data;charset=utf-8"),
file))
.addFormDataPart("output", "json")
.build();
Request request = new Request.Builder()
.url(uploadUrl)
.post(multipartBody)
.build();
Response response = httpClient.newCall(request).execute();
if (response.isSuccessful()) {
ResponseBody body = response.body();
if (body != null) {
resultMap =JSONObject.parseObject( body.string());
}
}
} catch (Exception e) {
e.printStackTrace();
}
return resultMap;
}
} }

1
cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java

@ -188,5 +188,4 @@ public class OkHttpUtils {
System.gc(); System.gc();
} }
} }
} }

1315
cl_query_data_job/src/main/java/com/bfd/mf/job/service/BacktraceService.java
File diff suppressed because it is too large
View File

207
cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryMiniService.java

@ -1,207 +0,0 @@
package com.bfd.mf.job.service;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.Task;
import com.bfd.mf.job.util.EsUtils;
import org.apache.lucene.index.Term;
import org.elasticsearch.index.query.*;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.sql.Timestamp;
import java.util.HashMap;
import java.util.Map;
import java.util.TimeZone;
@Service
public class EsQueryMiniService {
private static Logger logger = LoggerFactory.getLogger(EsQueryMiniService.class);
private static String clSubject = "cl_major_";
private static String subjectPre = "major";
/**
* 统计 每个专题下每个渠道 的总量
*/
public Map<String,Long> getSubjectChannelStatistics(String clusterName,String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
if(indexName.contains(subjectPre)) {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE);
String indexNames [] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag");
resultMap = EsUtils.parseTerms(result);
}
}
}catch (Exception e){
e.printStackTrace();
}
return resultMap;
}
/**
* 统计 每个专题下每个渠道 当天的增量
*/
public Map<String,Long> getSubjectChannelTodayStatistics(String clusterName,String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
if(indexName.contains(subjectPre)) {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
long current=System.currentTimeMillis();
long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();
Long startTime = new Timestamp(zero).getTime();
RangeQueryBuilder rangeQueryBuilder = QueryBuilders
.rangeQuery(ESConstants.CRAWLTIME)
.gte(startTime)
.lt(current);
qb.must(rangeQueryBuilder);
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE);
String indexNames [] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag");
resultMap = EsUtils.parseTerms(result);
}
}
}catch (Exception e){
e.printStackTrace();
}
return resultMap;
}
/**
* 统计 每个专题下crawlDataFlag 三种类型当天的总量
*/
public Map<String,Long> getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
if(indexName.contains(subjectPre)) {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG);
String indexNames [] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag");
Map<String,Long> termsMap = EsUtils.parseTerms(result);
resultMap = EsUtils.getResultMap(termsMap);
}
}
}catch (Exception e){
e.printStackTrace();
}
return resultMap;
}
/**
* 统计 每个专题下crawlDataFlag 三种类型 的增量
*/
public Map<String,Long> getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) {
Map<String,Long> resultMap = new HashMap<>();
try{
if(indexName.contains(subjectPre)) {
boolean isExists = EsUtils.indexExists(clusterName, indexName);
if (isExists) {
BoolQueryBuilder qb = QueryBuilders.boolQuery();
long current=System.currentTimeMillis();
long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();
Long startTime = new Timestamp(zero).getTime();
RangeQueryBuilder rangeQueryBuilder = QueryBuilders
.rangeQuery(ESConstants.CRAWLTIME)
.gte(startTime)
.lt(current);
qb.must(rangeQueryBuilder);
AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG);
String indexNames [] = {indexName};
Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag");
Map<String,Long> termsMap = EsUtils.parseTerms(result);
resultMap = EsUtils.getResultMap(termsMap);
}
}
}catch (Exception e){
e.printStackTrace();
}
return resultMap;
}
/**
 * Counts documents belonging to one crawl task inside its subject index.
 * "totalCount": docs matching the task's source site (cid) and crawlDataFlag
 * whose pub time lies in the crawl window OR that are user data (primary=2).
 * "todayCount": the same filter further restricted to docs crawled today.
 *
 * @param clusterName   registered ES cluster to query
 * @param taskId        task id, used only for log correlation
 * @param task          task entity providing subject id, cid and crawl window
 * @param crawlDataFlag crawl-data flag identifying the task's data
 * @return map with keys "totalCount"/"todayCount"; empty when the task has no
 *         cid or the subject index does not exist
 */
public Map<String,Long> getTaskCount(String clusterName,Long taskId, Task task,String crawlDataFlag) {
    Map<String, Long> countMap = new HashMap<>();
    String indexName = clSubject + task.getSubjectId();// cl_subject_<subjectId>
    // Defensive guard: a task without a cid previously caused an NPE on
    // toLowerCase() (the mini-ES variant of this method has the same check).
    if (null == task.getCid()) {
        return countMap;
    }
    String cid = task.getCid().toLowerCase();
    Long crawlStartTime = task.getCrawlStartTime().longValue();
    Long crawlEndTime = task.getCrawlEndTime().longValue();
    if (indexName.contains(subjectPre)) {
        boolean isExists = EsUtils.indexExists(clusterName, indexName);
        if (isExists) {
            BoolQueryBuilder qb = QueryBuilders.boolQuery();
            // Task filters: source site + crawl-data flag.
            TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid);
            TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag);
            qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder);
            // Either pub time inside the crawl window OR user data (primary = 2).
            BoolQueryBuilder shouldbq = QueryBuilders.boolQuery();
            RangeQueryBuilder rangeQueryBuilder = QueryBuilders
                    .rangeQuery(ESConstants.PUBTIME)
                    .gte(crawlStartTime)
                    .lt(crawlEndTime);
            BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
            TermQueryBuilder primartTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PRIMARY, 2);
            boolQueryBuilder.must(primartTermQueryBuilder);
            shouldbq.should(boolQueryBuilder).should(rangeQueryBuilder);
            qb.must(shouldbq);
            logger.info("QB1 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
            Long count = EsUtils.queryCount(clusterName, indexName, qb);
            countMap.put("totalCount", count);
            // totalCount above; now restrict the same query to documents crawled
            // today (local midnight .. now) for todayCount.
            long current = System.currentTimeMillis();
            long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset();
            Long startTime = new Timestamp(zero).getTime();
            RangeQueryBuilder rangeQueryBuilder2 = QueryBuilders
                    .rangeQuery(ESConstants.CRAWLTIME)
                    .gte(startTime).lt(current);
            qb.must(rangeQueryBuilder2);
            // Fixed log label: was "QB2 : :" (stray colon), inconsistent with QB1.
            logger.info("QB2 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", ""));
            Long todayCount = EsUtils.queryCount(clusterName, indexName, qb);
            countMap.put("todayCount", todayCount);
        }
    }
    return countMap;
}
// public Long getTaskTodayCount(String clusterName,Integer id, Map<String, Object> task) {
// Long count = 0L;
// String indexName = clSubject + (String) task.get("subject_id");
// String cid = (String) task.get(ESConstants.CID);
// Long crawlStartTime = (Long) task.get("crawl_start_time");
// Long crawlEndTime = (Long) task.get("crawl_end_time");
// String crawlDataFlag = (String) task.get("crawl_data_flag");
//
// if(indexName.contains(subjectPre)) {
// boolean isExists = EsUtils.indexExists(clusterName, indexName);
// if (isExists) {
// BoolQueryBuilder qb = QueryBuilders.boolQuery();
// long current=System.currentTimeMillis();
// long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();
// Long startTime = new Timestamp(zero).getTime();
// RangeQueryBuilder rangeQueryBuilder = QueryBuilders
// .rangeQuery(ESConstants.CRAWLTIME)
// .gte(startTime)
// .lt(current);
// qb.must(rangeQueryBuilder);
//// Terms result = EsUtils.queryTag(clusterName, indexName, qb, ab, ESConstant.DOC_TYPE + "Tag");
//// resultMap = parseTerms(result);
// }
// }
// return count;
// }
}

7
cl_query_data_job/src/main/java/com/bfd/mf/job/service/WriterTXTService.java

@ -1,20 +1,13 @@
package com.bfd.mf.job.service; package com.bfd.mf.job.service;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.ResultDetail; import com.bfd.mf.job.domain.entity.ResultDetail;
import com.bfd.mf.job.domain.repository.ResultDetailRepository; import com.bfd.mf.job.domain.repository.ResultDetailRepository;
import com.bfd.mf.job.util.EsUtils;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.RateLimiter; import com.google.common.util.concurrent.RateLimiter;
import org.assertj.core.util.Lists;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.util.List; import java.util.List;
import java.util.Map;
@Service @Service
public class WriterTXTService { public class WriterTXTService {

241
cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java

@ -0,0 +1,241 @@
package com.bfd.mf.job.service.alarm;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.EmailGroup;
import com.bfd.mf.job.domain.entity.TaskCount;
import com.bfd.mf.job.domain.repository.EmailGroupRepository;
import com.bfd.mf.job.domain.repository.ServiceLoadRepository;
import com.bfd.mf.job.domain.repository.TaskCountRepository;
import com.bfd.mf.job.domain.repository.TaskRepository;
import com.bfd.mf.job.util.DateUtil;
import com.bfd.mf.job.util.EMailUtils;
import com.bfd.mf.job.util.EsUtils;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.math.BigInteger;
import java.util.*;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
@Service
public class AlarmService {
    /*
     * Scans recent logstash "datasave" records for per-site parse failures,
     * raises an alarm (printed INSERT + notification email) for every cid above
     * the failure threshold, and refreshes the service-load table.
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(AlarmService.class);
    @Autowired
    private AppConfig config;
    @Autowired
    private TaskRepository taskRepository;
    @Autowired
    private TaskCountRepository taskCountRepository;
    @Autowired
    private ServiceLoadRepository serviceLoadRepository;
    @Autowired
    private EmailGroupRepository emailGroupRepository;

    @PostConstruct
    public void init() {
        // Only the logstash cluster is registered here; the normal/mini
        // clusters (commented out in the original) are registered elsewhere.
        EsUtils.registerCluster(config.esLogstashClusterName(), config.esLogstashAddress());
    }

    /**
     * One monitoring pass: count per-cid parse failures in the last 30 minutes
     * of today's logstash index, alarm on every cid whose count exceeds the
     * threshold, then update the service-load table.
     */
    public void produce() {
        long start = System.currentTimeMillis();
        LOGGER.info("------------------------------------------------------------------ AlarmService ------------------------------------------------------");
        // Daily index name, e.g. logstash-2021.05.21.
        String date = DateUtil.parseDateByday2(new Date().getTime());
        String index = ESConstants.LOGSTASH + date;
        // NOTE(review): despite the names, startTime is "now" and endTime is
        // 30 minutes ago; getQueryBuilder(nowTime, befor30min, type) receives
        // them in that (matching) order.
        String startTime = DateUtil.getDateTime(System.currentTimeMillis());
        String endTime = DateUtil.getDateTime(System.currentTimeMillis() - 60 * 30 * 1000);
        String type = "datasave";
        QueryBuilder queryBuilder = getQueryBuilder(startTime, endTime, type);
        String clusterName = config.esLogstashClusterName();
        String[] sourceIndices = {index};
        Map<String, Integer> errorCid = new HashMap<>();
        // Scroll over every matching record and tally failures per cid.
        EsUtils.scrollQuery(clusterName, sourceIndices, "doc",
                queryBuilder, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES,
                dataList -> {
                    try {
                        if (dataList.size() == 0) {
                            System.out.println("没查到相关的 评论 数据");
                            return;
                        }
                        for (JSONObject data : dataList) {
                            String cid = extractCid(data);
                            if (null == cid) {
                                System.out.println(data);
                            }
                            // HashMap accepts a null key, so unidentified
                            // records are tallied together under null.
                            errorCid.merge(cid, 1, Integer::sum);
                        }
                    } catch (Exception e) {
                        LOGGER.error("AlarmService: failed to process a scroll batch", e);
                    }
                });
        System.out.println(JSONObject.toJSONString(errorCid));
        // Alarm on every cid whose failure count exceeds the threshold.
        for (Map.Entry<String, Integer> entry : errorCid.entrySet()) {
            String cid = entry.getKey();
            Integer errorNum = entry.getValue();
            if (errorNum > 50) {
                // Fixed: message said "超过15次" while the code checks > 50.
                System.out.println("这个站点30分钟内的解析失败次数超过50次 " + cid + " : " + errorNum);
                saveToAlarm(cid, errorNum);
            }
        }
        // TODO(review): hard-coded placeholder load percentages; the intent
        // (per the original comment) is the average download load over the
        // last half hour — replace with real metrics.
        List<Float> serviceStatusList = new ArrayList<>();
        serviceStatusList.add(32.6F);
        serviceStatusList.add(51F);
        serviceStatusList.add(0.0F);
        serviceStatusList.add(0.0F);
        serviceStatusList.add(18.3F);
        serviceStatusList.add(23.3F);
        serviceStatusList.add(64F);
        serviceStatusList.add(73F);
        serviceStatusList.add(44.6F);
        serviceStatusList.add(38F);
        for (int i = 0; i < 10; i++) {
            serviceLoadRepository.updateTaskCount(i + 1, serviceStatusList.get(i));
        }
        long end = System.currentTimeMillis();
        // Fixed: previously logged as "ServiceLoadService finish".
        LOGGER.info("AlarmService finish, took:{} ms.", (end - start));
    }

    /**
     * Best-effort extraction of the site cid from a logstash record; falls back
     * to parsing "cid=..." out of the raw message text. May return null.
     */
    private static String extractCid(Map<String, Object> dataMap) {
        String cid = (String) dataMap.get("cid");
        if (null == cid) {
            String message = (String) dataMap.get("message");
            // Guard on "cid=" (not just "cid") so the split below cannot go
            // out of bounds; also guard a null message field.
            if (null != message && message.contains("cid=")) {
                cid = message.split("cid=")[1];
            }
        }
        if (null != cid && cid.contains("}")) {
            cid = cid.split("}")[0];
        }
        return cid;
    }

    /**
     * Records one alarm for a failing site and mails the default handler.
     * WARNING(review): the INSERT statement is built by string concatenation
     * (SQL-injection-prone) and is only printed, never executed — switch to a
     * parameterized statement if it is ever wired up.
     */
    private void saveToAlarm(String cid, Integer errorNum) {
        Integer alarm_tag = 3;
        Integer alarm_reason = 1;
        String alarm_message = "[chenrui.li]这个站点解析失败次数为:" + errorNum;
        String alarm_task_url = ""; // cannot be tied to a specific task
        String alarm_task_content = ""; // cannot be tied to a specific task
        String alarm_cid = cid;
        String alarm_config = "";
        String alarm_trigger_time = DateUtil.getDateTime(new Date().getTime());
        String alarm_handler = "jing.du@percent.cn";
        List<String> emailList = new ArrayList<>();
        emailList.add(alarm_handler);
        // Site-owner lookup; result currently unused. TODO(review): address the
        // alarm email to this owner instead of the fixed handler.
        String email_addr = getEmailByCid(cid);
        Integer status = 2;
        String create_time = DateUtil.getDateTime(new Date().getTime());
        int del = 0;
        String sql = "INSERT INTO cl_alarm (alarm_tag,alarm_reason,alarm_message,alarm_task_url,alarm_task_content,alarm_cid," +
                "alarm_config,alarm_trigger_time,alarm_handler,status,create_time,del) " +
                "values (" + alarm_tag + "," + alarm_reason + ",'" + alarm_message + "','" + alarm_task_url + "','" + alarm_task_content + "'," +
                "'" + alarm_cid + "','" + alarm_config + "','" + alarm_trigger_time + "','" + alarm_handler + "'," + status + ",'" + create_time + "'," + del + ") ";
        System.out.println(sql);
        Map<String, Object> siteMessage = new HashMap<String, Object>();
        siteMessage.put("cid", cid);
        EMailUtils.getInstance().sendEmail(6, siteMessage, emailList, "30");
    }

    /**
     * First email address of the email group configured for this cid, or ""
     * when no group exists (the original threw IndexOutOfBoundsException then).
     */
    private String getEmailByCid(String cid) {
        List<EmailGroup> emails = emailGroupRepository.getEmailGroupsByCid(cid);
        if (null == emails || emails.isEmpty()) {
            return "";
        }
        String emailGroup = emails.get(0).getEmail();
        if (emailGroup.contains(",")) {
            return emailGroup.split(",")[0];
        }
        return emailGroup;
    }

    /**
     * Bool query matching records of the given type whose crawl_time lies in
     * [befor30min, nowTime], both bounds inclusive.
     */
    private QueryBuilder getQueryBuilder(String nowTime, String befor30min, String type) {
        BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
        try {
            boolean boo = true;
            QueryBuilder crawlTimeRange = buildRangeQueryBuilder(
                    "crawl_time.keyword", befor30min, nowTime, boo, boo);
            boolQueryBuilder.must(crawlTimeRange);
            // Fix: the type filter was built but never added to the query, so
            // every record type was scanned instead of only "datasave".
            QueryBuilder termQueryBuilder = QueryBuilders.termsQuery("type", type);
            boolQueryBuilder.must(termQueryBuilder);
        } catch (Exception e) {
            LOGGER.error("AlarmService: failed to build query", e);
        }
        return boolQueryBuilder;
    }

    /** Range query on {@code field} with configurable bound inclusivity. */
    private QueryBuilder buildRangeQueryBuilder(String field, Object startVal, Object endVal, Boolean isIncludeLower, Boolean isIncludeUpper) {
        return rangeQuery(field)
                .from(startVal)
                .to(endVal)
                .includeLower(isIncludeLower)
                .includeUpper(isIncludeUpper);
    }
}

230
cl_query_data_job/src/main/java/com/bfd/mf/job/service/backtrace/BacktraceService.java

@ -0,0 +1,230 @@
package com.bfd.mf.job.service.backtrace;
import com.alibaba.fastjson.JSONObject;
import com.bfd.crawler.elasti.ElastiProducer;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.Subject;
import com.bfd.mf.job.domain.repository.SubjectRepository;
import com.bfd.mf.job.util.EsUtils;
import com.bfd.mf.job.util.Kafka010Utils;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.RateLimiter;
import org.assertj.core.util.Lists;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
@Service
public class BacktraceService {
    /*
     * Pulls historical data for subjects flagged for cache recalculation:
     * tryAcquire() finds flagged subjects and enqueues them; produce() drains
     * the queue, scroll-queries the source ES over the subject's time window,
     * tags each document with the subject id / crawl-data flag, and writes it
     * into the subject's own index. Static caches are shared across instances.
     */
    private static final Logger LOGGER = LoggerFactory.getLogger(BacktraceService.class);
    // One hour in milliseconds (not referenced by the code visible here).
    private static final long PERIOD_MILLS = 1 * 3600 * 1000L;
    // Queue of {subjectId -> [processed, satisfied, progressFactor, totalSegment, segment]}
    // produced by tryAcquire() and consumed by produce().
    private static BlockingQueue<Map<Long, List<? extends Number>>> P_TASK_CACHE_RANGE = new LinkedBlockingQueue<>();
//    private static Map<Long, BacktraceTask> C_UNNORMAL_TASK_CACHE = new ConcurrentHashMap<>();
    // NOTE(review): the caches below are plain HashMaps guarded by the two
    // locks; none of them are read or written in the code visible here.
    private static Map<Long, Double> C_TASK_PROGRESS_CACHE = Maps.newHashMap();
    private static Lock C_TASK_PROGRESS_CACHE_LOCK = new ReentrantLock();
    private static Map<Long, Long> C_TASK_PROCESSED_CACHE = Maps.newHashMap();
    private static Map<Long, Long> C_TASK_SATISFIED_CACHE = Maps.newHashMap();
    private static Map<Long, Long> C_TASK_SEGMENT_CACHE = Maps.newHashMap();
    private static Lock C_TASK_STAT_CACHE_LOCK = new ReentrantLock();
    private static BlockingQueue<EsUtils.BulkItem> DATA_CACHE = new LinkedBlockingQueue<>(10240);
    private static BlockingQueue<EsUtils.BulkItem> NEW_DATA_CACHE = new LinkedBlockingQueue<>(10240);
    private RateLimiter dataRateLimiter;
    private RateLimiter pRateLimiter;
    private RateLimiter cRateLimiter;
    // Fixed routing parameters for ElastiProducer writes (see writerToMajorES).
    private static int subjectEsNum = 1;
    private static String indexType = "docs";
    private static int bussinessType = 1;
    @Autowired
    private AppConfig config;
    @Autowired
    private SubjectRepository subjectRepository;
    @PostConstruct
    public void init() {
        // Register the data-query clusters.
        EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// "es-source" in the config file
        EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // "es-target" in the config file
        pRateLimiter = RateLimiter.create(1.0D / config.getPeriodS());
        cRateLimiter = RateLimiter.create(1.0D / config.getPeriodS());
        dataRateLimiter = RateLimiter.create(config.esMiniBulkRate());
        // kafkaProducer = Kafka010Utils.getProducer(config.getBrokerList());
    }
    /*
        Try to acquire a permit within the configured period; if acquired,
        proceed with the flow below, otherwise return immediately.
     */
    public void tryAcquire() {
        if (!pRateLimiter.tryAcquire()) {// tries to acquire one permit; returns false when none is available within the period
            return;
        }
        // Query cl_subject rows where status=0, del=0, cache_recalculate_status=1.
        List<Map<String,Object>> subjectTaskList = subjectRepository.querySubjectTaskByCacheRecalculateStatus();
        for (Map<String,Object> subject: subjectTaskList) {
            System.out.println("需要拉数据的任务:"+JSONObject.toJSONString(subject));
            Map<Long, List<? extends Number>> cache = Maps.newHashMap();
            Long subjectId = Long.valueOf( subject.get(ESConstants.ID).toString());
            subjectRepository.updateCacheRecalculateStatus(subjectId,2);// 2 = picked up; produce() sets 3 when the pull finishes
            cache.put(subjectId, Lists.newArrayList(0L, 0L, 1.0, 1L, 1L));
            try {
                P_TASK_CACHE_RANGE.put(cache);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
//        // Query cl_label_backtrace_task rows with status 1 and retry_times <= max_retry_times
//        List<LabelBacktraceTask> backtraceTaskList = SubjectRepository.findAllByStatus(1);
//        for (LabelBacktraceTask task : taskList) {
//            if (!isAvailable(task)) {
//                continue;
//            }
//            LOGGER.info("Executing task:{}.", JSON.toJSONString(task));
//            failureService.createTable(task.getFailureTableName());
//            Long totalSegment = 1L;//(task.getDateEnd() - task.getDateStart()) / PERIOD_MILLS; // 3600000
//            Long segment = 1L;
//            Double progressFactor = 1.0 / totalSegment;
//        }
    }
    /**
     * Drains one queued subject, scroll-queries all its source indices, tags
     * each document and writes it to the subject index, then flips the
     * subject/task status to 3 (done) when progress reaches ~100%.
     */
    public void produce(){
        Map<Long, List<? extends Number>> range = P_TASK_CACHE_RANGE.poll();// poll --> returns null when the queue is empty
        if (Objects.isNull(range)) {
            return;
        }
        Long subjectId = 0L;
        // The map holds a single entry; the loop just extracts its key.
        for (Map.Entry<Long, List<? extends Number>> entry : range.entrySet()) {
            subjectId = entry.getKey();
        }
        Subject subject = subjectRepository.findById(subjectId).get();
        String clusterName = config.esNormalClusterName();
        String subjectIndexName = config.getIndexNamePre() + subjectId;
        long fromMills = subject.getCacheStart().longValue();
        long toMills = subject.getCacheEnd().longValue();
        Long year = config.getQueryDataYearStarttime();
        // Resolve the dated source indices that cover [fromMills, toMills].
        String[] sourceIndices = EsUtils.getIndices(AppConfig.CL_INDEX, "_",
                fromMills, toMills, AppConfig.DATE_FORMAT, config.esNormalUpper(),
                config.esNormalStandby(),year);
        String esQuery = subject.getEsQuery();
        BoolQueryBuilder qb = EsUtils.getBoolQueryBuilderFromSqlStr(esQuery);
        LOGGER.info("索引范围:"+sourceIndices[0]+" ~ "+ sourceIndices[sourceIndices.length-1] +" ; QB : \n{}.",qb);
        Long finalSubjectId = subjectId;
        // i / sourceIndices.length is the current progress fraction.
        Double percent = 0.0;
        for(int i = 0 ; i < sourceIndices.length ; i ++) {
            String index [] = {sourceIndices[i]};
            percent = (i*1.0)/sourceIndices.length ;
            EsUtils.scrollQuery(clusterName, index, ESConstants.INDEX_TYPE,
                    qb, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES,
                    dataList -> {
                        try {
                            for (JSONObject data : dataList) {
                                data.put(ESConstants.SUBJECT_ID, finalSubjectId);
                                String url = data.getString(ESConstants.URL);
//                                System.out.println(url);
                                String product_id = getProductId(data,url);
                                data.put(ESConstants.CRAWLDATAFLAGTYPE,2);
                                data.put(ESConstants.CRAWLDATAFLAG,"url:"+product_id);
                                //  System.out.println(subjectIndexName + " data --- "+data.get(ESConstants.DOC_ID));
                                writerToMajorES(subjectIndexName, data);
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    });
            percent = Double.valueOf(String.format("%.3f", percent));
            // Once the pull is (nearly) complete, update cache_recalculate_status
            // on cl_subject and the status of each task in cl_task.
            // NOTE(review): 0.996 is a rounding-tolerant "done" threshold — the
            // loop variable never reaches exactly 1.0.
            if(percent >= 0.996){
                int status = 3;
                subjectRepository.updateCacheRecalculateStatus(subjectId,status);
                subjectRepository.updateTaskStatus(subjectId,status);
            }
        }
        LOGGER.info("******** ******** subjectIndexName : " + subjectIndexName + "这个专题下的数据拉完了,可以改状态了!");
    }
    /**
     * Derives a product id from an e-commerce item URL, keyed on the record's
     * enSource (tmall, jd, taobao, vip, jumei, suning, sephora); slashes in the
     * id are replaced with '*'. Falls back to the record's existing product_id
     * field when no source matches or parsing throws.
     * NOTE(review): String.split takes a regex, so patterns like "/?id=" and
     * ".html" match more loosely than the literal text suggests — confirm
     * against real URLs before reusing this logic.
     */
    private String getProductId(JSONObject data, String url) {
        String product_id = data.getString(ESConstants.PRODUCT_ID);
        try {
            if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.TMALL)) {
                if (url.contains("&id=")) {
                    product_id = url.split("&id=")[1].split("&")[0].replace("/", "*");
                } else { //https://detail.tmall.com/item.htm?id=582242698961&rn=08db719e4a7ee5b6d4f5b58825d1f261&abbucket=20
                    product_id = url.split("/?id=")[1].split("&")[0].replace("/", "*");
                }
            }
            if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.EJINGDONG)) {
                product_id = url.split(".html")[0].split("item.jd.")[1].replace("/", "*");
            }
            if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.TAOBAO)) {
                if (url.contains("&id=")) {
                    product_id = url.split("&id=")[1].split("&")[0].replace("/", "*");
                } else {
                    product_id = url.split("/?id=")[1].split("&")[0].replace("/", "*");
                }
            }
            if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.WEIPINHUI)) {
                product_id = url.split(".vip.com/")[1].split(".html")[0].replace("/", "*");
            }
            if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.EJUMEI)) {
                product_id = url.split("com/")[1].split(".html")[0].replace("/", "*");
            }
            if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.SUNING)) {
                product_id = url.split("suning.com/")[1].split(".html")[0].replace("/", "*");
            }
            if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.SEPHORA)) {
                product_id = url.split(".cn")[1].split(".html")[0].replace("/", "*");
            }
        }catch (Exception e){
            e.printStackTrace();
        }
        return product_id;
    }
//    public static void main(String[] args) {
//        BacktraceService b = new BacktraceService();
//        String url = "https://detail.tmall.com/item.htm?id=582242698961&rn=08db719e4a7ee5b6d4f5b58825d1f261&abbucket=20";
//        JSONObject data = new JSONObject();
//        data.put("enSource","tmall");
//        String id = b.getProductId(data,url);
//        System.out.println(id);
//    }
    /**
     * Sends one document to the subject index through the shared ElastiProducer
     * configured by bussinessType/subjectEsNum/indexType.
     */
    private static void writerToMajorES(String indexName , Map<String, Object> responseMap) {
        System.out.println("==========================写入到【专题】ES : ==========" + indexName + " - "+responseMap.get("docId") );
        ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, subjectEsNum, indexName, indexType);
        elastiProducer.sendMessageToEs(JsonUtils.toJSONString(responseMap));
    }
//    public static void main(String[] args) {
//        String url = "https://detail.tmall.com/item.htm?id=598372446974&skuId=4336725650385&user_id=2549841410&cat_id=50031573&is_b=1&rn=66410a97e53d6338e3bff62cfd307a80";
//        String product_id = "";
//        if(url.contains("&id=")) {
//            product_id = url.split("&id=")[1].split("&")[0].replace("/", "*");
//        }else{
//            product_id = url.split("/?id=")[1].split("&")[0].replace("/", "*");
//        }
//        System.out.println(product_id);
//    }
}

203
cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java

@ -0,0 +1,203 @@
package com.bfd.mf.job.service.es;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.Task;
import com.bfd.mf.job.util.EsUtils;
import org.elasticsearch.index.query.*;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.sql.Timestamp;
import java.util.HashMap;
import java.util.Map;
import java.util.TimeZone;
@Service
public class EsQueryMiniService {
    /*
     * Read-only statistics over subject indices on the mini ES cluster:
     * per-channel totals/daily increments, crawl-data-flag breakdowns, and
     * per-task document counts. All methods return an empty map on any failure.
     */
    private static Logger logger = LoggerFactory.getLogger(EsQueryMiniService.class);

    /**
     * Range filter on crawl time covering "today": from the local midnight
     * (epoch millis of day start minus the default zone's raw offset, matching
     * the original inline computation) up to the current instant.
     */
    private static RangeQueryBuilder todayCrawlTimeRange() {
        long current = System.currentTimeMillis();
        long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset();
        return QueryBuilders.rangeQuery(ESConstants.CRAWLTIME).gte(zero).lt(current);
    }

    /** Strips newlines and spaces from a query for single-line logging. */
    private static String compact(Object query) {
        return query.toString().replace("\n", "").replace("\r", "").replace(" ", "");
    }

    /**
     * Total document count per channel (doc type) for one subject index.
     * @return channel name -> count; empty when the index is missing or a query fails
     */
    public Map<String, Long> getSubjectChannelStatistics(String clusterName, String indexName) {
        Map<String, Long> resultMap = new HashMap<>();
        try {
            if (EsUtils.indexExists(clusterName, indexName)) {
                BoolQueryBuilder qb = QueryBuilders.boolQuery();
                AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE);
                String[] indexNames = {indexName};
                Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag");
                resultMap = EsUtils.parseTerms(result);
            }
        } catch (Exception e) {
            logger.error("getSubjectChannelStatistics failed, index: {}", indexName, e);
        }
        return resultMap;
    }

    /**
     * Today's document increment per channel (doc type) for one subject index.
     * @return channel name -> count of docs crawled today; empty on failure
     */
    public Map<String, Long> getSubjectChannelTodayStatistics(String clusterName, String indexName) {
        Map<String, Long> resultMap = new HashMap<>();
        try {
            if (EsUtils.indexExists(clusterName, indexName)) {
                BoolQueryBuilder qb = QueryBuilders.boolQuery().must(todayCrawlTimeRange());
                AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE);
                String[] indexNames = {indexName};
                Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag");
                resultMap = EsUtils.parseTerms(result);
            }
        } catch (Exception e) {
            logger.error("getSubjectChannelTodayStatistics failed, index: {}", indexName, e);
        }
        return resultMap;
    }

    /**
     * Total counts per crawlDataFlag category for one subject index.
     * @return aggregated result of EsUtils.getResultMap over the flag terms; empty on failure
     */
    public Map<String, Long> getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) {
        Map<String, Long> resultMap = new HashMap<>();
        try {
            if (EsUtils.indexExists(clusterName, indexName)) {
                BoolQueryBuilder qb = QueryBuilders.boolQuery();
                AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG);
                String[] indexNames = {indexName};
                Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag");
                Map<String, Long> termsMap = EsUtils.parseTerms(result);
                resultMap = EsUtils.getResultMap(termsMap);
            }
        } catch (Exception e) {
            logger.error("getSubjectCrawlDataFlagStatistics failed, index: {}", indexName, e);
        }
        return resultMap;
    }

    /**
     * Today's counts per crawlDataFlag category for one subject index.
     * @return aggregated result restricted to docs crawled today; empty on failure
     */
    public Map<String, Long> getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) {
        Map<String, Long> resultMap = new HashMap<>();
        try {
            if (EsUtils.indexExists(clusterName, indexName)) {
                BoolQueryBuilder qb = QueryBuilders.boolQuery().must(todayCrawlTimeRange());
                AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG);
                String[] indexNames = {indexName};
                Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag");
                Map<String, Long> termsMap = EsUtils.parseTerms(result);
                resultMap = EsUtils.getResultMap(termsMap);
            }
        } catch (Exception e) {
            logger.error("getSubjectCrawlDataFlagTodayStatistics failed, index: {}", indexName, e);
        }
        return resultMap;
    }

    /**
     * Counts documents belonging to one crawl task inside a subject index.
     * "totalCount": docs matching the task's source (cid) and crawlDataFlag
     * whose pub time lies in the crawl window OR that are user data (primary=2).
     * "todayCount": the same filter further restricted to docs crawled today.
     *
     * @return map with "totalCount"/"todayCount"; empty when the task has no
     *         cid or the subject index does not exist
     */
    public Map<String, Long> getTaskCount(String clusterName, Long taskId, Task task, String crawlDataFlag, String indexNamePre) {
        Map<String, Long> countMap = new HashMap<>();
        String indexName = indexNamePre + task.getSubjectId();// <indexNamePre><subjectId>
        if (null == task.getCid()) {
            // Task without a source site: nothing to count.
            return countMap;
        }
        String cid = task.getCid().toLowerCase();
        Long crawlStartTime = task.getCrawlStartTime().longValue();
        Long crawlEndTime = task.getCrawlEndTime().longValue();
        if (indexName.contains(indexNamePre) && EsUtils.indexExists(clusterName, indexName)) {
            BoolQueryBuilder qb = QueryBuilders.boolQuery();
            // Task filters: crawl-data flag + source site.
            qb.must(QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag))
              .must(QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid));
            // Either pub time inside the crawl window OR user data (primary = 2).
            BoolQueryBuilder shouldbq = QueryBuilders.boolQuery()
                    .should(QueryBuilders.boolQuery().must(QueryBuilders.termQuery(ESConstants.PRIMARY, 2)))
                    .should(QueryBuilders.rangeQuery(ESConstants.PUBTIME).gte(crawlStartTime).lt(crawlEndTime));
            qb.must(shouldbq);
            logger.info("QB1 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, compact(qb));
            countMap.put("totalCount", EsUtils.queryCount(clusterName, indexName, qb));
            // totalCount above; add today's crawl-time restriction for todayCount.
            qb.must(todayCrawlTimeRange());
            logger.info("QB2 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, compact(qb));
            countMap.put("todayCount", EsUtils.queryCount(clusterName, indexName, qb));
        }
        return countMap;
    }
}

2
cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryNormalService.java → cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryNormalService.java

@ -1,4 +1,4 @@
package com.bfd.mf.job.service;
package com.bfd.mf.job.service.es;
import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.util.EsUtils; import com.bfd.mf.job.util.EsUtils;

354
cl_query_data_job/src/main/java/com/bfd/mf/job/service/QueryService.java → cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java

@ -1,7 +1,9 @@
package com.bfd.mf.job.service;
package com.bfd.mf.job.service.query;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.bfd.crawler.utils.JsonUtils;
import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.Subject; import com.bfd.mf.job.domain.entity.Subject;
@ -9,8 +11,10 @@ import com.bfd.mf.job.domain.entity.Task;
import com.bfd.mf.job.domain.repository.SubjectRepository; import com.bfd.mf.job.domain.repository.SubjectRepository;
import com.bfd.mf.job.domain.repository.TaskRepository; import com.bfd.mf.job.domain.repository.TaskRepository;
import com.bfd.mf.job.download.DownLoadFile; import com.bfd.mf.job.download.DownLoadFile;
import com.bfd.mf.job.util.DataCheckUtil;
import com.bfd.mf.job.util.EsUtils; import com.bfd.mf.job.util.EsUtils;
import com.bfd.mf.job.util.Kafka010Utils; import com.bfd.mf.job.util.Kafka010Utils;
import com.bfd.mf.job.util.ReadLine;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.google.common.util.concurrent.RateLimiter; import com.google.common.util.concurrent.RateLimiter;
import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.lang3.exception.ExceptionUtils;
@ -21,17 +25,14 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct; import javax.annotation.PostConstruct;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.math.BigInteger; import java.math.BigInteger;
import java.sql.Timestamp; import java.sql.Timestamp;
import java.util.*; import java.util.*;
import java.util.concurrent.BlockingQueue; import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery; import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
@ -64,7 +65,7 @@ public class QueryService {
EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target
pRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); pRateLimiter = RateLimiter.create(1.0D / config.getPeriodS());
kafkaProducer = Kafka010Utils.getProducer(config.getBrokerList()); kafkaProducer = Kafka010Utils.getProducer(config.getBrokerList());
//
// cRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); // cRateLimiter = RateLimiter.create(1.0D / config.getPeriodS());
// dataRateLimiter = RateLimiter.create(config.esMiniBulkRate()); // dataRateLimiter = RateLimiter.create(config.esMiniBulkRate());
} }
@ -76,7 +77,7 @@ public class QueryService {
if (!pRateLimiter.tryAcquire()) {//是在指定的时间内尝试地获得1个许可如果获取不到则返回false if (!pRateLimiter.tryAcquire()) {//是在指定的时间内尝试地获得1个许可如果获取不到则返回false
return; return;
} }
LOGGER.info("================================================================ QueryService =================================================================");
LOGGER.info("================================================================ QueryService (离线拉数据)=================================================================");
// 把要统计的结果事儿也在这里面统计了 // 把要统计的结果事儿也在这里面统计了
// List<Object> allSubject = subjectCountRepository.findAllSubject(); // List<Object> allSubject = subjectCountRepository.findAllSubject();
// for (Object ob:allSubject) { // for (Object ob:allSubject) {
@ -94,6 +95,8 @@ public class QueryService {
Double progressFactor = 1.0 / totalSegment; Double progressFactor = 1.0 / totalSegment;
Map<Long, List<? extends Number>> cache = Maps.newHashMap(); Map<Long, List<? extends Number>> cache = Maps.newHashMap();
long taskId = task.getId().longValue(); long taskId = task.getId().longValue();
int cache_num = 1;
taskRepository.updateStatus(cache_num, task.getId().longValue());
cache.put(taskId, Lists.newArrayList(0L, 0L, progressFactor, totalSegment, segment)); cache.put(taskId, Lists.newArrayList(0L, 0L, progressFactor, totalSegment, segment));
try { try {
P_TASK_CACHE_RANGE.put(cache); P_TASK_CACHE_RANGE.put(cache);
@ -119,56 +122,81 @@ public class QueryService {
taskId = entry.getKey(); taskId = entry.getKey();
} }
Task task = taskRepository.findById(taskId).get(); Task task = taskRepository.findById(taskId).get();
System.out.println("开始拉数据的任务是:" + JSONObject.toJSONString(task));
LOGGER.info("开始拉数据的任务是:" + JSONObject.toJSONString(task));
List<String> docIdsList = new ArrayList<>();
try { try {
// 创建过滤条件 & 任务预处理 // 创建过滤条件 & 任务预处理
fromMills = task.getCrawlStartTime().longValue(); fromMills = task.getCrawlStartTime().longValue();
toMills = task.getCrawlEndTime().longValue(); toMills = task.getCrawlEndTime().longValue();
String[] sourceIndices = EsUtils.getIndices(AppConfig.CL_INDEX, "_",
fromMills, toMills, AppConfig.DATE_FORMAT, config.esNormalUpper(), config.esNormalStandby());
QueryBuilder queryBuilder;
String clusterName = config.esNormalClusterName(); // 获得 109 clusterName
String cid = task.getCid().toLowerCase();
String crawlDataFlag = task.getCrawlDataFlag();
BigInteger subjectId = task.getSubjectId();
Subject subject = subjectRepository.getSubjectBySubjectId(subjectId.longValue());
String indexName = "cl_major_" + task.getSubjectId();
Integer cacheNum = task.getCacheNum();
Long year = config.getQueryDataYearStarttime(); // 获取配置文件中用直接拉年份的时间节点现在设置的是2019年2019年前的全部用年做索引不拆成天
String clusterName = config.esNormalClusterName(); // 获取配置文件中ES的名称
// 根据条件获取到要查询的索引的集合
if(toMills > new Date().getTime()){
toMills = new Date().getTime();
}
String[] sourceIndices = EsUtils.getIndices(AppConfig.CL_INDEX, AppConfig.SEPARATOR,
fromMills, toMills, AppConfig.DATE_FORMAT, config.esNormalUpper(), config.esNormalStandby(),year);
String cid = task.getCid().toLowerCase(); // 站点的cid
String siteType = task.getSiteType().toString(); // 站点的类型 主要看是不是电商的因为电商的主贴和评论在ES中的存储方式跟其他的相反
String crawlDataFlag = task.getCrawlDataFlag(); // 任务的抓取条件
String crawlContentKey = task.getCrawlContentKey(); // 要拉取的字段主要看是否需要拉评论
// BigInteger subjectId = task.getSubjectId();
// Subject subject = subjectRepository.getSubjectBySubjectId(subjectId.longValue());
String indexName = "cl_major_" + task.getSubjectId(); // 索引名称
Integer cacheNum = task.getCacheNum(); // 拉取数据的次数
// 当拉数据的次数 大于1 次的时候再拉数据的开始时间就不用是任务设置的开始时间了同时可以再加个采集时间范围限制一下确保拉的数据都是任务添加之后才采集的就行 // 当拉数据的次数 大于1 次的时候再拉数据的开始时间就不用是任务设置的开始时间了同时可以再加个采集时间范围限制一下确保拉的数据都是任务添加之后才采集的就行
QueryBuilder queryBuilder; // 根据条件组装查询用具
if(cacheNum > 1 ) { // 已经拉过历史数据的任务 开始时间改成当天凌晨查询发表和抓取都是当天的数据 if(cacheNum > 1 ) { // 已经拉过历史数据的任务 开始时间改成当天凌晨查询发表和抓取都是当天的数据
long current=System.currentTimeMillis();//当前时间毫秒数 long current=System.currentTimeMillis();//当前时间毫秒数
long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数
fromMills = new Timestamp(zero).getTime(); fromMills = new Timestamp(zero).getTime();
queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum);
queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum,siteType);
}else{ }else{
fromMills = task.getCrawlStartTime().longValue(); fromMills = task.getCrawlStartTime().longValue();
queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum);
queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum,siteType);
} }
LOGGER.info("Query primary, task:{}, index:{}, from:{}, to:{}, indices:{}, dsl:{}.", LOGGER.info("Query primary, task:{}, index:{}, from:{}, to:{}, indices:{}, dsl:{}.",
taskId, taskId,
indexName, indexName,
new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT), new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT),
new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT), new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT),
sourceIndices.toString(),
JSONObject.toJSONString(sourceIndices),
queryBuilder.toString()); queryBuilder.toString());
// 传入的参数 集群名称索引名称索引类型type, 查询Builder,scroll查询页面大小,scroll查询scrollId有效时间 // 传入的参数 集群名称索引名称索引类型type, 查询Builder,scroll查询页面大小,scroll查询scrollId有效时间
String finalTaskId = taskId+""; String finalTaskId = taskId+"";
long pubTime = fromMills;
long finalFromMills = fromMills;
long finalToMills = toMills;
EsUtils.scrollQuery(clusterName, sourceIndices, ESConstants.INDEX_TYPE, EsUtils.scrollQuery(clusterName, sourceIndices, ESConstants.INDEX_TYPE,
queryBuilder, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES, queryBuilder, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES,
dataList -> { dataList -> {
try { try {
if(dataList.size() == 0){
System.out.println("没查到相关的 主贴 数据");
return;
}
for (JSONObject data : dataList) { for (JSONObject data : dataList) {
data = getCreateTime(data,crawlDataFlag);
// 离线拉的数据加个字段吧跟正常拉的数据做区分
if(data.get(ESConstants.DOC_TYPE).equals(ESConstants.ITEM) && data.get(ESConstants.PRIMARY).equals(1)){
data = getPubTime(data,pubTime);
}
saveService.initData(data,finalTaskId); saveService.initData(data,finalTaskId);
// 发送主贴 // 发送主贴
// 是否要下载图片到指定的 go-fast上 // 是否要下载图片到指定的 go-fast上
if(subject.getGoFastSwitch() == 1){
String goFastAddr = subject.getGoFastAddr();
if("" == goFastAddr){
goFastAddr = defultAddr;
}
// 现在判断视频图片文件是否下载的方式只取决于isDownload 字段
boolean isDownload = data.getBoolean(ESConstants.ISDOWNLOAD);
if(isDownload){
String goFastAddr = defultAddr;
data = downloadAndChangePath(data,goFastAddr); data = downloadAndChangePath(data,goFastAddr);
} }
// if(subject.getGoFastSwitch() == 1){
// String goFastAddr = subject.getGoFastAddr();
// if("" == goFastAddr){
// goFastAddr = defultAddr;
// }
// data = downloadAndChangePath(data,goFastAddr);
// }
// 是否写入到指定的kafka // 是否写入到指定的kafka
// if(subject.getKafkaSwitch() == 1) { // if(subject.getKafkaSwitch() == 1) {
// String kafkaTopic = subject.getKafkaTopic(); // String kafkaTopic = subject.getKafkaTopic();
@ -178,18 +206,34 @@ public class QueryService {
// } // }
// kafkaProducer.send(kafkaTopic, JSONObject.toJSONString(data)); // kafkaProducer.send(kafkaTopic, JSONObject.toJSONString(data));
// } // }
saveService.saveToEsWithFilter(config.esMiniClusterName(), indexName, data);
LOGGER.debug("Send message, indexName :{} , taskId:{} , ID :{}.", indexName, task.getId(), data.getString("_id_"));
System.out.println("=== "+data);
if(!data.get("_id_").equals("")) {
saveService.saveToEsWithFilter(config.esMiniClusterName(), indexName, data);
kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data));
LOGGER.debug("Send message, indexName :{} , taskId:{} , ID :{}.", indexName, task.getId(), data.getString("_id_"));
// 将要拉评论的ID 添加到list ,电商的数据不用拉评论哦
if(!siteType.equals(ESConstants.DOCTYPEITEM)) {
if (crawlContentKey.contains("comment") || crawlContentKey.contains("socialComment")) {
docIdsList.add(data.get(ESConstants.DOC_ID).toString());
}
}
}
} }
} catch (Exception e) { } catch (Exception e) {
System.out.println("******* " + dataList );
throw new RuntimeException(e); throw new RuntimeException(e);
} }
}); });
// 开始拉评论数据
if(docIdsList.size() > 0) {
String docType = docIdsList.get(0).split("_")[1];
String docIds [] = docIdsList.toArray(new String[0]);
queryComments(docIds, docType, finalFromMills, finalToMills,finalTaskId,crawlDataFlag,indexName);
}
LOGGER.info("This Task is OK ! taskId = " + taskId);
Integer cache_num = task.getCacheNum(); Integer cache_num = task.getCacheNum();
cache_num = cache_num +1; cache_num = cache_num +1;
taskRepository.updateStatus(cache_num, task.getId().longValue()); taskRepository.updateStatus(cache_num, task.getId().longValue());
} catch (Exception e) { } catch (Exception e) {
JSONObject msg = new JSONObject(); JSONObject msg = new JSONObject();
msg.put("message", "produce error due to [" + ExceptionUtils.getStackTrace(e) + "]"); msg.put("message", "produce error due to [" + ExceptionUtils.getStackTrace(e) + "]");
@ -199,6 +243,76 @@ public class QueryService {
} }
} }
private JSONObject getPubTime(JSONObject data,long pubTime) {
data.put(ESConstants.PUBTIME,pubTime);
data.put(ESConstants.PUBTIMESTR, DataCheckUtil.getCurrentTime(pubTime));
data.put(ESConstants.PUBDAY, DataCheckUtil.getDay(pubTime));
data.put(ESConstants.PUBDATE, DataCheckUtil.getDate(pubTime));
return data;
}
private JSONObject getCreateTime(JSONObject data,String crawlDataFlag) {
long createTime = System.currentTimeMillis() ;
data.put(ESConstants.CREATETIME, createTime);
data.put(ESConstants.CREATETIMESTR, DataCheckUtil.getCurrentTime(createTime));
data.put(ESConstants.CREATEDAY, DataCheckUtil.getDay(createTime));
data.put(ESConstants.CREATEDATE, DataCheckUtil.getDate(createTime));
data.put(ESConstants.CRAWLDATAFLAG,crawlDataFlag);
return data;
}
private void queryComments(String[] docId,String docType,
long startTime,long endTime,
String crawlDataFlag,String finalTaskId,
String indexName) {
LOGGER.info("开始拉取评论数据:");
QueryBuilder queryBuilder = getQueryBuilder(docId, startTime, endTime);
String index = "cl_index_"+docType; //cl_index_video
String[] sourceIndices = {index};
String clusterName = config.esNormalClusterName();
LOGGER.info("QB : "+queryBuilder);
LOGGER.info("queryComments index : "+ JSONObject.toJSONString(sourceIndices));
EsUtils.scrollQuery(clusterName, sourceIndices, ESConstants.INDEX_TYPE,
queryBuilder, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES,
dataList -> {
try {
if (dataList.size() == 0) {
System.out.println("没查到相关的 评论 数据");
return;
}
for (JSONObject data : dataList) {
data = getCreateTime(data,crawlDataFlag);
saveService.initData(data,finalTaskId);
if(!data.get("_id_").equals("")) {
saveService.saveToEsWithFilter(config.esMiniClusterName(), indexName, data);
kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data));
LOGGER.debug("Send comments message, indexName :{} , taskId:{} , ID :{}.", indexName, finalTaskId, data.getString("_id_"));
}
}
} catch (Exception e) {
e.printStackTrace();
}
});
}
// 组装查询评论的查询语句
private QueryBuilder getQueryBuilder(String[] docId, long startTime, long endTime) {
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
try{
// 筛选时间
boolean boo = true;
QueryBuilder pubTimeRange = buildRangeQueryBuilder(
ESConstants.PUBTIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo);
boolQueryBuilder.must(pubTimeRange);
// 筛选ID
QueryBuilder termQueryBuilder = QueryBuilders.termsQuery(ESConstants.DOC_ID,docId);
boolQueryBuilder.must(termQueryBuilder);
}catch (Exception e){
e.printStackTrace();
}
return boolQueryBuilder;
}
/** /**
* 下载 文件视频图片并将新的路径替换写入到 pathSize中 * 下载 文件视频图片并将新的路径替换写入到 pathSize中
* videoPath == egc * videoPath == egc
@ -206,47 +320,173 @@ public class QueryService {
imagePath == pgc imagePath == pgc
*/ */
private JSONObject downloadAndChangePath(JSONObject data, String goFastAddr) { private JSONObject downloadAndChangePath(JSONObject data, String goFastAddr) {
String isDownload = data.get(ESConstants.ISDOWNLOAD).toString();
Map<String,String> goFastMap = new HashMap<>();
List<Map<String,String>> filePathSize = new ArrayList<>();
List<Map<String,String>> videoPathSize = new ArrayList<>();
List<Map<String,String>> imagePathSize = new ArrayList<>();
// 文件下载
List<String> filePath = (List<String>) data.get(ESConstants.FILEPATH); List<String> filePath = (List<String>) data.get(ESConstants.FILEPATH);
String isDownload = "true";
if(filePath.size() > 0){ if(filePath.size() > 0){
List<Map<String,String>> filePathSize = getPathSize(filePath,goFastAddr,0,data);
// 调用下载接口下载并将附件上传到自己的go-fast
Map<String,Object> srcPathMap = getPathSize(filePath,goFastAddr,0,data);
filePath = (List<String>) srcPathMap.get(ESConstants.PATH);
data.put(ESConstants.FILEPATH,filePath);
// 组装 FILEPATHSIZE 字段
filePathSize = (List<Map<String, String>>) srcPathMap.get(ESConstants.PATHSIZELIST);
if(filePathSize.size() >0){ if(filePathSize.size() >0){
data.put(ESConstants.FILEPATHSIZE,JSONObject.toJSONString(filePathSize)); data.put(ESConstants.FILEPATHSIZE,JSONObject.toJSONString(filePathSize));
data.put(ESConstants.UGC,1); data.put(ESConstants.UGC,1);
data.put(ESConstants.ISDOWNLOAD,isDownload); data.put(ESConstants.ISDOWNLOAD,isDownload);
} }
// 组装 SRCFILEPATH 字段
Map<String,String> srcAndGofastUrlMap = (Map<String, String>) srcPathMap.get("srcMap");
if(data.containsKey("forwardUrl") && null != data.get("forwardUrl") ) {
try {
List<Map<String, String>> forwardUrl = JsonUtils.parseArray(data.get("forwardUrl").toString());
List<Map<String, String>> srcPath = getSrcPath(forwardUrl,srcAndGofastUrlMap);
data.put(ESConstants.SRCFILEPATH, JSON.toJSONString(srcPath, SerializerFeature.DisableCircularReferenceDetect));
} catch (Exception e) {
e.printStackTrace();
}
}
} }
// 视频下载
List<String> videoPath = (List<String>) data.get(ESConstants.VIDEOPATH); List<String> videoPath = (List<String>) data.get(ESConstants.VIDEOPATH);
if(videoPath.size() > 0){ if(videoPath.size() > 0){
List<Map<String,String>> videoPathSize = getPathSize(videoPath,goFastAddr,1,data);
// List<Map<String,String>> videoPathSize = getPathSize(videoPath,goFastAddr,1,data);
System.out.println("************ 要下载的视频链接的 List : "+videoPath);
Map<String,Object> srcPathMap = getPathSize(videoPath,goFastAddr,0,data);
videoPath = (List<String>) srcPathMap.get(ESConstants.PATH);
data.put(ESConstants.VIDEOPATH,videoPath);
videoPathSize = (List<Map<String, String>>) srcPathMap.get(ESConstants.PATHSIZELIST);
if(videoPathSize.size() >0){ if(videoPathSize.size() >0){
data.put(ESConstants.VIDEOPATHSIZE,JSONObject.toJSONString(videoPathSize)); data.put(ESConstants.VIDEOPATHSIZE,JSONObject.toJSONString(videoPathSize));
data.put(ESConstants.EGC,1); data.put(ESConstants.EGC,1);
data.put(ESConstants.ISDOWNLOAD,isDownload); data.put(ESConstants.ISDOWNLOAD,isDownload);
} }
// 组装 SRCVIDEOPATH 字段
Map<String,String> srcAndGofastUrlMap = (Map<String, String>) srcPathMap.get("srcMap");
if(data.containsKey("videoUrl") && null != data.get("videoUrl") ) {
List<Map<String, String>> srcPath = new ArrayList<>();
if(data.get("videoUrl").toString().contains("originalUrl")){
try {
List<Map<String,String>> videoUrl = JsonUtils.parseArray( data.get("videoUrl").toString());
srcPath = getSrcPath(videoUrl,srcAndGofastUrlMap);
} catch (Exception e) {
e.printStackTrace();
}
}else{
List<String> videoUrl = new ArrayList<>();
try {
if(data.get("videoUrl").toString().contains("[")) {
videoUrl = JsonUtils.parseArray(data.get("videoUrl").toString());
}else{
videoUrl.add(data.get("videoUrl").toString());
}
}catch (Exception e){
e.printStackTrace();
}
srcPath = new ArrayList<>();
Map<String, String> srcurlMap = new HashMap<>();
if(videoPath.size() > 0) {
srcurlMap.put(ESConstants.GOFASTURL, videoPath.get(0));
}
System.out.println("===============视频原链接的List: " + videoUrl);
if(videoUrl.size() > 0) {
srcurlMap.put(ESConstants.ORIGINALURL, videoUrl.get(0));
}
if(srcurlMap.size() > 0) {
srcPath.add(srcurlMap);
}
}
data.put(ESConstants.SRCVIDEOPATH,JSON.toJSONString(srcPath, SerializerFeature.DisableCircularReferenceDetect));
}
} }
// 图片下载
List<String> imagePath = (List<String>) data.get(ESConstants.IMAGEPATH); List<String> imagePath = (List<String>) data.get(ESConstants.IMAGEPATH);
if(imagePath.size() > 0){ if(imagePath.size() > 0){
List<Map<String,String>> imagePathSize = getPathSize(imagePath,goFastAddr,2,data);
//List<Map<String,String>> imagePathSize = getPathSize(imagePath,goFastAddr,2,data);
Map<String,Object> srcPathMap = getPathSize(imagePath,goFastAddr,0,data);
imagePath = (List<String>) srcPathMap.get(ESConstants.PATH);
data.put(ESConstants.IMAGEPATH,imagePath);
imagePathSize = (List<Map<String, String>>) srcPathMap.get(ESConstants.PATHSIZELIST);
if(imagePathSize.size() >0){ if(imagePathSize.size() >0){
data.put(ESConstants.IMAGEPATHSIZE,JSONObject.toJSONString(imagePathSize)); data.put(ESConstants.IMAGEPATHSIZE,JSONObject.toJSONString(imagePathSize));
data.put(ESConstants.PGC,1); data.put(ESConstants.PGC,1);
data.put(ESConstants.ISDOWNLOAD,isDownload); data.put(ESConstants.ISDOWNLOAD,isDownload);
} }
Map<String,String> srcAndGofastUrlMap = (Map<String, String>) srcPathMap.get("srcMap");
List<Map<String, String>> srcPath = new ArrayList<>();
if(data.containsKey("pictureList") && null != data.get("pictureList")){
Map<String,Object> pictureList = JSONObject.parseObject(data.get("pictureList").toString());
if(!pictureList.isEmpty()){
Map<String,String> srcurlMap=new HashMap<>();
for (Map.Entry<String, Object> entry : pictureList.entrySet()) {
Map<String,Object> imgmap= (Map<String, Object>) entry.getValue();
if(imgmap.containsKey("uploadImg") && imgmap.get("uploadImg") != null && imgmap.get("uploadImg") != ""){
srcurlMap.put(ESConstants.GOFASTURL,srcAndGofastUrlMap.get(imgmap.get("uploadImg")));
srcurlMap.put(ESConstants.ORIGINALURL,imgmap.get("img").toString());
}
srcPath.add(srcurlMap);
}
}
}
data.put(ESConstants.SRCIMAGEPATH,JSON.toJSONString(srcPath, SerializerFeature.DisableCircularReferenceDetect));
}
// 当三个 pathSize 都为 0 的时候表示三个下载结果都为空为了保持页面和实际结果的统一这块改成 false
if(filePathSize.size() == 0 && videoPathSize.size() == 0 && imagePathSize.size() == 0){
data.put(ESConstants.ISDOWNLOAD,false);
} }
return data; return data;
} }
private List<Map<String,String>> getSrcPath(List<Map<String, String>> forwardUrl, Map<String, String> srcAndGofastUrlMap) {
List<Map<String, String>> srcPathList = new ArrayList<>();
for (Map<String, String> urlMap : forwardUrl) {
if(null != urlMap) {
Map<String, String> srcurlMap = new HashMap<>();
if (urlMap.containsKey(ESConstants.GOFASTURL) && null != urlMap.get(ESConstants.GOFASTURL)) {
srcurlMap.put(ESConstants.GOFASTURL, srcAndGofastUrlMap.get(urlMap.get(ESConstants.GOFASTURL)));
} else {
srcurlMap.put(ESConstants.GOFASTURL, "");
}
srcurlMap.put(ESConstants.ORIGINALURL, urlMap.get(ESConstants.ORIGINALURL));
srcPathList.add(srcurlMap);
}
}
return srcPathList;
}
// public static void main(String[] args) {
// QueryService queryService = new QueryService();
// List<String> list = ReadLine.readLine(new File("E:\\work/test1.txt"));
// JSONObject data = JSONObject.parseObject(list.get(0));
// String goFastAddr = "http://172.18.1.113:8080/upload";
// JSONObject result = queryService.downloadAndChangePath(data,goFastAddr);
// System.out.println(result);
//
// }
/** /**
* downloadType =0 文件 =1 图片 = 2 视频 * downloadType =0 文件 =1 图片 = 2 视频
*/ */
private List<Map<String,String>> getPathSize(List<String> pathList, String goFastAddr,Integer downloadType,JSONObject data) {
private Map<String,Object> getPathSize(List<String> pathList, String goFastAddr,Integer downloadType,JSONObject data) {
Map<String,Object> pathMap = new HashMap<>();
List<Map<String,String>> pathSizeList = new ArrayList<>(); List<Map<String,String>> pathSizeList = new ArrayList<>();
List<String> path = new ArrayList<>();
Map<String,String> srcMap = new HashMap<>();
for (String downloadUrl:pathList) { for (String downloadUrl:pathList) {
String resolution = ""; String resolution = "";
String videoTime = ""; String videoTime = "";
try { try {
if(!downloadUrl.contains("si-te.percent.cn")){
if(null != downloadUrl && !downloadUrl.contains("si-te.percent.cn")){
Map<String,String> pathSizeMap = DownLoadFile.downloadAndSaveFile(downloadUrl, goFastAddr); Map<String,String> pathSizeMap = DownLoadFile.downloadAndSaveFile(downloadUrl, goFastAddr);
LOGGER.info("[QueryService] getPathSize goFaskAddr {}. resultMap {}.",goFastAddr ,pathSizeMap); LOGGER.info("[QueryService] getPathSize goFaskAddr {}. resultMap {}.",goFastAddr ,pathSizeMap);
if(pathSizeMap.size() > 0){ if(pathSizeMap.size() > 0){
@ -257,43 +497,57 @@ public class QueryService {
if(downloadType == 2) { // 图片 if(downloadType == 2) { // 图片
resolution = DownLoadFile.imagesize(downloadUrl); resolution = DownLoadFile.imagesize(downloadUrl);
} }
String url = pathSizeMap.get("realUrl");
String size = pathSizeMap.get("size");
//String url = pathSizeMap.get("realUrl").replace(config.getGoFastDomain(),"");
String url = pathSizeMap.get("realUrl").replace("http://172.18.1.113:8080","");
String size = pathSizeMap.get("size") + "KB";
pathSizeMap.put(ESConstants.URL,url); pathSizeMap.put(ESConstants.URL,url);
pathSizeMap.put(ESConstants.SIZE,size); pathSizeMap.put(ESConstants.SIZE,size);
pathSizeMap.put(ESConstants.RESOLUTION,resolution); pathSizeMap.put(ESConstants.RESOLUTION,resolution);
pathSizeMap.put(ESConstants.VIDEOTIME,videoTime); pathSizeMap.put(ESConstants.VIDEOTIME,videoTime);
pathSizeMap.remove("realUrl"); pathSizeMap.remove("realUrl");
// 这个是三个PathSize imagePathSize ,videoPathSize filePathSize
pathSizeList.add(pathSizeMap); pathSizeList.add(pathSizeMap);
// 这个是 用来做 gofast 和原链接替换的key 是原链接value 是go-fast 链接
srcMap.put(downloadUrl,url);
// 这个值使用来替换 三个 Path imagePath,videoPath,filePath
path.add(url);
} }
} }
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); e.printStackTrace();
} }
} }
return pathSizeList;
pathMap.put(ESConstants.PATHSIZELIST,pathSizeList);
pathMap.put("srcMap",srcMap);
pathMap.put(ESConstants.PATH,path);
return pathMap;
//return pathSizeList;
} }
private QueryBuilder getQueryBuilder(Long startTime, Long endTime, private QueryBuilder getQueryBuilder(Long startTime, Long endTime,
String cid,String crawlDataFlag,Integer cacheNum) {
String cid,String crawlDataFlag,Integer cacheNum,String siteType) {
// 查询语句组装 // 查询语句组装
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
try { try {
// 当拉取次数大于1的还需要限制 采集时间采集时间不早于今天 // 当拉取次数大于1的还需要限制 采集时间采集时间不早于今天
boolean boo = true; boolean boo = true;
if(cacheNum > 1 ){
// if(cacheNum > 1 ){
// QueryBuilder pubTimeRange = buildRangeQueryBuilder(
// ESConstants.CREATETIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo);
// boolQueryBuilder.must(pubTimeRange);
// }
// // 筛选发表时间
if(!siteType.equals(ESConstants.DOCTYPEITEM)) {
QueryBuilder pubTimeRange = buildRangeQueryBuilder( QueryBuilder pubTimeRange = buildRangeQueryBuilder(
ESConstants.CREATETIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo);
ESConstants.PUBTIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo);
boolQueryBuilder.must(pubTimeRange); boolQueryBuilder.must(pubTimeRange);
} }
// 筛选发表时间
QueryBuilder pubTimeRange = buildRangeQueryBuilder(
ESConstants.PUBTIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo);
boolQueryBuilder.must(pubTimeRange);
// 筛选站点 // 筛选站点
if(cid.equals("taobao") || cid.equals("tmall")){
boolQueryBuilder.must(QueryBuilders.termsQuery(ESConstants.EN_SOURCE, "taobao","tmall"));
if(cid.equals(ESConstants.TAOBAO) || cid.equals(ESConstants.TMALL)){
boolQueryBuilder.must(QueryBuilders.termsQuery(ESConstants.EN_SOURCE, ESConstants.TAOBAO,ESConstants.TMALL));
}else { }else {
boolQueryBuilder.must(QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid)); boolQueryBuilder.must(QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid));
} }
@ -345,7 +599,7 @@ public class QueryService {
String account = crawlDataFlag.split("account:")[1]; String account = crawlDataFlag.split("account:")[1];
System.out.println("[buildCrawlDataFlagBuilder] account --- " + account); System.out.println("[buildCrawlDataFlagBuilder] account --- " + account);
TermQueryBuilder queryAccountBuilders = QueryBuilders.termQuery(ESConstants.USER_URL,account); TermQueryBuilder queryAccountBuilders = QueryBuilders.termQuery(ESConstants.USER_URL,account);
queryBuilder = QueryBuilders.boolQuery().must(queryAccountBuilders).should(queryCrawlDataFlagBuilder);
queryBuilder = QueryBuilders.boolQuery().should(queryAccountBuilders).should(queryCrawlDataFlagBuilder);
} }
}catch (Exception e){ }catch (Exception e){
e.printStackTrace(); e.printStackTrace();

6
cl_query_data_job/src/main/java/com/bfd/mf/job/service/SaveService.java → cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java

@ -1,11 +1,10 @@
package com.bfd.mf.job.service;
package com.bfd.mf.job.service.query;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.util.EsUtils; import com.bfd.mf.job.util.EsUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@Service @Service
@ -15,7 +14,6 @@ public class SaveService {
public void initData(final JSONObject data, String taskId) { public void initData(final JSONObject data, String taskId) {
// 初始化自定义字段 // 初始化自定义字段
data.put(ESConstants.TASKID, taskId); data.put(ESConstants.TASKID, taskId);
data.put(ESConstants.ISDOWNLOAD,"false");
data.put("where","backtrace"); data.put("where","backtrace");
} }
@ -24,7 +22,7 @@ public class SaveService {
try { try {
LOGGER.info("[SaveService] saveToEsWithFilter 写入ES " + miniCluster + " | " + miniIndex); LOGGER.info("[SaveService] saveToEsWithFilter 写入ES " + miniCluster + " | " + miniIndex);
EsUtils.index(miniCluster, miniIndex, ESConstants.INDEX_TYPE, data, ESConstants._ID); EsUtils.index(miniCluster, miniIndex, ESConstants.INDEX_TYPE, data, ESConstants._ID);
EsUtils.index(miniCluster, miniIndex, ESConstants.INDEX_TYPE, data, ESConstants._ID);
// EsUtils.index(miniCluster, miniIndex, ESConstants.INDEX_TYPE, data, ESConstants._ID);
} catch (Exception e) { } catch (Exception e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }

90
cl_query_data_job/src/main/java/com/bfd/mf/job/service/StatisticsService.java → cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java

@ -1,9 +1,13 @@
package com.bfd.mf.job.service;
package com.bfd.mf.job.service.statistics;
import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.Task; import com.bfd.mf.job.domain.entity.Task;
import com.bfd.mf.job.domain.repository.SubjectRepository; import com.bfd.mf.job.domain.repository.SubjectRepository;
import com.bfd.mf.job.domain.repository.TaskRepository; import com.bfd.mf.job.domain.repository.TaskRepository;
import com.bfd.mf.job.service.es.EsQueryMiniService;
import com.bfd.mf.job.service.es.EsQueryNormalService;
import com.bfd.mf.job.util.DateUtil;
import com.bfd.mf.job.util.EsUtils; import com.bfd.mf.job.util.EsUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -15,11 +19,11 @@ import java.math.BigInteger;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.TimeZone;
@Service @Service
public class StatisticsService { public class StatisticsService {
private static final Logger LOGGER = LoggerFactory.getLogger(StatisticsService.class); private static final Logger LOGGER = LoggerFactory.getLogger(StatisticsService.class);
private static String indexPre = "cl_major_";
@Autowired @Autowired
private AppConfig config; private AppConfig config;
@Autowired @Autowired
@ -45,16 +49,18 @@ public class StatisticsService {
LOGGER.info("------------------------------------------------------------------ StatisticsService ------------------------------------------------------"); LOGGER.info("------------------------------------------------------------------ StatisticsService ------------------------------------------------------");
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
//-------统计134上的总量------------------------------------------------------------------------------------ //-------统计134上的总量------------------------------------------------------------------------------------
String clusterName = config.esNormalClusterName(); // 获得 137 clusterName
String clusterName = config.esNormalClusterName(); // 获得 134 clusterName
statisticsTotal(clusterName); statisticsTotal(clusterName);
long end = System.currentTimeMillis(); long end = System.currentTimeMillis();
LOGGER.info("Statistics Total, took:{} ms.",(end - start)); LOGGER.info("Statistics Total, took:{} ms.",(end - start));
//-------统计147上的 每个任务的总量------------------------------------------------------------------------- //-------统计147上的 每个任务的总量-------------------------------------------------------------------------
start = System.currentTimeMillis(); start = System.currentTimeMillis();
clusterName = config.esMiniClusterName(); // 获得 147 clusterName clusterName = config.esMiniClusterName(); // 获得 147 clusterName
statisticsTask(clusterName); statisticsTask(clusterName);
end = System.currentTimeMillis(); end = System.currentTimeMillis();
LOGGER.info("Statistics Task, took:{} ms.",(end - start)); LOGGER.info("Statistics Task, took:{} ms.",(end - start));
//-------统计每个专题的量------------------------------------------------------------------------------------ //-------统计每个专题的量------------------------------------------------------------------------------------
start = System.currentTimeMillis(); start = System.currentTimeMillis();
// 如果是正常任务的用这种方式统计 // 如果是正常任务的用这种方式统计
@ -82,6 +88,7 @@ public class StatisticsService {
Map<String,Long> subjectChannelTodayMap = new HashMap<>(); Map<String,Long> subjectChannelTodayMap = new HashMap<>();
long count = 0L; long count = 0L;
long todayCount = 0L; long todayCount = 0L;
// 按渠道统计每个渠道的数据量
for(int i = 0 ; i < 8 ; i ++){ for(int i = 0 ; i < 8 ; i ++){
if(null != taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i)){ if(null != taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i)){
count = taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i); count = taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i);
@ -95,51 +102,52 @@ public class StatisticsService {
} }
switch (i){ switch (i){
case 0: case 0:
subjectChannelMap.put("social",count);
subjectChannelTodayMap.put("social",todayCount);
subjectChannelMap.put(ESConstants.SOCIAL,count);
subjectChannelTodayMap.put(ESConstants.SOCIAL,todayCount);
break; break;
case 1: case 1:
subjectChannelMap.put("news",count);
subjectChannelTodayMap.put("news",todayCount);
subjectChannelMap.put(ESConstants.NEWS,count);
subjectChannelTodayMap.put(ESConstants.NEWS,todayCount);
break; break;
case 2: case 2:
subjectChannelMap.put("blog",count);
subjectChannelTodayMap.put("blog",todayCount);
subjectChannelMap.put(ESConstants.BLOG,count);
subjectChannelTodayMap.put(ESConstants.BLOG,todayCount);
break; break;
case 3: case 3:
subjectChannelMap.put("bbs",count);
subjectChannelTodayMap.put("bbs",todayCount);
subjectChannelMap.put(ESConstants.BBS,count);
subjectChannelTodayMap.put(ESConstants.BBS,todayCount);
break; break;
case 4: case 4:
subjectChannelMap.put("video",count);
subjectChannelTodayMap.put("video",todayCount);
subjectChannelMap.put(ESConstants.VIDEO,count);
subjectChannelTodayMap.put(ESConstants.VIDEO,todayCount);
break; break;
case 5: case 5:
subjectChannelMap.put("item",count);
subjectChannelTodayMap.put("item",todayCount);
subjectChannelMap.put(ESConstants.ITEM,count);
subjectChannelTodayMap.put(ESConstants.ITEM,todayCount);
break; break;
case 6: case 6:
subjectChannelMap.put("search",count);
subjectChannelTodayMap.put("search",todayCount);
subjectChannelMap.put(ESConstants.SEARCH,count);
subjectChannelTodayMap.put(ESConstants.SEARCH,todayCount);
break; break;
case 7: case 7:
subjectChannelMap.put("lief",count);
subjectChannelTodayMap.put("lief",todayCount);
subjectChannelMap.put(ESConstants.LIFE,count);
subjectChannelTodayMap.put(ESConstants.LIFE,todayCount);
break; break;
} }
} }
// 按采集方式统计数据量
Map<String,Long> subjectCrawlDatFlagMap = new HashMap<>(); Map<String,Long> subjectCrawlDatFlagMap = new HashMap<>();
Map<String,Long> subjectCrawlDataFlagTodayMap = new HashMap<>(); Map<String,Long> subjectCrawlDataFlagTodayMap = new HashMap<>();
long siteCount = 0L; long siteCount = 0L;
long siteTodayCount = 0L; long siteTodayCount = 0L;
for(int i = 0 ; i < 8 ; i ++) {
if(null != taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId, i)) {
siteCount = taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId, i);
for(int i = 0 ; i < 4 ; i ++) {
if(null != taskRepository.findDataTotalBySbujectIdAndTaskType(subjectId, i)) {
siteCount = taskRepository.findDataTotalBySbujectIdAndTaskType(subjectId, i);
}else{ }else{
siteCount = 0; siteCount = 0;
} }
if(null != taskRepository.findTodayDataTotalBySbujectIdAndSiteType(subjectId, i)) {
siteTodayCount = taskRepository.findTodayDataTotalBySbujectIdAndSiteType(subjectId, i);
if(null != taskRepository.findTodayDataTotalBySbujectIdAndTaskType(subjectId, i)) {
siteTodayCount = taskRepository.findTodayDataTotalBySbujectIdAndTaskType(subjectId, i);
}else{ }else{
siteTodayCount = 0; siteTodayCount = 0;
} }
@ -156,6 +164,10 @@ public class StatisticsService {
subjectCrawlDatFlagMap.put("url", siteCount); subjectCrawlDatFlagMap.put("url", siteCount);
subjectCrawlDataFlagTodayMap.put("url", siteTodayCount); subjectCrawlDataFlagTodayMap.put("url", siteTodayCount);
break; break;
case 3:
subjectCrawlDatFlagMap.put("upload", siteCount);
subjectCrawlDataFlagTodayMap.put("upload", siteTodayCount);
break;
} }
} }
totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap);
@ -171,7 +183,7 @@ public class StatisticsService {
boolean result = true; boolean result = true;
try{ try{
// LOGGER.info("[StatisticsService] statisticsSubject start... subjectId : " + subjectId); // LOGGER.info("[StatisticsService] statisticsSubject start... subjectId : " + subjectId);
String indexName = indexPre + subjectId;
String indexName = config.getIndexNamePre() + subjectId;
// 统计这个专题下每个渠道的总量 // 统计这个专题下每个渠道的总量
Map<String,Long> subjectChannelMap = esQueryMiniService.getSubjectChannelStatistics(miniName,indexName); Map<String,Long> subjectChannelMap = esQueryMiniService.getSubjectChannelStatistics(miniName,indexName);
// 统计这个专题下每个渠道的增量 // 统计这个专题下每个渠道的增量
@ -214,6 +226,13 @@ public class StatisticsService {
public boolean statisticsTask(String miniName){ public boolean statisticsTask(String miniName){
boolean result = true; boolean result = true;
try{ try{
// 第一步需要将 今天之前已完成 任务的 today_data_total 改成 0 update_time
long current = System.currentTimeMillis();
long zero = current/(1000*3600*24)*(1000*3600*24) - TimeZone.getDefault().getRawOffset();
String updateTime = DateUtil.parseDateByTime(zero);
System.out.println("----- "+ updateTime);
taskRepository.updateTodayTotalCount(updateTime);
EsQueryMiniService esQueryMiniService = new EsQueryMiniService(); EsQueryMiniService esQueryMiniService = new EsQueryMiniService();
// 从库中查出当前任务表中的所有任务 id 和对应的 cid,cralwDataFlag 以及 subjectId // 从库中查出当前任务表中的所有任务 id 和对应的 cid,cralwDataFlag 以及 subjectId
List<Task> taskList = taskRepository.findAllBydel0(); List<Task> taskList = taskRepository.findAllBydel0();
@ -221,15 +240,20 @@ public class StatisticsService {
for (Task task: taskList) { for (Task task: taskList) {
Long taskId = task.getId().longValue(); Long taskId = task.getId().longValue();
String crawlDataFlag = task.getCrawlDataFlag(); String crawlDataFlag = task.getCrawlDataFlag();
Map<String,Long> countMap = esQueryMiniService.getTaskCount(miniName,taskId,task,crawlDataFlag);
// 直接更新 cl_task 表中的 data_total today_data_total
long totalCount = 0L;
long todayCount = 0L;
if(countMap.containsKey("totalCount") && countMap.containsKey("todayCount")) {
totalCount = countMap.get("totalCount");
todayCount = countMap.get("todayCount");
String indexNamePre = config.getIndexNamePre();
Map<String, Long> countMap = new HashMap<>();
if(null != task.getCid() && !task.getCid().equals("test")) {
countMap = esQueryMiniService.getTaskCount(miniName, taskId, task, crawlDataFlag, indexNamePre);
// 直接更新 cl_task 表中的 data_total today_data_total
long totalCount = 0L;
long todayCount = 0L;
if(countMap.containsKey("totalCount") && countMap.containsKey("todayCount")) {
totalCount = countMap.get("totalCount");
todayCount = countMap.get("todayCount");
}
taskRepository.updateTaskCount(taskId,totalCount,todayCount);
} }
taskRepository.updateTaskCount(taskId,totalCount,todayCount );
} }
LOGGER.info("[StatisticsService] statisticsTask finish ..."); LOGGER.info("[StatisticsService] statisticsTask finish ...");
}catch (Exception e){ }catch (Exception e){

21
cl_query_data_job/src/main/java/com/bfd/mf/job/service/TotalCountService.java → cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java

@ -1,10 +1,12 @@
package com.bfd.mf.job.service;
package com.bfd.mf.job.service.statistics;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.domain.entity.ResultDetail;
import com.bfd.mf.job.domain.entity.SubjectCount; import com.bfd.mf.job.domain.entity.SubjectCount;
import com.bfd.mf.job.domain.repository.ResultDetailRepository; import com.bfd.mf.job.domain.repository.ResultDetailRepository;
import com.bfd.mf.job.domain.repository.SubjectCountRepository; import com.bfd.mf.job.domain.repository.SubjectCountRepository;
import com.bfd.mf.job.service.statistics.StatisticsService;
import com.bfd.mf.job.util.DateUtil;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -85,10 +87,16 @@ public class TotalCountService {
if(null != subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)) { if(null != subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)) {
account_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)); account_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT));
} }
if(null != subjectCrawlDataFlagMap.get("upload")){
System.out.println("上传的任务的数据量对应的专题 " + subjectId +" == "+ subjectCrawlDataFlagMap.get("upload"));
if(subjectCrawlDataFlagMap.get("upload").compareTo(0L) >0)
sum = subjectCrawlDataFlagMap.get("upload");
}
} }
subjectCount.setUrlTypeTotalCount(url_type_total_count); subjectCount.setUrlTypeTotalCount(url_type_total_count);
subjectCount.setKeywordTypeTotalCount(keyword_type_total_count); subjectCount.setKeywordTypeTotalCount(keyword_type_total_count);
subjectCount.setAccountTypeTotalCount(account_type_total_count); subjectCount.setAccountTypeTotalCount(account_type_total_count);
if (subjectCrawlDataFlagTodayMap.size() > 0) { if (subjectCrawlDataFlagTodayMap.size() > 0) {
if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.URL)) { if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.URL)) {
url_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.URL)); url_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.URL));
@ -188,6 +196,15 @@ public class TotalCountService {
social_total_count, social_count, bbs_total_count, bbs_count, blog_total_count, blog_count, news_total_count, news_count, social_total_count, social_count, bbs_total_count, bbs_count, blog_total_count, blog_count, news_total_count, news_count,
search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count); search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count);
} else { } else {
// 先获取这个专题昨天的 update_time ,然后添加到 UpdateTime 字段中
// 获取昨天日期
String yesterday = DateUtil.parseDateByday(System.currentTimeMillis() - 1000 * 60 * 60 * 24);
Date updateTime = subjectCountRepository.getUpdateTimeBySubjectId(subjectId,yesterday);
if(updateTime != null){
subjectCount.setUpdateTime(updateTime);
}else {
subjectCount.setUpdateTime(new Date());
}
subjectCountRepository.save(subjectCount); subjectCountRepository.save(subjectCount);
} }
}catch ( Exception e){ }catch ( Exception e){

311
cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java

@ -0,0 +1,311 @@
package com.bfd.mf.job.service.taskCount;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.domain.entity.Task;
import com.bfd.mf.job.domain.entity.TaskCount;
import com.bfd.mf.job.domain.repository.SubjectRepository;
import com.bfd.mf.job.domain.repository.TaskCountRepository;
import com.bfd.mf.job.domain.repository.TaskRepository;
import com.bfd.mf.job.service.es.EsQueryMiniService;
import com.bfd.mf.job.service.es.EsQueryNormalService;
import com.bfd.mf.job.service.statistics.TotalCountService;
import com.bfd.mf.job.util.DateUtil;
import com.bfd.mf.job.util.EsUtils;
import kafka.utils.Json;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.math.BigInteger;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
@Service
public class TaskCountService {
    private static final Logger LOGGER = LoggerFactory.getLogger(TaskCountService.class);

    /** Milliseconds in one day, used by the local-midnight window arithmetic below. */
    private static final long ONE_DAY_MS = 24L * 60 * 60 * 1000;

    @Autowired
    private AppConfig config;
    @Autowired
    private TaskRepository taskRepository;
    @Autowired
    private TaskCountRepository taskCountRepository;

    @PostConstruct
    public void init() {
        // Register both ES clusters declared in the configuration so later
        // queries/writes can address them by cluster name.
        EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress()); // es-source
        EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress());     // es-target
    }

    /**
     * Aggregates yesterday's finished-task statistics and stores one summary row
     * in cl_task_count: the counted day, the average data volume per task and the
     * average task speed.
     *
     * <p>Counting rules (enforced by the repository queries):
     * 1. only one-shot tasks are counted;
     * 2. only tasks whose completion time falls inside yesterday's window;
     * 3. only tasks with a data total greater than 0, filtering out bogus tasks.
     */
    public void tryAcquire() {
        long start = System.currentTimeMillis();
        LOGGER.info("------------------------------------------------------------------ TaskCountService ------------------------------------------------------");
        // Local midnight of today: truncate epoch millis to a whole UTC day, then
        // shift by the zone offset.
        // NOTE(review): getRawOffset() ignores DST — confirm this is acceptable
        // for the deployment time zone.
        long current = System.currentTimeMillis();
        long zero = current / ONE_DAY_MS * ONE_DAY_MS - TimeZone.getDefault().getRawOffset();
        long twelve = zero + ONE_DAY_MS - 1; // 23:59:59.999 of today
        // Statistics cover the previous day, so shift the window back 24 hours.
        long newZero = zero - ONE_DAY_MS;
        long newTweleve = twelve - ONE_DAY_MS;
        String taskStartTime = DateUtil.parseDateByTime(newZero);
        String taskEndTime = DateUtil.parseDateByTime(newTweleve);
        LOGGER.info("Counting window: {} -- {}", taskStartTime, taskEndTime);
        // Sum of today_count over all qualifying tasks in the window.
        Long sumTodayTotalCount = taskRepository.findTodayDataTotal(taskStartTime, taskEndTime);
        // Per-task speed values used to derive the average speed.
        List<BigInteger> avgSpeedList = taskRepository.findTaskByCrawlTime(taskStartTime, taskEndTime);
        int count = avgSpeedList.size();
        long sumSpeedCount = 0L;
        for (BigInteger avg : avgSpeedList) {
            sumSpeedCount += avg.longValue();
        }
        LOGGER.info("sumTodayTotalCount : {}", sumTodayTotalCount);
        String avgSpeed = "0";
        String avgCount = "0";
        // Guard against division by zero when no task finished yesterday: the
        // previous code divided by count unconditionally and could persist
        // NaN/Infinity. sumSpeedCount is a primitive accumulator and can never
        // be null, so the meaningful preconditions are count > 0 and a non-null
        // query result.
        if (count > 0 && sumTodayTotalCount != null) {
            avgSpeed = String.format("%.1f", sumSpeedCount * 1.0 / count);
            avgCount = String.format("%.1f", sumTodayTotalCount * 1.0 / count);
            LOGGER.info("任务数量:{}; 当天的新增总量: {}; 平均任务量:{}; 平均速度:{}",
                    count, sumTodayTotalCount, avgCount, avgSpeed);
        }
        // Persist the aggregated row, keyed by the counted day (the date part of
        // the window start, "yyyy-MM-dd").
        String day = taskStartTime.split(" ")[0];
        TaskCount taskCount = new TaskCount();
        taskCount.setCountDate(day);
        taskCount.setAvgCount(Float.valueOf(avgCount));
        taskCount.setAvgSpeed(Float.valueOf(avgSpeed));
        LOGGER.debug("Saving task count row: {}", JSONObject.toJSONString(taskCount));
        taskCountRepository.save(taskCount);
        long end = System.currentTimeMillis();
        LOGGER.info("TaskCountService finish, took:{} ms.", (end - start));
    }
}

545
cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadExcelService.java

@ -0,0 +1,545 @@
package com.bfd.mf.job.service.upload;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.AllKeys;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.config.ESConstants;
import com.bfd.mf.job.download.DownLoadFile;
import com.bfd.mf.job.util.DataCheckUtil;
import com.bfd.mf.job.util.EsUtils2;
import com.bfd.mf.job.util.ReadLine;
import com.monitorjbl.xlsx.StreamingReader;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.assertj.core.util.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.*;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import static com.bfd.mf.job.config.ESConstants.INDEX_TYPE;
@Service
public class UpLoadExcelService {
private static final Logger LOGGER = LoggerFactory.getLogger(UpLoadExcelService.class);
private static BlockingQueue<EsUtils2.BulkItem> DATA_CACHE = new LinkedBlockingQueue<>(10240);
@PostConstruct
public void init() {
    // Register the "es-mini" cluster from the configuration as the write target
    // for the parsed Excel rows; EsUtils2 expects the address as an array.
    // (Fixed the C-style array declaration `String address []`.)
    String[] address = {config.getEsMini().get("address").toString()};
    EsUtils2.registerCluster(config.getEsMini().get("name").toString(), address); // es-source in the config file
}
@Autowired
private AppConfig config;
/**
* 解析 Excel 中的数据并将数据处理后写入到 对应subjectId 的ES索引中
* 目前是一条一条写这块需要优化哦
*/
// public int parseExcel(String subjectId ,String path ,String excelName, Map<String, List<String>> fileNameMap,String crawlDataFlag) {
// LOGGER.info("UpLoadExcelService [parseExcel] parseExcel start ...");
// int dataCount = 0;
// try{
// XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(path+excelName));
// //获取每一个工作簿的数据
// long start = System.currentTimeMillis();
// for (int i = 0; i < xssfWorkbook.getNumberOfSheets(); i++) {
// XSSFSheet sheet = xssfWorkbook.getSheetAt(i);
// int rowNum = sheet.getLastRowNum();
// int cellNum = sheet.getRow(0).getLastCellNum();
// if(cellNum < 22){
// return dataCount;
// }
// dataCount = rowNum ;
// List<JSONObject> resultList = new ArrayList<>();
// for (int row = 1; row <= rowNum; row++) {
// JSONObject resultJson = new JSONObject();
// Map<String, Object> resultMap = AllKeys.getMap();
// String dataId = String.valueOf(sheet.getRow(row).getCell(0));
// resultMap.put(ESConstants.DATA_ID,dataId);
// resultMap.put(ESConstants._ID,dataId);
// resultMap.put(ESConstants.DOC_ID,String.valueOf(sheet.getRow(row).getCell(1)));
// resultMap.put(ESConstants.CHANNEL,String.valueOf(sheet.getRow(row).getCell(2)));
// resultMap.put(ESConstants.SOURCE,String.valueOf(sheet.getRow(row).getCell(3)));
// resultMap.put(ESConstants.EN_SOURCE,String.valueOf(sheet.getRow(row).getCell(4)));
// resultMap.put(ESConstants.URL,String.valueOf(sheet.getRow(row).getCell(5)));
// resultMap.put(ESConstants.TITLE,String.valueOf(sheet.getRow(row).getCell(6)));
// resultMap.put(ESConstants.TRANSLATETITLE,String.valueOf(sheet.getRow(row).getCell(7)));
// // 发表时间的 4 个字段
// String pubTimeStr = String.valueOf(sheet.getRow(row).getCell(8));
// long pubTime = DataCheckUtil.convertDateTotime(pubTimeStr)*1000;
// long pubDay = DataCheckUtil.getDay(pubTime);
// String pubDate = DataCheckUtil.getDate(pubTime);
// resultMap.put(ESConstants.PUBTIME, pubTime);
// resultMap.put(ESConstants.PUBTIMESTR,pubTimeStr);
// resultMap.put(ESConstants.PUBDAY,pubDay);
// resultMap.put(ESConstants.PUBDATE,pubDate);
//
// resultMap.put(ESConstants.AUTHOR,String.valueOf(sheet.getRow(row).getCell(9)));
// resultMap.put(ESConstants.AUTHORID,String.valueOf(sheet.getRow(row).getCell(10)));
// resultMap.put(ESConstants.CONTENT,String.valueOf(sheet.getRow(row).getCell(11)));
// resultMap.put(ESConstants.TRANSLATECONTENT,String.valueOf(sheet.getRow(row).getCell(12)));
// resultMap.put(ESConstants.PRICE,String.valueOf(sheet.getRow(row).getCell(13)));
// resultMap.put(ESConstants.PRODUCTPARAMETER,String.valueOf(sheet.getRow(row).getCell(14)));
// // 抓取时间的 4 个字段
// String crawlTimeStr = String.valueOf(sheet.getRow(row).getCell(15));
// long crawlTime = System.currentTimeMillis() ;
// if(!crawlTimeStr.contains("1970")){
// crawlTime = DataCheckUtil.convertDateTotime(crawlTimeStr)*1000;
// }else{
// crawlTimeStr = DataCheckUtil.getCurrentTime(crawlTime);
// }
// long crawlDay = DataCheckUtil.getDay(crawlTime);
// String crawlDate = DataCheckUtil.getDate(crawlTime);
//
// resultMap.put(ESConstants.CRAWLTIME,crawlTime);
// resultMap.put(ESConstants.CRAWLTIMESTR,crawlTimeStr);
// resultMap.put(ESConstants.CRAWLDAY,crawlDay);
// resultMap.put(ESConstants.CRAWLDATE,crawlDate);
// // crawlDataFlag 这个字段值不用数据中原有的而是要用页面传过来的不然任务查询的时候查不到数据
// resultMap.put(ESConstants.CRAWLDATAFLAG,crawlDataFlag);
// resultMap.put(ESConstants.SYS_SENTIMENT,String.valueOf(sheet.getRow(row).getCell(17)));
// // 提取的关键字字段的值
// XSSFCell hlKeywords = sheet.getRow(row).getCell(18);
// List<String> hl = new ArrayList<>();
// if (null != hlKeywords) {
// if (hlKeywords.toString().equals("[]")) {
// resultMap.put(ESConstants.HL_KEYWORDS, hl);
// } else {
// if (hlKeywords.toString().contains(",")) {
// String hlk[] = hlKeywords.toString().replace("[", "").replace("]", "").replace("\"", "").split(",");
// hl = Arrays.asList(hlk);
// } else {
// String hlk = hlKeywords.toString().replace("[", "").replace("]", "");
// hl.add(hlk);
// }
// }
// }else {
// resultMap.put(ESConstants.HL_KEYWORDS, hl);
// }
// // 转发评论点赞
// String quoteCount = sheet.getRow(row).getCell(19).toString();
// if(quoteCount.equals("")){
// quoteCount = "0";
// }
// resultMap.put("quoteCount",Integer.valueOf(quoteCount));
// String commentsCount = sheet.getRow(row).getCell(20).toString();
// if(commentsCount.equals("")){
// commentsCount = "0";
// }
// resultMap.put("commentsCount",Integer.valueOf(commentsCount));
// String attitudesCount = sheet.getRow(row).getCell(21).toString();
// if(attitudesCount.equals("")){
// attitudesCount = "0";
// }
// resultMap.put("attitudesCount",Integer.valueOf(attitudesCount));
// // 插入时间的 4个字段
// long createTime = System.currentTimeMillis() ;
// resultMap.put(ESConstants.CREATETIME, createTime);
// resultMap.put(ESConstants.CREATETIMESTR, DataCheckUtil.getCurrentTime(createTime));
// resultMap.put(ESConstants.CREATEDAY, DataCheckUtil.getDay(createTime));
// resultMap.put(ESConstants.CREATEDATE, DataCheckUtil.getDate(createTime));
//
// // 根据路径和数据ID读取附件组装附件的字段值
// resultMap = getPathSize(path,dataId,resultMap,fileNameMap);
//
// LOGGER.info("The Result: " + JSONObject.toJSONString(resultMap));
// resultJson.putAll(resultMap);
// resultList.add(resultJson);
// // 一条一条的数据插入
// // uploadData(subjectId,resultJson);
// }
// LOGGER.info("Writer Data To ES totalCount = " + resultList.size());
// long end = System.currentTimeMillis();
// System.out.println(end-start + " === "+resultList.size());
// // 批量的数据插入
//// if(resultList.size() >= 1000) {
//// uploadData(subjectId, resultList);
//// resultList.clear();
//// }
// }
// }catch (Exception e){
// e.printStackTrace();
// dataCount = 0;
// }
// return dataCount;
// }
/**
 * Parses an uploaded XLSX file row by row (streaming) and bulk-writes the rows
 * into the ES index belonging to {@code subjectId}.
 *
 * NOTE(review): assumes a fixed positional layout of 24 columns (cells 0-23) —
 * TODO confirm against the upload template.
 *
 * @param subjectId     id of the subject whose index receives the data
 * @param path          directory containing the uploaded file (and attachments)
 * @param excelName     file name of the workbook, appended to {@code path}
 * @param fileNameMap   attachment file names, consumed by {@code getPathSize}
 * @param crawlDataFlag flag written into every row (overrides any value in the data)
 * @return map with "dataCount" (number of rows written) and "pubTimeMap"
 *         (min/max publish time seen across all rows)
 * @throws FileNotFoundException declared, but open errors are caught below
 */
public Map<String,Object> parseExcel2(String subjectId ,String path ,
                                      String excelName,
                                      Map<String, List<String>> fileNameMap,
                                      String crawlDataFlag)
        throws FileNotFoundException {
    LOGGER.info("UpLoadExcelService [parseExcel] 222 parseExcel2 start ...");
    Map<String,Object> returnMap = new HashMap<>();
    int dataCount = 0; // running total of rows handed to uploadData
    try{
        FileInputStream in = new FileInputStream(path+excelName);
        // Streaming reader keeps memory bounded for large workbooks.
        // NOTE(review): neither `wk` nor `in` is ever closed — consider
        // try-with-resources.
        Workbook wk = StreamingReader.builder()
                .rowCacheSize(100)    // rows cached in memory (default is 10)
                .bufferSize(4096)     // read-buffer size in bytes (default is 1024)
                .open(in);            // source may be InputStream or File; XLSX only
        Sheet sheet = wk.getSheetAt(0);
        // Iterate all rows, batching parsed rows before upload.
        List<JSONObject> resultList = new ArrayList<>();
        // Tracks the smallest/largest publish time across all rows.
        Map<String,Long> pubTimeMap = new HashMap<>();
        pubTimeMap.put("min", System.currentTimeMillis());
        pubTimeMap.put("max", 0L);
        for (Row row : sheet) {
            if(row.getRowNum() != 0){ // row 0 is the header row
                JSONObject resultJson = new JSONObject();
                // Template map pre-filled with every expected ES field.
                Map<String, Object> resultMap = AllKeys.getMap();
                String dataId = String.valueOf(row.getCell(0).getStringCellValue());
                resultMap.put(ESConstants.DATA_ID, dataId);
                resultMap.put(ESConstants._ID, dataId);
                resultMap.put(ESConstants.DOC_ID, String.valueOf(row.getCell(1).getStringCellValue()));
                String channel = String.valueOf(row.getCell(2).getStringCellValue());
                resultMap.put(ESConstants.CHANNEL,channel );
                resultMap.put(ESConstants.DOC_TYPE,getDocType(channel));
                resultMap.put(ESConstants.SOURCE, String.valueOf(row.getCell(3).getStringCellValue()));
                resultMap.put(ESConstants.EN_SOURCE, String.valueOf(row.getCell(4).getStringCellValue()));
                resultMap.put(ESConstants.URL, String.valueOf(row.getCell(5).getStringCellValue()));
                resultMap.put(ESConstants.TITLE, String.valueOf(row.getCell(6).getStringCellValue()));
                resultMap.put(ESConstants.TRANSLATETITLE, String.valueOf(row.getCell(7).getStringCellValue()));
                // The four publish-time fields (millis, string, day, date).
                String pubTimeStr = String.valueOf(row.getCell(8).getStringCellValue());
                long pubTime = DataCheckUtil.convertDateTotime(pubTimeStr) * 1000;
                long pubDay = DataCheckUtil.getDay(pubTime);
                String pubDate = DataCheckUtil.getDate(pubTime);
                resultMap.put(ESConstants.PUBTIME, pubTime);
                // Fold this row's publish time into the min/max tracker.
                if(pubTime < pubTimeMap.get("min")){
                    pubTimeMap.put("min",pubTime);
                }
                if(pubTime > pubTimeMap.get("max")){
                    pubTimeMap.put("max",pubTime);
                }
                resultMap.put(ESConstants.PUBTIMESTR, pubTimeStr);
                resultMap.put(ESConstants.PUBDAY, pubDay);
                resultMap.put(ESConstants.PUBDATE, pubDate);
                resultMap.put(ESConstants.AUTHOR, String.valueOf(row.getCell(9).getStringCellValue()));
                resultMap.put(ESConstants.AUTHORID, String.valueOf(row.getCell(10).getStringCellValue()));
                resultMap.put(ESConstants.CONTENT, String.valueOf(row.getCell(11).getStringCellValue()));
                resultMap.put(ESConstants.TRANSLATECONTENT, String.valueOf(row.getCell(12).getStringCellValue()));
                resultMap.put(ESConstants.PRICE, String.valueOf(row.getCell(13).getStringCellValue()));
                resultMap.put(ESConstants.PRODUCTPARAMETER, String.valueOf(row.getCell(14).getStringCellValue()));
                // The four crawl-time fields; a "1970" string marks a missing
                // value and falls back to "now".
                String crawlTimeStr = String.valueOf(row.getCell(15).getStringCellValue());
                long crawlTime = System.currentTimeMillis();
                if (!crawlTimeStr.contains("1970")) {
                    crawlTime = DataCheckUtil.convertDateTotime(crawlTimeStr) * 1000;
                } else {
                    crawlTimeStr = DataCheckUtil.getCurrentTime(crawlTime);
                }
                long crawlDay = DataCheckUtil.getDay(crawlTime);
                String crawlDate = DataCheckUtil.getDate(crawlTime);
                resultMap.put(ESConstants.CRAWLTIME, crawlTime);
                resultMap.put(ESConstants.CRAWLTIMESTR, crawlTimeStr);
                resultMap.put(ESConstants.CRAWLDAY, crawlDay);
                resultMap.put(ESConstants.CRAWLDATE, crawlDate);
                // crawlDataFlag must be the value passed from the page, not the
                // one embedded in the data — otherwise task queries cannot find
                // these rows.
                resultMap.put(ESConstants.CRAWLDATAFLAG, crawlDataFlag);
                // NOTE(review): cell 16 is intentionally (?) skipped — confirm.
                resultMap.put(ESConstants.SYS_SENTIMENT, String.valueOf(row.getCell(17).getStringCellValue()));
                // Extracted keyword list, parsed from its "[a,b,...]" string form.
                String hlKeywords = row.getCell(18).getStringCellValue();
                List<String> hl = getHlKeywords(hlKeywords);
                resultMap.put(ESConstants.HL_KEYWORDS, hl);
                // Repost / comment / like counters; empty cells default to 0.
                String quoteCount = row.getCell(19).getStringCellValue();
                if (quoteCount.equals("")) {
                    quoteCount = "0";
                }
                resultMap.put("quoteCount", Integer.valueOf(quoteCount));
                String commentsCount = row.getCell(20).getStringCellValue();
                if (commentsCount.equals("")) {
                    commentsCount = "0";
                }
                resultMap.put("commentsCount", Integer.valueOf(commentsCount));
                String attitudesCount = row.getCell(21).getStringCellValue();
                if (attitudesCount.equals("")) {
                    attitudesCount = "0";
                }
                resultMap.put("attitudesCount", Integer.valueOf(attitudesCount));
                // OCR (image recognition) result, same list format as keywords.
                String ocrText = row.getCell(22).getStringCellValue();
                List<String> ocrList = getHlKeywords(ocrText);
                resultMap.put(ESConstants.OCRTEXT,ocrList);
                // ASR (speech recognition) result.
                String asrText = row.getCell(23).getStringCellValue();
                resultMap.put(ESConstants.ASRTEXT,asrText);
                // The four insert-time fields, stamped at parse time.
                long createTime = System.currentTimeMillis();
                resultMap.put(ESConstants.CREATETIME, createTime);
                resultMap.put(ESConstants.CREATETIMESTR, DataCheckUtil.getCurrentTime(createTime));
                resultMap.put(ESConstants.CREATEDAY, DataCheckUtil.getDay(createTime));
                resultMap.put(ESConstants.CREATEDATE, DataCheckUtil.getDate(createTime));
                // Resolve attachments for this data id and merge their fields in.
                resultMap = getPathSize(path, dataId, resultMap, fileNameMap);
                LOGGER.info("The Result: " + JSONObject.toJSONString(resultMap));
                resultJson.putAll(resultMap);
                resultList.add(resultJson);
                // (one-by-one insert, kept disabled)
                //uploadData(subjectId,resultJson);
            }
            // Flush a full batch of 100 rows to ES.
            if(resultList.size() >=100){
                dataCount = dataCount+resultList.size();
                uploadData(subjectId, resultList);
                resultList.clear();
            }
        }
        // Flush the trailing partial batch (fewer than 100 rows).
        dataCount = dataCount + resultList.size();
        uploadData(subjectId, resultList);
        returnMap.put("pubTimeMap",pubTimeMap);
        returnMap.put("dataCount",dataCount);
        LOGGER.info("Writer Data To ES totalCount = " + dataCount);
    }catch (Exception e){
        // NOTE(review): failures are swallowed and an incomplete returnMap is
        // returned; the caller cannot distinguish "empty file" from "error".
        e.printStackTrace();
    }
    return returnMap;
}
/**
 * Parses a serialized keyword list such as {@code [a,b,c]} or {@code ["a","b"]}
 * into a {@code List<String>}.
 *
 * Fixes over the original: the redundant {@code .toString()} calls on a String
 * are gone, and the comma branch now returns a mutable ArrayList instead of the
 * fixed-size view produced by {@code Arrays.asList}.
 *
 * @param hlKeywords raw keyword string; may be null or "[]"
 * @return the parsed keywords; empty for null/"[]" input (never null)
 */
private List<String> getHlKeywords(String hlKeywords) {
    List<String> hl = new ArrayList<>();
    if (hlKeywords == null || hlKeywords.equals("[]")) {
        return hl;
    }
    if (hlKeywords.contains(",")) {
        // Multi-element form: strip brackets and quotes, then split.
        String[] hlk = hlKeywords.replace("[", "").replace("]", "").replace("\"", "").split(",");
        hl.addAll(Arrays.asList(hlk));
    } else {
        // Single-element form: the original only stripped brackets here (not quotes);
        // that behavior is preserved.
        hl.add(hlKeywords.replace("[", "").replace("]", ""));
    }
    return hl;
}
/**
 * Uploads the attachments belonging to one record to go-fast and fills the
 * attachment fields into the result map: pgc/ugc/egc flags (1 when an
 * image/file/video attachment exists), per-type path lists, and per-type
 * path+size+resolution JSON strings.
 *
 * @param path        base directory of the unzipped upload package
 * @param dataId      record id; attachments live under {@code path + dataId}
 * @param resultMap   record being built; attachment fields are added in place
 * @param fileNameMap dataId -> attachment file names (from the zip listing)
 * @return the same resultMap instance, with attachment fields populated
 */
private Map<String,Object> getPathSize(String path, String dataId,
                                       Map<String, Object> resultMap,
                                       Map<String, List<String>> fileNameMap) {
    // LOGGER.info("UpLoadExcelService [getPathSize] need Download !");
    // If the attachment folder does not exist: isDownload = false and pgc/ugc/egc all stay 0.
    File file=new File(path+dataId);
    resultMap.put("pgc",0);
    resultMap.put("ugc",0);
    resultMap.put("egc",0);
    List<Map<String,String>> filePathSize = new ArrayList<>();
    List<Map<String,String>> imagePathSize = new ArrayList<>();
    List<Map<String,String>> videoPathSize = new ArrayList<>();
    List<String> filePath = new ArrayList<>();
    List<String> imagePath = new ArrayList<>();
    List<String> videoPath = new ArrayList<>();
    if(!file.exists()){ // attachment folder missing
        resultMap .put("isDownload",false);
    }else{
        resultMap .put("isDownload",true);
        List<String> fileNames = fileNameMap.get(dataId);
        for (String fileName:fileNames) { // videoPath == egc, filePath == ugc, imagePath == pgc
            // Upload each file to go-fast, then build the path / pathSize entries by filename prefix.
            String goFastUrl = config.getGoFastPostUrl();
            // String zipPath = bfdApiConfig.getUploadZipPath();
            // String url = DownLoadFile.upload(goFastUrl,dataId+fileName,content);
            String file1 = path + dataId + "/" + fileName;
            Map urlMap = DownLoadFile.upload(goFastUrl,dataId+fileName,new File(file1));
            String url = urlMap.get("path").toString();
            Map<String,String> pathMap = new HashMap<>();
            pathMap.put("url",url);
            // File size, rendered in KB.
            // NOTE(review): size/1024 divides as longs before Math.round, so the rounding
            // is a no-op and the /100.0 scale looks off by a factor — confirm intended units.
            long size = Long.valueOf(urlMap.get("size").toString());
            Double newSize =(double)(Math.round(size/1024)/100.0);
            pathMap.put("size",newSize+"KB");
            // Resolution: computed for images (except SVG) and videos (mp3 gets a
            // fixed placeholder, flv is skipped); empty for everything else.
            String resolution = "";
            if(fileName.startsWith("image")) {
                if(url.endsWith(".svg")){
                    resolution = "";
                }else {
                    resolution = ReadLine.getImageDim(file1);
                }
            }
            if(fileName.startsWith("video")){
                if(url.endsWith(".mp3")){
                    resolution = "400*240";
                }else if(url.endsWith(".flv")) {
                    resolution = "";
                }else{
                    resolution = ReadLine.videosize(file1);
                }
            }
            // System.out.println(resolution);
            pathMap.put("resolution",resolution);
            // Video duration is not extracted yet; always stored empty.
            String videoTime = "";
            pathMap.put("videoTime",videoTime);
            if(fileName.startsWith("file")){
                resultMap.put("ugc",1);
                filePathSize.add(pathMap);
                filePath.add(url);
            }
            if(fileName.startsWith("image")){
                resultMap.put("pgc",1);
                imagePathSize.add(pathMap);
                imagePath.add(url);
            }
            if(fileName.startsWith("video")){
                resultMap.put("egc",1);
                videoPathSize.add(pathMap);
                videoPath.add(url);
            }
        }
    }
    // The *PathSize fields are stored as JSON strings; the *Path fields as plain lists.
    resultMap.put("filePathSize",JSONObject.toJSONString(filePathSize));
    resultMap.put("imagePathSize",JSONObject.toJSONString(imagePathSize));
    resultMap.put("videoPathSize",JSONObject.toJSONString(videoPathSize));
    resultMap.put("filePath",filePath);
    resultMap.put("imagePath",imagePath);
    resultMap.put("videoPath",videoPath);
    return resultMap;
}
/**
 * Imports raw JSON lines (one record per string) into ES under the subject's
 * index, then flushes the bulk cache.
 *
 * Fixes over the original: a malformed line is now skipped instead of
 * {@code return}ing (which silently dropped every remaining line and skipped
 * the flush); the no-op {@code replace("\\\"","\\\"")} is removed; the
 * interrupt status is restored on {@link InterruptedException}.
 *
 * @param subjectId     subject whose index receives the data
 * @param list          raw JSON strings, one record each
 * @param crawlDataFlag flag injected into every record so task queries can find it
 */
public void uploadData(String subjectId, List<String> list, String crawlDataFlag) {
    String indexNamePre = config.getIndexNamePre();
    String indexName = indexNamePre + subjectId;
    for (String line : list) {
        JSONObject data;
        try {
            data = JSONObject.parseObject(line);
            // Always use the flag passed from the page, not whatever the data carries,
            // otherwise task queries cannot find the records.
            data.put("crawlDataFlag", crawlDataFlag);
        } catch (Exception e) {
            // Malformed JSON: skip this line but keep importing the rest.
            LOGGER.warn("uploadData: skipped one line that is not valid JSON");
            continue;
        }
        if (data.size() > 0) {
            try {
                DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, data));
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // preserve interrupt status
                LOGGER.error("uploadData interrupted while queueing bulk item", e);
            }
        }
    }
    flushData();
}
/**
 * Imports a single record into ES under the subject's index, then flushes
 * the bulk cache.
 *
 * Fixes over the original: the catch-all try that only shielded a potential
 * NPE is replaced by an explicit null check, and the interrupt status is
 * restored on {@link InterruptedException}.
 *
 * @param subjectId subject whose index receives the record
 * @param result    record to index; null or empty records are ignored
 */
public void uploadData(String subjectId, JSONObject result) {
    String indexNamePre = config.getIndexNamePre();
    String indexName = indexNamePre + subjectId;
    if (result != null && result.size() > 0) {
        try {
            DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, result));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt(); // preserve interrupt status
            LOGGER.error("uploadData interrupted while queueing bulk item", e);
        }
    }
    flushData();
}
/**
 * Imports a list of records into ES under the subject's index, then flushes
 * the bulk cache.
 *
 * Fix over the original: the interrupt status is restored on
 * {@link InterruptedException} instead of only printing the stack trace.
 *
 * @param subjectId  subject whose index receives the records
 * @param resultList records to index; empty records are skipped
 */
public void uploadData(String subjectId, List<JSONObject> resultList) {
    String indexNamePre = config.getIndexNamePre();
    String indexName = indexNamePre + subjectId;
    for (JSONObject data : resultList) {
        if (data.size() > 0) {
            try {
                DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, data));
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt(); // preserve interrupt status
                LOGGER.error("uploadData interrupted while queueing bulk item", e);
            }
        }
    }
    flushData();
}
/**
 * Drains the bulk cache and writes its contents to ES in batches of at most
 * 100 items.
 */
public void flushData() {
    LOGGER.info("ES flushData");
    List<EsUtils2.BulkItem> batch = Lists.newArrayList();
    // Poll until the cache is empty, flushing whenever a full batch accumulates.
    for (EsUtils2.BulkItem next = DATA_CACHE.poll(); next != null; next = DATA_CACHE.poll()) {
        if (batch.size() >= 100) {
            EsUtils2.bulkIndex(config.esMiniClusterName(), batch, "_id_");
            LOGGER.debug("Flush data, size:{}.", batch.size());
            batch.clear();
        }
        batch.add(next);
    }
    // Write out the final partial batch, if any.
    if (!batch.isEmpty()) {
        EsUtils2.bulkIndex(config.esMiniClusterName(), batch, "_id_");
        LOGGER.debug("Flush data, size:{}.", batch.size());
    }
}
/**
 * Reads a text file of JSON lines and imports them into ES.
 *
 * Fix over the original: failures are logged with context via LOGGER instead
 * of {@code printStackTrace()}; the best-effort contract (return 0 on error)
 * is unchanged.
 *
 * @param subjectId     subject whose index receives the data
 * @param filPath       path of the text file to read
 * @param crawlDataFlag flag injected into every record
 * @return number of lines read, or 0 when the file could not be processed
 */
public int uploadTxt(String subjectId, String filPath, String crawlDataFlag) {
    int dataCount = 0;
    try {
        List<String> list = ReadLine.readLine(new File(filPath));
        dataCount = list.size();
        uploadData(subjectId, list, crawlDataFlag);
    } catch (Exception e) {
        // Best-effort: a bad file yields a 0 count rather than failing the task.
        LOGGER.error("uploadTxt failed for file: " + filPath, e);
    }
    return dataCount;
}
/**
 * Maps a Chinese channel label to its ES doc-type code.
 *
 * Fix over the original: a HashMap was rebuilt on every call; a switch gives
 * the same lookup with no per-call allocation.
 *
 * @param channel channel label, e.g. "社交媒体"
 * @return the doc-type code, or null for null/unknown channels
 *         (identical to the old Map.get behavior)
 */
private String getDocType (String channel){
    if (channel == null) {
        return null;
    }
    switch (channel) {
        case "社交媒体": return "social";
        case "网络视频": return "video";
        case "新闻资讯": return "news";
        case "博客智库": return "blog";
        case "论坛贴吧": return "bbs";
        case "搜索引擎": return "search";
        case "电商网站": return "item";
        case "生活方式": return "life";
        default: return null;
    }
}
}

214
cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadService.java

@ -0,0 +1,214 @@
package com.bfd.mf.job.service.upload;
import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.domain.entity.UploadTask;
import com.bfd.mf.job.domain.repository.UploadTaskRepository;
import com.bfd.mf.job.util.EsUtils;
import com.bfd.mf.job.util.ZipUtils;
import com.google.common.collect.Maps;
import org.assertj.core.util.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import javax.annotation.PostConstruct;
import java.io.*;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
@Service
public class UpLoadService {
    private static final Logger LOGGER = LoggerFactory.getLogger(UpLoadService.class);
    // Queue of acquired upload tasks awaiting processing by produce().
    // Key: task id; value: placeholder counters carried along with the task.
    private static BlockingQueue<Map<Long, List<? extends Number>>> P_TASK_CACHE_RANGE = new LinkedBlockingQueue<>();
    @Autowired
    private AppConfig config;
    @Autowired
    private UploadTaskRepository uploadTaskRepository;
    @Autowired
    private UpLoadExcelService upLoadExcelService;

    @PostConstruct
    public void init() {
        // Register the data-target ES cluster ("es-target" in the config file).
        EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress());
    }

    /**
     * Fetches tasks with task_type 3 and crawl_status 0, marks each as
     * "uploading" (status 1) in the DB so it is not picked up twice, and
     * queues it for {@link #produce()}.
     */
    public void tryAcquire() {
        List<UploadTask> taskList2 = uploadTaskRepository.getTaskNeedUpLoad();
        for (UploadTask task : taskList2) {
            Map<Long, List<? extends Number>> cache = Maps.newHashMap();
            long taskId = task.getId().longValue();
            cache.put(taskId, Lists.newArrayList(0L, 0L, 0, 1, 1));
            try {
                // Flip the DB status before queueing so the task is claimed exactly once.
                uploadTaskRepository.updateCrawlStatus(taskId, 1, 0, 0, 0);
                P_TASK_CACHE_RANGE.put(cache);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * Takes one queued task, unzips its uploaded package and imports the data
     * (Excel or plain-text) into ES, then updates the task's crawl_status:
     * 3 = success, 5 = failure.
     *
     * Fixes over the original: {@code findById(..).get()} no longer throws when
     * the task row has vanished; the success path no longer NPEs when the
     * pub-time map is missing min/max (e.g. parseExcel2 threw midway); the
     * sleep's {@link InterruptedException} restores the interrupt status.
     */
    public void produce() {
        Map<Long, List<? extends Number>> range = P_TASK_CACHE_RANGE.poll(); // null when queue is empty
        if (Objects.isNull(range)) {
            return;
        }
        long taskId = 0L;
        for (Map.Entry<Long, List<? extends Number>> entry : range.entrySet()) {
            taskId = entry.getKey();
        }
        // NOTE(review): site_type may need a join to resolve — confirm against the schema.
        UploadTask task = uploadTaskRepository.findById(taskId).orElse(null);
        if (task == null) {
            // Task row disappeared between tryAcquire() and now; nothing to process.
            LOGGER.error("Upload task not found, id = " + taskId);
            return;
        }
        LOGGER.info("开始上传的任务是:" + JSONObject.toJSONString(task));
        String subjectId = task.getSubjectId().toString();
        String crawlDataFlag = task.getCrawlDataFlag();
        String zipPath = config.getUploadZipPath();
        String zipName = task.getFileName();
        // Unzip into a folder named after the zip itself.
        String zipFileName = ZipUtils.getZipFileName(zipName, zipPath);
        Map<String, List<String>> fileNameMap = ZipUtils.unZip(new File(zipPath + zipName), zipPath + zipFileName);
        if (fileNameMap.size() == 0) {
            // Empty or unreadable archive: mark the task failed (status 5).
            uploadTaskRepository.updateCrawlStatus(taskId, 5, 0, 0, 0);
            LOGGER.error("[上传失败] 已经上传完的任务是:" + taskId + " ,但是解析文件失败,得将 crawl_status 改成5 !");
        } else {
            String fileName = fileNameMap.get("excelName").get(0);
            fileNameMap.remove("excelName");
            int dataTotal = 0;
            Map<String, Long> pubTimeMap = new HashMap<>();
            if (fileName.contains("xlsx")) { // Excel upload
                LOGGER.info("上传的是 Excel 类型的数据");
                try {
                    Map<String, Object> returnMap = upLoadExcelService.parseExcel2(subjectId,
                            zipPath + zipFileName + "/", fileName, fileNameMap, crawlDataFlag);
                    dataTotal = Integer.valueOf(returnMap.get("dataCount").toString());
                    pubTimeMap = (Map<String, Long>) returnMap.get("pubTimeMap");
                } catch (Exception e) {
                    LOGGER.error("parseExcel2 failed for task " + taskId, e);
                }
            } else { // plain-text upload
                LOGGER.info("上传的是 非 Excel 类型的数据");
                String theFinalFilePath = zipPath + zipFileName + "/" + fileName;
                dataTotal = upLoadExcelService.uploadTxt(subjectId, theFinalFilePath, crawlDataFlag);
            }
            if (dataTotal == 0) {
                LOGGER.error("[上传失败] 已经上传完的任务是:" + taskId + " , 但是写入成功的数据是0条,得将 crawl_status 改成5 !");
                uploadTaskRepository.updateCrawlStatus(taskId, 5, dataTotal, 0, 0);
            } else {
                LOGGER.info("[上传成功] 已经上传完的任务是:" + taskId + " ,可以将 crawl_status 改成3 了!");
                // ES indexing is near-real-time: wait before flipping the status
                // so follow-up queries can actually see the new documents.
                try {
                    Thread.sleep(30000);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
                // Guard against a missing min/max instead of NPE-unboxing.
                Long pubMin = pubTimeMap.get("min");
                Long pubMax = pubTimeMap.get("max");
                uploadTaskRepository.updateCrawlStatus(taskId, 3, dataTotal,
                        pubMin == null ? 0L : pubMin, pubMax == null ? 0L : pubMax);
            }
        }
    }
}

321
cl_query_data_job/src/main/java/com/bfd/mf/job/util/DataCheckUtil.java

@ -0,0 +1,321 @@
package com.bfd.mf.job.util;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Utilities for normalizing messy date strings and converting between
 * string, epoch-seconds, and epoch-millis representations.
 *
 * All SimpleDateFormat instances are method-local because the class is
 * static-only and SimpleDateFormat is not thread-safe.
 */
public class DataCheckUtil {
    // Strict "yyyy-MM-dd HH:mm:ss" pattern used to validate normalized timestamps.
    public static Pattern datePattrn = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}\\s\\d{2}\\:\\d{2}:\\d{2}$");
    // Date-only pattern, tolerant of 2-digit years and 1-digit month/day.
    public static Pattern dayPattrn = Pattern.compile("^\\d{2,4}\\-\\d{1,2}\\-\\d{1,2}$");
    // Whitespace detector/splitter.
    public static Pattern p = Pattern.compile("\\s+");
    // Digits-only matcher, compiled once instead of per isNum() call.
    private static final Pattern NUM_PATTERN = Pattern.compile("^\\d+$");
    private static final Logger LOG = Logger.getLogger(DataCheckUtil.class);

    /**
     * Normalizes a date string (dropping any "Z" suffix) and validates it
     * against "yyyy-MM-dd HH:mm:ss"; falls back to the current time when the
     * normalized result still does not match.
     * (The typo in the method name is kept for backward compatibility.)
     */
    public static String chechData2(String dataStr){
        dataStr = dataStr.replace("Z","");
        dataStr = checkData(dataStr);
        Matcher matcher = datePattrn.matcher(dataStr);
        if(!matcher.find()){
            System.out.println("格式错误,使用当前时间 : " + dataStr);
            dataStr = DateUtil.getDateTime();
        }else{
            dataStr = matcher.group(0);
        }
        return dataStr;
    }

    /**
     * Best-effort normalization of a messy date string to
     * "yyyy-MM-dd HH:mm:ss". Handles "/" separators, 2-digit years (assumed
     * 20xx), 1-digit month/day/hour/minute/second, missing time parts, and
     * common scraping artifacts like "-:" / ":-". Returns the current time
     * when nothing usable can be assembled.
     */
    public static String checkData(String dataStr){
        SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); // local: not thread-safe
        if(StringUtils.isBlank(dataStr)){
            return ddf.format(new Date());
        }
        if(dataStr.contains("-:")){
            dataStr = dataStr.replace("-:",":");
        }
        if(dataStr.contains(":-")){
            dataStr = dataStr.replace(":-",":");
        }
        Matcher matcher = datePattrn.matcher(dataStr);
        if(!matcher.find()){
            dataStr = dataStr.trim();
            if(!p.matcher(dataStr).find()){
                // Single token: it must at least look like a date, else give up.
                if(!dayPattrn.matcher(dataStr).find()){
                    return ddf.format(new Date());
                }
            }
            String[] dates = dataStr.split("\\s+");
            String years = "";
            String times = "";
            if(dates.length == 2){
                years = dates[0];
                times = dates[1];
            }else{
                years = dates[0];
            }
            if(years.contains("/")){
                years = years.replace("/", "-");
            }
            // Assemble the date part, zero-padding month/day and expanding 2-digit years.
            String[] yearStr = years.split("-");
            String yms = "" ;
            if(yearStr.length == 3){
                String year = yearStr[0];
                String month = yearStr[1];
                String day = yearStr[2];
                if(year.length() == 2){
                    year = "20"+year; // 2-digit years assumed to be 20xx
                }
                if(month.length() == 1){
                    month = "0"+month;
                }
                if(day.length() == 1){
                    day = "0"+day;
                }
                yms = year+"-"+month+"-"+day;
            }
            // Assemble the time part, defaulting missing pieces to 00.
            String hms = "";
            if(StringUtils.isBlank(times)){
                hms = "00:00:00";
            }else{
                times = times.replace("/", ":");
                if(times.contains(":")){
                    String[] timeStr = times.split(":");
                    if( timeStr.length >= 3 ){
                        String hours = timeStr[0];
                        String mins = timeStr[1];
                        String s = timeStr[2];
                        if(hours.length() == 1){
                            hours = "0"+hours;
                        }
                        if(mins.length() == 1){
                            mins = "0"+mins;
                        }
                        if(s.length() == 1){
                            s = "0"+s;
                        }
                        hms = hours+":"+mins+":"+s;
                    }else if(timeStr.length == 2){
                        String hours = timeStr[0];
                        String mins = timeStr[1];
                        String s = "00";
                        if(hours.length() == 1){
                            hours = "0"+hours;
                        }
                        if(mins.length() == 1){
                            mins = "0"+mins;
                        }
                        hms = hours+":"+mins+":"+s;
                    } else {
                        String hours = timeStr[0];
                        String mins = "00" ;
                        String s = "00";
                        if(hours.length() == 1){
                            hours = "0"+hours;
                        }
                        hms = hours+":"+mins+":"+s;
                    }
                }else{
                    if(isNum(times) && times.length()==2){
                        hms = times+":00:00";
                    }else if(isNum(times) && times.length()==1){
                        hms = "0"+times+":00:00";
                    }else{
                        hms = "00:00:00" ;
                    }
                }
            }
            if(StringUtils.isBlank(yms)){
                return ddf.format(new Date()); // could not assemble a date part
            }
            // Fixed: the original compared with != "" (reference identity).
            if(!yms.isEmpty() || !hms.isEmpty()){
                return yms+" "+hms;
            }
        }
        return dataStr ;
    }

    // True only when the whole string is digits. Fixed: the original used
    // find() on "\\d+", which accepted mixed input like "1a" and let
    // checkData() emit invalid times such as "1a:00:00".
    private static boolean isNum(String time){
        return NUM_PATTERN.matcher(time).matches();
    }

    /**
     * Converts an epoch timestamp string (milliseconds when 13 chars long,
     * otherwise seconds) to "yyyy-MM-dd HH:mm:ss"; blank input yields "now".
     */
    public static String convertStringTotime(String datetime){
        if(StringUtils.isBlank(datetime)){
            return DateUtil.getDateTime(System.currentTimeMillis());
        }
        String creationTime = "";
        if(datetime.length() == 13){
            creationTime = DateUtil.getDateTime(Long.valueOf(datetime));
        }else{
            creationTime = DateUtil.getDateTime(Long.valueOf(datetime) *1000);
        }
        return creationTime ;
    }

    /**
     * Converts an epoch timestamp string to epoch milliseconds
     * (13-char input is already millis; otherwise treated as seconds).
     */
    public static long convertStringToLong(String datetime){
        if(StringUtils.isBlank(datetime)){
            return System.currentTimeMillis();
        }
        long creationTime ;
        if(datetime.length() == 13){
            creationTime = Long.valueOf(datetime);
        }else{
            creationTime = Long.valueOf(datetime) *1000;
        }
        return creationTime ;
    }

    /**
     * Converts an epoch timestamp string to epoch seconds
     * (13-char input is treated as millis and divided down).
     */
    public static long convertTimeTotime(String datetime){
        if(StringUtils.isBlank(datetime)){
            return System.currentTimeMillis() / 1000;
        }
        long creationTime ;
        if(datetime.length() == 13){
            creationTime = Long.valueOf(datetime) / 1000;
        }else{
            creationTime = Long.valueOf(datetime) ;
        }
        return creationTime ;
    }

    /**
     * Parses "yyyy-MM-dd HH:mm:ss" into epoch seconds.
     * Blank or "null" input yields the current time; parse failures return 0.
     */
    public static long convertDateTotime(String datetime){
        if(StringUtils.isBlank(datetime)){
            return System.currentTimeMillis() / 1000;
        }
        long creationTime = 0;
        try {
            if(!("null").equals(datetime)) {
                SimpleDateFormat ddf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                creationTime = ddf1.parse(datetime).getTime() / 1000;
            }else{
                creationTime = new Date().getTime()/1000;
            }
        } catch (Exception e) {
            // Unparseable input falls through with creationTime == 0.
            LOG.error("DataCheckUtil convertDateTotime() err data:" + datetime);
        }
        return creationTime ;
    }

    /** Current time as "yyyy-MM-dd HH:mm:ss". */
    public static String getCurrentTime(){
        SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return ddf.format(new Date());
    }

    /** Epoch millis as "yyyy-MM-dd HH:mm:ss" (pubTimeStr / crawlTimeStr / createTimeStr). */
    public static String getCurrentTime(long dateTime){
        SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return ddf.format(new Date(dateTime));
    }

    /** Epoch millis as ISO-ish "yyyy-MM-dd'T'HH:mm:ss.SSSXXX" (pubDate / crawlDate / createDate). */
    public static String getDate(long dateTime){
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
        return sdf.format(new Date(dateTime));
    }

    /**
     * "yyyy-MM-dd HH:mm:ss" string re-rendered as "yyyy-MM-dd'T'HH:mm:ss.SSSXXX";
     * unparseable input yields the current time in that format.
     */
    public static String getDate(String dateTime){
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
        SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            Date date = ddf.parse(dateTime) ;
            return sdf.format(date);
        } catch (ParseException e) {
            LOG.error("DataCheckUtil getDate() err data:"+dateTime);
        }
        return sdf.format(new Date());
    }

    /** Epoch millis truncated to local midnight of the same day; 0 on failure. */
    public static long getDay(long dateTime){
        try{
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
            String dayStr = sdf.format(new Date(dateTime));
            Date date = sdf.parse(dayStr);
            return date.getTime();
        }catch(Exception e){
            LOG.error("DataCheckUtil getDay() err data:"+dateTime);
        }
        return 0;
    }

    /** "yyyy-MM-dd" string parsed to epoch millis at local midnight; 0 on failure. */
    public static long getDay(String dateTime){
        try{
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
            Date date = sdf.parse(dateTime);
            return date.getTime();
        }catch(Exception e){
            LOG.error("DataCheckUtil getDay2() err data:"+dateTime);
        }
        return 0;
    }
}

365
cl_query_data_job/src/main/java/com/bfd/mf/job/util/DateUtil.java

@ -0,0 +1,365 @@
/*
* Copyright (C) 2016 Baifendian Corporation
* <p>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bfd.mf.job.util;
import com.bfd.nlp.common.util.string.TStringUtils;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
/**
 * Date/time helper utilities: epoch-millis formatting, interval math, and a
 * small hour:minute holder (CronDate) used for cron-style schedules.
 *
 * SimpleDateFormat instances are method-local because they are not thread-safe.
 */
public class DateUtil {
    private static final String DATE_UNIT_DAY = "D";
    public static final String DATE_UNIT_HOUR = "H";
    public static String TIME_FORMAT = "yyyy-MM-dd HH:mm:ss";
    public static String DATE_FORMAT = "yyyy-MM-dd";
    public static String DATE_FORMAT2 = "yyyy.MM.dd";

    /**
     * Number of whole units (rounded up) between two epoch-millis timestamps.
     *
     * Fixes over the original: an unrecognized unit used to divide by zero,
     * and the division was done in integer arithmetic so Math.ceil was a no-op.
     *
     * @param startTime start, epoch millis
     * @param endTime   end, epoch millis
     * @param unit      "D" for days or "H" for hours
     * @return the ceiling of the interval in the given unit, or -1 for a
     *         null/unknown unit or a non-positive interval
     */
    public static double getTimeIntervalByUnit(long startTime, long endTime, String unit) {
        long dateDistance = endTime - startTime;
        if (null == unit || dateDistance <= 0) {
            return -1;
        }
        int interval = 0;
        if (DATE_UNIT_DAY.equals(unit)) {
            interval = 24 * 3600 * 1000;
        }
        if (DATE_UNIT_HOUR.equals(unit)) {
            interval = 3600 * 1000;
        }
        if (interval == 0) {
            return -1; // unknown unit
        }
        return Math.ceil((double) dateDistance / interval);
    }

    /** Current instant as "yyyyMMddHHmmssSSS", e.g. 20160807190815678. */
    public static String getTimeStrForNow() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmssSSS");
        return sdf.format(new Date());
    }

    // "Now, but in 1970" as yyyyMMddHHmmssSSS — the numeric default for timeStr2Long().
    // Fixed: Date.setYear(1970) is 1900-offset and actually produced year 3870.
    private static String getTimeStrDefault() {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmssSSS");
        Calendar cal = Calendar.getInstance();
        cal.set(Calendar.YEAR, 1970);
        return sdf.format(cal.getTime());
    }

    /** UTF-8 bytes of a timestamp string; empty array if the charset is unavailable. */
    public static byte[] timeStr2Chars(String timeStr) {
        try {
            return timeStr.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is required on every JVM; this path is effectively unreachable.
            e.printStackTrace();
        }
        return new byte[0];
    }

    /** Parses a yyyyMMddHHmmssSSS string to a long; empty input gets the 1970 default. */
    public static long timeStr2Long(String timeStr) {
        if (TStringUtils.isEmpty(timeStr)) {
            String defTm = getTimeStrDefault();
            return Long.parseLong(defTm);
        }
        return Long.parseLong(timeStr);
    }

    private static Date parseDate(long time) {
        return new Date(time);
    }

    /** Epoch millis to "yyyy-MM-dd". */
    public static String parseDateByday(long timestamp) {
        Date date = parseDate(timestamp);
        SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT);
        return format.format(date);
    }

    /** Epoch millis to "yyyy.MM.dd". */
    public static String parseDateByday2(long timestamp) {
        Date date = parseDate(timestamp);
        SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT2);
        return format.format(date);
    }

    /** Epoch millis to "yyyy-MM-dd HH:mm:ss". */
    public static String parseDateByTime(long timestamp) {
        Date date = parseDate(timestamp);
        SimpleDateFormat format = new SimpleDateFormat(TIME_FORMAT);
        return format.format(date);
    }

    /** Epoch millis rendered with a caller-supplied SimpleDateFormat pattern. */
    public static String parseDateByFormat(long timestamp, String format) {
        Date date = parseDate(timestamp);
        SimpleDateFormat dateFormat = new SimpleDateFormat(format);
        return dateFormat.format(date);
    }

    /**
     * Day of week as a number, Monday = 1 ... Sunday = 7.
     */
    public static int getDay() {
        Calendar cal = Calendar.getInstance();
        int day = cal.get(Calendar.DAY_OF_WEEK) - 1;
        day = day == 0 ? 7 : day;
        return day;
    }

    /**
     * Seconds elapsed since local midnight.
     */
    public static int getSecondsNow() {
        Calendar curDate = Calendar.getInstance();
        Calendar tommorowDate = new GregorianCalendar(curDate
                .get(Calendar.YEAR), curDate.get(Calendar.MONTH), curDate
                .get(Calendar.DATE) + 1, 0, 0, 0);
        return 24 * 3600 - ((int) (tommorowDate.getTimeInMillis() - curDate.getTimeInMillis()) / 1000);
    }

    /**
     * Hour:minute holder for cron-style schedules.
     * NOTE(review): this extends Date but its {@code before(CronDate)} is an
     * overload, not an override, of {@code Date.before(Date)} — callers holding
     * a Date reference get Date's comparison.
     */
    public static class CronDate extends Date {
        private int hour;
        private int minute;

        public CronDate(int h, int m) {
            this.hour = h;
            this.minute = m;
        }

        CronDate() {
            this.hour = 0;
            this.minute = 0;
        }

        int getHour() {
            return hour;
        }

        void setHour(int hour) {
            this.hour = hour;
        }

        public int getMinute() {
            return minute;
        }

        public void setMinute(int minute) {
            this.minute = minute;
        }

        /** True when this hour:minute is strictly earlier than {@code date}'s. */
        public boolean before(CronDate date) {
            if (null == date) {
                return false;
            }
            if (date.getHour() != this.getHour()) {
                return (this.getHour() - date.getHour() < 0);
            }
            // Same hour: compare minutes.
            return (this.getMinute() - date.getMinute() < 0);
        }

        /** Renders as zero-padded "HH:mm". */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            if (hour >= 10) {
                sb.append(hour);
            } else {
                sb.append("0").append(hour);
            }
            sb.append(":");
            if (minute >= 10) {
                sb.append(minute);
            } else {
                sb.append("0").append(minute);
            }
            return sb.toString();
        }
    }

    /**
     * Parses "HH:mm" (or just "HH") into a CronDate; null for empty input.
     * Fixed: an empty segment (e.g. "::") used to NPE when unboxing.
     */
    public static CronDate parseDateFromStr(String dateStr) {
        if (TStringUtils.isEmpty(dateStr)) {
            return null;
        }
        String[] ts = dateStr.split(":");
        if (null == ts || ts.length == 0) {
            return null;
        }
        CronDate date = new CronDate();
        for (int i = 0; i < ts.length; i++) {
            Integer num = parseDoubleStr(ts[i]);
            if (num == null) {
                continue; // empty segment: keep the default 0
            }
            if (i == 0) {
                date.setHour(num);
            } else if (i == 1) {
                date.setMinute(num);
            }
        }
        return date;
    }

    // Parses a possibly zero-padded integer string; null for empty input.
    private static Integer parseDoubleStr(String st) {
        if (TStringUtils.isEmpty(st)) {
            return null;
        }
        while (st.startsWith("0") && st.length() > 1) {
            st = st.substring(1);
        }
        if (TStringUtils.isEmpty(st)) {
            return 0;
        }
        return Integer.parseInt(st);
    }

    /**
     * Current local time as {hour-of-day, minute}.
     * (Rewritten on Calendar to drop the deprecated Date.getHours/getMinutes.)
     */
    public static int[] getCurrentHourAndMinute() {
        int[] dat = new int[2];
        Calendar cal = Calendar.getInstance();
        dat[0] = cal.get(Calendar.HOUR_OF_DAY);
        dat[1] = cal.get(Calendar.MINUTE);
        return dat;
    }

    /** Parses "yyyy-MM-dd HH:mm:ss"; falls back to "now" on failure. */
    public static Date stringToDate(String dateStr) {
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try {
            return sdf.parse(dateStr);
        } catch (ParseException e) {
            return new Date();
        }
    }

    /** Server "now" as "yyyy-MM-dd HH:mm:ss"; empty string on failure. */
    public static String getDateTime(){
        try{
            SimpleDateFormat datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            return datetime.format(Calendar.getInstance().getTime());
        } catch(Exception e){
            return "";
        }
    }

    /** Epoch millis as "yyyy-MM-dd HH:mm:ss"; empty string on failure. */
    public static String getDateTime(long date){
        try{
            SimpleDateFormat datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            return datetime.format(new Date(date));
        } catch(Exception e){
            return "";
        }
    }

    /** Current epoch millis. */
    public static long getcurr(){
        return new Date().getTime();
    }
}

286
cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java

@ -0,0 +1,286 @@
package com.bfd.mf.job.util;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import javax.mail.Session;
import javax.mail.Transport;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeMessage;
import com.bfd.crawler.utils.JsonUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
/**
 * Mail alarm sender for the data-collection platform.
 *
 * <p>Builds alarm e-mails from the configured subject/body templates
 * (placeholders {cid}, {categoryName}, {type}, {time}, {sample}) and sends
 * them via SMTP (JavaMail). A copy of the alarm text is also forwarded to a
 * WeChat relay service over HTTP (best-effort).
 *
 * <p>Thread-safety: {@link #getInstance()} uses double-checked locking, but
 * the mutable static configuration fields themselves are not synchronized;
 * they are expected to be written once at startup.
 *
 * @author yabo.li
 */
public class EMailUtils {
    private static final Logger LOG = Logger.getLogger(EMailUtils.class);

    // Sender mailbox account and password.
    // NOTE: some SMTP servers require a separate authorization code instead of
    // the mailbox login password; in that case put the authorization code here.
    public static String myEmailAccount = "bfd_crawler_alarm@baifendian.com";
    public static String myEmailPassword = "bfd_crawler_alarm";
    // SMTP server address of the sender mailbox; varies per provider
    // (e.g. NetEase 163 mail uses smtp.163.com).
    private static String myEmailSMTPHost = "smtp.baifendian.com";
    // Default recipient mailbox.
    public static String receiveMailAccount = "chaofan.tan@baifendian.com";

    private String confPath = "../etc/config.properties";
    private static EMailUtils instance = null;
    private String protocol = "smtp";
    private String smtpAuth = "true";
    private static String emailEncode = "UTF-8";
    // Subject / body templates; placeholders are substituted in sendEmail().
    private static String emailTitle = "[{cid}]数据采集异常报警 — 智能数据采集平台";
    private static String emailContent = "你好:\r\n\r\n报警对象:{cid}-{categoryName} \r\n报警原因:{type},请及时检查!。 \r\n\r\n报警时间:{time}";

    /**
     * Private constructor: initializes the mail configuration.
     *
     * <p>Reading from the properties file is currently disabled (an empty
     * {@link Properties} is used), so the hard-coded defaults passed to
     * {@code getProperty} below are always in effect:
     * crawl.alert.mail.transport.protocol=smtp,
     * crawl.alert.mail.smtp.host=intmail.baifendian.com,
     * crawl.alert.mail.smtp.auth=true, plus account/password/encoding.
     */
    private EMailUtils() {
        LOG.info("EMailUtils:init");
        // Properties pro = LoadConfig.getInstance().getPro(confPath);
        Properties pro = new Properties();
        protocol = pro.getProperty("crawl.alert.mail.transport.protocol", "smtp");
        myEmailSMTPHost = pro.getProperty("crawl.alert.mail.smtp.host", "intmail.baifendian.com");
        smtpAuth = pro.getProperty("crawl.alert.mail.smtp.auth", "true");
        myEmailAccount = pro.getProperty("crawl.alert.email.userName", "bfd_crawler_alarm@baifendian.com");
        myEmailPassword = pro.getProperty("crawl.alert.email.userPasswd", "z26Iyf3vMRb5ejrI");
        emailEncode = pro.getProperty("crawl.alert.email.emailEncode", "UTF-8");
        emailTitle = pro.getProperty("crawl.alert.email.emailTitle", "[{cid}]数据采集异常报警 — 智能数据采集平台");
        emailContent = pro.getProperty("crawl.alert.email.emailContent1", "你好:\r\n\r\n报警对象:{cid}-{categoryName} \r\n报警原因:{type},请及时检查!。 \r\n\r\n报警时间:{time}\r\n\r\n排查线索:{sample}");
        LOG.info("EMailUtils protocol:" + protocol + " myEmailSMTPHost:" + myEmailSMTPHost
                + " smtpAuth: " + smtpAuth + " myEmailAccount: " + myEmailAccount
                + " emailEncode: " + emailEncode + " config path: " + confPath);
    }

    /**
     * Returns the lazily-created singleton (double-checked locking).
     */
    public static EMailUtils getInstance() {
        if (instance == null) {
            synchronized (EMailUtils.class) {
                if (instance == null) {
                    instance = new EMailUtils();
                }
            }
        }
        return instance;
    }

    /**
     * Overrides the configuration file path. Currently has no effect on the
     * loaded configuration because the constructor no longer reads the file.
     */
    public void setConfigPath(String confPath) {
        this.confPath = confPath;
    }

    /**
     * Posts the alarm message to the WeChat relay service as JSON
     * ({"emails": [...], "message": "..."}).
     *
     * <p>Delivery is best-effort: any failure is printed and swallowed so a
     * WeChat outage never blocks the e-mail path.
     *
     * @param emailList recipient identifiers understood by the relay service
     * @param message   alarm text to forward
     */
    public void sendWechat(List<String> emailList, String message) {
        HttpClientBuilder httpBuilder = HttpClientBuilder.create();
        HttpClient client = httpBuilder.build();
        HttpPost httppost = new HttpPost("http://172.18.1.181:8412/sendwechatalarm/"); //Constants.getWechatURL()
        try {
            Map<String, Object> requestMap = new HashMap<String, Object>();
            requestMap.put("emails", emailList);
            requestMap.put("message", message);
            StringEntity entity = new StringEntity(JsonUtils.toJSONString(requestMap), "UTF-8");
            entity.setContentType("application/json");
            httppost.setEntity(entity);
            HttpResponse response = client.execute(httppost);
            HttpEntity en = response.getEntity();
            // Consume the response body so the connection can be reused.
            String content = EntityUtils.toString(en, "utf8");
            LOG.info("SENT WECHAT ALARM:" + JsonUtils.toJSONString(emailList) + " " + JsonUtils.toJSONString(requestMap));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds and sends one alarm e-mail, then forwards the same text to
     * WeChat (with the "@percent.cn" domain stripped from recipients).
     *
     * @param type        alarm type code, 1..6; any other value is ignored
     * @param siteMessage alarm context; keys used: "cid" (required),
     *                    "categoryName" and "sample" (optional)
     * @param emailList   recipient e-mail addresses
     * @param time1       type-dependent detail (a delay in minutes for types
     *                    2/3, the missing field names for type 4)
     */
    public void sendEmail(int type, Map<String, Object> siteMessage, List<String> emailList, String time1) {
        LOG.info("有报警任务,开始发送邮件");
        try {
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            String cid = (String) siteMessage.get("cid");
            String title = emailTitle.replace("{cid}", cid);
            String content = emailContent.replace("{cid}", cid);
            if (siteMessage.containsKey("categoryName")) {
                String categoryName = (String) siteMessage.get("categoryName");
                content = content.replace("{categoryName}", categoryName);
            } else {
                content = content.replace("-{categoryName}", "");
            }
            if (siteMessage.containsKey("sample")) {
                String sample = (String) siteMessage.get("sample");
                content = content.replace("{sample}", sample);
            } else {
                content = content.replace("{sample}", "");
            }
            content = content.replace("{time}", sdf.format(new Date()));
            // Map the alarm type code to a human-readable reason.
            switch (type) {
                case 1:
                    content = content.replace("{type}", "数据的时间格式有误");
                    break;
                case 2:
                case 3:
                    // Types 2 and 3 share the same message text.
                    content = content.replace("{type}", "任务下发后" + time1 + "分钟数据未及时返回");
                    break;
                case 4:
                    content = content.replace("{type}", "数据字段丢失,字段丢失为" + time1);
                    break;
                case 5:
                    content = content.replace("{type}", "数据关键字段值为空");
                    break;
                case 6:
                    // BUG FIX: the original called confPath.replace(...) here,
                    // which replaced the whole alarm body with the config path.
                    content = content.replace("{type}", "解析失败次数超过100次");
                    break;
                default:
                    // Unknown alarm type: nothing to send.
                    return;
            }
            LOG.info("EMailUtils:sendEmail get: siteMessage:" + siteMessage + " emailList:" + emailList + " content:" + content);
            Properties props = new Properties();                      // mail session parameters
            props.setProperty("mail.transport.protocol", protocol);   // protocol required by JavaMail
            props.setProperty("mail.smtp.host", myEmailSMTPHost);     // sender's SMTP server
            props.setProperty("mail.smtp.auth", smtpAuth);            // request authentication
            Session session = Session.getInstance(props);
            session.setDebug(true);
            MimeMessage message = createMimeMessage(session, myEmailAccount, emailList, title, content);
            Transport transport = session.getTransport();
            transport.connect(myEmailAccount, myEmailPassword);
            transport.sendMessage(message, message.getAllRecipients());
            // Forward to WeChat with the mail domain stripped from recipients.
            List<String> emails = new ArrayList<>();
            for (String email : emailList) {
                emails.add(email.replace("@percent.cn", ""));
            }
            sendWechat(emails, content);
            transport.close();
        } catch (Throwable e) {
            e.printStackTrace();
            LOG.error("EMailUtils:sendEmail error. title:" + siteMessage + " emailList:" + emailList);
        }
    }

    /**
     * Creates a simple plain-text e-mail.
     *
     * @param session     session used to talk to the mail server
     * @param sendMail    sender address (local part reused as display name)
     * @param receiveMail recipient addresses; each is added as a TO recipient
     * @param title       message subject
     * @param content     plain-text message body
     * @return the fully-built, saved {@link MimeMessage}
     * @throws Exception if any address or message field cannot be set
     */
    public static MimeMessage createMimeMessage(Session session, String sendMail, List<String> receiveMail, String title, String content) throws Exception {
        MimeMessage message = new MimeMessage(session);
        // From: use the local part of the address as the display name.
        message.setFrom(new InternetAddress(sendMail, sendMail.split("@")[0], "UTF-8"));
        // To: multiple recipients supported (CC/BCC could be added similarly).
        for (String email : receiveMail) {
            message.addRecipient(MimeMessage.RecipientType.TO, new InternetAddress(email, email.split("@")[0], "UTF-8"));
        }
        message.setSubject(title, emailEncode);
        message.setText(content); //setContent(content, "text/html;charset=UTF-8");
        message.setSentDate(new Date());
        message.saveChanges();
        return message;
    }
}

57
cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils.java

@ -2,6 +2,7 @@ package com.bfd.mf.job.util;
import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSONObject;
import com.bfd.mf.job.config.AppConfig;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
@ -35,7 +36,6 @@ import org.slf4j.LoggerFactory;
import org.springframework.util.Assert; import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils; import org.springframework.util.CollectionUtils;
import java.math.BigInteger;
import java.net.InetAddress; import java.net.InetAddress;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
@ -79,11 +79,13 @@ public abstract class EsUtils {
.setIndices(indices) .setIndices(indices)
.setIndicesOptions(IndicesOptions.fromOptions(true, true, .setIndicesOptions(IndicesOptions.fromOptions(true, true,
true, false)) true, false))
.setTypes(type)
//.setTypes(type)
.setQuery(queryBuilder) .setQuery(queryBuilder)
.setScroll(TimeValue.timeValueMinutes(minutes)) .setScroll(TimeValue.timeValueMinutes(minutes))
.setSize(size); .setSize(size);
System.out.println(searchRequestBuilder);
long s = System.currentTimeMillis(); long s = System.currentTimeMillis();
SearchResponse response = searchRequestBuilder.execute().actionGet(); SearchResponse response = searchRequestBuilder.execute().actionGet();
long e = System.currentTimeMillis(); long e = System.currentTimeMillis();
@ -181,7 +183,8 @@ public abstract class EsUtils {
*/ */
public static String[] getIndices(String prefix, String separator, public static String[] getIndices(String prefix, String separator,
long startMills, long endMils, long startMills, long endMils,
String pattern, Long upperMills, String standbyIndex) {
String pattern, Long upperMills,
String standbyIndex,Long year) {
List<String> indexList = Lists.newArrayList(); List<String> indexList = Lists.newArrayList();
LocalDateTime start = new LocalDateTime(startMills); LocalDateTime start = new LocalDateTime(startMills);
LocalDateTime end = new LocalDateTime(endMils); LocalDateTime end = new LocalDateTime(endMils);
@ -190,6 +193,18 @@ public abstract class EsUtils {
indexList.add(standbyIndex); indexList.add(standbyIndex);
start = upper; start = upper;
} }
if(startMills < year){
for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusYears(1)) {
String dtStr = dt.toString(DateTimeFormat.forPattern("YYYY"));
String index = new StringBuilder()
.append(prefix)
.append(separator)
.append(dtStr)
.toString();
indexList.add(index);
}
start = new LocalDateTime(year);
}
for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusDays(1)) { for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusDays(1)) {
String dtStr = dt.toString(DateTimeFormat.forPattern(pattern)); String dtStr = dt.toString(DateTimeFormat.forPattern(pattern));
String index = new StringBuilder() String index = new StringBuilder()
@ -199,14 +214,44 @@ public abstract class EsUtils {
.toString(); .toString();
indexList.add(index); indexList.add(index);
} }
// indexList.add("cl_index_video");
// indexList.add("cl_index_social");
// indexList.add("cl_index_news");
// 只拉主贴
indexList.add("cl_index_item");
String[] indices = new String[indexList.size()]; String[] indices = new String[indexList.size()];
indices = indexList.toArray(indices); indices = indexList.toArray(indices);
return indices; return indices;
} }
// public static void main(String[] args) {
// String prefix = "cl_aaa_";
// String separator = "-";
// long startMills = 1083340800000L;
// long endMils = 1556640000000L; //1556640000 1546272000000L
// String pattern = AppConfig.DATE_FORMAT;
// Long upperMills = 946656000L;
// String standbyIndex = "cl_index_0";
// String [] indexs = {};
// if(startMills < 1546272000000L){
// LocalDateTime start = new LocalDateTime(startMills);
// LocalDateTime end = new LocalDateTime(endMils);
// LocalDateTime upper = new LocalDateTime(upperMills);
// for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusYears(1)) {
// String dtStr = dt.toString(DateTimeFormat.forPattern("YYYY"));
// String index = new StringBuilder()
// .append(prefix)
// .append(separator)
// .append(dtStr)
// .toString();
// System.out.println("*** "+ index);
// //indexs.add(index);
// }
// }
// startMills = 1546272000000L;
// indexs = getIndices(prefix,separator,startMills,endMils,pattern,upperMills,standbyIndex);
// for (int i = 0 ; i < indexs.length ; i ++){
// System.out.println(indexs[i]);
// }
// }
/** /**
* 根据indexName获取一定存在的index * 根据indexName获取一定存在的index
* 如果indexName存在则返回不存在则创建 * 如果indexName存在则返回不存在则创建

451
cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils2.java

@ -0,0 +1,451 @@
package com.bfd.mf.job.util;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexResponse;
import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest;
import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.ClearScrollRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
import org.joda.time.LocalDateTime;
import org.joda.time.format.DateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;
import java.net.InetAddress;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
/**
 * Elasticsearch transport-client helpers: cluster registration, scroll
 * queries, single/bulk indexing, index-name generation and index management.
 *
 * <p>Thread-safety: {@code CLIENT_MAP} is a plain HashMap; clusters are
 * expected to be registered once at startup before any concurrent reads.
 */
public abstract class EsUtils2 {
    // BUG FIX: the original logger was created for EsUtils.class, which made
    // this class's log lines appear under the wrong logger name.
    private static final Logger LOGGER = LoggerFactory.getLogger(EsUtils2.class);

    /** clusterName -> connected transport client; filled by registerCluster(). */
    private static final Map<String, TransportClient> CLIENT_MAP = Maps.newHashMap();

    /**
     * Connects a transport client to the given cluster and registers it under
     * {@code clusterName}.
     *
     * @param clusterName cluster name (must match the remote cluster.name)
     * @param addresses   node addresses in "ip:port" form
     * @throws RuntimeException if any address cannot be resolved or parsed
     */
    public static void registerCluster(String clusterName, String[] addresses) {
        System.setProperty("es.set.netty.runtime.available.processors", "false");
        Assert.hasLength(clusterName, "Param clusterName must not be empty.");
        Assert.notEmpty(addresses, "Param addresses must not be empty.");
        Settings settings = Settings.builder()
                .put("cluster.name", clusterName).build();
        TransportClient client = new PreBuiltTransportClient(settings);
        try {
            for (String address : addresses) {
                String[] ipAndPort = address.split(":");
                String ip = ipAndPort[0];
                int port = Integer.parseInt(ipAndPort[1]);
                client.addTransportAddress(new TransportAddress(InetAddress.getByName(ip), port));
            }
            CLIENT_MAP.put(clusterName, client);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Scroll-queries the given indices and feeds each page of hits (parsed as
     * JSON) to {@code consumer}. Used when producing main-post data.
     *
     * <p>Scrolling stops early when a page comes back smaller than
     * {@code size}; all scroll contexts are cleared at the end.
     *
     * @param clusterName registered cluster to query
     * @param indices     index name(s), comma form accepted by prepareSearch
     * @param type        document type restriction
     * @param queryBuilder query to run
     * @param size        page size per scroll round-trip
     * @param minutes     scroll-context keep-alive in minutes
     * @param consumer    callback receiving each page of documents
     */
    public static void scrollQuery(String clusterName, String indices, String type,
                                   QueryBuilder queryBuilder, Integer size, int minutes,
                                   Consumer<List<JSONObject>> consumer) {
        TransportClient client = getClient(clusterName);
        SearchRequestBuilder searchRequestBuilder = client.prepareSearch()
                .setIndices(indices)
                .setIndicesOptions(IndicesOptions.fromOptions(true, true,
                        true, false))
                .setTypes(type)
                .setQuery(queryBuilder)
                .setScroll(TimeValue.timeValueMinutes(minutes))
                .setSize(size);
        long s = System.currentTimeMillis();
        SearchResponse response = searchRequestBuilder.execute().actionGet();
        long e = System.currentTimeMillis();
        LOGGER.debug("First query es, size:{}, took:{} ms.",
                response.getHits().getHits().length, (e - s));
        List<String> scrollIds = Lists.newArrayList();
        while (response.getHits().getHits().length > 0) {
            List<JSONObject> dataList = Lists.newLinkedList();
            for (SearchHit hit : response.getHits().getHits()) {
                dataList.add(JSON.parseObject(hit.getSourceAsString()));
            }
            consumer.accept(dataList);
            if (dataList.size() < size) {
                // Short page: no more data, skip the extra empty round-trip.
                break;
            }
            String scrollId = response.getScrollId();
            scrollIds.add(scrollId);
            long s1 = System.currentTimeMillis();
            response = client.prepareSearchScroll(scrollId)
                    .setScroll(TimeValue.timeValueMinutes(minutes))
                    .execute()
                    .actionGet();
            long e1 = System.currentTimeMillis();
            LOGGER.debug("Query es, size:{}, took:{} ms",
                    response.getHits().getHits().length, (e1 - s1));
        }
        if (!CollectionUtils.isEmpty(scrollIds)) {
            ClearScrollRequestBuilder clearScroll = client.prepareClearScroll()
                    .setScrollIds(scrollIds);
            client.clearScroll(clearScroll.request());
        }
    }

    /**
     * Returns the first {@code size} documents matching the query.
     *
     * @param clusterName  registered cluster to query
     * @param index        index to search
     * @param queryBuilder query to run
     * @param size         maximum number of hits to return
     * @return matching documents, or null when there are no hits
     */
    public static List<JSONObject> query(String clusterName, String index, final QueryBuilder queryBuilder, int size) {
        TransportClient client = getClient(clusterName);
        SearchResponse response = client.prepareSearch()
                .setIndices(index)
                .setIndicesOptions(IndicesOptions.fromOptions(true, true,
                        true, false))
                .setSize(size)
                .setFrom(0)
                .setQuery(queryBuilder)
                .execute().actionGet();
        if (response.getHits().totalHits > 0) {
            List<JSONObject> dataList = Lists.newLinkedList();
            SearchHit[] hits = response.getHits().getHits();
            for (SearchHit hit : hits) {
                JSONObject data = new JSONObject();
                data.putAll(hit.getSourceAsMap());
                dataList.add(data);
            }
            return dataList;
        }
        return null;
    }

    /**
     * Builds the list of daily index names covering [startMills, endMils)
     * as {@code prefix + separator + formatted-date}, one per day.
     *
     * @param startMills range start, epoch ms (inclusive)
     * @param endMils    range end, epoch ms (exclusive)
     * @param pattern    Joda date pattern for the index suffix
     */
    public static String[] getIndices(String prefix, String separator,
                                      long startMills, long endMils, String pattern) {
        List<String> indexList = Lists.newArrayList();
        LocalDateTime start = new LocalDateTime(startMills);
        LocalDateTime end = new LocalDateTime(endMils);
        for (LocalDateTime dt = start; dt.isBefore(end); dt = dt.plusDays(1)) {
            String dtStr = dt.toString(DateTimeFormat.forPattern(pattern));
            indexList.add(prefix + separator + dtStr);
        }
        return indexList.toArray(new String[0]);
    }

    /**
     * Builds the list of daily index names covering [startMills, endMils]
     * (end inclusive here, unlike the 5-arg overload). Any part of the range
     * earlier than {@code upperMills} is represented by the single
     * {@code standbyIndex} instead of per-day indices.
     *
     * @param startMills   range start, epoch ms
     * @param endMils      range end, epoch ms (inclusive)
     * @param pattern      Joda date pattern for the index suffix
     * @param upperMills   cutoff before which standbyIndex is used
     * @param standbyIndex fallback index for the pre-cutoff range
     */
    public static String[] getIndices(String prefix, String separator,
                                      long startMills, long endMils, String pattern, Long upperMills, String standbyIndex) {
        List<String> indexList = Lists.newArrayList();
        LocalDateTime start = new LocalDateTime(startMills);
        LocalDateTime end = new LocalDateTime(endMils);
        LocalDateTime upper = new LocalDateTime(upperMills);
        if (start.isBefore(upper)) {
            indexList.add(standbyIndex);
            start = upper;
        }
        for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusDays(1)) {
            String dtStr = dt.toString(DateTimeFormat.forPattern(pattern));
            indexList.add(prefix + separator + dtStr);
        }
        return indexList.toArray(new String[0]);
    }

    /**
     * Indexes one document, using {@code data.getString(idField)} as its id.
     *
     * @return the id assigned by Elasticsearch
     */
    public static String index(String clusterName, String indexName, String type, final JSONObject data, String idField) {
        TransportClient client = getClient(clusterName);
        IndexResponse response = client.prepareIndex(indexName, type)
                .setSource(data, XContentType.JSON)
                .setId(data.getString(idField))
                .get();
        return response.getId();
    }

    /**
     * Indexes each document of {@code dataList} one by one (no bulk request;
     * see {@link #bulkIndex} for the batched variant). No-op for an empty
     * or null list.
     */
    public static void index(String clusterName, String indexName, String type, final List<JSONObject> dataList, String idField) {
        if (CollectionUtils.isEmpty(dataList)) {
            return;
        }
        TransportClient client = getClient(clusterName);
        for (JSONObject data : dataList) {
            client.prepareIndex(indexName, type)
                    .setSource(data, XContentType.JSON)
                    .setId(data.getString(idField))
                    .get();
        }
    }

    /**
     * Indexes the given items in a single bulk request.
     *
     * @return true if ANY item failed (mirrors BulkResponse.hasFailures();
     *         note the inverted-sounding name is kept for compatibility),
     *         true also being returned for an empty list by the original
     *         short-circuit — preserved as-is
     */
    public static boolean bulkIndex(String clusterName, final List<BulkItem> bulkItemList, String idField) {
        if (CollectionUtils.isEmpty(bulkItemList)) {
            return true;
        }
        TransportClient client = getClient(clusterName);
        BulkRequestBuilder rb = client.prepareBulk();
        for (BulkItem item : bulkItemList) {
            rb.add(client.prepareIndex(item.getIndexName(), item.getType(), item.getData().getString(idField))
                    .setSource(item.getData(), XContentType.JSON));
        }
        BulkResponse response = rb.get();
        LOGGER.info("Bulk index, size:{}.", bulkItemList.size());
        return response.hasFailures();
    }

    /**
     * @return whether the index exists on the given cluster
     */
    public static Boolean exists(String clusterName, String indexName) {
        TransportClient client = getClient(clusterName);
        IndicesExistsRequest request = new IndicesExistsRequest()
                .indices(indexName);
        IndicesExistsResponse response = client.admin().indices().exists(request).actionGet();
        return response.isExists();
    }

    /**
     * Creates an index with a shingle analyzer (ik_smart tokenizer +
     * lowercase + 2-gram shingle filter) and the given mapping.
     *
     * @param shardCount   number of primary shards
     * @param replicaCount number of replicas
     * @param mappingDef   mapping definition as a JSON string
     * @return true only when the creation was acknowledged by the master AND
     *         the required shard copies started
     */
    public static Boolean createIndex(String clusterName, String indexName, String type,
                                      Integer shardCount, Integer replicaCount, String mappingDef) {
        TransportClient client = getClient(clusterName);
        CreateIndexRequest request = new CreateIndexRequest(indexName);
        request.settings(Settings.builder()
                .put("index.number_of_shards", shardCount)
                .put("index.number_of_replicas", replicaCount)
                .put("index.refresh_interval", 2, TimeUnit.SECONDS)
                .put("index.analysis.filter.shingle_filter.type", "shingle")
                .put("index.analysis.filter.shingle_filter.min_shingle_size", 2)
                .put("index.analysis.filter.shingle_filter.max_shingle_size", 2)
                .put("index.analysis.filter.shingle_filter.output_unigrams", false)
                .put("index.analysis.analyzer.shingle_analyzer.type", "custom")
                .put("index.analysis.analyzer.shingle_analyzer.tokenizer", "ik_smart")
                .putArray("index.analysis.analyzer.shingle_analyzer.filter", "lowercase", "shingle_filter")
        );
        request.mapping(type, mappingDef, XContentType.JSON);
        CreateIndexResponse createIndexResponse = client.admin().indices().create(request).actionGet();
        return createIndexResponse.isAcknowledged() && createIndexResponse.isShardsAcked();
    }

    /**
     * Deletes an index.
     *
     * @return whether the deletion was acknowledged
     */
    public static Boolean deleteIndex(String clusterName, String indexName) {
        TransportClient client = getClient(clusterName);
        DeleteIndexRequest request = new DeleteIndexRequest()
                .indices(indexName);
        AcknowledgedResponse response = client.admin().indices().delete(request).actionGet();
        return response.isAcknowledged();
    }

    /** @return the client registered for the cluster, or null if unknown. */
    private static TransportClient getClient(String clusterName) {
        return CLIENT_MAP.get(clusterName);
    }

    /** Convenience factory for a {@link BulkItem}. */
    public static BulkItem buildBulkItem(String indexName, String type, final JSONObject data) {
        return new BulkItem()
                .setIndexName(indexName)
                .setType(type)
                .setData(data);
    }

    /**
     * Counts the documents matching the query on one index.
     *
     * @return total hit count, or 0 if the query failed
     */
    public static Long scrollQuery(String clusterName, String indices, String type,
                                   QueryBuilder queryBuilder) {
        Long totalHits = 0L;
        try {
            TransportClient client = getClient(clusterName);
            SearchRequestBuilder searchRequestBuilder = client.prepareSearch()
                    .setIndices(indices)
                    .setIndicesOptions(IndicesOptions.fromOptions(true, true,
                            true, false))
                    .setTypes(type)
                    .setQuery(queryBuilder);
            SearchResponse response = searchRequestBuilder.execute().actionGet();
            totalHits = response.getHits().totalHits;
        } catch (Exception e) {
            // Best-effort count: log and fall back to 0.
            LOGGER.error("scrollQuery count failed for index {}", indices, e);
        }
        return totalHits;
    }

    /**
     * One document destined for a bulk request: target index, type and the
     * JSON payload. All setters return {@code this} for chaining.
     */
    public static class BulkItem {
        String indexName;
        String type;
        JSONObject data;

        public String getIndexName() {
            return indexName;
        }

        public BulkItem setIndexName(String indexName) {
            this.indexName = indexName;
            return this;
        }

        public String getType() {
            return type;
        }

        public BulkItem setType(String type) {
            this.type = type;
            return this;
        }

        public JSONObject getData() {
            return data;
        }

        public BulkItem setData(JSONObject data) {
            this.data = data;
            return this;
        }

        /**
         * Parses a JSON string and stores it as this item's payload.
         * BUG FIX: the original assigned the raw string to {@code type},
         * an evident copy-paste error that corrupted the item's type and
         * never set its data.
         */
        public BulkItem setStringData(String data) {
            this.data = JSON.parseObject(data);
            return this;
        }
    }
}

239
cl_query_data_job/src/main/java/com/bfd/mf/job/util/ReadLine.java

@ -0,0 +1,239 @@
package com.bfd.mf.job.util;
import it.sauronsoftware.jave.Encoder;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.FileImageInputStream;
import javax.imageio.stream.ImageInputStream;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* Created by BFD-229 on 2017/7/6.
*/
public class ReadLine {
public static List<String> readLine( File fileName){
List<String> list = new ArrayList<String> ();
String line;
try {
InputStreamReader read = new InputStreamReader(new FileInputStream(fileName), "utf-8");
BufferedReader reader = new BufferedReader(read);
while ((line = reader.readLine()) != null) {
try {
if (line.length() > 0) {
list.add(line);
}
} catch (Exception e) {
e.printStackTrace();
}
}
return list;
}catch (UnsupportedEncodingException e) {
e.printStackTrace();
return null;
} catch (FileNotFoundException e) {
e.printStackTrace();
return null;
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
// public static List<JSONObject> readLine(File fileName){
// List<JSONObject> list = new ArrayList<JSONObject> ();
// String line;
// try {
// InputStreamReader read = new InputStreamReader(new FileInputStream(fileName), "utf-8");
// BufferedReader reader = new BufferedReader(read);
// while ((line = reader.readLine()) != null) {
// try {
// if (line.length() > 0) {
// list.add(line);
// }
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
// return list;
// }catch (UnsupportedEncodingException e) {
// e.printStackTrace();
// return null;
// } catch (FileNotFoundException e) {
// e.printStackTrace();
// return null;
// } catch (IOException e) {
// e.printStackTrace();
// return null;
// }
// }
// 读取文件内容
public static String readFile(String path){
File file = new File(path);
StringBuilder result = new StringBuilder();
try{
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));//构造一个BufferedReader类来读取文件
String s = null;
while((s = br.readLine())!=null){//使用readLine方法一次读一行
result.append( System.lineSeparator() + s);
}
br.close();
}catch(Exception e){
e.printStackTrace();
}
return result.toString();
}
public static void readFiles(File file){
if (file.exists()) {
System.err.println("exist");
try {
FileInputStream fis = new FileInputStream(file);
InputStreamReader isr = new InputStreamReader(fis, "UTF-8");
BufferedReader br = new BufferedReader(isr);
String line;
while((line = br.readLine()) != null){
System.out.println(line);
}
br.close();
isr.close();
fis.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static String getResolution1(File file) throws IOException {
BufferedImage image = ImageIO.read(file);
return image.getWidth() + "x" + image.getHeight();
}
// public static String getResolution(File file){
// Encoder encoder = new Encoder();
// try {
// MultimediaInfo m = encoder.getInfo(file);
// int height = m.getVideo().getSize().getHeight();
// int width = m.getVideo().getSize().getWidth();
// System.out.println("width:"+width);
// System.out.println("height:" + height);
// FileInputStream fis = new FileInputStream(source);
// FileChannel fc = fis.getChannel();
// BigDecimal fileSize = new BigDecimal(fc.size());
// String size = fileSize.divide(new BigDecimal(1048576), 2, RoundingMode.HALF_UP) + "MB";
// System.out.println("size:" + size);
// long duration = m.getDuration()/1000;
// System.out.println("duration:" + duration + "s");
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
public static String getImageDim(String path) {
String result = null;
String suffix = getFileSuffix(path);
//解码具有给定后缀的文件
Iterator<ImageReader> iter = ImageIO.getImageReadersBySuffix(suffix);
// System.out.println(ImageIO.getImageReadersBySuffix(suffix));
if (iter.hasNext()) {
ImageReader reader = iter.next();
try {
ImageInputStream stream = new FileImageInputStream(new File(path));
reader.setInput(stream);
int width = reader.getWidth(reader.getMinIndex());
int height = reader.getHeight(reader.getMinIndex());
result = width + "×" + height;
} catch (IOException e) {
e.printStackTrace();
} finally {
reader.dispose();
}
}
// System.out.println("getImageDim:" + result);
return result;
}
private static String getFileSuffix(final String path) {
String result = null;
if (path != null) {
result = "";
if (path.lastIndexOf('.') != -1) {
result = path.substring(path.lastIndexOf('.'));
if (result.startsWith(".")) {
result = result.substring(1);
}
}
}
// System.out.println("getFileSuffix:" + result);
return result;
}
/**
 * Reads a video file's frame size via the JAVE Encoder.
 * NOTE: the result is HEIGHT×WIDTH (height first) — this ordering is part of
 * the existing contract and is preserved.
 *
 * @param video path of the video file
 * @return "height×width", or null when media info cannot be read
 */
public static String videosize(String video) {
    Encoder encoder = new Encoder();
    try {
        it.sauronsoftware.jave.MultimediaInfo info = encoder.getInfo(new File(video));
        int height = info.getVideo().getSize().getHeight();
        int width = info.getVideo().getSize().getWidth();
        return height + "×" + width;
    } catch (Exception e) {
        // Best-effort: callers treat null as "size unknown".
        e.printStackTrace();
        return null;
    }
}
// public static String getVideoTime (String path){
// File source = new File(path);
// Encoder encoder = new Encoder();
// File[] file = source.listFiles();
// long sum =0;
// for (File file2 : file) {
// try {
// MultimediaInfo m = encoder.getInfo(file2);
// long ls = m.getDuration()/1000; //ls是获取到的秒数
// sum += ls;
// } catch (Exception e) {
// e.printStackTrace();
// }
// }
// double sum1 = (double)sum;
// double sum2 =sum1/3600;// 转换成为了小时
// System.out.println(sum2);
// return sum2+"";
// }
//
// public static byte[] readFile(String path){
// try {
// FileInputStream fileInputStream = new FileInputStream(path);
// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fileInputStream));
// String line = null;
// while ((line = bufferedReader.readLine()) != null) {
// System.out.println(line);
// }
// fileInputStream.close();
// }catch (Exception e){
// e.printStackTrace();
// }
// }
}

119
cl_query_data_job/src/main/java/com/bfd/mf/job/util/ZipUtils.java

@ -0,0 +1,119 @@
package com.bfd.mf.job.util;
import com.bfd.mf.job.worker.UpLoadProducer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import static org.apache.lucene.store.BufferedIndexInput.BUFFER_SIZE;
public class ZipUtils {
    private static final Logger LOGGER = LoggerFactory.getLogger(ZipUtils.class);

    // Copy-buffer size; 1024 matches the Lucene BufferedIndexInput.BUFFER_SIZE
    // the original statically imported, without the odd compile-time coupling.
    private static final int COPY_BUFFER_SIZE = 1024;

    /**
     * Unzips srcFile into destDirPath and returns an index of the entry names:
     * entries containing ".xlsx" or "txt" are recorded under the key
     * "excelName"; entries with a "/" are recorded as top-dir -> list of
     * immediate children. Entries that would escape destDirPath ("zip slip",
     * e.g. names containing "..") are rejected.
     *
     * @param srcFile     the zip archive; a missing file yields an empty map
     * @param destDirPath target directory for the extracted files
     * @return the entry-name index described above
     * @throws RuntimeException wrapping the underlying failure when extraction errors out
     */
    public static Map<String, List<String>> unZip(File srcFile, String destDirPath) throws RuntimeException {
        Map<String, List<String>> fileNameMap = new HashMap<>();
        long start = System.currentTimeMillis();
        if (!srcFile.exists()) {
            return fileNameMap;
        }
        // try-with-resources closes the ZipFile on every path (the original
        // closed it in finally, but leaked per-entry streams on exceptions).
        try (ZipFile zipFile = new ZipFile(srcFile)) {
            File destDir = new File(destDirPath);
            String canonicalDest = destDir.getCanonicalPath();
            Enumeration<?> entries = zipFile.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = (ZipEntry) entries.nextElement();
                collectEntryName(entry.getName(), fileNameMap);
                File target = new File(destDir, entry.getName());
                // Security: block path-traversal entry names (zip slip).
                if (!target.getCanonicalPath().startsWith(canonicalDest + File.separator)) {
                    throw new IOException("Blocked zip entry outside target dir: " + entry.getName());
                }
                if (entry.isDirectory()) {
                    target.mkdirs();
                } else {
                    // Make sure the parent directory exists before writing.
                    File parent = target.getParentFile();
                    if (parent != null && !parent.exists()) {
                        parent.mkdirs();
                    }
                    try (InputStream is = zipFile.getInputStream(entry);
                         FileOutputStream fos = new FileOutputStream(target)) {
                        byte[] buf = new byte[COPY_BUFFER_SIZE];
                        int len;
                        while ((len = is.read(buf)) != -1) {
                            fos.write(buf, 0, len);
                        }
                    }
                }
            }
            long end = System.currentTimeMillis();
            LOGGER.info("解压完成,耗时:{} ms", end - start);
        } catch (Exception e) {
            // Cause is preserved in the wrapper; no printStackTrace needed.
            throw new RuntimeException("unzip error from ZipUtils", e);
        }
        return fileNameMap;
    }

    /** Records one entry name into the index map (original classification rules kept). */
    private static void collectEntryName(String entryName, Map<String, List<String>> fileNameMap) {
        if (entryName.contains(".xlsx") || entryName.contains("txt")) {
            // NOTE(review): both spreadsheet and txt entries land under the single
            // "excelName" key, each one REPLACING the previous list — this mirrors
            // the original behavior; confirm it is intended.
            List<String> names = new ArrayList<>();
            names.add(entryName);
            fileNameMap.put("excelName", names);
        } else if (entryName.contains("/")) {
            String[] parts = entryName.split("/");
            if (parts.length > 1) {
                // Group the first path segment -> its immediate children.
                fileNameMap.computeIfAbsent(parts[0], k -> new ArrayList<>()).add(parts[1]);
            }
        }
    }

    /**
     * Strips ".zip" from zipName and ensures the directory zipPath + name exists.
     *
     * @param zipName archive file name, e.g. "data.zip"
     * @param zipPath base path (expected to end with a separator, as callers pass it)
     * @return the archive name without the ".zip" suffix
     */
    public static String getZipFileName(String zipName, String zipPath) {
        String zipFileName = zipName.replace(".zip", "");
        File zipDir = new File(zipPath + zipFileName);
        if (!zipDir.exists()) {
            zipDir.mkdir();
        }
        return zipFileName;
    }
}

38
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java

@ -0,0 +1,38 @@
package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.service.alarm.AlarmService;
import com.bfd.mf.job.service.taskCount.TaskCountService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
@Component
public class AlarmProducer extends AbstractWorker {
    private static final Logger LOGGER = LoggerFactory.getLogger(AlarmProducer.class);

    @Autowired
    private AppConfig config;

    @Autowired
    private AlarmService alarmService;

    /** Worker-thread count, taken from application configuration. */
    @Override
    protected Integer getThreadCount() {
        return config.getAlarmProducerThreadCount();
    }

    /** Name pattern for this worker's threads. */
    @Override
    protected String getThreadNameFormat() {
        return "alarm-producer-%d";
    }

    /**
     * One work cycle: run the alarm production pass, then sleep for the
     * configured interval before the worker loop invokes this again.
     */
    @Override
    protected void work(String json) {
        LOGGER.info("[AlarmProducer] work start ... ");
        alarmService.produce();
        try {
            Thread.sleep(config.getIntervalTime());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the owning thread can shut down
            // cleanly (the original swallowed it with printStackTrace).
            Thread.currentThread().interrupt();
            LOGGER.warn("[AlarmProducer] sleep interrupted", e);
        }
    }
}

3
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/BacktraceProducer.java

@ -1,8 +1,7 @@
package com.bfd.mf.job.worker; package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.domain.repository.SubjectRepository;
import com.bfd.mf.job.service.BacktraceService;
import com.bfd.mf.job.service.backtrace.BacktraceService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;

14
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/QueryProducer.java

@ -1,7 +1,7 @@
package com.bfd.mf.job.worker; package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.service.QueryService;
import com.bfd.mf.job.service.query.QueryService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -31,13 +31,13 @@ public class QueryProducer extends AbstractWorker {
*/ */
@Override @Override
protected void work(String json) { protected void work(String json) {
LOGGER.info("[QueryProducer] work start ... ");
// LOGGER.info("[QueryProducer] work start ... ");
queryBacktraceService.tryAcquire(); queryBacktraceService.tryAcquire();
queryBacktraceService.produce(); queryBacktraceService.produce();
try {
Thread.sleep(300000);
} catch (InterruptedException e) {
e.printStackTrace();
}
// try {
// Thread.sleep(config.getIntervalTime());
// } catch (InterruptedException e) {
// e.printStackTrace();
// }
} }
} }

1
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/ReadWriterOlyDataProducer.java

@ -1,7 +1,6 @@
package com.bfd.mf.job.worker; package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.service.BacktraceService;
import com.bfd.mf.job.service.WriterTXTService; import com.bfd.mf.job.service.WriterTXTService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;

40
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/SQOutPutProducer.java

@ -0,0 +1,40 @@
package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
@Component
public class SQOutPutProducer extends AbstractWorker {
    private static final Logger LOGGER = LoggerFactory.getLogger(SQOutPutProducer.class);

    @Autowired
    private AppConfig config;

    /** Worker-thread count; reuses the query-producer setting from configuration. */
    @Override
    protected Integer getThreadCount() {
        return config.getQueryProducerThreadCount();
    }

    /** Name pattern for this worker's threads. */
    @Override
    protected String getThreadNameFormat() {
        // Fixed: the original returned the copy-pasted "backtrace-producer-%d",
        // which made these threads indistinguishable from BacktraceProducer's
        // in thread dumps.
        return "sq-output-producer-%d";
    }

    /**
     * One work cycle for pulling subject (专题) data. The output-service calls
     * are still stubbed out pending implementation; currently this only logs
     * and sleeps for the configured interval.
     */
    @Override
    protected void work(String json) {
        LOGGER.info("[SQ - OutPutProducer] work start ... ");
        // outputService.tryAcquire();
        // outputService.produce();
        try {
            Thread.sleep(config.getIntervalTime());
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the owning thread can shut down cleanly.
            Thread.currentThread().interrupt();
            LOGGER.warn("[SQOutPutProducer] sleep interrupted", e);
        }
    }
}

4
cl_query_data_job/src/main/java/com/bfd/mf/job/worker/StatisticsProducer.java

@ -1,7 +1,7 @@
package com.bfd.mf.job.worker; package com.bfd.mf.job.worker;
import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.AppConfig;
import com.bfd.mf.job.service.StatisticsService;
import com.bfd.mf.job.service.statistics.StatisticsService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -29,7 +29,7 @@ public class StatisticsProducer extends AbstractWorker {
LOGGER.info("[StatisticsProducer] work start ... "); LOGGER.info("[StatisticsProducer] work start ... ");
statisticsService.tryAcquire(); statisticsService.tryAcquire();
try { try {
Thread.sleep(3600000);
Thread.sleep(config.getIntervalTime());
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
} }

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save