diff --git a/.idea/.name b/.idea/.name deleted file mode 100644 index 0080ff1..0000000 --- a/.idea/.name +++ /dev/null @@ -1 +0,0 @@ -cl_stream_30 \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml index 5bef374..88f2527 100644 --- a/.idea/compiler.xml +++ b/.idea/compiler.xml @@ -13,7 +13,7 @@ - + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_alibaba_fastjson_1_2_6.xml b/.idea/libraries/Maven__com_alibaba_fastjson_1_2_60.xml similarity index 64% rename from .idea/libraries/Maven__com_alibaba_fastjson_1_2_6.xml rename to .idea/libraries/Maven__com_alibaba_fastjson_1_2_60.xml index 0064979..567a643 100644 --- a/.idea/libraries/Maven__com_alibaba_fastjson_1_2_6.xml +++ b/.idea/libraries/Maven__com_alibaba_fastjson_1_2_60.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_fasterxml_classmate_1_3_1.xml b/.idea/libraries/Maven__com_fasterxml_classmate_1_4_0.xml similarity index 63% rename from .idea/libraries/Maven__com_fasterxml_classmate_1_3_1.xml rename to .idea/libraries/Maven__com_fasterxml_classmate_1_4_0.xml index cfcab12..bbd3a88 100644 --- a/.idea/libraries/Maven__com_fasterxml_classmate_1_3_1.xml +++ b/.idea/libraries/Maven__com_fasterxml_classmate_1_4_0.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_5.xml b/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_6.xml similarity index 65% rename from .idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_5.xml rename to .idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_6.xml index 467b779..51087da 100644 --- a/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_5.xml +++ b/.idea/libraries/Maven__com_fasterxml_jackson_core_jackson_databind_2_9_6.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git 
a/.idea/libraries/Maven__com_github_virtuald_curvesapi_1_04.xml b/.idea/libraries/Maven__com_github_virtuald_curvesapi_1_06.xml similarity index 61% rename from .idea/libraries/Maven__com_github_virtuald_curvesapi_1_04.xml rename to .idea/libraries/Maven__com_github_virtuald_curvesapi_1_06.xml index 2db7431..d156c18 100644 --- a/.idea/libraries/Maven__com_github_virtuald_curvesapi_1_04.xml +++ b/.idea/libraries/Maven__com_github_virtuald_curvesapi_1_06.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml b/.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml new file mode 100644 index 0000000..0e66824 --- /dev/null +++ b/.idea/libraries/Maven__com_google_code_findbugs_jsr305_1_3_9.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_errorprone_error_prone_annotations_2_1_3.xml b/.idea/libraries/Maven__com_google_errorprone_error_prone_annotations_2_1_3.xml new file mode 100644 index 0000000..2343a7f --- /dev/null +++ b/.idea/libraries/Maven__com_google_errorprone_error_prone_annotations_2_1_3.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_guava_guava_25_0_jre.xml b/.idea/libraries/Maven__com_google_guava_guava_25_0_jre.xml new file mode 100644 index 0000000..7de6f07 --- /dev/null +++ b/.idea/libraries/Maven__com_google_guava_guava_25_0_jre.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_j2objc_j2objc_annotations_1_1.xml b/.idea/libraries/Maven__com_google_j2objc_j2objc_annotations_1_1.xml new file mode 100644 index 0000000..c06f999 --- /dev/null +++ b/.idea/libraries/Maven__com_google_j2objc_j2objc_annotations_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_ibm_icu_icu4j_4_6.xml 
b/.idea/libraries/Maven__com_ibm_icu_icu4j_4_6.xml new file mode 100644 index 0000000..670405c --- /dev/null +++ b/.idea/libraries/Maven__com_ibm_icu_icu4j_4_6.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_monitorjbl_xlsx_streamer_2_1_0.xml b/.idea/libraries/Maven__com_monitorjbl_xlsx_streamer_2_1_0.xml new file mode 100644 index 0000000..8797c89 --- /dev/null +++ b/.idea/libraries/Maven__com_monitorjbl_xlsx_streamer_2_1_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_rackspace_apache_xerces2_xsd11_2_11_1.xml b/.idea/libraries/Maven__com_rackspace_apache_xerces2_xsd11_2_11_1.xml new file mode 100644 index 0000000..50545e3 --- /dev/null +++ b/.idea/libraries/Maven__com_rackspace_apache_xerces2_xsd11_2_11_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_rackspace_eclipse_webtools_sourceediting_org_eclipse_wst_xml_xpath2_processor_2_1_100.xml b/.idea/libraries/Maven__com_rackspace_eclipse_webtools_sourceediting_org_eclipse_wst_xml_xpath2_processor_2_1_100.xml new file mode 100644 index 0000000..5624ade --- /dev/null +++ b/.idea/libraries/Maven__com_rackspace_eclipse_webtools_sourceediting_org_eclipse_wst_xml_xpath2_processor_2_1_100.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_9_1.xml b/.idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_6_0.xml similarity index 62% rename from .idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_9_1.xml rename to .idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_6_0.xml index ba9cb81..58edcd1 100644 --- a/.idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_9_1.xml +++ b/.idea/libraries/Maven__com_squareup_okhttp3_okhttp_3_6_0.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git 
a/.idea/libraries/Maven__com_squareup_okio_okio_1_13_0.xml b/.idea/libraries/Maven__com_squareup_okio_okio_1_11_0.xml similarity index 65% rename from .idea/libraries/Maven__com_squareup_okio_okio_1_13_0.xml rename to .idea/libraries/Maven__com_squareup_okio_okio_1_11_0.xml index cb99e2f..0ddf7cf 100644 --- a/.idea/libraries/Maven__com_squareup_okio_okio_1_13_0.xml +++ b/.idea/libraries/Maven__com_squareup_okio_okio_1_11_0.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_mail_javax_mail_1_6_2.xml b/.idea/libraries/Maven__com_sun_mail_javax_mail_1_6_2.xml new file mode 100644 index 0000000..f5980e4 --- /dev/null +++ b/.idea/libraries/Maven__com_sun_mail_javax_mail_1_6_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_swagger_ui_swagger_bootstrap_ui_1_8_8.xml b/.idea/libraries/Maven__com_swagger_ui_swagger_bootstrap_ui_1_8_8.xml deleted file mode 100644 index b8ce1b6..0000000 --- a/.idea/libraries/Maven__com_swagger_ui_swagger_bootstrap_ui_1_8_8.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_codec_commons_codec_1_10.xml b/.idea/libraries/Maven__commons_codec_commons_codec_1_12.xml similarity index 61% rename from .idea/libraries/Maven__commons_codec_commons_codec_1_10.xml rename to .idea/libraries/Maven__commons_codec_commons_codec_1_12.xml index 27424a1..0953394 100644 --- a/.idea/libraries/Maven__commons_codec_commons_codec_1_10.xml +++ b/.idea/libraries/Maven__commons_codec_commons_codec_1_12.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__edu_princeton_cup_java_cup_10k.xml b/.idea/libraries/Maven__edu_princeton_cup_java_cup_10k.xml new file mode 100644 index 0000000..12ad41d --- /dev/null +++ b/.idea/libraries/Maven__edu_princeton_cup_java_cup_10k.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No 
newline at end of file diff --git a/.idea/libraries/Maven__io_springfox_springfox_core_2_6_1.xml b/.idea/libraries/Maven__io_springfox_springfox_core_2_9_2.xml similarity index 59% rename from .idea/libraries/Maven__io_springfox_springfox_core_2_6_1.xml rename to .idea/libraries/Maven__io_springfox_springfox_core_2_9_2.xml index 6b4a6c2..03588dc 100644 --- a/.idea/libraries/Maven__io_springfox_springfox_core_2_6_1.xml +++ b/.idea/libraries/Maven__io_springfox_springfox_core_2_9_2.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_springfox_springfox_schema_2_6_1.xml b/.idea/libraries/Maven__io_springfox_springfox_schema_2_9_2.xml similarity index 58% rename from .idea/libraries/Maven__io_springfox_springfox_schema_2_6_1.xml rename to .idea/libraries/Maven__io_springfox_springfox_schema_2_9_2.xml index 9e4a94d..4a47148 100644 --- a/.idea/libraries/Maven__io_springfox_springfox_schema_2_6_1.xml +++ b/.idea/libraries/Maven__io_springfox_springfox_schema_2_9_2.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_springfox_springfox_spi_2_6_1.xml b/.idea/libraries/Maven__io_springfox_springfox_spi_2_9_2.xml similarity index 60% rename from .idea/libraries/Maven__io_springfox_springfox_spi_2_6_1.xml rename to .idea/libraries/Maven__io_springfox_springfox_spi_2_9_2.xml index 6458cbb..f934649 100644 --- a/.idea/libraries/Maven__io_springfox_springfox_spi_2_6_1.xml +++ b/.idea/libraries/Maven__io_springfox_springfox_spi_2_9_2.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_springfox_springfox_spring_web_2_6_1.xml b/.idea/libraries/Maven__io_springfox_springfox_spring_web_2_9_2.xml similarity index 55% rename from .idea/libraries/Maven__io_springfox_springfox_spring_web_2_6_1.xml rename to .idea/libraries/Maven__io_springfox_springfox_spring_web_2_9_2.xml index 5c4d152..7898a19 100644 --- 
a/.idea/libraries/Maven__io_springfox_springfox_spring_web_2_6_1.xml +++ b/.idea/libraries/Maven__io_springfox_springfox_spring_web_2_9_2.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_springfox_springfox_swagger2_2_6_1.xml b/.idea/libraries/Maven__io_springfox_springfox_swagger2_2_9_2.xml similarity index 56% rename from .idea/libraries/Maven__io_springfox_springfox_swagger2_2_6_1.xml rename to .idea/libraries/Maven__io_springfox_springfox_swagger2_2_9_2.xml index 3d5a04d..4ed7d0d 100644 --- a/.idea/libraries/Maven__io_springfox_springfox_swagger2_2_6_1.xml +++ b/.idea/libraries/Maven__io_springfox_springfox_swagger2_2_9_2.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_springfox_springfox_swagger_common_2_6_1.xml b/.idea/libraries/Maven__io_springfox_springfox_swagger_common_2_9_2.xml similarity index 65% rename from .idea/libraries/Maven__io_springfox_springfox_swagger_common_2_6_1.xml rename to .idea/libraries/Maven__io_springfox_springfox_swagger_common_2_9_2.xml index a3eee0b..c40ef9f 100644 --- a/.idea/libraries/Maven__io_springfox_springfox_swagger_common_2_6_1.xml +++ b/.idea/libraries/Maven__io_springfox_springfox_swagger_common_2_9_2.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_springfox_springfox_swagger_ui_2_9_2.xml b/.idea/libraries/Maven__io_springfox_springfox_swagger_ui_2_9_2.xml new file mode 100644 index 0000000..0700f01 --- /dev/null +++ b/.idea/libraries/Maven__io_springfox_springfox_swagger_ui_2_9_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_swagger_swagger_annotations_1_5_10.xml b/.idea/libraries/Maven__io_swagger_swagger_annotations_1_5_20.xml similarity index 56% rename from .idea/libraries/Maven__io_swagger_swagger_annotations_1_5_10.xml rename to 
.idea/libraries/Maven__io_swagger_swagger_annotations_1_5_20.xml index 0097e48..1c4a621 100644 --- a/.idea/libraries/Maven__io_swagger_swagger_annotations_1_5_10.xml +++ b/.idea/libraries/Maven__io_swagger_swagger_annotations_1_5_20.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__io_swagger_swagger_models_1_5_10.xml b/.idea/libraries/Maven__io_swagger_swagger_models_1_5_20.xml similarity index 60% rename from .idea/libraries/Maven__io_swagger_swagger_models_1_5_10.xml rename to .idea/libraries/Maven__io_swagger_swagger_models_1_5_20.xml index cd838bb..395508f 100644 --- a/.idea/libraries/Maven__io_swagger_swagger_models_1_5_10.xml +++ b/.idea/libraries/Maven__io_swagger_swagger_models_1_5_20.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__it_sauronsoftware_jave_1_0_2.xml b/.idea/libraries/Maven__it_sauronsoftware_jave_1_0_2.xml new file mode 100644 index 0000000..85fabc6 --- /dev/null +++ b/.idea/libraries/Maven__it_sauronsoftware_jave_1_0_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_activation_activation_1_1.xml b/.idea/libraries/Maven__javax_activation_activation_1_1.xml new file mode 100644 index 0000000..180d587 --- /dev/null +++ b/.idea/libraries/Maven__javax_activation_activation_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_mail_javax_mail_api_1_6_2.xml b/.idea/libraries/Maven__javax_mail_javax_mail_api_1_6_2.xml new file mode 100644 index 0000000..b9b6e8f --- /dev/null +++ b/.idea/libraries/Maven__javax_mail_javax_mail_api_1_6_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_collections4_4_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_collections4_4_3.xml similarity index 65% rename from 
.idea/libraries/Maven__org_apache_commons_commons_collections4_4_1.xml rename to .idea/libraries/Maven__org_apache_commons_commons_collections4_4_3.xml index 1e0b53d..93ddf9a 100644 --- a/.idea/libraries/Maven__org_apache_commons_commons_collections4_4_1.xml +++ b/.idea/libraries/Maven__org_apache_commons_commons_collections4_4_3.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_compress_1_18.xml b/.idea/libraries/Maven__org_apache_commons_commons_compress_1_18.xml new file mode 100644 index 0000000..cdc29f4 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_compress_1_18.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math3_3_6_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_6_1.xml new file mode 100644 index 0000000..ebfe0a8 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_6_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_poi_poi_3_15.xml b/.idea/libraries/Maven__org_apache_poi_poi_3_15.xml deleted file mode 100644 index 7c6539d..0000000 --- a/.idea/libraries/Maven__org_apache_poi_poi_3_15.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_poi_poi_4_1_0.xml b/.idea/libraries/Maven__org_apache_poi_poi_4_1_0.xml new file mode 100644 index 0000000..1a04a9b --- /dev/null +++ b/.idea/libraries/Maven__org_apache_poi_poi_4_1_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_poi_poi_ooxml_3_15.xml b/.idea/libraries/Maven__org_apache_poi_poi_ooxml_4_1_0.xml similarity index 63% rename from .idea/libraries/Maven__org_apache_poi_poi_ooxml_3_15.xml rename to 
.idea/libraries/Maven__org_apache_poi_poi_ooxml_4_1_0.xml index ce194b5..b2b940d 100644 --- a/.idea/libraries/Maven__org_apache_poi_poi_ooxml_3_15.xml +++ b/.idea/libraries/Maven__org_apache_poi_poi_ooxml_4_1_0.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_3_15.xml b/.idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_4_1_0.xml similarity index 56% rename from .idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_3_15.xml rename to .idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_4_1_0.xml index 374f748..674fc7f 100644 --- a/.idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_3_15.xml +++ b/.idea/libraries/Maven__org_apache_poi_poi_ooxml_schemas_4_1_0.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_2_6_0.xml b/.idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_3_1_0.xml similarity index 61% rename from .idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_2_6_0.xml rename to .idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_3_1_0.xml index cb0f76d..0695d4b 100644 --- a/.idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_2_6_0.xml +++ b/.idea/libraries/Maven__org_apache_xmlbeans_xmlbeans_3_1_0.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_checkerframework_checker_compat_qual_2_0_0.xml b/.idea/libraries/Maven__org_checkerframework_checker_compat_qual_2_0_0.xml new file mode 100644 index 0000000..29defe6 --- /dev/null +++ b/.idea/libraries/Maven__org_checkerframework_checker_compat_qual_2_0_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_mojo_animal_sniffer_annotations_1_14.xml b/.idea/libraries/Maven__org_codehaus_mojo_animal_sniffer_annotations_1_14.xml new file mode 100644 index 0000000..1af11e4 --- /dev/null +++ 
b/.idea/libraries/Maven__org_codehaus_mojo_animal_sniffer_annotations_1_14.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_mapstruct_mapstruct_1_0_0_Final.xml b/.idea/libraries/Maven__org_mapstruct_mapstruct_1_2_0_Final.xml similarity index 58% rename from .idea/libraries/Maven__org_mapstruct_mapstruct_1_0_0_Final.xml rename to .idea/libraries/Maven__org_mapstruct_mapstruct_1_2_0_Final.xml index 5599974..87cb599 100644 --- a/.idea/libraries/Maven__org_mapstruct_mapstruct_1_0_0_Final.xml +++ b/.idea/libraries/Maven__org_mapstruct_mapstruct_1_2_0_Final.xml @@ -1,13 +1,13 @@ - + - + - + - + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_12.xml b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_12.xml new file mode 100644 index 0000000..411efc6 --- /dev/null +++ b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_12.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__stax_stax_api_1_0_1.xml b/.idea/libraries/Maven__stax_stax_api_1_0_1.xml deleted file mode 100644 index 0b13335..0000000 --- a/.idea/libraries/Maven__stax_stax_api_1_0_1.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/libraries/Maven__xml_apis_xml_apis_1_4_01.xml b/.idea/libraries/Maven__xml_apis_xml_apis_1_4_01.xml new file mode 100644 index 0000000..9aadd29 --- /dev/null +++ b/.idea/libraries/Maven__xml_apis_xml_apis_1_4_01.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xml_resolver_xml_resolver_1_2.xml b/.idea/libraries/Maven__xml_resolver_xml_resolver_1_2.xml new file mode 100644 index 0000000..3c94b04 --- /dev/null +++ b/.idea/libraries/Maven__xml_resolver_xml_resolver_1_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml index ae08aaa..d6ff43e 
100644 --- a/.idea/modules.xml +++ b/.idea/modules.xml @@ -4,7 +4,7 @@ - + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml index 70c1f8f..94a25f7 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -2,7 +2,5 @@ - - \ No newline at end of file diff --git a/cl_query_data_job/cl_query_data_job.iml b/cl_query_data_job/cl_query_data_job.iml index 683f253..2a965b9 100644 --- a/cl_query_data_job/cl_query_data_job.iml +++ b/cl_query_data_job/cl_query_data_job.iml @@ -79,7 +79,7 @@ - + @@ -128,11 +128,8 @@ - - - @@ -157,5 +154,51 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cl_query_data_job/pom.xml b/cl_query_data_job/pom.xml index 98edc7b..07bfe79 100644 --- a/cl_query_data_job/pom.xml +++ b/cl_query_data_job/pom.xml @@ -4,9 +4,9 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> 4.0.0 - cl_stream_3.0 + cl_stream_4.0 com.bfd.mf - 3.0-SNAPSHOT + 4.0-SNAPSHOT cl_query_data_job @@ -72,10 +72,15 @@ 19.0 + + + + + com.alibaba fastjson - 1.2.6 + 1.2.60 @@ -100,13 +105,6 @@ 6.0.0 - - - com.squareup.okhttp3 - okhttp - 3.9.1 - - org.apache.kafka kafka-clients @@ -138,6 +136,66 @@ kafka 0.10 + + org.apache.poi + poi + 4.1.0 + + + org.apache.poi + poi-ooxml + 4.1.0 + + + com.monitorjbl + xlsx-streamer + 2.1.0 + + + com.squareup.okhttp3 + okhttp + 3.6.0 + + + + it.sauronsoftware + jave + 1.0.2 + + + com.bfd.nlp + nlp_common_util + 1.1 + + + httpclient + org.apache.httpcomponents + + + + + + + logback-classic + ch.qos.logback + + + + + + + javax.mail + javax.mail-api + 1.6.2 + + + + com.sun.mail + javax.mail + 1.6.2 + + + diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/Application.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/Application.java index 14df1e0..a5df4ae 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/Application.java +++ 
b/cl_query_data_job/src/main/java/com/bfd/mf/job/Application.java @@ -23,6 +23,12 @@ public class Application { private QueryProducer queryProducer; @Autowired private StatisticsProducer statisticsProducer; + @Autowired + private UpLoadProducer upLoadProducer; + @Autowired + private TaskCountProducer taskCountProducer; + @Autowired + private AlarmProducer alarmProducer; public static void main(String[] args) { ConfigurableApplicationContext context = SpringApplication.run(Application.class, args); @@ -42,7 +48,19 @@ public class Application { } if(config.getEnableBacktraceProducer()){ backtraceProducer.start(); - LOGGER.info("---- Statistics producer started successfully. ----"); + LOGGER.info("---- Backtrace producer started successfully. ----"); + } + if(config.getEnableUpLoadProducer()){ + upLoadProducer.start(); + LOGGER.info("---- Backtrace producer started successfully. ----"); + } + if(config.getEnableTaskcountProducer()){ + taskCountProducer.start(); + LOGGER.info("---- TaskCount producer started successfully. ----"); + } + if(config.getEnableAlarmProducer()){ + alarmProducer.start(); + LOGGER.info("---- Alarm producer started successfully. ----"); } LOGGER.info("---- Application started successfully. 
----"); diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/AllKeys.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/AllKeys.java new file mode 100644 index 0000000..1713b83 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/AllKeys.java @@ -0,0 +1,162 @@ +package com.bfd.mf.job.config; + +import com.bfd.mf.job.util.DateUtil; + +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; + +public class AllKeys { + private static Map map = new HashMap(); + + public static Map getMap() { + return map; + } + + public static void setMap(Map map) { + AllKeys.map = map; + } + + static { + map.put("_id_",""); + map.put("age",""); + map.put("area",""); + map.put("attitudesCount",""); + map.put("attr",""); + map.put("author",""); + map.put("authorId",""); + map.put("authorLevel",""); + map.put("authornickname",""); + map.put("availability",0); + map.put("avatar",""); + map.put("brand",""); + map.put("brandId",""); + map.put("cate",""); + map.put("channel",""); + map.put("city",""); + map.put("collectCount",0); + map.put("commentId",""); + map.put("commentScore",0); + map.put("commentsCount",0); + map.put("commentUrl",""); + map.put("content",""); + map.put("contentLength",0); + map.put("contentSimHash",""); + map.put("contentTag",""); + map.put("country",""); + map.put("crawlDataFlag",""); + map.put("crawlDate",new Date ()); + map.put("crawlDay",0L); + map.put("crawlTime",0L); + map.put("crawlTimeStr",""); + map.put("createDate",new Date ()); + map.put("createDay",0L); + map.put("createTime",0L); + map.put("createTimeStr",""); + map.put("dataCount",0); + map.put("dataId",""); + map.put("docId",""); + map.put("docType",""); + map.put("downCnt",0); + map.put("egc",0); + map.put("enSource",""); + map.put("expression",new ArrayList<>()); + map.put("extension",""); + map.put("fansCount",""); + map.put("favorCnt",0); + map.put("filePath",new ArrayList<>()); + map.put("imagePath",new 
ArrayList<>()); + map.put("videoPath",new ArrayList<>()); + map.put("filePathSize",new ArrayList<>()); + map.put("imagePathSize",new ArrayList<>()); + map.put("videoPathSize",new ArrayList<>()); + map.put("finalPhrase",""); + map.put("firstListBrand",""); + map.put("fiveListBrand",""); + map.put("forumScore",""); + map.put("forwardAttitudesCount",0); + map.put("forwardAuthor",""); + map.put("forwardAvatar",""); + map.put("forwardCommentsCount",0); + map.put("forwardContent",""); + map.put("forwardImgs",""); + map.put("forwardPostSource",""); + map.put("forwardPubTime",0L); + map.put("forwardQuoteCount",0); + map.put("forwardUrl",""); + map.put("forwardUserId",""); + map.put("forwardUserType",0); + map.put("forwardUserUrl",""); + map.put("fourListBrand",""); + map.put("friendsCount",""); + map.put("getSource",""); + map.put("hashTag",new ArrayList<>()); + map.put("hlKeywords",new ArrayList<>()); + map.put("impression",""); + map.put("isDownload",false); + map.put("isVip",0); + map.put("language",""); + map.put("lastModifiedTime",0L); + map.put("listBrand",""); + map.put("location",""); + map.put("nomorprice",0); + map.put("opinions",new ArrayList<>()); + map.put("originalPhrase",""); + map.put("otherSourceJson",""); + map.put("pageCommentCount",0); + map.put("pageTranspondCount",0); + map.put("pageType",""); + map.put("pgc",0); + map.put("pictureList",""); + map.put("places",new ArrayList<>()); + map.put("postCount",""); + map.put("postId",""); + map.put("postSource",""); + map.put("price",0); + map.put("primary",1); + map.put("productParameter",""); + map.put("projectName",""); + map.put("promotionInfo",""); + map.put("province",""); + map.put("pubDate",new Date()); + map.put("pubDay", DateUtil.getcurr()); + map.put("pubTime",DateUtil.getcurr()); + map.put("pubTimeStr", DateUtil.getDateTime()); + map.put("quoteCount",0); + map.put("readCount",0); + map.put("resolution",""); + map.put("secondListBrand",""); + map.put("sex",""); + map.put("sign",""); + 
map.put("siteId",""); + map.put("skuProperties",""); + map.put("smallImgs",""); + map.put("source",""); + map.put("sysAbstract",""); + map.put("sysKeywords",""); + map.put("sysSentiment",0.0); + map.put("threeListBrand",""); + map.put("thumbnails",""); + map.put("title",""); + map.put("titleLength",0); + map.put("titleSimHash",""); + map.put("translateContent",""); + map.put("translateTitle",""); + map.put("ugc",0); + map.put("url",""); + map.put("urlHash",""); + map.put("userType",""); + map.put("userUrl",""); + map.put("videoTime",""); + map.put("videoUrl",""); + map.put("avatarPath",""); + map.put("viewCnt",0); + map.put("channelNum",""); + map.put("crawlDataFlagType",""); + map.put("primaryPost",""); + map.put("dns",""); + map.put("asrText",""); + map.put("ocrText",new ArrayList<>()); + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java index 7d50c21..1154b30 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/AppConfig.java @@ -1,8 +1,5 @@ package com.bfd.mf.job.config; -import com.alibaba.fastjson.JSON; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.joda.time.Instant; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.context.annotation.Configuration; @@ -26,6 +23,7 @@ public class AppConfig { public static final String SEPARATOR_UP = "丄"; public static final String SEPARATOR_DOWN = "丅"; public static final String CL_INDEX= "cl_index"; + public static final String SEPARATOR = "_"; // 从配置文件中读的参数 private static final String CONFIG_ES_CLUSTER_NAME = "name"; @@ -41,19 +39,34 @@ public class AppConfig { private Integer testThreadCount; private Long testTaskId; private String brokerList; + private String sendTopic; private List analysisTopic; 
private String analysisGroup; - // private Boolean enableAnalysisProducer; -// private Boolean enableAnalysisConsumer; -// private Integer analysisProducerThreadCount; -// private Integer analysisConsumerThreadCount; private Boolean enableStatisticsProducer; // 离线查询统计服务的状态 private Boolean enableQueryProducer; // 离线查询数据服务的状态 private Boolean enableBacktraceProducer; // 离线拉取数据服务的状态(欧莱雅) + private Boolean enableUpLoadProducer; + private Boolean enableOutputProducer; + private Boolean enableTaskcountProducer; + private Boolean enableAlarmProducer; private Integer statisticsProducerThreadCount; // 离线查询统计服务的线程数 private Integer queryProducerThreadCount; private Integer backtraceProducerThreadCount; - // private Boolean enableCompany; + private Integer upLoadProducerThreadCount; + private Integer outputProducerThreadCount; + private Integer taskcountProducerThreadCount; + private Integer alarmProducerThreadCount; + private String goFastPostUrl; + private String goFastDomain; + private String uploadOLYExcelPath; + private String uploadZipPath; + private String indexNamePre; + + // private Boolean enableAnalysisProducer; +// private Boolean enableAnalysisConsumer; +// private Integer analysisProducerThreadCount; +// private Integer analysisConsumerThreadCount; +// private Boolean enableCompany; // private Integer companyThreadCount; // private Boolean enableCompanyProducer; // private Boolean enableCompanyConsumer; @@ -61,6 +74,8 @@ public class AppConfig { // private Integer companyConsumerThreadCount; // private Boolean enableZombie; private Integer periodS; + private Long intervalTime; + private Long queryDataYearStarttime; private String ruleRest; private String commentRest; private Integer ruleRestConcurrency; @@ -68,8 +83,25 @@ public class AppConfig { private Integer failureUpper; private Map esNormal; private Map esMini; + private Map esLogstash; + public Boolean getEnableAlarmProducer() { + return enableAlarmProducer; + } + + public void 
setEnableAlarmProducer(Boolean enableAlarmProducer) { + this.enableAlarmProducer = enableAlarmProducer; + } + + public Integer getAlarmProducerThreadCount() { + return alarmProducerThreadCount; + } + + public void setAlarmProducerThreadCount(Integer alarmProducerThreadCount) { + this.alarmProducerThreadCount = alarmProducerThreadCount; + } + public Integer getPeriodS() { return periodS; } @@ -78,6 +110,22 @@ public class AppConfig { this.periodS = periodS; } + public Long getIntervalTime() { + return intervalTime; + } + + public void setIntervalTime(Long intervalTime) { + this.intervalTime = intervalTime; + } + + public Long getQueryDataYearStarttime() { + return queryDataYearStarttime; + } + + public void setQueryDataYearStarttime(Long queryDataYearStarttime) { + this.queryDataYearStarttime = queryDataYearStarttime; + } + public Integer getQueryProducerThreadCount() { return queryProducerThreadCount; } @@ -118,6 +166,22 @@ public class AppConfig { this.enableStatisticsProducer = enableStatisticsProducer; } + public Boolean getEnableTaskcountProducer() { + return enableTaskcountProducer; + } + + public void setEnableTaskcountProducer(Boolean enableTaskcountProducer) { + this.enableTaskcountProducer = enableTaskcountProducer; + } + + public Integer getTaskcountProducerThreadCount() { + return taskcountProducerThreadCount; + } + + public void setTaskcountProducerThreadCount(Integer taskcountProducerThreadCount) { + this.taskcountProducerThreadCount = taskcountProducerThreadCount; + } + public Integer getStatisticsProducerThreadCount() { return statisticsProducerThreadCount; } @@ -126,6 +190,38 @@ public class AppConfig { this.statisticsProducerThreadCount = statisticsProducerThreadCount; } + public Boolean getEnableUpLoadProducer() { + return enableUpLoadProducer; + } + + public void setEnableUpLoadProducer(Boolean enableUpLoadProducer) { + this.enableUpLoadProducer = enableUpLoadProducer; + } + + public Integer getUpLoadProducerThreadCount() { + return 
upLoadProducerThreadCount; + } + + public void setUpLoadProducerThreadCount(Integer upLoadProducerThreadCount) { + this.upLoadProducerThreadCount = upLoadProducerThreadCount; + } + + public Boolean getEnableOutputProducer() { + return enableOutputProducer; + } + + public void setEnableOutputProducer(Boolean enableOutputProducer) { + this.enableOutputProducer = enableOutputProducer; + } + + public Integer getOutputProducerThreadCount() { + return outputProducerThreadCount; + } + + public void setOutputProducerThreadCount(Integer outputProducerThreadCount) { + this.outputProducerThreadCount = outputProducerThreadCount; + } + public String getVersion() { return version; } @@ -166,6 +262,14 @@ public class AppConfig { this.brokerList = brokerList; } + public String getSendTopic() { + return sendTopic; + } + + public void setSendTopic(String sendTopic) { + this.sendTopic = sendTopic; + } + public List getAnalysisTopic() { return analysisTopic; } @@ -182,7 +286,6 @@ public class AppConfig { this.analysisGroup = analysisGroup; } - public String getRuleRest() { return ruleRest; } @@ -215,6 +318,31 @@ public class AppConfig { this.failureUpper = failureUpper; } + public Map getEsLogstash() { + return esLogstash; + } + + public void setEsLogstash(Map esLogstash) { + this.esLogstash = esLogstash; + } + + public String esLogstashClusterName() { + return (String) esLogstash.get(CONFIG_ES_CLUSTER_NAME); + } + + public String[] esLogstashAddress() { + return ((String) esLogstash.get(CONFIG_ES_ADDRESS)).split(","); + } + + public Long esLogstashUpper() { + String upper = (String) esLogstash.get(CONFIG_ES_SOURCE_UPPER); + return Instant.parse(upper).getMillis(); + } + + public String esLogstashStandby() { + return (String) esLogstash.get(CONFIG_ES_SOURCE_STANDBY); + } + public Map getEsNormal() { return esNormal; } @@ -271,6 +399,55 @@ public class AppConfig { return size; } + public String getCommentRest() { + return commentRest; + } + + public void setCommentRest(String 
commentRest) { + this.commentRest = commentRest; + } + + public String getGoFastPostUrl() { + return goFastPostUrl; + } + + public void setGoFastPostUrl(String goFastPostUrl) { + this.goFastPostUrl = goFastPostUrl; + } + + public String getGoFastDomain() { + return goFastDomain; + } + + public void setGoFastDomain(String goFastDomain) { + this.goFastDomain = goFastDomain; + } + + public String getUploadOLYExcelPath() { + return uploadOLYExcelPath; + } + + public void setUploadOLYExcelPath(String uploadOLYExcelPath) { + this.uploadOLYExcelPath = uploadOLYExcelPath; + } + + public String getUploadZipPath() { + return uploadZipPath; + } + + public void setUploadZipPath(String uploadZipPath) { + this.uploadZipPath = uploadZipPath; + } + + public String getIndexNamePre() { + return indexNamePre; + } + + public void setIndexNamePre(String indexNamePre) { + this.indexNamePre = indexNamePre; + } + + public void verify() { @@ -280,34 +457,24 @@ public class AppConfig { Assert.isTrue(testTaskId > 0, "Config testTaskId must gt 0"); } Assert.hasLength(brokerList, "Config brokerList must not be empty"); - if(enableStatisticsProducer){ + if(enableStatisticsProducer){ // 统计 Assert.isTrue(statisticsProducerThreadCount > 0, "Config statisticsProducerThreadCount must gt 0"); } - if(enableQueryProducer){ - Assert.isTrue(queryProducerThreadCount > 0, "Config statisticsProducerThreadCount must gt 0"); + if(enableQueryProducer){ // 离线拉数 + Assert.isTrue(queryProducerThreadCount > 0, "Config queryProducerThreadCount must gt 0"); + } + if(enableBacktraceProducer){ // 欧莱雅离线拉数 + Assert.isTrue(backtraceProducerThreadCount > 0, "Config backtraceProducerThreadCount must gt 0"); + } + if(enableUpLoadProducer){ // 数据上传 + Assert.isTrue(upLoadProducerThreadCount > 0, "Config upLoadProducerThreadCount must gt 0"); + } + if(enableTaskcountProducer){ // 驾驶舱-每天的平均任务量统计 + Assert.isTrue(taskcountProducerThreadCount > 0, "Config taskcountProducerThreadCount must gt 0"); } - if(enableStatisticsProducer){ - 
Assert.isTrue(backtraceProducerThreadCount > 0, "Config statisticsProducerThreadCount must gt 0"); + if(enableAlarmProducer){ // 驾驶舱-每天的平均任务量统计 + Assert.isTrue(alarmProducerThreadCount > 0, "Config alarmProducerThreadCount must gt 0"); } -// if (enableAnalysisProducer) { -// Assert.isTrue(analysisProducerThreadCount > 0, "Config analysisProducerThreadCount must gt 0"); -// Assert.notEmpty(analysisTopic, "Config analysisTopic must not be empty."); -// } -// if (enableAnalysisConsumer) { -// Assert.isTrue(analysisConsumerThreadCount > 0, "Config analysisConsumerThreadCount must gt 0"); -// Assert.hasLength(analysisGroup, "Config analysisGroup must not be empty."); -// } -// if (enableCompany) { -// Assert.isTrue(companyThreadCount > 0, "Config companyThreadCount must gt 0"); -// } -// if (enableCompanyProducer) { -// Assert.isTrue(companyProducerThreadCount > 0, "Config companyProducerThreadCount must gt 0"); -// -// } -// if (enableCompanyConsumer) { -// Assert.isTrue(companyConsumerThreadCount > 0, "Config companyConsumerThreadCount must gt 0"); -// -// } Assert.isTrue(periodS > 0, "Config periodS must gt 0"); Assert.hasLength(ruleRest, "Config ruleRest must not be empty"); Assert.isTrue(ruleRestConcurrency > 0, "Config ruleRestConcurrency must gt 0"); @@ -317,38 +484,7 @@ public class AppConfig { Assert.notEmpty(esMini, "Config esTarget must not be empty"); } - public String getCommentRest() { - return commentRest; - } - public void setCommentRest(String commentRest) { - this.commentRest = commentRest; - } -// public Map getRedis() { -// return redis; -// } -// public void setRedis(Map redis) { -// this.redis = redis; -// } - -// public String redisModel() { -// return (String) redis.get(REDIS_MODEL); -// } -// public String redisZkadress() { -// return (String) redis.get(REDIS_ZKADRESS); -// } -// public String redisZksessiontimeoutms() { -// return (String) redis.get(REDIS_ZKSESSIONTIMEOUTMS); -// } -// public String redisProxypath() { -// return (String) 
redis.get(REDIS_PORT); -// } -// public String redisAddress() { -// return (String) redis.get(REDIS_ADDRESS); -// } -// public String redisPort() { -// return (String) redis.get(REDIS_PORT); -// } } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/BFDApiConfig.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/BFDApiConfig.java deleted file mode 100644 index af24f24..0000000 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/BFDApiConfig.java +++ /dev/null @@ -1,1116 +0,0 @@ -package com.bfd.mf.job.config; - -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.springframework.boot.context.properties.ConfigurationProperties; -import org.springframework.boot.context.properties.EnableConfigurationProperties; -import org.springframework.context.annotation.Configuration; -import org.springframework.validation.annotation.Validated; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -@Configuration -@EnableConfigurationProperties -@ConfigurationProperties(prefix = "bfd.api.mf") -@Validated -public class BFDApiConfig { - - private ThreadConfig threadConfig; - private Download download; - private ES es; - private Data data; - private Image image; - private Email email; - private Thrift thrift; - private ManualImage manualImage; - private ReportImage reportImage; - private String restBaseUrl; - private String webBaseUrl; - private String metaSearchUrl; - private String foreignMetaSearchUrl; - private String restDataApi; - private String similarityMonitorWarnNumber; - private String similarityMonitorSplitNumber; - private String mergeSegmentsIndexNumber; - private String maxEsNumber; - private String weiboChannelTimeUrl; - private Screen screen; - private int defaultCacheLimitDay; - private Integer failedTaskRetryNum; - private Integer dataCompareCount; - private String weChatDomainName; - private String textPostUrl; - private String emotionPostUrl; - private String wordCloudPostUrl; - 
private Integer emotionPostType; - private boolean codisCacheFlag = false; - private ReportBehaviorInterface reportBehaviorInterface; - /** - * 访客 IP 白名单 - */ - private ArrayList accessWhiteList; - /** - * 请求 内网地址 白名单 - */ - private ArrayList accessRequestUrl; - /** - * 会话 session 过期时间 6小时 60*60*6 - */ - private int cacheTimeout = 21600; - /** - * 单次批量收藏最大数量 - */ - private int maxFavourNumber = 1000; - /** - * 微信简报对应渠道的查询数目 - */ - private int wechatNumber = 20; - private Integer filterType; - private Integer queryType; - private Boolean bigram; - /** - * 数据监控页数据缓存时间: seconds - */ - public Integer cacheAtDataMonitor; - - public Map esMini; - public Map esNormal; - - public String esMiniName() { - return (String) esMini.get("name"); - } - - public String[] esMiniAddress() { - return ((String) esMini.get("address")).split(","); - } - - public String esNormalName() { - return (String) esNormal.get("name"); - } - - public String[] esNormalAddress() { - return ((String) esNormal.get("address")).split(","); - } - - public Map getEsMini() { - return esMini; - } - - public void setEsMini(Map esMini) { - this.esMini = esMini; - } - - public Map getEsNormal() { - return esNormal; - } - - public void setEsNormal(Map esNormal) { - this.esNormal = esNormal; - } - - public Integer getEmotionPostType() { - return emotionPostType; - } - - public void setEmotionPostType(Integer emotionPostType) { - this.emotionPostType = emotionPostType; - } - - public String getWordCloudPostUrl() { - return wordCloudPostUrl; - } - - public void setWordCloudPostUrl(String wordCloudPostUrl) { - this.wordCloudPostUrl = wordCloudPostUrl; - } - - public String getTextPostUrl() { - return textPostUrl; - } - - public void setTextPostUrl(String textPostUrl) { - this.textPostUrl = textPostUrl; - } - - public String getEmotionPostUrl() { - return emotionPostUrl; - } - - public void setEmotionPostUrl(String emotionPostUrl) { - this.emotionPostUrl = emotionPostUrl; - } - - public String 
getSimilarityMonitorWarnNumber() { - return similarityMonitorWarnNumber; - } - - public void setSimilarityMonitorWarnNumber(String similarityMonitorWarnNumber) { - this.similarityMonitorWarnNumber = similarityMonitorWarnNumber; - } - - public String getSimilarityMonitorSplitNumber() { - return similarityMonitorSplitNumber; - } - - public void setSimilarityMonitorSplitNumber(String similarityMonitorSplitNumber) { - this.similarityMonitorSplitNumber = similarityMonitorSplitNumber; - } - - public String getWeiboChannelTimeUrl() { - return weiboChannelTimeUrl; - } - - public void setWeiboChannelTimeUrl(String weiboChannelTimeUrl) { - this.weiboChannelTimeUrl = weiboChannelTimeUrl; - } - - public String getMaxEsNumber() { - return maxEsNumber; - } - - public void setMaxEsNumber(String maxEsNumber) { - this.maxEsNumber = maxEsNumber; - } - - public String getMergeSegmentsIndexNumber() { - return mergeSegmentsIndexNumber; - } - - public void setMergeSegmentsIndexNumber(String mergeSegmentsIndexNumber) { - this.mergeSegmentsIndexNumber = mergeSegmentsIndexNumber; - } - - public String getRestDataApi() { - return restDataApi; - } - - public void setRestDataApi(String restDataApi) { - this.restDataApi = restDataApi; - } - - public Integer getDataCompareCount() { - return dataCompareCount; - } - - public void setDataCompareCount(Integer dataCompareCount) { - this.dataCompareCount = dataCompareCount; - } - - public ReportBehaviorInterface getReportBehaviorInterface() { - return reportBehaviorInterface; - } - - public void setReportBehaviorInterface(ReportBehaviorInterface reportBehaviorInterface) { - this.reportBehaviorInterface = reportBehaviorInterface; - } - - public int getMaxFavourNumber() { - return maxFavourNumber; - } - - public void setMaxFavourNumber(int maxFavourNumber) { - this.maxFavourNumber = maxFavourNumber; - } - - public String getMetaSearchUrl() { - return metaSearchUrl; - } - - public void setMetaSearchUrl(String metaSearchUrl) { - this.metaSearchUrl = 
metaSearchUrl; - } - - public String getRestBaseUrl() { - return restBaseUrl; - } - - public void setRestBaseUrl(String restBaseUrl) { - this.restBaseUrl = restBaseUrl; - } - - public ManualImage getManualImage() { - return manualImage; - } - - public void setManualImage(ManualImage manualImage) { - this.manualImage = manualImage; - } - - public ES getEs() { - return es; - } - - public void setEs(ES es) { - this.es = es; - } - - public Download getDownload() { - return download; - } - - public void setDownload(Download download) { - this.download = download; - } - - public Data getData() { - return data; - } - - public void setData(Data data) { - this.data = data; - } - - public Email getEmail() { - return email; - } - - public void setEmail(Email email) { - this.email = email; - } - - public Thrift getThrift() { - return thrift; - } - - public void setThrift(Thrift thrift) { - this.thrift = thrift; - } - - public String getWebBaseUrl() { - return webBaseUrl; - } - - public void setWebBaseUrl(String webBaseUrl) { - this.webBaseUrl = webBaseUrl; - } - - public Screen getScreen() { - return screen; - } - - public void setScreen(Screen screen) { - this.screen = screen; - } - - public ArrayList getAccessWhiteList() { - return accessWhiteList; - } - - public void setAccessWhiteList(ArrayList accessWhiteList) { - this.accessWhiteList = accessWhiteList; - } - - public ArrayList getAccessRequestUrl() { - return accessRequestUrl; - } - - public void setAccessRequestUrl(ArrayList accessRequestUrl) { - this.accessRequestUrl = accessRequestUrl; - } - - public int getCacheTimeout() { - return cacheTimeout; - } - - public long getDefaultCacheLimitDay() { - return defaultCacheLimitDay; - } - - public void setDefaultCacheLimitDay(int defaultCacheLimitDay) { - this.defaultCacheLimitDay = defaultCacheLimitDay; - } - - public void setCacheTimeout(int cacheTimeout) { - this.cacheTimeout = cacheTimeout; - } - - public Integer getFailedTaskRetryNum() { - return failedTaskRetryNum; - } - 
- public void setFailedTaskRetryNum(Integer failedTaskRetryNum) { - this.failedTaskRetryNum = failedTaskRetryNum; - } - - public Integer getFilterType() { - return filterType; - } - - public void setFilterType(Integer filterType) { - this.filterType = filterType; - } - - public Integer getQueryType() { - return queryType; - } - - public void setQueryType(Integer queryType) { - this.queryType = queryType; - } - - public Boolean getBigram() { - return bigram; - } - - public void setBigram(Boolean bigram) { - this.bigram = bigram; - } - - public int getWechatNumber() { - return wechatNumber; - } - - public void setWechatNumber(int wechatNumber) { - this.wechatNumber = wechatNumber; - } - - public String getForeignMetaSearchUrl() { - return foreignMetaSearchUrl; - } - - public void setForeignMetaSearchUrl(String foreignMetaSearchUrl) { - this.foreignMetaSearchUrl = foreignMetaSearchUrl; - } - - public Integer getCacheAtDataMonitor() { - return cacheAtDataMonitor; - } - - public void setCacheAtDataMonitor(int cacheAtDataMonitor) { - this.cacheAtDataMonitor = cacheAtDataMonitor; - } - - public boolean isCodisCacheFlag() { - return codisCacheFlag; - } - - public void setCodisCacheFlag(boolean codisCacheFlag) { - this.codisCacheFlag = codisCacheFlag; - } - - public ThreadConfig getThreadConfig() { - return threadConfig; - } - - public void setThreadConfig(ThreadConfig threadConfig) { - this.threadConfig = threadConfig; - } - - public String getWeChatDomainName() { - return weChatDomainName; - } - - public void setWeChatDomainName(String weChatDomainName) { - this.weChatDomainName = weChatDomainName; - } - - public static class Download { - private String codisTaskQueueKey; - private String filePath; - private int sleepSeconds; - private int zombieSeconds; - private Schedule schedule; - private String active = "N"; - private String fileSecondPath; - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("active", active) - 
.append("codisTaskQueueKey", codisTaskQueueKey) - .append("filePath", filePath) - .append("sleepSeconds", sleepSeconds) - .append("schedule", schedule) - .append("fileSecondPath", fileSecondPath) - .toString(); - } - - public int getSleepSeconds() { - return sleepSeconds; - } - - public void setSleepSeconds(int sleepSeconds) { - this.sleepSeconds = sleepSeconds; - } - - public String getCodisTaskQueueKey() { - return codisTaskQueueKey; - } - - public void setCodisTaskQueueKey(String codisTaskQueueKey) { - this.codisTaskQueueKey = codisTaskQueueKey; - } - - public String getFilePath() { - return filePath; - } - - public void setFilePath(String filePath) { - this.filePath = filePath; - } - - public Schedule getSchedule() { - return schedule; - } - - public void setSchedule(Schedule schedule) { - this.schedule = schedule; - } - - public String getActive() { - return active; - } - - public void setActive(String active) { - this.active = active; - } - - public int getZombieSeconds() { - return zombieSeconds; - } - - public void setZombieSeconds(int zombieSeconds) { - this.zombieSeconds = zombieSeconds; - } - - public String getFileSecondPath() { - return fileSecondPath; - } - - public void setFileSecondPath(String fileSecondPath) { - this.fileSecondPath = fileSecondPath; - } - } - - public static class ES { - private String url, cluster, secondUrl, itemIndex, indexPrefix, primaryIndex; - private Integer port; - private Long dailyIndexCount; - private Integer availableMiniClusterDays = 3; - private Integer scrollSize ; - private Integer sliceNumber; - private Integer filterNumber; - private Boolean indexDynamic; - private String httpPort; - private String httpNormalPort; - private Integer secondPort; - - public String getPrimaryIndex() { - return primaryIndex; - } - - public void setPrimaryIndex(String primaryIndex) { - this.primaryIndex = primaryIndex; - } - - public Integer getScrollSize() { - return scrollSize; - } - - public void setScrollSize(Integer scrollSize) { - 
this.scrollSize = scrollSize; - } - - public Boolean getIndexDynamic() { - return indexDynamic; - } - - public void setIndexDynamic(Boolean indexDynamic) { - this.indexDynamic = indexDynamic; - } - - public Integer getSliceNumber() { - return sliceNumber; - } - - public void setSliceNumber(Integer sliceNumber) { - this.sliceNumber = sliceNumber; - } - - public Long getDailyIndexCount() { - return dailyIndexCount; - } - - public void setDailyIndexCount(Long dailyIndexCount) { - this.dailyIndexCount = dailyIndexCount; - } - - public String getHttpPort() { - return httpPort; - } - - public Integer getFilterNumber() { - return filterNumber; - } - - public void setFilterNumber(Integer filterNumber) { - this.filterNumber = filterNumber; - } - - public void setHttpPort(String httpPort) { - this.httpPort = httpPort; - } - - public String getHttpNormalPort() { - return httpNormalPort; - } - - public void setHttpNormalPort(String httpNormalPort) { - this.httpNormalPort = httpNormalPort; - } - - public Integer getSecondPort() { - return secondPort; - } - - public void setSecondPort(Integer secondPort) { - this.secondPort = secondPort; - } - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } - - public String getCluster() { - return cluster; - } - - public void setCluster(String cluster) { - this.cluster = cluster; - } - - public Integer getPort() { - return port; - } - - public void setPort(Integer port) { - this.port = port; - } - - public String getSecondUrl() { - return secondUrl; - } - - public void setSecondUrl(String secondUrl) { - this.secondUrl = secondUrl; - } - - public String getItemIndex() { - return itemIndex; - } - - public void setItemIndex(String itemIndex) { - this.itemIndex = itemIndex; - } - - public String getDefaultIndexPre() { - return indexPrefix + "_*"; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("url", url) - .append("port", port) - 
.append("secondUrl", secondUrl) - .append("secondPort", secondPort) - .append("cluster", cluster) - .append("itemIndex", itemIndex) - .append("indexPrefix", indexPrefix) - .append("dailyIndexCount", dailyIndexCount) - .toString(); - } - - public String getIndexPrefix() { - return indexPrefix; - } - - public void setIndexPrefix(String indexPrefix) { - this.indexPrefix = indexPrefix; - } - - public Integer getAvailableMiniClusterDays() { - return availableMiniClusterDays; - } - - public void setAvailableMiniClusterDays(Integer availableMiniClusterDays) { - this.availableMiniClusterDays = availableMiniClusterDays; - } - } - - public static class Schedule { - private String jobPrefix, jobGroup, triggerPrefix, triggerGroup; - private String cronTime; - private int threadNumber = 10, delaySeconds = 30; - - public int getThreadNumber() { - return threadNumber; - } - - public void setThreadNumber(int threadNumber) { - this.threadNumber = threadNumber; - } - - public String getJobPrefix() { - return jobPrefix; - } - - public void setJobPrefix(String jobPrefix) { - this.jobPrefix = jobPrefix; - } - - public String getJobGroup() { - return jobGroup; - } - - public void setJobGroup(String jobGroup) { - this.jobGroup = jobGroup; - } - - public String getTriggerPrefix() { - return triggerPrefix; - } - - public void setTriggerPrefix(String triggerPrefix) { - this.triggerPrefix = triggerPrefix; - } - - public String getTriggerGroup() { - return triggerGroup; - } - - public void setTriggerGroup(String triggerGroup) { - this.triggerGroup = triggerGroup; - } - - public String getCronTime() { - return cronTime; - } - - public void setCronTime(String cronTime) { - this.cronTime = cronTime; - } - - public int getDelaySeconds() { - return delaySeconds; - } - - public void setDelaySeconds(int delaySeconds) { - this.delaySeconds = delaySeconds; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("jobPrefix", jobPrefix) - .append("jobGroup", jobGroup) 
- .append("triggerPrefix", triggerPrefix) - .append("triggerGroup", triggerGroup) - .append("cronTime", cronTime) - .append("threadNumber", threadNumber) - .append("delaySeconds", delaySeconds) - .toString(); - } - } - - public static class Data { - HashMap typeKv; - String filePath; - - public HashMap getTypeKv() { - return typeKv; - } - - public void setTypeKv(HashMap typeKv) { - this.typeKv = typeKv; - } - - public String getFilePath() { - return filePath; - } - - public void setFilePath(String filePath) { - this.filePath = filePath; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("typeKv", typeKv) - .append("filePath", filePath) - .toString(); - } - } - - public Image getImage() { - return image; - } - - public void setImage(Image image) { - this.image = image; - } - - public static class Image { - private String icon, dot, logo, footer, arr, articleSign, baseUrl, chartServerUrl, defaultChartUrl, - pieChartUrl, lineChartUrl, baseNginxPath; - - public String getIcon() { - return icon; - } - - public void setIcon(String icon) { - this.icon = icon; - } - - public String getDot() { - return dot; - } - - public void setDot(String dot) { - this.dot = dot; - } - - public String getLogo() { - return logo; - } - - public void setLogo(String logo) { - this.logo = logo; - } - - public String getFooter() { - return footer; - } - - public void setFooter(String footer) { - this.footer = footer; - } - - public String getArr() { - return arr; - } - - public void setArr(String arr) { - this.arr = arr; - } - - public String getBaseUrl() { - return baseUrl; - } - - public void setBaseUrl(String baseUrl) { - this.baseUrl = baseUrl; - } - - public String getChartServerUrl() { - return chartServerUrl; - } - - public void setChartServerUrl(String chartServerUrl) { - this.chartServerUrl = chartServerUrl; - } - - public String getDefaultChartUrl() { - return defaultChartUrl; - } - - public void setDefaultChartUrl(String defaultChartUrl) { - 
this.defaultChartUrl = defaultChartUrl; - } - - public String getPieChartUrl() { - return pieChartUrl; - } - - public void setPieChartUrl(String pieChartUrl) { - this.pieChartUrl = pieChartUrl; - } - - public String getLineChartUrl() { - return lineChartUrl; - } - - public void setLineChartUrl(String lineChartUrl) { - this.lineChartUrl = lineChartUrl; - } - - public String getArticleSign() { - return articleSign; - } - - public void setArticleSign(String articleSign) { - this.articleSign = articleSign; - } - - public String getBaseNginxPath() { - return baseNginxPath; - } - - public void setBaseNginxPath(String baseNginxPath) { - this.baseNginxPath = baseNginxPath; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("icon", icon) - .append("dot", dot) - .append("logo", logo) - .append("footer", footer) - .append("arr", arr) - .append("articleSign", articleSign) - .append("baseUrl", baseUrl) - .append("chartServerUrl", chartServerUrl) - .append("defaultChartUrl", defaultChartUrl) - .append("pieChartUrl", pieChartUrl) - .append("lineChartUrl", lineChartUrl) - .append("baseNginxPath", baseNginxPath) - .toString(); - } - } - - public static class Email { - private String sender; - private String subject; - - public String getSender() { - return sender; - } - - public void setSender(String sender) { - this.sender = sender; - } - - public String getSubject() { - return subject; - } - - public void setSubject(String subject) { - this.subject = subject; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("sender", sender) - .append("subject", subject) - .toString(); - } - } - - public static class Thrift { - private String ip; - private Integer port; - - public String getIp() { - return ip; - } - - public void setIp(String ip) { - this.ip = ip; - } - - public Integer getPort() { - return port; - } - - public void setPort(Integer port) { - this.port = port; - } - - @Override - public String 
toString() { - return new ToStringBuilder(this) - .append("ip", ip) - .append("port", port) - .toString(); - } - } - - public static class ReportBehaviorInterface { - private String reportInterface; - private String manualReportInterface; - - public String getReportInterface() { - return reportInterface; - } - - public void setReportInterface(String reportInterface) { - this.reportInterface = reportInterface; - } - - public String getManualReportInterface() { - return manualReportInterface; - } - - public void setManualReportInterface(String manualReportInterface) { - this.manualReportInterface = manualReportInterface; - } - } - - public static class ManualImage { - private String path; - private String userUploadPath; - private String reportHeadPath; - private String chartsPath; - - public String getPath() { - return path; - } - - public void setPath(String path) { - this.path = path; - } - - public String getUserUploadPath() { - return userUploadPath; - } - - public void setUserUploadPath(String userUploadPath) { - this.userUploadPath = userUploadPath; - } - - public String getReportHeadPath() { - return reportHeadPath; - } - - public void setReportHeadPath(String reportHeadPath) { - this.reportHeadPath = reportHeadPath; - } - - public String getChartsPath() { - return chartsPath; - } - - public void setChartsPath(String chartsPath) { - this.chartsPath = chartsPath; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("path", path) - .append("userUploadPath", userUploadPath) - .append("reportHeadPath", reportHeadPath) - .append("chartsPath", chartsPath) - .toString(); - } - } - - public ReportImage getReportImage() { - return reportImage; - } - - public void setReportImage(ReportImage reportImage) { - this.reportImage = reportImage; - } - - public static class ReportImage { - private String chartsPath; - - public String getChartsPath() { - return chartsPath; - } - - public void setChartsPath(String chartsPath) { - 
this.chartsPath = chartsPath; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("chartsPath", chartsPath) - .toString(); - } - } - - public static class Screen { - private Integer limit; - - public Integer getLimit() { - return limit; - } - - public void setLimit(Integer limit) { - this.limit = limit; - } - - @Override - public String toString() { - return new ToStringBuilder(this) - .append("limit", limit) - .toString(); - } - } - - public static class ThreadConfig { - private int threadLimit = 1000; - - public long getAwaitTermination() { - return awaitTermination; - } - - public void setAwaitTermination(long awaitTermination) { - this.awaitTermination = awaitTermination; - } - - private long awaitTermination = 20L; - - public int getThreadLimit() { - return threadLimit; - } - - public void setThreadLimit(int threadLimit) { - this.threadLimit = threadLimit; - } - } - - @Override - public String toString() { - return "BFDApiConfig{" + - "threadConfig=" + threadConfig + - ", download=" + download + - ", es=" + es + - ", data=" + data + - ", image=" + image + - ", email=" + email + - ", thrift=" + thrift + - ", manualImage=" + manualImage + - ", reportImage=" + reportImage + - ", restBaseUrl='" + restBaseUrl + '\'' + - ", webBaseUrl='" + webBaseUrl + '\'' + - ", metaSearchUrl='" + metaSearchUrl + '\'' + - ", foreignMetaSearchUrl='" + foreignMetaSearchUrl + '\'' + - ", restDataApi='" + restDataApi + '\'' + - ", similarityMonitorWarnNumber='" + similarityMonitorWarnNumber + '\'' + - ", similarityMonitorSplitNumber='" + similarityMonitorSplitNumber + '\'' + - ", mergeSegmentsIndexNumber='" + mergeSegmentsIndexNumber + '\'' + - ", maxEsNumber='" + maxEsNumber + '\'' + - ", weiboChannelTimeUrl='" + weiboChannelTimeUrl + '\'' + - ", screen=" + screen + - ", defaultCacheLimitDay=" + defaultCacheLimitDay + - ", failedTaskRetryNum=" + failedTaskRetryNum + - ", dataCompareCount=" + dataCompareCount + - ", weChatDomainName='" + 
weChatDomainName + '\'' + - ", textPostUrl='" + textPostUrl + '\'' + - ", emotionPostUrl='" + emotionPostUrl + '\'' + - ", codisCacheFlag=" + codisCacheFlag + - ", reportBehaviorInterface=" + reportBehaviorInterface + - ", accessWhiteList=" + accessWhiteList + - ", accessRequestUrl=" + accessRequestUrl + - ", cacheTimeout=" + cacheTimeout + - ", maxFavourNumber=" + maxFavourNumber + - ", wechatNumber=" + wechatNumber + - ", filterType=" + filterType + - ", queryType=" + queryType + - ", bigram=" + bigram + - ", cacheAtDataMonitor=" + cacheAtDataMonitor + - '}'; - } - -} \ No newline at end of file diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java index 40bf76e..ea623fc 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/config/ESConstants.java @@ -6,6 +6,19 @@ import java.util.List; import java.util.Map; public class ESConstants { + //0:社交媒体1:新闻资讯2:博客智库3:论坛贴吧4:网络视频5:电商网站6:搜索引擎7:生活方式 + public static final String DOCTYPESOCIAL = "0"; + public static final String DOCTYPENEWS = "1"; + public static final String DOCTYPEBLOG = "2"; + public static final String DOCTYPEBBS = "3"; + public static final String DOCTYPEVIDEO = "4"; + public static final String DOCTYPEITEM = "5"; + public static final String DOCTYPESEARDH = "6"; + public static final String DOCTYPELIFE = "7"; + + public static final String LOGSTASH = "logstash-"; + + /** * 是否是主贴 */ @@ -465,9 +478,6 @@ public class ESConstants { */ public static final String DOC_TYPE_WEIXIN = "weixin"; - - - public static final String MEDIA_AREA_KEY = "dict"; public static final float BOOST_WEIGHT = 10f; @@ -495,7 +505,6 @@ public class ESConstants { public static final String SUMMARY = "summary"; - // public static final String FILEPATH = "filePath"; public static final String VIDEOURL = "videoUrl"; @@ -650,8 +659,8 @@ public class 
ESConstants { /* * docType(news\bbs...):int 类型常量 */ - public static final int DOCTYPENEWS = 0; - public static final int DOCTYPEBBS = 1; +// public static final int DOCTYPENEWS = 0; +// public static final int DOCTYPEBBS = 1; /* * 组合查询语句key值类型 */ @@ -771,19 +780,33 @@ public class ESConstants { *define es mapping fields */ public static String PUBTIME = "pubTime"; - public static String CHANNEL_HYLANDA = "hylanda"; + public static String PUBTIMESTR = "pubTimeStr"; + public static String PUBDAY = "pubDay"; + public static String PUBDATE = "pubDate"; - public static String PUBTIME_STR = "pubTimeStr"; + public static String CRAWLTIME = "crawlTime"; + public static String CRAWLTIMESTR = "crawlTimeStr"; + public static String CRAWLDAY = "crawlDay"; + public static String CRAWLDATE = "crawlDate"; public static String CREATETIME = "createTime"; public static String CREATETIMESTR = "createTimeStr"; - public static String CREATETIMEDAY = "createDay"; + public static String CREATEDAY = "createDay"; + public static String CREATEDATE = "createDate"; + + public static String OCRTEXT = "ocrText"; + public static String ASRTEXT = "asrText"; + + + - public static String PUTTIME = "putTime"; - public static String CRAWLTIME = "crawlTime"; public static String ORIGINAL_SOURCE = "originalSource"; public static String CONTENT_SIMHASH = "contentSimHash"; public static String QUOTE_COUNT = "quoteCount"; + + + public static String PUTTIME = "putTime"; + public static String CHANNEL_HYLANDA = "hylanda"; /** * 内容 */ @@ -802,7 +825,13 @@ public class ESConstants { public static final String TITLE = "title"; public static final String USER_LEVEL_NAME = "userLevelName"; public static String SOURCE = "source"; - public static String CRAWLDATAFLAG= "crawlDataFlag"; + public static String TRANSLATETITLE = "translateTitle"; + public static String TRANSLATECONTENT = "translateContent"; + public static String AUTHORID = "authorId"; + public static String PRICE = "price"; + public static String 
PRODUCTPARAMETER = "productParameter"; + public static String CRAWLDATAFLAG = "crawlDataFlag"; + public static String CRAWLDATAFLAGTYPE = "crawlDataFlagType"; //微信专属字段,内容固定 value="微信" public static String WEIXIN_SOURCE = "weixinSource"; @@ -821,7 +850,7 @@ public class ESConstants { public static String KEYWORD = "keyword"; public static String ACCOUNT = "account"; - // public static String URL = "url"; + // public static String URL = "url"; public static String SYS_ABSTRACT = "sysAbstract"; public static String SYS_KEYWORDS = "sysKeywords"; @@ -905,15 +934,13 @@ public class ESConstants { public static final String APP = "app"; public static String WEIBO_DEFAULT_AVATOR = "http://tva1.sinaimg.cn/default/images/default_avatar_male_180.gif"; - public static final String BAIDUKOUBEI = "baidukoubei"; - public static final String DIANPING = "dianping"; - public static final String FANGTIANXIA = "fangtianxia"; - public static final String BAIDUTIEBA = "baidutieba"; - public static final String SOUHU = "souhu"; - public static final String XIECHENG = "xiecheng"; - public static final String AUTOHOME = "autohome"; public static final String TMALL = "tmall"; - public static final String YILONG = "yilong"; + public static final String TAOBAO = "taobao"; + public static final String EJINGDONG = "ejingdong"; + public static final String SUNING = "suning"; + public static final String WEIPINHUI = "weipinhui"; + public static final String EJUMEI = "ejumei"; + public static final String SEPHORA = "sephora"; // 电商 public static final String ITEM = "item"; public static final String ITEM_NAME = "itemName"; @@ -991,21 +1018,18 @@ public class ESConstants { public static final String FORWARD_USER_TYPE = "forwardUserType"; public static final String FORWARD_PUBTIME = "forwardPubTime"; - public static final String FORWARD_FLAG = "forwardFlag"; // 转发、回帖标志 1(转发、回帖) 0(原贴) public static final int REGULAR_PUBTIME_QUERY = 0; public static final int REGULAR_PUBTIME_AND_CREATETIME_QUERY = 1; - 
public static final List TYPE_LIST = new ArrayList<>(); public static final List BACKEND_ANALYSIS_LIST = new ArrayList<>(); public static final List BACKEND_AVAILABILITY_LIST = new ArrayList<>(); - public static final List APP_TYPE_LIST = new ArrayList(); @@ -1035,15 +1059,22 @@ public class ESConstants { public static final String IMAGEPATHSIZE = "imagePathSize"; public static final String FILEPATHSIZE = "filePathSize"; public static final String VIDEOPATHSIZE = "videoPathSize"; + public static final String SRCIMAGEPATH = "srcimagePath"; + public static final String SRCVIDEOPATH = "srcvideoPath"; + public static final String SRCFILEPATH = "srcfilePath"; public static final String PGC = "pgc"; public static final String UGC = "ugc"; public static final String EGC = "egc"; - public static final String URL = "url"; public static final String SIZE = "size"; public static final String RESOLUTION= "resolution"; public static final String VIDEOTIME = "videoTime"; + public static final String GOFASTURL = "gofastUrl"; + public static final String ORIGINALURL = "originalUrl"; + public static final String PATHSIZELIST = "pathSizeList"; + public static final String PATH = "path"; + } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/EmailGroup.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/EmailGroup.java new file mode 100644 index 0000000..80486b3 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/EmailGroup.java @@ -0,0 +1,27 @@ +package com.bfd.mf.job.domain.entity; + +import javax.persistence.Entity; +import javax.persistence.Table; + +@Entity +@Table(name = "cl_email_group") +public class EmailGroup extends AbstractEntity{ + private String email; + private String groupName; + + public String getEmail() { + return email; + } + + public void setEmail(String email) { + this.email = email; + } + + public String getGroupName() { + return groupName; + } + + public void setGroupName(String groupName) { + 
this.groupName = groupName; + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/ServiceLoad.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/ServiceLoad.java new file mode 100644 index 0000000..32a22a4 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/ServiceLoad.java @@ -0,0 +1,101 @@ +package com.bfd.mf.job.domain.entity; + +import javax.persistence.Entity; +import javax.persistence.Table; +import java.util.Date; + +@Entity +@Table(name = "cl_service_load") +public class ServiceLoad extends AbstractEntity { + + private String channelId; + private String clientId; + private Integer status; + private String serviceUrl; + private String serviceNames; + private String token; + private String serviceCluster; + private Float serviceStatus; + private Date updateTime; + private String updateUser; + + public String getChannelId() { + return channelId; + } + + public void setChannelId(String channelId) { + this.channelId = channelId; + } + + public String getClientId() { + return clientId; + } + + public void setClientId(String clientId) { + this.clientId = clientId; + } + + public Integer getStatus() { + return status; + } + + public void setStatus(Integer status) { + this.status = status; + } + + public String getServiceUrl() { + return serviceUrl; + } + + public void setServiceUrl(String serviceUrl) { + this.serviceUrl = serviceUrl; + } + + public String getServiceNames() { + return serviceNames; + } + + public void setServiceNames(String serviceNames) { + this.serviceNames = serviceNames; + } + + public String getToken() { + return token; + } + + public void setToken(String token) { + this.token = token; + } + + public String getServiceCluster() { + return serviceCluster; + } + + public void setServiceCluster(String serviceCluster) { + this.serviceCluster = serviceCluster; + } + + public Float getServiceStatus() { + return serviceStatus; + } + + public void setServiceStatus(Float 
serviceStatus) { + this.serviceStatus = serviceStatus; + } + + public Date getUpdateTime() { + return updateTime; + } + + public void setUpdateTime(Date updateTime) { + this.updateTime = updateTime; + } + + public String getUpdateUser() { + return updateUser; + } + + public void setUpdateUser(String updateUser) { + this.updateUser = updateUser; + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Subject.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Subject.java index a2e0782..e43695f 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Subject.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Subject.java @@ -11,7 +11,7 @@ public class Subject extends AbstractEntity { private Integer top; //0:未置顶1:置顶 private String name; // 专题名称 private String description; // 话题描述 - private Integer status; //专题状态 0使用中 1已暂停 + // private Integer status; //专题状态 0使用中 1已暂停 private Integer subjectType; //话题类型:1:分类,2:话题,3:帖子 private Integer keywordsType; //关键词类型:1:简单型,2:关联型,3:专家型 private String simpleContent; // 简单型原始内容,逗号分割 @@ -68,14 +68,14 @@ public class Subject extends AbstractEntity { public void setDescription(String description) { this.description = description; } - - public Integer getStatus() { - return status; - } - - public void setStatus(Integer status) { - this.status = status; - } +// +// public Integer getStatus() { +// return status; +// } +// +// public void setStatus(Integer status) { +// this.status = status; +// } public Integer getSubjectType() { return subjectType; @@ -339,7 +339,7 @@ public class Subject extends AbstractEntity { "top=" + top + ", name='" + name + '\'' + ", description='" + description + '\'' + - ", status=" + status + + // ", status=" + status + ", subjectType=" + subjectType + ", keywordsType=" + keywordsType + ", simpleContent='" + simpleContent + '\'' + diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/SubjectCount.java 
b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/SubjectCount.java index 05cd1ef..4feceb9 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/SubjectCount.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/SubjectCount.java @@ -10,6 +10,7 @@ import java.util.Date; public class SubjectCount extends AbstractEntity { private BigInteger subjectId; private Date createTime; + private Date updateTime; private BigInteger totalCount; private BigInteger todayTotalCount; private BigInteger socialTotalCount; @@ -51,6 +52,14 @@ public class SubjectCount extends AbstractEntity { this.createTime = createTime; } + public Date getUpdateTime() { + return updateTime; + } + + public void setUpdateTime(Date updateTime) { + this.updateTime = updateTime; + } + public BigInteger getTotalCount() { return totalCount; } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Task.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Task.java index 5f4eab9..fd120ba 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Task.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/Task.java @@ -2,53 +2,49 @@ package com.bfd.mf.job.domain.entity; import javax.persistence.*; import java.math.BigInteger; +import java.sql.Timestamp; import java.util.Date; @Entity @Table(name = "cl_task") public class Task extends AbstractEntity { - // @Id -// @GeneratedValue(strategy = GenerationType.IDENTITY) -// private BigInteger id; - // private Integer top; +//id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark + + // private long top; private BigInteger subjectId; + // private String appId; private String externalId; - // private Integer crawlId; + // private long crawlId; private Integer siteType; - private Integer taskType; - //private Integer siteId; 
+ private Integer taskType; // u + // private long siteId; private String cid; - // private String attachTag; - // private Integer crawlStatus; - // private String crawlKeyword; - // private String crawlPageTypes; - // private String crawlContentKey; - // private Integer crawlMode; - // private Integer crawlCyclicityTime; - //private Integer crawlPeriodHour; - // private BigInteger maxPageNum; - // private BigInteger secondaryDataMaxPage; + // private String attachTag; + private Integer crawlStatus; // u + // private String crawlKeyword; +// private String crawlPageTypes; + private String crawlContentKey; + // private long crawlMode; +// private int crawlCyclicityTime; //crawl_cyclicity_time +// private long crawlPeriodHour; +// private long maxPageNum; +// private long secondaryDataMaxPage; private BigInteger crawlStartTime; private BigInteger crawlEndTime; private String crawlDataFlag; private BigInteger dataTotal; private BigInteger todayDataTotal; private Integer cacheNum; - // private Date createTime; - //private String createUser; - // private String createUserId; - private Date updateTime; - // private String updateUser; - // private String updateUserId; - // private Integer del; - -// public BigInteger getId() { -// return id; -// } -// -// public void setId(BigInteger id) { -// this.id = id; -// } + // private java.sql.Timestamp createTime; +// private String createUser; +// private String createUserId; + private java.sql.Timestamp updateTime; + // private String updateUser; +// private String updateUserId; + private Integer del; +// private String fileName; +// private String fileRemark; + public BigInteger getSubjectId() { return subjectId; @@ -66,14 +62,6 @@ public class Task extends AbstractEntity { this.externalId = externalId; } - public Integer getTaskType() { - return taskType; - } - - public void setTaskType(Integer taskType) { - this.taskType = taskType; - } - public Integer getSiteType() { return siteType; } @@ -81,13 +69,14 @@ public class Task 
extends AbstractEntity { public void setSiteType(Integer siteType) { this.siteType = siteType; } -// public Integer getSiteId() { -// return siteId; -// } -// -// public void setSiteId(Integer siteId) { -// this.siteId = siteId; -// } + + public Integer getTaskType() { + return taskType; + } + + public void setTaskType(Integer taskType) { + this.taskType = taskType; + } public String getCid() { return cid; @@ -97,6 +86,22 @@ public class Task extends AbstractEntity { this.cid = cid; } + public Integer getCrawlStatus() { + return crawlStatus; + } + + public String getCrawlContentKey() { + return crawlContentKey; + } + + public void setCrawlContentKey(String crawlContentKey) { + this.crawlContentKey = crawlContentKey; + } + + public void setCrawlStatus(Integer crawlStatus) { + this.crawlStatus = crawlStatus; + } + public BigInteger getCrawlStartTime() { return crawlStartTime; } @@ -145,11 +150,19 @@ public class Task extends AbstractEntity { this.cacheNum = cacheNum; } - public Date getUpdateTime() { + public Timestamp getUpdateTime() { return updateTime; } - public void setUpdateTime(Date updateTime) { + public void setUpdateTime(Timestamp updateTime) { this.updateTime = updateTime; } + + public Integer getDel() { + return del; + } + + public void setDel(Integer del) { + this.del = del; + } } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/TaskCount.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/TaskCount.java new file mode 100644 index 0000000..f48faff --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/TaskCount.java @@ -0,0 +1,36 @@ +package com.bfd.mf.job.domain.entity; + +import javax.persistence.Entity; +import javax.persistence.Table; + +@Entity +@Table(name = "cl_task_count") +public class TaskCount extends AbstractEntity { + private String countDate; + private float avgCount; + private float avgSpeed; + + public String getCountDate() { + return countDate; + } + + public void 
setCountDate(String countDate) { + this.countDate = countDate; + } + + public float getAvgCount() { + return avgCount; + } + + public void setAvgCount(float avgCount) { + this.avgCount = avgCount; + } + + public float getAvgSpeed() { + return avgSpeed; + } + + public void setAvgSpeed(float avgSpeed) { + this.avgSpeed = avgSpeed; + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/UploadTask.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/UploadTask.java new file mode 100644 index 0000000..9e44397 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/entity/UploadTask.java @@ -0,0 +1,90 @@ +package com.bfd.mf.job.domain.entity; + +import javax.persistence.*; +import java.math.BigInteger; + +@Entity +@Table(name = "cl_task") +public class UploadTask extends AbstractEntity { + + // private long top; + private BigInteger subjectId; + private Integer taskType; // u + private Integer crawlStatus; // u + private String fileName; + private String crawlDataFlag; + + public String getCrawlDataFlag() { + return crawlDataFlag; + } + + public void setCrawlDataFlag(String crawlDataFlag) { + this.crawlDataFlag = crawlDataFlag; + } + + public BigInteger getSubjectId() { + return subjectId; + } + + public void setSubjectId(BigInteger subjectId) { + this.subjectId = subjectId; + } + + public int getTaskType() { + return taskType; + } + + public void setTaskType(int taskType) { + this.taskType = taskType; + } + + public int getCrawlStatus() { + return crawlStatus; + } + + public void setCrawlStatus(int crawlStatus) { + this.crawlStatus = crawlStatus; + } + + public String getFileName() { + return fileName; + } + + public void setFileName(String fileName) { + this.fileName = fileName; + } + + // private int del; +// private String fileRemark; + // private String appId; +// private String externalId; + // private long crawlId; +// private int siteType; + + // private long siteId; + // private String cid; + // private 
String attachTag; + + // private String crawlKeyword; +// private String crawlPageTypes; +// private String crawlContentKey; +// private long crawlMode; +// private int crawlCyclicityTime; //crawl_cyclicity_time +// private long crawlPeriodHour; +// private long maxPageNum; +// private long secondaryDataMaxPage; +// private BigInteger crawlStartTime; +// private BigInteger crawlEndTime; +// private String crawlDataFlag; +// private BigInteger dataTotal; +// private BigInteger todayDataTotal; +// private Integer cacheNum; + // private java.sql.Timestamp createTime; +// private String createUser; +// private String createUserId; +// private Timestamp updateTime; + // private String updateUser; +// private String updateUserId; + + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/EmailGroupRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/EmailGroupRepository.java new file mode 100644 index 0000000..1db188d --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/EmailGroupRepository.java @@ -0,0 +1,14 @@ +package com.bfd.mf.job.domain.repository; + +import com.bfd.mf.job.domain.entity.EmailGroup; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.CrudRepository; + +import java.util.List; + +public interface EmailGroupRepository extends CrudRepository { + + @Query(value = "SELECT * FROM cl_email_group WHERE id IN (SELECT email_group_id from cl_site_email_res WHERE site_id = (SELECT id FROM `cl_site` WHERE is_usable = 0 AND cid = ?1));", nativeQuery = true) + List getEmailGroupsByCid(String cid); + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ResultDetailRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ResultDetailRepository.java index bf7ea8d..717d959 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ResultDetailRepository.java +++ 
b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ResultDetailRepository.java @@ -1,7 +1,6 @@ package com.bfd.mf.job.domain.repository; import com.bfd.mf.job.domain.entity.ResultDetail; -import com.bfd.mf.job.domain.entity.Task; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.CrudRepository; diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ServiceLoadRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ServiceLoadRepository.java new file mode 100644 index 0000000..4fb22ab --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/ServiceLoadRepository.java @@ -0,0 +1,17 @@ +package com.bfd.mf.job.domain.repository; + +import com.bfd.mf.job.domain.entity.ServiceLoad; +import com.bfd.mf.job.domain.entity.TaskCount; +import org.springframework.data.jpa.repository.Modifying; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.CrudRepository; +import org.springframework.transaction.annotation.Transactional; + +public interface ServiceLoadRepository extends CrudRepository { + + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_service_load set service_status =?2 ,update_time = now() where id =?1 ", nativeQuery = true) + void updateTaskCount(Integer id, Float serviceStatus); + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java index 8678bba..9194515 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/SubjectCountRepository.java @@ -7,7 +7,7 @@ import org.springframework.data.repository.CrudRepository; import 
org.springframework.transaction.annotation.Transactional; import java.math.BigInteger; -import java.util.List; +import java.util.Date; public interface SubjectCountRepository extends CrudRepository { @@ -20,8 +20,8 @@ public interface SubjectCountRepository extends CrudRepository findAllSubject(); +// @Query(value = "SELECT ct.id,ct.subject_id,ct.cache_num,ct.cid,ct.external_id,ct.crawl_data_flag,ct.crawl_start_time,ct.crawl_end_time,cs.kafka_switch,cs.kafka_addr,cs.kafka_topic,cs.go_fast_addr,cs.`status`,ct.del as delt,cs.del as dels FROM cl_task ct JOIN cl_subject cs ON (ct.subject_id = cs.id) WHERE cs.status = 0 AND ct.del = 0 AND cs.del = 0",nativeQuery = true) +// List findAllSubject(); /** @@ -41,7 +41,7 @@ public interface SubjectCountRepository extends CrudRepository { @Query(value = "update cl_subject set cache_recalculate_status=?2 where id=?1", nativeQuery = true) void updateCacheRecalculateStatus(Long subjectId,int reCacheRecalculateStatus); + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set crawl_status=?2,end_time=now() where subject_id=?1", nativeQuery = true) + void updateTaskStatus(Long subjectId,int status); + // // /** // * 更新进度 diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskCountRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskCountRepository.java new file mode 100644 index 0000000..ea27752 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskCountRepository.java @@ -0,0 +1,7 @@ +package com.bfd.mf.job.domain.repository; + +import com.bfd.mf.job.domain.entity.TaskCount; +import org.springframework.data.repository.CrudRepository; + +public interface TaskCountRepository extends CrudRepository { +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java index 
d37dfe8..6fd0539 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/TaskRepository.java @@ -8,33 +8,59 @@ import org.springframework.transaction.annotation.Transactional; import java.math.BigInteger; import java.util.List; +import java.util.Map; public interface TaskRepository extends CrudRepository { - @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true) - List findAllTask(); +// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE NOW() > SUBDATE(update_time,interval -15 minute) AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)", nativeQuery = true) +// List findAllTask(); - @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE cache_num = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE `status` = 0 AND del =0)",nativeQuery = true) + @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM cl_task WHERE task_type <> 3 AND crawl_status = 1 AND cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true) List findAllNewTask(); - @Query(value = "SELECT 
id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0",nativeQuery = true) + // 统计服务查询 要统计的任务,之前由于所有任务都要半小时统计,任务太多会把E搞挂,就只统计 update_time 近一天的吧! +// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark FROM cl_task WHERE del = 0 AND crawl_status <> 3",nativeQuery = true) // AND crawl_status <> 3 + // 每天只统计两种情况的任务 + //1、当天完成的任务:crawl_status=3 and end_time > 前天 + //2、状态为采集中或者 暂停的任务 crawl_status=0 or crawl_tatus=1 + // 其他的任务就不用每天都统计了!! + @Query(value = " SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del,file_name,file_remark,crawl_content_key FROM `cl_task` WHERE del = 0 AND ((crawl_status = 1 OR crawl_status = 0) OR (crawl_status = 3 AND end_time > date_sub(curdate(),interval 2 day))) ;",nativeQuery = true) List findAllBydel0(); +// +// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE cache_num = 0 AND data_total = 0 AND del = 0 AND subject_id in (SELECT id from cl_subject WHERE del =0) ORDER BY id DESC ",nativeQuery = true) +// List findAllNewTask(); +// +// @Query(value = "SELECT id,subject_id,external_id,site_type, task_type,cid,crawl_status,crawl_start_time,crawl_end_time,crawl_data_flag,data_total,today_data_total,cache_num,update_time,del FROM cl_task WHERE del=0 ",nativeQuery = true) +// List findAllBydel0(); - @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND 
subject_id=?1",nativeQuery = true) - List findTasksBySbujectIdAndDel0(BigInteger subjectId); +// @Query(value = "SELECT id,subject_id,external_id,site_type,task_type,cid,crawl_data_flag,cache_num,crawl_start_time,crawl_end_time,data_total,today_data_total,update_time FROM cl_task WHERE del=0 AND subject_id=?1",nativeQuery = true) +// List findTasksBySbujectIdAndDel0(BigInteger subjectId); - @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2",nativeQuery = true) + @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true) Long findDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); - @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2",nativeQuery = true) + @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND site_type=?2 ",nativeQuery = true) Long findTodayDataTotalBySbujectIdAndSiteType(BigInteger subjectId,int siteType); - @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) + @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true) Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); - @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) + @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2 ",nativeQuery = true) Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); + // 统计任务的抓取量! 
任务质量 任务状态为“已完成” 的 今天入库的总数据量 / 总任务数 + @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE end_time >?1 AND end_time 3 ",nativeQuery = true) + Long findTodayDataTotal(String taskStartTime ,String taskEndTime); + +// @Query(value = " SELECT count(*) FROM cl_task WHERE today_data_total > 0 AND task_level < 2 AND crawl_status = 3 ",nativeQuery = true) +// Long findTodayDataTotalTaskNum(); +// @Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true) +// List getTaskNeedUpLoad(); +// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) +// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); +// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) +// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); + /** @@ -61,7 +87,22 @@ public interface TaskRepository extends CrudRepository { @Query(value = "update cl_task set data_total =?2 , today_data_total =?3 where id =?1 ", nativeQuery = true) void updateTaskCount(Long id, Long totalCount, Long todayCount); + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set crawl_status =?4 where id =?1 ", nativeQuery = true) + void updateCrawlStatus(long taskId); + + @Query(value = " SELECT id,start_time,end_time FROM cl_task WHERE del=0 AND crawl_status = 3 AND start_time >?1 AND end_time > findByCrawlTime(String taskStartTime, String taskEndTime); + + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "UPDATE cl_task SET today_data_total=0 WHERE end_time 0 ", nativeQuery = true) + void updateTodayTotalCount(String updateTime); + // 获得前一天完成的任务的时间差(除欧莱雅的任务和上传的任务) + @Query(value = "SELECT TIMESTAMPDIFF(MINUTE, start_time,end_time) FROM cl_task WHERE del = 0 AND task_type 
<>3 AND crawl_status = 3 AND data_total > 0 AND end_time > ?1 AND end_time < ?2 ",nativeQuery = true) + List findTaskByCrawlTime(String taskStartTime, String taskEndTime); // // /** diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/UploadTaskRepository.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/UploadTaskRepository.java new file mode 100644 index 0000000..59630fc --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/domain/repository/UploadTaskRepository.java @@ -0,0 +1,100 @@ +package com.bfd.mf.job.domain.repository; + +import com.bfd.mf.job.domain.entity.UploadTask; +import org.springframework.data.jpa.repository.Modifying; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.CrudRepository; +import org.springframework.transaction.annotation.Transactional; + +import java.util.List; + +public interface UploadTaskRepository extends CrudRepository { + + @Query(value = "SELECT id,subject_id,task_type,crawl_status,file_name,crawl_data_flag,del " + + "from cl_task " + + "WHERE del = 0 AND task_type = 3 AND crawl_status=0 limit 1",nativeQuery = true) + List getTaskNeedUpLoad(); + + +// @Query(value = "SELECT id,subject_id,task_type,crawl_status,crawl_data_flag,del from cl_task WHERE del = 0 AND task_type = 3 AND crawl_status=1 ",nativeQuery = true) +// List getTaskNeedUpLoad(); + + + +// @Query(value = " SELECT SUM(data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) +// Long findDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); +// +// @Query(value = " SELECT SUM(today_data_total) FROM cl_task WHERE del=0 AND subject_id=?1 AND task_type=?2",nativeQuery = true) +// Long findTodayDataTotalBySbujectIdAndTaskType(BigInteger subjectId,int taskType); + + + + /** + * 更新每个任务 拉数据次数 + */ + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set cache_num=?1 where 
id=?2", nativeQuery = true) + Integer updateStatus(int cache_num, long id); + + /** + * 乐观锁 + */ + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set cache_num=?1 where id=?2 and cache_num=?3", nativeQuery = true) + Integer tryLock(Integer newStatus, long id, Integer oldStatus); + + /** + * 修改每个任务的统计结果 + */ + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set data_total =?2 , today_data_total =?3 where id =?1 ", nativeQuery = true) + void updateTaskCount(Long id, Long totalCount, Long todayCount); + + @Modifying + @Transactional(rollbackFor = Exception.class) + @Query(value = "update cl_task set crawl_status =?2 ,data_total = ?3 ,crawl_start_time = ?4 , crawl_end_time = ?5 where id =?1 ", nativeQuery = true) + void updateCrawlStatus(long taskId,int crawlStatus,int dataTotal,long crawlStartTime,long crawlEndTime); + + +// +// /** +// * 更新进度 +// */ +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "update cl_label_backtrace_task set processed=processed+:processed,satisfied=satisfied+:satisfied,updated_time=:updatedTime where id=:id", nativeQuery = true) +// Integer increaseStat(@Param("processed") long processed, @Param("satisfied") long satisfied, @Param("updatedTime") long updatedTime, @Param("id") long id); +// +// /** +// * 设置进度 +// */ +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "update cl_label_backtrace_task set progress=:progress,updated_time=:updatedTime where id=:id and progress <= :progress", nativeQuery = true) +// Integer setProgress(@Param("progress") double progress, @Param("updatedTime") long updatedTime, @Param("id") long id); +// +// /** +// * 增加进度 +// * +// * @Query(value = "update cl_label_backtrace_task set progress=progress+:progress,updated_time=:updatedTime where id=:id", nativeQuery = true) +// */ +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = 
"update cl_label_backtrace_task set progress=progress+:progress,updated_time=:updatedTime where id=:id", nativeQuery = true) +// Integer increaseProgress(@Param("progress") double progress, @Param("updatedTime") long updatedTime, @Param("id") long id); +// +// +// /** +// * 重置状态 +// * 如果status=2,并且updated_time<给定lastUpdatedTime,并且retry_times<=max_retry_times,重置status为1 +// */ +// @Modifying +// @Transactional(rollbackFor = Exception.class) +// @Query(value = "update cl_label_backtrace_task set status=1 where updated_time<=?1 and status=2 and retry_times<=max_retry_times", nativeQuery = true) +// Integer resetStatus(Long lastUpdatedTime); + + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java index 0672dfa..66502a6 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/DownLoadFile.java @@ -5,6 +5,7 @@ import okhttp3.*; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; +import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.URL; @@ -12,8 +13,9 @@ import java.util.HashMap; import java.util.Map; public class DownLoadFile { + public static Map downloadAndSaveFile(String getUrl,String putUrl){ - String realUrl = ""; + String realUrl = "";double size; Map realresult= new HashMap<>(); try{ String files [] = getUrl.split("/"); @@ -23,16 +25,17 @@ public class DownLoadFile { header.put("Connection","keep-alive"); try { Map downloadresult = OkHttpUtils.doGetBytes(getUrl,header); - double size= (double) downloadresult.get("size"); - if (downloadresult.containsKey("content") && size > 0){ + size= (double) downloadresult.get("size"); + if (downloadresult.containsKey("content")&&size>0){ byte[] content = (byte[]) downloadresult.get("content"); size= (double) downloadresult.get("size"); + size = 
Double.valueOf(String.format("%.3f", size)); Thread.sleep(4000); String result = DownLoadFile.upload(putUrl,fileName,content); Thread.sleep(4000); realUrl = JSONObject.parseObject(result).getString("url"); realresult.put("realUrl",realUrl); - realresult.put("size",String.format("%.2f", size)); + realresult.put("size",size+""); } } catch (IOException e) { @@ -72,7 +75,7 @@ public class DownLoadFile { return result; } - public static String imagesize(String getUrl ) throws IOException{ + public static String imagesize(String getUrl) throws IOException{ String realUrl = "";Integer size; String realresult=""; try{ @@ -88,5 +91,33 @@ public class DownLoadFile { return realresult; } + public static Map upload(String uploadUrl,String fileName,File file) { + Map resultMap = new HashMap<>(); + try { + OkHttpClient httpClient = new OkHttpClient(); + MultipartBody multipartBody = new MultipartBody.Builder(). + setType(MultipartBody.FORM) + .addFormDataPart("file", fileName, + RequestBody.create(MediaType.parse("multipart/form-data;charset=utf-8"), + file)) + .addFormDataPart("output", "json") + .build(); + Request request = new Request.Builder() + .url(uploadUrl) + .post(multipartBody) + .build(); + Response response = httpClient.newCall(request).execute(); + if (response.isSuccessful()) { + ResponseBody body = response.body(); + if (body != null) { + resultMap =JSONObject.parseObject( body.string()); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + return resultMap; + } + } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java index 6857675..9b6215e 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/download/OkHttpUtils.java @@ -188,5 +188,4 @@ public class OkHttpUtils { System.gc(); } } - } diff --git 
a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/BacktraceService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/BacktraceService.java deleted file mode 100644 index 003f9a5..0000000 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/BacktraceService.java +++ /dev/null @@ -1,1315 +0,0 @@ -package com.bfd.mf.job.service; - -import com.alibaba.fastjson.JSON; -import com.alibaba.fastjson.JSONArray; -import com.alibaba.fastjson.JSONObject; -import com.bfd.crawler.elasti.ElastiProducer; -import com.bfd.crawler.utils.JsonUtils; -import com.bfd.mf.job.config.AppConfig; -import com.bfd.mf.job.config.ESConstants; -import com.bfd.mf.job.domain.entity.Subject; -import com.bfd.mf.job.domain.entity.Task; -import com.bfd.mf.job.domain.repository.SubjectRepository; -import com.bfd.mf.job.util.EsUtils; -import com.bfd.mf.job.util.Kafka010Utils; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.RateLimiter; -import org.apache.commons.lang3.ArrayUtils; -import org.apache.commons.lang3.exception.ExceptionUtils; -import org.assertj.core.util.Lists; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.index.query.RangeQueryBuilder; -import org.joda.time.LocalDateTime; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; -import org.springframework.util.CollectionUtils; -import org.springframework.util.StringUtils; - -import javax.annotation.PostConstruct; -import java.util.*; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; - -@Service -public 
class BacktraceService { - private static final Logger LOGGER = LoggerFactory.getLogger(BacktraceService.class); - private static final long PERIOD_MILLS = 1 * 3600 * 1000L; - private static BlockingQueue>> P_TASK_CACHE_RANGE = new LinkedBlockingQueue<>(); - // private static Map C_UNNORMAL_TASK_CACHE = new ConcurrentHashMap<>(); - private static Map C_TASK_PROGRESS_CACHE = Maps.newHashMap(); - private static Lock C_TASK_PROGRESS_CACHE_LOCK = new ReentrantLock(); - private static Map C_TASK_PROCESSED_CACHE = Maps.newHashMap(); - private static Map C_TASK_SATISFIED_CACHE = Maps.newHashMap(); - private static Map C_TASK_SEGMENT_CACHE = Maps.newHashMap(); - private static Lock C_TASK_STAT_CACHE_LOCK = new ReentrantLock(); - private static BlockingQueue DATA_CACHE = new LinkedBlockingQueue<>(10240); - private static BlockingQueue NEW_DATA_CACHE = new LinkedBlockingQueue<>(10240); - private RateLimiter dataRateLimiter; - private RateLimiter pRateLimiter; - private RateLimiter cRateLimiter; - private static int subjectEsNum = 1; - private static String indexType = "docs"; - private static int bussinessType = 1; - - @Autowired - private AppConfig config; - @Autowired - private SubjectRepository subjectRepository; - - private Kafka010Utils.KafkaProducer kafkaProducer; - - @PostConstruct - public void init() { - // 注册数据查询来源 - EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source - EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target - pRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); - cRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); - dataRateLimiter = RateLimiter.create(config.esMiniBulkRate()); - kafkaProducer = Kafka010Utils.getProducer(config.getBrokerList()); - } - - /* - 尝试在指定时间内获得许可,如果获得了,则直接返回,如果没有获得,则执行下面的流程 - */ - public void tryAcquire() { - if (!pRateLimiter.tryAcquire()) {//是在指定的时间内尝试地获得1个许可,如果获取不到则返回false - return; - } - // 查询 cl_subject 表中 
status=0 del =0 cache_recalculate_status = 1 - List> subjectTaskList = subjectRepository.querySubjectTaskByCacheRecalculateStatus(); - for (Map subject: subjectTaskList) { - System.out.println("需要拉数据的任务:"+JSONObject.toJSONString(subject)); - Map> cache = Maps.newHashMap(); - Long subjectId = Long.valueOf( subject.get("id").toString()); - subjectRepository.updateCacheRecalculateStatus(subjectId,2);// - cache.put(subjectId, Lists.newArrayList(0L, 0L, 1.0, 1L, 1L)); - try { - P_TASK_CACHE_RANGE.put(cache); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } -// // 查询 cl_label_backtrace_task 表中 状态为 1 and retry_times <= max_retry_times 的任务出来 -// List backtraceTaskList = SubjectRepository.findAllByStatus(1); -// for (LabelBacktraceTask task : taskList) { -// if (!isAvailable(task)) { -// continue; -// } -// LOGGER.info("Executing task:{}.", JSON.toJSONString(task)); -// failureService.createTable(task.getFailureTableName()); -// Long totalSegment = 1L;//(task.getDateEnd() - task.getDateStart()) / PERIOD_MILLS; // 3600000 -// Long segment = 1L; -// Double progressFactor = 1.0 / totalSegment; - -// } - } - - - public void produce(){ - Map> range = P_TASK_CACHE_RANGE.poll();// poll -->若队列为空,返回null - if (Objects.isNull(range)) { - return; - } - Long subjectId = 0L; - - for (Map.Entry> entry : range.entrySet()) { - subjectId = entry.getKey(); - } - Subject subject = subjectRepository.findById(subjectId).get(); - - String clusterName = config.esNormalClusterName(); - String subjectIndexName = "cl_major_"+subjectId; - - long fromMills = subject.getCacheStart().longValue(); - long toMills = subject.getCacheEnd().longValue(); - String[] sourceIndices = EsUtils.getIndices(AppConfig.CL_INDEX, "_", - fromMills, toMills, AppConfig.DATE_FORMAT, config.esNormalUpper(), config.esNormalStandby()); - - String esQuery = subject.getEsQuery(); - BoolQueryBuilder qb = EsUtils.getBoolQueryBuilderFromSqlStr(esQuery); - - LOGGER.info("索引范围:"+sourceIndices[0]+" ~ "+ 
sourceIndices[sourceIndices.length-1] +" ; QB : \n{}.",qb); - - Long finalSubjectId = subjectId; - EsUtils.scrollQuery(clusterName, sourceIndices, ESConstants.INDEX_TYPE, - qb, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES, - dataList -> { - try { - for (JSONObject data : dataList) { - data.put("subjectId", finalSubjectId); - System.out.println(subjectIndexName + " data --- "+data); - writerToMajorES(subjectIndexName,data); - } - } catch (Exception e) { - throw new RuntimeException(e); - } - }); - } - - private static void writerToMajorES(String indexName , Map responseMap) { - System.out.println("==========================写入到【专题】ES : ==========" + indexName + " - "+responseMap.get("docId") ); - ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, subjectEsNum, indexName, indexType); - elastiProducer.sendMessageToEs(JsonUtils.toJSONString(responseMap)); - } - -// public void flushData() { -// if (dataRateLimiter.tryAcquire()) { -// List dataList = Lists.newArrayList(); -// EsUtils.BulkItem item = DATA_CACHE.poll(); -// while (Objects.nonNull(item)) { -// if (dataList.size() >= config.esMiniBulkSize()) { -// EsUtils.bulkIndex(config.esMiniClusterName(), dataList, ESConstants._ID); -// LOGGER.debug("Flush data, size:{}.", dataList.size()); -// dataList.clear(); -// } -// dataList.add(item); -// item = DATA_CACHE.poll(); -// } -// if (dataList.size() > 0) { -// EsUtils.bulkIndex(config.esMiniClusterName(), dataList, ESConstants._ID); -// LOGGER.debug("Flush data, size:{}.", dataList.size()); -// } -// -// } -// } - - -// public void produce() { -// Map> range = P_TASK_CACHE_RANGE.poll();// poll -->若队列为空,返回null -// if (Objects.isNull(range)) { -// return; -// } -// Long taskId = 0L; -// Long fromMills = 0L, toMills = 0L; -// Double progressFactor = 0D; -// Long totalSegment = 0L, segment = 0L; -// for (Map.Entry> entry : range.entrySet()) { -// taskId = entry.getKey(); -// fromMills = (Long) entry.getValue().get(0); -// toMills = (Long) 
entry.getValue().get(1); -// progressFactor = (Double) entry.getValue().get(2); -// totalSegment = (Long) entry.getValue().get(3); -// segment = (Long) entry.getValue().get(4); -// } -// LabelBacktraceTask task = labelBacktraceTaskRepository.findById(taskId).get(); -// if (Objects.isNull(task) || !task.getStatus().equals(2)) { -// return; -// } -// try { -// // 创建过滤条件 & 任务预处理 -// ConditionDef condition = ConditionDef.build(task, config.getRuleRest()); -// String index_name = task.getTaskCode();// "_content" -// //String index_name = "cl_index_2019-05-13"; -//// String[] sourceIndices = EsUtils.getIndices(AppConfig.CONFIG_ES_SOURCE_INDEX_PREFIX, "_", -//// fromMills, toMills, AppConfig.DATE_FORMAT, config.esSourceUpper(), config.esSourceStandby()); -// QueryBuilder queryBuilder; -// final long from = fromMills, to = toMills, seg = segment, totalSeg = totalSegment; -// String clusterName,srcIndexName; -// //如果是已有任务创建 -// if(ConditionDef.TASK_TYPE_EXIST.equals(condition.getType())){ -// LabelBacktraceTask existsTask = labelBacktraceTaskRepository.findById(condition.getExistTaskId()).get(); -// clusterName = config.esReplySourceClusterName(); -// // srcIndexName = existsTask.getIndexName(); -// queryBuilder = getExistQb(fromMills, toMills); -// } else{ -// clusterName = config.esSourceClusterName(); -// // srcIndexName = String.join(",", sourceIndices); -// // 由于主贴不计算时间,所以这里的时间可以写死了!! 
-// fromMills = 0L; -// toMills = new Date().getTime(); -// queryBuilder = getPrimaryQb(condition, fromMills, toMills); -// } -// LOGGER.info("Query primary, task:{}, from:{}, to:{}, indices:{}, dsl:{}.", -// task.getId(), -// new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT), -// new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT), -// index_name, -// queryBuilder.toString()); -// // 传入的参数 集群名称,索引名称,索引类型(type), 查询Builder,scroll查询页面大小,scroll查询scrollId有效时间 -// EsUtils.scrollQuery(clusterName, index_name, EsConstants.INDEX_TYPE, -// queryBuilder, EsConstants.SCROLL_PAGE_SIZE, EsConstants.SCROLL_MINUTES, -// dataList -> { -// try { -// for (JSONObject data : dataList) { -// saveService.initPtDt(data); -// // 发送主贴 -// Message msg = new Message() -// .setTaskId(task.getId()) -// .setSrcIndexName(index_name) -// .setIndexName(task.getIndexName()) -// .setTopic(task.getTopic()) -// .setReplyTopic(task.getReplyTopic()) -// .setFailureTableName(task.getFailureTableName()) -// .setCondition(condition) -// .setFromMills(from) -// .setToMills(to) -// .setData(data) -// .setIsData(true) -// .setSendTime(System.currentTimeMillis()) -// .setProgressFactor(-1.0) -// .setTotalSegment(totalSeg) -// .setSegment(seg); -// kafkaProducer.send(task.getTopic(), JSON.toJSONString(msg)); -// LOGGER.debug("Send message, taskId:{}, isData:{}, progressFactor:{}, totalSegment:{},segment:{}.", -// task.getId(), msg.getIsData(), -// msg.getProgressFactor(), msg.getTotalSegment(), msg.getSegment()); -// } -// } catch (Exception e) { -// throw new RuntimeException(e); -// } -// }); -// // 发送任务进度消息 -// Message msg = new Message() -// .setTaskId(task.getId()) -// .setProgressFactor(progressFactor) -// .setTotalSegment(totalSegment) -// .setSegment(segment) -// .setFromMills(fromMills) -// .setToMills(toMills) -// .setFailureTableName(task.getFailureTableName()) -// .setIsData(false) -// .setSendTime(System.currentTimeMillis()); -// kafkaProducer.send(task.getTopic(), 
JSON.toJSONString(msg)); -// LOGGER.debug("Send message, taskId:{}, isData:{}, progressFactor:{}, totalSegment:{},segment:{}.", -// task.getId(), msg.getIsData(), -// msg.getProgressFactor(), msg.getTotalSegment(), msg.getSegment()); -// } catch (Exception e) { -// JSONObject msg = new JSONObject(); -// msg.put("message", "produce error due to [" + ExceptionUtils.getStackTrace(e) + "]"); -// msg.put("from", fromMills); -// msg.put("to", toMills); -// labelBacktraceTaskRepository.updateStatus(5, msg.toJSONString(), System.currentTimeMillis(), task.getId()); -// LOGGER.error("Produce error due to [{}].", e.getMessage(), e); -// } -// } -// -// public boolean isAvailable(LabelBacktraceTask task) { -// // 乐观锁,更新status=1的数据 -// int r = labelBacktraceTaskRepository.tryLock(2, task.getId(), task.getStatus()); -// if (r <= 0) { -// return false; -// } -// -// long currentTime = System.currentTimeMillis(); -// String indexName = new StringBuilder() -// .append(AppConfig.CONFIG_ES_TARGET_ANALYSIS_INDEX_PREFIX) -// .append("_") -// .append(config.getVersion()) -// .append("_") -// .append(task.getId()) -// .append("_") -// .append(currentTime) -// .toString(); -// String topic = config.getAnalysisTopic().get((int) (task.getId() % config.getAnalysisTopic().size())); -// String replyTopic = config.getAnalysisReplyTopic().get((int) (task.getId() % config.getAnalysisReplyTopic().size())); -// String failureTableName = new StringBuilder() -// .append(AppConfig.CONFIG_ANALYSIS_FAILURE_TABLE_PREFIX) -// .append("_") -// .append(task.getId()) -// .toString(); -// -// task.setIndexName(indexName); -// task.setTopic(topic); -// task.setReplyTopic(replyTopic); -// task.setFailureTableName(failureTableName); -// task.setStatus(2); -// task.setMessage("执行中"); -// task.setUpdatedTime(currentTime); -// task.setRetryTimes(task.getRetryTimes() + 1); -// labelBacktraceTaskRepository.save(task); -// return true; -// } -// -// public QueryBuilder getPrimaryQb(final ConditionDef condition,long 
fromMills, long toMills) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(EsConstants.FIELD_PUB_TIME) -// .gte(fromMills) -// .lt(toMills); -// qb.must(rangeQueryBuilder); -// -// // 有效性 -// Integer[] availability = condition.getAvailability(); -// if (ArrayUtils.isNotEmpty(availability)) { -// qb.must(QueryBuilders.termsQuery(EsConstants.FIELD_AVAILABILITY, condition.getAvailability())); -// } -// // 渠道 -// JSONArray channel = condition.getChannel(); -// if (!CollectionUtils.isEmpty(channel)) { -// BoolQueryBuilder channelQb = QueryBuilders.boolQuery(); -// for (int i = 0; i < channel.size(); i++) { -// JSONObject item = channel.getJSONObject(i); -// String name = item.getString("channel"); -// JSONArray site = item.getJSONArray("site"); -// if (CollectionUtils.isEmpty(site)) { -// channelQb.should(QueryBuilders.termsQuery(EsConstants.FIELD_DOC_TYPE, name)); -// } else { -// channelQb.should(QueryBuilders -// .boolQuery() -// .must(QueryBuilders.termQuery(EsConstants.FIELD_DOC_TYPE, name)) -// .must(QueryBuilders.termsQuery(EsConstants.FIELD_SOURCE, site.toArray(new String[0]))) -// ); -// } -// } -// qb.must(channelQb); -// } -// // 情感 -// Integer[] sentimentDef = condition.getSentiment(); -// if (Objects.nonNull(sentimentDef)) { -// BoolQueryBuilder sentimentQb = QueryBuilders.boolQuery(); -// for (int i = 0; i < sentimentDef.length; i++) { -// if (sentimentDef[i] == 1) { -// // 正面 -// sentimentQb.should(QueryBuilders.rangeQuery(EsConstants.FIELD_SYS_SENTIMENT) -// .gte(0.8).lt(1.1)); -// } else if (sentimentDef[i] == 2) { -// // 中性 -// sentimentQb.should(QueryBuilders.rangeQuery(EsConstants.FIELD_SYS_SENTIMENT) -// .gte(0.2).lt(0.8)); -// } else if (sentimentDef[i] == 3) { -// // 负面 -// sentimentQb.should(QueryBuilders.rangeQuery(EsConstants.FIELD_SYS_SENTIMENT) -// .gte(0).lt(0.2)); -// } -// } -// qb.must(sentimentQb); -// } -// // content长度 -// if 
(condition.getEnableContentLimit()) { -// qb.must(QueryBuilders.rangeQuery(EsConstants.FIELD_CONTENT_LENGTH) -// .gt(0).lte(config.getContentLimit())); -// } -// -// String type = condition.getType(); -// if (ConditionDef.TASK_TYPE_KEYWORD.equals(type)) { -// BoolQueryBuilder kwQb = QueryBuilders.boolQuery(); -// BoolQueryBuilder itemQb = QueryBuilders.boolQuery(); -// // 关键词过滤条件 -// // 产品名称 -// JSONObject product = condition.getProductName(); -// if (Objects.nonNull(product)) { -// String productInclude = product.getString("include"); -// String productExclude = product.getString("exclude"); -// BoolQueryBuilder productQb = getKeywordQb(productInclude, productExclude, EsConstants.FIELD_ITEM_NAME); -// if (productQb.hasClauses()) { -// itemQb.must(productQb); -// } -// } -// // 品牌名称 -// JSONObject brand = condition.getBrandName(); -// if (Objects.nonNull(brand)) { -// String brandInclude = brand.getString("include"); -// String brandExclude = brand.getString("exclude"); -// BoolQueryBuilder brandQb = getKeywordQb(brandInclude, brandExclude, EsConstants.FIELD_PRODUCT_BRAND); -// if (brandQb.hasClauses()) { -// itemQb.must(brandQb); -// } -// } -// // 产品参数 -// JSONObject parameter = condition.getProductParameter(); -// if (Objects.nonNull(parameter)) { -// String parameterInclude = parameter.getString("include"); -// String parameterExclude = parameter.getString("exclude"); -// BoolQueryBuilder parameterQb = getKeywordQb(parameterInclude, parameterExclude, EsConstants.FIELD_PRODUCT_PARAMETER); -// if (parameterQb.hasClauses()) { -// itemQb.must(parameterQb); -// } -// } -// if (itemQb.hasClauses()) { -// kwQb.should(itemQb); -// } -// BoolQueryBuilder tcQb = QueryBuilders.boolQuery(); -// // 标题 -// JSONObject title = condition.getTitle(); -// if (Objects.nonNull(title)) { -// String titleInclude = title.getString("include"); -// String titleExclude = title.getString("exclude"); -// BoolQueryBuilder titleQb = getKeywordQb(titleInclude, titleExclude, 
EsConstants.FIELD_TITLE); -// if (titleQb.hasClauses()) { -// tcQb.must(titleQb); -// } -// } -// // 内容 -// JSONObject content = condition.getContent(); -// if (Objects.nonNull(content)) { -// String contentInclude = content.getString("include"); -// String contentExclude = content.getString("exclude"); -// BoolQueryBuilder contentQb = getKeywordQb(contentInclude, contentExclude, EsConstants.FIELD_CONTENT); -// if (contentQb.hasClauses()) { -// tcQb.must(contentQb); -// } -// } -// if (tcQb.hasClauses()) { -// kwQb.should(tcQb); -// } -// if (kwQb.hasClauses()) { -// qb.must(kwQb); -// } -// } else if (ConditionDef.TASK_TYPE_PRODUCT.equals(type)) { -// // 商品url过滤条件 -// String[] urls = condition.getProductUrls(); -// QueryBuilder queryBuilder = QueryBuilders.termsQuery(EsConstants.FIELD_URL, urls); -// qb.must(queryBuilder); -// } else if (ConditionDef.TASK_TYPE_ACCOUNT.equals(type)) { -// BoolQueryBuilder accountQb = QueryBuilders.boolQuery(); -// String[] weiboAccounts = condition.getWeiboAccounts(); -// if (weiboAccounts != null && weiboAccounts.length > 0) { -// BoolQueryBuilder weiboBuilder = QueryBuilders.boolQuery(); -// QueryBuilder accountBuilder = QueryBuilders.termsQuery(EsConstants.FILED_AUTHOR, weiboAccounts); -// QueryBuilder docTypeBuilder = QueryBuilders.termQuery(EsConstants.FIELD_DOC_TYPE, EsConstants.DOC_TYPE_WEIBO); -// weiboBuilder.must(accountBuilder); -// weiboBuilder.must(docTypeBuilder); -// accountQb.should(weiboBuilder); -// } -// String[] weixinAccounts = condition.getWeixinAccounts(); -// if (weixinAccounts != null && weixinAccounts.length > 0) { -// BoolQueryBuilder weixinBuilder = QueryBuilders.boolQuery(); -// QueryBuilder accountBuilder = QueryBuilders.termsQuery(EsConstants.FIELD_EN_SOURCE, weixinAccounts); -// QueryBuilder docTypeBuilder = QueryBuilders.termQuery(EsConstants.FIELD_DOC_TYPE, EsConstants.DOC_TYPE_WEIXIN); -// weixinBuilder.must(accountBuilder); -// weixinBuilder.must(docTypeBuilder); -// 
accountQb.should(weixinBuilder); -// } -// String[] otherAccounts = condition.getOtherAccounts(); -// if (otherAccounts != null && otherAccounts.length > 0) { -// BoolQueryBuilder otherBuilder = QueryBuilders.boolQuery(); -// QueryBuilder accountBuilder = QueryBuilders.termsQuery(EsConstants.FILED_AUTHOR, otherAccounts); -// otherBuilder.must(accountBuilder); -// accountQb.should(otherBuilder); -// } -// qb.must(accountQb); -// } -// -// return qb; -// } -// -// public BoolQueryBuilder getKeywordQb(String include, String exclude, String field) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// if (StringUtils.hasLength(include)) { -// qb = ExpressionUtils.getBoolQueryBuilderFromExpression(include, field); -// } -// if (StringUtils.hasLength(exclude)) { -// String[] excludes = exclude.replaceAll("\\s", "").split(","); -// for (int i = 0; i < excludes.length; i++) { -// qb.mustNot(QueryBuilders.matchPhraseQuery(field, excludes[i])); -// } -// } -// return qb; -// } -// -// /** -// * 获取已有任务QB -// */ -// public QueryBuilder getExistQb(long fromMills, long toMills) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(EsConstants.FIELD_PUB_TIME) -// .gte(fromMills) -// .lt(toMills); -// qb.must(rangeQueryBuilder); -// QueryBuilder queryBuilder = QueryBuilders.termQuery(EsConstants.FIELD_DATA_COUNT, 1); -// qb.must(queryBuilder); -// return qb; -// } -// -// public void consume(Message message) { -// flushConsumer(); -// Long taskId = message.getTaskId(); -// LOGGER.debug("Consume message, taskId:{}, isData:{}, progressFactor:{}, totalSegment:{},segment:{}.", -// taskId, message.getIsData(), -// message.getProgressFactor(), message.getTotalSegment(), message.getSegment()); -// -// if (C_UNNORMAL_TASK_CACHE.containsKey(taskId)) { -// return; -// } -// LabelBacktraceTask task = labelBacktraceTaskRepository.findById(taskId).get(); -//// Double progress = task.getProgress(); -// if 
(!message.getIsData()) { -// System.out.println("** 报送信息:"+JsonUtils.toJSONString(message)); -// // 进度信息 -// try { -// // 判断是否要调用煜东的接口对回帖进行打标 "segment":0 说明是主贴的报送信息。 -// if(null != message.getSegment() && message.getSegment() == 0 ){ -// if(message.getProgressFactor() == 0.5){ // 说明主贴数据已经读完,可以调用回帖 这个是我自己定义的报送信息 -// List> channels = JsonUtils.parseArray(task.getChannel()); -// Double progress = task.getProgress(); // 当前数据库中的 百分比 -// //[{"channel":"item","site":[],"site_num":"1562207595668704"}] -// if(channels.size() == 1 && channels.get(0).get("channel").equals(EsConstants.DOC_TYPE_PRODUCT) && progress < 1.0 && progress > 0.5){ // 如果只有电商平台 -// labelBacktraceTaskRepository.setProgress(0.99, System.currentTimeMillis(), task.getId()); -// }else{ -// System.out.println("** 报送信息:需要调用评论啦~ TaskId = " +taskId + "Channels = " + JsonUtils.toJSONString(channels) ); -// Thread.sleep(15000); // 怀疑 一拿到报送就调用的话会有一些主贴正在跑,调出来的回帖会打不上标!! -// boolean status = startGetReply(task); -// if (status && progress < 0.51 ) { -// labelBacktraceTaskRepository.setProgress(0.51, System.currentTimeMillis(), taskId); -// } else { -// LOGGER.error("回帖接口出错,请查看"); -// } -// } -// } -// // 如果主贴的报送信息 以处理的数据量 == 总量,那就可以 -// } -// // 回帖的百分比处理!! 只要有回帖,百分比都这么处理!! -// if(null != message.getPercent() && message.getPercent() > 0) { -// flushProgressReply(message,task); -//// Double real_progress = message.getPercent(); // 只有回帖才有这个字段哦!! -//// LOGGER.info("**** TaskId : " + taskId + " 回帖的百分比为:数据库中:" + progress + " || Message中: " + real_progress); -//// // message 中报送的 百分比是 1(说明回帖推送完了) ,并且数据库中的百分比 小于 0.91 就强制改成 0.96 否则的话任务会 一直卡再【执行中】,不能完成。 -//// if (progress == 1) { // 数据库中的百分比 -//// System.out.println("已经是 1 了,就不用管了!!"); -//// } else if (real_progress == 1.0 && progress < 1) { // message 过来的 是 1.0 ,但是 数据库中小于 1 -//// labelBacktraceTaskRepository.setProgress(0.98, System.currentTimeMillis(), taskId); -//// // 当 数据库中的百分比小于 1 的时候修改,否则就不用修改了! 
message 过来的 小于1 但是 大于 0.1 的话,此时的百分比就是 0.5+(real_progress/2) -//// } else if (progress < 1.0 && 1.0 > real_progress && real_progress > 0.03) { -//// labelBacktraceTaskRepository.setProgress(0.5 + (real_progress / 2), System.currentTimeMillis(), taskId); -//// } -// } -// }catch (Exception e){ -// e.printStackTrace(); -// } -// return; -// } -// try { -// System.out.println("The Message :" + JsonUtils.toJSONString(message)); -// // 3 打标 -// if (null != message || !message.equals("")) { -// boolean flag; -// if (message.getData().getInteger(EsConstants.FIELD_PRIMARY_POST).equals(1)) { -// flag = markPrimary(message); -// } else { -// flag = markReply(message); -// } -// try { -// C_TASK_STAT_CACHE_LOCK.lock(); -// // C_TASK_PROGRESS_CACHE_LOCK.lock(); -// // C_TASK_PROGRESS_CACHE.putIfAbsent(taskId,0D); -// C_TASK_SATISFIED_CACHE.putIfAbsent(taskId, 0L);// 已打标的数据量统计 -// C_TASK_PROCESSED_CACHE.putIfAbsent(taskId, 0L);// 已处理的数据量统计 -// C_TASK_SEGMENT_CACHE.put(taskId,0L); -// C_TASK_PROCESSED_CACHE.put(taskId, C_TASK_PROCESSED_CACHE.get(taskId) + 1);// 更新要打标的所有数据量 对应数据库中的 processed -// Long processed = task.getProcessed(); // 数据库中的 已处理的条数 -// // message 中的 数据总量 > 数据库中的已处理的数据总量 并且 channel 中不只有 item 的时候 -// if(message.getTotalSegment() >= processed ) {// 只要没有回帖,这个必然是成立的! 
回帖的时候 message.getTotalSegment()=0 -// flushProgressPrimary(message,task); -//// Double new_progress = processed * 1.0/message.getTotalSegment(); -//// try { -//// List> channels = JsonUtils.parseArray(task.getChannel()); -//// // 如果channel 只有一个,并且是 item 的,百分比就直接更新 -//// if(channels.size() == 1 && channels.toString().contains(EsConstants.DOC_TYPE_PRODUCT) && progress != 1.0){ -//// labelBacktraceTaskRepository.setProgress(new_progress, System.currentTimeMillis(), task.getId()); -//// }else if( progress < 0.5){ // 选择多个平台,不管是否是电商的,都按 0.5 算 -//// labelBacktraceTaskRepository.setProgress(new_progress/2, System.currentTimeMillis(), taskId); -//// } -//// } catch (Exception e) { -//// e.printStackTrace(); -//// } -// C_TASK_SEGMENT_CACHE.put(taskId, message.getTotalSegment()); -// } -// if (flag) { -// C_TASK_SATISFIED_CACHE.put(taskId, C_TASK_SATISFIED_CACHE.get(taskId) + 1);// 打标成功的数据量 对应数据库中的 satisfied -// } -// } finally { -// C_TASK_STAT_CACHE_LOCK.unlock(); -// // C_TASK_PROGRESS_CACHE_LOCK.unlock(); -// } -// } else { -// System.out.println("message 为空!!!"); -// } -// } catch (Exception e) { -// long t = System.currentTimeMillis(); -// Failure failure = new Failure() -// .setIndexName(message.getIndexName()) -// .setFromMills(0L)//message.getFromMills() -// .setToMills(0L)//message.getToMills() -// .setDocId(Objects.nonNull(message.getData()) ? 
message.getData().getString(EsConstants.FILED_DOC_ID) : "") -// .setStatus(1) -// //.setMessage("consume error due to [" + ExceptionUtils.getStackTrace(e) + "]") -// .setMessage("consume error due to [consume 处理报错!]") -// .setCreatedTime(t) -// .setUpdatedTime(t); -// System.out.println("message == " +JsonUtils.toJSONString(message)); -// System.out.println("docId == "+message.getData().getString(EsConstants.FILED_DOC_ID)); -// if (!failureService.insert(message.getFailureTableName(), failure)) { -// labelBacktraceTaskRepository.updateStatus(5, "失败", System.currentTimeMillis(), taskId); -// } -// LOGGER.error("Consume error due to [{}].", e.getMessage(), e); -// } -// // } -// } -// -// /** -// * 传入的参数就是 kafka 中的 json -// * kafka 中的 json 是 组装好的 需要打标的任务 -// * condition 是从数据库中查 taskID 拿到的任务的 参数 -// * data 是 数据 -// * @param primary -// * @return -// */ -// -// public boolean markPrimary(final Message primary) { -// saveService.initPtDt(primary.getData()); -// List dataGroup = filterService.filter(primary.getCondition(), primary.getData()); -// if (CollectionUtils.isEmpty(dataGroup)) { -// return false; -// } -// // 主贴打标结果 -// JSONObject pPtResult = new JSONObject(); -// AtomicInteger commentsCount = new AtomicInteger(0); -// AtomicInteger quoteCount = new AtomicInteger(0); -// // 电商数据不用查询回帖 item != dataGroup(0).getString(docType) -// if (!EsConstants.DOC_TYPE_PRODUCT.equals(dataGroup.get(0).getString(EsConstants.FIELD_DOC_TYPE))) { -// for (JSONObject data : dataGroup) { -// if (StringUtils.hasLength(data.getString(EsConstants.FIELD_PT))) { -// pPtResult.put(data.getString(EsConstants.FIELD_PT), data.getInteger(EsConstants.FIELD_PT_SENTIMENT)); -// } -// } -// //System.out.println(JsonUtils.toJSONString(primary)); -// Map contentPTresult = AddToRedisService.getContentPTResult(primary.getCondition(), pPtResult, primary.getData()); -// AddToRedisService.sed2Redis(primary.getTaskId() ,primary.getData(), contentPTresult, primary.getSrcIndexName()); -//// long fromMills = 
primary.getData().getLongValue(EsConstants.FIELD_PUB_TIME); -//// long toMills = fromMills; -//// String [] sourceIndices = {}; -//// String[] sourceIndices = EsUtils.getIndices(AppConfig.CONFIG_ES_REPLY_SOURCE_INDEX_PREFIX, "_", -//// fromMills, toMills, AppConfig.DATE_FORMAT, config.esReplySourceUpper(), config.esReplySourceStandby()); -// // 如果是关键词任务 -// // 如果配置了dt,命中关键词的回帖(打dt)&(记录声量),没命中关键词的(不打dt)&(记录声量) -// // 如果没有配置dt,所有回帖记录声量 -//// if (ConditionDef.TASK_TYPE_KEYWORD.equals(primary.getCondition().getType()) -//// && StringUtils.hasLength(primary.getCondition().getDt())) { -//// QueryBuilder qbMatchKw = getReplyQbMatchKw(primary); -//// sendReply(primary, fromMills, toMills, sourceIndices, qbMatchKw, dataGroup, pPtResult, commentsCount, quoteCount); -//// QueryBuilder qbNotMatchKw = getReplyQbNotMatchKw(primary); -//// primary.getCondition().setDt(""); -//// sendReply(primary, fromMills, toMills, sourceIndices, qbNotMatchKw, dataGroup, pPtResult, commentsCount, quoteCount); -//// } else { -//// QueryBuilder qb = getReplyQb(primary); -//// sendReply(primary, fromMills, toMills, sourceIndices, qb, dataGroup, pPtResult, commentsCount, quoteCount); -//// } -// // 查询该主贴有多少对话数(回帖数) ,索引不同,需要修改 -// String srcIndexName = primary.getSrcIndexName().replace("content","comment"); -// // 加个判断,主贴没有时间限制的话,查一下回帖再时间范围内的对话数是否为0 -// if(Long.valueOf(primary.getData().get("pubTime").toString()) == 0) { -// AtomicInteger a_commentsCount = new AtomicInteger(0); -// QueryBuilder qb1 = getReplyQbWithDate(primary); -// //System.out.println("*** "+srcIndexName+" **** "+qb1.toString()); -// sendReply(srcIndexName, qb1, a_commentsCount); -// // 如果是 0 的话 就丢弃了!! 
-// if (a_commentsCount.get() == 0) { -// return false; -// } -// } -// QueryBuilder qb = getReplyQbWithOutDate(primary); -// sendReply(srcIndexName, qb, commentsCount, quoteCount); // 获取到已入库的主贴对应的回帖、转发数 -// } -// -// for (JSONObject data : dataGroup) { -// data.put(EsConstants.FIELD_COMMENTS_COUNT, commentsCount.get()); -// data.put(EsConstants.FIELD_QUOTE_COUNT, quoteCount.get()); -// //System.out.println(JsonUtils.toJSONString(data)); -// // 如果有 title ,但是标题为空,则直接写标题为空 -// if(data.containsKey(EsConstants.FIELD_TITLE)){ -// if(null == data.get(EsConstants.FIELD_TITLE) || data.get(EsConstants.FIELD_TITLE).equals("")){ -// data.put(EsConstants.FIELD_TITLE , "标题为空"); -// } -// }else {// 如果没有 title ,并且content 不为空的话 就把 content 写到 title 中 -// if(null != data.get(EsConstants.FIELD_CONTENT) || !data.get(EsConstants.FIELD_CONTENT).equals("")){ -// data.put(EsConstants.FIELD_TITLE , data.get(EsConstants.FIELD_CONTENT)); -// }else{ -// data.put(EsConstants.FIELD_TITLE , "标题为空"); -// } -// } -// try { -// LOGGER.info("IndexName == " +primary.getIndexName() + " || data == "+ JsonUtils.toJSONString(data) ); -// DATA_CACHE.put(EsUtils.buildBulkItem(primary.getIndexName(), EsConstants.INDEX_TYPE, data)); -// // 将符合要求的数据写到另一个Index 中 index 的名字就在原来的基础上加上 ddup 吧 -// Integer dataCount = data.getInteger("dataCount"); -// Set ptAll = (Set) data.get("ptAll"); -// if(dataCount == 1){ -// for (String pt:ptAll) { -// if(!pt.contains("丅null")) { -// data.put(pt, 1); -// } -// } -// NEW_DATA_CACHE.put(EsUtils.buildBulkItem(primary.getIndexName()+"_dedup", EsConstants.INDEX_TYPE, data)); -// } -// } catch (InterruptedException e) { -// Thread.currentThread().interrupt(); -// } -// } -// return true; -// } -// -// public void sendReply(final Message primary, long fromMills, long toMills, String srcIndexName, QueryBuilder qb, -// List dataGroup, JSONObject pPtResult, AtomicInteger commentsCount, AtomicInteger quoteCount) { -//// LOGGER.info("Query reply, task:{}, from:{}, to:{}, indices:{}, 
dsl:{}.", -//// primary.getTaskId(), -//// new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT), -//// new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT), -//// srcIndexName, -//// qb.toString()); -// EsUtils.scrollQuery(config.esReplySourceClusterName(), srcIndexName, EsConstants.INDEX_TYPE, -// qb, EsConstants.SCROLL_PAGE_SIZE_REPLY, EsConstants.SCROLL_MINUTES_REPLY, -// replyList -> { -// try { -// for (JSONObject reply : replyList) { -//// saveService.initPtDt(reply); -//// reply.put(EsConstants.FIELD_PRIMARY_POST, 0); -//// reply.put(EsConstants.FIELD_DOC_TYPE, dataGroup.get(0).getString(EsConstants.FIELD_DOC_TYPE)); -//// -//// JSONObject pPt = new JSONObject(); -//// if (StringUtils.hasLength(primary.getCondition().getPt())) { -//// pPt.put("id", primary.getCondition().getPt()); -//// pPt.put("version", primary.getCondition().getPtVersion()); -//// pPt.put("result", pPtResult); -//// } -//// primary.getCondition() -//// .setDtFields(new String[]{EsConstants.FIELD_CONTENT}) -//// .setPrimaryPt(pPt); -//// Message item = new Message() -//// .setTaskId(primary.getTaskId()) -//// .setCondition(primary.getCondition()) -//// .setSrcIndexName(srcIndexName) -//// .setTopic(primary.getTopic()) -//// .setReplyTopic(primary.getReplyTopic()) -//// .setFailureTableName(primary.getFailureTableName()) -//// .setIndexName(primary.getIndexName()) -//// .setFromMills(primary.getFromMills()) -//// .setToMills(primary.getToMills()) -//// .setData(reply) -//// .setIsData(true) -//// .setProgressFactor(-1.0) -//// .setTotalSegment(primary.getTotalSegment()) -//// .setSegment(primary.getSegment()) -//// .setSendTime(System.currentTimeMillis()); -//// kafkaProducer.send(item.getReplyTopic(), JSON.toJSONString(item)); -// Integer sign = reply.getInteger(EsConstants.FIELD_REPLY_SIGN); -// if (Objects.isNull(sign)) { -// commentsCount.incrementAndGet(); -// } else if (sign.equals(1)) { -// // sign=1 转发 -// quoteCount.incrementAndGet(); -// } else if 
(sign.equals(2)) { -// // sign=2 评论 -// commentsCount.incrementAndGet(); -// } -// //commentsCount.incrementAndGet(); -// } -// } catch (Exception e) { -// throw new RuntimeException(e); -// } -// }); -// } -// -// -// /** -// * 只是为了查询 某个主贴的对话数! -// * @param srcIndexName 回帖的 ES索引 -// * @param qb -// * @param commentsCount 评论数 -// * @param quoteCount 转发数 -// */ -// public void sendReply( String srcIndexName, QueryBuilder qb, AtomicInteger commentsCount, AtomicInteger quoteCount) { -// EsUtils.scrollQuery(config.esReplySourceClusterName(), srcIndexName, EsConstants.INDEX_TYPE, -// qb, EsConstants.SCROLL_PAGE_SIZE_REPLY, EsConstants.SCROLL_MINUTES_REPLY, -// replyList -> { -// try { -// for (JSONObject reply : replyList) { -// Integer sign = reply.getInteger(EsConstants.FIELD_REPLY_SIGN); -// if (Objects.isNull(sign)) { -// commentsCount.incrementAndGet(); -// } else if (sign.equals(1)) { -// // sign=1 转发 -// quoteCount.incrementAndGet(); -// } else if (sign.equals(2)) { -// // sign=2 评论 -// commentsCount.incrementAndGet(); -// } -// //commentsCount.incrementAndGet(); -// } -// } catch (Exception e) { -// throw new RuntimeException(e); -// } -// }); -// } -// -// -// public void sendReply( String srcIndexName, QueryBuilder qb, AtomicInteger commentsCount) { -// EsUtils.scrollQuery(config.esReplySourceClusterName(), srcIndexName, EsConstants.INDEX_TYPE, -// qb, EsConstants.SCROLL_PAGE_SIZE_REPLY, EsConstants.SCROLL_MINUTES_REPLY, -// replyList -> { -// try { -// for (JSONObject reply : replyList) { -// Integer sign = reply.getInteger(EsConstants.FIELD_REPLY_SIGN); -// if (Objects.isNull(sign)) { -// commentsCount.incrementAndGet(); -// } else if (sign.equals(2)) { -// // sign=2 评论 -// commentsCount.incrementAndGet(); -// } -// } -// } catch (Exception e) { -// throw new RuntimeException(e); -// } -// }); -// } -// -// -// /** -// * 打标回帖 -// * @param reply -// * @return -// */ -// public boolean markReply(final Message reply) { -// List replyGroup = 
filterService.filter(reply.getCondition(), reply.getData()); -// if (CollectionUtils.isEmpty(replyGroup)) { -// return false; -// } -// for (JSONObject data : replyGroup) { -// try { -// LOGGER.info("IndexName == " +reply.getIndexName() + " || data == "+ JsonUtils.toJSONString(data) ); -// DATA_CACHE.put(EsUtils.buildBulkItem(reply.getIndexName(), EsConstants.INDEX_TYPE, data)); -// -// Integer dataCount = data.getInteger("dataCount"); -// Set ptAll = (Set) data.get("ptAll"); -// if(dataCount == 1){ -// for (String pt:ptAll) { -// if(!pt.contains("丅null")) { -// data.put(pt, 1); -// } -// } -// NEW_DATA_CACHE.put(EsUtils.buildBulkItem(reply.getIndexName()+"_dedup", EsConstants.INDEX_TYPE, data)); -// } -// } catch (InterruptedException e) { -// Thread.currentThread().interrupt(); -// } -// } -// return true; -// } -// -// /** -// * @param primary -// * 查询的是 回帖的对话数,不加时间限制,要抓取到的总量 -// * @return -// */ -// public QueryBuilder getReplyQbWithOutDate(final Message primary) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// qb.must(QueryBuilders.termQuery(EsConstants.FILED_DOC_ID, -// primary.getData().getString(EsConstants.FILED_DOC_ID))); -// -// // 有效性 -// Integer[] availability = primary.getCondition().getAvailability(); -// if (ArrayUtils.isNotEmpty(availability)) { -// qb.must(QueryBuilders.termsQuery(EsConstants.FIELD_AVAILABILITY, primary.getCondition().getAvailability())); -// } -// -// // content长度 -// if (primary.getCondition().getEnableContentLimit()) { -// qb.must(QueryBuilders.rangeQuery(EsConstants.FIELD_CONTENT_LENGTH) -// .gt(0).lte(config.getContentLimit())); -// } -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(EsConstants.FIELD_PUB_TIME) -// .gte(primary.getCondition().getDateStart()) -// .lt(primary.getCondition().getDateEnd()); -// qb.must(rangeQueryBuilder); -// return qb; -// } -// -// -// /** -// * @param primary -// * 查询的是 回帖的对话数,有时间限制的!! 
-// * @return -// */ -// public QueryBuilder getReplyQbWithDate(final Message primary) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// qb.must(QueryBuilders.termQuery(EsConstants.FILED_DOC_ID, primary.getData().getString(EsConstants.FILED_DOC_ID))); -// -// // 有效性 -// Integer[] availability = primary.getCondition().getAvailability(); -// if (ArrayUtils.isNotEmpty(availability)) { -// qb.must(QueryBuilders.termsQuery(EsConstants.FIELD_AVAILABILITY, primary.getCondition().getAvailability())); -// } -// -// // content长度 -// if (primary.getCondition().getEnableContentLimit()) { -// qb.must(QueryBuilders.rangeQuery(EsConstants.FIELD_CONTENT_LENGTH) -// .gt(0).lte(config.getContentLimit())); -// } -// // 时间 -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(EsConstants.FIELD_PUB_TIME) -// .gte(primary.getCondition().getDateStart()) -// .lt(primary.getCondition().getDateEnd()); -// qb.must(rangeQueryBuilder); -// -// return qb; -// } -// -// public QueryBuilder getReplyQbMatchKw(final Message primary) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// qb.must(QueryBuilders.termQuery(EsConstants.FILED_DOC_ID, -// primary.getData().getString(EsConstants.FILED_DOC_ID))); -// -// // 有效性 -// Integer[] availability = primary.getCondition().getAvailability(); -// if (ArrayUtils.isNotEmpty(availability)) { -// qb.must(QueryBuilders.termsQuery(EsConstants.FIELD_AVAILABILITY, primary.getCondition().getAvailability())); -// } -// -// // content长度 -// if (primary.getCondition().getEnableContentLimit()) { -// qb.must(QueryBuilders.rangeQuery(EsConstants.FIELD_CONTENT_LENGTH) -// .gt(0).lte(config.getContentLimit())); -// } -// -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(EsConstants.FIELD_PUB_TIME) -// .gte(primary.getCondition().getDateStart()) -// .lt(primary.getCondition().getDateEnd()); -// qb.must(rangeQueryBuilder); -// -// BoolQueryBuilder tcQb = QueryBuilders.boolQuery(); -// // 标题 -// JSONObject title = 
primary.getCondition().getTitle(); -// if (Objects.nonNull(title)) { -// String titleInclude = title.getString("include"); -// String titleExclude = title.getString("exclude"); -// BoolQueryBuilder titleQb = getKeywordQb(titleInclude, titleExclude, EsConstants.FIELD_TITLE); -// if (titleQb.hasClauses()) { -// tcQb.must(titleQb); -// } -// } -// // 内容 -// JSONObject content = primary.getCondition().getContent(); -// if (Objects.nonNull(content)) { -// String contentInclude = content.getString("include"); -// String contentExclude = content.getString("exclude"); -// BoolQueryBuilder contentQb = getKeywordQb(contentInclude, contentExclude, EsConstants.FIELD_CONTENT); -// if (contentQb.hasClauses()) { -// tcQb.must(contentQb); -// } -// } -// if (tcQb.hasClauses()) { -// qb.must(tcQb); -// } -// -// return qb; -// } -// -// public QueryBuilder getReplyQbNotMatchKw(final Message primary) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// qb.must(QueryBuilders.termQuery(EsConstants.FILED_DOC_ID, -// primary.getData().getString(EsConstants.FILED_DOC_ID))); -// -// // 有效性 -// Integer[] availability = primary.getCondition().getAvailability(); -// if (ArrayUtils.isNotEmpty(availability)) { -// qb.must(QueryBuilders.termsQuery(EsConstants.FIELD_AVAILABILITY, primary.getCondition().getAvailability())); -// } -// -// // content长度 -// if (primary.getCondition().getEnableContentLimit()) { -// qb.must(QueryBuilders.rangeQuery(EsConstants.FIELD_CONTENT_LENGTH) -// .gt(0).lte(config.getContentLimit())); -// } -// -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(EsConstants.FIELD_PUB_TIME) -// .gte(primary.getCondition().getDateStart()) -// .lt(primary.getCondition().getDateEnd()); -// qb.must(rangeQueryBuilder); -// -// BoolQueryBuilder tcQb = QueryBuilders.boolQuery(); -// // 标题 -// JSONObject title = primary.getCondition().getTitle(); -// if (Objects.nonNull(title)) { -// String titleInclude = title.getString("include"); -// String titleExclude = 
title.getString("exclude"); -// BoolQueryBuilder titleQb = getKeywordQb(titleInclude, titleExclude, EsConstants.FIELD_TITLE); -// if (titleQb.hasClauses()) { -// tcQb.must(titleQb); -// } -// } -// // 内容 -// JSONObject content = primary.getCondition().getContent(); -// if (Objects.nonNull(content)) { -// String contentInclude = content.getString("include"); -// String contentExclude = content.getString("exclude"); -// BoolQueryBuilder contentQb = getKeywordQb(contentInclude, contentExclude, EsConstants.FIELD_CONTENT); -// if (contentQb.hasClauses()) { -// tcQb.must(contentQb); -// } -// } -// if (tcQb.hasClauses()) { -// qb.mustNot(tcQb); -// } -// -// return qb; -// } -// -// public void flushConsumer() { -// // 刷新缓存 -// if (cRateLimiter.tryAcquire()) { -// syncTask(); // 刷新任务状态,尤其是评论进度百分比,及任务是否完成 -// // flushProgress(); // 刷新进度缓存到db -// flushStat(); -// failureService.flushAll(); -// } -// } -// -// /** -// * 刷新正在执行的任务到缓存, 并且跟进 progress 的值是否 >0.9 如果 该值大于0.9 表示任务完成,修改 status和 message -// */ -// public void syncTask() { -// C_UNNORMAL_TASK_CACHE.clear(); -// List errorTaskList = labelBacktraceTaskRepository.findAllByStatusError(); -// for (LabelBacktraceTask task : errorTaskList) { -// C_UNNORMAL_TASK_CACHE.put(task.getId(), task); -// } -// LOGGER.debug("Sync unnormal tasks, taskIds:{}.", JSON.toJSONString(C_UNNORMAL_TASK_CACHE.keySet())); -// List taskList = labelBacktraceTaskRepository.findAllByStatus(2); -// for (LabelBacktraceTask task : taskList) { -// System.out.println("taskId = " + task.getId() + " | getTotalSegment = "+C_TASK_SEGMENT_CACHE.get(task.getId()) + -// " | getProcessed = " + task.getProcessed() + " | getProgress = " + task.getProgress()); -// // 表示已经处理完成 -// if (task.getProgress() > 0.95 && (System.currentTimeMillis() - task.getUpdatedTime()) > 240000) { -// labelBacktraceTaskRepository.updateStatus(3, "完成", System.currentTimeMillis(), task.getId()); -// labelBacktraceTaskRepository.setProgress(1.0, System.currentTimeMillis(), task.getId()); 
-// LOGGER.info("Task {} finished.", task.getId()); -// labelParseResultService.remove(task.getId()); -// -// try { -// Thread.sleep(3000); -// // 将redis 中的 对应的任务删除 -// List tasksList = labelBacktraceTaskRepository.findAllByStatus(3); -// for (LabelBacktraceTask task_3 : tasksList) { -// System.out.println("DELETE REDIS : " + task_3.getId()); -// // if((System.currentTimeMillis() - task1.getUpdatedTime()) < 1500000 && (System.currentTimeMillis() - task1.getUpdatedTime()) > 1000000) { -// boolean status = AddToRedisService.deleteRedis(task_3.getId()); -// if(status) { -// System.out.println("Delete Redis Success ,TaskId = " + task_3.getId()); -// } -// //} -// } -// } catch (InterruptedException e) { -// e.printStackTrace(); -// } -// } -// } -// } -// -// /** -// * 调用接口将回帖组装写入 kafka -// * @param task -// */ -// public boolean startGetReply(LabelBacktraceTask task) { -// boolean status = true; -// try{ -// JSONObject params = getParams(task); -// LOGGER.info("TaskId : " + task.getId() + " | startGetReply : "+JsonUtils.toJSONString(params)); -// System.out.println("TaskId : " + task.getId() + " #### params #### : "+JsonUtils.toJSONString(params)); -// String response = RestUtils.post_c(config.getCommentRest(), params); -// System.out.println("TaskId : " + task.getId() + " #### Response #### : "+response); -// LOGGER.info("TaskId : " + task.getId() + " | The reply Response : "+response); -// if(!response.contains("add reputation task success")){ -// return false; -// } -// }catch (Exception e){ -// e.printStackTrace(); -// return false; -// } -// return status; -// } -// -// /** -// * 刷新进度缓存到db -// */ -// public void flushProgress() { -// try { -// C_TASK_PROGRESS_CACHE_LOCK.lock(); -// for (Long taskId : C_TASK_PROGRESS_CACHE.keySet()) { -// double progress = C_TASK_PROGRESS_CACHE.get(taskId); -// labelBacktraceTaskRepository.increaseProgress(progress, System.currentTimeMillis(), taskId); -// LOGGER.debug("Flush progress, taskId:{}, progress:{}.", taskId, progress); 
-// } -// C_TASK_PROGRESS_CACHE.clear(); -// } finally { -// C_TASK_PROGRESS_CACHE_LOCK.unlock(); -// } -// } -// -// -// public void flushProgressReply(Message message,LabelBacktraceTask task){ -// try{ -// Long taskId = message.getTaskId(); -// Double progress = task.getProgress(); // 当前数据库中的 百分比 -// Double real_progress = message.getPercent(); // 只有回帖才有这个字段哦!! -// LOGGER.info("**** TaskId : " + taskId + " 回帖的百分比为 { 数据库中:" + progress + " || Message中: " + real_progress +" }"); -// // message 中报送的 百分比是 1(说明回帖推送完了) ,并且数据库中的百分比 小于 0.91 就强制改成 0.96 否则的话任务会 一直卡再【执行中】,不能完成。 -// if (progress == 1) { // 数据库中的百分比 -// System.out.println("已经是 1 了,就不用管了!!"); -// } else if (real_progress == 1.0 && progress < 0.99) { // message 过来的 是 1.0 ,但是 数据库中小于 1 -// labelBacktraceTaskRepository.setProgress(0.985, System.currentTimeMillis(), taskId); -// // 当 数据库中的百分比小于 1 的时候修改,否则就不用修改了! message 过来的 小于1 但是 大于 0.1 的话,此时的百分比就是 0.5+(real_progress/2) -// } else if ((progress-0.5) < (real_progress/2) && progress < 1.0 && 1.0 > real_progress && real_progress > 0.03) { -// // 当 数据库中的百分比,小于message 中的 并且都小于 1 的情况下才进这个方法 -// labelBacktraceTaskRepository.setProgress(0.5 + (real_progress / 2), System.currentTimeMillis(), taskId); -// } -// }catch (Exception e){ -// e.printStackTrace(); -// } -// } -// -// /** -// * 更新百分比 -// */ -// public void flushProgressPrimary(Message message,LabelBacktraceTask task){ -// try{ -// Long taskId = message.getTaskId(); -// Long processed = task.getProcessed();// 当前数据库中以处理数据的总条数 -// Double progress = task.getProgress(); // 当前数据库中的 百分比 -// Double new_progress = processed * 1.0/message.getTotalSegment(); -// try { -// Integer status = task.getStatus(); -// if(status == 2) { -// System.out.println("flushProgressPrimary : TaskId = " + taskId + " | 数据库中 processed = " + processed + " | Message 中 getTotalSegment = " + message.getTotalSegment()); -// List> channels = JsonUtils.parseArray(task.getChannel()); -// System.out.println("flushProgressPrimary : Channels = " + 
JsonUtils.toJSONString(channels)); -// // 如果channel 只有一个,并且是 item 的,百分比就直接更新 -// if (message.getTotalSegment() == processed) { -// labelBacktraceTaskRepository.setProgress(0.99, System.currentTimeMillis(), task.getId()); -// } -// if (progress == 1) { -// return; -// } else if (channels.size() == 1 && channels.get(0).get("channel").equals(EsConstants.DOC_TYPE_PRODUCT) && progress < 1.0) { -// System.out.println("new_progress = " + new_progress + " | progress = " + progress); -// if (new_progress > progress) { // 只有新的百分比大于数据库中的百分比才更新,否则不更新 -// labelBacktraceTaskRepository.setProgress(new_progress, System.currentTimeMillis(), task.getId()); -// } -// } else if (progress < 0.51) { // 选择多个平台,不管是否是电商的,都按 0.5 算 -// labelBacktraceTaskRepository.setProgress(new_progress / 2, System.currentTimeMillis(), taskId); -// } -// } -// } catch (Exception e) { -// e.printStackTrace(); -// } -// }catch (Exception e){ -// e.printStackTrace(); -// } -// } -// -// public void flushStat() { -// try { -// C_TASK_STAT_CACHE_LOCK.lock(); -// for (Long taskId : C_TASK_PROCESSED_CACHE.keySet()) { -// long processed = C_TASK_PROCESSED_CACHE.get(taskId); // 对应数据库中的 processed 字段值 要打标的数据总量 -// long satisfied = C_TASK_SATISFIED_CACHE.get(taskId); // 对应数据库中的 satisfied 字段值 打标成功的总数居量 -// labelBacktraceTaskRepository.increaseStat(processed, satisfied, System.currentTimeMillis(), taskId); -// LOGGER.debug("Flush stat, taskId:{}, processed:{}, satisfied:{}.", -// taskId, processed, satisfied); -// } -// C_TASK_PROCESSED_CACHE.clear(); -// C_TASK_SATISFIED_CACHE.clear(); -// } finally { -// C_TASK_STAT_CACHE_LOCK.unlock(); -// } -// } -// -// /** -// * 上报进度 -// */ -// public void flushData() { -// if (dataRateLimiter.tryAcquire()) { -// List dataList = Lists.newArrayList(); -// EsUtils.BulkItem item = DATA_CACHE.poll(); -// while (Objects.nonNull(item)) { -// if (dataList.size() >= config.esTargetBulkSize()) { -// EsUtils.bulkIndex(config.esTargetClusterName(), dataList, EsConstants.FIELD_ID); -// 
LOGGER.debug("Flush data, size:{}.", dataList.size()); -// dataList.clear(); -// } -// dataList.add(item); -// item = DATA_CACHE.poll(); -// } -// if (dataList.size() > 0) { -// EsUtils.bulkIndex(config.esTargetClusterName(), dataList, EsConstants.FIELD_ID); -// LOGGER.debug("Flush data, size:{}.", dataList.size()); -// } -// List newdataList = Lists.newArrayList(); -// EsUtils.BulkItem newitem = NEW_DATA_CACHE.poll(); -// while (Objects.nonNull(newitem)){ -// if (newdataList.size() >= config.esTargetBulkSize()) { -// EsUtils.bulkIndex(config.esTargetClusterName(), newdataList, EsConstants.FIELD_ID); -// LOGGER.debug("Flush data, size:{}.", newdataList.size()); -// newdataList.clear(); -// } -// newdataList.add(newitem); -// newitem = NEW_DATA_CACHE.poll(); -// } -// if (newdataList.size() > 0) { -// EsUtils.bulkIndex(config.esTargetClusterName(), newdataList, EsConstants.FIELD_ID); -// LOGGER.debug("Flush data, size:{}.", newdataList.size()); -// } -// } -// } -// -// /** -// * 恢复僵尸任务 -// * 如果任务状态处于执行中(status=2), -// * 判断该任务更新时间(updated_time)到当前时间,间隔大于60分钟, -// * 则认为该任务是僵尸任务,重置该任务状态为准备中(status=1) -// */ -// public void resetZombie() { -// long dt = System.currentTimeMillis() - 60 * 3600 * 1000; -// labelBacktraceTaskRepository.resetStatus(dt); -// } -// -// /** -// * 组装要调用 回帖的参数 -// * @param task -// * @return -// */ -// private JSONObject getParams(LabelBacktraceTask task) { -// JSONObject params = new JSONObject(); -// try{ -// System.out.println(JsonUtils.toJSONString(task)); -// // String rest = "http://rule.sq.percent.cn/data_match/content/"; -// //ConditionDef condition = ConditionDef.build(task,rest); -// ConditionDef condition = ConditionDef.build(task, config.getRuleRest()); -// Map docTypeMap = new HashMap<>(); -// try { -// List> channel = JsonUtils.parseArray(task.getChannel()); -// for (Map chann: channel) { -// docTypeMap.put(chann.get("channel").toString(),chann.get("site")); -// } -// } catch (Exception e) { -// e.printStackTrace(); -// } -// 
params.put("taskID",task.getId()); -// params.put("indexName",task.getIndexName()); -// params.put("outTopic",task.getReplyTopic()); -// params.put("ESIndex",task.getTaskCode()+"_comment"); -// params.put("isQueryAll",false); -// params.put("condition",condition); -// Map queryParams = new HashMap<>(); -// queryParams.put("docType",docTypeMap); -// queryParams.put("date_start",task.getDateStart()); -// queryParams.put("date_end",task.getDateEnd()); -// params.put("queryParams",queryParams); -// }catch (Exception e){ -// e.printStackTrace(); -// } -// return params; -// } -} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryMiniService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryMiniService.java deleted file mode 100644 index cf0822e..0000000 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryMiniService.java +++ /dev/null @@ -1,207 +0,0 @@ -package com.bfd.mf.job.service; - -import com.bfd.mf.job.config.ESConstants; -import com.bfd.mf.job.domain.entity.Task; -import com.bfd.mf.job.util.EsUtils; -import org.apache.lucene.index.Term; -import org.elasticsearch.index.query.*; -import org.elasticsearch.search.aggregations.AggregationBuilder; -import org.elasticsearch.search.aggregations.bucket.terms.Terms; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.stereotype.Service; - -import java.sql.Timestamp; -import java.util.HashMap; -import java.util.Map; -import java.util.TimeZone; - -@Service -public class EsQueryMiniService { - private static Logger logger = LoggerFactory.getLogger(EsQueryMiniService.class); - private static String clSubject = "cl_major_"; - private static String subjectPre = "major"; - - - /** - * 统计 每个专题下,每个渠道 的总量 - */ - public Map getSubjectChannelStatistics(String clusterName,String indexName) { - Map resultMap = new HashMap<>(); - try{ - if(indexName.contains(subjectPre)) { - boolean isExists = EsUtils.indexExists(clusterName, indexName); - if 
(isExists) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE); - String indexNames [] = {indexName}; - Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag"); - resultMap = EsUtils.parseTerms(result); - } - } - }catch (Exception e){ - e.printStackTrace(); - } - return resultMap; - } - - /** - * 统计 每个专题下,每个渠道 当天的增量 - */ - public Map getSubjectChannelTodayStatistics(String clusterName,String indexName) { - Map resultMap = new HashMap<>(); - try{ - if(indexName.contains(subjectPre)) { - boolean isExists = EsUtils.indexExists(clusterName, indexName); - if (isExists) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - long current=System.currentTimeMillis(); - long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); - Long startTime = new Timestamp(zero).getTime(); - RangeQueryBuilder rangeQueryBuilder = QueryBuilders - .rangeQuery(ESConstants.CRAWLTIME) - .gte(startTime) - .lt(current); - qb.must(rangeQueryBuilder); - AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE); - String indexNames [] = {indexName}; - Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag"); - resultMap = EsUtils.parseTerms(result); - } - } - }catch (Exception e){ - e.printStackTrace(); - } - return resultMap; - } - - /** - * 统计 每个专题下,crawlDataFlag 三种类型当天的总量 - */ - public Map getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) { - Map resultMap = new HashMap<>(); - try{ - if(indexName.contains(subjectPre)) { - boolean isExists = EsUtils.indexExists(clusterName, indexName); - if (isExists) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG); - String indexNames [] = {indexName}; - Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag"); - Map 
termsMap = EsUtils.parseTerms(result); - resultMap = EsUtils.getResultMap(termsMap); - } - } - }catch (Exception e){ - e.printStackTrace(); - } - return resultMap; - } - - /** - * 统计 每个专题下,crawlDataFlag 三种类型 的增量 - */ - public Map getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) { - Map resultMap = new HashMap<>(); - try{ - if(indexName.contains(subjectPre)) { - boolean isExists = EsUtils.indexExists(clusterName, indexName); - if (isExists) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - long current=System.currentTimeMillis(); - long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); - Long startTime = new Timestamp(zero).getTime(); - RangeQueryBuilder rangeQueryBuilder = QueryBuilders - .rangeQuery(ESConstants.CRAWLTIME) - .gte(startTime) - .lt(current); - qb.must(rangeQueryBuilder); - AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG); - String indexNames [] = {indexName}; - Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag"); - Map termsMap = EsUtils.parseTerms(result); - resultMap = EsUtils.getResultMap(termsMap); - } - } - }catch (Exception e){ - e.printStackTrace(); - } - return resultMap; - } - - - public Map getTaskCount(String clusterName,Long taskId, Task task,String crawlDataFlag) { - Map countMap = new HashMap<>(); - String indexName = clSubject + task.getSubjectId();//subject_id - String cid = task.getCid().toLowerCase(); - Long crawlStartTime = task.getCrawlStartTime().longValue(); - Long crawlEndTime = task.getCrawlEndTime().longValue(); - // String crawlDataFlag =task.getCrawlDataFlag(); - if(indexName.contains(subjectPre)) { - boolean isExists = EsUtils.indexExists(clusterName, indexName); - if (isExists) { - BoolQueryBuilder qb = QueryBuilders.boolQuery(); - // 任务ID 筛选 - TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE,cid); - TermQueryBuilder taskIdTermQueryBuilder = 
QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG,crawlDataFlag); - qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder); - // 时间范围筛选 - BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); - RangeQueryBuilder rangeQueryBuilder = QueryBuilders - .rangeQuery(ESConstants.PUBTIME) - .gte(crawlStartTime) - .lt(crawlEndTime); - // 用户数据 - BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); - TermQueryBuilder primartTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PRIMARY,2); -// TermQueryBuilder pubTimeTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PUBTIME,0); - boolQueryBuilder.must(primartTermQueryBuilder); - shouldbq.should(boolQueryBuilder).should(rangeQueryBuilder); - qb.must(shouldbq); - - logger.info("QB1 : indexName: {}. taskId : {}.{\"query\": {}}.",indexName,taskId,qb.toString().replace("\n","").replace("\r","").replace(" ","")); - - Long count = EsUtils.queryCount(clusterName, indexName, qb); - countMap.put("totalCount",count); - long current=System.currentTimeMillis(); - long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); - Long startTime = new Timestamp(zero).getTime(); - RangeQueryBuilder rangeQueryBuilder2 = QueryBuilders - .rangeQuery(ESConstants.CRAWLTIME) - .gte(startTime).lt(current); - qb.must(rangeQueryBuilder2); - logger.info("QB2 : : indexName: {}. 
taskId : {}.{\"query\": {}}.",indexName,taskId,qb.toString().replace("\n","").replace("\r","").replace(" ","")); - Long todayCount = EsUtils.queryCount(clusterName,indexName,qb); - countMap.put("todayCount",todayCount); - } - } - return countMap; - } - -// public Long getTaskTodayCount(String clusterName,Integer id, Map task) { -// Long count = 0L; -// String indexName = clSubject + (String) task.get("subject_id"); -// String cid = (String) task.get(ESConstants.CID); -// Long crawlStartTime = (Long) task.get("crawl_start_time"); -// Long crawlEndTime = (Long) task.get("crawl_end_time"); -// String crawlDataFlag = (String) task.get("crawl_data_flag"); -// -// if(indexName.contains(subjectPre)) { -// boolean isExists = EsUtils.indexExists(clusterName, indexName); -// if (isExists) { -// BoolQueryBuilder qb = QueryBuilders.boolQuery(); -// long current=System.currentTimeMillis(); -// long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); -// Long startTime = new Timestamp(zero).getTime(); -// RangeQueryBuilder rangeQueryBuilder = QueryBuilders -// .rangeQuery(ESConstants.CRAWLTIME) -// .gte(startTime) -// .lt(current); -// qb.must(rangeQueryBuilder); -//// Terms result = EsUtils.queryTag(clusterName, indexName, qb, ab, ESConstant.DOC_TYPE + "Tag"); -//// resultMap = parseTerms(result); -// } -// } -// return count; -// } -} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/WriterTXTService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/WriterTXTService.java index 9b51d3c..976c81d 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/WriterTXTService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/WriterTXTService.java @@ -1,20 +1,13 @@ package com.bfd.mf.job.service; -import com.alibaba.fastjson.JSONObject; -import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.domain.entity.ResultDetail; import com.bfd.mf.job.domain.repository.ResultDetailRepository; -import 
com.bfd.mf.job.util.EsUtils; -import com.google.common.collect.Maps; import com.google.common.util.concurrent.RateLimiter; -import org.assertj.core.util.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; - import java.util.List; -import java.util.Map; @Service public class WriterTXTService { diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java new file mode 100644 index 0000000..aa847cd --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/alarm/AlarmService.java @@ -0,0 +1,241 @@ +package com.bfd.mf.job.service.alarm; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.config.ESConstants; +import com.bfd.mf.job.domain.entity.EmailGroup; +import com.bfd.mf.job.domain.entity.TaskCount; +import com.bfd.mf.job.domain.repository.EmailGroupRepository; +import com.bfd.mf.job.domain.repository.ServiceLoadRepository; +import com.bfd.mf.job.domain.repository.TaskCountRepository; +import com.bfd.mf.job.domain.repository.TaskRepository; +import com.bfd.mf.job.util.DateUtil; +import com.bfd.mf.job.util.EMailUtils; +import com.bfd.mf.job.util.EsUtils; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.jpa.repository.Query; +import org.springframework.stereotype.Service; + +import javax.annotation.PostConstruct; +import java.math.BigInteger; +import java.util.*; + +import static org.elasticsearch.index.query.QueryBuilders.rangeQuery; + +@Service +public class AlarmService { + private static final Logger 
LOGGER = LoggerFactory.getLogger(AlarmService.class); + @Autowired + private AppConfig config; + @Autowired + private TaskRepository taskRepository; + @Autowired + private TaskCountRepository taskCountRepository; + @Autowired + private ServiceLoadRepository serviceLoadRepository; + @Autowired + private EmailGroupRepository emailGroupRepository; + + @PostConstruct + public void init() { + // 注册数据查询来源 +// EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source +// EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target + EsUtils.registerCluster(config.esLogstashClusterName(),config.esLogstashAddress()); + } + + /* + 尝试在指定时间内获得许可,如果获得了,则直接返回,如果没有获得,则执行下面的流程 + */ +// public void tryAcquire() { +// long start = System.currentTimeMillis(); +// LOGGER.info("------------------------------------------------------------------ AlarmService ------------------------------------------------------"); +// long end = System.currentTimeMillis(); +// LOGGER.info("TaskCountService finish, took:{} ms.",(end - start)); +// +// +// } + + + public void produce() { + long start = System.currentTimeMillis(); + LOGGER.info("------------------------------------------------------------------ AlarmService ------------------------------------------------------"); + // 索引 + String date = DateUtil.parseDateByday2(new Date().getTime()); + String index = ESConstants.LOGSTASH + date; + + // System.out.println(index); //logstash-2021.05.20 logstash-2021.05.21 + String startTime = DateUtil.getDateTime(System.currentTimeMillis()); + String endTime = DateUtil.getDateTime(System.currentTimeMillis() - 60 * 30 * 1000); + String type = "datasave"; + QueryBuilder queryBuilder = getQueryBuilder(startTime,endTime,type); + + String clusterName = config.esLogstashClusterName(); + String sourceIndices [] = {index}; + Map errorCid = new HashMap<>(); + // 查询语句 + EsUtils.scrollQuery(clusterName, sourceIndices,"doc", + queryBuilder, 
ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES, + dataList -> { + try { + if (dataList.size() == 0) { + System.out.println("没查到相关的 评论 数据"); + return; + } + for (JSONObject data : dataList) { + Map dataMap = data; + String cid = (String) dataMap.get("cid"); + if(null == cid){ + // System.out.println(data); + String message = (String) dataMap.get("message"); + if(message.contains("cid")) { + cid = message.split("cid=")[1]; + // System.out.println("--- " + cid); + if(cid.contains("}")){ + cid = cid.split("}")[0]; + } + } + } + if(null != cid && cid.contains("}")){ + cid = cid.split("}")[0]; + } + // System.out.println(cid); + if(null == cid){ + System.out.println(data); + } + if(errorCid.containsKey(cid)){ + Integer errorNum = errorCid.get(cid); + errorCid.put(cid,errorNum+1); + }else{ + errorCid.put(cid,1); + } + + } + } catch (Exception e) { + e.printStackTrace(); + } + }); + + System.out.println(JSONObject.toJSONString(errorCid)); + + // 遍历统计的map ,将 value> 10 的报警 + for(Map.Entry entry : errorCid.entrySet()){ + String cid = entry.getKey(); + Integer errorNum = entry.getValue(); + if(errorNum > 50){ + System.out.println( "这个站点30分钟内的解析失败次数超过15次 " +cid + " : " + errorNum); + saveToAlarm(cid,errorNum); + } + } + + + + /** + * 1、时间范围是半小时内 + * 2、 + */ + + + // 根据查询半小时内下载负载率求平均后修改 负载率表的值 + List serviceStatusList = new ArrayList<>(); + serviceStatusList.add(32.6F); + serviceStatusList.add(51F); + serviceStatusList.add(0.0F); + serviceStatusList.add(0.0F); + serviceStatusList.add(18.3F); + serviceStatusList.add(23.3F); + serviceStatusList.add(64F); + serviceStatusList.add(73F); + serviceStatusList.add(44.6F); + serviceStatusList.add(38F); + + for(int i = 0; i < 10 ; i ++) { + serviceLoadRepository.updateTaskCount(i+1,serviceStatusList.get(i)); + } + long end = System.currentTimeMillis(); + LOGGER.info("ServiceLoadService finish, took:{} ms.",(end - start)); + + } + + + + private void saveToAlarm(String cid, Integer errorNum) { + /** + `alarm_config` varchar(255) 
NOT NULL COMMENT '报警任务配置', + */ + Integer alarm_tag = 3; + Integer alarm_reason = 1; + String alarm_message = "[chenrui.li]这个站点解析失败次数为:"+errorNum; + String alarm_task_url = ""; // 无法确认是哪个任务 + String alarm_task_content = ""; // 无法确认是哪个任务 + String alarm_cid = cid; + String alarm_config = ""; + String alarm_trigger_time = DateUtil.getDateTime(new Date().getTime()); + Date alarm_assign_time = null; + Date alarm_finish_time = null; + String alarm_handler = "jing.du@percent.cn"; + List emailList = new ArrayList<>(); + emailList.add(alarm_handler); + // 根据站点查询站点的处理人 + String email_addr = getEmailByCid(cid); + String opinion = ""; + Integer status = 2; + String create_time = DateUtil.getDateTime(new Date().getTime()); + Date update_time = null; + int del = 0; + + String sql = "INSERT INTO cl_alarm (alarm_tag,alarm_reason,alarm_message,alarm_task_url,alarm_task_content,alarm_cid," + + "alarm_config,alarm_trigger_time,alarm_handler,status,create_time,del) " + + "values ("+alarm_tag+","+alarm_reason+",'"+alarm_message+"','"+alarm_task_url+"','"+alarm_task_content+"'," + + "'"+alarm_cid+"','"+alarm_config+"','"+alarm_trigger_time+"','"+alarm_handler+"',"+status+",'"+create_time+"',"+del+") "; + System.out.println(sql); + + Map siteMessage = new HashMap(); + siteMessage.put("cid", cid); +// siteMessage.put("categoryName", pagetype); +// siteMessage.put("sample", crawldataflag); + + EMailUtils.getInstance().sendEmail(6, siteMessage, emailList,"30"); + } + + private String getEmailByCid(String cid) { + List emails = emailGroupRepository.getEmailGroupsByCid(cid); + String alarmEmail = ""; + String emailGroup = emails.get(0).getEmail(); + if(emailGroup.contains(",")){ + alarmEmail = emailGroup.split(",")[0]; + }else{ + alarmEmail = emailGroup; + } + return alarmEmail; + } + + private QueryBuilder getQueryBuilder(String nowTime, String befor30min,String type) { + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + try { + // 筛选时间 + boolean boo = true; + QueryBuilder 
crawlTimeRange = buildRangeQueryBuilder( + "crawl_time.keyword",befor30min ,nowTime, boo, boo); + boolQueryBuilder.must(crawlTimeRange); + //parse + QueryBuilder termQueryBuilder = QueryBuilders.termsQuery("type",type); + }catch (Exception e){ + e.printStackTrace(); + } + return boolQueryBuilder; + } + + private QueryBuilder buildRangeQueryBuilder(String field, Object startVal, Object endVal, Boolean isIncludeLower, Boolean isIncludeUpper) { + return rangeQuery(field) + .from(startVal) + .to(endVal) + .includeLower(isIncludeLower) + .includeUpper(isIncludeUpper); + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/backtrace/BacktraceService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/backtrace/BacktraceService.java new file mode 100644 index 0000000..e295c81 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/backtrace/BacktraceService.java @@ -0,0 +1,230 @@ +package com.bfd.mf.job.service.backtrace; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.crawler.elasti.ElastiProducer; +import com.bfd.crawler.utils.JsonUtils; +import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.config.ESConstants; +import com.bfd.mf.job.domain.entity.Subject; +import com.bfd.mf.job.domain.repository.SubjectRepository; +import com.bfd.mf.job.util.EsUtils; +import com.bfd.mf.job.util.Kafka010Utils; +import com.google.common.collect.Maps; +import com.google.common.util.concurrent.RateLimiter; +import org.assertj.core.util.Lists; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import javax.annotation.PostConstruct; +import java.util.*; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +@Service 
+public class BacktraceService { + private static final Logger LOGGER = LoggerFactory.getLogger(BacktraceService.class); + private static final long PERIOD_MILLS = 1 * 3600 * 1000L; + private static BlockingQueue>> P_TASK_CACHE_RANGE = new LinkedBlockingQueue<>(); + // private static Map C_UNNORMAL_TASK_CACHE = new ConcurrentHashMap<>(); + private static Map C_TASK_PROGRESS_CACHE = Maps.newHashMap(); + private static Lock C_TASK_PROGRESS_CACHE_LOCK = new ReentrantLock(); + private static Map C_TASK_PROCESSED_CACHE = Maps.newHashMap(); + private static Map C_TASK_SATISFIED_CACHE = Maps.newHashMap(); + private static Map C_TASK_SEGMENT_CACHE = Maps.newHashMap(); + private static Lock C_TASK_STAT_CACHE_LOCK = new ReentrantLock(); + private static BlockingQueue DATA_CACHE = new LinkedBlockingQueue<>(10240); + private static BlockingQueue NEW_DATA_CACHE = new LinkedBlockingQueue<>(10240); + private RateLimiter dataRateLimiter; + private RateLimiter pRateLimiter; + private RateLimiter cRateLimiter; + private static int subjectEsNum = 1; + private static String indexType = "docs"; + private static int bussinessType = 1; + + @Autowired + private AppConfig config; + @Autowired + private SubjectRepository subjectRepository; + + @PostConstruct + public void init() { + // 注册数据查询来源 + EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source + EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target + pRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); + cRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); + dataRateLimiter = RateLimiter.create(config.esMiniBulkRate()); + // kafkaProducer = Kafka010Utils.getProducer(config.getBrokerList()); + } + + /* + 尝试在指定时间内获得许可,如果获得了,则直接返回,如果没有获得,则执行下面的流程 + */ + public void tryAcquire() { + if (!pRateLimiter.tryAcquire()) {//是在指定的时间内尝试地获得1个许可,如果获取不到则返回false + return; + } + // 查询 cl_subject 表中 status=0 del =0 cache_recalculate_status = 1 + 
List> subjectTaskList = subjectRepository.querySubjectTaskByCacheRecalculateStatus(); + for (Map subject: subjectTaskList) { + System.out.println("需要拉数据的任务:"+JSONObject.toJSONString(subject)); + Map> cache = Maps.newHashMap(); + Long subjectId = Long.valueOf( subject.get(ESConstants.ID).toString()); + subjectRepository.updateCacheRecalculateStatus(subjectId,2);// + cache.put(subjectId, Lists.newArrayList(0L, 0L, 1.0, 1L, 1L)); + try { + P_TASK_CACHE_RANGE.put(cache); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +// // 查询 cl_label_backtrace_task 表中 状态为 1 and retry_times <= max_retry_times 的任务出来 +// List backtraceTaskList = SubjectRepository.findAllByStatus(1); +// for (LabelBacktraceTask task : taskList) { +// if (!isAvailable(task)) { +// continue; +// } +// LOGGER.info("Executing task:{}.", JSON.toJSONString(task)); +// failureService.createTable(task.getFailureTableName()); +// Long totalSegment = 1L;//(task.getDateEnd() - task.getDateStart()) / PERIOD_MILLS; // 3600000 +// Long segment = 1L; +// Double progressFactor = 1.0 / totalSegment; + +// } + } + + + public void produce(){ + Map> range = P_TASK_CACHE_RANGE.poll();// poll -->若队列为空,返回null + if (Objects.isNull(range)) { + return; + } + Long subjectId = 0L; + + for (Map.Entry> entry : range.entrySet()) { + subjectId = entry.getKey(); + } + Subject subject = subjectRepository.findById(subjectId).get(); + + String clusterName = config.esNormalClusterName(); + String subjectIndexName = config.getIndexNamePre() + subjectId; + + long fromMills = subject.getCacheStart().longValue(); + long toMills = subject.getCacheEnd().longValue(); + Long year = config.getQueryDataYearStarttime(); + String[] sourceIndices = EsUtils.getIndices(AppConfig.CL_INDEX, "_", + fromMills, toMills, AppConfig.DATE_FORMAT, config.esNormalUpper(), + config.esNormalStandby(),year); + + String esQuery = subject.getEsQuery(); + BoolQueryBuilder qb = EsUtils.getBoolQueryBuilderFromSqlStr(esQuery); + + 
LOGGER.info("索引范围:"+sourceIndices[0]+" ~ "+ sourceIndices[sourceIndices.length-1] +" ; QB : \n{}.",qb); + + Long finalSubjectId = subjectId; + // sourceIndices.size/100*i 就是当前的百分比 + Double percent = 0.0; + for(int i = 0 ; i < sourceIndices.length ; i ++) { + String index [] = {sourceIndices[i]}; + percent = (i*1.0)/sourceIndices.length ; + EsUtils.scrollQuery(clusterName, index, ESConstants.INDEX_TYPE, + qb, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES, + dataList -> { + try { + for (JSONObject data : dataList) { + data.put(ESConstants.SUBJECT_ID, finalSubjectId); + String url = data.getString(ESConstants.URL); + // System.out.println(url); + String product_id = getProductId(data,url); + data.put(ESConstants.CRAWLDATAFLAGTYPE,2); + data.put(ESConstants.CRAWLDATAFLAG,"url:"+product_id); + // System.out.println(subjectIndexName + " data --- "+data.get(ESConstants.DOC_ID)); + writerToMajorES(subjectIndexName, data); + } + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + percent = Double.valueOf(String.format("%.3f", percent)); + // 数据拉完了,需要修改一下 cl_subject 表中的 任务的 cache_recalculate_status 状态位, + // 同时,也需要修改一下 cl_task 中每个任务的状态位 + if(percent >= 0.996){ + int status = 3; + subjectRepository.updateCacheRecalculateStatus(subjectId,status); + subjectRepository.updateTaskStatus(subjectId,status); + } + } + LOGGER.info("******** ******** subjectIndexName : " + subjectIndexName + "这个专题下的数据拉完了,可以改状态了!"); + } + + private String getProductId(JSONObject data, String url) { + String product_id = data.getString(ESConstants.PRODUCT_ID); + try { + if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.TMALL)) { + if (url.contains("&id=")) { + product_id = url.split("&id=")[1].split("&")[0].replace("/", "*"); + } else { //https://detail.tmall.com/item.htm?id=582242698961&rn=08db719e4a7ee5b6d4f5b58825d1f261&abbucket=20 + product_id = url.split("/?id=")[1].split("&")[0].replace("/", "*"); + } + } + if 
(data.get(ESConstants.EN_SOURCE).equals(ESConstants.EJINGDONG)) { + product_id = url.split(".html")[0].split("item.jd.")[1].replace("/", "*"); + } + if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.TAOBAO)) { + if (url.contains("&id=")) { + product_id = url.split("&id=")[1].split("&")[0].replace("/", "*"); + } else { + product_id = url.split("/?id=")[1].split("&")[0].replace("/", "*"); + } + } + if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.WEIPINHUI)) { + product_id = url.split(".vip.com/")[1].split(".html")[0].replace("/", "*"); + } + if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.EJUMEI)) { + product_id = url.split("com/")[1].split(".html")[0].replace("/", "*"); + } + if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.SUNING)) { + product_id = url.split("suning.com/")[1].split(".html")[0].replace("/", "*"); + } + if (data.get(ESConstants.EN_SOURCE).equals(ESConstants.SEPHORA)) { + product_id = url.split(".cn")[1].split(".html")[0].replace("/", "*"); + } + }catch (Exception e){ + e.printStackTrace(); + } + return product_id; + + } + +// public static void main(String[] args) { +// BacktraceService b = new BacktraceService(); +// String url = "https://detail.tmall.com/item.htm?id=582242698961&rn=08db719e4a7ee5b6d4f5b58825d1f261&abbucket=20"; +// JSONObject data = new JSONObject(); +// data.put("enSource","tmall"); +// String id = b.getProductId(data,url); +// System.out.println(id); +// } + + private static void writerToMajorES(String indexName , Map responseMap) { + System.out.println("==========================写入到【专题】ES : ==========" + indexName + " - "+responseMap.get("docId") ); + ElastiProducer elastiProducer = ElastiProducer.getInstance(bussinessType, subjectEsNum, indexName, indexType); + elastiProducer.sendMessageToEs(JsonUtils.toJSONString(responseMap)); + } + + +// public static void main(String[] args) { +// String url = 
"https://detail.tmall.com/item.htm?id=598372446974&skuId=4336725650385&user_id=2549841410&cat_id=50031573&is_b=1&rn=66410a97e53d6338e3bff62cfd307a80"; +// String product_id = ""; +// if(url.contains("&id=")) { +// product_id = url.split("&id=")[1].split("&")[0].replace("/", "*"); +// }else{ +// product_id = url.split("/?id=")[1].split("&")[0].replace("/", "*"); +// } +// System.out.println(product_id); +// } + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java new file mode 100644 index 0000000..dd178ed --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryMiniService.java @@ -0,0 +1,203 @@ +package com.bfd.mf.job.service.es; + +import com.bfd.mf.job.config.ESConstants; +import com.bfd.mf.job.domain.entity.Task; +import com.bfd.mf.job.util.EsUtils; +import org.elasticsearch.index.query.*; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.stereotype.Service; + +import java.sql.Timestamp; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +@Service +public class EsQueryMiniService { + private static Logger logger = LoggerFactory.getLogger(EsQueryMiniService.class); + + /** + * 统计 每个专题下,每个渠道 的总量 + */ + public Map getSubjectChannelStatistics(String clusterName,String indexName) { + + Map resultMap = new HashMap<>(); + try{ + + boolean isExists = EsUtils.indexExists(clusterName, indexName); + if (isExists) { + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE); + String indexNames [] = {indexName}; + Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag"); + resultMap = EsUtils.parseTerms(result); + } + + 
}catch (Exception e){ + e.printStackTrace(); + } + return resultMap; + } + + /** + * 统计 每个专题下,每个渠道 当天的增量 + */ + public Map getSubjectChannelTodayStatistics(String clusterName,String indexName) { + Map resultMap = new HashMap<>(); + try{ + + boolean isExists = EsUtils.indexExists(clusterName, indexName); + if (isExists) { + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + long current=System.currentTimeMillis(); + long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); + Long startTime = new Timestamp(zero).getTime(); + RangeQueryBuilder rangeQueryBuilder = QueryBuilders + .rangeQuery(ESConstants.CRAWLTIME) + .gte(startTime) + .lt(current); + qb.must(rangeQueryBuilder); + AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.DOC_TYPE); + String indexNames [] = {indexName}; + Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.DOC_TYPE + "Tag"); + resultMap = EsUtils.parseTerms(result); + + } + }catch (Exception e){ + e.printStackTrace(); + } + return resultMap; + } + + /** + * 统计 每个专题下,crawlDataFlag 三种类型当天的总量 + */ + public Map getSubjectCrawlDataFlagStatistics(String clusterName, String indexName) { + Map resultMap = new HashMap<>(); + try{ + boolean isExists = EsUtils.indexExists(clusterName, indexName); + if (isExists) { + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG); + String indexNames [] = {indexName}; + Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag"); + Map termsMap = EsUtils.parseTerms(result); + resultMap = EsUtils.getResultMap(termsMap); + } + }catch (Exception e){ + e.printStackTrace(); + } + return resultMap; + } + + /** + * 统计 每个专题下,crawlDataFlag 三种类型 的增量 + */ + public Map getSubjectCrawlDataFlagTodayStatistics(String clusterName, String indexName) { + Map resultMap = new HashMap<>(); + try{ + + boolean isExists = EsUtils.indexExists(clusterName, 
indexName); + if (isExists) { + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + long current=System.currentTimeMillis(); + long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); + Long startTime = new Timestamp(zero).getTime(); + RangeQueryBuilder rangeQueryBuilder = QueryBuilders + .rangeQuery(ESConstants.CRAWLTIME) + .gte(startTime) + .lt(current); + qb.must(rangeQueryBuilder); + AggregationBuilder ab = EsUtils.getSubjectChannelAB(ESConstants.CRAWLDATAFLAG); + String indexNames [] = {indexName}; + Terms result = EsUtils.queryTag(clusterName, indexNames, qb, ab, ESConstants.CRAWLDATAFLAG + "Tag"); + Map termsMap = EsUtils.parseTerms(result); + resultMap = EsUtils.getResultMap(termsMap); + + } + }catch (Exception e){ + e.printStackTrace(); + } + return resultMap; + } + + + public Map getTaskCount(String clusterName,Long taskId, Task task,String crawlDataFlag,String indexNamePre) { + Map countMap = new HashMap<>(); + String indexName = indexNamePre + task.getSubjectId();//subject_id + if(null != task.getCid()) { + String cid = task.getCid().toLowerCase(); + Long crawlStartTime = task.getCrawlStartTime().longValue(); + Long crawlEndTime = task.getCrawlEndTime().longValue(); + // String crawlDataFlag =task.getCrawlDataFlag(); + if (indexName.contains(indexNamePre)) { + boolean isExists = EsUtils.indexExists(clusterName, indexName); + if (isExists) { + BoolQueryBuilder qb = QueryBuilders.boolQuery(); + // 任务ID 筛选 + TermQueryBuilder cidTermQueryBuilder = QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid); + TermQueryBuilder taskIdTermQueryBuilder = QueryBuilders.termQuery(ESConstants.CRAWLDATAFLAG, crawlDataFlag); + qb.must(taskIdTermQueryBuilder).must(cidTermQueryBuilder); + // 时间范围筛选 + BoolQueryBuilder shouldbq = QueryBuilders.boolQuery(); + RangeQueryBuilder rangeQueryBuilder = QueryBuilders + .rangeQuery(ESConstants.PUBTIME) + .gte(crawlStartTime) + .lt(crawlEndTime); + // 用户数据 + BoolQueryBuilder boolQueryBuilder = 
QueryBuilders.boolQuery(); + TermQueryBuilder primartTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PRIMARY, 2); +// TermQueryBuilder pubTimeTermQueryBuilder = QueryBuilders.termQuery(ESConstants.PUBTIME,0); + boolQueryBuilder.must(primartTermQueryBuilder); + shouldbq.should(boolQueryBuilder).should(rangeQueryBuilder); + qb.must(shouldbq); + logger.info("QB1 : indexName: {}. taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long count = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put("totalCount", count); + // 上面的语句是查询 该任务的 总数据量:totalCount,下面的语句是查询 该任务当天的数据量:todayCount + long current = System.currentTimeMillis(); + long zero = current / (1000 * 3600 * 24) * (1000 * 3600 * 24) - TimeZone.getDefault().getRawOffset(); + Long startTime = new Timestamp(zero).getTime(); + RangeQueryBuilder rangeQueryBuilder2 = QueryBuilders + .rangeQuery(ESConstants.CRAWLTIME) + .gte(startTime).lt(current); + qb.must(rangeQueryBuilder2); + logger.info("QB2 : indexName: {}. 
taskId : {}.{\"query\": {}}.", indexName, taskId, qb.toString().replace("\n", "").replace("\r", "").replace(" ", "")); + Long todayCount = EsUtils.queryCount(clusterName, indexName, qb); + countMap.put("todayCount", todayCount); + } + } + } + return countMap; + } + +// public Long getTaskTodayCount(String clusterName,Integer id, Map task) { +// Long count = 0L; +// String indexName = clSubject + (String) task.get("subject_id"); +// String cid = (String) task.get(ESConstants.CID); +// Long crawlStartTime = (Long) task.get("crawl_start_time"); +// Long crawlEndTime = (Long) task.get("crawl_end_time"); +// String crawlDataFlag = (String) task.get("crawl_data_flag"); +// +// if(indexName.contains(subjectPre)) { +// boolean isExists = EsUtils.indexExists(clusterName, indexName); +// if (isExists) { +// BoolQueryBuilder qb = QueryBuilders.boolQuery(); +// long current=System.currentTimeMillis(); +// long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); +// Long startTime = new Timestamp(zero).getTime(); +// RangeQueryBuilder rangeQueryBuilder = QueryBuilders +// .rangeQuery(ESConstants.CRAWLTIME) +// .gte(startTime) +// .lt(current); +// qb.must(rangeQueryBuilder); +//// Terms result = EsUtils.queryTag(clusterName, indexName, qb, ab, ESConstant.DOC_TYPE + "Tag"); +//// resultMap = parseTerms(result); +// } +// } +// return count; +// } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryNormalService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryNormalService.java similarity index 99% rename from cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryNormalService.java rename to cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryNormalService.java index af69831..6a44741 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/EsQueryNormalService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/es/EsQueryNormalService.java @@ -1,4 +1,4 @@ 
-package com.bfd.mf.job.service; +package com.bfd.mf.job.service.es; import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.util.EsUtils; diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/QueryService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java similarity index 78% rename from cl_query_data_job/src/main/java/com/bfd/mf/job/service/QueryService.java rename to cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java index 8a3b5ec..d469c6b 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/QueryService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/QueryService.java @@ -1,7 +1,9 @@ -package com.bfd.mf.job.service; +package com.bfd.mf.job.service.query; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson.serializer.SerializerFeature; +import com.bfd.crawler.utils.JsonUtils; import com.bfd.mf.job.config.AppConfig; import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.domain.entity.Subject; @@ -9,8 +11,10 @@ import com.bfd.mf.job.domain.entity.Task; import com.bfd.mf.job.domain.repository.SubjectRepository; import com.bfd.mf.job.domain.repository.TaskRepository; import com.bfd.mf.job.download.DownLoadFile; +import com.bfd.mf.job.util.DataCheckUtil; import com.bfd.mf.job.util.EsUtils; import com.bfd.mf.job.util.Kafka010Utils; +import com.bfd.mf.job.util.ReadLine; import com.google.common.collect.Maps; import com.google.common.util.concurrent.RateLimiter; import org.apache.commons.lang3.exception.ExceptionUtils; @@ -21,17 +25,14 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; - import javax.annotation.PostConstruct; +import java.io.File; import java.io.IOException; import java.math.BigInteger; import java.sql.Timestamp; import java.util.*; import 
java.util.concurrent.BlockingQueue; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; import static org.elasticsearch.index.query.QueryBuilders.rangeQuery; @@ -64,7 +65,7 @@ public class QueryService { EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target pRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); kafkaProducer = Kafka010Utils.getProducer(config.getBrokerList()); - // + // cRateLimiter = RateLimiter.create(1.0D / config.getPeriodS()); // dataRateLimiter = RateLimiter.create(config.esMiniBulkRate()); } @@ -76,7 +77,7 @@ public class QueryService { if (!pRateLimiter.tryAcquire()) {//是在指定的时间内尝试地获得1个许可,如果获取不到则返回false return; } - LOGGER.info("================================================================ QueryService ================================================================="); + LOGGER.info("================================================================ QueryService (离线拉数据)================================================================="); // 把要统计的结果事儿也在这里面统计了!!!! 
// List allSubject = subjectCountRepository.findAllSubject(); // for (Object ob:allSubject) { @@ -94,6 +95,8 @@ public class QueryService { Double progressFactor = 1.0 / totalSegment; Map> cache = Maps.newHashMap(); long taskId = task.getId().longValue(); + int cache_num = 1; + taskRepository.updateStatus(cache_num, task.getId().longValue()); cache.put(taskId, Lists.newArrayList(0L, 0L, progressFactor, totalSegment, segment)); try { P_TASK_CACHE_RANGE.put(cache); @@ -119,56 +122,81 @@ public class QueryService { taskId = entry.getKey(); } Task task = taskRepository.findById(taskId).get(); - System.out.println("开始拉数据的任务是:" + JSONObject.toJSONString(task)); + LOGGER.info("开始拉数据的任务是:" + JSONObject.toJSONString(task)); + List docIdsList = new ArrayList<>(); try { // 创建过滤条件 & 任务预处理 fromMills = task.getCrawlStartTime().longValue(); toMills = task.getCrawlEndTime().longValue(); - String[] sourceIndices = EsUtils.getIndices(AppConfig.CL_INDEX, "_", - fromMills, toMills, AppConfig.DATE_FORMAT, config.esNormalUpper(), config.esNormalStandby()); - QueryBuilder queryBuilder; - - String clusterName = config.esNormalClusterName(); // 获得 109 的 clusterName - String cid = task.getCid().toLowerCase(); - String crawlDataFlag = task.getCrawlDataFlag(); - BigInteger subjectId = task.getSubjectId(); - Subject subject = subjectRepository.getSubjectBySubjectId(subjectId.longValue()); - String indexName = "cl_major_" + task.getSubjectId(); - Integer cacheNum = task.getCacheNum(); + Long year = config.getQueryDataYearStarttime(); // 获取配置文件中用直接拉年份的时间节点,现在设置的是2019年,2019年前的全部用年做索引,不拆成天 + String clusterName = config.esNormalClusterName(); // 获取配置文件中ES的名称 + // 根据条件获取到要查询的索引的集合 + if(toMills > new Date().getTime()){ + toMills = new Date().getTime(); + } + String[] sourceIndices = EsUtils.getIndices(AppConfig.CL_INDEX, AppConfig.SEPARATOR, + fromMills, toMills, AppConfig.DATE_FORMAT, config.esNormalUpper(), config.esNormalStandby(),year); + String cid = task.getCid().toLowerCase(); // 站点的cid + 
String siteType = task.getSiteType().toString(); // 站点的类型 ,主要看是不是电商的,因为电商的主贴和评论在ES中的存储方式跟其他的相反 + String crawlDataFlag = task.getCrawlDataFlag(); // 任务的抓取条件 + String crawlContentKey = task.getCrawlContentKey(); // 要拉取的字段,主要看是否需要拉评论 + // BigInteger subjectId = task.getSubjectId(); + // Subject subject = subjectRepository.getSubjectBySubjectId(subjectId.longValue()); + String indexName = "cl_major_" + task.getSubjectId(); // 索引名称 + Integer cacheNum = task.getCacheNum(); // 拉取数据的次数 // 当拉数据的次数 大于1 次的时候,再拉数据的开始时间就不用是任务设置的开始时间了,同时可以再加个采集时间范围限制一下,确保拉的数据都是任务添加之后才采集的就行 + QueryBuilder queryBuilder; // 根据条件组装查询用具 if(cacheNum > 1 ) { // 已经拉过历史数据的任务,将 开始时间改成当天凌晨,查询发表和抓取都是当天的数据。 long current=System.currentTimeMillis();//当前时间毫秒数 long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 fromMills = new Timestamp(zero).getTime(); - queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum); + queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum,siteType); }else{ fromMills = task.getCrawlStartTime().longValue(); - queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum); + queryBuilder = getQueryBuilder(fromMills, toMills, cid, crawlDataFlag,cacheNum,siteType); } LOGGER.info("Query primary, task:{}, index:{}, from:{}, to:{}, indices:{}, dsl:{}.", taskId, indexName, new LocalDateTime(fromMills).toString(AppConfig.DATE_TIME_FORMAT), new LocalDateTime(toMills).toString(AppConfig.DATE_TIME_FORMAT), - sourceIndices.toString(), + JSONObject.toJSONString(sourceIndices), queryBuilder.toString()); // 传入的参数 集群名称,索引名称,索引类型(type), 查询Builder,scroll查询页面大小,scroll查询scrollId有效时间 String finalTaskId = taskId+""; + long pubTime = fromMills; + long finalFromMills = fromMills; + long finalToMills = toMills; EsUtils.scrollQuery(clusterName, sourceIndices, ESConstants.INDEX_TYPE, queryBuilder, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES, dataList -> { try { + if(dataList.size() == 0){ 
+ System.out.println("没查到相关的 主贴 数据"); + return; + } for (JSONObject data : dataList) { + data = getCreateTime(data,crawlDataFlag); + // 离线拉的数据加个字段吧!跟正常拉的数据做区分 + if(data.get(ESConstants.DOC_TYPE).equals(ESConstants.ITEM) && data.get(ESConstants.PRIMARY).equals(1)){ + data = getPubTime(data,pubTime); + } saveService.initData(data,finalTaskId); // 发送主贴 // 是否要下载图片到指定的 go-fast上 - if(subject.getGoFastSwitch() == 1){ - String goFastAddr = subject.getGoFastAddr(); - if("" == goFastAddr){ - goFastAddr = defultAddr; - } + // 现在判断视频、图片、文件是否下载的方式只取决于isDownload 字段 + boolean isDownload = data.getBoolean(ESConstants.ISDOWNLOAD); + if(isDownload){ + String goFastAddr = defultAddr; data = downloadAndChangePath(data,goFastAddr); } +// if(subject.getGoFastSwitch() == 1){ +// String goFastAddr = subject.getGoFastAddr(); +// if("" == goFastAddr){ +// goFastAddr = defultAddr; +// } +// data = downloadAndChangePath(data,goFastAddr); +// } // 是否写入到指定的kafka // if(subject.getKafkaSwitch() == 1) { // String kafkaTopic = subject.getKafkaTopic(); @@ -178,18 +206,34 @@ public class QueryService { // } // kafkaProducer.send(kafkaTopic, JSONObject.toJSONString(data)); // } - saveService.saveToEsWithFilter(config.esMiniClusterName(), indexName, data); - LOGGER.debug("Send message, indexName :{} , taskId:{} , ID :{}.", indexName, task.getId(), data.getString("_id_")); - System.out.println("=== "+data); + if(!data.get("_id_").equals("")) { + saveService.saveToEsWithFilter(config.esMiniClusterName(), indexName, data); + kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data)); + LOGGER.debug("Send message, indexName :{} , taskId:{} , ID :{}.", indexName, task.getId(), data.getString("_id_")); + // 将要拉评论的ID 添加到list 中,(电商的数据不用拉评论哦)! 
+ if(!siteType.equals(ESConstants.DOCTYPEITEM)) { + if (crawlContentKey.contains("comment") || crawlContentKey.contains("socialComment")) { + docIdsList.add(data.get(ESConstants.DOC_ID).toString()); + } + } + } } } catch (Exception e) { + System.out.println("******* " + dataList ); throw new RuntimeException(e); } }); + + // 开始拉评论数据 + if(docIdsList.size() > 0) { + String docType = docIdsList.get(0).split("_")[1]; + String docIds [] = docIdsList.toArray(new String[0]); + queryComments(docIds, docType, finalFromMills, finalToMills,finalTaskId,crawlDataFlag,indexName); + } + LOGGER.info("This Task is OK ! taskId = " + taskId); Integer cache_num = task.getCacheNum(); cache_num = cache_num +1; taskRepository.updateStatus(cache_num, task.getId().longValue()); - } catch (Exception e) { JSONObject msg = new JSONObject(); msg.put("message", "produce error due to [" + ExceptionUtils.getStackTrace(e) + "]"); @@ -199,6 +243,76 @@ public class QueryService { } } + private JSONObject getPubTime(JSONObject data,long pubTime) { + data.put(ESConstants.PUBTIME,pubTime); + data.put(ESConstants.PUBTIMESTR, DataCheckUtil.getCurrentTime(pubTime)); + data.put(ESConstants.PUBDAY, DataCheckUtil.getDay(pubTime)); + data.put(ESConstants.PUBDATE, DataCheckUtil.getDate(pubTime)); + return data; + } + + private JSONObject getCreateTime(JSONObject data,String crawlDataFlag) { + long createTime = System.currentTimeMillis() ; + data.put(ESConstants.CREATETIME, createTime); + data.put(ESConstants.CREATETIMESTR, DataCheckUtil.getCurrentTime(createTime)); + data.put(ESConstants.CREATEDAY, DataCheckUtil.getDay(createTime)); + data.put(ESConstants.CREATEDATE, DataCheckUtil.getDate(createTime)); + data.put(ESConstants.CRAWLDATAFLAG,crawlDataFlag); + return data; + } + + private void queryComments(String[] docId,String docType, + long startTime,long endTime, + String crawlDataFlag,String finalTaskId, + String indexName) { + LOGGER.info("开始拉取评论数据:"); + QueryBuilder queryBuilder = getQueryBuilder(docId, 
startTime, endTime); + String index = "cl_index_"+docType; //cl_index_video + String[] sourceIndices = {index}; + String clusterName = config.esNormalClusterName(); + LOGGER.info("QB : "+queryBuilder); + LOGGER.info("queryComments index : "+ JSONObject.toJSONString(sourceIndices)); + EsUtils.scrollQuery(clusterName, sourceIndices, ESConstants.INDEX_TYPE, + queryBuilder, ESConstants.SCROLL_PAGE_SIZE, ESConstants.SCROLL_MINUTES, + dataList -> { + try { + if (dataList.size() == 0) { + System.out.println("没查到相关的 评论 数据"); + return; + } + for (JSONObject data : dataList) { + data = getCreateTime(data,crawlDataFlag); + saveService.initData(data,finalTaskId); + if(!data.get("_id_").equals("")) { + saveService.saveToEsWithFilter(config.esMiniClusterName(), indexName, data); + kafkaProducer.send(config.getSendTopic(),JSONObject.toJSONString(data)); + LOGGER.debug("Send comments message, indexName :{} , taskId:{} , ID :{}.", indexName, finalTaskId, data.getString("_id_")); + } + } + } catch (Exception e) { + e.printStackTrace(); + } + }); + } + + // 组装查询评论的查询语句! 
+ private QueryBuilder getQueryBuilder(String[] docId, long startTime, long endTime) { + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); + try{ + // 筛选时间 + boolean boo = true; + QueryBuilder pubTimeRange = buildRangeQueryBuilder( + ESConstants.PUBTIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo); + boolQueryBuilder.must(pubTimeRange); + // 筛选ID + QueryBuilder termQueryBuilder = QueryBuilders.termsQuery(ESConstants.DOC_ID,docId); + boolQueryBuilder.must(termQueryBuilder); + }catch (Exception e){ + e.printStackTrace(); + } + return boolQueryBuilder; + } + /** * 下载 文件、视频、图片,并将新的路径替换写入到 pathSize中 * videoPath == egc @@ -206,47 +320,173 @@ public class QueryService { imagePath == pgc */ private JSONObject downloadAndChangePath(JSONObject data, String goFastAddr) { + String isDownload = data.get(ESConstants.ISDOWNLOAD).toString(); + Map goFastMap = new HashMap<>(); + List> filePathSize = new ArrayList<>(); + List> videoPathSize = new ArrayList<>(); + List> imagePathSize = new ArrayList<>(); + // 文件下载 List filePath = (List) data.get(ESConstants.FILEPATH); - String isDownload = "true"; if(filePath.size() > 0){ - List> filePathSize = getPathSize(filePath,goFastAddr,0,data); + // 调用下载接口,下载并将附件上传到自己的go-fast 上 + Map srcPathMap = getPathSize(filePath,goFastAddr,0,data); + filePath = (List) srcPathMap.get(ESConstants.PATH); + data.put(ESConstants.FILEPATH,filePath); + // 组装 FILEPATHSIZE 字段 + filePathSize = (List>) srcPathMap.get(ESConstants.PATHSIZELIST); if(filePathSize.size() >0){ data.put(ESConstants.FILEPATHSIZE,JSONObject.toJSONString(filePathSize)); data.put(ESConstants.UGC,1); data.put(ESConstants.ISDOWNLOAD,isDownload); } + // 组装 SRCFILEPATH 字段 + Map srcAndGofastUrlMap = (Map) srcPathMap.get("srcMap"); + if(data.containsKey("forwardUrl") && null != data.get("forwardUrl") ) { + try { + List> forwardUrl = JsonUtils.parseArray(data.get("forwardUrl").toString()); + List> srcPath = getSrcPath(forwardUrl,srcAndGofastUrlMap); + data.put(ESConstants.SRCFILEPATH, 
JSON.toJSONString(srcPath, SerializerFeature.DisableCircularReferenceDetect)); + } catch (Exception e) { + e.printStackTrace(); + } + + } } + // 视频下载 List videoPath = (List) data.get(ESConstants.VIDEOPATH); if(videoPath.size() > 0){ - List> videoPathSize = getPathSize(videoPath,goFastAddr,1,data); + // List> videoPathSize = getPathSize(videoPath,goFastAddr,1,data); + System.out.println("************ 要下载的视频链接的 List : "+videoPath); + Map srcPathMap = getPathSize(videoPath,goFastAddr,0,data); + videoPath = (List) srcPathMap.get(ESConstants.PATH); + data.put(ESConstants.VIDEOPATH,videoPath); + videoPathSize = (List>) srcPathMap.get(ESConstants.PATHSIZELIST); if(videoPathSize.size() >0){ data.put(ESConstants.VIDEOPATHSIZE,JSONObject.toJSONString(videoPathSize)); data.put(ESConstants.EGC,1); data.put(ESConstants.ISDOWNLOAD,isDownload); } + // 组装 SRCVIDEOPATH 字段 + Map srcAndGofastUrlMap = (Map) srcPathMap.get("srcMap"); + if(data.containsKey("videoUrl") && null != data.get("videoUrl") ) { + List> srcPath = new ArrayList<>(); + if(data.get("videoUrl").toString().contains("originalUrl")){ + try { + List> videoUrl = JsonUtils.parseArray( data.get("videoUrl").toString()); + srcPath = getSrcPath(videoUrl,srcAndGofastUrlMap); + } catch (Exception e) { + e.printStackTrace(); + } + }else{ + List videoUrl = new ArrayList<>(); + try { + if(data.get("videoUrl").toString().contains("[")) { + videoUrl = JsonUtils.parseArray(data.get("videoUrl").toString()); + }else{ + videoUrl.add(data.get("videoUrl").toString()); + } + }catch (Exception e){ + e.printStackTrace(); + } + srcPath = new ArrayList<>(); + Map srcurlMap = new HashMap<>(); + if(videoPath.size() > 0) { + srcurlMap.put(ESConstants.GOFASTURL, videoPath.get(0)); + } + System.out.println("===============视频原链接的List: " + videoUrl); + if(videoUrl.size() > 0) { + srcurlMap.put(ESConstants.ORIGINALURL, videoUrl.get(0)); + } + if(srcurlMap.size() > 0) { + srcPath.add(srcurlMap); + } + } + 
data.put(ESConstants.SRCVIDEOPATH,JSON.toJSONString(srcPath, SerializerFeature.DisableCircularReferenceDetect)); + } + } + // 图片下载 List imagePath = (List) data.get(ESConstants.IMAGEPATH); if(imagePath.size() > 0){ - List> imagePathSize = getPathSize(imagePath,goFastAddr,2,data); + //List> imagePathSize = getPathSize(imagePath,goFastAddr,2,data); + Map srcPathMap = getPathSize(imagePath,goFastAddr,0,data); + imagePath = (List) srcPathMap.get(ESConstants.PATH); + data.put(ESConstants.IMAGEPATH,imagePath); + imagePathSize = (List>) srcPathMap.get(ESConstants.PATHSIZELIST); if(imagePathSize.size() >0){ data.put(ESConstants.IMAGEPATHSIZE,JSONObject.toJSONString(imagePathSize)); data.put(ESConstants.PGC,1); data.put(ESConstants.ISDOWNLOAD,isDownload); } + Map srcAndGofastUrlMap = (Map) srcPathMap.get("srcMap"); + List> srcPath = new ArrayList<>(); + if(data.containsKey("pictureList") && null != data.get("pictureList")){ + Map pictureList = JSONObject.parseObject(data.get("pictureList").toString()); + if(!pictureList.isEmpty()){ + Map srcurlMap=new HashMap<>(); + for (Map.Entry entry : pictureList.entrySet()) { + Map imgmap= (Map) entry.getValue(); + if(imgmap.containsKey("uploadImg") && imgmap.get("uploadImg") != null && imgmap.get("uploadImg") != ""){ + srcurlMap.put(ESConstants.GOFASTURL,srcAndGofastUrlMap.get(imgmap.get("uploadImg"))); + srcurlMap.put(ESConstants.ORIGINALURL,imgmap.get("img").toString()); + } + srcPath.add(srcurlMap); + } + } + } + data.put(ESConstants.SRCIMAGEPATH,JSON.toJSONString(srcPath, SerializerFeature.DisableCircularReferenceDetect)); + } + // 当三个 pathSize 都为 0 的时候,表示三个下载结果都为空,为了保持页面和实际结果的统一,这块改成 false + if(filePathSize.size() == 0 && videoPathSize.size() == 0 && imagePathSize.size() == 0){ + data.put(ESConstants.ISDOWNLOAD,false); } return data; } + + + + private List> getSrcPath(List> forwardUrl, Map srcAndGofastUrlMap) { + List> srcPathList = new ArrayList<>(); + for (Map urlMap : forwardUrl) { + if(null != urlMap) { + Map srcurlMap = new 
HashMap<>(); + if (urlMap.containsKey(ESConstants.GOFASTURL) && null != urlMap.get(ESConstants.GOFASTURL)) { + srcurlMap.put(ESConstants.GOFASTURL, srcAndGofastUrlMap.get(urlMap.get(ESConstants.GOFASTURL))); + } else { + srcurlMap.put(ESConstants.GOFASTURL, ""); + } + srcurlMap.put(ESConstants.ORIGINALURL, urlMap.get(ESConstants.ORIGINALURL)); + srcPathList.add(srcurlMap); + } + } + return srcPathList; + } + + +// public static void main(String[] args) { +// QueryService queryService = new QueryService(); +// List list = ReadLine.readLine(new File("E:\\work/test1.txt")); +// JSONObject data = JSONObject.parseObject(list.get(0)); +// String goFastAddr = "http://172.18.1.113:8080/upload"; +// JSONObject result = queryService.downloadAndChangePath(data,goFastAddr); +// System.out.println(result); +// +// } + /** * downloadType =0 文件 =1 图片 = 2 视频 */ - private List> getPathSize(List pathList, String goFastAddr,Integer downloadType,JSONObject data) { + private Map getPathSize(List pathList, String goFastAddr,Integer downloadType,JSONObject data) { + Map pathMap = new HashMap<>(); List> pathSizeList = new ArrayList<>(); + List path = new ArrayList<>(); + Map srcMap = new HashMap<>(); for (String downloadUrl:pathList) { String resolution = ""; String videoTime = ""; try { - if(!downloadUrl.contains("si-te.percent.cn")){ + if(null != downloadUrl && !downloadUrl.contains("si-te.percent.cn")){ Map pathSizeMap = DownLoadFile.downloadAndSaveFile(downloadUrl, goFastAddr); LOGGER.info("[QueryService] getPathSize goFaskAddr {}. 
resultMap {}.",goFastAddr ,pathSizeMap); if(pathSizeMap.size() > 0){ @@ -257,43 +497,57 @@ public class QueryService { if(downloadType == 2) { // 图片 resolution = DownLoadFile.imagesize(downloadUrl); } - String url = pathSizeMap.get("realUrl"); - String size = pathSizeMap.get("size"); + //String url = pathSizeMap.get("realUrl").replace(config.getGoFastDomain(),""); + String url = pathSizeMap.get("realUrl").replace("http://172.18.1.113:8080",""); + String size = pathSizeMap.get("size") + "KB"; pathSizeMap.put(ESConstants.URL,url); pathSizeMap.put(ESConstants.SIZE,size); pathSizeMap.put(ESConstants.RESOLUTION,resolution); pathSizeMap.put(ESConstants.VIDEOTIME,videoTime); pathSizeMap.remove("realUrl"); + // 这个是三个PathSize imagePathSize ,videoPathSize filePathSize pathSizeList.add(pathSizeMap); + // 这个是 用来做 gofast 和原链接替换的,key 是原链接,value 是go-fast 链接, + srcMap.put(downloadUrl,url); + // 这个值使用来替换 三个 Path 的 imagePath,videoPath,filePath + path.add(url); + + } } } catch (IOException e) { e.printStackTrace(); } } - return pathSizeList; + pathMap.put(ESConstants.PATHSIZELIST,pathSizeList); + pathMap.put("srcMap",srcMap); + pathMap.put(ESConstants.PATH,path); + return pathMap; + //return pathSizeList; } private QueryBuilder getQueryBuilder(Long startTime, Long endTime, - String cid,String crawlDataFlag,Integer cacheNum) { + String cid,String crawlDataFlag,Integer cacheNum,String siteType) { // 查询语句组装 BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); try { // 当拉取次数大于1的,还需要限制 采集时间,采集时间不早于今天。 boolean boo = true; - if(cacheNum > 1 ){ +// if(cacheNum > 1 ){ +// QueryBuilder pubTimeRange = buildRangeQueryBuilder( +// ESConstants.CREATETIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo); +// boolQueryBuilder.must(pubTimeRange); +// } +// // 筛选发表时间 + if(!siteType.equals(ESConstants.DOCTYPEITEM)) { QueryBuilder pubTimeRange = buildRangeQueryBuilder( - ESConstants.CREATETIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo); + ESConstants.PUBTIME, startTime - 2 * ONE_MINUTE, 
endTime, boo, boo); boolQueryBuilder.must(pubTimeRange); } - // 筛选发表时间 - QueryBuilder pubTimeRange = buildRangeQueryBuilder( - ESConstants.PUBTIME, startTime - 2 * ONE_MINUTE, endTime, boo, boo); - boolQueryBuilder.must(pubTimeRange); // 筛选站点 - if(cid.equals("taobao") || cid.equals("tmall")){ - boolQueryBuilder.must(QueryBuilders.termsQuery(ESConstants.EN_SOURCE, "taobao","tmall")); + if(cid.equals(ESConstants.TAOBAO) || cid.equals(ESConstants.TMALL)){ + boolQueryBuilder.must(QueryBuilders.termsQuery(ESConstants.EN_SOURCE, ESConstants.TAOBAO,ESConstants.TMALL)); }else { boolQueryBuilder.must(QueryBuilders.termQuery(ESConstants.EN_SOURCE, cid)); } @@ -345,7 +599,7 @@ public class QueryService { String account = crawlDataFlag.split("account:")[1]; System.out.println("[buildCrawlDataFlagBuilder] account --- " + account); TermQueryBuilder queryAccountBuilders = QueryBuilders.termQuery(ESConstants.USER_URL,account); - queryBuilder = QueryBuilders.boolQuery().must(queryAccountBuilders).should(queryCrawlDataFlagBuilder); + queryBuilder = QueryBuilders.boolQuery().should(queryAccountBuilders).should(queryCrawlDataFlagBuilder); } }catch (Exception e){ e.printStackTrace(); diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/SaveService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java similarity index 79% rename from cl_query_data_job/src/main/java/com/bfd/mf/job/service/SaveService.java rename to cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java index cbd18be..0deac05 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/SaveService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/query/SaveService.java @@ -1,11 +1,10 @@ -package com.bfd.mf.job.service; +package com.bfd.mf.job.service.query; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.util.EsUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; 
-import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @Service @@ -15,7 +14,6 @@ public class SaveService { public void initData(final JSONObject data, String taskId) { // 初始化自定义字段 data.put(ESConstants.TASKID, taskId); - data.put(ESConstants.ISDOWNLOAD,"false"); data.put("where","backtrace"); } @@ -24,7 +22,7 @@ public class SaveService { try { LOGGER.info("[SaveService] saveToEsWithFilter 写入ES " + miniCluster + " | " + miniIndex); EsUtils.index(miniCluster, miniIndex, ESConstants.INDEX_TYPE, data, ESConstants._ID); - EsUtils.index(miniCluster, miniIndex, ESConstants.INDEX_TYPE, data, ESConstants._ID); + // EsUtils.index(miniCluster, miniIndex, ESConstants.INDEX_TYPE, data, ESConstants._ID); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/StatisticsService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java similarity index 75% rename from cl_query_data_job/src/main/java/com/bfd/mf/job/service/StatisticsService.java rename to cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java index 0a70479..25fa197 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/StatisticsService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/StatisticsService.java @@ -1,9 +1,13 @@ -package com.bfd.mf.job.service; +package com.bfd.mf.job.service.statistics; import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.config.ESConstants; import com.bfd.mf.job.domain.entity.Task; import com.bfd.mf.job.domain.repository.SubjectRepository; import com.bfd.mf.job.domain.repository.TaskRepository; +import com.bfd.mf.job.service.es.EsQueryMiniService; +import com.bfd.mf.job.service.es.EsQueryNormalService; +import com.bfd.mf.job.util.DateUtil; import com.bfd.mf.job.util.EsUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ 
-15,11 +19,11 @@ import java.math.BigInteger; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.TimeZone; @Service public class StatisticsService { private static final Logger LOGGER = LoggerFactory.getLogger(StatisticsService.class); - private static String indexPre = "cl_major_"; @Autowired private AppConfig config; @Autowired @@ -45,16 +49,18 @@ public class StatisticsService { LOGGER.info("------------------------------------------------------------------ StatisticsService ------------------------------------------------------"); long start = System.currentTimeMillis(); //-------统计134上的总量------------------------------------------------------------------------------------ - String clusterName = config.esNormalClusterName(); // 获得 137 的 clusterName + String clusterName = config.esNormalClusterName(); // 获得 134 的 clusterName statisticsTotal(clusterName); long end = System.currentTimeMillis(); LOGGER.info("Statistics Total, took:{} ms.",(end - start)); + //-------统计147上的 每个任务的总量------------------------------------------------------------------------- start = System.currentTimeMillis(); clusterName = config.esMiniClusterName(); // 获得 147 的 clusterName statisticsTask(clusterName); end = System.currentTimeMillis(); LOGGER.info("Statistics Task, took:{} ms.",(end - start)); + //-------统计每个专题的量------------------------------------------------------------------------------------ start = System.currentTimeMillis(); // 如果是正常任务的,用这种方式统计 @@ -82,6 +88,7 @@ public class StatisticsService { Map subjectChannelTodayMap = new HashMap<>(); long count = 0L; long todayCount = 0L; + // 按渠道统计每个渠道的数据量 for(int i = 0 ; i < 8 ; i ++){ if(null != taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i)){ count = taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i); @@ -95,51 +102,52 @@ public class StatisticsService { } switch (i){ case 0: - subjectChannelMap.put("social",count); - subjectChannelTodayMap.put("social",todayCount); + 
subjectChannelMap.put(ESConstants.SOCIAL,count); + subjectChannelTodayMap.put(ESConstants.SOCIAL,todayCount); break; case 1: - subjectChannelMap.put("news",count); - subjectChannelTodayMap.put("news",todayCount); + subjectChannelMap.put(ESConstants.NEWS,count); + subjectChannelTodayMap.put(ESConstants.NEWS,todayCount); break; case 2: - subjectChannelMap.put("blog",count); - subjectChannelTodayMap.put("blog",todayCount); + subjectChannelMap.put(ESConstants.BLOG,count); + subjectChannelTodayMap.put(ESConstants.BLOG,todayCount); break; case 3: - subjectChannelMap.put("bbs",count); - subjectChannelTodayMap.put("bbs",todayCount); + subjectChannelMap.put(ESConstants.BBS,count); + subjectChannelTodayMap.put(ESConstants.BBS,todayCount); break; case 4: - subjectChannelMap.put("video",count); - subjectChannelTodayMap.put("video",todayCount); + subjectChannelMap.put(ESConstants.VIDEO,count); + subjectChannelTodayMap.put(ESConstants.VIDEO,todayCount); break; case 5: - subjectChannelMap.put("item",count); - subjectChannelTodayMap.put("item",todayCount); + subjectChannelMap.put(ESConstants.ITEM,count); + subjectChannelTodayMap.put(ESConstants.ITEM,todayCount); break; case 6: - subjectChannelMap.put("search",count); - subjectChannelTodayMap.put("search",todayCount); + subjectChannelMap.put(ESConstants.SEARCH,count); + subjectChannelTodayMap.put(ESConstants.SEARCH,todayCount); break; case 7: - subjectChannelMap.put("lief",count); - subjectChannelTodayMap.put("lief",todayCount); + subjectChannelMap.put(ESConstants.LIFE,count); + subjectChannelTodayMap.put(ESConstants.LIFE,todayCount); break; } } + // 按采集方式统计数据量 Map subjectCrawlDatFlagMap = new HashMap<>(); Map subjectCrawlDataFlagTodayMap = new HashMap<>(); long siteCount = 0L; long siteTodayCount = 0L; - for(int i = 0 ; i < 8 ; i ++) { - if(null != taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId, i)) { - siteCount = taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId, i); + for(int i = 0 ; i < 4 ; i ++) { + 
if(null != taskRepository.findDataTotalBySbujectIdAndTaskType(subjectId, i)) { + siteCount = taskRepository.findDataTotalBySbujectIdAndTaskType(subjectId, i); }else{ siteCount = 0; } - if(null != taskRepository.findTodayDataTotalBySbujectIdAndSiteType(subjectId, i)) { - siteTodayCount = taskRepository.findTodayDataTotalBySbujectIdAndSiteType(subjectId, i); + if(null != taskRepository.findTodayDataTotalBySbujectIdAndTaskType(subjectId, i)) { + siteTodayCount = taskRepository.findTodayDataTotalBySbujectIdAndTaskType(subjectId, i); }else{ siteTodayCount = 0; } @@ -156,6 +164,10 @@ public class StatisticsService { subjectCrawlDatFlagMap.put("url", siteCount); subjectCrawlDataFlagTodayMap.put("url", siteTodayCount); break; + case 3: + subjectCrawlDatFlagMap.put("upload", siteCount); + subjectCrawlDataFlagTodayMap.put("upload", siteTodayCount); + break; } } totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); @@ -171,7 +183,7 @@ public class StatisticsService { boolean result = true; try{ // LOGGER.info("[StatisticsService] statisticsSubject start... 
subjectId : " + subjectId); - String indexName = indexPre + subjectId; + String indexName = config.getIndexNamePre() + subjectId; // 统计这个专题下每个渠道的总量 Map subjectChannelMap = esQueryMiniService.getSubjectChannelStatistics(miniName,indexName); // 统计这个专题下每个渠道的增量 @@ -214,6 +226,13 @@ public class StatisticsService { public boolean statisticsTask(String miniName){ boolean result = true; try{ + // 第一步,需要将 今天之前已完成 的 任务的 today_data_total 改成 0, update_time + long current = System.currentTimeMillis(); + long zero = current/(1000*3600*24)*(1000*3600*24) - TimeZone.getDefault().getRawOffset(); + String updateTime = DateUtil.parseDateByTime(zero); + System.out.println("----- "+ updateTime); + taskRepository.updateTodayTotalCount(updateTime); + EsQueryMiniService esQueryMiniService = new EsQueryMiniService(); // 从库中查出当前任务表中的所有任务 id 和对应的 cid,cralwDataFlag 以及 subjectId List taskList = taskRepository.findAllBydel0(); @@ -221,15 +240,20 @@ public class StatisticsService { for (Task task: taskList) { Long taskId = task.getId().longValue(); String crawlDataFlag = task.getCrawlDataFlag(); - Map countMap = esQueryMiniService.getTaskCount(miniName,taskId,task,crawlDataFlag); - // 直接更新 cl_task 表中的 data_total 和 today_data_total - long totalCount = 0L; - long todayCount = 0L; - if(countMap.containsKey("totalCount") && countMap.containsKey("todayCount")) { - totalCount = countMap.get("totalCount"); - todayCount = countMap.get("todayCount"); + String indexNamePre = config.getIndexNamePre(); + Map countMap = new HashMap<>(); + if(null != task.getCid() && !task.getCid().equals("test")) { + countMap = esQueryMiniService.getTaskCount(miniName, taskId, task, crawlDataFlag, indexNamePre); + // 直接更新 cl_task 表中的 data_total 和 today_data_total + long totalCount = 0L; + long todayCount = 0L; + if(countMap.containsKey("totalCount") && countMap.containsKey("todayCount")) { + totalCount = countMap.get("totalCount"); + todayCount = countMap.get("todayCount"); + } + 
taskRepository.updateTaskCount(taskId,totalCount,todayCount); } - taskRepository.updateTaskCount(taskId,totalCount,todayCount ); + } LOGGER.info("[StatisticsService] statisticsTask finish ..."); }catch (Exception e){ diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/TotalCountService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java similarity index 91% rename from cl_query_data_job/src/main/java/com/bfd/mf/job/service/TotalCountService.java rename to cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java index 4d7baee..044884b 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/TotalCountService.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/statistics/TotalCountService.java @@ -1,10 +1,12 @@ -package com.bfd.mf.job.service; +package com.bfd.mf.job.service.statistics; +import com.alibaba.fastjson.JSONObject; import com.bfd.mf.job.config.ESConstants; -import com.bfd.mf.job.domain.entity.ResultDetail; import com.bfd.mf.job.domain.entity.SubjectCount; import com.bfd.mf.job.domain.repository.ResultDetailRepository; import com.bfd.mf.job.domain.repository.SubjectCountRepository; +import com.bfd.mf.job.service.statistics.StatisticsService; +import com.bfd.mf.job.util.DateUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -85,10 +87,16 @@ public class TotalCountService { if(null != subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)) { account_type_total_count = BigInteger.valueOf(subjectCrawlDataFlagMap.get(ESConstants.ACCOUNT)); } + if(null != subjectCrawlDataFlagMap.get("upload")){ + System.out.println("上传的任务的数据量对应的专题 " + subjectId +" == "+ subjectCrawlDataFlagMap.get("upload")); + if(subjectCrawlDataFlagMap.get("upload").compareTo(0L) >0) + sum = subjectCrawlDataFlagMap.get("upload"); + } } subjectCount.setUrlTypeTotalCount(url_type_total_count); 
subjectCount.setKeywordTypeTotalCount(keyword_type_total_count); subjectCount.setAccountTypeTotalCount(account_type_total_count); + if (subjectCrawlDataFlagTodayMap.size() > 0) { if(null != subjectCrawlDataFlagTodayMap.get(ESConstants.URL)) { url_type_count = BigInteger.valueOf(subjectCrawlDataFlagTodayMap.get(ESConstants.URL)); @@ -188,6 +196,15 @@ public class TotalCountService { social_total_count, social_count, bbs_total_count, bbs_count, blog_total_count, blog_count, news_total_count, news_count, search_total_count, search_count, item_total_count, item_count, video_total_count, video_count, life_total_count, life_count); } else { + // 先获取这个专题昨天的 update_time ,然后添加到 UpdateTime 字段中 + // 获取昨天日期 + String yesterday = DateUtil.parseDateByday(System.currentTimeMillis() - 1000 * 60 * 60 * 24); + Date updateTime = subjectCountRepository.getUpdateTimeBySubjectId(subjectId,yesterday); + if(updateTime != null){ + subjectCount.setUpdateTime(updateTime); + }else { + subjectCount.setUpdateTime(new Date()); + } subjectCountRepository.save(subjectCount); } }catch ( Exception e){ diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java new file mode 100644 index 0000000..a59a456 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/taskCount/TaskCountService.java @@ -0,0 +1,311 @@ +package com.bfd.mf.job.service.taskCount; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.domain.entity.Task; +import com.bfd.mf.job.domain.entity.TaskCount; +import com.bfd.mf.job.domain.repository.SubjectRepository; +import com.bfd.mf.job.domain.repository.TaskCountRepository; +import com.bfd.mf.job.domain.repository.TaskRepository; +import com.bfd.mf.job.service.es.EsQueryMiniService; +import com.bfd.mf.job.service.es.EsQueryNormalService; +import 
com.bfd.mf.job.service.statistics.TotalCountService; +import com.bfd.mf.job.util.DateUtil; +import com.bfd.mf.job.util.EsUtils; +import kafka.utils.Json; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.jpa.repository.Query; +import org.springframework.stereotype.Service; + +import javax.annotation.PostConstruct; +import java.math.BigInteger; +import java.text.DecimalFormat; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TimeZone; + +@Service +public class TaskCountService { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskCountService.class); + @Autowired + private AppConfig config; + @Autowired + private TaskRepository taskRepository; + @Autowired + private TaskCountRepository taskCountRepository; + + @PostConstruct + public void init() { + // 注册数据查询来源 + EsUtils.registerCluster(config.esNormalClusterName(), config.esNormalAddress());// 配置文件中的 es-source + EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 es-target + } + + /* + 尝试在指定时间内获得许可,如果获得了,则直接返回,如果没有获得,则执行下面的流程 + */ + public void tryAcquire() { + long start = System.currentTimeMillis(); + LOGGER.info("------------------------------------------------------------------ TaskCountService ------------------------------------------------------"); + // 获取当天 0 点~当天12点的时间 + long current = System.currentTimeMillis(); + long zero = current/(1000*3600*24)*(1000*3600*24) - TimeZone.getDefault().getRawOffset(); + long twelve=zero+24*60*60*1000-1;//今天23点59分59秒的毫秒数 + // 任务的统计,统计的是前一天的,因此: + long newZero = zero - 24*60*60*1000; + long newTweleve = twelve - 24*60*60*1000; + String taskStartTime = DateUtil.parseDateByTime(newZero); + String taskEndTime = DateUtil.parseDateByTime(newTweleve); + System.out.println(taskStartTime + " -- " + taskEndTime); + + // 直接查 cl_task 表中,开始时间和结束时间再当天范围内的任务的today_count; + /** + * 1.只统计单次任务 
+ 2.只统计结束时间都在当天的任务(去掉开始时间,以任务完成时间为判断) + 3.任务的数据量大于0(拿这一条过滤掉不合理的任务) + */ + Long sumTodayTotalCount = taskRepository.findTodayDataTotal(taskStartTime,taskEndTime); + //Long count = taskRepository.findTodayDataTotalTaskNum(); + +// List> list = taskRepository.findByCrawlTime(taskStartTime,taskEndTime); +// for (Map map :list) { +// System.out.println("任务:"+JSONObject.toJSONString(map)); +// } + + // 获取任务的时间差 + List avgSpeedList = taskRepository.findTaskByCrawlTime(taskStartTime,taskEndTime); + Integer count = avgSpeedList.size(); + Long sumSpeedCount = 0L; + for (BigInteger avg: avgSpeedList) { + sumSpeedCount = sumSpeedCount + Long.valueOf(avg.toString()); + } + System.out.println("sumTodayTotalCount : "+sumTodayTotalCount); + String avgSpeed = "0"; + String avgCount = "0"; + if(sumSpeedCount != null && sumTodayTotalCount != null) { + avgSpeed = String.format("%.1f", sumSpeedCount * 1.0 / count); + avgCount = String.format("%.1f", sumTodayTotalCount * 1.0 / count); + System.out.println("任务数量:" + count + "; 当天的新增总量: " + sumTodayTotalCount + "; 平均任务量:" + avgCount + "; 平均速度:" + avgSpeed); + } + // 将结果插入到 cl_task_count 表中 + String day = taskStartTime.split(" ")[0]; + + TaskCount taskCount = new TaskCount(); + taskCount.setCountDate(day); + taskCount.setAvgCount(Float.valueOf(avgCount)); + taskCount.setAvgSpeed(Float.valueOf(avgSpeed)); + System.out.println(JSONObject.toJSONString(taskCount)); + taskCountRepository.save(taskCount); + long end = System.currentTimeMillis(); + LOGGER.info("TaskCountService finish, took:{} ms.",(end - start)); + +// //-------统计147上的 每个任务的总量------------------------------------------------------------------------- +// start = System.currentTimeMillis(); +// clusterName = config.esMiniClusterName(); // 获得 147 的 clusterName +// statisticsTask(clusterName); +// end = System.currentTimeMillis(); +// LOGGER.info("Statistics Task, took:{} ms.",(end - start)); +// +// 
//-------统计每个专题的量------------------------------------------------------------------------------------ +// start = System.currentTimeMillis(); +// // 如果是正常任务的,用这种方式统计 +// List subjectIds = subjectRepository.findAllSubjectIds(); +// for (BigInteger subjectId: subjectIds) { +// statisticsSubjectBySumTask(subjectId); +// } +// end = System.currentTimeMillis(); +// LOGGER.info("Statistics Subject Normal, took:{} ms.",(end - start)); +// // 如果是【欧莱雅】任务的,得用这个方式统计呀 +// start = System.currentTimeMillis(); +// List subjectIds1 = subjectRepository.findAllOlySubjectIds(); +// for (BigInteger subjectId: subjectIds1) { +// statisticsSubject(subjectId,clusterName); +// } +// end = System.currentTimeMillis(); +// LOGGER.info("Statistics Subject OLY, took:{} ms.",(end - start)); + + } + +// private boolean statisticsSubjectBySumTask(BigInteger subjectId) { +// boolean result = true; +// try{ +// Map subjectChannelMap = new HashMap<>(); +// Map subjectChannelTodayMap = new HashMap<>(); +// long count = 0L; +// long todayCount = 0L; +// // 按渠道统计每个渠道的数据量 +// for(int i = 0 ; i < 8 ; i ++){ +// if(null != taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i)){ +// count = taskRepository.findDataTotalBySbujectIdAndSiteType(subjectId,i); +// }else{ +// count = 0; +// } +// if(null != taskRepository.findTodayDataTotalBySbujectIdAndSiteType(subjectId,i)) { +// todayCount = taskRepository.findTodayDataTotalBySbujectIdAndSiteType(subjectId, i); +// }else{ +// todayCount = 0; +// } +// switch (i){ +// case 0: +// subjectChannelMap.put("social",count); +// subjectChannelTodayMap.put("social",todayCount); +// break; +// case 1: +// subjectChannelMap.put("news",count); +// subjectChannelTodayMap.put("news",todayCount); +// break; +// case 2: +// subjectChannelMap.put("blog",count); +// subjectChannelTodayMap.put("blog",todayCount); +// break; +// case 3: +// subjectChannelMap.put("bbs",count); +// subjectChannelTodayMap.put("bbs",todayCount); +// break; +// case 4: +// 
subjectChannelMap.put("video",count); +// subjectChannelTodayMap.put("video",todayCount); +// break; +// case 5: +// subjectChannelMap.put("item",count); +// subjectChannelTodayMap.put("item",todayCount); +// break; +// case 6: +// subjectChannelMap.put("search",count); +// subjectChannelTodayMap.put("search",todayCount); +// break; +// case 7: +// subjectChannelMap.put("lief",count); +// subjectChannelTodayMap.put("lief",todayCount); +// break; +// } +// } +// // 按采集方式统计数据量 +// Map subjectCrawlDatFlagMap = new HashMap<>(); +// Map subjectCrawlDataFlagTodayMap = new HashMap<>(); +// long siteCount = 0L; +// long siteTodayCount = 0L; +// for(int i = 0 ; i < 4 ; i ++) { +// if(null != taskRepository.findDataTotalBySbujectIdAndTaskType(subjectId, i)) { +// siteCount = taskRepository.findDataTotalBySbujectIdAndTaskType(subjectId, i); +// }else{ +// siteCount = 0; +// } +// if(null != taskRepository.findTodayDataTotalBySbujectIdAndTaskType(subjectId, i)) { +// siteTodayCount = taskRepository.findTodayDataTotalBySbujectIdAndTaskType(subjectId, i); +// }else{ +// siteTodayCount = 0; +// } +// switch (i) { +// case 0: +// subjectCrawlDatFlagMap.put("keyword", siteCount); +// subjectCrawlDataFlagTodayMap.put("keyword", siteTodayCount); +// break; +// case 1: +// subjectCrawlDatFlagMap.put("account", siteCount); +// subjectCrawlDataFlagTodayMap.put("account", siteTodayCount); +// break; +// case 2: +// subjectCrawlDatFlagMap.put("url", siteCount); +// subjectCrawlDataFlagTodayMap.put("url", siteTodayCount); +// break; +// case 3: +// subjectCrawlDatFlagMap.put("upload", siteCount); +// subjectCrawlDataFlagTodayMap.put("upload", siteTodayCount); +// break; +// } +// } +// totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); +// }catch (Exception e){ +// result = false; +// LOGGER.error("[StatisticsService] statisticsSubject ERROR... 
subjectId : " + subjectId + "error : " ); +// e.printStackTrace(); +// } +// return result; +// } +// +// public boolean statisticsSubject(BigInteger subjectId, String miniName){ +// boolean result = true; +// try{ +// // LOGGER.info("[StatisticsService] statisticsSubject start... subjectId : " + subjectId); +// String indexName = config.getIndexNamePre() + subjectId; +// // 统计这个专题下每个渠道的总量 +// Map subjectChannelMap = esQueryMiniService.getSubjectChannelStatistics(miniName,indexName); +// // 统计这个专题下每个渠道的增量 +// Map subjectChannelTodayMap = esQueryMiniService.getSubjectChannelTodayStatistics(miniName,indexName); +// // 统计这个专题下每种采集类型的总量 +// Map subjectCrawlDatFlagMap = esQueryMiniService.getSubjectCrawlDataFlagStatistics(miniName,indexName); +// // 统计这个专题下每种采集类型的增量 +// Map subjectCrawlDataFlagTodayMap = esQueryMiniService.getSubjectCrawlDataFlagTodayStatistics(miniName,indexName); +// // 查入或修改表 +// totalCountService.updateSubjectCount(subjectId,subjectChannelMap,subjectChannelTodayMap,subjectCrawlDatFlagMap,subjectCrawlDataFlagTodayMap); +// totalCountService.updateResultDetil(subjectId,subjectChannelMap); +// }catch (Exception e){ +// result = false; +// LOGGER.error("[StatisticsService] statisticsSubject ERROR... subjectId : " + subjectId + "error : " ); +// e.printStackTrace(); +// } +// return result; +// } +// +// public boolean statisticsTotal(String normalName){ +// boolean result = true; +// try{ +// LOGGER.info("[StatisticsService] statisticsTotal start... 
"); +// EsQueryNormalService esQueryNormalService = new EsQueryNormalService(); +// // 查询 全局数据 每种类型的统计结果 +// Map channelMap = esQueryNormalService.getChannelStatistics(normalName); +// Map channelTodayMap = esQueryNormalService.getChannelTodayStatistics(normalName); +// Map crawlDataFlagMap = esQueryNormalService.getCrawlDataFlagStatistics(normalName); +// Map crawlDataFlagTodayMap = esQueryNormalService.getCrawlDataFlagTodayStatistics(normalName); +// BigInteger subjectId = new BigInteger("0"); +// totalCountService.updateSubjectCount(subjectId,channelMap,channelTodayMap,crawlDataFlagMap,crawlDataFlagTodayMap); +// }catch (Exception e){ +// result = false; +// LOGGER.error("[StatisticsService] statisticsTotal ERROR... "); +// e.printStackTrace(); +// } +// return result; +// } +// +// public boolean statisticsTask(String miniName){ +// boolean result = true; +// try{ +// // 第一步,需要将 今天之前已完成 的 任务的 today_data_total 改成 0, update_time +// +// +// EsQueryMiniService esQueryMiniService = new EsQueryMiniService(); +// // 从库中查出当前任务表中的所有任务 id 和对应的 cid,cralwDataFlag 以及 subjectId +// List taskList = taskRepository.findAllBydel0(); +// // 遍历任务List ,根据条件组装ES查询语句去对应的索引下查结果,然后回写到任务表中 +// for (Task task: taskList) { +// Long taskId = task.getId().longValue(); +// String crawlDataFlag = task.getCrawlDataFlag(); +// String indexNamePre = config.getIndexNamePre(); +// Map countMap = new HashMap<>(); +// if(null != task.getCid() && !task.getCid().equals("test")) { +// countMap = esQueryMiniService.getTaskCount(miniName, taskId, task, crawlDataFlag, indexNamePre); +// // 直接更新 cl_task 表中的 data_total 和 today_data_total +// long totalCount = 0L; +// long todayCount = 0L; +// if(countMap.containsKey("totalCount") && countMap.containsKey("todayCount")) { +// totalCount = countMap.get("totalCount"); +// todayCount = countMap.get("todayCount"); +// } +// taskRepository.updateTaskCount(taskId,totalCount,todayCount); +// } +// +// } +// LOGGER.info("[StatisticsService] statisticsTask finish 
..."); +// }catch (Exception e){ +// e.printStackTrace(); +// result = false; +// LOGGER.error("[StatisticsService] statisticsTask ERROR..."); +// } +// return result; +// } + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadExcelService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadExcelService.java new file mode 100644 index 0000000..e28d4d9 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadExcelService.java @@ -0,0 +1,545 @@ +package com.bfd.mf.job.service.upload; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.job.config.AllKeys; +import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.config.ESConstants; +import com.bfd.mf.job.download.DownLoadFile; +import com.bfd.mf.job.util.DataCheckUtil; +import com.bfd.mf.job.util.EsUtils2; +import com.bfd.mf.job.util.ReadLine; +import com.monitorjbl.xlsx.StreamingReader; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFCell; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.assertj.core.util.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import javax.annotation.PostConstruct; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.util.*; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + +import static com.bfd.mf.job.config.ESConstants.INDEX_TYPE; + +@Service +public class UpLoadExcelService { + private static final Logger LOGGER = LoggerFactory.getLogger(UpLoadExcelService.class); + private static BlockingQueue DATA_CACHE = new 
LinkedBlockingQueue<>(10240); + + + @PostConstruct + public void init() { + // 注册数据查询来源 + String address []= {config.getEsMini().get("address").toString()}; + EsUtils2.registerCluster(config.getEsMini().get("name").toString(), address);// 配置文件中的 es-source + } + + @Autowired + private AppConfig config; + + /** + * 解析 Excel 中的数据,并将数据处理后写入到 对应subjectId 的ES索引中 + * 目前是一条一条写,这块需要优化哦!! + */ +// public int parseExcel(String subjectId ,String path ,String excelName, Map> fileNameMap,String crawlDataFlag) { +// LOGGER.info("UpLoadExcelService [parseExcel] parseExcel start ..."); +// int dataCount = 0; +// try{ +// XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(path+excelName)); +// //获取每一个工作簿的数据 +// long start = System.currentTimeMillis(); +// for (int i = 0; i < xssfWorkbook.getNumberOfSheets(); i++) { +// XSSFSheet sheet = xssfWorkbook.getSheetAt(i); +// int rowNum = sheet.getLastRowNum(); +// int cellNum = sheet.getRow(0).getLastCellNum(); +// if(cellNum < 22){ +// return dataCount; +// } +// dataCount = rowNum ; +// List resultList = new ArrayList<>(); +// for (int row = 1; row <= rowNum; row++) { +// JSONObject resultJson = new JSONObject(); +// Map resultMap = AllKeys.getMap(); +// String dataId = String.valueOf(sheet.getRow(row).getCell(0)); +// resultMap.put(ESConstants.DATA_ID,dataId); +// resultMap.put(ESConstants._ID,dataId); +// resultMap.put(ESConstants.DOC_ID,String.valueOf(sheet.getRow(row).getCell(1))); +// resultMap.put(ESConstants.CHANNEL,String.valueOf(sheet.getRow(row).getCell(2))); +// resultMap.put(ESConstants.SOURCE,String.valueOf(sheet.getRow(row).getCell(3))); +// resultMap.put(ESConstants.EN_SOURCE,String.valueOf(sheet.getRow(row).getCell(4))); +// resultMap.put(ESConstants.URL,String.valueOf(sheet.getRow(row).getCell(5))); +// resultMap.put(ESConstants.TITLE,String.valueOf(sheet.getRow(row).getCell(6))); +// resultMap.put(ESConstants.TRANSLATETITLE,String.valueOf(sheet.getRow(row).getCell(7))); +// // 发表时间的 4 个字段 +// String 
pubTimeStr = String.valueOf(sheet.getRow(row).getCell(8)); +// long pubTime = DataCheckUtil.convertDateTotime(pubTimeStr)*1000; +// long pubDay = DataCheckUtil.getDay(pubTime); +// String pubDate = DataCheckUtil.getDate(pubTime); +// resultMap.put(ESConstants.PUBTIME, pubTime); +// resultMap.put(ESConstants.PUBTIMESTR,pubTimeStr); +// resultMap.put(ESConstants.PUBDAY,pubDay); +// resultMap.put(ESConstants.PUBDATE,pubDate); +// +// resultMap.put(ESConstants.AUTHOR,String.valueOf(sheet.getRow(row).getCell(9))); +// resultMap.put(ESConstants.AUTHORID,String.valueOf(sheet.getRow(row).getCell(10))); +// resultMap.put(ESConstants.CONTENT,String.valueOf(sheet.getRow(row).getCell(11))); +// resultMap.put(ESConstants.TRANSLATECONTENT,String.valueOf(sheet.getRow(row).getCell(12))); +// resultMap.put(ESConstants.PRICE,String.valueOf(sheet.getRow(row).getCell(13))); +// resultMap.put(ESConstants.PRODUCTPARAMETER,String.valueOf(sheet.getRow(row).getCell(14))); +// // 抓取时间的 4 个字段 +// String crawlTimeStr = String.valueOf(sheet.getRow(row).getCell(15)); +// long crawlTime = System.currentTimeMillis() ; +// if(!crawlTimeStr.contains("1970")){ +// crawlTime = DataCheckUtil.convertDateTotime(crawlTimeStr)*1000; +// }else{ +// crawlTimeStr = DataCheckUtil.getCurrentTime(crawlTime); +// } +// long crawlDay = DataCheckUtil.getDay(crawlTime); +// String crawlDate = DataCheckUtil.getDate(crawlTime); +// +// resultMap.put(ESConstants.CRAWLTIME,crawlTime); +// resultMap.put(ESConstants.CRAWLTIMESTR,crawlTimeStr); +// resultMap.put(ESConstants.CRAWLDAY,crawlDay); +// resultMap.put(ESConstants.CRAWLDATE,crawlDate); +// // crawlDataFlag 这个字段值不用数据中原有的,而是要用页面传过来的,不然任务查询的时候查不到数据 +// resultMap.put(ESConstants.CRAWLDATAFLAG,crawlDataFlag); +// resultMap.put(ESConstants.SYS_SENTIMENT,String.valueOf(sheet.getRow(row).getCell(17))); +// // 提取的关键字字段的值 +// XSSFCell hlKeywords = sheet.getRow(row).getCell(18); +// List hl = new ArrayList<>(); +// if (null != hlKeywords) { +// if 
(hlKeywords.toString().equals("[]")) { +// resultMap.put(ESConstants.HL_KEYWORDS, hl); +// } else { +// if (hlKeywords.toString().contains(",")) { +// String hlk[] = hlKeywords.toString().replace("[", "").replace("]", "").replace("\"", "").split(","); +// hl = Arrays.asList(hlk); +// } else { +// String hlk = hlKeywords.toString().replace("[", "").replace("]", ""); +// hl.add(hlk); +// } +// } +// }else { +// resultMap.put(ESConstants.HL_KEYWORDS, hl); +// } +// // 转发、评论、点赞 +// String quoteCount = sheet.getRow(row).getCell(19).toString(); +// if(quoteCount.equals("")){ +// quoteCount = "0"; +// } +// resultMap.put("quoteCount",Integer.valueOf(quoteCount)); +// String commentsCount = sheet.getRow(row).getCell(20).toString(); +// if(commentsCount.equals("")){ +// commentsCount = "0"; +// } +// resultMap.put("commentsCount",Integer.valueOf(commentsCount)); +// String attitudesCount = sheet.getRow(row).getCell(21).toString(); +// if(attitudesCount.equals("")){ +// attitudesCount = "0"; +// } +// resultMap.put("attitudesCount",Integer.valueOf(attitudesCount)); +// // 插入时间的 4个字段 +// long createTime = System.currentTimeMillis() ; +// resultMap.put(ESConstants.CREATETIME, createTime); +// resultMap.put(ESConstants.CREATETIMESTR, DataCheckUtil.getCurrentTime(createTime)); +// resultMap.put(ESConstants.CREATEDAY, DataCheckUtil.getDay(createTime)); +// resultMap.put(ESConstants.CREATEDATE, DataCheckUtil.getDate(createTime)); +// +// // 根据路径和数据ID,读取附件,组装附件的字段值 +// resultMap = getPathSize(path,dataId,resultMap,fileNameMap); +// +// LOGGER.info("The Result: " + JSONObject.toJSONString(resultMap)); +// resultJson.putAll(resultMap); +// resultList.add(resultJson); +// // 一条一条的数据插入 +// // uploadData(subjectId,resultJson); +// } +// LOGGER.info("Writer Data To ES totalCount = " + resultList.size()); +// long end = System.currentTimeMillis(); +// System.out.println(end-start + " === "+resultList.size()); +// // 批量的数据插入 +//// if(resultList.size() >= 1000) { +//// uploadData(subjectId, 
resultList); +//// resultList.clear(); +//// } +// } +// }catch (Exception e){ +// e.printStackTrace(); +// dataCount = 0; +// } +// return dataCount; +// } + + + + public Map parseExcel2(String subjectId ,String path , + String excelName, + Map> fileNameMap, + String crawlDataFlag) + throws FileNotFoundException { + LOGGER.info("UpLoadExcelService [parseExcel] 222 parseExcel2 start ..."); + Map returnMap = new HashMap<>(); + int dataCount = 0; + try{ + FileInputStream in = new FileInputStream(path+excelName); + Workbook wk = StreamingReader.builder() + .rowCacheSize(100) //缓存到内存中的行数,默认是10 + .bufferSize(4096) //读取资源时,缓存到内存的字节大小,默认是1024 + .open(in); //打开资源,必须,可以是InputStream或者是File,注意:只能打开XLSX格式的文件 + Sheet sheet = wk.getSheetAt(0); + //遍历所有的行 + List resultList = new ArrayList<>(); + Map pubTimeMap = new HashMap<>(); + pubTimeMap.put("min", System.currentTimeMillis()); + pubTimeMap.put("max", 0L); + for (Row row : sheet) { + if(row.getRowNum() != 0){ + JSONObject resultJson = new JSONObject(); + Map resultMap = AllKeys.getMap(); + String dataId = String.valueOf(row.getCell(0).getStringCellValue()); + resultMap.put(ESConstants.DATA_ID, dataId); + resultMap.put(ESConstants._ID, dataId); + resultMap.put(ESConstants.DOC_ID, String.valueOf(row.getCell(1).getStringCellValue())); + String channel = String.valueOf(row.getCell(2).getStringCellValue()); + resultMap.put(ESConstants.CHANNEL,channel ); + resultMap.put(ESConstants.DOC_TYPE,getDocType(channel)); + resultMap.put(ESConstants.SOURCE, String.valueOf(row.getCell(3).getStringCellValue())); + resultMap.put(ESConstants.EN_SOURCE, String.valueOf(row.getCell(4).getStringCellValue())); + resultMap.put(ESConstants.URL, String.valueOf(row.getCell(5).getStringCellValue())); + resultMap.put(ESConstants.TITLE, String.valueOf(row.getCell(6).getStringCellValue())); + resultMap.put(ESConstants.TRANSLATETITLE, String.valueOf(row.getCell(7).getStringCellValue())); + // 发表时间的 4 个字段 + String pubTimeStr = 
String.valueOf(row.getCell(8).getStringCellValue()); + long pubTime = DataCheckUtil.convertDateTotime(pubTimeStr) * 1000; + long pubDay = DataCheckUtil.getDay(pubTime); + String pubDate = DataCheckUtil.getDate(pubTime); + resultMap.put(ESConstants.PUBTIME, pubTime); + if(pubTime < pubTimeMap.get("min")){ + pubTimeMap.put("min",pubTime); + } + if(pubTime > pubTimeMap.get("max")){ + pubTimeMap.put("max",pubTime); + } + resultMap.put(ESConstants.PUBTIMESTR, pubTimeStr); + resultMap.put(ESConstants.PUBDAY, pubDay); + resultMap.put(ESConstants.PUBDATE, pubDate); + + resultMap.put(ESConstants.AUTHOR, String.valueOf(row.getCell(9).getStringCellValue())); + resultMap.put(ESConstants.AUTHORID, String.valueOf(row.getCell(10).getStringCellValue())); + resultMap.put(ESConstants.CONTENT, String.valueOf(row.getCell(11).getStringCellValue())); + resultMap.put(ESConstants.TRANSLATECONTENT, String.valueOf(row.getCell(12).getStringCellValue())); + resultMap.put(ESConstants.PRICE, String.valueOf(row.getCell(13).getStringCellValue())); + resultMap.put(ESConstants.PRODUCTPARAMETER, String.valueOf(row.getCell(14).getStringCellValue())); + // 抓取时间的 4 个字段 + String crawlTimeStr = String.valueOf(row.getCell(15).getStringCellValue()); + long crawlTime = System.currentTimeMillis(); + if (!crawlTimeStr.contains("1970")) { + crawlTime = DataCheckUtil.convertDateTotime(crawlTimeStr) * 1000; + } else { + crawlTimeStr = DataCheckUtil.getCurrentTime(crawlTime); + } + long crawlDay = DataCheckUtil.getDay(crawlTime); + String crawlDate = DataCheckUtil.getDate(crawlTime); + + resultMap.put(ESConstants.CRAWLTIME, crawlTime); + resultMap.put(ESConstants.CRAWLTIMESTR, crawlTimeStr); + resultMap.put(ESConstants.CRAWLDAY, crawlDay); + resultMap.put(ESConstants.CRAWLDATE, crawlDate); + // crawlDataFlag 这个字段值不用数据中原有的,而是要用页面传过来的,不然任务查询的时候查不到数据 + resultMap.put(ESConstants.CRAWLDATAFLAG, crawlDataFlag); + resultMap.put(ESConstants.SYS_SENTIMENT, String.valueOf(row.getCell(17).getStringCellValue())); + // 
提取的关键字字段的值 + String hlKeywords = row.getCell(18).getStringCellValue(); + List hl = getHlKeywords(hlKeywords); + resultMap.put(ESConstants.HL_KEYWORDS, hl); + // 转发、评论、点赞 + String quoteCount = row.getCell(19).getStringCellValue(); + if (quoteCount.equals("")) { + quoteCount = "0"; + } + resultMap.put("quoteCount", Integer.valueOf(quoteCount)); + + String commentsCount = row.getCell(20).getStringCellValue(); + if (commentsCount.equals("")) { + commentsCount = "0"; + } + resultMap.put("commentsCount", Integer.valueOf(commentsCount)); + + String attitudesCount = row.getCell(21).getStringCellValue(); + if (attitudesCount.equals("")) { + attitudesCount = "0"; + } + resultMap.put("attitudesCount", Integer.valueOf(attitudesCount)); + // 图像识别结果 + String ocrText = row.getCell(22).getStringCellValue(); + List ocrList = getHlKeywords(ocrText); + resultMap.put(ESConstants.OCRTEXT,ocrList); + // 语音识别结果 + String asrText = row.getCell(23).getStringCellValue(); + resultMap.put(ESConstants.ASRTEXT,asrText); + // 插入时间的 4个字段 + long createTime = System.currentTimeMillis(); + resultMap.put(ESConstants.CREATETIME, createTime); + resultMap.put(ESConstants.CREATETIMESTR, DataCheckUtil.getCurrentTime(createTime)); + resultMap.put(ESConstants.CREATEDAY, DataCheckUtil.getDay(createTime)); + resultMap.put(ESConstants.CREATEDATE, DataCheckUtil.getDate(createTime)); + + + // 根据路径和数据ID,读取附件,组装附件的字段值 + resultMap = getPathSize(path, dataId, resultMap, fileNameMap); + LOGGER.info("The Result: " + JSONObject.toJSONString(resultMap)); + resultJson.putAll(resultMap); + resultList.add(resultJson); + // 一条一条的数据插入 + //uploadData(subjectId,resultJson); + } + if(resultList.size() >=100){ + dataCount = dataCount+resultList.size(); + uploadData(subjectId, resultList); + resultList.clear(); + } + } + + // 最后多出来的但是少于100条的数据 + dataCount = dataCount + resultList.size(); + uploadData(subjectId, resultList); + returnMap.put("pubTimeMap",pubTimeMap); + returnMap.put("dataCount",dataCount); + LOGGER.info("Writer Data 
To ES totalCount = " + dataCount); + }catch (Exception e){ + e.printStackTrace(); + } + return returnMap; + } + + private List getHlKeywords(String hlKeywords) { + List hl = new ArrayList<>(); + if (null != hlKeywords ) { + if (hlKeywords.toString().equals("[]")) { + return hl; + } else { + if (hlKeywords.toString().contains(",")) { + String hlk[] = hlKeywords.toString().replace("[", "").replace("]", "").replace("\"", "").split(","); + hl = Arrays.asList(hlk); + } else { + String hlk = hlKeywords.toString().replace("[", "").replace("]", ""); + hl.add(hlk); + } + } + } + return hl; + } + + /** + * 有附件的,需要上传附件,然后替换字段中的附件路径值, + */ + private Map getPathSize(String path, String dataId, + Map resultMap, + Map> fileNameMap) { +// LOGGER.info("UpLoadExcelService [getPathSize] need Download !"); + // 判断文件夹是否尊在,若不存在,则 isDownload = false ,pgc ugc egc 都为0; + File file=new File(path+dataId); + resultMap.put("pgc",0); + resultMap.put("ugc",0); + resultMap.put("egc",0); + List> filePathSize = new ArrayList<>(); + List> imagePathSize = new ArrayList<>(); + List> videoPathSize = new ArrayList<>(); + List filePath = new ArrayList<>(); + List imagePath = new ArrayList<>(); + List videoPath = new ArrayList<>(); + if(!file.exists()){//如果文件夹不存在 + resultMap .put("isDownload",false); + }else{ + resultMap .put("isDownload",true); + List fileNames = fileNameMap.get(dataId); + for (String fileName:fileNames) { // videoPath == egc filePath == ugc imagePath == pgc + // 根据路径读取文件,并上传到 go-fast 上,并根据前缀组装对应的 path 和 pathSize + String goFastUrl = config.getGoFastPostUrl(); + // String zipPath = bfdApiConfig.getUploadZipPath(); + // String url = DownLoadFile.upload(goFastUrl,dataId+fileName,content); + String file1 = path + dataId + "/" + fileName; + Map urlMap = DownLoadFile.upload(goFastUrl,dataId+fileName,new File(file1)); + String url = urlMap.get("path").toString(); + + Map pathMap = new HashMap<>(); + pathMap.put("url",url); + // 获取文件的大小 + long size = Long.valueOf(urlMap.get("size").toString()); 
+ Double newSize =(double)(Math.round(size/1024)/100.0); + pathMap.put("size",newSize+"KB"); + // 获取分辨率 + String resolution = ""; + if(fileName.startsWith("image")) { + if(url.endsWith(".svg")){ + resolution = ""; + }else { + resolution = ReadLine.getImageDim(file1); + } + } + if(fileName.startsWith("video")){ + if(url.endsWith(".mp3")){ + resolution = "400*240"; + }else if(url.endsWith(".flv")) { + resolution = ""; + }else{ + resolution = ReadLine.videosize(file1); + } + } + // System.out.println(resolution); + pathMap.put("resolution",resolution); + // 视频的时长 + String videoTime = ""; + pathMap.put("videoTime",videoTime); + + if(fileName.startsWith("file")){ + resultMap.put("ugc",1); + filePathSize.add(pathMap); + filePath.add(url); + } + if(fileName.startsWith("image")){ + resultMap.put("pgc",1); + imagePathSize.add(pathMap); + imagePath.add(url); + } + if(fileName.startsWith("video")){ + resultMap.put("egc",1); + videoPathSize.add(pathMap); + videoPath.add(url); + } + } + } + resultMap.put("filePathSize",JSONObject.toJSONString(filePathSize)); + resultMap.put("imagePathSize",JSONObject.toJSONString(imagePathSize)); + resultMap.put("videoPathSize",JSONObject.toJSONString(videoPathSize)); + resultMap.put("filePath",filePath); + resultMap.put("imagePath",imagePath); + resultMap.put("videoPath",videoPath); + return resultMap; + } + + /** + * 读文件,将数据导入到ES中 + * @param subjectId + */ + public void uploadData(String subjectId, List list,String crawlDataFlag) { + String indexNamePre = config.getIndexNamePre(); + String indexName = indexNamePre + subjectId; + for (String l:list) { + String c = l.replace("\\\"","\\\""); + JSONObject data = new JSONObject(); + try { + data = JSONObject.parseObject(c); + data.put("crawlDataFlag",crawlDataFlag); + }catch (Exception e){ + //e.printStackTrace(); + // 数据转json 失败 + return; + } + if(data.size() >0) { + try { + DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, data)); + } catch (InterruptedException e) { + 
e.printStackTrace(); + } + } + } + flushData(); + } + + /** + * 将传入的一条数据导入到ES中 + * @param subjectId + * @param result + */ + public void uploadData(String subjectId, JSONObject result) { + String indexNamePre = config.getIndexNamePre(); + String indexName = indexNamePre + subjectId; + try { + if(result.size() >0) { + try { + DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, result)); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + }catch (Exception e){ + e.printStackTrace(); + } + flushData(); + } + + /** + * 将一个List 的数据导入到ES中 + */ + public void uploadData(String subjectId, List resultList) { + String indexNamePre = config.getIndexNamePre(); + String indexName = indexNamePre + subjectId; + for (JSONObject data:resultList) { + if(data.size() >0) { + try { + DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, data)); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + } + flushData(); + } + + public void flushData() { + LOGGER.info("ES flushData"); + List dataList = Lists.newArrayList(); + EsUtils2.BulkItem item = DATA_CACHE.poll(); + while (Objects.nonNull(item)) { + if (dataList.size() >= 100) { + EsUtils2.bulkIndex(config.esMiniClusterName(), dataList, "_id_");//"SQ_Mini" config.esMiniClusterName() + LOGGER.debug("Flush data, size:{}.", dataList.size()); + dataList.clear(); + } + dataList.add(item); + item = DATA_CACHE.poll(); + } + if (dataList.size() > 0) { + EsUtils2.bulkIndex(config.esMiniClusterName(), dataList, "_id_"); + LOGGER.debug("Flush data, size:{}.", dataList.size()); + } + } + + public int uploadTxt(String subjectId, String filPath, String crawlDataFlag) { + int dataCount = 0; + try { + List list = ReadLine.readLine(new File(filPath)); + dataCount = list.size(); + uploadData(subjectId, list, crawlDataFlag); + }catch (Exception e){ + e.printStackTrace(); + } + return dataCount; + } + + private String getDocType (String channel){ + Map channelMap = new HashMap<>(); + 
channelMap.put("社交媒体","social"); + channelMap.put("网络视频","video"); + channelMap.put("新闻资讯","news"); + channelMap.put("博客智库","blog"); + channelMap.put("论坛贴吧","bbs"); + channelMap.put("搜索引擎","search"); + channelMap.put("电商网站","item"); + channelMap.put("生活方式","life"); + return channelMap.get(channel); + } + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadService.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadService.java new file mode 100644 index 0000000..10c35a5 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/service/upload/UpLoadService.java @@ -0,0 +1,214 @@ +package com.bfd.mf.job.service.upload; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.job.config.AppConfig; + +import com.bfd.mf.job.domain.entity.UploadTask; +import com.bfd.mf.job.domain.repository.UploadTaskRepository; +import com.bfd.mf.job.util.EsUtils; +import com.bfd.mf.job.util.ZipUtils; +import com.google.common.collect.Maps; +import org.assertj.core.util.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import javax.annotation.PostConstruct; +import java.io.*; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; + + +@Service +public class UpLoadService { + private static final Logger LOGGER = LoggerFactory.getLogger(UpLoadService.class); + private static BlockingQueue>> P_TASK_CACHE_RANGE = new LinkedBlockingQueue<>(); + + @Autowired + private AppConfig config; + @Autowired + private UploadTaskRepository uploadTaskRepository; + @Autowired + private UpLoadExcelService upLoadExcelService; + + @PostConstruct + public void init() { + // 注册数据查询来源 + EsUtils.registerCluster(config.esMiniClusterName(), config.esMiniAddress()); // 配置文件中的 
es-target + } + + public void tryAcquire() { + // 获取 task_type 3 crawl_status = 0 的任务进行上传,获取到后先将状态改成1 表示正在上传 + List taskList2 = uploadTaskRepository.getTaskNeedUpLoad(); + for (UploadTask task : taskList2) { + Map> cache = Maps.newHashMap(); + long taskId = task.getId().longValue(); + cache.put(taskId, Lists.newArrayList(0L, 0L, 0, 1, 1)); + try { // 将数据库中任务的状态暂时改为 4 + uploadTaskRepository.updateCrawlStatus(taskId,1,0,0,0); + P_TASK_CACHE_RANGE.put(cache); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + } + + public void produce() { + Map> range = P_TASK_CACHE_RANGE.poll();// poll -->若队列为空,返回null + if (Objects.isNull(range)) { + return; + } + long taskId = 0L; + for (Map.Entry> entry : range.entrySet()) { + entry.getValue(); + taskId = entry.getKey(); + } + // 这块可能需要改一下,因为 site_type 需要关联表才能拿到哦! + UploadTask task = uploadTaskRepository.findById(taskId).get(); + LOGGER.info("开始上传的任务是:" + JSONObject.toJSONString(task)); + String subjectId = task.getSubjectId().toString(); + String crawlDataFlag = task.getCrawlDataFlag(); + String zipPath = config.getUploadZipPath(); + String zipName = task.getFileName(); + String zipFileName = ZipUtils.getZipFileName(zipName,zipPath);// zip解压到指定的文件夹中。名字与 zip 名保持一致。 + // 根据数据库中 fileName 可知已经上传的文件的名称,从配置文件中获取文件的存储路径,组装后拿到文件开始解压 + // 解压zip ,校验数据,进行上传 + Map> fileNameMap = ZipUtils.unZip(new File(zipPath+zipName),zipPath+zipFileName); + // fileNameMap 是解压后的所有文件名称的 Map ,如果是 Excel + if(fileNameMap.size() == 0){ + // 解压后的文件中没有东西,或者找不到 zip 文件,将状态改成5 + uploadTaskRepository.updateCrawlStatus(taskId,5,0,0,0); + LOGGER.error("[上传失败] 已经上传完的任务是:" + taskId + " ,但是解析文件失败,得将 crawl_status 改成5 !"); + }else { + String fileName = fileNameMap.get("excelName").get(0); + fileNameMap.remove("excelName"); + int dataTotal = 0; + Map pubTimeMap = new HashMap<>(); + if (fileName.contains("xlsx")) { // 传的是Excel + LOGGER.info("上传的是 Excel 类型的数据"); + try { + Map returnMap = upLoadExcelService.parseExcel2(subjectId, zipPath + 
zipFileName + "/", fileName, fileNameMap, crawlDataFlag); + dataTotal = Integer.valueOf(returnMap.get("dataCount").toString()); + pubTimeMap = (Map) returnMap.get("pubTimeMap"); + }catch (Exception e){ + e.printStackTrace(); + } + } else { // 传的是文本数据 + LOGGER.info("上传的是 非 Excel 类型的数据"); + String theFinalFilePath = zipPath + zipFileName + "/" + fileName; + dataTotal = upLoadExcelService.uploadTxt(subjectId, theFinalFilePath, crawlDataFlag); + } + // 完成后将数据库中 crawl_status改为3 表示完成 + if (dataTotal == 0) { + LOGGER.error("[上传失败] 已经上传完的任务是:" + taskId + " , 但是写入成功的数据是0条,得将 crawl_status 改成5 !"); + uploadTaskRepository.updateCrawlStatus(taskId, 5, dataTotal,0,0); + } else { + LOGGER.info("[上传成功] 已经上传完的任务是:" + taskId + " ,可以将 crawl_status 改成3 了!"); + // 这块改状态之前应该先 sleep 一下,因为数据写ES是有一定延时的。 + try { + Thread.sleep(30000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + long pubMin = pubTimeMap.get("min"); + long pubMax = pubTimeMap.get("max"); + uploadTaskRepository.updateCrawlStatus(taskId, 3, dataTotal,pubMin,pubMax); + } + } + + } + + + +// public static void unZipGetFileType(File srcFile) throws RuntimeException { +// // 判断源文件是否存在 +// if (!srcFile.exists()) { +// throw new RuntimeException(srcFile.getPath() + "所指文件不存在"); +// } +// +// // 开始解压 +// ZipFile zipFile = null; +// try { +// zipFile = new ZipFile(srcFile); +// Enumeration entries = zipFile.entries(); +// ZipEntry entry = (ZipEntry) entries.nextElement(); +// String fileName = entry.getName(); +// System.out.println(fileName); +// String substring = fileName.substring(fileName.lastIndexOf(".")+1, fileName.length()); +// System.out.println(substring); +// +// } catch (Exception e) { +// throw new RuntimeException("unzip error from ZipUtils", e); +// } finally { +// if (zipFile != null) { +// try { +// zipFile.close(); +// } catch (IOException e) { +// e.printStackTrace(); +// } +// } +// } +// } +// +// public static void unZip(File srcFile, String destDirPath) throws RuntimeException { +// long 
start = System.currentTimeMillis(); +// // 判断源文件是否存在 +// if (!srcFile.exists()) { +// throw new RuntimeException(srcFile.getPath() + "所指文件不存在"); +// } +// +// // 开始解压 +// ZipFile zipFile = null; +// try { +// zipFile = new ZipFile(srcFile); +// Enumeration entries = zipFile.entries(); +// while (entries.hasMoreElements()) { +// ZipEntry entry = (ZipEntry) entries.nextElement(); +// System.out.println("解压" + entry.getName()); +// // 如果是文件夹,就创建个文件夹 +// if (entry.isDirectory()) { +// String dirPath = destDirPath + "/" + entry.getName(); +// File dir = new File(dirPath); +// dir.mkdirs(); +// } else { +// // 如果是文件,就先创建一个文件,然后用io流把内容copy过去 +// File targetFile = new File(destDirPath + "/" + entry.getName()); +// // 保证这个文件的父文件夹必须要存在 +// if (!targetFile.getParentFile().exists()) { +// targetFile.getParentFile().mkdirs(); +// } +// targetFile.createNewFile(); +// // 将压缩文件内容写入到这个文件中 +// InputStream is = zipFile.getInputStream(entry); +// FileOutputStream fos = new FileOutputStream(targetFile); +// int len; +// byte[] buf = new byte[1024]; +// while ((len = is.read(buf)) != -1) { +// fos.write(buf, 0, len); +// } +// // 关流顺序,先打开的后关闭 +// fos.close(); +// is.close(); +// } +// } +// long end = System.currentTimeMillis(); +// System.out.println("解压完成,耗时:" + (end - start) + " ms"); +// } catch (Exception e) { +// throw new RuntimeException("unzip error from ZipUtils", e); +// } finally { +// if (zipFile != null) { +// try { +// zipFile.close(); +// } catch (IOException e) { +// e.printStackTrace(); +// } +// } +// } +// } + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/DataCheckUtil.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/DataCheckUtil.java new file mode 100644 index 0000000..4530c41 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/DataCheckUtil.java @@ -0,0 +1,321 @@ +package com.bfd.mf.job.util; + +import org.apache.commons.lang3.StringUtils; +import org.apache.log4j.Logger; + +import java.text.ParseException; 
+import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + + +public class DataCheckUtil { + + public static Pattern datePattrn = Pattern.compile("^\\d{4}\\-\\d{2}\\-\\d{2}\\s\\d{2}\\:\\d{2}:\\d{2}$"); + + public static Pattern dayPattrn = Pattern.compile("^\\d{2,4}\\-\\d{1,2}\\-\\d{1,2}$"); + + private static SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + + public static Pattern p = Pattern.compile("\\s+"); + + private static final Logger LOG = Logger.getLogger(DataCheckUtil.class); + + public static String chechData2(String dataStr){ + dataStr = dataStr.replace("Z",""); + dataStr = checkData(dataStr); + Matcher matcher = datePattrn.matcher(dataStr); + if(!matcher.find()){ + System.out.println("格式错误,使用当前时间 : " + dataStr); + dataStr = DateUtil.getDateTime(); + }else{ + dataStr = matcher.group(0); + } + return dataStr; + } + + public static String checkData(String dataStr){ + SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + if(StringUtils.isBlank(dataStr)){ + return ddf.format(new Date()); + } + if(dataStr.contains("-:")){ + dataStr = dataStr.replace("-:",":"); + } + if(dataStr.contains(":-")){ + dataStr = dataStr.replace(":-",":"); + } + + Matcher matcher = datePattrn.matcher(dataStr); + + if(!matcher.find()){ + dataStr = dataStr.trim(); + if(!p.matcher(dataStr).find()){ + if(!dayPattrn.matcher(dataStr).find()){ + return ddf.format(new Date()); + } + } + + String[] dates = dataStr.split("\\s+"); + String years = ""; + String times = ""; + if(dates.length == 2){ + years = dates[0]; + times = dates[1]; + }else{ + years = dates[0]; + } + + if(years.contains("/")){ + years = years.replace("/", "-"); + } + String[] yearStr = years.split("-"); + String yms = "" ; + if(yearStr.length == 3){ + String year = yearStr[0]; + String month = yearStr[1]; + String day = yearStr[2]; + if(year.length() == 2){ + year = "20"+year; + } + if(month.length() == 1){ + month = 
"0"+month; + } + if(day.length() == 1){ + day = "0"+day; + } + yms = year+"-"+month+"-"+day; + } + + String hms = ""; + if(StringUtils.isBlank(times)){ + hms = "00:00:00"; + }else{ + times = times.replace("/", ":"); + if(times.contains(":")){ + String[] timeStr = times.split(":"); + if( timeStr.length >= 3 ){ + String hours = timeStr[0]; + String mins = timeStr[1]; + String s = timeStr[2]; + + if(hours.length() == 1){ + hours = "0"+hours; + } + if(mins.length() == 1){ + mins = "0"+mins; + } + if(s.length() == 1){ + s = "0"+s; + } + hms = hours+":"+mins+":"+s; + }else if(timeStr.length == 2){ + String hours = timeStr[0]; + String mins = timeStr[1]; + String s = "00"; + if(hours.length() == 1){ + hours = "0"+hours; + } + if(mins.length() == 1){ + mins = "0"+mins; + } + hms = hours+":"+mins+":"+s; + } else { + String hours = timeStr[0]; + String mins = "00" ; + String s = "00"; + if(hours.length() == 1){ + hours = "0"+hours; + } + hms = hours+":"+mins+":"+s; + } + }else{ + if(isNum(times) && times.length()==2){ + hms = times+":00:00"; + }else if(isNum(times) && times.length()==1){ + hms = "0"+times+":00:00"; + }else{ + hms = "00:00:00" ; + } + } + } + if(StringUtils.isBlank(yms)){ + return ddf.format(new Date()); + } + if(yms != "" || hms != ""){ + return yms+" "+hms; + } + } + return dataStr ; + } + + private static boolean isNum(String time){ + Pattern p = Pattern.compile("\\d+"); + if(p.matcher(time).find()){ + return true ; + } + return false ; + } + + public static String convertStringTotime(String datetime){ + if(StringUtils.isBlank(datetime)){ + return DateUtil.getDateTime(System.currentTimeMillis()); + } + String creationTime = ""; + if(datetime.length() == 13){ + creationTime = DateUtil.getDateTime(Long.valueOf(datetime)); + }else{ + creationTime = DateUtil.getDateTime(Long.valueOf(datetime) *1000); + } + return creationTime ; + + } + + public static long convertStringToLong(String datetime){ + if(StringUtils.isBlank(datetime)){ + return 
System.currentTimeMillis(); + } + long creationTime ; + if(datetime.length() == 13){ + creationTime = Long.valueOf(datetime); + }else{ + creationTime = Long.valueOf(datetime) *1000; + } + return creationTime ; + } + + public static long convertTimeTotime(String datetime){ + if(StringUtils.isBlank(datetime)){ + return System.currentTimeMillis() / 1000; + } + long creationTime ; + if(datetime.length() == 13){ + creationTime = Long.valueOf(datetime) / 1000; + }else{ + creationTime = Long.valueOf(datetime) ; + } + return creationTime ; + + } + + /** + * String 转 long + */ + public static long convertDateTotime(String datetime){ + if(StringUtils.isBlank(datetime)){ + return System.currentTimeMillis() / 1000; + } + long creationTime = 0; + try { + if(null != datetime && !("null").equals(datetime)) { + SimpleDateFormat ddf1 = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + creationTime = Long.valueOf(ddf1.parse(datetime).getTime()) / 1000; + }else{ + creationTime = new Date().getTime()/1000; + } + } catch (Exception e) { + e.printStackTrace(); + } + return creationTime ; + + } + + /** + * 获取当前的 string 类型时间 + */ + public static String getCurrentTime(){ + SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + return ddf.format(new Date()); + } + + /** + * long 转 string pubTimeStr crawlTimeStr createTimeStr + */ + public static String getCurrentTime(long dateTime){ + SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + return ddf.format(new Date(dateTime)); + } + /** + * String 转 long + */ + // long 转为 时间格式为 yyyy-MM-dd'T'HH:mm:ss.SSSXXX 的时间 pubDate crawlDate createDate + public static String getDate(long dateTime){ + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); + return sdf.format(new Date(dateTime)); + } + + /** + * String 转 long + */ + // String 转为 时间格式为 yyyy-MM-dd'T'HH:mm:ss.SSSXXX 的时间 pubDate crawlDate createDate + public static String getDate(String dateTime){ + SimpleDateFormat sdf = new 
SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"); + SimpleDateFormat ddf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + try { + Date date = ddf.parse(dateTime) ; + return sdf.format(date); + } catch (ParseException e) { + e.printStackTrace(); + LOG.error("DataCheckUtil getDate() err data:"+dateTime); + } + return sdf.format(new Date()); + } + + /** + * String 转 long + */ + public static long getDay(long dateTime){ + try{ + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); + String dayStr = sdf.format(new Date(dateTime)); + Date date = sdf.parse(dayStr); + return date.getTime(); + }catch(Exception e){ + e.printStackTrace(); + LOG.error("DataCheckUtil getDay() err data:"+dateTime); + } + return 0; + } + + /** + * String 转 long + */ + public static long getDay(String dateTime){ + try{ + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); + Date date = sdf.parse(dateTime); + return date.getTime(); + }catch(Exception e){ + e.printStackTrace(); + LOG.error("DataCheckUtil getDay2() err data:"+dateTime); + } + return 0; + } + + + +// public static void main(String[] args) { +// //System.out.println(checkData("")); +// /*System.out.println(System.currentTimeMillis()); +// System.out.println(Calendar.getInstance().getTimeInMillis() / 1000); +// System.out.println(new Date().getTime() / 1000); +// System.out.println(DateUtil.getDateTime((System.currentTimeMillis() / 1000) * 1000)); +// System.out.println(convertStringTotime("1558077405")); +// System.out.println(convertTimeTotime(null));*/ +// //System.out.println(DateUtil.getTimeMillis("2019-03-01 01:01:01")); +// +// /*String aa = DataCheckUtil.convertStringTotime("1563245342"); +// System.out.println(aa);*/ +// /*SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd"); +// try { +// Date date = sdf.parse("2019-03-01"); +// System.out.println(date.getTime()); +// } catch (ParseException e) { +// // TODO Auto-generated catch block +// e.printStackTrace(); +// }*/ +// System.out.println(getDate("2019-03-01 
01:01:01")); +// } + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/DateUtil.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/DateUtil.java new file mode 100644 index 0000000..0222f35 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/DateUtil.java @@ -0,0 +1,365 @@ +/* + * Copyright (C) 2016 Baifendian Corporation + *

+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bfd.mf.job.util; + +import com.bfd.nlp.common.util.string.TStringUtils; + +import java.io.UnsupportedEncodingException; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; + +public class DateUtil { + + private static final String DATE_UNIT_DAY = "D"; + public static final String DATE_UNIT_HOUR = "H"; + + public static String TIME_FORMAT = "yyyy-MM-dd HH:mm:ss"; + public static String DATE_FORMAT = "yyyy-MM-dd"; + public static String DATE_FORMAT2 = "yyyy.MM.dd"; + + /** + * @param startTime 开始时间 + * @param endTime 结束时间 + * @param unit D H + * @return + */ + public static double getTimeIntervalByUnit(long startTime, long endTime, String unit) { + int interval = 0; + long dateDistance = endTime - startTime; + if (null == unit || dateDistance <= 0) + return -1; + if (DATE_UNIT_DAY.equals(unit)) + interval = 24 * 3600 * 1000; + if (DATE_UNIT_HOUR.equals(unit)) + interval = 3600 * 1000; + return Math.ceil(dateDistance / interval); + } + + /* + * 20160807190815678:yyyyMMddhhmmssSSS + */ + public static String getTimeStrForNow() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmssSSS"); + return sdf.format(new Date()); + } + + private static String getTimeStrDefault() { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMddHHmmssSSS"); + Date date = new Date(); + date.setYear(1970); + return sdf.format(date); + } + + public static byte[] timeStr2Chars(String timeStr) { + try { + return timeStr.getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + // TODO Auto-generated catch block + e.printStackTrace(); 
+ } + return new byte[0]; + } + + public static long timeStr2Long(String timeStr) { + if (TStringUtils.isEmpty(timeStr)) { + String defTm = getTimeStrDefault(); + return Long.parseLong(defTm); + } + return Long.parseLong(timeStr); + } + + private static Date parseDate(long time) { + return new Date(time); + } + + /*** + * timestamp to yyyy-MM-dd + * + * @param timestamp + * @return + */ + public static String parseDateByday(long timestamp) { + Date date = parseDate(timestamp); + SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT); + return format.format(date); + } + + public static String parseDateByday2(long timestamp) { + Date date = parseDate(timestamp); + SimpleDateFormat format = new SimpleDateFormat(DATE_FORMAT2); + return format.format(date); + } + + /*** + * timestamp to yyyy-MM-dd HH:mm:ss + * + * @param timestamp + * @return + */ + public static String parseDateByTime(long timestamp) { + Date date = parseDate(timestamp); + SimpleDateFormat format = new SimpleDateFormat(TIME_FORMAT); + return format.format(date); + } + + /** + * timestamp with special format + * + * @param timestamp + * @param format + * @return + */ + public static String parseDateByFormat(long timestamp, String format) { + Date date = parseDate(timestamp); + SimpleDateFormat dateFormat = new SimpleDateFormat(format); + return dateFormat.format(date); + } + + /** + * 获取今天是周几 + * + * @return 一个表示周几的数字 + */ + public static int getDay() { + Calendar cal = Calendar.getInstance(); + int day = cal.get(Calendar.DAY_OF_WEEK) - 1; + day = day == 0 ? 
7 : day; + return day; + } + + /** + * 获取现在是今天的多少秒 + * + * @return 一个数字表示现在是今天的多少秒 + */ + public static int getSecondsNow() { + Calendar curDate = Calendar.getInstance(); + Calendar tommorowDate = new GregorianCalendar(curDate + .get(Calendar.YEAR), curDate.get(Calendar.MONTH), curDate + .get(Calendar.DATE) + 1, 0, 0, 0); + return 24 * 3600 - ((int) (tommorowDate.getTimeInMillis() - curDate.getTimeInMillis()) / 1000); + } + + public static class CronDate extends Date { + private int hour; + private int minute; + + public CronDate(int h, int m) { + this.hour = h; + this.minute = m; + } + + CronDate() { + this.hour = 0; + this.minute = 0; + } + + int getHour() { + return hour; + } + + void setHour(int hour) { + this.hour = hour; + } + + public int getMinute() { + return minute; + } + + public void setMinute(int minute) { + this.minute = minute; + } + + public boolean before(CronDate date) { + if (null == date) { + return false; + } + if (date.getHour() != this.getHour()) { + return (this.getHour() - date.getHour() < 0); + } + // compare minute + return (this.getMinute() - date.getMinute() < 0); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + if (hour >= 10) { + sb.append(hour); + } else { + sb.append("0").append(hour); + } + sb.append(":"); + if (minute >= 10) { + sb.append(minute); + } else { + sb.append("0").append(minute); + } + return sb.toString(); + } + // @Override + // public String toString() { + // Date date = new Date(); + // date.setHours(hour); + // date.setMinutes(minute); + // String str = cronDdateFormate.format(date); + // return str; + // } + } + + /** + * @param dateStr + * @return + */ + public static CronDate parseDateFromStr(String dateStr) { + if (TStringUtils.isEmpty(dateStr)) { + return null; + } + String[] ts = dateStr.split(":"); + if (null == ts || ts.length == 0) { + return null; + } + CronDate date = new CronDate(); + for (int i = 0; i < ts.length; i++) { + String s = ts[i]; + int num = 
parseDoubleStr(s); + if (i == 0) { + date.setHour(num); + } else if (i == 1) { + date.setMinute(num); + } + } + return date; + } + + /** + * @param st + * @return + */ + private static Integer parseDoubleStr(String st) { + if (TStringUtils.isEmpty(st)) { + return null; + } + while (st.startsWith("0") && st.length() > 1) { + st = st.substring(1); + } + if (TStringUtils.isEmpty(st)) { + return 0; + } + return Integer.parseInt(st); + } + + /** + * 获取当前时间的小时数和分钟数 + * + * @return + */ + public static int[] getCurrentHourAndMinute() { + int[] dat = new int[2]; + Date date = new Date(); + dat[0] = date.getHours(); + dat[1] = date.getMinutes(); + return dat; + } + +// public static String extractDataScope(long from, long to, boolean fileName) { +// return fileName ? +// (MfTimeUtil.getCSTDateStr(from, "yyyyMMdd") + "_" +// + MfTimeUtil.getCSTDateStr(to, "yyyyMMdd")) +// : ("[" + MfTimeUtil.getCSTDateStr(from, "yyyy-MM-dd") + " ~ " +// + MfTimeUtil.getCSTDateStr(to, "yyyy-MM-dd") + "]"); +// +// } + + public static Date stringToDate(String dateStr) { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + try { + return sdf.parse(dateStr); + } catch (ParseException e) { + return new Date(); + } + } + + /** + * 获得服务器当前日期及时间,以格式为:yyyy-MM-dd HH:mm:ss的日期字符串形式返回 + */ + public static String getDateTime(){ + try{ + SimpleDateFormat datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + return datetime.format(Calendar.getInstance().getTime()); + } catch(Exception e){ + //log.debug("DateUtil.getDateTime():" + e.getMessage()); + return ""; + } + } + /** + * 获得服务器当前日期及时间,以格式为:yyyy-MM-dd HH:mm:ss的日期字符串形式返回 + */ + public static String getDateTime(long date){ + try{ + SimpleDateFormat datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + return datetime.format(new Date(date)); + } catch(Exception e){ + // log.debug("DateUtil.getDateTime():" + e.getMessage()); + return ""; + } + } + public static long getcurr(){ + Date date = new Date(); + Long l_date = 
date.getTime(); + return l_date; + } + +// public static long getDayStart(long time){ +// long zero = time/(1000*3600*24)*(1000*3600*24)- TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 +// long zero2 = time/(1000*3600*24)*(1000*3600*24) - TimeZone.getDefault().getRawOffset(); +// return zero; +// } +// public static long getDayEnd(long time){ +// //long zero=time/(1000*3600*24)*(1000*3600*24)- TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 +// long twelve=time+24*60*60*1000-1;//今天23点59分59秒的毫秒数 +// return twelve; +// } + +// public static void main(String[] args) { +// long time = 1611591055000L ; +// long start = getDayStart(time); +// long end = getDayEnd(start); +// +// +// System.out.println(time); +// System.out.println(start); +// System.out.println(end); +// +// System.out.println(parseDateByday(time)); +// System.out.println(parseDateByday(start)); +// System.out.println(parseDateByday(end)); +// +// +// long zero=time/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 +// long twelve=zero+24*60*60*1000-1;//今天23点59分59秒的毫秒数 +// long yesterday=System.currentTimeMillis()-24*60*60*1000;//昨天的这一时间的毫秒数 +// System.out.println(new Timestamp(time));//当前时间 +// System.out.println(new Timestamp(yesterday));//昨天这一时间点 +// System.out.println(new Timestamp(zero));//今天零点零分零秒 +// System.out.println(new Timestamp(twelve));//今天23点59分59秒 +// +// } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java new file mode 100644 index 0000000..c578355 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EMailUtils.java @@ -0,0 +1,286 @@ +package com.bfd.mf.job.util; + +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +import javax.mail.Session; +import javax.mail.Transport; +import 
javax.mail.internet.InternetAddress; +import javax.mail.internet.MimeMessage; + +import com.bfd.crawler.utils.JsonUtils; +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.util.EntityUtils; +import org.apache.log4j.Logger; + + + +/** + * 邮件发送处理 + * @author yabo.li + * + */ +public class EMailUtils { + private static final Logger LOG = Logger.getLogger(EMailUtils.class); + // 发件人的 邮箱 和 密码(替换为自己的邮箱和密码) + // PS: 某些邮箱服务器为了增加邮箱本身密码的安全性,给 SMTP 客户端设置了独立密码(有的邮箱称为“授权码”), + // 对于开启了独立密码的邮箱, 这里的邮箱密码必需使用这个独立密码(授权码)。 + public static String myEmailAccount = "bfd_crawler_alarm@baifendian.com"; + public static String myEmailPassword = "bfd_crawler_alarm"; + + // 发件人邮箱的 SMTP 服务器地址, 必须准确, 不同邮件服务器地址不同, 一般(只是一般, 绝非绝对)格式为: smtp.xxx.com + // 网易163邮箱的 SMTP 服务器地址为: smtp.163.com + private static String myEmailSMTPHost = "smtp.baifendian.com"; + + // 收件人邮箱(替换为自己知道的有效邮箱) + public static String receiveMailAccount = "chaofan.tan@baifendian.com"; + private String confPath = "../etc/config.properties"; + + private static EMailUtils instance = null; + private String protocol = "smtp"; + private String smtpAuth = "true"; + private static String emailEncode = "UTF-8"; + private static String emailTitle = "[{cid}]数据采集异常报警 — 智能数据采集平台"; + private static String emailContent = "你好:\r\n\r\n报警对象:{cid}-{categoryName} \r\n报警原因:{type},请及时检查!。 \r\n\r\n报警时间:{time}"; + + private EMailUtils() { + LOG.info("EMailUtils:init"); + // Properties pro = LoadConfig.getInstance().getPro(confPath); + Properties pro = new Properties(); + /** + * 注释了读配置文件,直接写死了配置 + * crawl.alert.mail.transport.protocol=smtp + crawl.alert.mail.smtp.host=intmail.baifendian.com + crawl.alert.mail.smtp.auth=true + crawl.alert.email.userName=bfd_crawler_alarm@baifendian.com + 
crawl.alert.email.userPasswd=z26Iyf3vMRb5ejrI + crawl.alert.email.emailEncode=UTF-8 + */ + protocol =pro.getProperty("crawl.alert.mail.transport.protocol", "smtp"); + myEmailSMTPHost =pro.getProperty("crawl.alert.mail.smtp.host", "intmail.baifendian.com"); + smtpAuth = pro.getProperty("crawl.alert.mail.smtp.auth", "true"); + myEmailAccount = pro.getProperty("crawl.alert.email.userName", "bfd_crawler_alarm@baifendian.com"); + myEmailPassword = pro.getProperty("crawl.alert.email.userPasswd", "z26Iyf3vMRb5ejrI"); + emailEncode = pro.getProperty("crawl.alert.email.emailEncode", "UTF-8"); + emailTitle = pro.getProperty("crawl.alert.email.emailTitle", "[{cid}]数据采集异常报警 — 智能数据采集平台"); + emailContent = pro.getProperty("crawl.alert.email.emailContent1", "你好:\r\n\r\n报警对象:{cid}-{categoryName} \r\n报警原因:{type},请及时检查!。 \r\n\r\n报警时间:{time}\r\n\r\n排查线索:{sample}"); + + + LOG.info("EMailUtils protocol:" + protocol + " myEmailSMTPHost:" + myEmailSMTPHost + + " smtpAuth: " + smtpAuth + " myEmailAccount: " + myEmailAccount + + " emailEncode: " + emailEncode + " config path: " + confPath); + } + + public static EMailUtils getInstance() { + if (instance == null) { + synchronized (EMailUtils.class) { + if (instance == null) { + instance = new EMailUtils(); + } + } + } + return instance; + } + + public void setConfigPath (String confPath) { + this.confPath = confPath; + } + + + public void sendWechat(List emailList, String message) { + HttpClientBuilder httpBuilder = HttpClientBuilder.create(); + HttpClient client = httpBuilder.build(); + HttpPost httppost = new HttpPost("http://172.18.1.181:8412/sendwechatalarm/"); //Constants.getWechatURL() + try { + Map requestMap = new HashMap(); + requestMap.put("emails", emailList); + requestMap.put("message", message); + StringEntity entity = new StringEntity(JsonUtils.toJSONString(requestMap),"UTF-8"); + entity.setContentType("application/json"); + httppost.setEntity(entity); + HttpResponse response = client.execute(httppost); + HttpEntity en = 
response.getEntity(); + String content = EntityUtils.toString(en,"utf8"); + LOG.info("SENT WECHAT ALARM:" + JsonUtils.toJSONString(emailList) + " " + JsonUtils.toJSONString(requestMap)); + } catch (Exception e) { + e.printStackTrace(); + } finally { + client = null; + httpBuilder = null; + httppost = null; + } + + } + public void sendEmail(int type, Map siteMessage, List emailList, String time1) { + LOG.info("有报警任务,开始发送邮件"); + try { + SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + String cid = (String) siteMessage.get("cid"); + String categoryName = null; + String title = emailTitle.replace("{cid}", cid); + String content = null; + + content = emailContent.replace("{cid}", cid); + + if (siteMessage.containsKey("categoryName")) { + categoryName = (String) siteMessage.get("categoryName"); + content = content.replace("{categoryName}", categoryName); + } else { + content = content.replace("-{categoryName}",""); + } + if (siteMessage.containsKey("sample")) { + categoryName = (String) siteMessage.get("sample"); + content = content.replace("{sample}", categoryName); + } else { + content = content.replace("{sample}",""); + } + content = content.replace("{time}", sdf.format(new Date())); + //需要分类处理 + + switch(type) { + case 1: + content = content.replace("{type}", "数据的时间格式有误"); + break; + case 2: + content = content.replace("{type}", "任务下发后" + time1 + "分钟数据未及时返回"); + break; + case 3: + content = content.replace("{type}", "任务下发后" + time1 + "分钟数据未及时返回"); + break; + case 4: + content = content.replace("{type}", "数据字段丢失,字段丢失为"+time1); + break; + case 5: + content = content.replace("{type}", "数据关键字段值为空"); + break; + case 6: + content = confPath.replace("{type}","解析失败次数超过100次"); + break; + default: + return ; + } + + LOG.info("EMailUtils:sendEmail get: siteMessage:" + siteMessage + " emailList:" + emailList + " content:" + content); + Properties props = new Properties(); // 参数配置 + props.setProperty("mail.transport.protocol", protocol); // 
使用的协议(JavaMail规范要求) + props.setProperty("mail.smtp.host", myEmailSMTPHost); // 发件人的邮箱的 SMTP 服务器地址 + props.setProperty("mail.smtp.auth", smtpAuth); // 需要请求认证 + Session session = Session.getInstance(props); + session.setDebug(true); + MimeMessage message = createMimeMessage(session, myEmailAccount, emailList, title, content); + Transport transport = session.getTransport(); + transport.connect(myEmailAccount, myEmailPassword); + transport.sendMessage(message, message.getAllRecipients()); + List emails = new ArrayList(); + for (String email:emailList) { + emails.add(email.replace("@percent.cn", "")); + } + sendWechat(emails,content); + transport.close(); + } catch (Throwable e) { + e.printStackTrace(); + LOG.error("EMailUtils:sendEmail error. title:" + siteMessage + " emailList:" + emailList); + } + } + + +// public static void main(String[] args) throws Exception { +// // 1. 创建参数配置, 用于连接邮件服务器的参数配置 +// Properties props = new Properties(); // 参数配置 +// props.setProperty("mail.transport.protocol", "smtp"); // 使用的协议(JavaMail规范要求) +// props.setProperty("mail.smtp.host", myEmailSMTPHost); // 发件人的邮箱的 SMTP 服务器地址 +// props.setProperty("mail.smtp.auth", "true"); // 需要请求认证 +// +// // PS: 某些邮箱服务器要求 SMTP 连接需要使用 SSL 安全认证 (为了提高安全性, 邮箱支持SSL连接, 也可以自己开启), +// // 如果无法连接邮件服务器, 仔细查看控制台打印的 log, 如果有有类似 “连接失败, 要求 SSL 安全连接” 等错误, +// // 打开下面 /* ... */ 之间的注释代码, 开启 SSL 安全连接。 +// /* +// // SMTP 服务器的端口 (非 SSL 连接的端口一般默认为 25, 可以不添加, 如果开启了 SSL 连接, +// // 需要改为对应邮箱的 SMTP 服务器的端口, 具体可查看对应邮箱服务的帮助, +// // QQ邮箱的SMTP(SLL)端口为465或587, 其他邮箱自行去查看) +// final String smtpPort = "465"; +// props.setProperty("mail.smtp.port", smtpPort); +// props.setProperty("mail.smtp.socketFactory.class", "javax.net.ssl.SSLSocketFactory"); +// props.setProperty("mail.smtp.socketFactory.fallback", "false"); +// props.setProperty("mail.smtp.socketFactory.port", smtpPort); +// */ +// +// // 2. 
根据配置创建会话对象, 用于和邮件服务器交互 +// Session session = Session.getInstance(props); +// session.setDebug(true); // 设置为debug模式, 可以查看详细的发送 log +// List emails = new ArrayList(); +// // 3. 创建一封邮件 +// MimeMessage message = createMimeMessage(session, myEmailAccount, emails, "小司机", "小司机去开车"); +// +// // 4. 根据 Session 获取邮件传输对象 +// Transport transport = session.getTransport(); +// +// // 5. 使用 邮箱账号 和 密码 连接邮件服务器, 这里认证的邮箱必须与 message 中的发件人邮箱一致, 否则报错 +// // +// // PS_01: 成败的判断关键在此一句, 如果连接服务器失败, 都会在控制台输出相应失败原因的 log, +// // 仔细查看失败原因, 有些邮箱服务器会返回错误码或查看错误类型的链接, 根据给出的错误 +// // 类型到对应邮件服务器的帮助网站上查看具体失败原因。 +// // +// // PS_02: 连接失败的原因通常为以下几点, 仔细检查代码: +// // (1) 邮箱没有开启 SMTP 服务; +// // (2) 邮箱密码错误, 例如某些邮箱开启了独立密码; +// // (3) 邮箱服务器要求必须要使用 SSL 安全连接; +// // (4) 请求过于频繁或其他原因, 被邮件服务器拒绝服务; +// // (5) 如果以上几点都确定无误, 到邮件服务器网站查找帮助。 +// // +// // PS_03: 仔细看log, 认真看log, 看懂log, 错误原因都在log已说明。 +// transport.connect(myEmailAccount, myEmailPassword); +// +// // 6. 发送邮件, 发到所有的收件地址, message.getAllRecipients() 获取到的是在创建邮件对象时添加的所有收件人, 抄送人, 密送人 +// transport.sendMessage(message, message.getAllRecipients()); +// +// // 7. 关闭连接 +// transport.close(); +// } + + /** + * 创建一封只包含文本的简单邮件 + * + * @param session 和服务器交互的会话 + * @param sendMail 发件人邮箱 + * @param receiveMail 收件人邮箱 + * @return + * @throws Exception + */ + public static MimeMessage createMimeMessage(Session session, String sendMail, List receiveMail ,String title, String content) throws Exception { + // 1. 创建一封邮件 + MimeMessage message = new MimeMessage(session); + + // 2. From: 发件人(昵称有广告嫌疑,避免被邮件服务器误认为是滥发广告以至返回失败,请修改昵称) + message.setFrom(new InternetAddress(sendMail, sendMail.split("@")[0], "UTF-8")); + + // 3. To: 收件人(可以增加多个收件人、抄送、密送) + for (String email : receiveMail) { + message.addRecipient(MimeMessage.RecipientType.TO, new InternetAddress(email, email.split("@")[0], "UTF-8")); + } + // 4. Subject: 邮件主题(标题有广告嫌疑,避免被邮件服务器误认为是滥发广告以至返回失败,请修改标题) + message.setSubject(title, emailEncode); + + // 5. 
Content: 邮件正文(可以使用html标签)(内容有广告嫌疑,避免被邮件服务器误认为是滥发广告以至返回失败,请修改发送内容) + message.setText(content);//setContent(content, "text/html;charset=UTF-8"); + + // 6. 设置发件时间 + message.setSentDate(new Date()); + + // 7. 保存设置 + message.saveChanges(); + + return message; + } + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils.java index 7f7ce2b..78a9be1 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils.java @@ -2,6 +2,7 @@ package com.bfd.mf.job.util; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.job.config.AppConfig; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; @@ -35,7 +36,6 @@ import org.slf4j.LoggerFactory; import org.springframework.util.Assert; import org.springframework.util.CollectionUtils; -import java.math.BigInteger; import java.net.InetAddress; import java.util.HashMap; import java.util.List; @@ -79,11 +79,13 @@ public abstract class EsUtils { .setIndices(indices) .setIndicesOptions(IndicesOptions.fromOptions(true, true, true, false)) - .setTypes(type) + //.setTypes(type) .setQuery(queryBuilder) .setScroll(TimeValue.timeValueMinutes(minutes)) .setSize(size); + System.out.println(searchRequestBuilder); + long s = System.currentTimeMillis(); SearchResponse response = searchRequestBuilder.execute().actionGet(); long e = System.currentTimeMillis(); @@ -181,7 +183,8 @@ public abstract class EsUtils { */ public static String[] getIndices(String prefix, String separator, long startMills, long endMils, - String pattern, Long upperMills, String standbyIndex) { + String pattern, Long upperMills, + String standbyIndex,Long year) { List indexList = Lists.newArrayList(); LocalDateTime start = new LocalDateTime(startMills); LocalDateTime 
end = new LocalDateTime(endMils); @@ -190,6 +193,18 @@ public abstract class EsUtils { indexList.add(standbyIndex); start = upper; } + if(startMills < year){ + for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusYears(1)) { + String dtStr = dt.toString(DateTimeFormat.forPattern("YYYY")); + String index = new StringBuilder() + .append(prefix) + .append(separator) + .append(dtStr) + .toString(); + indexList.add(index); + } + start = new LocalDateTime(year); + } for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusDays(1)) { String dtStr = dt.toString(DateTimeFormat.forPattern(pattern)); String index = new StringBuilder() @@ -199,14 +214,44 @@ public abstract class EsUtils { .toString(); indexList.add(index); } -// indexList.add("cl_index_video"); -// indexList.add("cl_index_social"); -// indexList.add("cl_index_news"); + // 只拉主贴, + indexList.add("cl_index_item"); String[] indices = new String[indexList.size()]; indices = indexList.toArray(indices); return indices; } +// public static void main(String[] args) { +// String prefix = "cl_aaa_"; +// String separator = "-"; +// long startMills = 1083340800000L; +// long endMils = 1556640000000L; //1556640000 1546272000000L +// String pattern = AppConfig.DATE_FORMAT; +// Long upperMills = 946656000L; +// String standbyIndex = "cl_index_0"; +// String [] indexs = {}; +// if(startMills < 1546272000000L){ +// LocalDateTime start = new LocalDateTime(startMills); +// LocalDateTime end = new LocalDateTime(endMils); +// LocalDateTime upper = new LocalDateTime(upperMills); +// for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusYears(1)) { +// String dtStr = dt.toString(DateTimeFormat.forPattern("YYYY")); +// String index = new StringBuilder() +// .append(prefix) +// .append(separator) +// .append(dtStr) +// .toString(); +// System.out.println("*** "+ index); +// //indexs.add(index); +// } +// } +// startMills = 1546272000000L; +// indexs = 
getIndices(prefix,separator,startMills,endMils,pattern,upperMills,standbyIndex); +// for (int i = 0 ; i < indexs.length ; i ++){ +// System.out.println(indexs[i]); +// } +// } + /** * 根据indexName获取一定存在的index * 如果indexName存在则返回,不存在则创建 diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils2.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils2.java new file mode 100644 index 0000000..43e99dd --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/EsUtils2.java @@ -0,0 +1,451 @@ +package com.bfd.mf.job.util; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; +import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; +import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.search.ClearScrollRequestBuilder; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.client.transport.TransportClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.search.SearchHit; +import 
org.elasticsearch.transport.client.PreBuiltTransportClient; +import org.joda.time.LocalDateTime; +import org.joda.time.format.DateTimeFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.util.Assert; +import org.springframework.util.CollectionUtils; + +import java.net.InetAddress; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +public abstract class EsUtils2 { + private static final Logger LOGGER = LoggerFactory.getLogger(EsUtils.class); + private static final Map CLIENT_MAP = Maps.newHashMap(); + + public static void registerCluster(String clusterName, String[] addresses) { + System.setProperty("es.set.netty.runtime.available.processors", "false"); + Assert.hasLength(clusterName, "Param clusterName must not be empty."); + Assert.notEmpty(addresses, "Param addresses must not be empty."); + Settings settings = Settings.builder() + .put("cluster.name", clusterName).build(); + TransportClient client = new PreBuiltTransportClient(settings); + try { + for (int i = 0; i < addresses.length; i++) { + String[] ipAndPort = addresses[i].split(":"); + String ip = ipAndPort[0]; + int port = Integer.parseInt(ipAndPort[1]); + client.addTransportAddress(new TransportAddress(InetAddress.getByName(ip), port)); + } + CLIENT_MAP.put(clusterName, client); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * 查询 + * produce 查询主贴的时候会调用这个方法哦 + * @param clusterName + * @param indices + * @param size + * @param consumer + */ + public static void scrollQuery(String clusterName, String indices, String type, + QueryBuilder queryBuilder, Integer size, int minutes, + Consumer> consumer) { + TransportClient client = getClient(clusterName); + + SearchRequestBuilder searchRequestBuilder = client.prepareSearch() + .setIndices(indices) + .setIndicesOptions(IndicesOptions.fromOptions(true, true, + true, false)) + .setTypes(type) + .setQuery(queryBuilder) + 
.setScroll(TimeValue.timeValueMinutes(minutes)) + .setSize(size); + + long s = System.currentTimeMillis(); + SearchResponse response = searchRequestBuilder.execute().actionGet(); + long e = System.currentTimeMillis(); + LOGGER.debug("First query es, size:{}, took:{} ms.", + response.getHits().getHits().length, (e - s)); + List scrollIds = Lists.newArrayList(); + while (response.getHits().getHits().length > 0) { + List dataList = Lists.newLinkedList(); + for (SearchHit hit : response.getHits().getHits()) { + dataList.add(JSON.parseObject(hit.getSourceAsString())); + } + consumer.accept(dataList); + if (dataList.size() < size) { + break; + } + String scrollId = response.getScrollId(); + scrollIds.add(scrollId); + long s1 = System.currentTimeMillis(); + response = client.prepareSearchScroll(scrollId) + .setScroll(TimeValue.timeValueMinutes(minutes)) + .execute() + .actionGet(); + long e1 = System.currentTimeMillis(); + LOGGER.debug("Query es, size:{}, took:{} ms", + response.getHits().getHits().length, (e1 - s1)); + } + if (!CollectionUtils.isEmpty(scrollIds)) { + ClearScrollRequestBuilder clearScroll = client.prepareClearScroll() + .setScrollIds(scrollIds); + client.clearScroll(clearScroll.request()); + } + } + + /** + * 查询前[size]满足条件的数据 + * + * @param clusterName + * @param index + * @param queryBuilder + * @param size + * @return 没有数据:返回null,有数据:返回数据列表 + */ + public static List query(String clusterName, String index, final QueryBuilder queryBuilder, int size) { + TransportClient client = getClient(clusterName); + + SearchResponse response = client.prepareSearch() + .setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(true, true, + true, false)) + .setSize(size) + .setFrom(0) + .setQuery(queryBuilder) + .execute().actionGet(); + if (response.getHits().totalHits > 0) { + List dataList = Lists.newLinkedList(); + SearchHit[] hits = response.getHits().getHits(); + for (int i = 0; i < hits.length; i++) { + JSONObject data = new JSONObject(); + 
data.putAll(hits[i].getSourceAsMap()); + dataList.add(data); + } + return dataList; + } + + return null; + } + + /** + * 根据时间范围获取index集合 + * + * @param startMills 起始时间(ms) + * @param endMils 结束时间(ms) + * @return + */ + public static String[] getIndices(String prefix, String separator, + long startMills, long endMils, String pattern) { + List indexList = Lists.newArrayList(); + LocalDateTime start = new LocalDateTime(startMills); + LocalDateTime end = new LocalDateTime(endMils); + for (LocalDateTime dt = start; dt.isBefore(end); dt = dt.plusDays(1)) { + String dtStr = dt.toString(DateTimeFormat.forPattern(pattern)); + String index = new StringBuilder() + .append(prefix) + .append(separator) + .append(dtStr) + .toString(); + indexList.add(index); + } + + String[] indices = new String[indexList.size()]; + indices = indexList.toArray(indices); + return indices; + } + + /** + * 根据时间范围获取index集合 + * + * @param startMills 起始时间(ms) + * @param endMils 结束时间(ms) + * @return + */ + public static String[] getIndices(String prefix, String separator, + long startMills, long endMils, String pattern, Long upperMills, String standbyIndex) { + List indexList = Lists.newArrayList(); + LocalDateTime start = new LocalDateTime(startMills); + LocalDateTime end = new LocalDateTime(endMils); + LocalDateTime upper = new LocalDateTime(upperMills); + if (start.isBefore(upper)) { + indexList.add(standbyIndex); + start = upper; + } + for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusDays(1)) { + String dtStr = dt.toString(DateTimeFormat.forPattern(pattern)); + String index = new StringBuilder() + .append(prefix) + .append(separator) + .append(dtStr) + .toString(); + indexList.add(index); + } + + String[] indices = new String[indexList.size()]; + indices = indexList.toArray(indices); + return indices; + } + + /** + * 根据indexName获取一定存在的index + * 如果indexName存在则返回,不存在则创建 + * + * @param clusterName + * @param indexName + * @param type + * @param mappingFile + * @return + 
*/ +// public static String getOrCreateIndex(String clusterName, String indexName, String type, +// int shard, int replica, String mappingFile) { +// try { +// if (!EsUtils.exists(clusterName, indexName)) { +// byte[] bytes = Files.readAllBytes(Paths.get(mappingFile)); +// String mappingDef = new String(bytes); +// boolean flag = EsUtils.createIndex(clusterName, indexName, type, +// shard, replica, mappingDef); +// if (!flag) { +// throw new RuntimeException("Create index " + indexName + " error."); +// } +// } +// } catch (Exception e) { +// throw new RuntimeException(e); +// } +// +// return indexName; +// } + + /** + * index一个文档 + * + * @param clusterName + * @param indexName + * @param data + * @return + */ + public static String index(String clusterName, String indexName, String type, final JSONObject data, String idField) { + TransportClient client = getClient(clusterName); + IndexResponse response = client.prepareIndex(indexName, type) + .setSource(data, XContentType.JSON) + .setId(data.getString(idField)) + .get(); + return response.getId(); + } + + /** + * index一个文档 + * + * @param clusterName + * @param indexName + * @param dataList + * @return + */ + public static void index(String clusterName, String indexName, String type, final List dataList, String idField) { + if (CollectionUtils.isEmpty(dataList)) { + return; + } + TransportClient client = getClient(clusterName); + for (int i = 0; i < dataList.size(); i++) { + JSONObject data = dataList.get(i); + client.prepareIndex(indexName, type) + .setSource(data, XContentType.JSON) + .setId(data.getString(idField)) + .get(); + } + } + + /** + * 批量index文档 + * @param clusterName + * @param bulkItemList + * @return + */ + public static boolean bulkIndex(String clusterName, final List bulkItemList, String idField) { + if (CollectionUtils.isEmpty(bulkItemList)) { + return true; + } + TransportClient client = getClient(clusterName); + BulkRequestBuilder rb = client.prepareBulk(); + for (BulkItem item : bulkItemList) 
{ + rb.add(client.prepareIndex(item.getIndexName(), item.getType(), item.getData().getString(idField)) + .setSource(item.getData(), XContentType.JSON)); + } + BulkResponse response = rb.get(); + LOGGER.info("Bulk index, size:{}.", bulkItemList.size()); + return response.hasFailures(); + } + + /** + * 判断索引是否存在 + * + * @param clusterName + * @param indexName + * @return + */ + public static Boolean exists(String clusterName, String indexName) { + TransportClient client = getClient(clusterName); + IndicesExistsRequest request = new IndicesExistsRequest() + .indices(indexName); + IndicesExistsResponse response = client.admin().indices().exists(request).actionGet(); + return response.isExists(); + } + + /** + * 创建一个index + * + * @param clusterName + * @param indexName + * @param type + * @param shardCount + * @param replicaCount + * @param mappingDef + * @return + */ + public static Boolean createIndex(String clusterName, String indexName, String type, + Integer shardCount, Integer replicaCount, String mappingDef) { + TransportClient client = getClient(clusterName); + CreateIndexRequest request = new CreateIndexRequest(indexName); + request.settings(Settings.builder() + .put("index.number_of_shards", shardCount) + .put("index.number_of_replicas", replicaCount) + .put("index.refresh_interval", 2, TimeUnit.SECONDS) + .put("index.analysis.filter.shingle_filter.type", "shingle") + .put("index.analysis.filter.shingle_filter.min_shingle_size", 2) + .put("index.analysis.filter.shingle_filter.max_shingle_size", 2) + .put("index.analysis.filter.shingle_filter.output_unigrams", false) + .put("index.analysis.analyzer.shingle_analyzer.type", "custom") + .put("index.analysis.analyzer.shingle_analyzer.tokenizer", "ik_smart") + .putArray("index.analysis.analyzer.shingle_analyzer.filter", "lowercase", "shingle_filter") + ); + + request.mapping(type, mappingDef, XContentType.JSON); + CreateIndexResponse createIndexResponse = client.admin().indices().create(request).actionGet(); + 
boolean acknowledged = createIndexResponse.isAcknowledged(); + boolean shardsAcknowledged = createIndexResponse.isShardsAcked(); + if (acknowledged && shardsAcknowledged) { + return true; + } + + return false; + } + + /** + * 删除index + * + * @param clusterName + * @param indexName + * @return + */ + public static Boolean deleteIndex(String clusterName, String indexName) { + TransportClient client = getClient(clusterName); + DeleteIndexRequest request = new DeleteIndexRequest() + .indices(indexName); + AcknowledgedResponse response = client.admin().indices().delete(request).actionGet(); + return response.isAcknowledged(); + } + + private static TransportClient getClient(String clusterName) { + return CLIENT_MAP.get(clusterName); + } + + public static BulkItem buildBulkItem(String indexName, String type, final JSONObject data) { + return new BulkItem() + .setIndexName(indexName) + .setType(type) + .setData(data); + } + + + /** + * 查询某个Index 的总量 + */ + + public static Long scrollQuery(String clusterName, String indices, String type, + QueryBuilder queryBuilder){ + Long totalHits = 0L; + try{ + TransportClient client = getClient(clusterName); + SearchRequestBuilder searchRequestBuilder = client.prepareSearch() + .setIndices(indices) + .setIndicesOptions(IndicesOptions.fromOptions(true, true, + true, false)) + .setTypes(type) + .setQuery(queryBuilder); + SearchResponse response = searchRequestBuilder.execute().actionGet(); + totalHits = response.getHits().totalHits; + }catch (Exception e){ + e.printStackTrace(); + } + return totalHits; + } + + public static class BulkItem { + String indexName; + String type; + JSONObject data; + + public String getIndexName() { + return indexName; + } + + public BulkItem setIndexName(String indexName) { + this.indexName = indexName; + return this; + } + + public String getType() { + return type; + } + + public BulkItem setType(String type) { + this.type = type; + return this; + } + + public JSONObject getData() { + return data; + } + + 
public BulkItem setData(JSONObject data) { + this.data = data; + return this; + } + + public BulkItem setStringData(String data) { + this.type = data; + return this; + } + } + +// public static void etl(String srcClusterName, String srcIndex, String srcType, QueryBuilder qb, +// Integer size, int minutes, +// String tarClusterName, String tarIndex, String tarType, String idField) { +// scrollQuery(srcClusterName, new String[]{srcClusterName}, srcType, qb, size, minutes, dataList -> { +// EsUtils.index(tarClusterName, tarIndex, tarType, dataList, idField); +// }); +// } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/ReadLine.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/ReadLine.java new file mode 100644 index 0000000..51a4545 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/ReadLine.java @@ -0,0 +1,239 @@ +package com.bfd.mf.job.util; + +import it.sauronsoftware.jave.Encoder; + +import javax.imageio.ImageIO; +import javax.imageio.ImageReader; +import javax.imageio.stream.FileImageInputStream; +import javax.imageio.stream.ImageInputStream; +import java.awt.image.BufferedImage; +import java.io.*; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + + +/** + * Created by BFD-229 on 2017/7/6. 
+ */ +public class ReadLine { + + public static List readLine( File fileName){ + List list = new ArrayList (); + String line; + try { + InputStreamReader read = new InputStreamReader(new FileInputStream(fileName), "utf-8"); + BufferedReader reader = new BufferedReader(read); + while ((line = reader.readLine()) != null) { + try { + if (line.length() > 0) { + list.add(line); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + return list; + }catch (UnsupportedEncodingException e) { + e.printStackTrace(); + return null; + } catch (FileNotFoundException e) { + e.printStackTrace(); + return null; + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } + + +// public static List readLine(File fileName){ +// List list = new ArrayList (); +// String line; +// try { +// InputStreamReader read = new InputStreamReader(new FileInputStream(fileName), "utf-8"); +// BufferedReader reader = new BufferedReader(read); +// while ((line = reader.readLine()) != null) { +// try { +// if (line.length() > 0) { +// list.add(line); +// } +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } +// return list; +// }catch (UnsupportedEncodingException e) { +// e.printStackTrace(); +// return null; +// } catch (FileNotFoundException e) { +// e.printStackTrace(); +// return null; +// } catch (IOException e) { +// e.printStackTrace(); +// return null; +// } +// } + + // 读取文件内容 + public static String readFile(String path){ + File file = new File(path); + StringBuilder result = new StringBuilder(); + try{ + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"));//构造一个BufferedReader类来读取文件 + String s = null; + while((s = br.readLine())!=null){//使用readLine方法,一次读一行 + result.append( System.lineSeparator() + s); + } + br.close(); + }catch(Exception e){ + e.printStackTrace(); + } + return result.toString(); + } + + + public static void readFiles(File file){ + if (file.exists()) { + System.err.println("exist"); + try { + 
FileInputStream fis = new FileInputStream(file); + InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); + BufferedReader br = new BufferedReader(isr); + String line; + while((line = br.readLine()) != null){ + System.out.println(line); + } + br.close(); + isr.close(); + fis.close(); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + + + public static String getResolution1(File file) throws IOException { + BufferedImage image = ImageIO.read(file); + return image.getWidth() + "x" + image.getHeight(); + } + + +// public static String getResolution(File file){ +// Encoder encoder = new Encoder(); +// try { +// MultimediaInfo m = encoder.getInfo(file); +// int height = m.getVideo().getSize().getHeight(); +// int width = m.getVideo().getSize().getWidth(); +// System.out.println("width:"+width); +// System.out.println("height:" + height); +// FileInputStream fis = new FileInputStream(source); +// FileChannel fc = fis.getChannel(); +// BigDecimal fileSize = new BigDecimal(fc.size()); +// String size = fileSize.divide(new BigDecimal(1048576), 2, RoundingMode.HALF_UP) + "MB"; +// System.out.println("size:" + size); +// long duration = m.getDuration()/1000; +// System.out.println("duration:" + duration + "s"); +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } + + public static String getImageDim(String path) { + String result = null; + String suffix = getFileSuffix(path); + //解码具有给定后缀的文件 + Iterator iter = ImageIO.getImageReadersBySuffix(suffix); + // System.out.println(ImageIO.getImageReadersBySuffix(suffix)); + if (iter.hasNext()) { + ImageReader reader = iter.next(); + try { + ImageInputStream stream = new FileImageInputStream(new File(path)); + reader.setInput(stream); + int width = reader.getWidth(reader.getMinIndex()); + int height = reader.getHeight(reader.getMinIndex()); + result = width + "×" + height; + } 
catch (IOException e) { + e.printStackTrace(); + } finally { + reader.dispose(); + } + } + // System.out.println("getImageDim:" + result); + return result; + } + + private static String getFileSuffix(final String path) { + String result = null; + if (path != null) { + result = ""; + if (path.lastIndexOf('.') != -1) { + result = path.substring(path.lastIndexOf('.')); + if (result.startsWith(".")) { + result = result.substring(1); + } + } + } + // System.out.println("getFileSuffix:" + result); + return result; + } + + + public static String videosize(String video) { + File source = new File(video); + Encoder encoder = new Encoder(); + try { + it.sauronsoftware.jave.MultimediaInfo m = encoder.getInfo(source); + return m.getVideo().getSize().getHeight() + "×" + m.getVideo().getSize().getWidth(); + } catch (Exception e) { + e.printStackTrace(); + return null; + } + } + + + +// public static String getVideoTime (String path){ +// File source = new File(path); +// Encoder encoder = new Encoder(); +// File[] file = source.listFiles(); +// long sum =0; +// for (File file2 : file) { +// try { +// MultimediaInfo m = encoder.getInfo(file2); +// long ls = m.getDuration()/1000; //ls是获取到的秒数 +// sum += ls; +// } catch (Exception e) { +// e.printStackTrace(); +// } +// } +// double sum1 = (double)sum; +// double sum2 =sum1/3600;// 转换成为了小时 +// System.out.println(sum2); +// return sum2+""; +// } +// + + +// public static byte[] readFile(String path){ +// try { +// FileInputStream fileInputStream = new FileInputStream(path); +// BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(fileInputStream)); +// String line = null; +// while ((line = bufferedReader.readLine()) != null) { +// System.out.println(line); +// } +// fileInputStream.close(); +// }catch (Exception e){ +// e.printStackTrace(); +// } +// } + + + +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/util/ZipUtils.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/ZipUtils.java new 
file mode 100644 index 0000000..f191293 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/util/ZipUtils.java @@ -0,0 +1,119 @@ +package com.bfd.mf.job.util; + +import com.bfd.mf.job.worker.UpLoadProducer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.*; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import static org.apache.lucene.store.BufferedIndexInput.BUFFER_SIZE; + +public class ZipUtils { + private static final Logger LOGGER = LoggerFactory.getLogger(ZipUtils.class); + /** + * zip解压 + * @param srcFile zip源文件 + * @param destDirPath 解压后的目标文件夹 + * @throws RuntimeException 解压失败会抛出运行时异常 + */ + + public static Map> unZip(File srcFile, String destDirPath) throws RuntimeException { + Map> fileNameMap = new HashMap<>(); + long start = System.currentTimeMillis(); + // 判断源文件是否存在 + if (!srcFile.exists()) { + return fileNameMap; + // throw new RuntimeException(srcFile.getPath() + "所指文件不存在"); + } + // 开始解压 + ZipFile zipFile = null; + try { + zipFile = new ZipFile(srcFile); + Enumeration entries = zipFile.entries(); + while (entries.hasMoreElements()) { + ZipEntry entry = (ZipEntry) entries.nextElement(); + // System.out.println("解压后文件名称 :" + entry.getName()); + List fileNameList = new ArrayList<>(); + if(entry.getName().contains(".xlsx")){ + fileNameList.add(entry.getName()); + fileNameMap.put("excelName",fileNameList); + }else if(entry.getName().contains("txt")){ + fileNameList.add(entry.getName()); + fileNameMap.put("excelName",fileNameList); + }else{ + if(entry.getName().contains("/")) { + String files[] = entry.getName().split("/"); + String key = entry.getName().split("/")[0]; + if (files.length >1) { + String value = entry.getName().split("/")[1]; + if (fileNameMap.containsKey(key)) { + fileNameList = fileNameMap.get(key); + fileNameList.add(value); + fileNameMap.put(key, fileNameList); 
+ } else { + fileNameList.add(value); + fileNameMap.put(key, fileNameList); + } + } + } + } + // 如果是文件夹,就创建个文件夹 + if (entry.isDirectory()) { + String dirPath = destDirPath + "/" + entry.getName(); + File dir = new File(dirPath); + dir.mkdirs(); + } else { + // 如果是文件,就先创建一个文件,然后用io流把内容copy过去 + File targetFile = new File(destDirPath + "/" + entry.getName()); + // 保证这个文件的父文件夹必须要存在 + if(!targetFile.getParentFile().exists()){ + targetFile.getParentFile().mkdirs(); + } + targetFile.createNewFile(); + // 将压缩文件内容写入到这个文件中 + InputStream is = zipFile.getInputStream(entry); + FileOutputStream fos = new FileOutputStream(targetFile); + int len; + byte[] buf = new byte[BUFFER_SIZE]; + while ((len = is.read(buf)) != -1) { + fos.write(buf, 0, len); + } + // 关流顺序,先打开的后关闭 + fos.close(); + is.close(); + } + } + long end = System.currentTimeMillis(); + LOGGER.info("解压完成,耗时:" + (end - start) +" ms"); + } catch (Exception e) { + e.printStackTrace(); + throw new RuntimeException("unzip error from ZipUtils", e); + } finally { + if(zipFile != null){ + try { + zipFile.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + return fileNameMap; + } + + + public static String getZipFileName(String zipName, String zipPath) { + String zipFileName = zipName.replace(".zip",""); + // 判断zip这个文件夹是否存在,不存在则创建 + File zipFile=new File(zipPath+zipFileName); + if(!zipFile.exists()){//如果文件夹不存在 + zipFile.mkdir();//创建文件夹 + } + return zipFileName; + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java new file mode 100644 index 0000000..ad04f67 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/AlarmProducer.java @@ -0,0 +1,38 @@ +package com.bfd.mf.job.worker; + +import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.service.alarm.AlarmService; +import com.bfd.mf.job.service.taskCount.TaskCountService; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +@Component +public class AlarmProducer extends AbstractWorker { + private static final Logger LOGGER = LoggerFactory.getLogger(AlarmProducer.class); + + @Autowired + private AppConfig config; + @Autowired + private AlarmService alarmService; + @Override + protected Integer getThreadCount() { + return config.getAlarmProducerThreadCount(); + } + @Override + protected String getThreadNameFormat() { + return "alarm-producer-%d"; + } + + @Override + protected void work(String json) { + LOGGER.info("[AlarmProducer] work start ... "); + alarmService.produce(); + try { + Thread.sleep(config.getIntervalTime()); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/BacktraceProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/BacktraceProducer.java index c2f06ea..d9a996c 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/BacktraceProducer.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/BacktraceProducer.java @@ -1,8 +1,7 @@ package com.bfd.mf.job.worker; import com.bfd.mf.job.config.AppConfig; -import com.bfd.mf.job.domain.repository.SubjectRepository; -import com.bfd.mf.job.service.BacktraceService; +import com.bfd.mf.job.service.backtrace.BacktraceService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/QueryProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/QueryProducer.java index 8fe3be9..14bf7da 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/QueryProducer.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/QueryProducer.java @@ -1,7 +1,7 @@ package com.bfd.mf.job.worker; import com.bfd.mf.job.config.AppConfig; 
-import com.bfd.mf.job.service.QueryService; +import com.bfd.mf.job.service.query.QueryService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -31,13 +31,13 @@ public class QueryProducer extends AbstractWorker { */ @Override protected void work(String json) { - LOGGER.info("[QueryProducer] work start ... "); + // LOGGER.info("[QueryProducer] work start ... "); queryBacktraceService.tryAcquire(); queryBacktraceService.produce(); - try { - Thread.sleep(300000); - } catch (InterruptedException e) { - e.printStackTrace(); - } +// try { +// Thread.sleep(config.getIntervalTime()); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } } } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/ReadWriterOlyDataProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/ReadWriterOlyDataProducer.java index f65c48c..4b436e7 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/ReadWriterOlyDataProducer.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/ReadWriterOlyDataProducer.java @@ -1,7 +1,6 @@ package com.bfd.mf.job.worker; import com.bfd.mf.job.config.AppConfig; -import com.bfd.mf.job.service.BacktraceService; import com.bfd.mf.job.service.WriterTXTService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/SQOutPutProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/SQOutPutProducer.java new file mode 100644 index 0000000..2e6a3fa --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/SQOutPutProducer.java @@ -0,0 +1,40 @@ +package com.bfd.mf.job.worker; + +import com.bfd.mf.job.config.AppConfig; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +@Component +public class SQOutPutProducer extends 
AbstractWorker { + private static final Logger LOGGER = LoggerFactory.getLogger(SQOutPutProducer.class); + + @Autowired + private AppConfig config; + + @Override + protected Integer getThreadCount() { + return config.getQueryProducerThreadCount(); + } + + @Override + protected String getThreadNameFormat() { + return "backtrace-producer-%d"; + } + + /** + * 这个 是用来 做数据拉取的,专门针对专题数据的拉取 + */ + @Override + protected void work(String json) { + LOGGER.info("[SQ - OutPutProducer] work start ... "); +// outputService.tryAcquire(); +// outputService.produce(); + try { + Thread.sleep(config.getIntervalTime()); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/StatisticsProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/StatisticsProducer.java index 78c83f2..5ad63f7 100644 --- a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/StatisticsProducer.java +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/StatisticsProducer.java @@ -1,7 +1,7 @@ package com.bfd.mf.job.worker; import com.bfd.mf.job.config.AppConfig; -import com.bfd.mf.job.service.StatisticsService; +import com.bfd.mf.job.service.statistics.StatisticsService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -29,7 +29,7 @@ public class StatisticsProducer extends AbstractWorker { LOGGER.info("[StatisticsProducer] work start ... 
"); statisticsService.tryAcquire(); try { - Thread.sleep(3600000); + Thread.sleep(config.getIntervalTime()); } catch (InterruptedException e) { e.printStackTrace(); } diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/TaskCountProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/TaskCountProducer.java new file mode 100644 index 0000000..9a9b0a5 --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/TaskCountProducer.java @@ -0,0 +1,40 @@ +package com.bfd.mf.job.worker; + +import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.service.statistics.StatisticsService; +import com.bfd.mf.job.service.taskCount.TaskCountService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +@Component +public class TaskCountProducer extends AbstractWorker { + private static final Logger LOGGER = LoggerFactory.getLogger(TaskCountProducer.class); + + @Autowired + private AppConfig config; + @Autowired + private TaskCountService taskCountService; + @Override + protected Integer getThreadCount() { + return config.getTaskcountProducerThreadCount(); + } + @Override + protected String getThreadNameFormat() { + return "backtrace-producer-%d"; + } + + @Override + protected void work(String json) { + LOGGER.info("[TaskCountProducer] work start ... 
"); + taskCountService.tryAcquire(); + try { + for(int i = 0 ; i < 49 ; i ++) { + Thread.sleep(config.getIntervalTime()); // 86400000000 + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } +} diff --git a/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/UpLoadProducer.java b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/UpLoadProducer.java new file mode 100644 index 0000000..4b8522c --- /dev/null +++ b/cl_query_data_job/src/main/java/com/bfd/mf/job/worker/UpLoadProducer.java @@ -0,0 +1,68 @@ +package com.bfd.mf.job.worker; + +import com.bfd.mf.job.config.AppConfig; +import com.bfd.mf.job.service.upload.UpLoadService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Component; + +import java.util.concurrent.ThreadPoolExecutor; + +@Component +public class UpLoadProducer extends AbstractWorker { + private static final Logger LOGGER = LoggerFactory.getLogger(UpLoadProducer.class); + + @Autowired + private AppConfig config; + @Autowired + private UpLoadService upLoadService; + +// @Override +// public void start() { +// super.start(); +// Thread t = new Thread(() -> { +// while (running) { +// try { +// upLoadService.flushConsumer(); // 刷新 +// } catch (Exception e) { +// LOGGER.error("Flush consumer error due to [{}].", +// e.getMessage(), e); +// } +// } +// }); +// t.setDaemon(true); +// t.start(); +// ThreadPoolExecutor executor = buildExecutor(1, TASK_MAX_COUNT, "bulk-%d"); +// //ThreadPoolExecutor executor = buildExecutor(config.esTargetBulkThreadCount(), TASK_MAX_COUNT, "bulk-%d"); +// // for (int i = 0; i < config.esTargetBulkThreadCount(); i++) { +// for (int i = 0; i < 1; i++) { +// executor.submit(() -> { +// while (running) { +// try { +// upLoadService.flushData(); +// } catch (Exception e) { +// LOGGER.error("Flush data error due to [{}].", +// e.getMessage(), e); +// } +// } +// }); +// } +// } + + + @Override + 
protected Integer getThreadCount() { + return config.getUpLoadProducerThreadCount(); + } + @Override + protected String getThreadNameFormat() { + return "backtrace-producer-%d"; + } + + @Override + protected void work(String json) { + upLoadService.tryAcquire(); + upLoadService.produce(); + } +} diff --git a/cl_query_data_job/src/main/resources/application-107.yml b/cl_query_data_job/src/main/resources/application-107.yml new file mode 100644 index 0000000..ae23631 --- /dev/null +++ b/cl_query_data_job/src/main/resources/application-107.yml @@ -0,0 +1,70 @@ +debug: false + +logging: + level: + com.bfd.mf: debug +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: baifendian + url: jdbc:mysql://192.168.162.48:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + hikari: + maximum-pool-size: 10 + minimum-idle: 1 + + +worker: + version: 3.0.1 + enable-test: false + test-thread-count: 10 + test-task-id: 180 + ## 数据默认要写的 kafka + broker-list: 172.18.1.113:9092 + analysis-topic: + - sq_topic_cl_query_analysis_1 + + analysis-group: sq_group_cl_analysis_1 + ## 服务的状态,true 为启动 + enable-analysis-producer: false + enable-analysis-consumer: false + enable-statistics-producer: false + enable-query-producer: false + enable-backtrace-producer: false + enable-rw-oly-producer: false + enable-up-load-producer: true + ## 启动服务的线程数 + statistics-producer-thread-count: 1 + query-producer-thread-count: 5 + backtrace-producer-thread-count: 10 + rw-oly-producer-thread-count: 1 + up-load-producer-thread-count: 1 + + period-s: 5 + interval-time: 1800000 + + rule-rest: http://rule.sq.baifendian.com/data_match/content/ + comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask + rule-rest-concurrency: 500 + content-limit: 2000 + failure-upper: 2000 + + goFastPostUrl : http://192.168.162.107:18080/group1/upload + goFastDomain : http://192.168.162.107:18080 + uploadOLYExcelPath : 
/opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + + es-mini: + name: sicluster + address: 192.168.162.107:19300 + upper: 2018-09-01 + standby: cl_major_ + bulk-thread-count: 5 + bulk-rate: 3 + bulk-size: 100 + es-normal: + name: sicluster + address: 192.168.162.107:19300 + upper: 2018-09-01 + standby: cl_index_* diff --git a/cl_query_data_job/src/main/resources/application-113.yml b/cl_query_data_job/src/main/resources/application-113.yml new file mode 100644 index 0000000..d0ebb3e --- /dev/null +++ b/cl_query_data_job/src/main/resources/application-113.yml @@ -0,0 +1,70 @@ +debug: false + +logging: + level: + com.bfd.mf: debug +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: bfd123 + url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + hikari: + maximum-pool-size: 10 + minimum-idle: 1 + + +worker: + version: 3.0.1 + enable-test: false + test-thread-count: 10 + test-task-id: 180 + ## 数据默认要写的 kafka + broker-list: 172.18.1.113:9092 + analysis-topic: + - sq_topic_cl_query_analysis_1 + + analysis-group: sq_group_cl_analysis_1 + ## 服务的状态,true 为启动 + enable-analysis-producer: false + enable-analysis-consumer: false + enable-statistics-producer: true + enable-query-producer: false + enable-backtrace-producer: false + enable-rw-oly-producer: false + enable-up-load-producer: false + ## 启动服务的线程数 + statistics-producer-thread-count: 1 + query-producer-thread-count: 5 + backtrace-producer-thread-count: 10 + rw-oly-producer-thread-count: 1 + up-load-producer-thread-count: 3 + + period-s: 5 + interval-time: 1800000 + + rule-rest: http://rule.sq.baifendian.com/data_match/content/ + comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask + rule-rest-concurrency: 500 + content-limit: 2000 + failure-upper: 2000 + + goFastPostUrl : http://172.18.1.113:8080/upload + goFastDomain : 
http://172.18.1.113:8080 + uploadOLYExcelPath : /opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + + es-mini: + name: SQ_Mini + address: 172.26.11.111:9301 + upper: 2018-09-01 + standby: cl_major_ + bulk-thread-count: 5 + bulk-rate: 3 + bulk-size: 100 + es-normal: + name: SQ_Normal + address: 172.26.11.109:9301 + upper: 2018-09-01 + standby: cl_index_* diff --git a/cl_query_data_job/src/main/resources/application-134.yml b/cl_query_data_job/src/main/resources/application-134.yml new file mode 100644 index 0000000..be259dc --- /dev/null +++ b/cl_query_data_job/src/main/resources/application-134.yml @@ -0,0 +1,75 @@ +debug: false + +logging: + level: + com.bfd.mf: debug +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: Bfd123!@# + url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + hikari: + maximum-pool-size: 10 + minimum-idle: 1 + + +worker: + version: 3.0.1 + enable-test: false + test-thread-count: 10 + test-task-id: 180 + ## 数据默认要写的 kafka + broker-list: 172.18.1.113:9092 + analysis-topic: + - sq_topic_cl_query_analysis_1 + + analysis-group: sq_group_cl_analysis_1 + ## 服务的状态,true 为启动 + enable-analysis-producer: false + enable-analysis-consumer: false + enable-statistics-producer: false + enable-query-producer: false + enable-backtrace-producer: false + enable-rw-oly-producer: false + enable-up-load-producer: false + enable-output-producer: true + ## 启动服务的线程数 + statistics-producer-thread-count: 1 + query-producer-thread-count: 1 + backtrace-producer-thread-count: 1 + rw-oly-producer-thread-count: 1 + up-load-producer-thread-count: 1 + output-producer-thread-count: 1 + + period-s: 5 + interval-time: 1800000 + + rule-rest: http://rule.sq.baifendian.com/data_match/content/ + comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask + rule-rest-concurrency: 500 + content-limit: 
2000 + failure-upper: 2000 + + goFastPostUrl : http://172.18.1.113:8080/upload + goFastDomain : http://172.18.1.113:8080 + uploadOLYExcelPath : /opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + + es-normal: + name: SQ_Normal_new + address: 172.18.1.134:9301 + upper: 2018-09-01 + standby: cl_index_0 + es-reply-source: + name: SQ_Mini_new + address: 172.18.1.148:9303 + upper: 2018-09-01 + standby: cl_index_0 + es-mini: + name: SQ_Mini + address: 172.18.1.147:9313 + bulk-thread-count: 5 + bulk-rate: 3 + bulk-size: 100 diff --git a/cl_query_data_job/src/main/resources/application-prod.yml b/cl_query_data_job/src/main/resources/application-prod.yml new file mode 100644 index 0000000..ef5fddc --- /dev/null +++ b/cl_query_data_job/src/main/resources/application-prod.yml @@ -0,0 +1,67 @@ +debug: false + +logging: + level: + com.bfd.mf: debug +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: Bfd123!@# + url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + hikari: + maximum-pool-size: 10 + minimum-idle: 1 + + +worker: + version: 3.0.1 + enable-test: false + test-thread-count: 10 + test-task-id: 180 + ## 数据默认要写的 kafka + broker-list: 172.18.1.113:9092 + analysis-topic: + - sq_topic_cl_query_analysis_1 + + analysis-group: sq_group_cl_analysis_1 + ## 服务的状态,true 为启动 + enable-analysis-producer: false + enable-analysis-consumer: false + enable-statistics-producer: false + enable-query-producer: true + enable-backtrace-producer: false + enable-rw-oly-producer: false + enable-up-load-producer: false + ## 启动服务的线程数 + statistics-producer-thread-count: 1 + query-producer-thread-count: 5 + backtrace-producer-thread-count: 3 + rw-oly-producer-thread-count: 1 + up-load-producer-thread-count: 1 + + period-s: 5 + interval-time: 1800000 + + rule-rest: http://rule.sq.baifendian.com/data_match/content/ + comment-rest: 
http://rule.sq.baifendian.com/reputation/addReputationTask + rule-rest-concurrency: 500 + content-limit: 2000 + failure-upper: 2000 + + es-normal: + name: SQ_Normal_new + address: 172.18.1.134:9301 + upper: 2018-09-01 + standby: cl_index_0 + es-reply-source: + name: SQ_Mini_new + address: 192.168.67.148:9303 + upper: 2018-09-01 + standby: cl_index_0 + es-mini: + name: SQ_Mini + address: 172.18.1.147:9313 + bulk-thread-count: 5 + bulk-rate: 3 + bulk-size: 100 diff --git a/cl_query_data_job/src/main/resources/application-test.yml b/cl_query_data_job/src/main/resources/application-test.yml new file mode 100644 index 0000000..bab584e --- /dev/null +++ b/cl_query_data_job/src/main/resources/application-test.yml @@ -0,0 +1,61 @@ +debug: false + +logging: + level: + com.bfd.mf: debug +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: baifendian + url: jdbc:mysql://192.168.94.24:6446/intelligent_schema?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + hikari: + maximum-pool-size: 10 + minimum-idle: 1 + +worker: + version: 2.2.3 + enable-test: false + test-thread-count: 10 + test-task-id: 180 + ## 数据默认要写的 kafka + broker-list: 172.18.1.113:9092 + analysis-topic: + - sq_topic_cl_query_analysis_1 + + analysis-group: sq_group_cl_analysis_1 + ## 服务的状态,true 为启动 + enable-analysis-producer: false + enable-analysis-consumer: false + enable-statistics-producer: false + enable-query-producer: true + enable-backtrace-producer: false + ## 启动服务的线程数 + statistics-producer-thread-count: 1 + query-producer-thread-count: 5 + backtrace-producer-thread-count: 3 + + period-s: 5 + interval-time: 1800000 + + rule-rest: http://rule.sq.baifendian.com/data_match/content/ + comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask + rule-rest-concurrency: 500 + content-limit: 2000 + failure-upper: 2000 + es-normal: + name: SQ_Normal + address: 172.26.11.109:9301 + upper: 2018-09-01 + standby: cl_index_0 + 
es-reply-source: + name: SQ_Mini_new + address: 192.168.67.148:9303 + upper: 2018-09-01 + standby: cl_index_reply_0 + es-mini: + name: SQ_Mini + address: 172.26.11.111:9301 + bulk-thread-count: 5 + bulk-rate: 3 + bulk-size: 100 diff --git a/cl_query_data_job/src/main/resources/application.yml b/cl_query_data_job/src/main/resources/application.yml index d1e8e56..5f36ade 100644 --- a/cl_query_data_job/src/main/resources/application.yml +++ b/cl_query_data_job/src/main/resources/application.yml @@ -13,13 +13,15 @@ spring: maximum-pool-size: 10 minimum-idle: 1 + worker: - version: 2.2.3 + version: 3.0.1 enable-test: false test-thread-count: 10 test-task-id: 180 ## 数据默认要写的 kafka broker-list: 172.18.1.113:9092 + send-topic : databasestokafka analysis-topic: - sq_topic_cl_query_analysis_1 @@ -27,54 +29,59 @@ worker: ## 服务的状态,true 为启动 enable-analysis-producer: false enable-analysis-consumer: false - enable-statistics-producer: true + enable-statistics-producer: false enable-query-producer: false enable-backtrace-producer: false enable-rw-oly-producer: false + enable-up-load-producer: false + enable-output-producer: false + enable-taskcount-producer: false + enable-alarm-producer: true ## 启动服务的线程数 statistics-producer-thread-count: 1 - query-producer-thread-count: 5 - backtrace-producer-thread-count: 3 + query-producer-thread-count: 10 + backtrace-producer-thread-count: 1 rw-oly-producer-thread-count: 1 + up-load-producer-thread-count: 1 + output-producer-thread-count: 1 + taskcount-producer-thread-count: 1 + alarm-producer-thread-count: 1 period-s: 5 + interval-time: 1800000 + # 拉数年份查询的开始时间,现在是19年之前的合并成年了,这个时间就是:2019-01-01 00:00:00 + query-data-year-starttime: 1546272000000 rule-rest: http://rule.sq.baifendian.com/data_match/content/ comment-rest: http://rule.sq.baifendian.com/reputation/addReputationTask rule-rest-concurrency: 500 content-limit: 2000 failure-upper: 2000 + + goFastPostUrl : http://172.18.1.113:8080/upload + goFastDomain : http://172.18.1.113:8080 + 
uploadOLYExcelPath : /opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + es-normal: name: SQ_Normal_new address: 172.18.1.134:9301 - upper: 2018-09-01 + upper: 2000-01-01 standby: cl_index_0 es-reply-source: name: SQ_Mini_new - address: 192.168.67.148:9303 - upper: 2018-09-01 - standby: cl_index_reply_0 + address: 172.18.1.148:9303 + upper: 2000-01-01 + standby: cl_index_0 es-mini: name: SQ_Mini address: 172.18.1.147:9313 bulk-thread-count: 5 bulk-rate: 3 bulk-size: 100 - - - - # analysis-producer-thread-count: 32 - # analysis-consumer-thread-count: 64 - # analysis-producer-thread-count: 5 - # analysis-consumer-thread-count: 64 - # statistics-consumer-thread-count: 1 - - # enable-company: false - # company-thread-count: 32 - # enable-company-producer: false - # enable-company-consumer: false - - # company-group: sq_group_cl_label_company - # company-producer-thread-count: 32 - # company-consumer-thread-count: 64 - # enable-zombie: false + es-logstash: + name: SQ_Log + address: 172.26.11.111:9301 + upper: 2021-01-01 + standby: logstash-2021.05.13 diff --git a/cl_query_data_job/src/main/resources/log4j.properties b/cl_query_data_job/src/main/resources/log4j.properties new file mode 100644 index 0000000..8ca672a --- /dev/null +++ b/cl_query_data_job/src/main/resources/log4j.properties @@ -0,0 +1,18 @@ +log4j.rootLogger=INFO, error + +###### error appender definition ####### +log4j.appender.error=org.apache.log4j.DailyRollingFileAppender +log4j.appender.error.File=logs/sdkclient_error.log +log4j.appender.error.Append=true +log4j.appender.error.DatePattern='.'yyyy-MM-dd-HH +log4j.appender.error.layout=org.apache.log4j.PatternLayout +log4j.appender.error.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p [%c] %m%n + +#error only +log4j.appender.error.filter.F1=org.apache.log4j.varia.LevelRangeFilter +log4j.appender.error.filter.F1.LevelMin=ERROR +log4j.appender.error.filter.F1.LevelMax=ERROR +# 
+log4j.appender.error.filter.F2=org.apache.log4j.varia.LevelMatchFilter +log4j.appender.error.filter.F2.levelToMatch=WARN +log4j.appender.error.filter.F2.acceptOnMatch=false \ No newline at end of file diff --git a/cl_query_data_job/src/test/java/com/bfd/mf/job/service/upload/ParseExcelServiceTest.java b/cl_query_data_job/src/test/java/com/bfd/mf/job/service/upload/ParseExcelServiceTest.java new file mode 100644 index 0000000..57473d8 --- /dev/null +++ b/cl_query_data_job/src/test/java/com/bfd/mf/job/service/upload/ParseExcelServiceTest.java @@ -0,0 +1,159 @@ +package com.bfd.mf.job.service.upload; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.job.config.AllKeys; +import com.bfd.mf.job.config.ESConstants; +import com.bfd.mf.job.util.DataCheckUtil; +import com.monitorjbl.xlsx.StreamingReader; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; + +import java.io.FileInputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class ParseExcelServiceTest { + + public void testLoad(String filePath,String crawlDataFlag){ + try { + FileInputStream in = new FileInputStream(filePath); + Workbook wk = StreamingReader.builder() + .rowCacheSize(100) //缓存到内存中的行数,默认是10 + .bufferSize(4096) //读取资源时,缓存到内存的字节大小,默认是1024 + .open(in); //打开资源,必须,可以是InputStream或者是File,注意:只能打开XLSX格式的文件 + Sheet sheet = wk.getSheetAt(0); + + //遍历所有的行 + List resultList = new ArrayList<>(); + for (Row row : sheet) { + System.out.println(row.getRowNum()); + if(row.getRowNum() == 0){ + System.out.println("第 0 行不处理"); + }else { + JSONObject resultJson = new JSONObject(); + Map resultMap = AllKeys.getMap(); + String dataId = String.valueOf(row.getCell(0).getStringCellValue()); + resultMap.put(ESConstants.DATA_ID, dataId); + resultMap.put(ESConstants._ID, dataId); + resultMap.put(ESConstants.DOC_ID, String.valueOf(row.getCell(1).getStringCellValue())); + 
resultMap.put(ESConstants.CHANNEL, String.valueOf(row.getCell(2).getStringCellValue())); + resultMap.put(ESConstants.SOURCE, String.valueOf(row.getCell(3).getStringCellValue())); + resultMap.put(ESConstants.EN_SOURCE, String.valueOf(row.getCell(4).getStringCellValue())); + resultMap.put(ESConstants.URL, String.valueOf(row.getCell(5).getStringCellValue())); + resultMap.put(ESConstants.TITLE, String.valueOf(row.getCell(6).getStringCellValue())); + resultMap.put(ESConstants.TRANSLATETITLE, String.valueOf(row.getCell(7).getStringCellValue())); + // 发表时间的 4 个字段 + String pubTimeStr = String.valueOf(row.getCell(8).getStringCellValue()); + long pubTime = DataCheckUtil.convertDateTotime(pubTimeStr) * 1000; + long pubDay = DataCheckUtil.getDay(pubTime); + String pubDate = DataCheckUtil.getDate(pubTime); + resultMap.put(ESConstants.PUBTIME, pubTime); + resultMap.put(ESConstants.PUBTIMESTR, pubTimeStr); + resultMap.put(ESConstants.PUBDAY, pubDay); + resultMap.put(ESConstants.PUBDATE, pubDate); + + resultMap.put(ESConstants.AUTHOR, String.valueOf(row.getCell(9).getStringCellValue())); + resultMap.put(ESConstants.AUTHORID, String.valueOf(row.getCell(10).getStringCellValue())); + resultMap.put(ESConstants.CONTENT, String.valueOf(row.getCell(11).getStringCellValue())); + resultMap.put(ESConstants.TRANSLATECONTENT, String.valueOf(row.getCell(12).getStringCellValue())); + resultMap.put(ESConstants.PRICE, String.valueOf(row.getCell(13).getStringCellValue())); + resultMap.put(ESConstants.PRODUCTPARAMETER, String.valueOf(row.getCell(14).getStringCellValue())); + // 抓取时间的 4 个字段 + String crawlTimeStr = String.valueOf(row.getCell(15).getStringCellValue()); + long crawlTime = System.currentTimeMillis(); + if (!crawlTimeStr.contains("1970")) { + crawlTime = DataCheckUtil.convertDateTotime(crawlTimeStr) * 1000; + } else { + crawlTimeStr = DataCheckUtil.getCurrentTime(crawlTime); + } + long crawlDay = DataCheckUtil.getDay(crawlTime); + String crawlDate = DataCheckUtil.getDate(crawlTime); + + 
resultMap.put(ESConstants.CRAWLTIME, crawlTime); + resultMap.put(ESConstants.CRAWLTIMESTR, crawlTimeStr); + resultMap.put(ESConstants.CRAWLDAY, crawlDay); + resultMap.put(ESConstants.CRAWLDATE, crawlDate); + // crawlDataFlag 这个字段值不用数据中原有的,而是要用页面传过来的,不然任务查询的时候查不到数据 + resultMap.put(ESConstants.CRAWLDATAFLAG, crawlDataFlag); + resultMap.put(ESConstants.SYS_SENTIMENT, String.valueOf(row.getCell(17).getStringCellValue())); + // 提取的关键字字段的值 + String hlKeywords = row.getCell(18).getStringCellValue(); + List hl = getHlKeywords(hlKeywords); + resultMap.put(ESConstants.HL_KEYWORDS, hl); + // 转发、评论、点赞 + String quoteCount = row.getCell(19).getStringCellValue(); + if (quoteCount.equals("")) { + quoteCount = "0"; + } + resultMap.put("quoteCount", Integer.valueOf(quoteCount)); + String commentsCount = row.getCell(20).getStringCellValue(); + if (commentsCount.equals("")) { + commentsCount = "0"; + } + resultMap.put("commentsCount", Integer.valueOf(commentsCount)); + String attitudesCount = row.getCell(21).getStringCellValue(); + if (attitudesCount.equals("")) { + attitudesCount = "0"; + } + resultMap.put("attitudesCount", Integer.valueOf(attitudesCount)); + // 插入时间的 4个字段 + long createTime = System.currentTimeMillis(); + resultMap.put(ESConstants.CREATETIME, createTime); + resultMap.put(ESConstants.CREATETIMESTR, DataCheckUtil.getCurrentTime(createTime)); + resultMap.put(ESConstants.CREATEDAY, DataCheckUtil.getDay(createTime)); + resultMap.put(ESConstants.CREATEDATE, DataCheckUtil.getDate(createTime)); + + // 根据路径和数据ID,读取附件,组装附件的字段值 + // resultMap = getPathSize(path, dataId, resultMap, fileNameMap); + + System.out.println("The Result: " + JSONObject.toJSONString(resultMap)); + resultJson.putAll(resultMap); + resultList.add(resultJson); + } + // 一条一条的数据插入 + // uploadData(subjectId,resultJson); + } + System.out.println("========================================================="); +// for (Row row : sheet) { +// System.out.println("开始遍历第" + row.getRowNum() + "行数据:"); +// 
if(row.getRowNum() == 0){ +// System.out.println("第0 行 不处理"); +// +// } +// //遍历所有的列 +// for (Cell cell : row) { +// System.out.print(cell.getStringCellValue() + " "); +// } +// System.out.println(" "); +// } + }catch (Exception e){ + e.printStackTrace(); + } + } + + private List getHlKeywords(String hlKeywords) { + List hl = new ArrayList<>(); + if (null != hlKeywords ) { + if (hlKeywords.toString().equals("[]")) { + return hl; + } else { + if (hlKeywords.toString().contains(",")) { + String hlk[] = hlKeywords.toString().replace("[", "").replace("]", "").replace("\"", "").split(","); + hl = Arrays.asList(hlk); + } else { + String hlk = hlKeywords.toString().replace("[", "").replace("]", ""); + hl.add(hlk); + } + } + } + return hl; + } + + public static void main(String[] args) throws Exception { + ParseExcelServiceTest t = new ParseExcelServiceTest(); + t.testLoad("E:\\opt\\nfsdata\\uploadFiles\\海外演示_0224_001/海外演示_0224_001.xlsx","测试123"); + } +} diff --git a/cl_search_api/.idea/workspace.xml b/cl_search_api/.idea/workspace.xml index db2ff23..4adfa0b 100644 --- a/cl_search_api/.idea/workspace.xml +++ b/cl_search_api/.idea/workspace.xml @@ -11,8 +11,8 @@

- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.bfd.mf.common.util.enums; - - -public enum DataExportStatus { - INIT(0, "Initial", "等待处理"), - PROCESSING(1, "Processing", "处理中"), - FAIL(2, "Failed", "处理失败"), - SUCCESS(3, "Success", "完成"), - NODATA(4, "NoData", "暂无数据"), - UNKNOWN(-1, "Unknown", "未知错误"); - - private int code; - private String desc, locale; - - DataExportStatus(int code, String desc, String locale) { - this.code = code; - this.desc = desc; - this.locale = locale; - } - - public int getCode() { - return code; - } - - public void setCode(int code) { - this.code = code; - } - - public String getDesc() { - return desc; - } - - public void setDesc(String desc) { - this.desc = desc; - } - - public String getLocale() { - return locale; - } - - public void setLocale(String locale) { - this.locale = locale; - } - - public static DataExportStatus getStatusByCode(int code) { - switch (code) { - case 0: - return INIT; - case 1: - return PROCESSING; - case 2: - return FAIL; - case 3: - return SUCCESS; - case 4: - return NODATA; - default: - return UNKNOWN; - } - } - -} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java index f6819de..1f1d07b 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils.java @@ -2,6 +2,7 @@ package com.bfd.mf.common.util.es; import com.alibaba.fastjson.JSONObject; import com.bfd.mf.common.util.constants.ESConstant; +import com.bfd.mf.config.BFDApiConfig; import com.google.common.collect.Maps; import org.apache.http.entity.ContentType; import 
org.apache.http.nio.entity.NStringEntity; @@ -11,6 +12,9 @@ import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsRequest import org.elasticsearch.action.admin.indices.settings.put.UpdateSettingsResponse; import org.elasticsearch.action.search.*; import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.action.update.UpdateRequest; +import org.elasticsearch.action.update.UpdateResponse; import org.elasticsearch.client.Requests; import org.elasticsearch.client.transport.TransportClient; import org.elasticsearch.common.settings.Settings; @@ -19,6 +23,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.script.Script; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.AggregationBuilder; @@ -29,15 +34,19 @@ import org.elasticsearch.transport.client.PreBuiltTransportClient; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.util.Assert; +import springfox.documentation.spring.web.json.Json; import java.net.InetAddress; import java.util.*; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; + public abstract class EsUtils { private static final Logger logger = LoggerFactory.getLogger(EsUtils.class); private static final Map CLIENT_MAP = Maps.newHashMap(); + private static final String DOCS = "docs"; public static void registerCluster(String clusterName, String[] addresses) { System.setProperty("es.set.netty.runtime.available.processors", "false"); @@ -75,19 +84,22 @@ public abstract class EsUtils { boolean options = true; boolean optionsf = false; - CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID + ".keyword"); - if(searchType == 0 
|| searchType == 2) { // 表示查的是主贴,电商数据要根据 docId 消重才行。 - collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); - } +// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID + ".keyword"); +// if(searchType == 0 || searchType == 2) { // 表示查的是主贴,电商数据要根据 docId 消重才行。 +// collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); +// } // from +size 的 分页 查询方式 SearchRequestBuilder requestBuilder = client.prepareSearch().setIndices(index) .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? SortOrder.ASC : SortOrder.DESC) .setQuery(queryBuilder) - .setCollapse(collapseBuilder) + // .setCollapse(collapseBuilder) .setSize(size) .setFrom(from); + + System.out.println(requestBuilder); + SearchResponse searchResponse = requestBuilder.execute().actionGet(); // List dataList = Lists.newLinkedList(); List dataList = new ArrayList<>(); @@ -96,7 +108,7 @@ public abstract class EsUtils { for (int i = 0; i < hits.length; i++) { JSONObject data = new JSONObject(); data.putAll(hits[i].getSourceAsMap()); - data.put("subjectId",hits[i].getIndex().replace("cl_major_","")); + data.put("subjectId",hits[i].getIndex().replace("cl_major_","").replace("cl_subject_","")); dataList.add(data); } } @@ -108,11 +120,11 @@ public abstract class EsUtils { Integer searchType) { boolean options = true; boolean optionsf = false; - CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID+".keyword"); - if(searchType == 0) { // 表示查的是主贴,电商数据要根据 docId 消重才行。 - collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); - } - int querysize = 40000; +// CollapseBuilder collapseBuilder = new CollapseBuilder(ESConstant.DATA_ID+".keyword"); +// if(searchType == 0) { // 表示查的是主贴,电商数据要根据 docId 消重才行。 +// collapseBuilder = new CollapseBuilder(ESConstant.DOC_ID); +// } + int querysize = 400000; try { TransportClient client = getClient(clusterName); // JSONObject settings = 
JSONObject.parseObject("{\"max_result_window\": \"200000000\"}"); @@ -120,31 +132,31 @@ public abstract class EsUtils { settings.put("index.max_result_window","200000000"); updSetting(index[0],settings,client); SearchResponse response = null; - if(searchType == 0){ // 查的是主贴,需要用 limit 取数不然 默认limit =10 - response = client.prepareSearch().setIndices(index) - .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) - .setQuery(queryBuilder) - .setCollapse(collapseBuilder) - .setSize(querysize) - .execute().actionGet(); - }else { // 查的不是主贴,就不用 限制条数了! - response = client.prepareSearch().setIndices(index) - .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) - .setQuery(queryBuilder) - .setCollapse(collapseBuilder) - .execute().actionGet(); - } +// if(searchType == 0){ // 查的是主贴,需要用 limit 取数不然 默认limit =10 +// response = client.prepareSearch().setIndices(index) +// .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) +// .setQuery(queryBuilder) +//// .setCollapse(collapseBuilder) +// .setSize(querysize) +// .execute().actionGet(); +// }else { // 查的不是主贴,就不用 限制条数了! 
+ response = client.prepareSearch().setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(options, options, options, optionsf)) + .setQuery(queryBuilder) +// .setCollapse(collapseBuilder) + .execute().actionGet(); +// } Long size = response.getHits().getTotalHits(); - logger.info(" 聚合前的总量 : "+ size+ " 这个是聚合前的数据量"); + logger.info("[queryTotalCount] 聚合前的总量 : "+ size+ " 这个是聚合前的数据量"); Long aggrSize = Long.valueOf(response.getHits().getHits().length); - logger.info(" 聚合后的数据量 : " + aggrSize); + logger.info("[queryTotalCount] 聚合后的数据量 : " + aggrSize); // 当聚合后的结果为 10000 时 ,或者 是评论数据的时候,就不用聚合了 - if(aggrSize == 10000 || searchType == 1 || searchType == 2){ - size = size; - }else{ - size = aggrSize; - } +// if(aggrSize == 10000 || searchType == 1 || searchType == 2){ +// size = size; +// }else{ +// size = aggrSize; +// } return size; }catch (Exception e){ e.printStackTrace(); @@ -202,15 +214,15 @@ public abstract class EsUtils { if(monitorLists.size() == 0){ // 回收 scrollId boolean clear = clearScroll(client,scrollId); } - result.put("monitorLists", monitorLists); + result.put(ESConstant.MONITORLISTS, monitorLists); } - result.put("foldDocAllNumber", totalCount); - result.put("scrollId", searchResponse.getScrollId()); + result.put(ESConstant.ALLDOCNUMBER, totalCount); + result.put(ESConstant.SCROLLID, searchResponse.getScrollId()); }catch (Exception e){ logger.info("[EsUtils] queryForExport error !!! 
"); - result.put("monitorLists", new ArrayList()); - result.put("foldDocAllNumber", 0L); - result.put("scrollId", ""); + result.put(ESConstant.MONITORLISTS, new ArrayList()); + result.put(ESConstant.ALLDOCNUMBER, 0L); + result.put(ESConstant.SCROLLID, ""); } return new JSONObject(result); } @@ -268,7 +280,8 @@ public abstract class EsUtils { AggregationBuilder aggregationBuilder){ TransportClient client = getClient(clusterName); - SearchRequestBuilder requestBuilder = client.prepareSearch(index).setTypes("docs") + SearchRequestBuilder requestBuilder = client.prepareSearch(index) + .setTypes(DOCS) .setQuery(queryBuilder) .addAggregation(aggregationBuilder); @@ -302,4 +315,25 @@ public abstract class EsUtils { } } + public static long updateByDocId(String clusterName, String index,String script, + String docId, Map params) { + TransportClient client = getClient(clusterName); + try { + UpdateRequest updateRequest = new UpdateRequest(); + updateRequest.index(index); + updateRequest.type(DOCS); + updateRequest.id(docId); + updateRequest.script(new Script((script))); + // 这两句是为了解决 update 延时添加的强制刷新(可以手动上ES对一条数据进行修改后再查询看是否可以及时看到修改后的结果) + updateRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + updateRequest.setRefreshPolicy("true"); + UpdateResponse response = client.update(updateRequest).get(); + + return response.getVersion(); + + }catch (Exception e){ + e.printStackTrace(); + return 0L; + } + } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils2.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils2.java new file mode 100644 index 0000000..56e5ed5 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/es/EsUtils2.java @@ -0,0 +1,453 @@ +package com.bfd.mf.common.util.es; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.util.concurrent.RateLimiter; +import 
org.elasticsearch.action.admin.indices.create.CreateIndexRequest; +import org.elasticsearch.action.admin.indices.create.CreateIndexResponse; +import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; +import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsRequest; +import org.elasticsearch.action.admin.indices.exists.indices.IndicesExistsResponse; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.search.ClearScrollRequestBuilder; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.client.transport.TransportClient; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.transport.client.PreBuiltTransportClient; +import org.joda.time.LocalDateTime; +import org.joda.time.format.DateTimeFormat; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.util.Assert; +import org.springframework.util.CollectionUtils; + +import javax.annotation.PostConstruct; +import java.net.InetAddress; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +public abstract class EsUtils2 { + private static final Logger LOGGER = LoggerFactory.getLogger(EsUtils.class); + private static final Map CLIENT_MAP = Maps.newHashMap(); + + public static void registerCluster(String clusterName, String[] addresses) { + 
System.setProperty("es.set.netty.runtime.available.processors", "false"); + Assert.hasLength(clusterName, "Param clusterName must not be empty."); + Assert.notEmpty(addresses, "Param addresses must not be empty."); + Settings settings = Settings.builder() + .put("cluster.name", clusterName).build(); + TransportClient client = new PreBuiltTransportClient(settings); + try { + for (int i = 0; i < addresses.length; i++) { + String[] ipAndPort = addresses[i].split(":"); + String ip = ipAndPort[0]; + int port = Integer.parseInt(ipAndPort[1]); + client.addTransportAddress(new TransportAddress(InetAddress.getByName(ip), port)); + } + CLIENT_MAP.put(clusterName, client); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** + * 查询 + * produce 查询主贴的时候会调用这个方法哦 + * @param clusterName + * @param indices + * @param size + * @param consumer + */ + public static void scrollQuery(String clusterName, String indices, String type, + QueryBuilder queryBuilder, Integer size, int minutes, + Consumer> consumer) { + TransportClient client = getClient(clusterName); + + SearchRequestBuilder searchRequestBuilder = client.prepareSearch() + .setIndices(indices) + .setIndicesOptions(IndicesOptions.fromOptions(true, true, + true, false)) + .setTypes(type) + .setQuery(queryBuilder) + .setScroll(TimeValue.timeValueMinutes(minutes)) + .setSize(size); + + long s = System.currentTimeMillis(); + SearchResponse response = searchRequestBuilder.execute().actionGet(); + long e = System.currentTimeMillis(); + LOGGER.debug("First query es, size:{}, took:{} ms.", + response.getHits().getHits().length, (e - s)); + List scrollIds = Lists.newArrayList(); + while (response.getHits().getHits().length > 0) { + List dataList = Lists.newLinkedList(); + for (SearchHit hit : response.getHits().getHits()) { + dataList.add(JSON.parseObject(hit.getSourceAsString())); + } + consumer.accept(dataList); + if (dataList.size() < size) { + break; + } + String scrollId = response.getScrollId(); + 
scrollIds.add(scrollId); + long s1 = System.currentTimeMillis(); + response = client.prepareSearchScroll(scrollId) + .setScroll(TimeValue.timeValueMinutes(minutes)) + .execute() + .actionGet(); + long e1 = System.currentTimeMillis(); + LOGGER.debug("Query es, size:{}, took:{} ms", + response.getHits().getHits().length, (e1 - s1)); + } + if (!CollectionUtils.isEmpty(scrollIds)) { + ClearScrollRequestBuilder clearScroll = client.prepareClearScroll() + .setScrollIds(scrollIds); + client.clearScroll(clearScroll.request()); + } + } + + /** + * 查询前[size]满足条件的数据 + * + * @param clusterName + * @param index + * @param queryBuilder + * @param size + * @return 没有数据:返回null,有数据:返回数据列表 + */ + public static List query(String clusterName, String index, final QueryBuilder queryBuilder, int size) { + TransportClient client = getClient(clusterName); + + SearchResponse response = client.prepareSearch() + .setIndices(index) + .setIndicesOptions(IndicesOptions.fromOptions(true, true, + true, false)) + .setSize(size) + .setFrom(0) + .setQuery(queryBuilder) + .execute().actionGet(); + if (response.getHits().totalHits > 0) { + List dataList = Lists.newLinkedList(); + SearchHit[] hits = response.getHits().getHits(); + for (int i = 0; i < hits.length; i++) { + JSONObject data = new JSONObject(); + data.putAll(hits[i].getSourceAsMap()); + dataList.add(data); + } + return dataList; + } + + return null; + } + + /** + * 根据时间范围获取index集合 + * + * @param startMills 起始时间(ms) + * @param endMils 结束时间(ms) + * @return + */ + public static String[] getIndices(String prefix, String separator, + long startMills, long endMils, String pattern) { + List indexList = Lists.newArrayList(); + LocalDateTime start = new LocalDateTime(startMills); + LocalDateTime end = new LocalDateTime(endMils); + for (LocalDateTime dt = start; dt.isBefore(end); dt = dt.plusDays(1)) { + String dtStr = dt.toString(DateTimeFormat.forPattern(pattern)); + String index = new StringBuilder() + .append(prefix) + .append(separator) + 
.append(dtStr) + .toString(); + indexList.add(index); + } + + String[] indices = new String[indexList.size()]; + indices = indexList.toArray(indices); + return indices; + } + + /** + * 根据时间范围获取index集合 + * + * @param startMills 起始时间(ms) + * @param endMils 结束时间(ms) + * @return + */ + public static String[] getIndices(String prefix, String separator, + long startMills, long endMils, String pattern, Long upperMills, String standbyIndex) { + List indexList = Lists.newArrayList(); + LocalDateTime start = new LocalDateTime(startMills); + LocalDateTime end = new LocalDateTime(endMils); + LocalDateTime upper = new LocalDateTime(upperMills); + if (start.isBefore(upper)) { + indexList.add(standbyIndex); + start = upper; + } + for (LocalDateTime dt = start; dt.isEqual(end) || dt.isBefore(end); dt = dt.plusDays(1)) { + String dtStr = dt.toString(DateTimeFormat.forPattern(pattern)); + String index = new StringBuilder() + .append(prefix) + .append(separator) + .append(dtStr) + .toString(); + indexList.add(index); + } + + String[] indices = new String[indexList.size()]; + indices = indexList.toArray(indices); + return indices; + } + + /** + * 根据indexName获取一定存在的index + * 如果indexName存在则返回,不存在则创建 + * + * @param clusterName + * @param indexName + * @param type + * @param mappingFile + * @return + */ +// public static String getOrCreateIndex(String clusterName, String indexName, String type, +// int shard, int replica, String mappingFile) { +// try { +// if (!EsUtils.exists(clusterName, indexName)) { +// byte[] bytes = Files.readAllBytes(Paths.get(mappingFile)); +// String mappingDef = new String(bytes); +// boolean flag = EsUtils.createIndex(clusterName, indexName, type, +// shard, replica, mappingDef); +// if (!flag) { +// throw new RuntimeException("Create index " + indexName + " error."); +// } +// } +// } catch (Exception e) { +// throw new RuntimeException(e); +// } +// +// return indexName; +// } + + /** + * index一个文档 + * + * @param clusterName + * @param indexName + * @param 
data + * @return + */ + public static String index(String clusterName, String indexName, String type, final JSONObject data, String idField) { + TransportClient client = getClient(clusterName); + IndexResponse response = client.prepareIndex(indexName, type) + .setSource(data, XContentType.JSON) + .setId(data.getString(idField)) + .get(); + return response.getId(); + } + + /** + * index一个文档 + * + * @param clusterName + * @param indexName + * @param dataList + * @return + */ + public static void index(String clusterName, String indexName, String type, final List dataList, String idField) { + if (CollectionUtils.isEmpty(dataList)) { + return; + } + TransportClient client = getClient(clusterName); + for (int i = 0; i < dataList.size(); i++) { + JSONObject data = dataList.get(i); + client.prepareIndex(indexName, type) + .setSource(data, XContentType.JSON) + .setId(data.getString(idField)) + .get(); + } + } + + /** + * 批量index文档 + * @param clusterName + * @param bulkItemList + * @return + */ + public static boolean bulkIndex(String clusterName, final List bulkItemList, String idField) { + if (CollectionUtils.isEmpty(bulkItemList)) { + return true; + } + TransportClient client = getClient(clusterName); + BulkRequestBuilder rb = client.prepareBulk(); + for (BulkItem item : bulkItemList) { + rb.add(client.prepareIndex(item.getIndexName(), item.getType(), item.getData().getString(idField)) + .setSource(item.getData(), XContentType.JSON)); + } + BulkResponse response = rb.get(); + LOGGER.info("Bulk index, size:{}.", bulkItemList.size()); + return response.hasFailures(); + } + + /** + * 判断索引是否存在 + * + * @param clusterName + * @param indexName + * @return + */ + public static Boolean exists(String clusterName, String indexName) { + TransportClient client = getClient(clusterName); + IndicesExistsRequest request = new IndicesExistsRequest() + .indices(indexName); + IndicesExistsResponse response = client.admin().indices().exists(request).actionGet(); + return response.isExists(); 
+ } + + /** + * 创建一个index + * + * @param clusterName + * @param indexName + * @param type + * @param shardCount + * @param replicaCount + * @param mappingDef + * @return + */ + public static Boolean createIndex(String clusterName, String indexName, String type, + Integer shardCount, Integer replicaCount, String mappingDef) { + TransportClient client = getClient(clusterName); + CreateIndexRequest request = new CreateIndexRequest(indexName); + request.settings(Settings.builder() + .put("index.number_of_shards", shardCount) + .put("index.number_of_replicas", replicaCount) + .put("index.refresh_interval", 2, TimeUnit.SECONDS) + .put("index.analysis.filter.shingle_filter.type", "shingle") + .put("index.analysis.filter.shingle_filter.min_shingle_size", 2) + .put("index.analysis.filter.shingle_filter.max_shingle_size", 2) + .put("index.analysis.filter.shingle_filter.output_unigrams", false) + .put("index.analysis.analyzer.shingle_analyzer.type", "custom") + .put("index.analysis.analyzer.shingle_analyzer.tokenizer", "ik_smart") + .putArray("index.analysis.analyzer.shingle_analyzer.filter", "lowercase", "shingle_filter") + ); + + request.mapping(type, mappingDef, XContentType.JSON); + CreateIndexResponse createIndexResponse = client.admin().indices().create(request).actionGet(); + boolean acknowledged = createIndexResponse.isAcknowledged(); + boolean shardsAcknowledged = createIndexResponse.isShardsAcked(); + if (acknowledged && shardsAcknowledged) { + return true; + } + + return false; + } + + /** + * 删除index + * + * @param clusterName + * @param indexName + * @return + */ + public static Boolean deleteIndex(String clusterName, String indexName) { + TransportClient client = getClient(clusterName); + DeleteIndexRequest request = new DeleteIndexRequest() + .indices(indexName); + AcknowledgedResponse response = client.admin().indices().delete(request).actionGet(); + return response.isAcknowledged(); + } + + private static TransportClient getClient(String clusterName) { + 
return CLIENT_MAP.get(clusterName); + } + + public static BulkItem buildBulkItem(String indexName, String type, final JSONObject data) { + return new BulkItem() + .setIndexName(indexName) + .setType(type) + .setData(data); + } + + + /** + * 查询某个Index 的总量 + */ + + public static Long scrollQuery(String clusterName, String indices, String type, + QueryBuilder queryBuilder){ + Long totalHits = 0L; + try{ + TransportClient client = getClient(clusterName); + SearchRequestBuilder searchRequestBuilder = client.prepareSearch() + .setIndices(indices) + .setIndicesOptions(IndicesOptions.fromOptions(true, true, + true, false)) + .setTypes(type) + .setQuery(queryBuilder); + SearchResponse response = searchRequestBuilder.execute().actionGet(); + totalHits = response.getHits().totalHits; + }catch (Exception e){ + e.printStackTrace(); + } + return totalHits; + } + + public static class BulkItem { + String indexName; + String type; + JSONObject data; + + public String getIndexName() { + return indexName; + } + + public BulkItem setIndexName(String indexName) { + this.indexName = indexName; + return this; + } + + public String getType() { + return type; + } + + public BulkItem setType(String type) { + this.type = type; + return this; + } + + public JSONObject getData() { + return data; + } + + public BulkItem setData(JSONObject data) { + this.data = data; + return this; + } + + public BulkItem setStringData(String data) { + this.type = data; + return this; + } + } + +// public static void etl(String srcClusterName, String srcIndex, String srcType, QueryBuilder qb, +// Integer size, int minutes, +// String tarClusterName, String tarIndex, String tarType, String idField) { +// scrollQuery(srcClusterName, new String[]{srcClusterName}, srcType, qb, size, minutes, dataList -> { +// EsUtils.index(tarClusterName, tarIndex, tarType, dataList, idField); +// }); +// } +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java 
b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java index 1a36cf6..bbb30ad 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/slice/SliceScrollUtil.java @@ -40,7 +40,6 @@ public class SliceScrollUtil { */ private void parseEsData(SearchResponse searchResponse, List list) { if (searchResponse != null) { - logger.info("[SliceScrollUtil] parseEsData : "); SearchHits hits = searchResponse.getHits(); SearchHit[] searchHists = hits.getHits(); for (SearchHit searchHit : searchHists) { @@ -96,6 +95,12 @@ public class SliceScrollUtil { if(sourceAsMap.containsKey(ESConstant.PICTURE_LIST)) { esMonitorBaseEntity.setPictureList((List) sourceAsMap.getOrDefault(ESConstant.PICTURE_LIST, new ArrayList<>())); } + if(sourceAsMap.containsKey(ESConstant.VALUELABEL) && null !=sourceAsMap.get(ESConstant.VALUELABEL)){ + esMonitorBaseEntity.setValueLabel(sourceAsMap.get(ESConstant.VALUELABEL).toString()); + } + if(sourceAsMap.containsKey(ESConstant.CATEGORYLABEL)){ + esMonitorBaseEntity.setCategoryLabel(sourceAsMap.get(ESConstant.CATEGORYLABEL).toString()); + } try { esMonitorBaseEntity.setHlKeyWords((List) sourceAsMap.getOrDefault(ESConstant.HL_KEYWORDS, new ArrayList<>())); } catch (Exception e) { @@ -131,7 +136,7 @@ public class SliceScrollUtil { Cluster cluster = null; List currentIndexList = null; if(null != queryRequest.getSubjectId() && !("all").equals(queryRequest.getSubjectId())){ - System.out.println("查询 【专题数据】 subjectId = " + queryRequest.getSubjectId()); + logger.info("查询 【专题数据】 subjectId = " + queryRequest.getSubjectId()); cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); }else{ @@ -142,7 +147,6 @@ public class SliceScrollUtil { Long clusterId = cluster.getId(); logger.info("[SliceScrollUtil] dataAnalysisCloud : 
queryDataList clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); - logger.info("==========进入数据分析Es and Cache,计算开始执行============"); String sortFlag = ""; String orderFlag = "desc"; @@ -166,16 +170,16 @@ public class SliceScrollUtil { } catch (Exception e) { e.printStackTrace(); } - //return jsonObjectList.stream().distinct().collect(Collectors.toList()); - return jsonObjectList; + return jsonObjectList.stream().distinct().collect(Collectors.toList()); + //return jsonObjectList; } + + public void parseResponseList(List jsonObjectList, List responseList) { for (SearchResponse searchResponse : responseList) { // 解析数据 parseEsData(searchResponse, jsonObjectList); } } - - } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java index 20605eb..bbeb741 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/spread/SpreadServiceUtil.java @@ -179,6 +179,32 @@ public class SpreadServiceUtil { } +// public static List> convertToTimeStringList(List> trendList,Long[] timeList, Integer type) { +// String[] timeStringList = new String[timeList.length]; +// try { +// String dateFormat; +// if (Objects.equals(type, SpreadEnums.TREND.UNIT_HOUR)) { +// dateFormat = DateUtil.TIME_FORMAT; +// } else if (Objects.equals(type, SpreadEnums.TREND.UNIT_DAY)) { +// dateFormat = DateUtil.DATE_FORMAT; +// } else { +// throw new Exception(" !!! 
type is illegal !!!"); +// } +// +// for (int i = 0; i < timeList.length; i++) { +// Map map = new HashMap<>(); +// timeStringList[i] = DateUtil.parseDateByFormat(timeList[i], dateFormat); +// map.put("name",timeStringList[i]); +// trendList.add(map); +// } +// } catch (Exception e) { +// logger.error("[SpreadServiceUtil] getTimeStringList: failed, timeList is {}, error is ", Arrays.asList(timeStringList), e); +// } +// // return timeStringList; +// return trendList; +// } + + /** * 构造list搜索返回值 */ diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/thread/SubjectDataQueryThread.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/thread/SubjectDataQueryThread.java index 18f4b8a..96545b3 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/thread/SubjectDataQueryThread.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/thread/SubjectDataQueryThread.java @@ -68,7 +68,7 @@ public class SubjectDataQueryThread implements Runnable { } latch.countDown(); } catch (Exception e) { - logger.error("当前线程errors = {},线程 = {} ", ExceptionUtils.getFullStackTrace(e), Thread.currentThread().getId()); + logger.error("当前线程 errors = {},线程 = {} ", ExceptionUtils.getFullStackTrace(e), Thread.currentThread().getId()); } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/DateUtil.java b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/DateUtil.java index fca7955..80b21fb 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/DateUtil.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/util/utility/DateUtil.java @@ -19,11 +19,13 @@ package com.bfd.mf.common.util.utility; import com.bfd.nlp.common.util.string.TStringUtils; import java.io.UnsupportedEncodingException; +import java.sql.Timestamp; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; +import java.util.TimeZone; public class DateUtil { @@ 
-32,6 +34,7 @@ public class DateUtil { public static String TIME_FORMAT = "yyyy-MM-dd HH:mm:ss"; public static String DATE_FORMAT = "yyyy-MM-dd"; + public static String YEAR_FORMAT = "yyyy"; /** * @param startTime 开始时间 @@ -100,6 +103,12 @@ public class DateUtil { return format.format(date); } + public static String parseDateByyear(long timestamp) { + Date date = parseDate(timestamp); + SimpleDateFormat format = new SimpleDateFormat(YEAR_FORMAT); + return format.format(date); + } + /*** * timestamp to yyyy-MM-dd HH:mm:ss * @@ -289,4 +298,70 @@ public class DateUtil { return new Date(); } } + + /** + * 获得服务器当前日期及时间,以格式为:yyyy-MM-dd HH:mm:ss的日期字符串形式返回 + */ + public static String getDateTime(){ + try{ + SimpleDateFormat datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + return datetime.format(Calendar.getInstance().getTime()); + } catch(Exception e){ + //log.debug("DateUtil.getDateTime():" + e.getMessage()); + return ""; + } + } + /** + * 获得服务器当前日期及时间,以格式为:yyyy-MM-dd HH:mm:ss的日期字符串形式返回 + */ + public static String getDateTime(long date){ + try{ + SimpleDateFormat datetime = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + return datetime.format(new Date(date)); + } catch(Exception e){ + // log.debug("DateUtil.getDateTime():" + e.getMessage()); + return ""; + } + } + public static long getcurr(){ + Date date = new Date(); + Long l_date = date.getTime(); + return l_date; + } + +// public static long getDayStart(long time){ +// long zero = time/(1000*3600*24)*(1000*3600*24)- TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 +// long zero2 = time/(1000*3600*24)*(1000*3600*24) - TimeZone.getDefault().getRawOffset(); +// return zero; +// } +// public static long getDayEnd(long time){ +// //long zero=time/(1000*3600*24)*(1000*3600*24)- TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 +// long twelve=time+24*60*60*1000-1;//今天23点59分59秒的毫秒数 +// return twelve; +// } + +// public static void main(String[] args) { +// long time = 1611591055000L ; +// long start = 
getDayStart(time); +// long end = getDayEnd(start); +// +// +// System.out.println(time); +// System.out.println(start); +// System.out.println(end); +// +// System.out.println(parseDateByday(time)); +// System.out.println(parseDateByday(start)); +// System.out.println(parseDateByday(end)); +// +// +// long zero=time/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset();//今天零点零分零秒的毫秒数 +// long twelve=zero+24*60*60*1000-1;//今天23点59分59秒的毫秒数 +// long yesterday=System.currentTimeMillis()-24*60*60*1000;//昨天的这一时间的毫秒数 +// System.out.println(new Timestamp(time));//当前时间 +// System.out.println(new Timestamp(yesterday));//昨天这一时间点 +// System.out.println(new Timestamp(zero));//今天零点零分零秒 +// System.out.println(new Timestamp(twelve));//今天23点59分59秒 +// +// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/entity/mysql/User.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/entity/mysql/User.java deleted file mode 100644 index 4b7c3b8..0000000 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/entity/mysql/User.java +++ /dev/null @@ -1,322 +0,0 @@ -///* -// * Copyright (C) 2016 Baifendian Corporation -// *

-// * Licensed under the Apache License, Version 2.0 (the "License"); -// * you may not use this file except in compliance with the License. -// * You may obtain a copy of the License at -// *

-// * http://www.apache.org/licenses/LICENSE-2.0 -// *

-// * Unless required by applicable law or agreed to in writing, software -// * distributed under the License is distributed on an "AS IS" BASIS, -// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// * See the License for the specific language governing permissions and -// * limitations under the License. -// */ -// -//package com.bfd.mf.common.web.entity.mysql; -// -//import org.apache.commons.lang3.builder.ToStringBuilder; -// -//import javax.persistence.Column; -//import javax.persistence.Entity; -//import javax.persistence.Table; -//import javax.persistence.Transient; -//import java.io.Serializable; -// -//@Entity -//@Table(name = "cl_user") -//public class User extends Model implements Serializable { -// -// private static final long serialVersionUID = 1947459646071591960L; -// -// @Column(name = "token") -// private String token; -// -// @Column(name = "user_group_id") -// private Long userGroupId; -// -// @Column(name = "account") -// private String account; -// -// @Column(name = "username") -// private String userName; -// -// @Column(name = "password") -// private String password; -// -// @Column(name = "parent_id") -// private Long parentId; -// -// @Column(name = "email") -// private String email; -// -// @Column(name = "wechat") -// private String wechat; -// -// @Column(name = "wechat_id") -// private String wechatId; -// -// // Y N -// @Column(name = "wechat_verified") -// private String wechatVerified; -// @Column(name = "wechat_open_id") -// private String wechatOpenId; -// -// @Column(name = "mobile") -// private String mobile; -// -// @Column(name = "role_id") -// private String roleId; -// -// @Column(name = "status") -// private Integer status; -// -// @Column(name = "last_login_time") -// private Long lastLoginTime; -// -// @Column(name = "company_id") -// private Long companyId; -// -// @Column(name = "created_time") -// private Long createdTime; -// -// @Column(name = "updated_time") -// private Long updatedTime; 
-// -// @Column(name = "del") -// private Boolean del; -// -// @Column(name = "is_shadow") -// private Integer isShadow; -// -// @Transient -// private Integer foreignChannelStatus; -// -// @Transient -// private Boolean commentStatus; -// -// @Transient -// private Integer cacheStatus; -// -// public Integer getCacheStatus() { -// return cacheStatus; -// } -// -// public void setCacheStatus(Integer cacheStatus) { -// this.cacheStatus = cacheStatus; -// } -// -// public User() { -// } -// -// public Integer getForeignChannelStatus() { -// return foreignChannelStatus; -// } -// -// public void setForeignChannelStatus(Integer foreignChannelStatus) { -// this.foreignChannelStatus = foreignChannelStatus; -// } -// -// public Integer getIsShadow() { -// return isShadow; -// } -// -// public void setIsShadow(Integer isShadow) { -// this.isShadow = isShadow; -// } -// -// public Long getUserGroupId() { -// return userGroupId; -// } -// -// public void setUserGroupId(Long userGroupId) { -// this.userGroupId = userGroupId; -// } -// -// public String getUserName() { -// return userName; -// } -// -// public void setUserName(String userName) { -// this.userName = userName; -// } -// -// public String getToken() { -// return token; -// } -// -// public void setToken(String token) { -// this.token = token; -// } -// -// -// public String getAccount() { -// return account; -// } -// -// public void setAccount(String account) { -// this.account = account; -// } -// -// public String getPassword() { -// return password; -// } -// -// public void setPassword(String password) { -// this.password = password; -// } -// -// public long getParentId() { -// return parentId; -// } -// -// public void setParentId(long parentId) { -// this.parentId = parentId; -// } -// -// public String getEmail() { -// return email; -// } -// -// public void setEmail(String email) { -// this.email = email; -// } -// -// public String getWechat() { -// return wechat; -// } -// -// public void 
setWechat(String wechat) { -// this.wechat = wechat; -// } -// -// public String getWechatId() { -// return wechatId; -// } -// -// public void setWechatId(String wechatId) { -// this.wechatId = wechatId; -// } -// -// public String getMobile() { -// return mobile; -// } -// -// public void setMobile(String mobile) { -// this.mobile = mobile; -// } -// -// public String getRoleId() { -// return roleId; -// } -// -// public void setRoleId(String roleId) { -// this.roleId = roleId; -// } -// -// public void setStatus(Integer status) { -// this.status = status; -// } -// -// public int getStatus() { -// return status; -// } -// -// public void setStatus(int status) { -// this.status = status; -// } -// -// public long getLastLoginTime() { -// return lastLoginTime; -// } -// -// public Long getCompanyId() { -// return companyId; -// } -// -// public void setCompanyId(Long companyId) { -// this.companyId = companyId; -// } -// -// public Long getCreatedTime() { -// return createdTime; -// } -// -// public void setCreatedTime(Long createdTime) { -// this.createdTime = createdTime; -// } -// -// public Long getUpdatedTime() { -// return updatedTime; -// } -// -// public void setUpdatedTime(Long updatedTime) { -// this.updatedTime = updatedTime; -// } -// -// public void setParentId(Long parentId) { -// this.parentId = parentId; -// } -// -// public void setLastLoginTime(Long lastLoginTime) { -// this.lastLoginTime = lastLoginTime; -// } -// -// public Boolean getDel() { -// return del; -// } -// -// public void setDel(Boolean del) { -// this.del = del; -// } -// -// public String getWechatVerified() { -// return wechatVerified; -// } -// -// public void setWechatVerified(String wechatVerified) { -// this.wechatVerified = wechatVerified; -// } -// -// public String getWechatOpenId() { -// return wechatOpenId; -// } -// -// public void setWechatOpenId(String wechatOpenId) { -// this.wechatOpenId = wechatOpenId; -// } -// -// public Boolean getCommentStatus() { -// return 
commentStatus; -// } -// -// public void setCommentStatus(Boolean commentStatus) { -// this.commentStatus = commentStatus; -// } -// -// @Override -// public String toString() { -// return new ToStringBuilder(this) -// .append("token", token) -// .append("account", account) -// .append("userName", userName) -// .append("password", password) -// .append("parentId", parentId) -// .append("email", email) -// .append("wechat", wechat) -// .append("wechatId", wechatId) -// .append("wechatVerified", wechatVerified) -// .append("wechatOpenId", wechatOpenId) -// .append("mobile", mobile) -// .append("roleId", roleId) -// .append("status", status) -// .append("lastLoginTime", lastLoginTime) -// .append("companyId", companyId) -// .append("commentStatus", commentStatus) -// .append("createdTime", createdTime) -// .append("updatedTime", updatedTime) -// .append("del", del) -// .toString(); -// } -//} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/entity/mysql/topic/Subject.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/entity/mysql/topic/Subject.java index 50e939b..5af170e 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/entity/mysql/topic/Subject.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/entity/mysql/topic/Subject.java @@ -24,6 +24,9 @@ public class Subject extends SubjectModel implements Serializable { @Column(name = "name") private String name; + @Column(name = "status") + private Integer status; + @Column(name = "node_type") private Integer nodeType; @@ -101,6 +104,14 @@ public class Subject extends SubjectModel implements Serializable { this.name = name; } + public Integer getStatus() { + return status; + } + + public void setStatus(Integer status) { + this.status = status; + } + public Integer getNodeType() { return nodeType; } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/base/SiteRepository.java 
b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/base/SiteRepository.java index dcd876c..499e5bd 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/base/SiteRepository.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/base/SiteRepository.java @@ -11,15 +11,20 @@ import java.util.Map; @Repository public interface SiteRepository extends CrudRepository { - @Query(value = "select cid,site_id from cl_site WHERE del = 0", nativeQuery = true) - List> findClusterByDel(int del); + @Query(value = "SELECT cid,site_id,site_icon,site_type FROM cl_site WHERE site_id IS NOT NULL AND del = 0", nativeQuery = true) + List> findsiteByDel(int del); +// @Query(value = "select cid,site_icon from cl_site WHERE del = 0", nativeQuery = true) +// List> findsiteIconByDel(int del); - @Query(value = "select cid from cl_site WHERE area =?1 ", nativeQuery = true) + @Query(value = "SELECT cid FROM cl_site WHERE area =?1 ", nativeQuery = true) List findCidsByArea(String area); // @Query(value = "select id,site_id,cid from cl_site WHERE del = 0", nativeQuery = true) // Site findAllSiteIds(); + @Query(value = "SELECT cid,site_id,site_icon,site_type FROM cl_site WHERE cid = ?1 AND del = 0", nativeQuery = true) + List> findSiteByEnSource(String enSource); + } diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/ParseExcelTaskRepository.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/ParseExcelTaskRepository.java index 3fddb2c..7144943 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/ParseExcelTaskRepository.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/repository/mysql/topic/ParseExcelTaskRepository.java @@ -1,6 +1,8 @@ package com.bfd.mf.common.web.repository.mysql.topic; import com.bfd.mf.common.web.entity.mysql.topic.ParseExcelTask; +import com.bfd.mf.common.web.entity.mysql.topic.Subject; +import 
org.apache.ibatis.annotations.Options; import org.springframework.data.jpa.repository.Modifying; import org.springframework.data.jpa.repository.Query; import org.springframework.data.repository.CrudRepository; @@ -36,7 +38,24 @@ public interface ParseExcelTaskRepository extends CrudRepository + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.bfd.mf.common.web.vo.view.analysis; + +import java.util.HashMap; +import java.util.Map; + +public class DataCount extends DataCountBase implements Comparable { + + private String name; + private String type; + private Long value; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public Long getValue() { + return value; + } + + public void setValue(Long value) { + this.value = value; + } + + @Override + public String toString() { + return "DataCount{" + + "name='" + name + '\'' + + ", type='" + type + '\'' + + ", value=" + value + + '}'; + } + + @Override + public int compareTo(DataCount o) { + return 0; + } + /** + * 时间戳 + */ + // private Long timestamp; + /** + * 值 + */ + // private Long data; + + /** + * 坐标 + */ + // private String name; + + public static Map getChannelMap() { + Map channelMap = new HashMap<>(); + channelMap.put("social","社交媒体"); + channelMap.put("video","网络视频"); + channelMap.put("news","新闻资讯"); + channelMap.put("blog","博客智库"); + channelMap.put("bbs","论坛贴吧"); + channelMap.put("search","搜索引擎"); + channelMap.put("item","电商网站"); + channelMap.put("life","生活方式"); + return channelMap; + } + + public static String getChannel(String docType) { + Map channelMap = new HashMap<>(); + channelMap.put("social","社交媒体"); + channelMap.put("video","网络视频"); + channelMap.put("news","新闻资讯"); + channelMap.put("blog","博客智库"); + channelMap.put("bbs","论坛贴吧"); + channelMap.put("search","搜索引擎"); + channelMap.put("item","电商网站"); + 
channelMap.put("life","生活方式"); + return channelMap.get(docType); + } + + public static String getSentimentMap(String sentiment) { + Map sentimentMap = new HashMap<>(); + sentimentMap.put("0","Neutral"); + sentimentMap.put("0.0","Neutral"); + sentimentMap.put("0.1","Negative"); + sentimentMap.put("0.5","Neutral"); + sentimentMap.put("0.9","Positive"); + return sentimentMap.get(sentiment); + } + + +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/analysis/DataLineCount.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/analysis/DataLineCount.java index e4e2399..0cef43a 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/analysis/DataLineCount.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/analysis/DataLineCount.java @@ -1,88 +1,88 @@ -/* - * Copyright (C) 2016 Baifendian Corporation - *

- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.bfd.mf.common.web.vo.view.analysis; - -/** - * 折线图统计 - * - * @author quanyou.chang - * @create 2016-09-22 10:43 - **/ -public class DataLineCount extends DataCountBase implements Comparable { - /** - * 时间戳 - */ - private Long timestamp; - /** - * 值 - */ - private Long data; - - public Long getTimestamp() { - return timestamp; - } - - public void setTimestamp(Long timestamp) { - this.timestamp = timestamp; - } - - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - /** - * 坐标 - */ - private String name; - - - public Long getData() { - return data; - } - - public void setData(Long data) { - this.data = data; - } - - @Override - public int compareTo(DataLineCount o) { - return this.getTimestamp().compareTo(o.getTimestamp()); - } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - DataLineCount that = (DataLineCount) o; - - if (timestamp != null ? !timestamp.equals(that.timestamp) : that.timestamp != null) return false; - return name != null ? name.equals(that.name) : that.name == null; - - } - - @Override - public int hashCode() { - int result = timestamp != null ? timestamp.hashCode() : 0; - result = 31 * result + (name != null ? name.hashCode() : 0); - return result; - } -} +///* +// * Copyright (C) 2016 Baifendian Corporation +// *

+// * Licensed under the Apache License, Version 2.0 (the "License"); +// * you may not use this file except in compliance with the License. +// * You may obtain a copy of the License at +// *

+// * http://www.apache.org/licenses/LICENSE-2.0 +// *

+// * Unless required by applicable law or agreed to in writing, software +// * distributed under the License is distributed on an "AS IS" BASIS, +// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// * See the License for the specific language governing permissions and +// * limitations under the License. +// */ +// +//package com.bfd.mf.common.web.vo.view.analysis; +// +///** +// * 折线图统计 +// * +// * @author quanyou.chang +// * @create 2016-09-22 10:43 +// **/ +//public class DataLineCount extends DataCountBase implements Comparable { +// /** +// * 时间戳 +// */ +// private Long timestamp; +// /** +// * 值 +// */ +// private Long data; +// +// public Long getTimestamp() { +// return timestamp; +// } +// +// public void setTimestamp(Long timestamp) { +// this.timestamp = timestamp; +// } +// +// +// public String getName() { +// return name; +// } +// +// public void setName(String name) { +// this.name = name; +// } +// +// /** +// * 坐标 +// */ +// private String name; +// +// +// public Long getData() { +// return data; +// } +// +// public void setData(Long data) { +// this.data = data; +// } +// +// @Override +// public int compareTo(DataLineCount o) { +// return this.getTimestamp().compareTo(o.getTimestamp()); +// } +// @Override +// public boolean equals(Object o) { +// if (this == o) return true; +// if (o == null || getClass() != o.getClass()) return false; +// +// DataLineCount that = (DataLineCount) o; +// +// if (timestamp != null ? !timestamp.equals(that.timestamp) : that.timestamp != null) return false; +// return name != null ? name.equals(that.name) : that.name == null; +// +// } +// +// @Override +// public int hashCode() { +// int result = timestamp != null ? timestamp.hashCode() : 0; +// result = 31 * result + (name != null ? 
name.hashCode() : 0); +// return result; +// } +//} diff --git a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java index fa6d2b7..8d39b6a 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java +++ b/cl_search_api/src/main/java/com/bfd/mf/common/web/vo/view/monitor/ESMonitorBaseEntity.java @@ -32,7 +32,7 @@ public class ESMonitorBaseEntity implements Comparable, Ser private static final long serialVersionUID = 3296318610448822680L; private String docId = ""; //unique Id - private String dataId ; + private String dataId ; // 唯一ID private String author; // 作者 private String authorId; // 作者ID @@ -40,30 +40,34 @@ public class ESMonitorBaseEntity implements Comparable, Ser private String title; // 文章title private Long pubTime = 0L; //文章发布时间 - private String pubTimeStr ; + private String pubTimeStr ; // 发表时间 + private Long crawlTime; // 抓取时间 + private String crawlTimeStr; // 抓取时间 + // 情感相关 private Double sysSentiment; private String sysSentimentTag; //敏感 OR 非敏感 private Double sysSentimentValue; - + // 链接相关 private String url; //链接地址 + private String userUrl; // 用户链接 private String urlHash; //url hash - private String source; //网站名称 - private String enSource; + private String source; //网站中文名称 + private String enSource; // 网站英文名称 private String siteId; private String docType; //网站类型 private String channel; //渠道 + private String icon; // 图标 + private String siteType ; // 数据渠道类型 private Integer availability; - // private static String DEFAULT_ICON = "http://tva1.sinaimg.cn/default/images/default_avatar_male_180.gif"; -// private String icon = DEFAULT_ICON; //图标 private String commentId; - private String content; + private String content; // 内容 private String contentSimHash; //文章SimHash private Integer contentSize; //正文长度 - + // 转、赞、评 数字 private String quoteCount ; private String attitudesCount; 
private Integer commentsCount = 0; @@ -78,23 +82,23 @@ public class ESMonitorBaseEntity implements Comparable, Ser private List pictureList; private String videoUrl; - private Long crawlTime; private String postSource; private boolean isDownload; private List filePath; private List imagePath; private List videoPath; - // private List> fileList; -// private List> imageList; -// private List> videoList; private List> filePathSize; private List> imagePathSize; private List> videoPathSize; // 用户信息 private String city; private String province; - // 专题ID + private String fansCount; + private String friendsCount; + private String postCount; + private String location; + // 专题ID 任务ID 主贴回帖区分 抓取任务标识 private String subjectId; private String taskId; private Integer primary; @@ -102,6 +106,136 @@ public class ESMonitorBaseEntity implements Comparable, Ser // 译文 private String translateTitle; private String translateContent; + // 视频流分析结果 + private List ocrText; + private String asrText; + // 电商相关字段 + private String productParameter; + private String price; + + private String valueLabel; + private String categoryLabel; + private String tag; + + public String getTag() { + return tag; + } + + public void setTag(String tag) { + this.tag = tag; + } + + public String getValueLabel() { + return valueLabel; + } + + public void setValueLabel(String valueLabel) { + this.valueLabel = valueLabel; + } + + public String getCategoryLabel() { + return categoryLabel; + } + + public void setCategoryLabel(String categoryLabel) { + this.categoryLabel = categoryLabel; + } + + public String getSiteType() { + return siteType; + } + + public void setSiteType(String siteType) { + this.siteType = siteType; + } + + public String getProductParameter() { + return productParameter; + } + + public void setProductParameter(String productParameter) { + this.productParameter = productParameter; + } + + public String getPrice() { + return price; + } + + public void setPrice(String price) { + this.price = price; + 
} + + public List getOcrText() { + return ocrText; + } + + public void setOcrText(List ocrText) { + this.ocrText = ocrText; + } + + public String getAsrText() { + return asrText; + } + + public void setAsrText(String asrText) { + this.asrText = asrText; + } + + public String getCrawlTimeStr() { + return crawlTimeStr; + } + + public void setCrawlTimeStr(String crawlTimeStr) { + this.crawlTimeStr = crawlTimeStr; + } + + public String getUserUrl() { + return userUrl; + } + + public void setUserUrl(String userUrl) { + this.userUrl = userUrl; + } + + public String getFansCount() { + return fansCount; + } + + public void setFansCount(String fansCount) { + this.fansCount = fansCount; + } + + public String getFriendsCount() { + return friendsCount; + } + + public void setFriendsCount(String friendsCount) { + this.friendsCount = friendsCount; + } + + public String getPostCount() { + return postCount; + } + + public void setPostCount(String postCount) { + this.postCount = postCount; + } + + public String getLocation() { + return location; + } + + public void setLocation(String location) { + this.location = location; + } + + public String getIcon() { + return icon; + } + + public void setIcon(String icon) { + this.icon = icon; + } public String getEnSource() { return enSource; diff --git a/cl_search_api/src/main/java/com/bfd/mf/config/BFDApiConfig.java b/cl_search_api/src/main/java/com/bfd/mf/config/BFDApiConfig.java index b0431bf..9e31e8a 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/config/BFDApiConfig.java +++ b/cl_search_api/src/main/java/com/bfd/mf/config/BFDApiConfig.java @@ -46,6 +46,53 @@ public class BFDApiConfig { private Integer emotionPostType; private boolean codisCacheFlag = false; private ReportBehaviorInterface reportBehaviorInterface; + + private String goFastPostUrl; + private String goFastDomain; + private String uploadOLYExcelPath; + private String uploadZipPath; + private String indexNamePre; + + public String getGoFastPostUrl() { + return 
goFastPostUrl; + } + + public void setGoFastPostUrl(String goFastPostUrl) { + this.goFastPostUrl = goFastPostUrl; + } + + public String getGoFastDomain() { + return goFastDomain; + } + + public void setGoFastDomain(String goFastDomain) { + this.goFastDomain = goFastDomain; + } + + public String getIndexNamePre() { + return indexNamePre; + } + + public void setIndexNamePre(String indexNamePre) { + this.indexNamePre = indexNamePre; + } + + public String getUploadOLYExcelPath() { + return uploadOLYExcelPath; + } + + public void setUploadOLYExcelPath(String uploadOLYExcelPath) { + this.uploadOLYExcelPath = uploadOLYExcelPath; + } + + public String getUploadZipPath() { + return uploadZipPath; + } + + public void setUploadZipPath(String uploadZipPath) { + this.uploadZipPath = uploadZipPath; + } + /** * 访客 IP 白名单 */ diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java index 7aa8322..e327c2a 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAnalysisController.java @@ -9,6 +9,7 @@ import com.bfd.mf.common.util.enums.RTCodeEnum; import com.bfd.mf.common.util.slice.SliceScrollUtil; import com.bfd.mf.common.web.component.wrapper.ResponseWrapper; import com.bfd.nlp.common.util.constants.MediaTypes; +import io.swagger.annotations.Api; import io.swagger.annotations.ApiOperation; import org.apache.catalina.servlet4preview.http.HttpServletRequest; import org.slf4j.Logger; @@ -28,25 +29,26 @@ import java.util.Set; @Controller @RequestMapping("/analysis") -@ResponseBody +@Api(value="数据分析结果&词云查询") public class SearchAnalysisController { private static final Logger logger = LoggerFactory.getLogger(SearchAnalysisController.class); @Autowired private SearchAnalysisService searchAnalysisService; - @Autowired - private SearchKeywordsCouldService 
searchKeywordsCouldService; + /** * 查询总体分析结果 */ - @ApiOperation(value = "查询总体分析结果", httpMethod = "POST") - @PostMapping(value = "/trend/lineAll", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody + @ApiOperation(value = "查询总体分析结果") + @RequestMapping(value = "/trend/lineAll", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject queryAll(@RequestBody QueryRequest queryRequest) { logger.info("[queryAll] partial / Params: {}", JSONObject.toJSONString(queryRequest)); JSONObject jsonObject; try { jsonObject = searchAnalysisService.getAnalysisResponse(queryRequest); + //JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(queryRequest); + // jsonObject.put("cloudCounts",cloudCounts); } catch (Exception e) { logger.error("[queryAll error = ]", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_FAIL); @@ -54,18 +56,18 @@ public class SearchAnalysisController { return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, jsonObject); } - @RequestMapping(value = "/cloudCrawl", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) - public JSONObject queryWordCloudCountCrawl(@RequestBody QueryRequest queryRequest) { - logger.info("[queryWordCloudCountCrawl] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject cloudCounts; - try { - cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(queryRequest); - } catch (Exception e) { - logger.error("[queryWordCloudCountCrawl] Failed,The error message is :{}", e); - return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); - } - return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, cloudCounts); - } - - +// @ResponseBody +// @ApiOperation(value = "查询词云结果") +// @RequestMapping(value = "/cloudCrawl", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) +// public JSONObject 
queryWordCloudCountCrawl(@RequestBody QueryRequest queryRequest) { +// logger.info("[queryWordCloudCountCrawl] partial / Params: {}", JSONObject.toJSONString(queryRequest)); +// JSONObject cloudCounts; +// try { +// cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(queryRequest); +// } catch (Exception e) { +// logger.error("[queryWordCloudCountCrawl] Failed,The error message is :{}", e); +// return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); +// } +// return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, cloudCounts); +// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java index 20fa478..7e53e1d 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchAuthorController.java @@ -1,10 +1,12 @@ package com.bfd.mf.controller; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.enums.RTCodeEnum; import com.bfd.mf.common.web.component.wrapper.ResponseWrapper; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.service.SearchAuthorService; +import com.bfd.mf.service.SearchDataService; import com.bfd.nlp.common.util.constants.MediaTypes; import io.swagger.annotations.ApiOperation; import org.slf4j.Logger; @@ -13,6 +15,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.*; +import java.util.ArrayList; import java.util.List; @Controller @@ -23,6 +26,8 @@ public class SearchAuthorController { @Autowired private SearchAuthorService searchAuthorService; + @Autowired + private SearchDataService searchDataService; /** * 查询用户信息数据列表 @@ -34,8 +39,15 @@ public class SearchAuthorController { logger.info("[queryAuthors] 
partial / Params: {}", JSONObject.toJSONString(queryRequest)); JSONObject result; try { - result = searchAuthorService.queryAuthorList(queryRequest); - Integer allDocNumber = result.getIntValue("AllDocNumber"); + String scorllId = queryRequest.getScrollId(); + if(null != scorllId){ // 这个是导出要用的 + result = searchDataService.exportDataInSubjectIndex(queryRequest); + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + }else{ // 这个是查询 + result = searchAuthorService.queryAuthorList(queryRequest); + } + + Integer allDocNumber = result.getIntValue(ESConstant.ALLDOCNUMBER); Integer limit = queryRequest.getLimit(); Integer page = 1; if(allDocNumber%limit==0){ @@ -71,6 +83,7 @@ public class SearchAuthorController { return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } + @RequestMapping(value="/subject/queryOneAuthor",method= RequestMethod.GET) @ResponseBody public JSONObject queryAuthor(String subjectId,String authorId,String siteId) { @@ -106,7 +119,4 @@ public class SearchAuthorController { } return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } - - - } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java index b012661..25465be 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/SearchDataController.java @@ -1,11 +1,17 @@ package com.bfd.mf.controller; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.util.constants.ESConstant; +import com.bfd.mf.common.web.repository.mysql.base.SiteRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.service.SearchDataService; import com.bfd.mf.common.util.enums.RTCodeEnum; import com.bfd.mf.common.web.component.wrapper.ResponseWrapper; +import com.bfd.mf.service.UpdateService; import com.bfd.nlp.common.util.encryption.MD5; +import 
io.swagger.annotations.Api; +import io.swagger.annotations.ApiImplicitParam; +import io.swagger.annotations.ApiImplicitParams; import org.apache.tomcat.util.security.MD5Encoder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -17,21 +23,27 @@ import com.bfd.nlp.common.util.constants.MediaTypes; import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; @Controller @RequestMapping("/crawl") -@ResponseBody +@Api(value="数据查询的控制器") public class SearchDataController { private static final Logger logger = LoggerFactory.getLogger(SearchDataController.class); @Autowired private SearchDataService searchDataService; + @Autowired + private SiteRepository siteRepository; + @Autowired + private UpdateService updateService; + /** * 查询数据列表 */ - @ApiOperation(value = "查询数据列表", httpMethod = "POST") - @PostMapping(value = "/subject/query", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ApiOperation(value = "查询数据列表") + @RequestMapping(value = "/subject/query", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) @ResponseBody public JSONObject queryDataList(@RequestBody QueryRequest queryRequest) { logger.info("[queryDataList] partial / Params: {}", JSONObject.toJSONString(queryRequest)); @@ -44,28 +56,29 @@ public class SearchDataController { if(subjectId.equals("all") || subjectId.contains(",")){ // 全局数据导出 result = searchDataService.exportDataInDateIndex(queryRequest); }else if(subjectId.equals("")){ // 没有专题 - result.put("foldDocAllNumber",0L); - result.put("monitorLists",new ArrayList<>()); + result.put(ESConstant.ALLDOCNUMBER,0L); + result.put(ESConstant.MONITORLISTS,new ArrayList<>()); }else{ // 专题数据导出 - Integer searchType = queryRequest.getSearchType(); - if(searchType == 0) { - result = searchDataService.exportDataInSubjectIndexTestGroupBy(queryRequest); - }else{ - result = searchDataService.exportDataInSubjectIndex(queryRequest); - } + //Integer searchType 
= queryRequest.getSearchType(); +// if(searchType == 0) { +// result = searchDataService.exportDataInSubjectIndexTestGroupBy(queryRequest); +// }else{ + result = searchDataService.exportDataInSubjectIndex(queryRequest); + // } } return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); }else { if (subjectId.equals("all") || subjectId.contains(",")) {// 全局数据查询 result = searchDataService.queryDataList(queryRequest); } else if (subjectId.equals("")) { // 没有专题 - result.put("foldDocAllNumber", 0L); - result.put("monitorLists", new ArrayList<>()); - } else { // 专题数据查询 + result.put(ESConstant.ALLDOCNUMBER, 0L); + result.put(ESConstant.MONITORLISTS, new ArrayList<>()); + } else { + // 专题数据查询 result = searchDataService.queryDataInOneIndex(queryRequest); } } - Integer allDocNumber = result.getIntValue("foldDocAllNumber"); + Integer allDocNumber = result.getIntValue(ESConstant.ALLDOCNUMBER); Integer limit = queryRequest.getLimit(); Integer page = 1; if(allDocNumber%limit==0){ @@ -73,8 +86,10 @@ public class SearchDataController { }else{ page = allDocNumber/limit +1; } - if(page >0 && queryRequest.getPage() > page){ - return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配"); + if(null != queryRequest.getPage() && !queryRequest.getPage().equals("")) { + if (page > 0 && queryRequest.getPage() > page) { + return ResponseWrapper.buildResponse(RTCodeEnum.C_SUBJECT_GRAMMAR_ERROR, "总数和分页不匹配"); + } } } catch (Exception e) { logger.error("[queryData] Failed,The error message is :{}", e); @@ -83,9 +98,17 @@ public class SearchDataController { return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } - - @RequestMapping(value="/subject/getInfoByDocId",method=RequestMethod.GET) + /** + * 根据ID 查询 一条数据详情 + */ @ResponseBody + @RequestMapping(value="/subject/getInfoByDocId",method=RequestMethod.GET) + @ApiOperation(value = "查询单条数据") + @ApiImplicitParams({ + @ApiImplicitParam(paramType="query", name = "subjectId", value = "专题ID", required = true, 
dataType = "String"), + @ApiImplicitParam(paramType="query", name = "docId", value = "主贴唯一ID", required = true, dataType = "String"), + @ApiImplicitParam(paramType="query", name = "siteId", value = "站点ID", required = true, dataType = "String"), + }) public JSONObject getInfo(String subjectId,String docId,String siteId) { QueryRequest queryRequest = new QueryRequest(); queryRequest.setSubjectId(subjectId); @@ -102,9 +125,13 @@ public class SearchDataController { return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } - @ApiOperation(value = "查询数据列表", httpMethod = "POST") - @PostMapping(value = "/subject/getInfoByDocId", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + + /** + * 根据ID 查询 一条数据详情 + */ @ResponseBody + @ApiOperation(value = "查询单条数据") + @RequestMapping(value = "/subject/getInfoByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject getInfoByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getInfoByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); JSONObject result; @@ -118,8 +145,12 @@ public class SearchDataController { } - @ApiOperation(value = "查询数据列表", httpMethod = "POST") - @PostMapping(value = "/getCommentsByDocId", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + /** + * 查询一条数据对应的 评论列表 + */ + @ResponseBody + @ApiOperation(value = "查询评论列表") + @RequestMapping(value = "/getCommentsByDocId", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) public JSONObject getCommentsByDocId(@RequestBody QueryRequest queryRequest) { logger.info("[getCommentsByDocId] partial / Params: {}", JSONObject.toJSONString(queryRequest)); JSONObject result; @@ -133,34 +164,36 @@ public class SearchDataController { } - @ApiOperation(value = "查询数据列表", httpMethod = "POST") - @PostMapping(value = "/getCountByCrawlDataFlag", consumes = MediaTypes.JSON_UTF_8, produces = 
MediaTypes.JSON_UTF_8) - public JSONObject getCountByCrawlDataFlag(@RequestBody QueryRequest queryRequest) { - logger.info("[getCountByCrawlDataFlag] partial / Params: {}", JSONObject.toJSONString(queryRequest)); - JSONObject result; + + @ApiOperation(value = "查询数据列表") + @RequestMapping(value = "/subject/queryCounts", method = RequestMethod.POST, consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ResponseBody + public JSONObject queryDataCounts(@RequestBody QueryRequest queryRequest) { + logger.info("[queryDataCounts] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + JSONObject result = new JSONObject(); try { - result = searchDataService.queryCountByCrawlDataFlag(queryRequest); + result = searchDataService.queryDataCountsInOneIndex(queryRequest); } catch (Exception e) { - logger.error("[getCommentsByDocId] Failed,The error message is :{}", e); + logger.error("[queryDataCounts] Failed,The error message is :{}", e); return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); } return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); } -// @ApiOperation(value = "查询", httpMethod = "POST") -// @PostMapping(value = "/subject/getCount", consumes = MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) -// @ResponseBody -// public JSONObject getCount(@RequestBody QueryRequest queryRequest) { -// logger.info("[getCount] partial / Params: {}", JSONObject.toJSONString(queryRequest)); -// JSONObject result = new JSONObject(); -// try { -// result = searchDataService.queryCountBySubjectIds(queryRequest,result); -// } catch (Exception e) { -// logger.error("[getCount] Failed,The error message is :{}", e); -// return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); -// } -// return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); -// } + @ApiOperation(value = "修改标签") + @RequestMapping(value = "/update/updateByDocId", method = RequestMethod.POST, consumes = 
MediaTypes.JSON_UTF_8, produces = MediaTypes.JSON_UTF_8) + @ResponseBody + public JSONObject updateLabel(@RequestBody QueryRequest queryRequest) { + logger.info("[updateLabel] partial / Params: {}", JSONObject.toJSONString(queryRequest)); + JSONObject result = new JSONObject(); + try { + result = updateService.updateByDocId(queryRequest); + } catch (Exception e) { + logger.error("[updateLabel] Failed,The error message is :{}", e); + return ResponseWrapper.buildResponse(RTCodeEnum.C_SERVICE_NOT_AVAILABLE, "Query failed"); + } + return ResponseWrapper.buildResponse(RTCodeEnum.C_OK, result); + } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java b/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java index 1627988..fdc18d2 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java +++ b/cl_search_api/src/main/java/com/bfd/mf/controller/UploadExcelController.java @@ -1,13 +1,9 @@ package com.bfd.mf.controller; -import com.alibaba.druid.support.json.JSONUtils; import com.alibaba.fastjson.JSONObject; -import com.bfd.mf.common.util.enums.RTCodeEnum; -import com.bfd.mf.common.web.component.wrapper.ResponseWrapper; -import com.bfd.mf.common.web.vo.params.QueryRequest; -import com.bfd.mf.service.SearchDataService; +import com.bfd.mf.common.util.ZipUtils; +import com.bfd.mf.config.BFDApiConfig; import com.bfd.mf.service.UploadExcelService; -import com.bfd.nlp.common.util.constants.MediaTypes; import io.swagger.annotations.ApiOperation; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -15,14 +11,9 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.*; import org.springframework.web.multipart.MultipartFile; -import org.springframework.web.multipart.MultipartHttpServletRequest; import java.io.File; -import java.io.FileOutputStream; -import java.io.InputStream; 
-import java.io.OutputStream; import java.math.BigInteger; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -33,15 +24,20 @@ import java.util.Map; public class UploadExcelController { private static final Logger logger = LoggerFactory.getLogger(UploadExcelController.class); @Autowired + private BFDApiConfig bfdApiConfig; + @Autowired private UploadExcelService uploadExcelService; + /** * 上传Excel */ @ApiOperation(value = "上传Excel", httpMethod = "POST") @PostMapping(value = "/SubjectsExcel") @ResponseBody - public JSONObject insertExcelTask(@RequestParam("file") MultipartFile file,@RequestParam("userId") String userId) { + public JSONObject insertExcelTask(@RequestParam("file") MultipartFile file, + @RequestParam("userId") String userId) { + logger.info("[insertExcelTask] partial / Params: {}", userId); JSONObject jsonObject =new JSONObject(); try { Map userinfo = new HashMap<>(); @@ -50,64 +46,158 @@ public class UploadExcelController { // MultipartFile file = request.getFile("file"); // 先查询一下Excel 名是不是已经存在,要是已经存在的话就提示让修改; String excelName = file.getOriginalFilename(); - boolean isExist = queryByExcelName(excelName); - boolean isTaskExist = queryByStatus(); + if(excelName.contains("xlsx")){ // 传的是Excel + jsonObject = aboutExcel(excelName,file,userinfo,jsonObject); + } + } catch (Exception e) { + e.printStackTrace(); + jsonObject.put("message","lalalalaal 报错了"); + } + return jsonObject; + } + +// private JSONObject aboutTxt(String excelName, MultipartFile file, Map userinfo, JSONObject jsonObject) { +// try { +// String fileName = file.getOriginalFilename(); +// String path = bfdApiConfig.getUploadOLYExcelPath(); +// boolean flag = uploadExcelService.uploadExcel(file, path); +// // 查一下Excel 解析的表中这个任务是否解析成功 +// Thread.sleep(3000); +// if (flag) { +// String subjectId = uploadExcelService.createSubject(excelName,userinfo); +// // 将数据写入导ES中 +// // uploadExcelService.uploadData(subjectId, path + fileName); +// 
jsonObject.put("code",200); +// } +// }catch (Exception e){ +// e.printStackTrace(); +// jsonObject.put("code",201); +// } +// return jsonObject; +// } + + private JSONObject aboutExcel(String excelName, MultipartFile file, Map userinfo, JSONObject jsonObject) { + try{ + boolean isExist = uploadExcelService.queryByExcelName(excelName); + boolean isTaskExist = uploadExcelService.queryByStatus(); if(isExist){ - jsonObject.put("code", 202); - // jsonObject.put("message", "同名Excel已存在,请改名后重新上传,谢谢!"); + jsonObject.put("code", 202); //同名Excel已存在,请改名后重新上传,谢谢! }else if(!isTaskExist){ - jsonObject.put("code", 203); - //jsonObject.put("message", "当前正在运行任务数超过5个,请稍后再尝试上传,谢谢!"); + jsonObject.put("code", 203); //当前正在运行任务数超过5个,请稍后再尝试上传,谢谢! }else{ String filePath = "/opt/nfsdata/excelTask/"; - boolean flag = uploadExcel(file, filePath); - if (flag) { - uploadExcelService.uploadExcel(excelName, userinfo); + boolean flag = uploadExcelService.uploadExcel(file, filePath); + if (flag) { // 上传成功后,在 cl_parse_excel_task 表中添加对应的记录,每个表格一条记录 + uploadExcelService.insertParseExcelTask(excelName, userinfo); } // 查一下Excel 解析的表中这个任务是否解析成功 Thread.sleep(3000); - boolean taskUploadSuccess = queryByExcelName(excelName); + boolean taskUploadSuccess = uploadExcelService.queryByExcelName(excelName); if(taskUploadSuccess) { // 既然插入成功,那就获取对应的 专题ID ,插入 cl_subject_count; List subjectIds = uploadExcelService.getSubjectIdsByExcelName(excelName); - jsonObject.put("code", 200); - // jsonObject.put("message", "恭喜你,上传成功啦~"); + jsonObject.put("code", 200); //恭喜你,上传成功啦~ }else{ - jsonObject.put("code", 201); - // jsonObject.put("message", "Excel解析失败,请检查Excel."); + jsonObject.put("code", 201); //Excel解析失败,请检查Excel } } - } catch (Exception e) { + + }catch (Exception e){ e.printStackTrace(); - jsonObject.put("message","lalalalaal 报错了"); } return jsonObject; } + /** - * 上传Excel相关 + * 上传zip 需要判断解压后的数据是否符合要求, */ - private boolean queryByExcelName(String excelName) { - try{ - String newExcelName = 
excelName.replace(".xlsx",""); - boolean success = uploadExcelService.isTaskSucess(newExcelName); - if(success){ - return true; + @ApiOperation(value = "上传数据", httpMethod = "POST") + @PostMapping(value = "/UploadData") + @ResponseBody + public JSONObject uploadData(@RequestParam("file") MultipartFile file, + @RequestParam("userId") String userId, + @RequestParam("user") String user, + @RequestParam("subjectId") String subjectId, + @RequestParam("fileRemak") String fileRemak) { + logger.info("[insertExcelTask] partial / Params: {}", subjectId+" , "+fileRemak); + JSONObject jsonObject =new JSONObject(); + String zipPath = bfdApiConfig.getUploadZipPath(); + try { + Map userinfo = new HashMap<>(); + userinfo.put("user",user); + userinfo.put("userId",userId); + String zipName = file.getOriginalFilename(); + // 将文件上传到指定路径下,并返回是否上传成功的状态位。 + boolean flag = uploadExcelService.uploadExcel(file, zipPath); + String zipFileName = zipName.replace(".zip",""); + if (flag) { + long fileSize = ZipUtils.getFileSize(zipPath+zipName); + if(fileSize < 1024){ + jsonObject.put("code", 205); + jsonObject.put("desc", "上传的文件为空,请核查文件。"); + return jsonObject; + } +// // 解压zip ,校验数据,非excel 的,非txt 的都需要提示 + Map> fileNameMap = ZipUtils.unZip(new File(zipPath+zipName),zipPath+zipFileName); + if(fileNameMap.containsKey("fileName")) { + String fileName = fileNameMap.get("fileName").get(0); + logger.info("The FileName :" + fileName); + // 获取一下文件的大小 + if (!fileName.contains(".xlsx") && !fileName.contains(".txt")) { + jsonObject.put("code", 204); + jsonObject.put("desc", "请上传 Excel 或 txt 文件"); + } else { // 需要在 cl_task 表中添加一个任务 + boolean insertSuccess = uploadExcelService.insertTask(subjectId, user, userId, fileRemak, zipName); + jsonObject.put("code", 200); + jsonObject.put("desc","OK"); + } + fileNameMap.remove("fileName"); + }else{ + jsonObject.put("code", 204); + jsonObject.put("desc", "请上传 Excel 或 txt 文件"); + } + + }else{ + jsonObject.put("code",206); + jsonObject.put("desc","上传失败"); } - return 
false; - }catch (Exception e){ + + } catch (Exception e) { e.printStackTrace(); - return false; + jsonObject.put("code",206); + jsonObject.put("desc","上传失败"); } + return jsonObject; } - /** - * 上传Excel相关 - */ -// private boolean queryByExcelName(String projectName) { + + + + +// /** +// * 上传Excel相关 +// */ +// private boolean queryByExcelName(String excelName) { // try{ -// projectName = projectName.replace(".xlsx",""); -// boolean isExist = uploadExcelService.isExcelExist(projectName); -// if(isExist){ +// String newExcelName = excelName.replace(".xlsx",""); +// boolean success = uploadExcelService.isTaskSucess(newExcelName); +// if(success){ +// return true; +// } +// return false; +// }catch (Exception e){ +// e.printStackTrace(); +// return false; +// } +// } +// +// /** +// * 上传Excel相关 +// */ +// private boolean queryByStatus() { +// try{ +// boolean isExist = uploadExcelService.isTaskExist(); +// if(isExist){ // 如果任务为空,就说明可以添加新的任务进来,如果不为空,就不要添加新的任务进来啦~ // return true; // }else{ // return false; @@ -117,40 +207,24 @@ public class UploadExcelController { // return false; // } // } - /** - * 上传Excel相关 - */ - private boolean queryByStatus() { - try{ - boolean isExist = uploadExcelService.isTaskExist(); - if(isExist){ // 如果任务为空,就说明可以添加新的任务进来,如果不为空,就不要添加新的任务进来啦~ - return true; - }else{ - return false; - } - }catch (Exception e){ - e.printStackTrace(); - return false; - } - } - /** - * 上传Excel相关 - */ - private boolean uploadExcel(MultipartFile file,String filePath) { - try{ - InputStream inputStream = file.getInputStream(); - byte[] buffer = new byte[inputStream.available()]; - inputStream.read(buffer); - File targetFile = new File(filePath+file.getOriginalFilename()); - OutputStream outStream = new FileOutputStream(targetFile); - outStream.write(buffer); - inputStream.close(); - outStream.close(); - return true; - }catch (Exception e){ - e.printStackTrace(); - return false; - } - } +// /** +// * 上传Excel相关 +// */ +// private boolean uploadExcel(MultipartFile 
file,String filePath) { +// try{ +// InputStream inputStream = file.getInputStream(); +// byte[] buffer = new byte[inputStream.available()]; +// inputStream.read(buffer); +// File targetFile = new File(filePath+file.getOriginalFilename()); +// OutputStream outStream = new FileOutputStream(targetFile); +// outStream.write(buffer); +// inputStream.close(); +// outStream.close(); +// return true; +// }catch (Exception e){ +// e.printStackTrace(); +// return false; +// } +// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java index 72bedc1..0b4dc0d 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAnalysisService.java @@ -1,27 +1,25 @@ package com.bfd.mf.service; -import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.service.es.ClusterService; +import com.bfd.mf.common.service.es.EsQueryServiceForSQMini; +import com.bfd.mf.common.service.es.SubjectQueryDataService; import com.bfd.mf.common.util.analysis.DataAnalysisUtils; -import com.bfd.mf.common.util.analysis.DateTrendUtils; import com.bfd.mf.common.util.constants.ConditionCommon; import com.bfd.mf.common.util.constants.ESConstant; import com.bfd.mf.common.util.slice.SliceScrollUtil; -import com.bfd.mf.common.util.spread.SpreadServiceUtil; +import com.bfd.mf.common.util.utility.DateUtil; import com.bfd.mf.common.web.vo.params.QueryRequest; -import com.bfd.mf.common.web.vo.view.analysis.DataLineCount; +import com.bfd.mf.common.web.vo.view.analysis.DataCount; import com.bfd.mf.common.web.vo.view.analysis.DataPieCount; -import com.bfd.mf.common.web.vo.view.monitor.ESMonitorBaseEntity; import com.bfd.mf.common.web.vo.view.monitor.ESMonitorEntity; import com.bfd.nlp.common.util.object.TObjectUtils; -import com.bfd.mf.common.util.es.MonitorConstant; -import 
net.logstash.logback.encoder.org.apache.commons.lang.exception.ExceptionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import springfox.documentation.spring.web.json.Json; -import java.text.ParseException; import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -29,168 +27,219 @@ import java.util.stream.IntStream; @Service public class SearchAnalysisService { private static Logger logger = LoggerFactory.getLogger(SearchAnalysisService.class); + Long ONEYEAR = 60*60*24*365L; + @Autowired private SliceScrollUtil sliceScrollUtil; + @Autowired + private SearchKeywordsCouldService searchKeywordsCouldService; + @Autowired + private ClusterService clusterService; + @Autowired + private SubjectQueryDataService subjectQueryDataService; + @Autowired + private EsQueryServiceForSQMini esQueryServiceForSQMini; public JSONObject getAnalysisResponse(QueryRequest queryRequest) { JSONObject jsonObject = new JSONObject(); try{ - List esMonitorEntity = sliceScrollUtil.fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS); - jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest, esMonitorEntity); - jsonObject = DataAnalysisUtils.fetchPieAndCountNumberByLineJson(jsonObject); + List esMonitorEntity = sliceScrollUtil. 
+ fetchResultSubjectCache(queryRequest, ESConstant.FIELD_LIST_ANALYSIS); + // 渠道走势 + jsonObject = dataAnalysisTrendByDayQueryTimes(queryRequest,esMonitorEntity); + // 获取 渠道统计结果 分类标签统计结果 价值标签统计结果 + jsonObject = dataAnalysisChannelCounts(jsonObject,esMonitorEntity); + // 获取词云 + JSONObject cloudCounts = searchKeywordsCouldService.dataAnalysisCloud(esMonitorEntity); + jsonObject.put("cloudCounts",cloudCounts); }catch (Exception e){ e.printStackTrace(); } return jsonObject; } +// private JSONObject getLabelStatics(JSONObject jsonObject, List esMonitorEntityList) { +// List> valueLabelTrend = new ArrayList<>(); +// List> categoryLabelTrand = new ArrayList<>(); +// Map valueLabelMap = new HashMap<>(); +// Map categoryLanelMap = new HashMap<>(); +// for (ESMonitorEntity esMonitorEntity : esMonitorEntityList) { +// Integer valueLabel = esMonitorEntity.getValueLabel(); +// String categoryLabel = esMonitorEntity.getCategoryLabel(); +// if(valueLabelMap.containsKey(valueLabel)){ +// long count = valueLabelMap.get(valueLabel); +// valueLabelMap.put(valueLabel,count+1); +// }else{ +// valueLabelMap.put(valueLabel,1L); +// } +// if(categoryLanelMap.containsKey(categoryLabel)){ +// long count = categoryLanelMap.get(categoryLabel); +// categoryLanelMap.put(categoryLabel,count+1); +// }else{ +// categoryLanelMap.put(categoryLabel,1L); +// } +// } +// valueLabelMap.remove(null); +// categoryLanelMap.remove(null); +// valueLabelTrend.add(valueLabelMap); +// categoryLabelTrand.add(categoryLanelMap); +// System.out.println("-----" + JSONObject.toJSONString(valueLabelTrend)); +// System.out.println("=====" + JSONObject.toJSONString(categoryLabelTrand)); +// jsonObject.put("valueLabelTrend",valueLabelTrend); +// jsonObject.put("categoryLabelTrand",categoryLabelTrand); +// return jsonObject; +// } + + private JSONObject dataAnalysisChannelCounts(JSONObject jsonObject, List esMonitorEntityList) { + Map> channelMaps = new HashMap<>(); + Map> valueMaps = new HashMap<>(); + Map> categoryMaps = 
new HashMap<>(); + logger.info("dataAnalysisChannelCounts : totalNumber = " + esMonitorEntityList.size()); + Map channelMap = new HashMap<>(); + Map valueLabelMap = new HashMap<>(); + Map categoryLabelMap = new HashMap<>(); + for (ESMonitorEntity esMonitorEntity : esMonitorEntityList) { + String docType = esMonitorEntity.getDocType(); // 页面类型 + String valueLabel = esMonitorEntity.getValueLabel(); // 价值标签 + String categoryLabel = esMonitorEntity.getCategoryLabel();// 分类标签 + channelMap.put(docType,getDocTypeNum(docType)); + valueLabelMap.put(valueLabel,valueLabel); + categoryLabelMap.put(categoryLabel,categoryLabel); + if(null != docType) { + channelMaps = getMaps(channelMaps, docType); + } + if(null != valueLabel) { + valueMaps = getMaps(valueMaps, valueLabel); + if(valueMaps.containsKey("")){ + valueMaps.remove(""); + } + } + if(null != categoryLabel) { + categoryMaps = getMaps(categoryMaps, categoryLabel); + } + + } + List> resultList1 = getResultList(channelMaps,channelMap); + List> resultList2 = getResultList(valueMaps,valueLabelMap); + List> resultList3 = getResultList(categoryMaps,categoryLabelMap); + + // System.out.println(JSONObject.toJSONString("111 "+resultList1)); +// System.out.println(JSONObject.toJSONString("222" + resultList2)); +// System.out.println(JSONObject.toJSONString("333" + resultList3)); + jsonObject.put("channelDistributed",resultList1); + jsonObject.put("valueLabelTrend",resultList2); + jsonObject.put("categoryLabelTrand",resultList3); + + return jsonObject; + } + + private Map> getMaps(Map> maps,String staticsType) { + if (maps.containsKey(staticsType)) { + List list = maps.get(staticsType); + boolean flag = false; + for (DataPieCount dataPieCount : list) { + if (dataPieCount.getName().equals(staticsType)) { + long value = dataPieCount.getValue() + 1; + dataPieCount.setValue(value); + flag = true; + } + } + if (!flag) { + DataPieCount dataPieCount = new DataPieCount(staticsType, 1L); + list.add(dataPieCount); + } + } else { + List list = 
new ArrayList<>(); + DataPieCount dataPieCount = new DataPieCount(staticsType, 1L); + list.add(dataPieCount); + maps.put(staticsType, list); + } + return maps; + } + + + private List> getResultList(Map> maps,Map nameMap) { + + List> resultList = new ArrayList<>(); + for(Map.Entry> entry : maps.entrySet()){ + String docType = entry.getKey(); + Map resultMap = new HashMap<>(); + long value = maps.get(docType).get(0).getValue(); + resultMap.put("name",nameMap.get(docType)); + resultMap.put("value",value); + resultList.add(resultMap); + } + return resultList; + } + + private JSONObject dataAnalysisTrendByDayQueryTimes(QueryRequest queryRequest, List esMonitorEntityList) { JSONObject jsonResult = new JSONObject(); logger.info("dataAnalysisTrendByDayQueryTimes : totalNumber = " + esMonitorEntityList.size()); jsonResult.put("totalNumber", esMonitorEntityList.size()); - Map emotionEngMaps = MonitorConstant.emotionEngByThresholdMaps(); + // Map emotionEngMaps = MonitorConstant.emotionEngByThresholdMaps(); try { // 按发布时间排序 esMonitorEntityList.sort((o1, o2) -> { // 发布时间相同的 return o1.getPubTime().compareTo(o2.getPubTime()) == 0 ? 
o1.getCrawlTime().compareTo(o2.getCrawlTime()) : o1.getPubTime().compareTo(o2.getPubTime()); }); - + logger.info("总数据条数: "+esMonitorEntityList.size()); Long startTime = queryRequest.getStartTime(); Long endTime = queryRequest.getEndTime(); - if(startTime == 0 && endTime == 0){ - if(queryRequest.getStartTime() == 0 && queryRequest.getEndTime() == 0) { - System.out.println(esMonitorEntityList.size() + "| " + "开始时间:"+esMonitorEntityList.get(0).getPubTime() + "| "+"结束时间:"+esMonitorEntityList.get(esMonitorEntityList.size() - 1).getPubTime()); - startTime = esMonitorEntityList.get(0).getPubTime(); - if(esMonitorEntityList.get(0).getPubTime() < 1577808000000L){ - startTime = 1577808000000L; - } - endTime = esMonitorEntityList.get(esMonitorEntityList.size() - 1).getPubTime(); - } -// Calendar c = Calendar.getInstance(); -// endTime = new Date().getTime(); -// c.setTime(new Date()); -// c.add(Calendar.YEAR, -1); -// startTime = c.getTime().getTime(); + Long time_difference = 0L; + if(startTime == 0 && endTime == 0){ // 说明查的是专题,需要整个时间范围内的数据哦 + // 一天是 86400 ,如果时间差 < 86400000 就拆小时,如果时间差 >86400000 < 2678400000 拆天, 如果时间差 > 2678400000 就拆月 + time_difference = (esMonitorEntityList.get(esMonitorEntityList.size() - 1).getPubTime()- esMonitorEntityList.get(0).getPubTime())/1000; + //System.out.println("时间差:" + (esMonitorEntityList.get(esMonitorEntityList.size() - 1).getPubTime()- esMonitorEntityList.get(0).getPubTime())); + System.out.println("数据条数 : "+esMonitorEntityList.size() + "| " + "时间范围:(开始时间:"+esMonitorEntityList.get(0).getPubTime() + "| "+"结束时间:"+esMonitorEntityList.get(esMonitorEntityList.size() - 1).getPubTime() + ")"); + startTime = esMonitorEntityList.get(0).getPubTime(); + endTime = esMonitorEntityList.get(esMonitorEntityList.size() - 1).getPubTime(); } - - // 按小时 时间序列 - Long[] timeList = DataAnalysisUtils.getTimeList(startTime, endTime, ConditionCommon.HOUR); - // 字符时间区间 - String[] timeStringList = SpreadServiceUtil.convertToTimeStringList(timeList, 
ConditionCommon.HOUR); - // 数据遍历统计每个渠道时间段的总数 - // 每个渠道时间段统计 - Map> channelMaps = DataAnalysisUtils.fetchPerChannelCountByListData(esMonitorEntityList, timeList, ConditionCommon.HOUR); - // 结果 - ArrayList arrayList = new ArrayList<>(Arrays.asList(timeList)); - // 时间区间 每个渠道的统计结果 - Map> stringListMap = parseTrendDataResult(channelMaps, arrayList, getChannelMap()); - // 各个时间段的统计结果 - Long[] docNumList = DataAnalysisUtils.fetchPerTimeByPerChannelCountResult(stringListMap, timeList); - // 小时为 区间的情感值统计 - // 时间区间 时间点、每个时间点的统计结果、每个时间点按渠道统计的结果 - jsonResult.put("abscissa", timeStringList); - jsonResult.put("total", docNumList); - jsonResult.put("channels", stringListMap); - - // 情感相关的统计 - Map> emotionHourMaps = fetchPerEmotionCountByData( - esMonitorEntityList, - timeList, - ConditionCommon.HOUR); - Map> emotionMaps = new HashMap<>(); - emotionMaps.put("0.0-0.2", new ArrayList<>()); - emotionMaps.put("0.2-0.8", new ArrayList<>()); - emotionMaps.put("0.8-1.0", new ArrayList<>()); - Map> commonMaps = new HashMap<>(); - commonMaps.put(ESConstant.COMMON_TAG, emotionHourMaps.get(ESConstant.COMMON_TAG)); - Map> neuterMaps = new HashMap<>(); - neuterMaps.put(ESConstant.NEUTER_TAG, emotionHourMaps.get(ESConstant.NEUTER_TAG)); - Map> negativeMaps = new HashMap<>(); - negativeMaps.put(ESConstant.NEGATIVE_TAG, emotionHourMaps.get(ESConstant.NEGATIVE_TAG)); - emotionHourMaps.remove(ESConstant.NEUTER_TAG); - emotionHourMaps.remove(ESConstant.COMMON_TAG); - emotionHourMaps.remove(ESConstant.NEGATIVE_TAG); - // 负面 - DataAnalysisUtils.parseTimeSeries(negativeMaps, emotionMaps, ESConstant.NEGATIVE_TAG, channelMaps); - // 中性 - DataAnalysisUtils.parseTimeSeries(neuterMaps, emotionMaps, ESConstant.NEUTER_TAG, channelMaps); - // 正面 - DataAnalysisUtils.parseTimeSeries(commonMaps, emotionMaps, ESConstant.COMMON_TAG, channelMaps); - // 时间区间 情感 正、中、负 分别按时间点统计 - Map> emotionCountMap = parseTrendDataResult(emotionMaps, arrayList, emotionEngMaps); - jsonResult.put("emotions", emotionCountMap); -// Map 
channelEmotionMap = fetchPerEmotionCountByChannel(esMonitorEntityList,getChannelMap()); -// jsonResult.put("channelEmotions",channelEmotionMap); - - // 覆盖媒体 - // long mediaNumber = esEntityList.stream().map(ESMonitorBaseEntity::getSource).collect(Collectors.toSet()).size(); - Set mediaSet = new HashSet<>(); - // 站点数量统计 - esMonitorEntityList.forEach(e -> { - if (e.getDocType().equals(ESConstant.WEI_XIN)) { - mediaSet.add(ESConstant.WEI_XIN); - } else { - mediaSet.add(e.getSource()); - } - }); - jsonResult.put("totalMediaNumber", mediaSet.size()); - - // 敏感覆盖媒体 - Set negativeSourceSet = new HashSet<>(); - for (ESMonitorEntity esMonitorEntity : esMonitorEntityList) { -// List emotionEntryList = esMonitorEntity.getEmotionEntry(); - String sentimentTag = getSentimentTagBySubject(esMonitorEntity.getSysSentiment()); - if (sentimentTag.equals(ESConstant.NEGATIVE_TAG)) { - negativeSourceSet.add(esMonitorEntity.getSource()); - } +// System.out.println("时间差: " + time_difference); +// System.out.println("一年: " + ONEYEAR); +// Map> dayChannelMaps = new HashMap<>(); +// Map> yearChannelMaps = new HashMap<>(); + Map docTypeMap = new HashMap<>(); + Long[] timeList = {}; + timeList = DataAnalysisUtils.getTimeList(startTime, endTime, ConditionCommon.HOUR); + Map>> resultMap = DataAnalysisUtils.fetchPerChannelCountByListData(docTypeMap,esMonitorEntityList, timeList, ConditionCommon.HOUR); + Map> dayChannelMaps = resultMap.get("dayChannelMap"); // 每个渠道,每天的数据量 + Map> yearChannelMaps = resultMap.get("yearChannelMap"); + Map> dayEmoMaps = resultMap.get("dayEmoMap"); + Map> yearEmoMaps = resultMap.get("yearEmoMap"); + System.out.println("dayChannelMaps"+JSONObject.toJSONString(dayChannelMaps)); + System.out.println("yearChannelMaps"+JSONObject.toJSONString(yearChannelMaps)); + List dayList = new ArrayList<>(); + List yearList = new ArrayList<>(); + for (Long l:timeList) { + dayList.add(DateUtil.parseDateByday(l)); + yearList.add(DateUtil.parseDateByyear(l)); + } + List newDayList = 
dayList.stream().distinct().collect(Collectors.toList()); + List newYearList = yearList.stream().distinct().collect(Collectors.toList()); + + Map sentimentMap = new HashMap<>(); + sentimentMap.put("Neutral","0.5"); + sentimentMap.put("Negative","0.1"); + sentimentMap.put("Positive","0.9"); + List channelTrendList = new ArrayList<>(); + List emoTrendList = new ArrayList<>(); + if(time_difference > ONEYEAR){ + System.out.println("按年拆"); + channelTrendList = parseChannleMapsResult(docTypeMap, yearChannelMaps, newYearList); + emoTrendList = parseEmoMapsResult(sentimentMap, yearEmoMaps, newYearList); + }else{ + System.out.println(" 按天拆"); + channelTrendList = parseChannleMapsResult(docTypeMap, dayChannelMaps, newDayList); + emoTrendList = parseEmoMapsResult(sentimentMap, dayEmoMaps, newDayList); } - // 负面结果总量,为了计算NSR - jsonResult.put("negativeMediaNumber", negativeSourceSet.size()); - - // 省份统计 - jsonResult.put(ConditionCommon.PROVINCE, fetchArea(esMonitorEntityList, ConditionCommon.PROVINCE)); - // 市统计 - jsonResult.put(ConditionCommon.CITY, fetchArea(esMonitorEntityList, ConditionCommon.CITY)); - - // 天 - JSONArray dayArr = DateTrendUtils.convertStrTime(timeList, ConditionCommon.DAY); - JSONArray dayCountByHourData = DateTrendUtils.getDayCountByHourData(timeList, dayArr, docNumList); - Map arrayDayMap = new HashMap<>(); - arrayDayMap.put(ESConstant.NEGATIVE_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.NEGATIVE_CN_TAG), timeList, dayArr, ConditionCommon.DAY)); - arrayDayMap.put(ESConstant.COMMON_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.COMMON_CN_TAG), timeList, dayArr, ConditionCommon.DAY)); - arrayDayMap.put(ESConstant.NEUTER_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.NEUTER_CN_TAG), timeList, dayArr, ConditionCommon.DAY)); - jsonResult.put("emotionDay", arrayDayMap); - jsonResult.put("dayArr", dayArr); - jsonResult.put("dayData", 
dayCountByHourData); - jsonResult.put("channelDay", getEmotionOrChannelTrendTime(stringListMap, timeList, dayArr, ConditionCommon.DAY)); - // 月 - JSONArray monthArr = DateTrendUtils.convertStrTime(timeList, ConditionCommon.MONTH); - JSONArray monthCountByHourData = DateTrendUtils.getMonthCountByHourData(timeList, monthArr, docNumList); - Map arrayMonthMap = new HashMap<>(); - arrayMonthMap.put(ESConstant.NEGATIVE_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.NEGATIVE_CN_TAG), timeList, monthArr, ConditionCommon.MONTH)); - arrayMonthMap.put(ESConstant.COMMON_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.COMMON_CN_TAG), timeList, monthArr, ConditionCommon.MONTH)); - arrayMonthMap.put(ESConstant.NEUTER_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.NEUTER_CN_TAG), timeList, monthArr, ConditionCommon.MONTH)); - jsonResult.put("emotionMonth", arrayMonthMap); - jsonResult.put("monthArr", monthArr); - jsonResult.put("monthData", monthCountByHourData); - jsonResult.put("channelMonth", getEmotionOrChannelTrendTime(stringListMap, timeList, monthArr, ConditionCommon.MONTH)); + // 获取渠道声量走势 - // 年 - JSONArray yearArr = DateTrendUtils.convertStrTime(timeList, ConditionCommon.YEAR); - JSONArray yearCountByHourData = DateTrendUtils.getYearCountByHourData(timeList, monthArr, docNumList); - Map arrayYearMap = new HashMap<>(); - arrayYearMap.put(ESConstant.NEGATIVE_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.NEGATIVE_CN_TAG), timeList, yearArr, ConditionCommon.YEAR)); - arrayYearMap.put(ESConstant.COMMON_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.COMMON_CN_TAG), timeList, yearArr, ConditionCommon.YEAR)); - arrayYearMap.put(ESConstant.NEUTER_CN_TAG, DataAnalysisUtils.getEmotionOrChannelTrendTime(emotionCountMap.get(ESConstant.NEUTER_CN_TAG), timeList, yearArr, ConditionCommon.YEAR)); 
- jsonResult.put("emotionYear", arrayYearMap); - jsonResult.put("yearArr", yearArr); - jsonResult.put("yearData", yearCountByHourData); - jsonResult.put("channelYear", getEmotionOrChannelTrendTime(stringListMap, timeList, yearArr, ConditionCommon.YEAR)); + jsonResult.put("channelTrend",channelTrendList); + jsonResult.put("emotionTrend",emoTrendList); - // 站点统计 - jsonResult.putAll(getMediaALLCountByData(esMonitorEntityList, queryRequest)); } catch (Exception e) { e.printStackTrace(); logger.error("[ dataAnalysisTrendByDayQueryTimes Data FullFill] - ERROR - 趋势统计 ES 数据为 null,填充所有渠道、情感走势为 0...", e); @@ -201,19 +250,25 @@ public class SearchAnalysisService { /** * 获得 小时为区间 每个渠道 的统计结果:stringListMap */ - private Map> parseTrendDataResult(Map> maps, + private Map> parseTrendDataResult(Map> maps, List lists, Map engToCnMaps) { +// System.out.println("parseTrendDataResult"); +// System.out.println("1111"+JSONObject.toJSONString(lists)); +// System.out.println("2222"+JSONObject.toJSONString(maps)); +// System.out.println("3333"+JSONObject.toJSONString(engToCnMaps)); + + Map> resultMaps = new HashMap<>(); - for (Map.Entry> entry : maps.entrySet()) { + for (Map.Entry> entry : maps.entrySet()) { String key = entry.getKey(); - List valueLists = entry.getValue(); + List valueLists = entry.getValue(); Collections.sort(valueLists); List timeValue = initLists(lists.size(), 0L); - for (DataLineCount dataLineCount : valueLists) { - if (lists.contains(dataLineCount.getTimestamp())) { - int indexSize = lists.indexOf(dataLineCount.getTimestamp()); - timeValue.set(indexSize, dataLineCount.getData()); + for (DataCount dataCount : valueLists) { + if (lists.contains(dataCount.getName())) { + int indexSize = lists.indexOf(dataCount.getName()); + timeValue.set(indexSize, dataCount.getValue()); } } String name = engToCnMaps.get(key); @@ -239,299 +294,75 @@ public class SearchAnalysisService { } /** - * 获得 情感 统计结果 + * 渠道声量走势 */ - private Map> fetchPerEmotionCountByData(List 
cacheEsMonitorEntityList, - Long[] timeList, int type) { - Map> negativeChannelMaps; - // 情感值初始化 - negativeChannelMaps = DataAnalysisUtils.initEmotionList(timeList); - for (ESMonitorEntity esMonitorEntity : cacheEsMonitorEntityList) { - Double sysSentimentValue = esMonitorEntity.getSysSentiment(); - String sentimentTag = ESConstant.COMMON_TAG; - try { - sentimentTag = getSentimentTagBySubject(sysSentimentValue); - } catch (Exception e) { - logger.error("fetchPerEmotionCountByData error ={}", e.getMessage(), e); - } - DataAnalysisUtils.fetchFilterResultByTimeCompare(negativeChannelMaps, sentimentTag, esMonitorEntity, timeList, type); - } - return negativeChannelMaps; - } - -// private Map fetchPerEmotionCountByChannel(List cacheEsMonitorEntityList, -// Map channelMap) { -// Integer newsCommCount = 0; -// Integer newsNeuterCount = 0; -// Integer newsNegativeCount = 0; -// Integer socialCommCount = 0; -// Integer socialNeuterCount = 0; -// Integer socialNegativeCount = 0; -// Integer videoCommCount = 0; -// Integer videoNeuterCount = 0; -// Integer videoNegativeCount = 0; -// Integer itemCommCount = 0; -// Integer itemNeuterCount = 0; -// Integer itemNegativeCount = 0; -// for (ESMonitorEntity esMonitorEntity : cacheEsMonitorEntityList) { -// String docType = esMonitorEntity.getDocType(); -// Double sysSitiment = esMonitorEntity.getSysSentiment(); -// // 电商类 -// if(docType.equals("item") && sysSitiment == 0.1){ -// itemNegativeCount = itemNegativeCount+1; -// } -// if(docType.equals("item") && sysSitiment == 0.5){ -// itemNeuterCount = itemNeuterCount+1; -// } -// if(docType.equals("item") && sysSitiment == 0.9){ -// itemCommCount = itemCommCount+1; -// } -// // 新闻类 -// if(docType.equals("news") && sysSitiment == 0.1){ -// newsNegativeCount = newsNegativeCount+1; -// } -// if(docType.equals("news") && sysSitiment == 0.5){ -// newsNeuterCount = newsNeuterCount+1; -// } -// if(docType.equals("news") && sysSitiment == 0.9){ -// newsCommCount = newsCommCount+1; -// } -// 
// 视频类 -// if(docType.equals("video") && sysSitiment == 0.1){ -// videoNegativeCount = videoNegativeCount+1; -// } -// if(docType.equals("video") && sysSitiment == 0.5){ -// videoNeuterCount = videoNeuterCount+1; -// } -// if(docType.equals("video") && sysSitiment == 0.9){ -// videoCommCount = videoCommCount+1; -// } -// // 社交类 -// if(docType.equals("social") && sysSitiment == 0.1){ -// socialNegativeCount = socialNegativeCount+1; -// } -// if(docType.equals("social") && sysSitiment == 0.5){ -// socialNeuterCount = socialNeuterCount+1; -// } -// if(docType.equals("social") && sysSitiment == 0.9){ -// socialCommCount = socialCommCount+1; -// } -// } -// Map channelEmotionMap = new HashMap<>(); -// Map commMap = new HashMap<>(); -// commMap.put(channelMap.get("social"),socialCommCount); -// commMap.put(channelMap.get("news"),newsCommCount); -// commMap.put(channelMap.get("video"),videoCommCount); -// commMap.put(channelMap.get("item"),itemCommCount); -// Map natureMap = new HashMap<>(); -// natureMap.put(channelMap.get("social"),socialNeuterCount); -// natureMap.put(channelMap.get("news"),newsNeuterCount); -// natureMap.put(channelMap.get("video"),videoNeuterCount); -// natureMap.put(channelMap.get("item"),itemNeuterCount); -// Map negativeMap = new HashMap<>(); -// negativeMap.put(channelMap.get("social"),socialNegativeCount); -// negativeMap.put(channelMap.get("news"),newsNegativeCount); -// negativeMap.put(channelMap.get("video"),videoNegativeCount); -// negativeMap.put(channelMap.get("item"),itemNegativeCount); -// channelEmotionMap.put("正面",commMap); -// channelEmotionMap.put("中性",natureMap); -// channelEmotionMap.put("负面",negativeMap); -// return channelEmotionMap; -// } - - - public String getSentimentTagBySubject(Double sysSentimentValue) { - return getEmotionChByNum(sysSentimentValue); - } - - public static String getEmotionChByNum(Double sysSentimentValue) { - if (sysSentimentValue >= ESConstant.SENTIMENTAL_MID && sysSentimentValue <= 
ESConstant.SENTIMENTAL_MAX) { - return ESConstant.COMMON_TAG; - } else if (sysSentimentValue > ESConstant.SENTIMENTAL_THRESHOLD && sysSentimentValue <= ESConstant.SENTIMENTAL_MID) { - return ESConstant.NEUTER_TAG; - } else { - return ESConstant.NEGATIVE_TAG; - } - } - - /** - * 城市、地区的统计 - */ - public List fetchArea(List esEntityList, String type) { - List cityLists = new ArrayList<>(); - for (ESMonitorEntity esEntity : esEntityList) { - String name = null; - if (ConditionCommon.PROVINCE.equals(type)) { - // 省 - name = esEntity.getProvince(); - } else if (ConditionCommon.CITY.equals(type)) { - // 市 - name = esEntity.getCity(); + private List parseChannleMapsResult(Map docTypeMap , + Map> maps, + List dayList) { + docTypeMap.remove("null"); + List resultList = new ArrayList<>(); + try { + for (String day : dayList) { + List dataCounts = maps.get(day); // 符合时间的统计结果 name = 日期 type =docType value = 数量 + for (Map.Entry docType : docTypeMap.entrySet()) { + DataCount resultDataCount = new DataCount(); + resultDataCount.setName(day); + resultDataCount.setType(docType.getValue()); + resultDataCount.setValue(0L); + if (null != docType.getKey() && null != dataCounts && dataCounts.size() > 0) { + for (DataCount dataCount : dataCounts) { + String channel = dataCount.getType(); + if (null != docType.getKey() && docType.getKey().equals(channel)) { + resultDataCount.setValue(dataCount.getValue()); + } + } + } + resultList.add(resultDataCount); + } } - fetchAreaByTypeAndName(cityLists, name, type); - } - Collections.sort(cityLists); - return cityLists; - } - - public void fetchAreaByTypeAndName(List cityLists, String name, String type) { -// if (type.equals(ESConstant.WEIBO_USER_PROVINCE) && RegionUtils.provinces.contains(name)) { -// DataAnalysisUtils.countWeibo(name, cityLists, 1L); -// } else if (type.equals(ESConstant.WEIBO_USER_CITY) && RegionUtils.cities.contains(name)) { -// DataAnalysisUtils.countWeibo(name, cityLists, 1L); -// } - - } - - - /** - * 情感、渠道 双统计 - */ - private 
Map getEmotionOrChannelTrendTime(Map> stringListMap, - Long[] timeList, JSONArray timeArr, - Integer timeType) throws ParseException { - Map arrayMonthMap = new HashMap<>(); - List listKey = new ArrayList<>(stringListMap.keySet()); - for (String key : listKey) { - arrayMonthMap.put(key, DataAnalysisUtils.getEmotionOrChannelTrendTime(stringListMap.get(key), timeList, timeArr, timeType)); + }catch (Exception e){ + e.printStackTrace(); } - return arrayMonthMap; + return resultList; } /** - * 站点统计 + * 获取情感声量走势 主贴分析第一个图 */ - private JSONObject getMediaALLCountByData(List esMonitorEntityList, QueryRequest queryRequest) throws Exception { - int limit = queryRequest.getLimit(); - JSONObject mediaCountByData = getMediaCountByData(esMonitorEntityList, limit); - Map> mapCounts = (Map>) mediaCountByData.get(ConditionCommon.MEDIA_COUNTS); - List channelNameList = getChannels(); - List entryKey = new ArrayList<>(mapCounts.keySet()); - List allList = new LinkedList<>(); - for (String key : entryKey) { - switch (key) { - case ESConstant.VIDEO: - allList.addAll(mapCounts.get(key)); - break; - case ESConstant.NEWS: - allList.addAll(mapCounts.get(key)); - break; - case ESConstant.ITEM: - allList.addAll(mapCounts.get(key)); - break; - case ESConstant.SOCIAL: - allList.addAll(mapCounts.get(key)); - break; - case ESConstant.BBS: - allList.addAll(mapCounts.get(key)); - break; - case ESConstant.BLOG: - allList.addAll(mapCounts.get(key)); - break; - case ESConstant.SEARCH: - allList.addAll(mapCounts.get(key)); - break; - case ESConstant.LIFE: - allList.addAll(mapCounts.get(key)); - break; - default: - break; - } - } - - filterMapCountsByChannelIds(mapCounts, channelNameList); - allList.sort(DataPieCount::compareTo); - - mapCounts.put(ConditionCommon.MEDIA_COUNTS_ALL, allList.size() > limit ? 
allList.subList(0, limit) : allList); - - Map> orderMap = new LinkedHashMap<>(); - orderSite(orderMap, mapCounts, ConditionCommon.MEDIA_COUNTS_ALL, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.SOCIAL, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.NEWS, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.VIDEO, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.ITEM, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.BBS, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.BLOG, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.SEARCH, getChannels()); - orderSite(orderMap, mapCounts, ESConstant.LIFE, getChannels()); - mediaCountByData.clear(); - mediaCountByData.put(ConditionCommon.MEDIA_COUNTS, orderMap); - return mediaCountByData; - } - private void filterMapCountsByChannelIds(Map> mapCounts, List channelNameList) { - List channelNameFilter = new ArrayList<>(); - for (String key : mapCounts.keySet()) { - if (!channelNameList.contains(key)) { - channelNameFilter.add(key); - } - } - for (String channelName : channelNameFilter) { - mapCounts.remove(channelName); - } - } - private JSONObject getMediaCountByData(List cacheEsMonitorEntityList, int topSize) throws Exception { - Map> maps = new HashMap<>(); - JSONObject jsonObject = new JSONObject(); + private List parseEmoMapsResult(Map docTypeMap , + Map> maps, + List dayList) { + List resultList = new ArrayList<>(); try { - for (ESMonitorEntity esMonitorEntity : cacheEsMonitorEntityList) { - String source = ""; - String docType = esMonitorEntity.getDocType(); - if (docType.equals(ESConstant.WEI_XIN)) { - source = ESConstant.CH_WEI_XIN; - } else { - source = esMonitorEntity.getSource(); - } - if (maps.containsKey(docType)) { - List list = maps.get(docType); - boolean flag = false; - for (DataPieCount dataPieCount : list) { - if (dataPieCount.getName().equals(source)) { - long value = dataPieCount.getValue() + 1; - dataPieCount.setValue(value); - 
flag = true; + for (String day : dayList) { + List dataCounts = maps.get(day); // 符合时间的统计结果 name = 日期 type =sentiment value = 数量 + long allValue = 0L; + for (Map.Entry docType : docTypeMap.entrySet()) { + DataCount resultDataCount = new DataCount(); + resultDataCount.setName(day); + resultDataCount.setType(docType.getValue()); + resultDataCount.setValue(0L); + if (null != dataCounts && dataCounts.size() > 0) { + for (DataCount dataCount : dataCounts) { + String channel = dataCount.getType(); + if (docType.getKey().equals(channel)) { + resultDataCount.setValue(dataCount.getValue()); + allValue = allValue + dataCount.getValue(); + } } } - if (!flag) { - DataPieCount dataPieCount = new DataPieCount(source, 1L, docType); - list.add(dataPieCount); - } - } else { - List list = new ArrayList<>(); - DataPieCount dataPieCount = new DataPieCount(source, 1L, docType); - list.add(dataPieCount); - maps.put(docType, list); + resultList.add(resultDataCount); } + DataCount resultDataCount = new DataCount(); + resultDataCount.setName(day); + resultDataCount.setType("all"); + resultDataCount.setValue(allValue); + resultList.add(resultDataCount); } - for (String str : maps.keySet()) { - List list = maps.get(str); - list.sort((o1, o2) -> { - Long value1 = o1.getValue(); - Long value2 = o2.getValue(); - return ((value2).intValue() - (value1).intValue()); - }); - if (list.size() >= topSize) { - list = list.subList(0, 10); - maps.put(str, list); - } - } - jsonObject.put(ConditionCommon.MEDIA_COUNTS, maps); - return jsonObject; - } catch (Exception e) { - logger.error("getMediaCountByData error = ", ExceptionUtils.getFullStackTrace(e)); - throw new Exception("getMediaCountByData Error", e); - } - } - private void orderSite(Map> orderMap, - Map> mapCounts, - String channel, List subjectChannel) { - if (subjectChannel.contains(channel) || channel.equals(ConditionCommon.MEDIA_COUNTS_ALL) || - (channel.equals(ESConstant.NEWS) )) { - if (mapCounts.containsKey(channel)) { - 
orderMap.put(channel, mapCounts.get(channel)); - } else { - orderMap.put(channel, new ArrayList<>()); - } + }catch (Exception e){ + e.printStackTrace(); } + return resultList; } @@ -552,16 +383,25 @@ public class SearchAnalysisService { return channelsList; } - private Map getChannelMap() { - Map channelMap = new HashMap<>(); - channelMap.put("social","社交媒体"); - channelMap.put("video","网络视频"); - channelMap.put("news","新闻资讯"); - channelMap.put("blog","博客智库"); - channelMap.put("bbs","论坛贴吧"); - channelMap.put("search","搜索引擎"); - channelMap.put("item","电商"); - channelMap.put("life","生活方式"); - return channelMap; + private String getDocTypeNum(String docType) { + Map channelMap = new HashMap<>(); + channelMap.put("social","0"); + channelMap.put("video","4"); + channelMap.put("news","1"); + channelMap.put("blog","2"); + channelMap.put("bbs","3"); + channelMap.put("search","6"); + channelMap.put("item","5"); + channelMap.put("life","7"); + return channelMap.get(docType); + } + + public static String getSentimentMap(String sentiment) { + Map sentimentMap = new HashMap<>(); + sentimentMap.put("0","Neutral"); + sentimentMap.put("0.0","Negative"); + sentimentMap.put("0.5","Neutral"); + sentimentMap.put("0.9","Positive"); + return sentimentMap.get(sentiment); } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java index d79e444..a157e5f 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchAuthorService.java @@ -14,7 +14,6 @@ import com.bfd.mf.common.web.repository.mysql.SentimentRepository; import com.bfd.mf.common.web.repository.mysql.base.SiteRepository; import com.bfd.mf.common.web.vo.params.QueryRequest; import com.bfd.mf.common.web.vo.view.monitor.ESMonitorEntity; -import com.bfd.mf.config.BFDApiConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.springframework.beans.factory.annotation.Autowired; @@ -28,7 +27,6 @@ import java.util.*; public class SearchAuthorService extends CrudService implements Serializable { private static Logger logger = LoggerFactory.getLogger(SearchAuthorService.class); private static SimpleDateFormat format = new SimpleDateFormat("YYYY-MM-DD HH:mm:SS"); - private static String subjectPre = "cl_major_"; @Autowired private ClusterService clusterService; @Autowired @@ -60,12 +58,12 @@ public class SearchAuthorService extends CrudServicedataList = esQueryAuthorService.queryAuthorListByKeyword(indexNames,queryRequest); logger.info("[SearchAuthorService] queryAuthorList: TotalCount = " + totalCount); - jsonObject.put("AllDocNumber", totalCount); - jsonObject.put("authorLists", dataList); + jsonObject.put(ESConstant.ALLDOCNUMBER, totalCount); + jsonObject.put(ESConstant.MONITORLISTS, dataList); }catch (Exception e){ logger.error("[SearchAuthorService] queryAuthorList eroor !"); - jsonObject.put("AllDocNumber",0); - jsonObject.put("authorLists", new ArrayList<>()); + jsonObject.put(ESConstant.ALLDOCNUMBER,0); + jsonObject.put(ESConstant.MONITORLISTS, new ArrayList<>()); } return jsonObject; @@ -77,7 +75,8 @@ public class SearchAuthorService extends CrudService dataList = esQueryAuthorService.queryAuthorByAuthorId(indexNames, queryRequest); jsonObject = parseAuthorMessage(dataList); @@ -136,7 +135,8 @@ public class SearchAuthorService extends CrudService dataList = esQueryAuthorService.queryContentsByAuthorId(indexNames, queryRequest); List esMonitorEntityLists = new ArrayList<>(); @@ -149,40 +149,42 @@ public class SearchAuthorService extends CrudService()); + jsonObject.put(ESConstant.MONITORLISTS, new ArrayList<>()); } return jsonObject; } private void parseQueryResult(List dataList, List esMonitorListEntity,String indexName) throws Exception { - List> site = siteRepository.findClusterByDel(0); + List> site = siteRepository.findsiteByDel(0); Map siteIdsMap = new HashMap<>(); + Map 
siteIconMap = new HashMap<>(); for (Map map: site) { siteIdsMap.put(map.get("cid").toString().toLowerCase(),map.get("site_id").toString()); + siteIconMap.put(map.get("cid").toString().toLowerCase(),map.get("site_icon").toString()); } if(null != dataList && dataList.size() > 0) { for (JSONObject json : dataList) { - ESMonitorEntity mainMonitorEntity = parseMainMessage(json, indexName,siteIdsMap); + ESMonitorEntity mainMonitorEntity = parseMainMessage(json, indexName,siteIdsMap,siteIconMap); esMonitorListEntity.add(mainMonitorEntity); } } } - private ESMonitorEntity parseMainMessage(JSONObject jsonObject,String indexName,Map siteIdsMap) throws Exception { + private ESMonitorEntity parseMainMessage(JSONObject jsonObject,String indexName,Map siteIdsMap,Map siteIconMap) throws Exception { // logger.info("[SearchAuthorService] parseMainMessage ... "); Map sourceAsMap = jsonObject; - // System.out.println(JSONObject.toJSONString(sourceAsMap)); String title = "标题为空"; if(sourceAsMap.containsKey(ESConstant.TITLE)) { title = sourceAsMap.get(ESConstant.TITLE).toString(); } String content = sourceAsMap.get(ESConstant.CONTENT).toString(); Long pubTime = Long.valueOf(sourceAsMap.get(ESConstant.PUBTIME).toString()); - String pubTimeStr = sourceAsMap.get(ESConstant.PUBTIME_STR).toString(); + String pubTimeStr = sourceAsMap.get(ESConstant.PUBTIMESTR).toString(); String source = sourceAsMap.get(ESConstant.SOURCE).toString(); String enSource = sourceAsMap.get(ESConstant.EN_SOURCE).toString(); String siteId = siteIdsMap.get(enSource); + String icon = siteIconMap.get(enSource); String docType = sourceAsMap.get(ESConstant.DOC_TYPE).toString(); String channel = sourceAsMap.get(ESConstant.CHANNEL).toString(); if (docType.equals("social")) { @@ -190,7 +192,7 @@ public class SearchAuthorService extends CrudService> videoList = (List>) newjsonObject.get("videoList"); ESMonitorEntity esMonitorEntity = new ESMonitorEntity(); @@ -251,6 +252,7 @@ public class SearchAuthorService extends 
CrudService implements Serializable { private static Logger logger = LoggerFactory.getLogger(SearchDataService.class); - private static String subjectPre = "cl_major_"; + + @Autowired + private BFDApiConfig bfdApiConfig; @Autowired private ClusterService clusterService; @Autowired - private ESCommonService esCommonService; + private EsCommonService esCommonService; @Autowired private ESServerUtils esServerUtils; @Autowired - private SearchAuthorService searchAuthorService; - @Autowired - private EsQueryAuthorService esQueryAuthorService; - @Autowired private EsQueryServiceForSQMini esQueryServiceForSQMini; @Autowired private EsQueryServiceForSQNormal esQueryServiceForSQNormal; @@ -69,7 +64,7 @@ public class SearchDataService extends CrudService currentIndexList, -// Cluster cluster,QueryBuilder queryBuilder) { -// if(sortFlag.equals("comment")){ -// sortFlag = "commentsCount"; -// } -// BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().filter(queryBuilder); -// SearchRequestBuilder resultBuilder = esServerUtils -// .buildSearchRequestBuilder(cluster.getId(), currentIndexList) -// .setSize(10) -// .setQuery(boolQueryBuilder) -// .setScroll(TimeValue.timeValueMinutes(8)) -// .addSort(sortFlag, orderFlag.equals(ESConstant.ASC) ? 
SortOrder.ASC : SortOrder.DESC) -// .setFetchSource(ESConstant.FIELD_LIST_MONITOR_NORMAL, null); -// return resultBuilder.get(); -// } /** * 遍历查询结果调用解析组装方法 1 @@ -114,17 +93,21 @@ public class SearchDataService extends CrudService dataList, List esMonitorListEntity,Integer searchType) throws Exception { - List> site = siteRepository.findClusterByDel(0); - Map siteIdsMap = new HashMap<>(); - for (Map map: site) { - siteIdsMap.put(map.get("cid").toString().toLowerCase(),map.get("site_id").toString()); - } - if(null != dataList && dataList.size() > 0) { - for (JSONObject json : dataList) { - ESMonitorEntity mainMonitorEntity = parseMainMessage(json,searchType,siteIdsMap); - esMonitorListEntity.add(mainMonitorEntity); + private void parseQueryResult(List dataList, List esMonitorListEntity,Integer searchType) { + try { + List> site = siteRepository.findsiteByDel(0); + Map> siteMap = new HashMap<>(); + for (Map map : site) { + siteMap.put(map.get("cid").toString().toLowerCase(),map); + } + if (null != dataList && dataList.size() > 0) { + for (JSONObject json : dataList) { + ESMonitorEntity mainMonitorEntity = parseMainMessage(json, searchType, siteMap); + esMonitorListEntity.add(mainMonitorEntity); + } } + }catch (Exception e){ + e.printStackTrace(); } } /** @@ -139,16 +122,16 @@ public class SearchDataService extends CrudService siteIdsMap) throws Exception { - Map sourceAsMap = jsonObject; - String title = "标题为空"; - if(sourceAsMap.containsKey(ESConstant.TITLE)) { - title = sourceAsMap.get(ESConstant.TITLE).toString(); - } - String docType = sourceAsMap.get(ESConstant.DOC_TYPE).toString(); - String channel = sourceAsMap.get(ESConstant.CHANNEL).toString(); - String content = ""; - String author = ""; - String quoteCount = "0"; - String attitudeCount = "0"; - String enSource = sourceAsMap.get(ESConstant.EN_SOURCE).toString(); - String siteId = siteIdsMap.get(enSource); - if(searchType == 0 && docType.equals(ESConstant.ITEM)){ - content = 
sourceAsMap.get(ESConstant.PRODUCTPARAMETER).toString(); - if(!sourceAsMap.get(ESConstant.AUTHORNICKNAME).equals("")){ - author = sourceAsMap.get(ESConstant.AUTHORNICKNAME).toString(); - } - if(!sourceAsMap.get(ESConstant.FIRSTLISTBRAND).equals("")){ - author = author + "|" + sourceAsMap.get(ESConstant.FIRSTLISTBRAND); - } - if(!sourceAsMap.get(ESConstant.BRAND).equals("")){ - author = author + "|" + sourceAsMap.get(ESConstant.BRAND) ; - } - // 价格对应到转发,销量对应到点赞 - quoteCount = sourceAsMap.get(ESConstant.PRICE).toString(); - attitudeCount = sourceAsMap.get(ESConstant.POSTCOUNT).toString(); - } else { - content = sourceAsMap.get(ESConstant.CONTENT).toString(); - author = sourceAsMap.get(ESConstant.AUTHOR).toString(); - quoteCount = sourceAsMap.get(ESConstant.QUOTE_COUNT).toString(); - attitudeCount = sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString(); - } - - Long pubTime = Long.valueOf(sourceAsMap.get(ESConstant.PUBTIME).toString()); - String pubTimeStr = sourceAsMap.get(ESConstant.PUBTIME_STR).toString(); - String source = ""; - if(sourceAsMap.containsKey(ESConstant.SOURCE)) { - source = sourceAsMap.get(ESConstant.SOURCE).toString(); - } + private ESMonitorEntity parseMainMessage(JSONObject jsonObject,Integer searchType, + Map> siteMap) throws Exception { + ESMonitorEntity esMonitorEntity = new ESMonitorEntity(); + try { + Map sourceAsMap = jsonObject; + String docType = sourceAsMap.get(ESConstant.DOC_TYPE).toString(); + String channel = sourceAsMap.get(ESConstant.CHANNEL).toString(); + String title = "标题为空"; + String content = ""; + + if (sourceAsMap.containsKey(ESConstant.TITLE)) { + title = sourceAsMap.get(ESConstant.TITLE).toString(); + } + if(searchType == 2){ + title = ""; + }else { + if (docType.equals(ESConstant.SOCIAL)) { + title = sourceAsMap.get(ESConstant.AUTHOR).toString(); + } + } - if (docType.equals("social")) { - title = sourceAsMap.get(ESConstant.AUTHOR).toString(); - } - String authorId = ""; - if(sourceAsMap.containsKey("authorId")){ - 
authorId = sourceAsMap.get(ESConstant.AUTHORID).toString(); - } - String url = sourceAsMap.get(ESConstant.URL).toString(); - String docId = sourceAsMap.get(ESConstant.DOC_ID).toString(); - String dataId = sourceAsMap.get(ESConstant.DATA_ID).toString(); - String subjectId = ""; // 专题ID - String taskId = ""; // 任务ID - if(sourceAsMap.containsKey("subjectId")) { - subjectId = (String) sourceAsMap.get("subjectId"); - } - if(sourceAsMap.containsKey("taskId")) { - taskId = sourceAsMap.get("taskId").toString(); - } + String author = ""; + String quoteCount = "0"; + String attitudeCount = "0"; + String enSource = sourceAsMap.get(ESConstant.EN_SOURCE).toString(); + String source = ""; + String price = ""; + if (sourceAsMap.containsKey(ESConstant.SOURCE)) { + source = sourceAsMap.get(ESConstant.SOURCE).toString(); + } + String siteId = ""; + String icon = ""; + String siteType = ""; + if(enSource.equals("sina")){ + siteId = "183"; + icon = ""; + siteType = ""; + }else { + Map siteOtherMap = siteMap.get(enSource); + if (siteOtherMap.containsKey("site_id")) { + siteId = siteMap.get(enSource).get("site_id").toString(); + } + if (siteOtherMap.containsKey("site_icon")) { + icon = siteMap.get(enSource).get("site_icon").toString(); + } + if (siteOtherMap.containsKey("site_type")) { + siteType = siteMap.get(enSource).get("site_type").toString(); + } + } - // 文件、图片、视频 - List filePath = new ArrayList(); - if(!("").equals(sourceAsMap.get(ESConstant.FILEPATH)) && null != sourceAsMap.get(ESConstant.FILEPATH)){ - filePath = (List) sourceAsMap.get(ESConstant.FILEPATH); - } - List imagePath = new ArrayList(); - if(null != sourceAsMap.get(ESConstant.IMAGEPATH) && !("").equals(sourceAsMap.get(ESConstant.IMAGEPATH))) { - imagePath = (List) sourceAsMap.get(ESConstant.IMAGEPATH); - } - List videoPath = new ArrayList(); - if(null != sourceAsMap.get(ESConstant.VIDEOPATH) && !("").equals(sourceAsMap.get(ESConstant.VIDEOPATH))) { - videoPath = (List) sourceAsMap.get(ESConstant.VIDEOPATH); - } - 
boolean is = true; - String isDownload = sourceAsMap.get(ESConstant.ISDOWNLOAD).toString(); - if(!isDownload.equals("true")){ - is = false; - } - String vodeoUrl = ""; - if(sourceAsMap.containsKey(ESConstant.VIDEOURL)) { - vodeoUrl = sourceAsMap.get(ESConstant.VIDEOURL).toString(); - } - List filePathSize = new ArrayList(); - if(!("").equals(sourceAsMap.get(ESConstant.FILEPATHSIZE)) && null != sourceAsMap.get(ESConstant.FILEPATHSIZE)){ - filePathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.FILEPATHSIZE).toString()); - } - List imagePathSize = new ArrayList(); - if(null != sourceAsMap.get(ESConstant.IMAGEPATHSIZE) && !("[]").equals(sourceAsMap.get(ESConstant.IMAGEPATHSIZE))) { - imagePathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.IMAGEPATHSIZE).toString()); - } - List videoPathSize = new ArrayList(); - if(null != sourceAsMap.get(ESConstant.VIDEOPATHSIZE) && !("[]").equals(sourceAsMap.get(ESConstant.VIDEOPATHSIZE))) { - videoPathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString()); - } + String productParameter = ""; + if(sourceAsMap.containsKey(ESConstant.PRODUCTPARAMETER)){ + productParameter = sourceAsMap.get(ESConstant.PRODUCTPARAMETER).toString(); + } + if (searchType == 0 && docType.equals(ESConstant.ITEM)) { // 电商主贴 + content = productParameter; + if (!sourceAsMap.get(ESConstant.AUTHORNICKNAME).equals("")) { + author = sourceAsMap.get(ESConstant.AUTHORNICKNAME).toString(); + } + if (!sourceAsMap.get(ESConstant.FIRSTLISTBRAND).equals("")) { + author = author + "|" + sourceAsMap.get(ESConstant.FIRSTLISTBRAND); + } + if (!sourceAsMap.get(ESConstant.BRAND).equals("")) { + author = author + "|" + sourceAsMap.get(ESConstant.BRAND); + } + // 价格对应到转发,销量对应到点赞 + quoteCount = sourceAsMap.get(ESConstant.PRICE).toString(); + price = sourceAsMap.get(ESConstant.PRICE).toString(); + attitudeCount = sourceAsMap.get(ESConstant.POSTCOUNT).toString(); + } else if(searchType == 2){ + content = productParameter; + }else{ + 
content = sourceAsMap.get(ESConstant.CONTENT).toString(); + author = sourceAsMap.get(ESConstant.AUTHOR).toString(); + quoteCount = sourceAsMap.get(ESConstant.QUOTE_COUNT).toString(); + attitudeCount = sourceAsMap.get(ESConstant.ATTITUDES_COUNT).toString(); + price = sourceAsMap.get(ESConstant.PRICE).toString(); + } - Integer primary = 1; - if(sourceAsMap.containsKey(ESConstant.PRIMARY)) { - primary = (Integer) sourceAsMap.get(ESConstant.PRIMARY); - } - String crawlDataFlag = ""; - if(sourceAsMap.containsKey(ESConstant.CRAWLDATAFLAG)) { - crawlDataFlag = (String) sourceAsMap.get(ESConstant.CRAWLDATAFLAG); - } + Long pubTime = Long.valueOf(sourceAsMap.get(ESConstant.PUBTIME).toString()); + String pubTimeStr = sourceAsMap.get(ESConstant.PUBTIMESTR).toString(); + String authorId = ""; + if (sourceAsMap.containsKey(ESConstant.AUTHORID)) { + authorId = sourceAsMap.get(ESConstant.AUTHORID).toString(); + } + String url = sourceAsMap.get(ESConstant.URL).toString(); + String userUrl = sourceAsMap.get(ESConstant.USER_URL).toString(); - // 译文标题和正文 - String translateTitle = ""; - String translateContent = ""; - if(sourceAsMap.containsKey(ESConstant.TRANSLATETITLE)) { - translateTitle = (String) sourceAsMap.get(ESConstant.TRANSLATETITLE); - } - if(sourceAsMap.containsKey(ESConstant.TRANSLATECONTENT)) { - translateContent = (String) sourceAsMap.get(ESConstant.TRANSLATECONTENT); - } - // 词云 - List hlKeywords = (List) sourceAsMap.get(ESConstant.HL_KEYWORDS); + String docId = sourceAsMap.get(ESConstant.DOC_ID).toString(); + String dataId = sourceAsMap.get(ESConstant.DATA_ID).toString(); - ESMonitorEntity esMonitorEntity = new ESMonitorEntity(); - try { - esMonitorEntity.setDocId(docId); - esMonitorEntity.setDataId(dataId); - esMonitorEntity.setDocType(docType); - esMonitorEntity.setChannel(channel); - esMonitorEntity.setSource(source); - esMonitorEntity.setSiteId(siteId); - esMonitorEntity.setEnSource(enSource); - esMonitorEntity.setUrl(url); - esMonitorEntity.setVideoUrl(vodeoUrl); 
- esMonitorEntity.setPostSource((String) sourceAsMap.getOrDefault(ESConstant.POST_SOURCE, "")); - esMonitorEntity.setSysSentiment(Double.valueOf(sourceAsMap.get(ESConstant.SYS_SENTIMENT).toString())); - // 评论数、转发数、点赞数 - esMonitorEntity.setCommentsCount(Integer.valueOf(sourceAsMap.getOrDefault(ESConstant.COMMENTS_COUNT, 0).toString())); - esMonitorEntity.setQuoteCount(quoteCount); - esMonitorEntity.setAttitudesCount(attitudeCount); + String subjectId = ""; // 专题ID + String taskId = ""; // 任务ID + if (sourceAsMap.containsKey(ESConstant.SUBJECT_ID)) { + subjectId = (String) sourceAsMap.get(ESConstant.SUBJECT_ID); + } + if (sourceAsMap.containsKey(ESConstant.TASK_ID)) { + taskId = sourceAsMap.get(ESConstant.TASK_ID).toString(); + } - esMonitorEntity.setCrawlTime(Long.valueOf(sourceAsMap.get(ESConstant.CRAWLTIME).toString())); + // 文件、图片、视频 + List filePath = new ArrayList(); + if (!("").equals(sourceAsMap.get(ESConstant.FILEPATH)) && null != sourceAsMap.get(ESConstant.FILEPATH)) { + filePath = (List) sourceAsMap.get(ESConstant.FILEPATH); + } + List imagePath = new ArrayList(); + if (null != sourceAsMap.get(ESConstant.IMAGEPATH) && !("").equals(sourceAsMap.get(ESConstant.IMAGEPATH))) { + imagePath = (List) sourceAsMap.get(ESConstant.IMAGEPATH); + } + List videoPath = new ArrayList(); + if (null != sourceAsMap.get(ESConstant.VIDEOPATH) && !("").equals(sourceAsMap.get(ESConstant.VIDEOPATH))) { + videoPath = (List) sourceAsMap.get(ESConstant.VIDEOPATH); + } + boolean is = true; + String isDownload = sourceAsMap.get(ESConstant.ISDOWNLOAD).toString(); + if (!isDownload.equals("true")) { + is = false; + } + String vodeoUrl = ""; + if (sourceAsMap.containsKey(ESConstant.VIDEOURL)) { + vodeoUrl = sourceAsMap.get(ESConstant.VIDEOURL).toString(); + } + List filePathSize = new ArrayList(); + if(sourceAsMap.containsKey(ESConstant.FILEPATHSIZE)) { + if (!("").equals(sourceAsMap.get(ESConstant.FILEPATHSIZE)) && null != sourceAsMap.get(ESConstant.FILEPATHSIZE)) { + filePathSize = 
JSONObject.parseArray(sourceAsMap.get(ESConstant.FILEPATHSIZE).toString()); + } + } + List imagePathSize = new ArrayList(); + if(sourceAsMap.containsKey(ESConstant.IMAGEPATHSIZE)) { + if (null != sourceAsMap.get(ESConstant.IMAGEPATHSIZE) && !("[]").equals(sourceAsMap.get(ESConstant.IMAGEPATHSIZE))) { + imagePathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.IMAGEPATHSIZE).toString()); + } + } + List videoPathSize = new ArrayList(); + if(sourceAsMap.containsKey(ESConstant.VIDEOPATHSIZE)) { + if (null != sourceAsMap.get(ESConstant.VIDEOPATHSIZE) + && !("[]").equals(sourceAsMap.get(ESConstant.VIDEOPATHSIZE)) + && !("{\"\":null}").equals(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString())) { + if(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString().contains("url")) { + videoPathSize = JSONObject.parseArray(sourceAsMap.get(ESConstant.VIDEOPATHSIZE).toString()); + } + } + } - esMonitorEntity.setIsDownload(is); - esMonitorEntity.setFilePath(filePath); - esMonitorEntity.setImagePath(imagePath); - esMonitorEntity.setVideoPath(videoPath); - esMonitorEntity.setFilePathSize(filePathSize); - esMonitorEntity.setImagePathSize(imagePathSize); - esMonitorEntity.setVideoPathSize(videoPathSize); - // 返回索引名称,查询详情时使用 - esMonitorEntity.setSubjectId(subjectId); - esMonitorEntity.setTaskId(taskId); - // 返回 主贴、回帖、用户的标识字段 - esMonitorEntity.setPrimary(primary); - // 数据采集标识字段 - esMonitorEntity.setCrawlDataFlag(crawlDataFlag); - // 数据详情 - esMonitorEntity.setTitle(title); - esMonitorEntity.setContent(content); - esMonitorEntity.setAuthor(author); - esMonitorEntity.setAuthorId(authorId); - esMonitorEntity.setPubTime(pubTime); - esMonitorEntity.setPubTimeStr(pubTimeStr); - // 译文 - esMonitorEntity.setTranslateTitle(translateTitle); - esMonitorEntity.setTranslateContent(translateContent); + Integer primary = 1; + if (sourceAsMap.containsKey(ESConstant.PRIMARY)) { + primary = Integer.parseInt(sourceAsMap.get(ESConstant.PRIMARY).toString()); + } + String crawlDataFlag = ""; + if 
(sourceAsMap.containsKey(ESConstant.CRAWLDATAFLAG)) { + crawlDataFlag = (String) sourceAsMap.get(ESConstant.CRAWLDATAFLAG); + } + // 译文标题和正文 + String translateTitle = ""; + String translateContent = ""; + if (sourceAsMap.containsKey(ESConstant.TRANSLATETITLE)) { + translateTitle = (String) sourceAsMap.get(ESConstant.TRANSLATETITLE); + } + if (sourceAsMap.containsKey(ESConstant.TRANSLATECONTENT)) { + translateContent = (String) sourceAsMap.get(ESConstant.TRANSLATECONTENT); + } // 词云 - esMonitorEntity.setHlKeyWords(hlKeywords); - } catch (Exception e) { + List hlKeywords = (List) sourceAsMap.get(ESConstant.HL_KEYWORDS); + // 视频分析结果 + String asrText = ""; + List ocrText = new ArrayList<>(); + if(sourceAsMap.containsKey(ESConstant.ASRTEXT)) { + asrText = sourceAsMap.get(ESConstant.ASRTEXT).toString(); + } + if(sourceAsMap.containsKey(ESConstant.OCRTEXT)) { + ocrText = (List) sourceAsMap.get(ESConstant.OCRTEXT); + } + // 如果是用户数据,需要获取下面四个字段值 + String fansCount = ""; + String friendsCount = ""; + String postCount = ""; + String location = ""; + if(searchType == 2) { + if (sourceAsMap.containsKey(ESConstant.FANS_COUNT)) { + fansCount = sourceAsMap.get(ESConstant.FANS_COUNT).toString(); + } + if (sourceAsMap.containsKey(ESConstant.FRIENDS_COUNT)) { + friendsCount = sourceAsMap.get(ESConstant.FRIENDS_COUNT).toString(); + } + if (sourceAsMap.containsKey(ESConstant.POST_COUNT)) { + postCount = sourceAsMap.get(ESConstant.POST_COUNT).toString(); + } + if (sourceAsMap.containsKey(ESConstant.WEIBO_LOCATION)) { + location = sourceAsMap.get(ESConstant.WEIBO_LOCATION).toString(); + } + } + + // 这个项目新增的三个字段 + String valueLabel = ""; + String categoryLabel = ""; + String tag = ""; + if (sourceAsMap.containsKey("valueLabel") && null != sourceAsMap.get(ESConstant.VALUELABEL)) { +// System.out.println("11111 "+sourceAsMap.get(ESConstant.VALUELABEL)); + valueLabel = sourceAsMap.get("valueLabel").toString(); + } + if(sourceAsMap.containsKey("categoryLabel")){ + categoryLabel = 
sourceAsMap.get("categoryLabel").toString(); + } + if(sourceAsMap.containsKey("tag")){ + tag = sourceAsMap.get("tag").toString(); + } + + + + try { + esMonitorEntity.setDataId(dataId); + esMonitorEntity.setDocId(docId); + esMonitorEntity.setChannel(channel); + esMonitorEntity.setSource(source); + esMonitorEntity.setEnSource(enSource); + esMonitorEntity.setUrl(url); + esMonitorEntity.setTitle(title); + esMonitorEntity.setTranslateTitle(translateTitle); + esMonitorEntity.setPubTimeStr(pubTimeStr); + esMonitorEntity.setAuthor(author); + esMonitorEntity.setAuthorId(authorId); + esMonitorEntity.setContent(content); + esMonitorEntity.setTranslateContent(translateContent); + esMonitorEntity.setPrice(price); + esMonitorEntity.setProductParameter(productParameter); + esMonitorEntity.setCrawlTimeStr(sourceAsMap.get(ESConstant.CRAWLTIMESTR).toString()); + esMonitorEntity.setCrawlDataFlag(crawlDataFlag); + esMonitorEntity.setHlKeyWords(hlKeywords); + // 评论数、转发数、点赞数 + esMonitorEntity.setCommentsCount(Integer.valueOf(sourceAsMap.getOrDefault(ESConstant.COMMENTS_COUNT, 0).toString())); + esMonitorEntity.setQuoteCount(quoteCount); + esMonitorEntity.setAttitudesCount(attitudeCount); + esMonitorEntity.setOcrText(ocrText); + esMonitorEntity.setAsrText(asrText); + // 用户字段 + esMonitorEntity.setUserUrl(userUrl); + esMonitorEntity.setFansCount(fansCount); + esMonitorEntity.setFriendsCount(friendsCount); + esMonitorEntity.setPostCount(postCount); + esMonitorEntity.setLocation(location); + + //=============================================================== + + esMonitorEntity.setDocType(docType); + esMonitorEntity.setSiteId(siteId); + esMonitorEntity.setSiteType(siteType); + esMonitorEntity.setIcon(icon); + esMonitorEntity.setVideoUrl(vodeoUrl); + esMonitorEntity.setPostSource((String) sourceAsMap.getOrDefault(ESConstant.POST_SOURCE, "")); + esMonitorEntity.setSysSentiment(Double.valueOf(sourceAsMap.get(ESConstant.SYS_SENTIMENT).toString())); + esMonitorEntity.setIsDownload(is); + 
esMonitorEntity.setFilePath(filePath); + esMonitorEntity.setImagePath(imagePath); + esMonitorEntity.setVideoPath(videoPath); + esMonitorEntity.setFilePathSize(filePathSize); + esMonitorEntity.setImagePathSize(imagePathSize); + esMonitorEntity.setVideoPathSize(videoPathSize); + // 返回索引名称,查询详情时使用 + esMonitorEntity.setSubjectId(subjectId); + esMonitorEntity.setTaskId(taskId); + // 返回 主贴、回帖、用户的标识字段 + esMonitorEntity.setPrimary(primary); + esMonitorEntity.setPubTime(pubTime); + esMonitorEntity.setCrawlTime(Long.valueOf(sourceAsMap.get(ESConstant.CRAWLTIME).toString())); + + esMonitorEntity.setValueLabel(valueLabel); + esMonitorEntity.setCategoryLabel(categoryLabel); + esMonitorEntity.setTag(tag); + + } catch (Exception e) { + e.printStackTrace(); + } + }catch (Exception e){ e.printStackTrace(); } return esMonitorEntity; } + public JSONObject queryComments(QueryRequest queryRequest) { // 先确认一下 要查的主贴是属于 专题还是 全部数据,因此需要查 subjectId,如果没有 subjectId 这个字段说明要查的是 日期索引的ES Cluster cluster = null; List currentIndexList = new ArrayList<>(); String subjectId = queryRequest.getSubjectId(); if(null != queryRequest.getSubjectId() && !("").equals(subjectId)){ // 如果是专题,去专题的索引查就行 - subjectId = subjectPre + subjectId; cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 + subjectId = cluster.getPrefixIndexPattern() +"_"+ subjectId; currentIndexList.add(subjectId); }else{ // 如果是全部数据,就直接去 渠道对应的索引查,渠道可以从 docId 中截取出来 logger.info("[SearchDataService] queryComment: 查询 全局数据"); @@ -401,9 +495,9 @@ public class SearchDataService extends CrudService result = response[i].getSourceAsMap(); jsonObject.putAll(result); - jsonObject.put("siteId",siteId); + jsonObject.put(ESConstant.SITEID,siteId); comments.add(jsonObject); } @@ -466,15 +561,15 @@ public class SearchDataService extends CrudService currentIndexList = new ArrayList<>(); String subjectId = queryRequest.getSubjectId(); if(!("").equals(subjectId) && null != subjectId){ // 如果是专题,去专题的索引查就行 - subjectId = 
subjectPre + subjectId; cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 + subjectId = cluster.getPrefixIndexPattern() +"_"+ subjectId; }else{ // 如果是全部数据,就直接去 渠道对应的索引查,渠道可以从 docId 中截取出来 - logger.info("[queryOneDataByDocId] queryOneDataByDocId 查询 全局数据 : " + subjectId); + logger.info("[SearchDataService] queryOneDataByDocId 查询 全局数据 : " + subjectId); cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.normal_cluster_type); // 109 } currentIndexList.add(subjectId); String docId = queryRequest.getDocId(); // 根据ID 获取一条详情数据 JSONObject jsonObject = getOneDataByDocId(docId, cluster, currentIndexList); + if(!jsonObject.containsKey(ESConstant.VALUELABEL) || null == jsonObject.get(ESConstant.VALUELABEL)){ + jsonObject.put(ESConstant.VALUELABEL,""); + }else{ + jsonObject.put(ESConstant.VALUELABEL,jsonObject.get(ESConstant.VALUELABEL).toString()); + } + if(!jsonObject.containsKey(ESConstant.CATEGORYLABEL)){ + jsonObject.put(ESConstant.CATEGORYLABEL,""); + } + // 替换几个 pathSize 中的链接的前缀 + if(jsonObject.containsKey(ESConstant.IMAGEPATHSIZE)){ //http://172.18.1.113:8080 + List>imagePathSize = (List>) jsonObject.get(ESConstant.IMAGEPATHSIZE); + for (Map imagePath: imagePathSize) { + String url = imagePath.get(ESConstant.URL); + url = url.replace(bfdApiConfig.getGoFastDomain(),""); + imagePath.put(ESConstant.URL,url); + } + } + if(jsonObject.containsKey(ESConstant.VIDEOPATHSIZE)){ //http://172.18.1.113:8080 + List>videoPathSize = (List>) jsonObject.get(ESConstant.VIDEOPATHSIZE); + for (Map videoPath: videoPathSize) { + String url = videoPath.get(ESConstant.URL); + url = url.replace(bfdApiConfig.getGoFastDomain(),""); + videoPath.put(ESConstant.URL,url); + } + } + if(jsonObject.containsKey(ESConstant.FILEPATHSIZE)){ //http://172.18.1.113:8080 + List>filePathSize = (List>) jsonObject.get(ESConstant.FILEPATHSIZE); + for (Map filePath: filePathSize) { + String url = filePath.get(ESConstant.URL); + url = 
url.replace(bfdApiConfig.getGoFastDomain(),""); + filePath.put(ESConstant.URL,url); + } + } + String enSource = jsonObject.getString(ESConstant.EN_SOURCE); + if(null != enSource) { + jsonObject = getSite(jsonObject, enSource); + } // 如果是社交媒体类的数据,需要将 author 放到 title 字段中做显示 jsonObject = socialDataChangeAuthorAndTitle(jsonObject); // 根据用户ID 查询一下用户信息,如果没有的话,就把原来的用户名放进去。 @@ -513,11 +645,39 @@ public class SearchDataService extends CrudService 0) { - String siteId = queryRequest.getSiteId(); - jsonObject.put("siteId", siteId); + return jsonObject; + } + + private JSONObject getSite(JSONObject jsonObject, String enSource) { + List> site = siteRepository.findSiteByEnSource(enSource); + Map> siteMap = new HashMap<>(); + for (Map map : site) { + siteMap.put(map.get("cid").toString().toLowerCase(),map); + } + String siteId = ""; + String icon = ""; + String siteType = ""; + Map siteOtherMap = siteMap.get(enSource); + if(enSource.equals("sina")){ + siteId = "183"; + icon = ""; + siteType = ""; + }else { + if (siteOtherMap.containsKey("site_id")) { + siteId = siteMap.get(enSource).get("site_id").toString(); + } + if (siteOtherMap.containsKey("site_icon")) { + icon = siteMap.get(enSource).get("site_icon").toString(); + } + if (siteOtherMap.containsKey("site_type")) { + siteType = siteMap.get(enSource).get("site_type").toString(); + } } + jsonObject.put(ESConstant.SITEID,siteId); + jsonObject.put(ESConstant.SITETYPE,siteType); + jsonObject.put(ESConstant.SITEICON,icon); return jsonObject; + } private JSONObject getItemDataShopMeggage(JSONObject jsonObject,String docType) { @@ -536,7 +696,7 @@ public class SearchDataService extends CrudService imagePathMap = new HashMap<>(); String url = "http:" + img ; - imagePathMap.put("url",url); + imagePathMap.put(ESConstant.URL,url); imagePathMap.put("size","4KB"); imagePathMap.put("videoTime",""); imagePathMap.put("resolution","50×50"); @@ -554,32 +714,12 @@ public class SearchDataService extends CrudService dataList = 
esQueryAuthorService.queryAuthorByAuthorId(indexList, queryRequest); -// JSONObject newJsonObject = searchAuthorService.parseAuthorMessage(dataList); -// if (newJsonObject.toJSONString().equals("{}")) { -// newJsonObject.put(ESConstant.AUTHOR, author); -// jsonObject.put(ESConstant.AUTHOR, newJsonObject); -// }else { -// jsonObject.put(ESConstant.AUTHOR, newJsonObject); -// } -// } else { JSONObject newJsonObject = new JSONObject(); newJsonObject.put(ESConstant.AUTHOR, jsonObject.get(ESConstant.AUTHOR)); jsonObject.put(ESConstant.AUTHOR, newJsonObject); -// } } return jsonObject; - } /** @@ -587,13 +727,13 @@ public class SearchDataService extends CrudService videoPath = (List) jsonObject.get(ESConstant.VIDEOPATH); -// List> videoPathSize = (List>) jsonObject.get(ESConstant.VIDEOPATHSIZE); -// List> videoList = new ArrayList<>(); -// if(videoPathSize.size() > 0) { -// Map videoSizeMap = videoPathSize.get(0); -// String videoUrl = ""; -// String size = "0"; -// if (videoPath.size() > 0 && null != videoSizeMap) { -// videoUrl = videoPath.get(0); -// if (videoSizeMap.containsKey(videoUrl)) { -// size = videoSizeMap.get(videoUrl); -// } -// } -// Map videoPathMap = new HashMap<>(); -// videoPathMap.put(ESConstant.URL,videoUrl); -// videoPathMap.put("size",size); -// videoPathMap.put(ESConstant.RESOLUTION,resolution); -// videoPathMap.put(ESConstant.VIDEOTIME,videoTime); -// videoList.add(videoPathMap); -// } -// jsonObject.put("videoList",videoList); -// } -// jsonObject.remove(ESConstant.VIDEOPATHSIZE); -// jsonObject.remove(ESConstant.RESOLUTION); -// jsonObject.remove(ESConstant.VIDEOTIME); -// return jsonObject; -// } private JSONObject getOneDataByDocId(String docId, Cluster cluster, List currentIndexList) { JSONObject jsonObject = new JSONObject(); @@ -644,21 +753,25 @@ public class SearchDataService extends CrudService0) { Map result = searchDataResponse.getHits().getHits()[0].getSourceAsMap(); if (result.size() > 0) { result.entrySet().stream() .forEach(entry 
-> { - if (entry.getKey().equals("filePath") && entry.getValue().equals("")) { + if (entry.getKey().equals(ESConstant.FILEPATH) && entry.getValue().equals("")) { jsonObject.put(entry.getKey(), new ArrayList<>()); } else if (entry.getKey().equals(ESConstant.FILEPATHSIZE) || entry.getKey().equals(ESConstant.IMAGEPATHSIZE) || entry.getKey().equals(ESConstant.VIDEOPATHSIZE)) { - jsonObject.put(entry.getKey(), JSONObject.parseArray(entry.getValue().toString())); + if(entry.getValue().toString() .contains("url")) { + jsonObject.put(entry.getKey(), JSONObject.parseArray(entry.getValue().toString())); + }else{ + jsonObject.put(entry.getKey(),new ArrayList<>()); + } } else { jsonObject.put(entry.getKey(), entry.getValue()); } @@ -668,105 +781,19 @@ public class SearchDataService extends CrudService currentIndexList) { - Long count = 0L ; - try { - // QueryBuilder queryBuilder = QueryBuilders.termQuery(ESConstant.PRIMARY, 1); - // BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().must(queryBuilder); - SearchRequestBuilder builder = esServerUtils - .buildSearchRequestBuilder(cluster.getId(), currentIndexList); - // .setQuery(boolQueryBuilder); - SearchResponse searchDataResponse = builder.execute().actionGet(); - count = searchDataResponse.getHits().getTotalHits(); - }catch (Exception e){ - e.printStackTrace(); - } - return count; - } - - private Long getTodayTotalCount(Cluster cluster, List currentIndexList) { - Long count = 0L ; - try { - long current=System.currentTimeMillis(); - long zero=current/(1000*3600*24)*(1000*3600*24)-TimeZone.getDefault().getRawOffset(); - Long startTime = new Timestamp(zero).getTime(); - - //QueryBuilder queryBuilder1 = QueryBuilders.termQuery(ESConstant.PRIMARY, 1); - RangeQueryBuilder rangeQueryBuilder = QueryBuilders.rangeQuery(ESConstant.CRAWLTIME).gte(startTime).lt(current); - BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery() - // .must(queryBuilder1) - .must(rangeQueryBuilder); - - SearchRequestBuilder builder = 
esServerUtils - .buildSearchRequestBuilder(cluster.getId(), currentIndexList) - .setQuery(boolQueryBuilder); - - System.out.println("todayTotalCount : " + builder); - SearchResponse searchDataResponse = builder.execute().actionGet(); - count = searchDataResponse.getHits().getTotalHits(); - }catch (Exception e){ - e.printStackTrace(); - } - return count; - } - - public JSONObject queryCountByCrawlDataFlag(QueryRequest queryRequest) { - JSONObject result = new JSONObject(); - try{ - - }catch (Exception e){ - e.printStackTrace(); - } - return result; - } // 之前的 queryDataList public JSONObject queryDataList(QueryRequest queryRequest) throws Exception { // 先看一下有没有 subjectId ,如果没有的话标识查询的是 全部数据 JSONObject jsonObject = new JSONObject(); -// if(null != queryRequest.getSubjectId() && queryRequest.getSubjectId().equals("all")){ -// logger.info("[SearchDataService] 查询 全局数据"); -// cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.normal_cluster_type); // 109 -// currentIndexList = subjectQueryDataService.getIndexListByTimeRange(cluster, queryRequest.getStartTime(),queryRequest.getEndTime()); -// Long totalCount = getTotalCount(cluster,currentIndexList); -// jsonObject.put("foldDocAllNumber",totalCount); -// } - Cluster cluster = null; List currentIndexList = new ArrayList<>(); // 获取ES的参数及要查询的索引列表 String subjectId = queryRequest.getSubjectId(); cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster,subjectId); -// if(subjectId.equals("all")) { // 查全局数据 -// cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.normal_cluster_type); // 109 -// currentIndexList = subjectQueryDataService.getIndexListByTimeRange(cluster, queryRequest.getStartTime(), queryRequest.getEndTime()); -// }else if(subjectId.contains(",")){ // 查多个专题数据 -// cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 -// currentIndexList = 
subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); -// } - //currentIndexList = subjectQueryDataService.getIndexListByTimeRange(cluster, queryRequest.getEndTime(),queryRequest.getEndTime()); Long clusterId = cluster.getId(); logger.info("[SearchDataService] queryDataList clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); @@ -787,8 +814,8 @@ public class SearchDataService extends CrudService 0) { -// pageEndNum = allSimilarityDocNumber; -// } else { -// pageEndNum = start + limit; -// } - // 将查询结果的 dataId 写入到 dataIdList中 Map dedupmap = new HashMap<>(); if(timeSeries.size() > 0) { @@ -823,35 +840,10 @@ public class SearchDataService extends CrudService 0) { -// if(timeSeries.size() > pageEndNum) { -// for (int i = start; i < pageEndNum; i++) { -// // dataIdList.add(timeSeries.get(i).getDataId()); -// dedupmap.put(timeSeries.get(i).getDataId(),timeSeries.get(i).getDataId()); -// } -// }else{ -// for (int i = start; i < timeSeries.size(); i++) { -// // dataIdList.add(timeSeries.get(i).getDataId()); -// dedupmap.put(timeSeries.get(i).getDataId(),timeSeries.get(i).getDataId()); -// } -// } -// } for (String key : dedupmap.keySet()) { dataIdList.add(dedupmap.get(key)); } -// if(timeSeries.size() > 0) { -// if(timeSeries.size() > pageEndNum) { -// for (int i = start; i < pageEndNum; i++) { -// dataIdList.add(timeSeries.get(i).getDataId()); -// } -// }else{ -// for (int i = start; i < timeSeries.size(); i++) { -// dataIdList.add(timeSeries.get(i).getDataId()); -// } -// } -// } - SearchResponse response = buildDataIdQueryCrawl( start, limit, @@ -872,8 +864,8 @@ public class SearchDataService extends CrudService dataList = esQueryServiceForSQMini.queryDataFromOneSubject(indexNames, queryRequest); + List esMonitorEntityLists = new ArrayList<>(); Integer searchType = queryRequest.getSearchType(); - parseQueryResult(dataList, esMonitorEntityLists,searchType); + parseQueryResult(dataList, esMonitorEntityLists, 
searchType); Long totalCount = esQueryServiceForSQMini.queryDataCountFromOneSubject(indexNames,queryRequest); logger.info("[SearchDataService] queryDataInOneIndex: "+totalCount); - jsonObject.put("foldDocAllNumber",totalCount); - jsonObject.put("monitorLists",esMonitorEntityLists); + jsonObject.put(ESConstant.ALLDOCNUMBER,totalCount); + jsonObject.put(ESConstant.MONITORLISTS,esMonitorEntityLists); }catch (Exception e){ e.printStackTrace(); @@ -908,7 +901,7 @@ public class SearchDataService extends CrudService currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); Long clusterId = cluster.getId(); String [] indexName = currentIndexList.toArray(new String[currentIndexList.size()]); @@ -919,53 +912,7 @@ public class SearchDataService extends CrudService esMonitorEntityLists = new ArrayList<>(); Integer searchType = queryRequest.getSearchType(); parseQueryResult(dataList, esMonitorEntityLists,searchType); - - // Map docIdsMap = dedupMap.get(key); -// if(searchType == 0) { -// List newEsMonitorEntityLists = new ArrayList<>(); -// for (ESMonitorEntity entity : esMonitorEntityLists) { -// String docId = entity.getDocId(); -// if (! docIdsMap.containsKey(docId) ) { -// newEsMonitorEntityLists.add(entity); -// docIdsMap.put(docId, "1"); -// }else{ -// System.out.println("这条数据被消重啦~ !" 
+ docId); -// } -// } -// dedupMap.put(key, docIdsMap); -// jsonObject.put("monitorLists", newEsMonitorEntityLists); -// }else{ -// jsonObject.put("monitorLists", esMonitorEntityLists); -// } -// -// System.out.println("docIdsMapSize = "+docIdsMap.size()); - - jsonObject.put("monitorLists", esMonitorEntityLists); - }catch (Exception e){ - e.printStackTrace(); - } - return jsonObject; - } - - public JSONObject exportDataInSubjectIndexTestGroupBy(QueryRequest queryRequest) { - JSONObject jsonObject = new JSONObject(); - try { - // 获取 ES 的连接方式及要查询的索引列表 - Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 - List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); - Long clusterId = cluster.getId(); - String [] indexName = currentIndexList.toArray(new String[currentIndexList.size()]); - logger.info("[SearchDataService] exportDataInOneIndex: IndexName = " +indexName[0] +" ; clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); - // 开始查询 - - jsonObject= esQueryServiceForSQMini.exportDataFromOneSubjectTestGroupBy(indexName, queryRequest); - - List dataList = (List) jsonObject.get("monitorLists"); - List esMonitorEntityLists = new ArrayList<>(); - Integer searchType = queryRequest.getSearchType(); - parseQueryResult(dataList, esMonitorEntityLists,searchType); - logger.info("Query Finish exportDataInSubjectIndexTestGroupBy size: " + esMonitorEntityLists.size()); - jsonObject.put("monitorLists", esMonitorEntityLists); + jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists); }catch (Exception e){ e.printStackTrace(); } @@ -994,10 +941,65 @@ public class SearchDataService extends CrudService esMonitorEntityLists = new ArrayList<>(); Integer searchType = queryRequest.getSearchType(); parseQueryResult(dataList, esMonitorEntityLists,searchType); - jsonObject.put("monitorLists",esMonitorEntityLists); + 
jsonObject.put(ESConstant.MONITORLISTS,esMonitorEntityLists); }catch (Exception e){ e.printStackTrace(); } return jsonObject; } + + public JSONObject queryDataCountsInOneIndex(QueryRequest queryRequest) { + JSONObject jsonObject = new JSONObject(); + try{ + Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 + List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); + Long clusterId = cluster.getId(); + logger.info("[SearchDataService] queryDataCountsInOneIndex: clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); + // String indexName = currentIndexList.get(0); + String indexNames [] = currentIndexList.toArray(new String [currentIndexList.size()]); + Long contentCount = 0L; + Long commentCount = 0L; + Long authorCount = 0L; + try { + queryRequest.setSearchType(0); + contentCount = esQueryServiceForSQMini.queryDataCountFromOneSubject(indexNames, queryRequest); + queryRequest.setSearchType(1); + commentCount = esQueryServiceForSQMini.queryDataCountFromOneSubject(indexNames, queryRequest); + queryRequest.setSearchType(2); + authorCount = esQueryServiceForSQMini.queryDataCountFromOneSubject(indexNames, queryRequest); + }catch (Exception e){ + e.printStackTrace(); + } + jsonObject.put("contentCount",contentCount); + jsonObject.put("commentCount",commentCount); + jsonObject.put("authorCount",authorCount); + }catch (Exception e){ + e.printStackTrace(); + } + return jsonObject; + } + +// public JSONObject exportDataInSubjectIndexTestGroupBy(QueryRequest queryRequest) { +// JSONObject jsonObject = new JSONObject(); +// try { +// // 获取 ES 的连接方式及要查询的索引列表 +// Cluster cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 +// List currentIndexList = subjectQueryDataService.getIndexBySubjectIds(cluster, queryRequest.getSubjectId()); +// Long clusterId = cluster.getId(); +// String [] indexName = 
currentIndexList.toArray(new String[currentIndexList.size()]); +// logger.info("[SearchDataService] exportDataInOneIndex: IndexName = " +indexName[0] +" ; clusterId = " + clusterId + " ; currentIndexList :" + currentIndexList.toString()); +// // 开始查询 +// jsonObject= esQueryServiceForSQMini.exportDataFromOneSubjectTestGroupBy(indexName, queryRequest); +// +// List dataList = (List) jsonObject.get("monitorLists"); +// List esMonitorEntityLists = new ArrayList<>(); +// Integer searchType = queryRequest.getSearchType(); +// parseQueryResult(dataList, esMonitorEntityLists,searchType); +// logger.info("Query Finish exportDataInSubjectIndexTestGroupBy size: " + esMonitorEntityLists.size()); +// jsonObject.put(ESConstant.MONITORLISTS, esMonitorEntityLists); +// }catch (Exception e){ +// e.printStackTrace(); +// } +// return jsonObject; +// } } diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java b/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java index 25df773..a0aef02 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/SearchKeywordsCouldService.java @@ -31,22 +31,78 @@ public class SearchKeywordsCouldService{ /** * 词云统计接口 */ - public JSONObject dataAnalysisCloud(QueryRequest queryRequest){ +// public JSONObject dataAnalysisCloud(QueryRequest queryRequest){ +// long start = System.currentTimeMillis(); +// JSONObject jsonObject = new JSONObject(); +// /**词云返回个数*/ +// int topSize = queryRequest.getLimit(); +// logger.info("[SearchKeywordsCouldService] dataAnalysisCloud : the top size is:{} ", topSize); +// try { +// List cacheEsMonitorEntityList = sliceScrollUtil.fetchResultSubjectCache(queryRequest,ESConstant.FIELD_CLOUD_ANALYSIS); +// +// Map keyWordsMaps = new HashMap<>(); +// Map placesWordsMaps = new HashMap<>(); // 地点 +// Map emojiWordsMaps = new HashMap<>(); // 表情 +// Map hashTagWordsMaps = new HashMap<>(); 
// 话题 +// Map opinionsWordsMaps = new HashMap<>(); // 评价 +//// List hlKeywordsList = new ArrayList<>(); +// for (ESMonitorEntity esMonitorEntity : cacheEsMonitorEntityList) { +// List hlKeyWords = esMonitorEntity.getHlKeyWords(); +// getMapCloudKeyWords(placesWordsMaps, esMonitorEntity.getPlaces()); +// // 表情 +// getExpressionMapCloudKeyWords(emojiWordsMaps, esMonitorEntity.getExpression()); +// // 话题 +// getMapCloudKeyWords(hashTagWordsMaps, esMonitorEntity.getHashTag()); +// // 评价 +// getMapCloudKeyWords(opinionsWordsMaps, esMonitorEntity.getOpinions()); +// +// Map finalKeyWordsMaps = keyWordsMaps; +// hlKeyWords.forEach(el -> finalKeyWordsMaps.merge(el, 1, (a, b) -> a + b)); +// Set wordsSets = new HashSet<>(Arrays.asList(StringUtils.split(String.valueOf(hlKeyWords), " "))); +// for (String key : wordsSets) { +// if (TStringUtils.isNotEmpty(key) && key.length() > 1 && !key.equals("null")) { +// // 统计 +// key = key.replace("[","").replace("]",""); +// keyWordsMaps.merge(key, weightValue, (a, b) -> a + b); +// } +// } +// } +// Map keyWordsResultMap = new HashMap<>(); +// Map placesWordsResultMap = new HashMap<>(); +// CollectionUtils.sortByValueForListSubTopSize(keyWordsMaps, topSize * 2, keyWordsResultMap); +// CollectionUtils.sortByValueForListSubTopSize(placesWordsMaps, topSize * 2, placesWordsResultMap); +// //sortByValueForListSubTopSize +// Long keyWordsStart = System.currentTimeMillis(); +// jsonObject.put(ConditionCommon.WORD_CLOUD, CollectionUtils.sortByValueForList(textService.post(keyWordsResultMap, 1), topSize)); +// logger.info("[keysWords Execute Time one] the time used is {} ms", System.currentTimeMillis() - keyWordsStart); +// Long placesWordsStart = System.currentTimeMillis(); +// jsonObject.put(ConditionCommon.PLACE_CLOUD, CollectionUtils.sortByValueForList(textService.post(placesWordsResultMap, 2), topSize)); +// logger.info("[placeWords Exceute Time two] the time used is {} ms", System.currentTimeMillis() - placesWordsStart); +// 
jsonObject.put(ConditionCommon.HASH_TAG_CLOUD, CollectionUtils.sortByValueForList(hashTagWordsMaps, topSize)); +// jsonObject.put(ConditionCommon.EMOJI_CLOUD, CollectionUtils.sortByValueForList(emojiWordsMaps, topSize)); +// jsonObject.put(ConditionCommon.OPINION_CLOUD, CollectionUtils.sortByValueForList(opinionsWordsMaps, topSize)); +// +// logger.info("[SearchKeywordsCouldService ] the time used is {} ms", (System.currentTimeMillis() - start)); +// } catch (Exception e) { +// logger.error("[SearchKeywordsCouldService] Is Error", e); +// } +// return jsonObject; +// } + + + public JSONObject dataAnalysisCloud( List esMonitorEntityList){ long start = System.currentTimeMillis(); JSONObject jsonObject = new JSONObject(); /**词云返回个数*/ - int topSize = queryRequest.getLimit(); - logger.info("[SearchKeywordsCouldService] dataAnalysisCloud : the top size is:{} ", topSize); + int topSize = 200; try { - List cacheEsMonitorEntityList = sliceScrollUtil.fetchResultSubjectCache(queryRequest,ESConstant.FIELD_CLOUD_ANALYSIS); - Map keyWordsMaps = new HashMap<>(); Map placesWordsMaps = new HashMap<>(); // 地点 Map emojiWordsMaps = new HashMap<>(); // 表情 Map hashTagWordsMaps = new HashMap<>(); // 话题 Map opinionsWordsMaps = new HashMap<>(); // 评价 // List hlKeywordsList = new ArrayList<>(); - for (ESMonitorEntity esMonitorEntity : cacheEsMonitorEntityList) { + for (ESMonitorEntity esMonitorEntity : esMonitorEntityList) { List hlKeyWords = esMonitorEntity.getHlKeyWords(); getMapCloudKeyWords(placesWordsMaps, esMonitorEntity.getPlaces()); // 表情 diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java b/cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java new file mode 100644 index 0000000..ca48c99 --- /dev/null +++ b/cl_search_api/src/main/java/com/bfd/mf/service/UpdateService.java @@ -0,0 +1,87 @@ +package com.bfd.mf.service; + +import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.service.common.CrudService; +import 
com.bfd.mf.common.service.es.ClusterService; +import com.bfd.mf.common.util.ESServerUtils; +import com.bfd.mf.common.util.constants.ESConstant; +import com.bfd.mf.common.util.es.EsUtils; +import com.bfd.mf.common.web.entity.mysql.SentimentModify; +import com.bfd.mf.common.web.entity.mysql.cache.Cluster; +import com.bfd.mf.common.web.repository.mysql.SentimentRepository; +import com.bfd.mf.common.web.vo.params.QueryRequest; +import com.bfd.mf.config.BFDApiConfig; +import org.elasticsearch.action.search.SearchRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.ScriptQueryBuilder; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; + +import javax.annotation.PostConstruct; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@Service +public class UpdateService extends CrudService implements Serializable { + private static Logger logger = LoggerFactory.getLogger(SearchDataService.class); + + @Autowired + private BFDApiConfig bfdApiConfig; + @Autowired + private ClusterService clusterService; + @Autowired + private ESServerUtils esServerUtils; + + private String clusterName =""; + + @PostConstruct + public void init() { + // 注册数据查询来源 + clusterName = bfdApiConfig.esMiniName(); + String sourceAddress [] = bfdApiConfig.esMiniAddress(); + EsUtils.registerCluster(clusterName, sourceAddress);// 配置文件中的 es-source + } + + @Override + public SentimentModify copy(SentimentModify from, SentimentModify to) { + return null; + } + + public JSONObject updateByDocId(QueryRequest queryRequest) { + JSONObject result = new JSONObject(); + 
String subjectId = queryRequest.getSubjectId(); + String docId = queryRequest.getDocId(); + String valueLabel = queryRequest.getValueLabel(); + String categoryLabel = queryRequest.getCategoryLabel(); + + Cluster cluster = null; + List currentIndexList = new ArrayList<>(); + if(!("").equals(subjectId) && null != subjectId){ // 如果是专题,去专题的索引查就行 + cluster = clusterService.findClusterByType(Cluster.CLUSTER_TYPE.mini_cluster_type); // 111 + subjectId = cluster.getPrefixIndexPattern() +"_"+ subjectId; + } + currentIndexList.add(subjectId); + Map params = new HashMap<>(16); + String script ="ctx._source['valueLabel']='"+valueLabel+"';ctx._source['categoryLabel']='"+categoryLabel+"'"; + String index = subjectId; + docId = docId.split("_")[2]; + System.out.println(docId); + // 先根据docId 查到 _id_ + long updateVersion = EsUtils.updateByDocId(clusterName,index,script,docId,params); + if(updateVersion> 0) { + result.put("message:", "修改成功,这是第:" + updateVersion + " 次修改"); + }else{ + result.put("message","修改失败了"); + } + return result; + } +} diff --git a/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java b/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java index 41519a7..c371e1f 100644 --- a/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java +++ b/cl_search_api/src/main/java/com/bfd/mf/service/UploadExcelService.java @@ -1,45 +1,61 @@ package com.bfd.mf.service; import com.alibaba.fastjson.JSONObject; +import com.bfd.mf.common.util.es.EsUtils2; import com.bfd.mf.common.web.entity.mysql.topic.ParseExcelTask; import com.bfd.mf.common.web.repository.mysql.topic.ParseExcelTaskRepository; -import com.bfd.nlp.common.util.encryption.MD5Util; +import com.bfd.mf.config.BFDApiConfig; +import org.assertj.core.util.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import 
org.springframework.web.multipart.MultipartFile; +import javax.annotation.PostConstruct; +import java.io.*; import java.math.BigInteger; import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Map; +import java.util.*; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; @Service public class UploadExcelService { private static Logger logger = LoggerFactory.getLogger(UploadExcelService.class); + private static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private static BlockingQueue DATA_CACHE = new LinkedBlockingQueue<>(10240); + private static String INDEX_TYPE = "docs"; @Autowired private ParseExcelTaskRepository parseExcelTaskRepository; + @Autowired + private BFDApiConfig bfdApiConfig; + + @PostConstruct + public void init() { + // 注册数据查询来源 + String address []= {bfdApiConfig.getEsMini().get("address")}; + EsUtils2.registerCluster(bfdApiConfig.getEsMini().get("name"), address);// 配置文件中的 es-source + } /** * 将任务插入到 mysql */ - public void uploadExcel(String excelName,Map userinfo)throws Exception{ + public void insertParseExcelTask(String excelName,Map userinfo)throws Exception{ ParseExcelTask excelDetail = convertExcelTaskDetailEntity(excelName,userinfo); excelDetail = parseExcelTaskRepository.save(excelDetail); logger.info("[UserNodeService] insertExcelTask : "+ JSONObject.toJSONString(excelDetail)); } - public boolean isExcelExist(String excelName) throws Exception{ - Integer num = parseExcelTaskRepository.findTaskByProjectName(excelName); - if(num > 0){ // 如果大于0 ,就表示已经有这个Excel 的名字存在了,不能再 - return true; - }else { - return false; - } - } +// public boolean isExcelExist(String excelName) throws Exception{ +// Integer num = parseExcelTaskRepository.findTaskByProjectName(excelName); +// if(num > 0){ // 如果大于0 ,就表示已经有这个Excel 的名字存在了,不能再 +// return true; +// }else { +// return false; +// } +// } public boolean isTaskExist() throws 
Exception{ Integer num = parseExcelTaskRepository.findTaskByStatus(0); @@ -63,8 +79,11 @@ public class UploadExcelService { } } + /** + * 上传查询条件的Excel ,生成对应的查询任务 + */ private ParseExcelTask convertExcelTaskDetailEntity(String excelName,Map userinfo)throws Exception{ - //logger.info("[UserNodeService] convertExcelTaskDetailEntity: -enter- request = {}", request); + String path = bfdApiConfig.getUploadOLYExcelPath(); if (null == excelName) { throw new IllegalArgumentException(" request node is null"); } @@ -74,12 +93,11 @@ public class UploadExcelService { excelTaskDetail.setExcelName(excelName.replace(".xlsx","")); excelTaskDetail.setCreateUser(user); excelTaskDetail.setCreateUserId(userId); - excelTaskDetail.setExcelFile("/opt/nfsdata/excelTask/"+excelName); + excelTaskDetail.setExcelFile(path + excelName); excelTaskDetail.setStatus(0); return excelTaskDetail; } - private static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); public List getSubjectIdsByExcelName(String excelName) { List subjectIds = new ArrayList<>(); excelName = excelName.replace(".xlsx",""); @@ -92,6 +110,351 @@ public class UploadExcelService { } } return subjectIds; + } + + + /** + * 上传Excel相关 + */ + public boolean queryByExcelName(String excelName) { + try{ + String newExcelName = excelName.replace(".xlsx",""); + boolean success = isTaskSucess(newExcelName); + if(success){ + return true; + } + return false; + }catch (Exception e){ + e.printStackTrace(); + return false; + } + } + + /** + * 上传Excel相关 + */ + public boolean queryByStatus() { + try{ + boolean isExist = isTaskExist(); + if(isExist){ // 如果任务为空,就说明可以添加新的任务进来,如果不为空,就不要添加新的任务进来啦~ + return true; + }else{ + return false; + } + }catch (Exception e){ + e.printStackTrace(); + return false; + } + } + /** + * 上传文件 + */ + public boolean uploadExcel(MultipartFile file, String filePath) { + try{ + InputStream inputStream = file.getInputStream(); + byte[] buffer = new byte[inputStream.available()]; + inputStream.read(buffer); + 
File targetFile = new File(filePath+file.getOriginalFilename()); + OutputStream outStream = new FileOutputStream(targetFile); + outStream.write(buffer); + inputStream.close(); + outStream.close(); + return true; + }catch (Exception e){ + e.printStackTrace(); + return false; + } + } +// public BigInteger getOneSubjectId() { +// int subjectId = parseExcelTaskRepository.findOneSubjectId(); +// BigInteger newSubjectId = new BigInteger((subjectId+1)+""); +// return newSubjectId; +// } + +// public void uploadData(String subjectId, String fileName) { +// List list = ReadLine.readLine(new File(fileName)); +// String indexNamePre = bfdApiConfig.getIndexNamePre(); +// String indexName = indexNamePre + subjectId; +// for (String l:list) { +// String c = l.replace("\\\"","\\\""); +// JSONObject data = new JSONObject(); +// try { +// data = JSONObject.parseObject(c); +// }catch (Exception e){ +// e.printStackTrace(); +// } +// if(data.size() >0) { +// try { +// DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, data)); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } +// } +// } +// flushData(); +// } + +// public void uploadData(String subjectId, JSONObject result) { +// String indexNamePre = bfdApiConfig.getIndexNamePre(); +// String indexName = indexNamePre + subjectId; +// try { +// if(result.size() >0) { +// try { +// DATA_CACHE.put(EsUtils2.buildBulkItem(indexName, INDEX_TYPE, result)); +// } catch (InterruptedException e) { +// e.printStackTrace(); +// } +// } +// }catch (Exception e){ +// e.printStackTrace(); +// } +// flushData(); +// } + +// public void flushData() { +// logger.info("ES flushData"); +// List dataList = Lists.newArrayList(); +// EsUtils2.BulkItem item = DATA_CACHE.poll(); +// while (Objects.nonNull(item)) { +// if (dataList.size() >= 100) { +// EsUtils2.bulkIndex("SQ_Mini", dataList, "_id_"); +// logger.debug("Flush data, size:{}.", dataList.size()); +// dataList.clear(); +// } +// dataList.add(item); +// item = 
DATA_CACHE.poll(); +// } +// if (dataList.size() > 0) { +// EsUtils2.bulkIndex("SQ_Mini", dataList, "_id_"); +// logger.debug("Flush data, size:{}.", dataList.size()); +// } +// } + +// public String createSubject(String excelName, Map userinfo) { +// excelName = excelName.replace(".txt",""); +// BigInteger subjectId = getOneSubjectId(); +// parseExcelTaskRepository.insertSubject(subjectId,excelName); +// Date date = new Date(); +// String today = formatter.format(date); +// parseExcelTaskRepository.insertCountToSubjectCount(subjectId,today); +// return subjectId.toString(); +// } +// + + +// public void parseExcel(String subjectId ,String path ,String excelName, Map> fileNameMap) { +// try{ +// XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(path+excelName)); +// //获取每一个工作簿的数据 +// for (int i = 0; i < xssfWorkbook.getNumberOfSheets(); i++) { +// StringBuffer finalstring = new StringBuffer(); +// List> should = new ArrayList>(); +// XSSFSheet sheet = xssfWorkbook.getSheetAt(i); +//// String simple_content = xssfWorkbook.getSheetName(j); +//// System.out.println("sheet对象的名称是:" + simple_content); +// //获取最后一行的num,即总行数。此处从0开始计数,返回最后一行的行数 +// int rowNum = sheet.getLastRowNum(); +// for (int row = 1; row <= rowNum; row++) { +// int cellNum = sheet.getRow(row).getLastCellNum(); +// JSONObject resultJson = new JSONObject(); +// Map resultMap = AllKeys.getMap(); +// String dataId = String.valueOf(sheet.getRow(row).getCell(0)); +// resultMap.put("dataId",dataId); +// resultMap.put("_id_",dataId); +// resultMap.put("docId",String.valueOf(sheet.getRow(row).getCell(1))); +// resultMap.put("channel",String.valueOf(sheet.getRow(row).getCell(2))); +// resultMap.put("source",String.valueOf(sheet.getRow(row).getCell(3))); +// resultMap.put("enSource",String.valueOf(sheet.getRow(row).getCell(4))); +// resultMap.put("url",String.valueOf(sheet.getRow(row).getCell(5))); +// resultMap.put("title",String.valueOf(sheet.getRow(row).getCell(6))); +// 
resultMap.put("translateTitle",String.valueOf(sheet.getRow(row).getCell(7))); +// String pubTimeStr = String.valueOf(sheet.getRow(row).getCell(8)); +// long pubTime = DataCheckUtil.convertDateTotime(pubTimeStr)*1000; +// long pubDay = DataCheckUtil.getDay(pubTime); +// String pubDate = DataCheckUtil.getDate(pubTime); +// resultMap.put("pubTimeStr",pubTimeStr); +// resultMap.put("pubTime", pubTime); +// resultMap.put("pubDay",pubDay); +// resultMap.put("pubDate",pubDate); +// resultMap.put("author",String.valueOf(sheet.getRow(row).getCell(9))); +// resultMap.put("authorId",String.valueOf(sheet.getRow(row).getCell(10))); +// resultMap.put("content",String.valueOf(sheet.getRow(row).getCell(11))); +// resultMap.put("translateContent",String.valueOf(sheet.getRow(row).getCell(12))); +// resultMap.put("price",String.valueOf(sheet.getRow(row).getCell(13))); +// resultMap.put("productParameter",String.valueOf(sheet.getRow(row).getCell(14))); +// String crawlTimeStr = String.valueOf(sheet.getRow(row).getCell(15)); +// long crawlTime = DataCheckUtil.convertDateTotime(crawlTimeStr)*1000; +// long crawlDay = DataCheckUtil.getDay(crawlTime); +// String crawlDate = DataCheckUtil.getDate(crawlTime); +// resultMap.put("crawlTimeStr",crawlTimeStr); +// resultMap.put("crawlTime",crawlTime); +// resultMap.put("crawlDay",crawlDay); +// resultMap.put("crawlDate",crawlDate); +// // resultMap.put("crawlDataFlag",String.valueOf(sheet.getRow(row).getCell(16))); +// resultMap.put("crawlDataFlag","keyword:导入数据"); +// resultMap.put("sysSentiment",String.valueOf(sheet.getRow(row).getCell(17))); +// XSSFCell hlKeywords = sheet.getRow(row).getCell(18); +// System.out.println("111" + hlKeywords); +// List hl = new ArrayList<>(); +// if(hlKeywords.toString().equals("[]")){ +// resultMap.put("hlKeywords",hl); +// }else { +// if(hlKeywords.toString().contains(",")) { +// String hlk[] = hlKeywords.toString().replace("[", "").replace("]", "").replace("\"","").split(","); +// hl = Arrays.asList(hlk); 
+// }else{ +// String hlk = hlKeywords.toString().replace("[", "").replace("]", ""); +// hl.add(hlk); +// } +// resultMap.put("hlKeywords", hl); +// } +// +// // 转发、评论、点赞 +// String quoteCount = sheet.getRow(row).getCell(19).toString(); +// if(quoteCount.equals("")){ +// quoteCount = "0"; +// } +// resultMap.put("quoteCount",Integer.valueOf(quoteCount)); +// String commentsCount = sheet.getRow(row).getCell(20).toString(); +// if(commentsCount.equals("")){ +// commentsCount = "0"; +// } +// resultMap.put("commentsCount",Integer.valueOf(commentsCount)); +// String attitudesCount = sheet.getRow(row).getCell(21).toString(); +// if(attitudesCount.equals("")){ +// attitudesCount = "0"; +// } +// resultMap.put("attitudesCount",Integer.valueOf(attitudesCount)); +// // 插入时间 +// long createTime = System.currentTimeMillis() ; +// resultMap.put("createTime", createTime); +// resultMap.put("createTimeStr", DataCheckUtil.getCurrentTime(createTime)); +// resultMap.put("createDay", DataCheckUtil.getDay(createTime)); +// resultMap.put("createDate", DataCheckUtil.getDate(createTime)); +// +// // 根据路径和数据ID,读取附件,组装附件的字段值 +// resultMap = getPathSize(path,dataId,resultMap,fileNameMap); +// System.out.println("*** " + JSONObject.toJSONString(resultMap)); +// +// resultJson.putAll(resultMap); +// uploadData(subjectId,resultJson); +// +// Thread.sleep(5000); +// +// } +// } +// +// }catch (Exception e){ +// e.printStackTrace(); +// } +// } + +// private Map getPathSize(String path, String dataId, +// Map resultMap, +// Map> fileNameMap) { +// // 判断文件夹是否尊在,若不存在,则 isDownload = false ,pgc ugc egc 都为0; +// File file=new File(path+dataId); +// resultMap.put("pgc",0); +// resultMap.put("ugc",0); +// resultMap.put("egc",0); +// List> filePathSize = new ArrayList<>(); +// List> imagePathSize = new ArrayList<>(); +// List> videoPathSize = new ArrayList<>(); +// List filePath = new ArrayList<>(); +// List imagePath = new ArrayList<>(); +// List videoPath = new ArrayList<>(); +// 
if(!file.exists()){//如果文件夹不存在 +// resultMap .put("isDownload",false); +// }else{ +// resultMap .put("isDownload",true); +// List fileNames = fileNameMap.get(dataId); +// for (String fileName:fileNames) { // videoPath == egc filePath == ugc imagePath == pgc +// // 根据路径读取文件,并上传到 go-fast 上,并根据前缀组装对应的 path 和 pathSize +// String goFastUrl = bfdApiConfig.getGoFastPostUrl(); +// // String zipPath = bfdApiConfig.getUploadZipPath(); +// // String url = DownLoadFile.upload(goFastUrl,dataId+fileName,content); +// String file1 = path + dataId + "\\" + fileName; +// Map urlMap = DownLoadFile.upload(goFastUrl,dataId+fileName,new File(file1)); +// String url = urlMap.get("path").toString(); +// +// Map pathMap = new HashMap<>(); +// pathMap.put("url",url); +// // 获取文件的大小 +// long size = Long.valueOf(urlMap.get("size").toString()); +// Double newSize =(double)(Math.round(size/1024)/100.0); +// pathMap.put("size",newSize+"KB"); +// // 获取分辨率 +// String resolution = ""; +// if(fileName.startsWith("image")) { +// resolution = ReadLine.getImageDim(file1); +// } +// if(fileName.startsWith("video")){ +// if(url.endsWith(".mp3")){ +// resolution = "400*240"; +// }else { +// resolution = ReadLine.videosize(file1); +// } +// } +// System.out.println(resolution); +// pathMap.put("resolution",resolution); +// // 视频的时长 +// String videoTime = ""; +// pathMap.put("videoTime",videoTime); +// +// if(fileName.startsWith("file")){ +// resultMap.put("ugc",1); +// filePathSize.add(pathMap); +// filePath.add(url); +// } +// if(fileName.startsWith("image")){ +// resultMap.put("pgc",1); +// imagePathSize.add(pathMap); +// imagePath.add(url); +// } +// if(fileName.startsWith("video")){ +// resultMap.put("egc",1); +// videoPathSize.add(pathMap); +// videoPath.add(url); +// } +// } +// } +// resultMap.put("filePathSize",filePathSize); +// resultMap.put("imagePathSize",imagePathSize); +// resultMap.put("videoPathSize",videoPathSize); +// resultMap.put("filePath",filePath); +// 
resultMap.put("imagePath",imagePath); +// resultMap.put("videoPath",videoPath); +// +// +// return resultMap; +// } + + public boolean insertTask(String subjectId, String user, String userId, String fileRemak, String zipName) { + boolean flag = true; + try { + BigInteger id = BigInteger.valueOf(Long.valueOf(subjectId)); + String crawlDataFlag = "keyword:" + fileRemak; + parseExcelTaskRepository.insertTask(id,user,userId,fileRemak,zipName,crawlDataFlag); + }catch (Exception e){ + e.printStackTrace(); + return false; + } + return flag; } + +// public static void main(String[] args) { +// List line = ReadLine.readLine(new File("E:\\100.txt")); +// for (String l:line) { +// String c = l.replace("\\\"","\\\""); +// System.out.println("c : " +c); +// JSONObject jsonObject = new JSONObject(); +// try { +// jsonObject = JSONObject.parseObject(c); +// }catch (Exception e){ +// e.printStackTrace(); +// } +// System.out.println("j : "+jsonObject); +// } +// } } diff --git a/cl_search_api/src/main/resources/application-113.yml b/cl_search_api/src/main/resources/application-113.yml new file mode 100644 index 0000000..53c40b1 --- /dev/null +++ b/cl_search_api/src/main/resources/application-113.yml @@ -0,0 +1,61 @@ +server: + port: 18909 + tomcat: + uri-encoding: UTF-8 + max-threads: 800 + maxHttpHeaderSize: 655360 + http2: + enabled: true + +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: bfd123 + url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + + hikari: + maximum-pool-size: 10 + minimum-idle: 1 + + jpa: + open-in-view: false + database: mysql + + servlet: + multipart: + max-file-size: 1009MB #单个数据大小 + max-request-size: 2048MB #总数据大小 + +####### +bfd.api.mf: + textPostUrl: http://rule.sq.baifendian.com/nerplace + emotionPostUrl : http://172.18.1.166:15038/bertsentiment + wordCloudPostUrl : http://rule.sq.baifendian.com/wordcloud + + 
goFastPostUrl : http://172.18.1.113:8080/upload + goFastDomain : http://172.18.1.113:8080 + uploadOLYExcelPath : /opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + +# es-mini: +# name: SQ_Mini +# address: 172.18.1.147:9313 +# upper: 2018-09-01 +# standby: cl_major_* +# es-normal: +# name: SQ_Normal_new +# address: 172.18.1.134:9301 +# upper: 2018-09-01 +# standby: cl_index_* + es-mini: + name: SQ_Mini + address: 172.26.11.111:9301 + upper: 2018-09-01 + standby: cl_major_ + es-normal: + name: SQ_Normal + address: 172.26.11.109:9301 + upper: 2018-09-01 + standby: cl_index_* \ No newline at end of file diff --git a/cl_search_api/src/main/resources/application-134.yml b/cl_search_api/src/main/resources/application-134.yml new file mode 100644 index 0000000..8fc57eb --- /dev/null +++ b/cl_search_api/src/main/resources/application-134.yml @@ -0,0 +1,61 @@ +server: + port: 18909 + tomcat: + uri-encoding: UTF-8 + max-threads: 800 + maxHttpHeaderSize: 655360 + http2: + enabled: true + +spring: + datasource: + driver-class-name: com.mysql.jdbc.Driver + username: root + password: Bfd123!@# + url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + + hikari: + maximum-pool-size: 10 + minimum-idle: 1 + + jpa: + open-in-view: false + database: mysql + + servlet: + multipart: + max-file-size: 1009MB #单个数据大小 + max-request-size: 2048MB #总数据大小 + +####### +bfd.api.mf: + textPostUrl: http://rule.sq.baifendian.com/nerplace + emotionPostUrl : http://172.18.1.166:15038/bertsentiment + wordCloudPostUrl : http://rule.sq.baifendian.com/wordcloud + + goFastPostUrl : http://172.18.1.113:8080/upload + goFastDomain : http://172.18.1.113:8080 + uploadOLYExcelPath : /opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + + es-mini: + name: SQ_Mini + address: 172.18.1.147:9313 + upper: 2018-09-01 + standby: cl_major_* + 
es-normal: + name: SQ_Normal_new + address: 172.18.1.134:9301 + upper: 2018-09-01 + standby: cl_index_* +# es-mini: +# name: SQ_Mini +# address: 172.26.11.111:9301 +# upper: 2018-09-01 +# standby: cl_major_ +# es-normal: +# name: SQ_Normal +# address: 172.26.11.109:9301 +# upper: 2018-09-01 +# standby: cl_index_* \ No newline at end of file diff --git a/cl_search_api/src/main/resources/application.yml b/cl_search_api/src/main/resources/application.yml index 179b9a7..b4a443b 100644 --- a/cl_search_api/src/main/resources/application.yml +++ b/cl_search_api/src/main/resources/application.yml @@ -11,15 +11,21 @@ spring: datasource: driver-class-name: com.mysql.jdbc.Driver username: root - password: Bfd123!@# - url: jdbc:mysql://172.18.1.134:3306/intelligent_crawl?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + password: bfd123 + url: jdbc:mysql://172.26.11.113:3306/intelligent_crawl_item?useOldAliasMetadataBehavior=true&characterEncoding=UTF-8&zeroDateTimeBehavior=round + hikari: maximum-pool-size: 10 minimum-idle: 1 + jpa: open-in-view: false database: mysql + servlet: + multipart: + max-file-size: 1009MB #单个数据大小 + max-request-size: 2048MB #总数据大小 ####### bfd.api.mf: @@ -27,11 +33,17 @@ bfd.api.mf: emotionPostUrl : http://172.18.1.166:15038/bertsentiment wordCloudPostUrl : http://rule.sq.baifendian.com/wordcloud + goFastPostUrl : http://172.18.1.113:8080/upload + goFastDomain : http://172.18.1.113:8080 + uploadOLYExcelPath : /opt/nfsdata/excelTask/ + uploadZipPath : /opt/nfsdata/uploadFiles/ + indexNamePre : cl_major_ + es-mini: name: SQ_Mini address: 172.18.1.147:9313 upper: 2018-09-01 - standby: cl_major + standby: cl_major_* es-normal: name: SQ_Normal_new address: 172.18.1.134:9301 @@ -41,7 +53,7 @@ bfd.api.mf: # name: SQ_Mini # address: 172.26.11.111:9301 # upper: 2018-09-01 -# standby: cl_subject_10000 +# standby: cl_major_ # es-normal: # name: SQ_Normal # address: 172.26.11.109:9301 diff --git a/cl_stream_3.0.iml 
b/cl_stream_3.1.iml similarity index 68% rename from cl_stream_3.0.iml rename to cl_stream_3.1.iml index c035f0b..f409c0e 100644 --- a/cl_stream_3.0.iml +++ b/cl_stream_3.1.iml @@ -4,9 +4,6 @@ - - - diff --git a/foreground b/foreground new file mode 100644 index 0000000..9762351 --- /dev/null +++ b/foreground @@ -0,0 +1,534 @@ +{ + "commentUrl":{ + "type":"keyword" + }, + "channel":{ + "type":"keyword" + }, + "channelNum":{ + "type":"keyword" + }, + "readCount":{ + "type":"long" + }, + "quoteCount":{ + "type":"long" + }, + "brand":{ + "analyzer":"ik_smart", + "term_vector":"yes", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "brandId":{ + "type":"keyword" + }, + "createTimeStr":{ + "type":"keyword" + }, + "authornickname":{ + "type":"keyword" + }, + "contentSimHash":{ + "type":"keyword" + }, + "crawlDay":{ + "type":"long" + }, + "titleSimHash":{ + "type":"keyword" + }, + "commentId":{ + "type":"keyword" + }, + "originalPhrase":{ + "type":"keyword" + }, + "forwardContent":{ + "analyzer":"ik_smart", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "finalPhrase":{ + "type":"keyword" + }, + "availability":{ + "type":"integer" + }, + "forwardUserId":{ + "type":"keyword" + }, + "forwardUserType":{ + "type":"integer" + }, + "forwardUserUrl":{ + "type":"keyword" + }, + "forwardAvatar":{ + "type":"keyword" + }, + "forwardImgs":{ + "type":"keyword" + }, + "forwardPostSource":{ + "type":"keyword" + }, + "forwardAttitudesCount":{ + "type":"long" + }, + "forwardCommentsCount":{ + "type":"long" + }, + "forwardQuoteCount":{ + "type":"long" + }, + "forwardPubTime":{ + "type":"long" + }, + "titleLength":{ + "type":"long" + }, + "forwardAuthor":{ + "type":"keyword" + }, + "sysAbstract":{ + "analyzer":"ik_smart", + "type":"text" + }, + "forwardUrl":{ + "type":"keyword" + }, + "createDate":{ + "type":"date" + }, + "docType":{ + "type":"keyword" + }, + 
"getSource":{ + "type":"keyword" + }, + "dataCount":{ + "type":"integer" + }, + "primary":{ + "type":"integer" + }, + "cate":{ + "type":"keyword" + }, + "sex":{ + "type":"keyword" + }, + "collectCount":{ + "type":"long" + }, + "crawlDate":{ + "type":"date" + }, + "avatar":{ + "type":"keyword" + }, + "avatarPath":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "url":{ + "type":"keyword" + }, + "skuProperties":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "expression":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "hashTag":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "places":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "opinions":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "hlKeywords":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "createTime":{ + "type":"long" + }, + "contentLength":{ + "type":"integer" + }, + "pubTime":{ + "type":"long" + }, + "fansCount":{ + "type":"keyword" + }, + "language":{ + "type":"keyword" + }, + "source":{ + "type":"keyword" + }, + "enSource":{ + "type":"keyword" + }, + "pictureList":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "userUrl":{ + "type":"keyword" + }, + "videoUrl":{ + "type":"keyword" + }, + "contentTag":{ + "type":"keyword" + }, + "author":{ + "type":"keyword" + }, + "authorId":{ + "type":"keyword" + }, + "authorLevel":{ + "type":"keyword" + }, + "sysSentiment":{ + "type":"double" + }, + "price":{ + "type":"double" + }, + "nomorprice":{ + "type":"double" + }, + "attitudesCount":{ + "type":"keyword" + }, + "createDay":{ + "type":"long" + }, + "postId":{ + "type":"keyword" + }, + 
"pubDate":{ + "type":"date" + }, + "sysKeywords":{ + "type":"keyword" + }, + "crawlTime":{ + "type":"long" + }, + "userType":{ + "type":"keyword" + }, + "projectName":{ + "type":"keyword" + }, + "lastModifiedTime":{ + "type":"long" + }, + "productParameter":{ + "analyzer":"ik_smart", + "term_vector":"yes", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "docId":{ + "type":"keyword" + }, + "commentScore":{ + "type":"long" + }, + "urlHash":{ + "type":"keyword" + }, + "_id_":{ + "type":"keyword" + }, + "title":{ + "analyzer":"ik_smart", + "term_vector":"yes", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "pageTranspondCount":{ + "type":"keyword" + }, + "pageCommentCount":{ + "type":"keyword" + }, + "content":{ + "analyzer":"ik_smart", + "term_vector":"yes", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "pubDay":{ + "type":"long" + }, + "pubTimeStr":{ + "type":"keyword" + }, + "postSource":{ + "type":"keyword" + }, + "crawlTimeStr":{ + "type":"keyword" + }, + "postCount":{ + "type":"keyword" + }, + "friendsCount":{ + "type":"keyword" + }, + "commentsCount":{ + "type":"long" + }, + "favorCnt":{ + "type":"long" + }, + "viewCnt":{ + "type":"long" + }, + "downCnt":{ + "type":"long" + }, + "sign":{ + "type":"keyword" + }, + "isVip":{ + "type":"integer" + }, + "forumScore":{ + "type":"keyword" + }, + "impression":{ + "type":"keyword" + }, + "promotionInfo":{ + "type":"keyword" + }, + "smallImgs":{ + "type":"keyword" + }, + "listBrand":{ + "analyzer":"ik_smart", + "term_vector":"yes", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "firstListBrand":{ + "type":"keyword" + }, + "secondListBrand":{ + "type":"keyword" + }, + "threeListBrand":{ + "type":"keyword" + }, + "fourListBrand":{ + "type":"keyword" + }, + "fiveListBrand":{ + 
"type":"keyword" + }, + "area":{ + "type":"keyword" + }, + "location":{ + "type":"keyword" + }, + "country":{ + "type":"keyword" + }, + "province":{ + "type":"keyword" + }, + "city":{ + "type":"keyword" + }, + "age":{ + "type":"keyword" + }, + "egc":{ + "type":"integer" + }, + "pgc":{ + "type":"integer" + }, + "ugc":{ + "type":"integer" + }, + "translateTitle":{ + "analyzer":"ik_smart", + "term_vector":"yes", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "translateContent":{ + "analyzer":"ik_smart", + "term_vector":"yes", + "type":"text", + "fields":{ + "shingles":{ + "analyzer":"shingle_analyzer", + "type":"text" + } + } + }, + "filePath":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "resolution":{ + "type":"keyword" + }, + "extension":{ + "type":"keyword" + }, + "thumbnails":{ + "type":"keyword" + }, + "videoTime":{ + "type":"keyword" + }, + "isDownload":{ + "type":"keyword" + }, + "crawlDataFlag":{ + "type":"keyword" + }, + "crawlDataFlagType":{ + "type":"keyword" + }, + "attr":{ + "type":"keyword" + }, + "pageType":{ + "type":"keyword" + }, + "siteId":{ + "type":"keyword" + }, + "otherSourceJson":{ + "type":"keyword" + }, + "videoPath":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "imagePath":{ + "type":"text", + "fields":{ + "keyword":{ + "ignore_above":256, + "type":"keyword" + } + } + }, + "videoPathSize":{ + "type":"keyword" + }, + "imagePathSize":{ + "type":"keyword" + }, + "filePathSize":{ + "type":"keyword" + } +} \ No newline at end of file diff --git a/pom.xml b/pom.xml index c713e02..7e26df6 100644 --- a/pom.xml +++ b/pom.xml @@ -5,8 +5,8 @@ 4.0.0 com.bfd.mf - cl_stream_3.0 - 3.0-SNAPSHOT + cl_stream_4.0 + 4.0-SNAPSHOT pom