Browse Source

千问大模型应用

master
maojian 6 months ago
commit
100ef2a811
  1. 11
      .idea/chatGptNew.iml
  2. 4
      .idea/misc.xml
  3. 8
      .idea/modules.xml
  4. 617
      .idea/workspace.xml
  5. 26
      02_crawl_test.py
  6. 15
      config.ini
  7. 63
      gpt输入样例.txt
  8. BIN
      log_util/__pycache__/set_logger.cpython-310.pyc
  9. BIN
      log_util/__pycache__/set_logger.cpython-36.pyc
  10. BIN
      log_util/__pycache__/set_logger.cpython-38.pyc
  11. 33
      log_util/set_logger.py
  12. 32
      log_util/set_logger.py_bak
  13. 6
      logs/results.log
  14. 20
      manage.py
  15. 1
      start.sh
  16. 1
      stop_uwsgi.sh
  17. 20
      test.py
  18. 0
      text_analysis/__init__.py
  19. BIN
      text_analysis/__pycache__/__init__.cpython-310.pyc
  20. BIN
      text_analysis/__pycache__/__init__.cpython-36.pyc
  21. BIN
      text_analysis/__pycache__/__init__.cpython-38.pyc
  22. BIN
      text_analysis/__pycache__/cusException.cpython-38.pyc
  23. BIN
      text_analysis/__pycache__/read_config.cpython-38.pyc
  24. BIN
      text_analysis/__pycache__/settings.cpython-310.pyc
  25. BIN
      text_analysis/__pycache__/settings.cpython-36.pyc
  26. BIN
      text_analysis/__pycache__/settings.cpython-38.pyc
  27. BIN
      text_analysis/__pycache__/src.cpython-36.pyc
  28. BIN
      text_analysis/__pycache__/urls.cpython-310.pyc
  29. BIN
      text_analysis/__pycache__/urls.cpython-36.pyc
  30. BIN
      text_analysis/__pycache__/urls.cpython-38.pyc
  31. BIN
      text_analysis/__pycache__/views.cpython-310.pyc
  32. BIN
      text_analysis/__pycache__/views.cpython-36.pyc
  33. BIN
      text_analysis/__pycache__/views.cpython-38.pyc
  34. BIN
      text_analysis/__pycache__/wsgi.cpython-310.pyc
  35. BIN
      text_analysis/__pycache__/wsgi.cpython-36.pyc
  36. BIN
      text_analysis/__pycache__/wsgi.cpython-38.pyc
  37. 86
      text_analysis/bak/views.py
  38. 156
      text_analysis/bak/views.py0801
  39. 152
      text_analysis/bak/views.py08012
  40. 161
      text_analysis/bak/views.py0802_3
  41. 163
      text_analysis/bak/views.py_0806
  42. 165
      text_analysis/bak/views.py_20240815
  43. 166
      text_analysis/bak/views.py_20240925
  44. 149
      text_analysis/bak/views.py_bak
  45. 159
      text_analysis/bak/views.py_bak0802_2
  46. 149
      text_analysis/bak/views.pybak
  47. 101
      text_analysis/bak/views_0107.py
  48. 101
      text_analysis/bak/views_0412.py
  49. 101
      text_analysis/bak/views_0415.py
  50. 166
      text_analysis/bak/views_20240925.py
  51. 166
      text_analysis/bak/views_bak_20240806.py
  52. 5
      text_analysis/cusException.py
  53. 10
      text_analysis/read_config.py
  54. 14
      text_analysis/request.py
  55. 148
      text_analysis/settings.py
  56. 18
      text_analysis/src.py
  57. BIN
      text_analysis/tools/__pycache__/cusException.cpython-36.pyc
  58. BIN
      text_analysis/tools/__pycache__/mysql_helper.cpython-36.pyc
  59. BIN
      text_analysis/tools/__pycache__/process.cpython-36.pyc
  60. BIN
      text_analysis/tools/__pycache__/to_kafka.cpython-310.pyc
  61. BIN
      text_analysis/tools/__pycache__/to_kafka.cpython-36.pyc
  62. BIN
      text_analysis/tools/__pycache__/to_kafka.cpython-38.pyc
  63. BIN
      text_analysis/tools/__pycache__/tool.cpython-310.pyc
  64. BIN
      text_analysis/tools/__pycache__/tool.cpython-36.pyc
  65. BIN
      text_analysis/tools/__pycache__/tool.cpython-38.pyc
  66. BIN
      text_analysis/tools/__pycache__/tools.cpython-36.pyc
  67. 74
      text_analysis/tools/bak/to_kafka.py
  68. 105
      text_analysis/tools/bak/tool.py
  69. 114
      text_analysis/tools/bak/tool.py0821
  70. 116
      text_analysis/tools/bak/tool.py_20240801
  71. 172
      text_analysis/tools/bak/tool.py_bak
  72. 170
      text_analysis/tools/bak/tool_1107_final.py
  73. 25
      text_analysis/tools/cusException.py
  74. 65
      text_analysis/tools/kakfa_util.py
  75. 0
      text_analysis/tools/logs/results.log
  76. 338
      text_analysis/tools/mysql_helper.py
  77. 51
      text_analysis/tools/process.py
  78. 171
      text_analysis/tools/seleniumTest.py
  79. 25
      text_analysis/tools/to_kafka.py
  80. 74
      text_analysis/tools/to_kafka_pykafka.py
  81. 119
      text_analysis/tools/tool.py
  82. 44
      text_analysis/tools/zk_util.py
  83. 13
      text_analysis/urls.py
  84. 164
      text_analysis/views.py
  85. 134
      text_analysis/views.py_openai
  86. 16
      text_analysis/wsgi.py
  87. 9
      uwsgi.ini
  88. 39
      wsgi.log
  89. 35
      wsgi.py
  90. 100
      文档/gpt.txt
  91. 75
      文档/gpt参数0905.txt
  92. 58
      文档/gpt参数结构调整.txt
  93. 65
      文档/gpt输入样例.txt
  94. 66
      文档/gpt输入样例2.txt
  95. 58
      文档/gpt输入样例最新-1129
  96. BIN
      文档/最新调整-1127.png

11
.idea/chatGptNew.iml

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>

4
.idea/misc.xml

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8.16 (D:\LH_program\Anaconda3\envs\python38_env\python.exe)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/chatGptNew.iml" filepath="$PROJECT_DIR$/.idea/chatGptNew.iml" />
</modules>
</component>
</project>

617
.idea/workspace.xml

@ -0,0 +1,617 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="83bdfefc-d14d-41ac-9499-e0f5cbacd066" name="Default" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="450">
<file leaf-file-name="wsgi.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/wsgi.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="252">
<caret line="16" column="6" lean-forward="false" selection-start-line="16" selection-start-column="6" selection-end-line="16" selection-end-column="6" />
<folding>
<element signature="e#242#251#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="uwsgi.ini" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/uwsgi.ini">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="252">
<caret line="7" column="18" lean-forward="true" selection-start-line="7" selection-start-column="18" selection-end-line="7" selection-end-column="18" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="start.sh" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/start.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="stop_uwsgi.sh" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/stop_uwsgi.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="36">
<caret line="1" column="0" lean-forward="true" selection-start-line="1" selection-start-column="0" selection-end-line="1" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
</file>
<file leaf-file-name="views.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/text_analysis/views.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="792">
<caret line="43" column="0" lean-forward="true" selection-start-line="43" selection-start-column="0" selection-end-line="43" selection-end-column="0" />
<folding>
<element signature="e#14#28#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="tool.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/text_analysis/tools/tool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-399">
<caret line="44" column="4" lean-forward="false" selection-start-line="44" selection-start-column="4" selection-end-line="44" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>false</find>
<find>KazooState</find>
<find>task_queue</find>
</findStrings>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/text_analysis/urls.py" />
<option value="$PROJECT_DIR$/text_analysis/tools/to_kafka.py" />
<option value="$PROJECT_DIR$/../../asr/text_analysis/tools/to_kafka.py" />
<option value="$PROJECT_DIR$/../asrNew/text_analysis/tools/to_kafka.py" />
<option value="$PROJECT_DIR$/text_analysis/tools/tool_new.py" />
<option value="$PROJECT_DIR$/text_analysis/tools/tool_new_未部署.py" />
<option value="$PROJECT_DIR$/log_util/set_logger.py" />
<option value="$PROJECT_DIR$/text_analysis/tools/zk_util.py" />
<option value="$PROJECT_DIR$/text_analysis/tools/tool.py" />
<option value="$PROJECT_DIR$/../../../2024/Qwen2_70B/eg.py" />
<option value="$PROJECT_DIR$/text_analysis/views.py" />
<option value="$PROJECT_DIR$/wsgi.py" />
<option value="$PROJECT_DIR$/stop_uwsgi.sh" />
<option value="$PROJECT_DIR$/uwsgi.ini" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="-10" />
<option name="width" value="1940" />
<option name="height" value="1030" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
<manualOrder />
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scratches" />
<pane id="ProjectPane">
<subPane>
<PATH>
<PATH_ELEMENT>
<option name="myItemId" value="QwenModel" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
</PATH_ELEMENT>
<PATH_ELEMENT>
<option name="myItemId" value="QwenModel" />
<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.PsiDirectoryNode" />
</PATH_ELEMENT>
</PATH>
</subPane>
</pane>
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager" selected="Python.eg">
<configuration default="false" name="tool" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/text_analysis/tools" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/text_analysis/tools/tool.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="false" name="tool_new" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/text_analysis/tools" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/text_analysis/tools/tool_new.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="false" name="tool_new_未部署" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/text_analysis/tools" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/text_analysis/tools/tool_new_未部署.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="false" name="zk_util" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/text_analysis/tools" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/text_analysis/tools/zk_util.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="false" name="eg" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/../../../2024/Qwen2_70B" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/../../../2024/Qwen2_70B/eg.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="true" type="PythonConfigurationType" factoryName="Python">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="SCRIPT_NAME" value="" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<method />
</configuration>
<configuration default="true" type="Tox" factoryName="Tox">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Doctests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="SCRIPT_NAME" value="" />
<option name="CLASS_NAME" value="" />
<option name="METHOD_NAME" value="" />
<option name="FOLDER_NAME" value="" />
<option name="TEST_TYPE" value="TEST_SCRIPT" />
<option name="PATTERN" value="" />
<option name="USE_PATTERN" value="false" />
<method />
</configuration>
<configuration default="true" type="tests" factoryName="Unittests">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs />
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="chatGptNew" />
<option name="_new_additionalArguments" value="&quot;&quot;" />
<option name="_new_target" value="&quot;.&quot;" />
<option name="_new_targetType" value="&quot;PATH&quot;" />
<method />
</configuration>
<list size="5">
<item index="0" class="java.lang.String" itemvalue="Python.tool" />
<item index="1" class="java.lang.String" itemvalue="Python.tool_new" />
<item index="2" class="java.lang.String" itemvalue="Python.tool_new_未部署" />
<item index="3" class="java.lang.String" itemvalue="Python.zk_util" />
<item index="4" class="java.lang.String" itemvalue="Python.eg" />
</list>
<recent_temporary>
<list size="5">
<item index="0" class="java.lang.String" itemvalue="Python.eg" />
<item index="1" class="java.lang.String" itemvalue="Python.zk_util" />
<item index="2" class="java.lang.String" itemvalue="Python.tool" />
<item index="3" class="java.lang.String" itemvalue="Python.tool_new_未部署" />
<item index="4" class="java.lang.String" itemvalue="Python.tool_new" />
</list>
</recent_temporary>
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="83bdfefc-d14d-41ac-9499-e0f5cbacd066" name="Default" comment="" />
<created>1699510973301</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1699510973301</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-10" y="0" width="1940" height="1030" extended-state="0" />
<editor active="true" />
<layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.13837838" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="true" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.24038461" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.16496018" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="true" content_ui="tabs" />
<window_info id="Data View" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<option name="time" value="2" />
</breakpoint-manager>
<watches-manager />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/uwsgi.ini">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="108">
<caret line="3" column="31" lean-forward="false" selection-start-line="3" selection-start-column="31" selection-end-line="3" selection-end-column="31" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/urls.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="6" column="21" lean-forward="false" selection-start-line="6" selection-start-column="11" selection-end-line="6" selection-end-column="21" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/tools/tool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1008">
<caret line="31" column="26" lean-forward="false" selection-start-line="31" selection-start-column="16" selection-end-line="31" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/views.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="2916">
<caret line="82" column="60" lean-forward="true" selection-start-line="82" selection-start-column="60" selection-end-line="82" selection-end-column="60" />
<folding>
<element signature="e#14#28#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/stop_uwsgi.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="12" lean-forward="false" selection-start-line="0" selection-start-column="12" selection-end-line="0" selection-end-column="12" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/uwsgi.ini">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="108">
<caret line="3" column="31" lean-forward="false" selection-start-line="3" selection-start-column="31" selection-end-line="3" selection-end-column="31" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/urls.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216">
<caret line="6" column="57" lean-forward="false" selection-start-line="6" selection-start-column="57" selection-end-line="6" selection-end-column="57" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/tools/tool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="1008">
<caret line="31" column="26" lean-forward="false" selection-start-line="31" selection-start-column="16" selection-end-line="31" selection-end-column="26" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/views.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="2556">
<caret line="72" column="47" lean-forward="true" selection-start-line="72" selection-start-column="47" selection-end-line="72" selection-end-column="47" />
<folding>
<element signature="e#14#28#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/stop_uwsgi.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="12" lean-forward="false" selection-start-line="0" selection-start-column="12" selection-end-line="0" selection-end-column="12" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/uwsgi.ini">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="108">
<caret line="3" column="31" lean-forward="false" selection-start-line="3" selection-start-column="31" selection-end-line="3" selection-end-column="31" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/urls.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="6" column="57" lean-forward="false" selection-start-line="6" selection-start-column="57" selection-end-line="6" selection-end-column="57" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/views.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="2592">
<caret line="73" column="74" lean-forward="true" selection-start-line="73" selection-start-column="74" selection-end-line="73" selection-end-column="74" />
<folding>
<element signature="e#14#28#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../chatGpt/text_analysis/tools/tool_new.py" />
<entry file="file://$PROJECT_DIR$/../../asr/text_analysis/tools/to_kafka.py" />
<entry file="file://$PROJECT_DIR$/../asrNew/text_analysis/tools/to_kafka.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/views_new_1129_未部署.py" />
<entry file="file://$PROJECT_DIR$/../asrNew/text_analysis/tools/tool.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/tools/bak/tool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="36">
<caret line="7" column="27" lean-forward="true" selection-start-line="7" selection-start-column="27" selection-end-line="9" selection-end-column="10" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/tools/tool_new.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/tools/tool_new_未部署.py" />
<entry file="file://$PROJECT_DIR$/../asrNew/text_analysis/views.py" />
<entry file="file://$PROJECT_DIR$/log_util/set_logger.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="684">
<caret line="32" column="0" lean-forward="false" selection-start-line="32" selection-start-column="0" selection-end-line="32" selection-end-column="0" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/tools/to_kafka_1109.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/views_0107.py" />
<entry file="file://$PROJECT_DIR$/../../../../2024/localKnowledge/src/langchain_chatgpt.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/views_0412.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/src.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/linshi.py" />
<entry file="file://$PROJECT_DIR$/../../chatGpt/src/04_chatgpt.py" />
<entry file="file://$PROJECT_DIR$/../../chatGpt/src/03_crawl_test1.py" />
<entry file="file://$PROJECT_DIR$/../../chatGpt/src/03_crawl_test.py" />
<entry file="file://$PROJECT_DIR$/../../chatGpt/src/02_crawl_test.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/urls.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="144">
<caret line="6" column="21" lean-forward="false" selection-start-line="6" selection-start-column="11" selection-end-line="6" selection-end-column="21" />
</state>
</provider>
</entry>
<entry file="file://D:/LH_program/Documents/WeChat Files/wxid_vw6fphlakzr821/FileStorage/File/2024-07/zk_util.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/tools/zk_util.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-98">
<caret line="8" column="10" lean-forward="true" selection-start-line="8" selection-start-column="10" selection-end-line="8" selection-end-column="10" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/tools/to_kafka.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="36">
<caret line="14" column="13" lean-forward="false" selection-start-line="14" selection-start-column="8" selection-end-line="14" selection-end-column="13" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/02_crawl_test.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../大模型场景验证/src/04_chatgpt.py" />
<entry file="file://$PROJECT_DIR$/../../../大模型场景验证/src/linshi.py" />
<entry file="file://$PROJECT_DIR$/../../../大模型场景验证/src/eg2.py" />
<entry file="file://$PROJECT_DIR$/../../../大模型场景验证/src/src1/gpt.py" />
<entry file="file://$PROJECT_DIR$/../../../大模型场景验证/src/02_crawl_test.py" />
<entry file="file://$PROJECT_DIR$/../../chatGpt/src/01_crawl_test.py" />
<entry file="file://$PROJECT_DIR$/text_analysis/tools/tool.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-399">
<caret line="44" column="4" lean-forward="false" selection-start-line="44" selection-start-column="4" selection-end-line="44" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../2024/Qwen2_70B/eg.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216">
<caret line="27" column="24" lean-forward="false" selection-start-line="27" selection-start-column="24" selection-end-line="27" selection-end-column="59" />
<folding>
<element signature="e#13#38#0" expanded="false" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/wsgi.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="252">
<caret line="16" column="6" lean-forward="false" selection-start-line="16" selection-start-column="6" selection-end-line="16" selection-end-column="6" />
<folding>
<element signature="e#242#251#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/uwsgi.ini">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="252">
<caret line="7" column="18" lean-forward="true" selection-start-line="7" selection-start-column="18" selection-end-line="7" selection-end-column="18" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/start.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/stop_uwsgi.sh">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="36">
<caret line="1" column="0" lean-forward="true" selection-start-line="1" selection-start-column="0" selection-end-line="1" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/text_analysis/views.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="792">
<caret line="43" column="0" lean-forward="true" selection-start-line="43" selection-start-column="0" selection-end-line="43" selection-end-column="0" />
<folding>
<element signature="e#14#28#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>

26
02_crawl_test.py

@ -0,0 +1,26 @@
# coding=utf-8
"""Minimal smoke test: send one chat message to the OpenAI API and print the reply."""
import json
import os

import requests

url = "https://api.openai.com/v1/chat/completions"

# SECURITY: the API key used to be hard-coded here (and is therefore leaked in
# VCS history — it should be revoked). Read it from the environment instead.
api_key = os.environ.get("OPENAI_API_KEY", "")

payload = json.dumps({
    "model": "gpt-3.5-turbo",
    "messages": [
        {
            "role": "user",
            "content": "你好!"
        }
    ]
})
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + api_key
}
# Bound the call so a network stall cannot hang the script indefinitely.
response = requests.post(url, headers=headers, data=payload, timeout=60)
response.raise_for_status()
r = response.json()
print(r['choices'][0]['message']['content'])

15
config.ini

@ -0,0 +1,15 @@
[zookeeper]
;zk地址
zkhost=node-01:12181,node-02:12181,node-03:12181
;节点
node=/analyze
[kafka]
;服务器地址
bootstrap_servers=node-01:19092,node-02:19092,node-03:19092
;topic
topic=produce_analyze
[qwenmodel]
;url=http://10.0.32.123:9000/v1/chat/completions
url=http://qwmodel.pontoaplus.com/v1/chat/completions

63
gpt输入样例.txt

@ -0,0 +1,63 @@
{
"id":1,
"module":"ChatGPT",
"version":1,
"name":"信息抽取",
"describe":"此步骤进行相关信息抽取",
"metadata":{
"position":[
100,
200
],
"output":{
"output_type":"table",
"label_col":[
"文件名称",
"识别内容",
"文件路径",
"文件大小",
"上传时间",
"GPT处理结果"
]
},
"input":{
"input_type":"text",
"label":[
"3_文件名称",
"3_识别内容",
"3_文件路径",
"3_文件大小",
"3_上传时间"
]
},
"admin":{
"prompt":"下面我给出一段数据,请抽取相关内容。需抽取的内容是{{tag}}。数据为@@3_识别内容@@",
"authorization":"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",
"model":"gpt-3.5-turbo",
"temperature":"0.2",
"top_p":"1",
"n":"1",
"user_input":[
{
"keyname":"tag",
"keydesc":"需抽取内容"
}
]
},
"user":{
"tag":"专利号,专利名称,申请人"
}
},
"data":{
"3_文件名称":"测试的专利文档.pdf",
"3_识别内容":"\n证书号第2353566号\n发明专利证书\n发明名称:一种浅海大型复杂沙波区地形重构方法\n发 明 人:张华国;傅斌;何谢错;厉冬玲;史爱琴;楼璘林\n专 利 号:ZL 2015 1 0071764.4\n专利申请日:2015年02月11日 专利权人:国家海洋局第二海洋研究所 授权公告日:2017年01月18日\n本发明经过本局依照中华人民共和国专利法进行审查,决定授予专利权,颁发本证书 并在专利登记簿上予以登记-专利权自授权公告之日起生效。\n本专利的专利权期限为二十年,自申请日起算。专利权人应当依照专利法及其实施细 则规定缴纳年费。本专利的年费应当在每年02月11日前缴纳。未按照规定缴纳年费的, 专利权自应当缴纳年费期满之日起终止„\n专利证书记载专利权登记时的法律状况。专利权的转移、质押、无效、终止、恢复和 专利权人的姓名或名称、国籍、地址变更等事项记载在专利登记簿上。 \n",
"3_文件路径":"http://10.0.32.50:/data2/lybtmp/install/知识包专利/测试的专利文档.pdf",
"3_文件大小":"250KB",
"3_上传时间":1687835515
},
"next_app_id":[
],
"wait_condition":[
],
"start_tag":false
}

BIN
log_util/__pycache__/set_logger.cpython-310.pyc

BIN
log_util/__pycache__/set_logger.cpython-36.pyc

BIN
log_util/__pycache__/set_logger.cpython-38.pyc

33
log_util/set_logger.py

@ -0,0 +1,33 @@
# -*- coding:utf-8 -*-
import logging
import logging.handlers
import time
import sys
# reload(sys)
# sys.setdefaultencoding('utf8')
import os
def set_logger(path):
    """Return a DEBUG-level logger that writes to `path`, rotating at midnight.

    Keeps 7 daily backups. Idempotent: calling it twice with the same path
    returns the same logger without attaching a duplicate handler (the
    original attached a new handler on every call, duplicating every line).
    """
    logger = logging.getLogger(path)
    logger.setLevel(logging.DEBUG)
    # Only attach a handler the first time this logger name is requested.
    if not logger.handlers:
        # Rotate once per day at midnight; keep 7 backup files.
        handler2 = logging.handlers.TimedRotatingFileHandler(path, when="midnight", interval=1, backupCount=7, encoding='utf-8')
        handler2.setLevel(logging.DEBUG)
        formatter = logging.Formatter("[%(asctime)s] [%(process)d] [%(levelname)s] - %(module)s.%(funcName)s (%(filename)s:%(lineno)d) - %(message)s")
        # formatter = logging.Formatter("[%(filename)s] [%(asctime)s] [%(levelname)s] [%(lineno)d] %(message)s", '%Y-%m-%d %H:%M:%S')
        handler2.setFormatter(formatter)
        logger.addHandler(handler2)
    return logger
if __name__ == '__main__':
    # Manual smoke test: emit one message at every level, then sleep a minute.
    logger = set_logger("crawlWebsrcCode.log")
    while True:
        for level, message in (
            ('debug', 'debug测试123'),
            ('info', "info测试123"),
            ('warning', 'warning测试123'),
            ('error', 'error测试123'),
            ('critical', 'critical测试123 message'),
        ):
            getattr(logger, level)(message)
        print('休眠一分钟')
        time.sleep(60)

32
log_util/set_logger.py_bak

@ -0,0 +1,32 @@
#coding:utf8
import logging
import os
import sys
from logging.handlers import TimedRotatingFileHandler
import re
# cur_dir = os.path.dirname( os.path.abspath(__file__)) or os.getcwd()
# sys.path.append(cur_dir + '/log_util')
def set_logger(filename):
    """Return an INFO-level logger writing to `filename`, rotating daily at midnight.

    Keeps 7 rotated files named like ``<filename>.YYYY-MM-DD.log``. Idempotent:
    repeated calls with the same name reuse the existing logger instead of
    stacking duplicate handlers (the original duplicated every log line on
    the second call).
    """
    # Create (or fetch) the named logger.
    logger = logging.getLogger(filename)
    # log_path = os.path.join(cur_dir, filename)
    logger.setLevel(logging.INFO)
    if not logger.handlers:
        # when="MIDNIGHT", interval=1 -> one new file per day at 00:00;
        # backupCount=7 -> keep seven rotated files.
        file_handler = TimedRotatingFileHandler(
            filename=filename, when="MIDNIGHT", encoding="utf-8", interval=1, backupCount=7)
        # With filename="mylog" this yields rotated names like mylog.2020-02-25.log
        file_handler.suffix = "%Y-%m-%d.log"
        # extMatch must match the suffix exactly, otherwise expired logs are
        # never deleted. Note the escaped dot before "log" (the original's
        # bare '.' also matched, but by accident).
        file_handler.extMatch = re.compile(r"^\d{4}-\d{2}-\d{2}\.log$")
        file_handler.setFormatter(
            logging.Formatter(
                "[%(asctime)s] [%(process)d] [%(levelname)s] - %(module)s.%(funcName)s (%(filename)s:%(lineno)d) - %(message)s"
            )
        )
        logger.addHandler(file_handler)
    return logger

6
logs/results.log

@ -0,0 +1,6 @@
[2025-01-03 17:37:47,264] [58852] [INFO] - views.Qwen (views.py:53) - 当前任务队列长度1
[2025-01-03 17:37:47,264] [58852] [INFO] - views.Qwen (views.py:61) - 任务数据为:{'polymerization': False, 'createUserId': '662015832180933762', 'data': {'businessKey': '8342a7b9-dc32-4c83-8965-543af9eea80e', '7f5dbc9a-b51a-40b7-bc86-c6d249020b27': '{"content":"你好,很高兴认识你!"}'}, 'dataProcessId': 'd53b06c9-42cf-414c-b92b-69615a9d6cb5', 'scenes_name': 'kafka模型翻译测试', 'dispatchId': 'b60c1c39-8e5b-4ec3-9140-dc5d5330595f', 'output': {'content': 'content'}, 'is_diffusion': False, 'scenes_id': 3719, 'dataId': 'b60c1c39-8e5b-4ec3-9140-dc5d5330595f', 'source_data_id': '306096', 'id': 14245, 'defaultCompletion': False, 'app_code': 'a1a5c8a8-bdd0-4817-ab4d-cae04516e0a6', 'address': 'http://192.168.0.44:8108/IntelliModelManager/model/modelSelect', 'module': '生成式大模型', 'start_tag': False, 'next_app_id': [{'start_id': 'a1a5c8a8-bdd0-4817-ab4d-cae04516e0a6', 'edge_id': 53480, 'end_id': 'e9728e30-1b40-4b48-bbe7-3643e5337a02'}], 'transfer_id': 1, 'version': 1, 'planCode': '6a50afff765d47eaa6598514cc99317c', 'parentId': ['3e278ce0-de7d-4380-b3d6-f0c801c7b887'], 'flag_data': 1, 'input': {'address': 'http://192.168.0.44:8108/IntelliModelManager/model/modelSelect', 'flagOutput': 0, 'modelAddress': {'1': 'http://172.18.1.181:9012/chatGptNew/', '2': 'http://192.168.0.44:8010/bigmodel/qanda/putQuestion', '3': 'http://192.168.0.44:9050/QwenModel', '4': 'http://192.168.0.44:8020/minimaxmodel/qanda/putQuestion', '5': 'http://192.168.0.44:8022/doubaomodel/qanda/putQuestion'}, 'authorizations': {'1': ['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'], '2': ['a60a9115539c5a5a8c46d559259e27ea.sZyBUC4D785Qyobw'], '3': [''], '4': 
['eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJHcm91cE5hbWUiOiLljJfkuqznmb7liIbngrnnp5HmioDpm4blm6LogqHku73mnInpmZDlhazlj7giLCJVc2VyTmFtZSI6IuWMl-S6rOeZvuWIhueCueenkeaKgOmbhuWbouiCoeS7veaciemZkOWFrOWPuCIsIkFjY291bnQiOiIiLCJTdWJqZWN0SUQiOiIxODEwNTQxMzM2MTEwMDU2MDQ4IiwiUGhvbmUiOiIxMzg0MTI3MTY5MCIsIkdyb3VwSUQiOiIxODEwNTQxMzM2MTAxNjY3NDQwIiwiUGFnZU5hbWUiOiIiLCJNYWlsIjoiIiwiQ3JlYXRlVGltZSI6IjIwMjQtMDgtMTIgMTE6Mjk6NDQiLCJpc3MiOiJtaW5pbWF4In0.BCvcvKRmSgLa7YezeSOgVyoH45VGceSDJHQsNPFoHVfB8En2pmRccLxvWI91MRldUayInkZ_5JL_aO1QDgrJuCRAogpQcDbWHYjgcqj1wmeKbJEBu6bClozXYF_nRz9AgGjIuU8JgM3-qMhODtnyRuspUVdpAYEhSHRmkLjwUwqjLbG2Ox5ihO-UT7VFqARyh_c6edyebmNaDGvXvQGu4Llksoz8ZP9QSMTuwdugIkHJNSlcGgNYXgPeHhvhqiDcqu6AjL3cwfVTur3I0HpYcjgw3z8-gLl6r9AVPKHfVc00StW6jG3qbtd9Ng9vdjVKn8CCZsFvt7UFjaADQE4Bxg'], '5': ['9bb3948a-cd50-4e2b-bfe7-c1d10dcaa43c']}, 'defaultForm': [{'field': 'content', 'dataType': 'string', 'label': '输出内容'}], 'defaultPrompt': {'message': '请按照JSON格式返回结果,JSON结构为$$'}, 'modelType': 8, 'llm': '3', 'n': '1', 'top_p': '1', 'authorization': '', 'temperature': '0.5', 'model': 'Qwen2-72B-Int4', 'prompt': [{'type': '1', 'value': '总结一下这段话:'}, {'type': '2', 'value': "7f5dbc9a-b51a-40b7-bc86-c6d249020b27:$['content']"}, {'type': '1', 'value': '\n'}], 'fieldType': 0}, 'module_id': 65, 'name': '生成式大模型', 'businessKey': '8342a7b9-dc32-4c83-8965-543af9eea80e', 'describe': '大语言模型', 'relations': [{'createUserId': '662015832180933762', 'startCode': '7f5dbc9a-b51a-40b7-bc86-c6d249020b27', 'endCode': 'a1a5c8a8-bdd0-4817-ab4d-cae04516e0a6', 'createTime': 1735925787000, 'scenesId': '3719', 'updateUserId': '662015832180933762', 'updateUser': 'analyze', 'createUser': 'analyze', 'del': 0, 'updateTime': 1735925787000, 'id': 53479}, {'createUserId': '662015832180933762', 'startCode': 'a1a5c8a8-bdd0-4817-ab4d-cae04516e0a6', 'endCode': 'e9728e30-1b40-4b48-bbe7-3643e5337a02', 'createTime': 1735925787000, 'scenesId': '3719', 'updateUserId': '662015832180933762', 'updateUser': 'analyze', 'createUser': 'analyze', 
'del': 0, 'updateTime': 1735925787000, 'id': 53480}]}
[2025-01-03 17:37:47,264] [58852] [INFO] - views.Qwen (views.py:62) - 当前version信息为:{3079: {'version': 0, 'operation': 'start'}, 3678: {'version': 1, 'operation': 'stop'}, 2475: {'version': 0, 'operation': 'stop'}, 3717: {'version': 2, 'operation': 'start'}, 3718: {'version': 3, 'operation': 'start'}, 3719: {'version': 1, 'operation': 'start'}}
[2025-01-03 17:37:48,393] [58852] [INFO] - views.Qwen (views.py:84) - Prompt为:总结一下这段话:你好,很高兴认识你!
***Qwen返回值:<Response [200]>-{"id":"cmpl-c030f3908e0b4e69be6daccceda0cdb9","object":"chat.completion","created":1735897067,"model":"Qwen2-72B-Instruct-GPTQ-Int4","choices":[{"index":0,"message":{"role":"assistant","content":"这段话是一个简单的问候,表达了对新认识的人的友好和高兴的心情。可以总结为:表达了初次见面的喜悦和友好。","tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":31,"total_tokens":61,"completion_tokens":30}}
[2025-01-03 17:37:48,520] [58852] [INFO] - to_kafka.send_kafka (to_kafka.py:14) - 数据推入kafka!

20
manage.py

@ -0,0 +1,20 @@
#!/usr/bin/env python
# Entry point: starts the background chatgpt worker thread, then hands
# control to Django's command-line management (runserver etc.).
import os
import sys
import threading
from text_analysis.views import chatgpt
import queue
import django
# global task_queue
# task_queue = queue.Queue()
if __name__ == "__main__":
    # NOTE(review): the worker thread starts BEFORE django.setup(); this only
    # works if chatgpt() touches no Django settings/ORM at startup — confirm.
    t = threading.Thread(target=chatgpt, name='chatgpt')
    t.daemon = True  # daemon thread: dies with the main process
    t.start()
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings")
    django.setup()
    from django.core.management import execute_from_command_line
    execute_from_command_line(sys.argv)

1
start.sh

@ -0,0 +1 @@
# Launch the service under uWSGI (configuration in uwsgi.ini, WSGI entry in
# wsgi.py), detaching as a daemon and appending process output to wsgi.log.
../../environment/python3.8/bin/uwsgi --ini uwsgi.ini --file wsgi.py --daemonize wsgi.log

1
stop_uwsgi.sh

@ -0,0 +1 @@
# Force-stop the uWSGI instance listening on port 9050.
# `xargs -r` skips running kill when no PID is found (the original emitted a
# "kill: missing operand" error in that case). SIGKILL gives processes no
# chance to clean up — acceptable here since uWSGI is restarted via start.sh.
lsof -i:9050 | grep -v 'PID' | awk '{print $2}' | xargs -r kill -9

20
test.py
File diff suppressed because it is too large
View File

0
text_analysis/__init__.py

BIN
text_analysis/__pycache__/__init__.cpython-310.pyc

BIN
text_analysis/__pycache__/__init__.cpython-36.pyc

BIN
text_analysis/__pycache__/__init__.cpython-38.pyc

BIN
text_analysis/__pycache__/cusException.cpython-38.pyc

BIN
text_analysis/__pycache__/read_config.cpython-38.pyc

BIN
text_analysis/__pycache__/settings.cpython-310.pyc

BIN
text_analysis/__pycache__/settings.cpython-36.pyc

BIN
text_analysis/__pycache__/settings.cpython-38.pyc

BIN
text_analysis/__pycache__/src.cpython-36.pyc

BIN
text_analysis/__pycache__/urls.cpython-310.pyc

BIN
text_analysis/__pycache__/urls.cpython-36.pyc

BIN
text_analysis/__pycache__/urls.cpython-38.pyc

BIN
text_analysis/__pycache__/views.cpython-310.pyc

BIN
text_analysis/__pycache__/views.cpython-36.pyc

BIN
text_analysis/__pycache__/views.cpython-38.pyc

BIN
text_analysis/__pycache__/wsgi.cpython-310.pyc

BIN
text_analysis/__pycache__/wsgi.cpython-36.pyc

BIN
text_analysis/__pycache__/wsgi.cpython-38.pyc

86
text_analysis/bak/views.py

@ -0,0 +1,86 @@
#coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
logging=set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content
import time
global task_queue
task_queue = queue.Queue()
@csrf_exempt
def chatGpt(request):
    """HTTP entry: accept a POSTed JSON task and enqueue it for the worker loop."""
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        reply = {"code": 1, "msg": "请求正常!"}
    except:
        logging.error(traceback.format_exc())
        reply = {"code": 0, "msg": "请求json格式不正确!"}
    return HttpResponse(json.dumps(reply, ensure_ascii=False))
def chatgpt():
    """Background worker: pull tasks off task_queue, call the OpenAI chat API,
    and push the result (success or failure) to Kafka.

    Runs forever; sleeps 10s when the queue is empty.
    """
    while True:
        if task_queue.qsize() <= 0:
            # logging.info("暂无任务,进入休眠--")
            time.sleep(10)
            continue
        # Pre-initialise so the except-handler never hits an unbound name
        # (the original referenced `response` / `raw_data` that might not
        # have been assigned yet when an early step failed).
        raw_data = None
        response = None
        try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            # logging.info(raw_data)
            data = get_content(raw_data, logging)
            url = "https://api.openai.com/v1/chat/completions"
            headers = {
                "Content-Type": "application/json;charset=UTF-8",
                "Authorization": "Bearer " + data["authorization"]
            }
            payload = json.dumps({
                "model": data["model"],
                "messages": [{"role": "user", "content": data["prompt"]}],
                "temperature": float(data["temperature"]),
                "top_p": float(data["top_p"]),
                "n": int(data["n"])
            })
            # Timeout keeps the worker from hanging forever on a stalled call
            # (matches the 180s used by the Qwen variants of this loop).
            response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
            d = json.loads(response.text)
            result = d['choices'][0]['message']['content']
            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": result}
            logging.info(raw_data)
            to_kafka.send_kafka(raw_data, logging)
        except:
            logging.error(traceback.format_exc())
            if raw_data is not None:
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
                # Prefer the API's own error body when a response came back.
                if response is not None and response.text:
                    raw_data["result"]["errorLog"] = response.text
                else:
                    raw_data["result"]["errorLog"] = traceback.format_exc()
                logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)

156
text_analysis/bak/views.py0801

@ -0,0 +1,156 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry: accept a POSTed JSON task and enqueue it for the Qwen worker."""
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        reply = {"code": 1, "msg": "请求正常!"}
    except:
        logging.error(traceback.format_exc())
        reply = {"code": 0, "msg": "请求json格式不正确!"}
    return HttpResponse(json.dumps(reply, ensure_ascii=False))
def Qwen():
    """Worker loop: consume queued tasks, call the Qwen chat endpoint, and
    push the result (success or failure) to Kafka.

    Runs forever; polls task_queue and sleeps 10s when idle.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build the result skeleton from the task's declared output fields.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    # Task carries no payload: report failure (status 2) immediately.
                    logging.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logging)
                else:
                    logging.info("任务数据为:{}".format(raw_data))
                    logging.info("当前version信息为:{}".format(stop_dict))
                    # Drop tasks whose version no longer matches the one recorded
                    # by the zookeeper watcher (task was stopped or superseded).
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                        continue
                    data = get_content(raw_data, logging)
                    # logging.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.225:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logging.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logging.info("Prompt为:{}—Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    result = d['choices'][0]['message']['content']
                    # (previous OpenAI-client implementation kept for reference)
                    # response = client.chat.completions.create(
                    #     model="Qwen2-72B-Instruct-GPTQ-Int4",
                    #     messages=[{"role": "user", "content": data["prompt"]}],
                    #     temperature=float(data["temperature"]),
                    #     top_p=float(data["top_p"]),
                    #     n=int(data["n"])
                    #     # stream=True
                    # )
                    # logging.info("Qwen返回值:{}—请求信息:{}".format(response,data))
                    # result = response.choices[0].message.content
                    # fieldType: 0 = plain-text output, 1 = JSON-structured output
                    fieldType = raw_data["input"]['fieldType']
                    if fieldType == 0:
                        res_tmp["content"] = result
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                    else:
                        res=parse_gptResult(res_tmp,result)
                        if res:
                            res_tmp_json = json.dumps(res, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回值不是json格式,无法解析!", "results": res_tmp_json,"status":2,"message":"GPT返回结果非json格式"}
                    logging.info(raw_data)
                    to_kafka.send_kafka(raw_data, logging)
            except:
                # NOTE(review): if the failure happened before raw_data/res_tmp
                # were assigned, this handler itself raises NameError — confirm
                # whether that is acceptable here.
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "","status":2,"message":"异常"}
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
        else:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the /analyze znode and record stop/version updates into stop_dict,
    which the Qwen worker uses to filter out paused/superseded tasks."""
    try:
        # Production cluster
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test cluster
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register the data watcher on the control node.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]  # NOTE(review): shadows the builtin id()
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            # Close the zookeeper connection.
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())

152
text_analysis/bak/views.py08012

@ -0,0 +1,152 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry: accept a POSTed JSON task and enqueue it for the Qwen worker."""
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        reply = {"code": 1, "msg": "请求正常!"}
    except:
        logging.error(traceback.format_exc())
        reply = {"code": 0, "msg": "请求json格式不正确!"}
    return HttpResponse(json.dumps(reply, ensure_ascii=False))
def Qwen():
    """Worker loop: consume queued tasks, call the Qwen chat endpoint, and
    push the result (success or failure) to Kafka.

    Runs forever; polls task_queue and sleeps 10s when idle.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build the result skeleton from the task's declared output fields.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    # Task carries no payload: report failure (status 2) immediately.
                    logging.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logging)
                else:
                    logging.info("任务数据为:{}".format(raw_data))
                    logging.info("当前version信息为:{}".format(stop_dict))
                    # Drop tasks whose version no longer matches the one recorded
                    # by the zookeeper watcher (task was stopped or superseded).
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                        continue
                    data = get_content(raw_data, logging)
                    # logging.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.225:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logging.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logging.info("Prompt为:{}—Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    result = d['choices'][0]['message']['content']
                    if result:
                        # fieldType: 0 = plain-text output, 1 = JSON-structured output
                        fieldType = raw_data["input"]['fieldType']
                        if fieldType == 0:
                            res_tmp["content"] = result
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res=parse_gptResult(res_tmp,result)
                            if res:
                                res_tmp_json = json.dumps(res, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                            else:
                                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回结果非json格式", "results": res_tmp_json,"status":2,"message":"GPT返回结果非json格式"}
                    else:
                        # Empty-but-valid model reply still counts as success.
                        logging.info("模型返回值正常但为空-{}".format(raw_data))
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    logging.info(raw_data)
                    to_kafka.send_kafka(raw_data, logging)
            except:
                # NOTE(review): if the failure happened before raw_data/res_tmp
                # were assigned, this handler itself raises NameError — confirm
                # whether that is acceptable here.
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "","status":2,"message":"异常"}
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
        else:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the /analyze znode and record stop/version updates into stop_dict,
    which the Qwen worker uses to filter out paused/superseded tasks."""
    try:
        # Production cluster
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test cluster
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register the data watcher on the control node.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]  # NOTE(review): shadows the builtin id()
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            # Close the zookeeper connection.
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())

161
text_analysis/bak/views.py0802_3

@ -0,0 +1,161 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry: accept a POSTed JSON task and enqueue it for the Qwen worker."""
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        reply = {"code": 1, "msg": "请求正常!"}
    except:
        logging.error(traceback.format_exc())
        reply = {"code": 0, "msg": "请求json格式不正确!"}
    return HttpResponse(json.dumps(reply, ensure_ascii=False))
def Qwen():
    """Worker loop: consume queued tasks, call the Qwen chat endpoint, and
    push the result (success or failure) to Kafka.

    Raises promptLen_Exception internally for over-long prompts, which is
    reported as status 2 ("文本长度超过模型限制"). Runs forever; sleeps 10s when idle.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build the result skeleton from the task's declared output fields.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    # Task carries no payload: report failure (status 2) immediately.
                    logging.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logging)
                else:
                    logging.info("任务数据为:{}".format(raw_data))
                    logging.info("当前version信息为:{}".format(stop_dict))
                    # Drop tasks whose version no longer matches the one recorded
                    # by the zookeeper watcher (task was stopped or superseded).
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                        continue
                    data = get_content(raw_data, logging)
                    # Pre-flight length guard; 10000 chars is a local proxy for
                    # the model's context limit — TODO confirm against the model.
                    if len(data["prompt"])>=10000:
                        raise promptLen_Exception
                    # logging.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.225:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logging.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logging.info("Prompt为:{}—Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    # The server reports context-length overflows as HTTP 400.
                    if response.status_code==400 and "maximum context length" in d["message"]:
                        raise promptLen_Exception
                    result = d['choices'][0]['message']['content']
                    if result:
                        # fieldType: 0 = plain-text output, 1 = JSON-structured output
                        fieldType = raw_data["input"]['fieldType']
                        if fieldType == 0:
                            res_tmp["content"] = result
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res=parse_gptResult(res_tmp,result)
                            if res:
                                res_tmp_json = json.dumps(res, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                            else:
                                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回结果不符合预期", "results": res_tmp_json,"status":2,"message":"GPT返回结果不符合预期"}
                    else:
                        # Empty-but-valid model reply still counts as success.
                        logging.info("模型返回值正常但为空-{}".format(raw_data))
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    logging.info(raw_data)
                    to_kafka.send_kafka(raw_data, logging)
            except promptLen_Exception:
                logging.info("文本长度超过模型限制-{}".format(raw_data))
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": res_tmp_json, "status": 2,"message": "文本长度超过模型限制"}
                to_kafka.send_kafka(raw_data, logging)
            except:
                # NOTE(review): if the failure happened before raw_data/res_tmp
                # were assigned, this handler itself raises NameError — confirm
                # whether that is acceptable here.
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": res_tmp_json,"status":2,"message":"异常"}
                logging.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
        else:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the /analyze znode and record stop/version updates into stop_dict,
    which the Qwen worker uses to filter out paused/superseded tasks."""
    try:
        # Production cluster
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test cluster
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register the data watcher on the control node.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]  # NOTE(review): shadows the builtin id()
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            # Close the zookeeper connection.
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())

163
text_analysis/bak/views.py_0806

@ -0,0 +1,163 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logger = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry: accept a POSTed JSON task and enqueue it for the Qwen worker."""
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        reply = {"code": 1, "msg": "请求正常!"}
    except:
        logger.error(traceback.format_exc())
        reply = {"code": 0, "msg": "请求json格式不正确!"}
    return HttpResponse(json.dumps(reply, ensure_ascii=False))
def Qwen():
    """Background worker loop: pull tasks from task_queue, call the Qwen chat
    completion service, and publish a result record to Kafka for each task.

    Runs forever; intended to be started once in a dedicated thread.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logger.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build a result skeleton keyed by the task's declared output fields.
                # NOTE(review): if raw_data lacks "output"/"scenes_id"/"version", the
                # KeyError fires before res_tmp is bound and the except handlers below
                # then crash on the unbound res_tmp — worth hardening.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    # Task has no payload: emit a status-2 failure record to Kafka.
                    logger.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logger)
                else:
                    logger.info("任务数据为:{}".format(raw_data))
                    logger.info("当前version信息为:{}".format(stop_dict))
                    # stop_dict is maintained by the ZooKeeper watcher; a version
                    # mismatch means the scene was paused/updated, so drop the task.
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logger.info("已暂停任务,数据过滤掉")
                        continue
                    data = get_content(raw_data, logger)
                    # Client-side prompt-length cap (characters, not tokens).
                    if len(data["prompt"])>=10000:
                        raise promptLen_Exception
                    # logger.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.225:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logger.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logger.info("Prompt为:{}***Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    # The server reports an over-long context as HTTP 400; treat it
                    # like the local length cap above.
                    if response.status_code==400 and "maximum context length" in d["message"]:
                        raise promptLen_Exception
                    result = d['choices'][0]['message']['content']
                    if result:
                        # fieldType: 0 = plain-text answer, 1 = JSON-structured answer.
                        fieldType = raw_data["input"]['fieldType']
                        if fieldType == 0:
                            res_tmp["content"] = result
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res=parse_gptResult(res_tmp,result)
                            if res:
                                res_tmp_json = json.dumps(res, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                            else:
                                # Unparseable model JSON is deliberately still reported as
                                # success with empty fields (the stricter variant is kept
                                # commented out below).
                                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                                # raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回结果不符合预期", "results": res_tmp_json,"status":2,"message":"Qwen返回结果不符合预期"}
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    else:
                        logger.info("模型返回值为空")
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    # logger.info(raw_data)
                    to_kafka.send_kafka(raw_data, logger)
            except promptLen_Exception:
                # Prompt exceeded the model limit: status-2 failure record.
                logger.info("文本长度超过模型限制-{}".format(raw_data))
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": res_tmp_json, "status": 2,"message": "文本长度超过模型限制"}
                to_kafka.send_kafka(raw_data, logger)
            except:
                # Any other failure: attach the traceback and still notify Kafka.
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": res_tmp_json,"status":2,"message":"异常"}
                logger.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logger)
        else:
            # Idle: poll every 10s when the queue is empty.
            logger.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze and mirror pause/version updates into stop_dict.

    Blocks forever (1s sleep loop) so the DataWatch callback keeps firing; on
    interruption the ZK connection is shut down. Setup failures are logged.
    """
    try:
        # Production ensemble; the commented line below is the test ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to data-changed events; creation/deletion are ignored.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logger.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene_id = payload["scenes_id"]
            # The worker loop compares task versions against this entry to
            # drop tasks from paused scenes.
            stop_dict[scene_id] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # Keep the process alive so the watcher keeps receiving events.
        try:
            while True:
                time.sleep(1)
        except:
            logger.info("Stopping...")
            zk.stop()
            zk.close()
    except:
        logger.error(traceback.format_exc())

165
text_analysis/bak/views.py_20240815

@ -0,0 +1,165 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logger = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it for the Qwen worker loop.

    Returns a JSON body with code 1 on success, code 0 on a bad method or an unparseable body.
    """
    def _json_response(code, msg):
        # Serialize without ASCII-escaping so Chinese messages stay readable.
        return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))

    # Guard clause: only POST is accepted.
    if request.method != 'POST':
        return _json_response(0, "请求方式错误,改为post请求")
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        return _json_response(1, "请求正常!")
    except:
        logger.error(traceback.format_exc())
        return _json_response(0, "请求json格式不正确!")
def Qwen():
    """Background worker loop: pull tasks from task_queue, call the Qwen chat
    completion service, and publish a result record (tagged isLast=1) to Kafka.

    Runs forever; intended to be started once in a dedicated thread.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logger.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build a result skeleton keyed by the task's declared output fields.
                # NOTE(review): if raw_data lacks "output"/"scenes_id"/"version", the
                # KeyError fires before res_tmp is bound and the except handlers below
                # then crash on the unbound res_tmp — worth hardening.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                # Mark this record as the final message for the task.
                res_tmp["isLast"]=1
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    # Task has no payload: emit a status-2 failure record to Kafka.
                    logger.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logger)
                else:
                    logger.info("任务数据为:{}".format(raw_data))
                    logger.info("当前version信息为:{}".format(stop_dict))
                    # stop_dict is maintained by the ZooKeeper watcher; a version
                    # mismatch means the scene was paused/updated, so drop the task.
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logger.info("已暂停任务,数据过滤掉")
                        continue
                    data = get_content(raw_data, logger)
                    # Client-side prompt-length cap (characters, not tokens).
                    if len(data["prompt"])>=10000:
                        raise promptLen_Exception
                    # logger.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.225:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logger.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logger.info("Prompt为:{}***Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    # The server reports an over-long context as HTTP 400; treat it
                    # like the local length cap above.
                    if response.status_code==400 and "maximum context length" in d["message"]:
                        raise promptLen_Exception
                    result = d['choices'][0]['message']['content']
                    if result:
                        # fieldType: 0 = plain-text answer, 1 = JSON-structured answer.
                        fieldType = raw_data["input"]['fieldType']
                        if fieldType == 0:
                            res_tmp["content"] = result
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res=parse_gptResult(res_tmp,result)
                            if res:
                                res["isLast"] = 1
                                res_tmp_json = json.dumps(res, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                            else:
                                # Unparseable model JSON is deliberately still reported as
                                # success with empty fields (the stricter variant is kept
                                # commented out below).
                                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                                # raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回结果不符合预期", "results": res_tmp_json,"status":2,"message":"Qwen返回结果不符合预期"}
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    else:
                        logger.info("模型返回值为空")
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    logger.info(raw_data)
                    to_kafka.send_kafka(raw_data, logger)
            except promptLen_Exception:
                # Prompt exceeded the model limit: status-2 failure record.
                logger.info("文本长度超过模型限制-{}".format(raw_data))
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": res_tmp_json, "status": 2,"message": "文本长度超过模型限制"}
                to_kafka.send_kafka(raw_data, logger)
            except:
                # Any other failure: attach the traceback and still notify Kafka.
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": res_tmp_json,"status":2,"message":"异常"}
                logger.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logger)
        else:
            # Idle: poll every 10s when the queue is empty.
            logger.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze and mirror pause/version updates into stop_dict.

    Blocks forever (1s sleep loop) so the DataWatch callback keeps firing; on
    interruption the ZK connection is shut down. Setup failures are logged.
    """
    try:
        # Production ensemble; the commented line below is the test ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to data-changed events; creation/deletion are ignored.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logger.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene_id = payload["scenes_id"]
            # The worker loop compares task versions against this entry to
            # drop tasks from paused scenes.
            stop_dict[scene_id] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # Keep the process alive so the watcher keeps receiving events.
        try:
            while True:
                time.sleep(1)
        except:
            logger.info("Stopping...")
            zk.stop()
            zk.close()
    except:
        logger.error(traceback.format_exc())

166
text_analysis/bak/views.py_20240925

@ -0,0 +1,166 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logger = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it for the Qwen worker loop.

    Returns a JSON body with code 1 on success, code 0 on a bad method or an unparseable body.
    """
    def _json_response(code, msg):
        # Serialize without ASCII-escaping so Chinese messages stay readable.
        return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))

    # Guard clause: only POST is accepted.
    if request.method != 'POST':
        return _json_response(0, "请求方式错误,改为post请求")
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        return _json_response(1, "请求正常!")
    except:
        logger.error(traceback.format_exc())
        return _json_response(0, "请求json格式不正确!")
def Qwen():
    """Background worker loop: pull tasks from task_queue, call the Qwen chat
    completion endpoint, and publish a result record to Kafka for every task
    (tasks from paused scenes are dropped).

    Fix over the previous revision: if the task dict was missing "output",
    "scenes_id" or "version", the KeyError fired before res_tmp was bound and
    both except handlers then crashed with NameError on res_tmp, so the task
    was dropped without any Kafka failure record. res_tmp is now pre-bound and
    the task is dequeued before entering the try block.

    Runs forever; intended to be started once in a dedicated thread.
    """
    while True:
        if task_queue.qsize() == 0:
            # Idle: poll every 10s when the queue is empty.
            logger.info("暂无任务,进入休眠--")
            time.sleep(10)
            continue
        logger.info("取任务队列长度{}".format(task_queue.qsize()))
        raw_data = task_queue.get()
        # Pre-bind the result skeleton so the except handlers can always
        # serialize something, even when the task dict is malformed.
        res_tmp = {}
        try:
            output = raw_data["output"]
            res_tmp = {key: "" for key in output}
            if "id" in res_tmp.keys():
                res_tmp["id"] = str(uuid.uuid4())
            # Mark this record as the final message for the task.
            res_tmp["isLast"] = 1
            task_id = raw_data["scenes_id"]
            task_version = raw_data["version"]
            if "data" not in raw_data.keys():
                # Task has no payload: emit a status-2 failure record to Kafka.
                logger.info("任务缺少data—{}".format(raw_data))
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2, "message": "未配置data内容"}
                raw_data["result"]["results"] = json.dumps(res_tmp, ensure_ascii=False)
                to_kafka.send_kafka(raw_data, logger)
            else:
                logger.info("任务数据为:{}".format(raw_data))
                logger.info("当前version信息为:{}".format(stop_dict))
                # stop_dict is maintained by the ZooKeeper watcher; a version
                # mismatch means the scene was paused/updated, so drop the task.
                if task_id in stop_dict.keys() and task_version != stop_dict[task_id]["version"]:
                    logger.info("已暂停任务,数据过滤掉")
                    continue
                data = get_content(raw_data, logger)
                prompt_len = len(data["prompt"])
                url = "http://10.0.32.123:9000/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8"
                }
                payload = json.dumps({
                    "model": "Qwen2-72B-Instruct-GPTQ-Int4",
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"])
                })
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logger.info("Prompt为:{}***Qwen返回值:{}-{}".format(data["prompt"], response, response.text))
                d = json.loads(response.text)
                # The server reports an over-long context as HTTP 400.
                if response.status_code == 400 and "maximum context length" in d["message"]:
                    logger.info("文本长度超过模型限制-{}-{}".format(prompt_len, raw_data))
                    raise promptLen_Exception
                result = d['choices'][0]['message']['content']
                if result:
                    # fieldType: 0 = plain-text answer, 1 = JSON-structured answer.
                    fieldType = raw_data["input"]['fieldType']
                    if fieldType == 0:
                        res_tmp["content"] = result
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                    else:
                        res = parse_gptResult(res_tmp, result)
                        if res:
                            res["isLast"] = 1
                            res_tmp_json = json.dumps(res, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                        else:
                            # Unparseable model JSON is deliberately still reported as
                            # success with empty fields (the stricter variant is kept
                            # commented out below).
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            # raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回结果不符合预期", "results": res_tmp_json,"status":2,"message":"Qwen返回结果不符合预期"}
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    logger.info("模型返回值为空")
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                to_kafka.send_kafka(raw_data, logger)
        except promptLen_Exception:
            # Prompt exceeded the model limit: status-2 failure record.
            raw_data["result"] = {"successCode": "0", "errorLog": "", "results": json.dumps(res_tmp, ensure_ascii=False), "status": 2, "message": "文本长度超过模型限制"}
            to_kafka.send_kafka(raw_data, logger)
        except:
            # Any other failure: attach the traceback and still notify Kafka.
            raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": json.dumps(res_tmp, ensure_ascii=False), "status": 2, "message": "异常"}
            logger.error("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
            to_kafka.send_kafka(raw_data, logger)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze and mirror pause/version updates into stop_dict.

    Blocks forever (1s sleep loop) so the DataWatch callback keeps firing; on
    interruption the ZK connection is shut down. Setup failures are logged.
    """
    try:
        # Production ensemble; the commented line below is the test ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to data-changed events; creation/deletion are ignored.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logger.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene_id = payload["scenes_id"]
            # The worker loop compares task versions against this entry to
            # drop tasks from paused scenes.
            stop_dict[scene_id] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # Keep the process alive so the watcher keeps receiving events.
        try:
            while True:
                time.sleep(1)
        except:
            logger.info("Stopping...")
            zk.stop()
            zk.close()
    except:
        logger.error(traceback.format_exc())

149
text_analysis/bak/views.py_bak

@ -0,0 +1,149 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it for the Qwen worker loop.

    Returns a JSON body with code 1 on success, code 0 on a bad method or an unparseable body.
    """
    def _json_response(code, msg):
        # Serialize without ASCII-escaping so Chinese messages stay readable.
        return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))

    # Guard clause: only POST is accepted.
    if request.method != 'POST':
        return _json_response(0, "请求方式错误,改为post请求")
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        return _json_response(1, "请求正常!")
    except:
        logging.error(traceback.format_exc())
        return _json_response(0, "请求json格式不正确!")
def Qwen():
    """Background worker loop: pull tasks from task_queue, call the Qwen chat
    completion service, and publish a result record to Kafka for each task.

    Runs forever; intended to be started once in a dedicated thread.
    Note: `logging` here is the module-level logger from set_logger, not the
    stdlib logging module.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build a result skeleton keyed by the task's declared output fields.
                # NOTE(review): if raw_data lacks "output"/"scenes_id"/"version", the
                # KeyError fires before res_tmp is bound and the except handler below
                # then crashes on the unbound res_tmp — worth hardening.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                # stop_dict is maintained by the ZooKeeper watcher; a version
                # mismatch means the scene was paused/updated, so drop the task.
                if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                    logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                    continue
                data = get_content(raw_data, logging)
                # logging.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                url="http://10.0.32.225:9000/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8"
                }
                payload = json.dumps({
                    "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"])
                })
                logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                logging.info("Qwen返回值:{}-{}".format(response,response.text))
                d = json.loads(response.text)
                result = d['choices'][0]['message']['content']
                # Earlier OpenAI-SDK-based call path, kept for reference:
                # response = client.chat.completions.create(
                #     model="Qwen2-72B-Instruct-GPTQ-Int4",
                #     messages=[{"role": "user", "content": data["prompt"]}],
                #     temperature=float(data["temperature"]),
                #     top_p=float(data["top_p"]),
                #     n=int(data["n"])
                #     # stream=True
                # )
                # logging.info("Qwen返回值:{}—请求信息:{}".format(response,data))
                # result = response.choices[0].message.content
                # fieldType: 0 = plain-text answer, 1 = JSON-structured answer.
                fieldType = raw_data["input"]['fieldType']
                if fieldType == 0:
                    res_tmp["content"] = result
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                else:
                    res=parse_gptResult(res_tmp,result)
                    if res:
                        res_tmp_json = json.dumps(res, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                    else:
                        # Model reply was not parseable JSON: status-2 failure record.
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回值不是json格式,无法解析!", "results": res_tmp_json,"status":2,"message":"GPT返回结果非json格式"}
                # logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except:
                # Any failure: attach the traceback and still notify Kafka.
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "","status":2,"message":"异常"}
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
        else:
            # Idle: poll every 10s when the queue is empty.
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze and mirror pause/version updates into stop_dict.

    Blocks forever (1s sleep loop) so the DataWatch callback keeps firing; on
    interruption the ZK connection is shut down. Setup failures are logged.
    """
    try:
        # Production ensemble; the commented line below is the test ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to data-changed events; creation/deletion are ignored.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene_id = payload["scenes_id"]
            # The worker loop compares task versions against this entry to
            # drop tasks from paused scenes.
            stop_dict[scene_id] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # Keep the process alive so the watcher keeps receiving events.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())

159
text_analysis/bak/views.py_bak0802_2

@ -0,0 +1,159 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it for the Qwen worker loop.

    Returns a JSON body with code 1 on success, code 0 on a bad method or an unparseable body.
    """
    def _json_response(code, msg):
        # Serialize without ASCII-escaping so Chinese messages stay readable.
        return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))

    # Guard clause: only POST is accepted.
    if request.method != 'POST':
        return _json_response(0, "请求方式错误,改为post请求")
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        return _json_response(1, "请求正常!")
    except:
        logging.error(traceback.format_exc())
        return _json_response(0, "请求json格式不正确!")
def Qwen():
    """Background worker loop: pull tasks from task_queue, call the Qwen chat
    completion service, and publish a result record to Kafka for each task.

    Runs forever; intended to be started once in a dedicated thread.
    Note: `logging` here is the module-level logger from set_logger, not the
    stdlib logging module.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build a result skeleton keyed by the task's declared output fields.
                # NOTE(review): if raw_data lacks "output"/"scenes_id"/"version", the
                # KeyError fires before res_tmp is bound and the except handlers below
                # then crash on the unbound res_tmp — worth hardening.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    # Task has no payload: emit a status-2 failure record to Kafka.
                    logging.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logging)
                else:
                    logging.info("任务数据为:{}".format(raw_data))
                    logging.info("当前version信息为:{}".format(stop_dict))
                    # stop_dict is maintained by the ZooKeeper watcher; a version
                    # mismatch means the scene was paused/updated, so drop the task.
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                        continue
                    data = get_content(raw_data, logging)
                    # logging.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.225:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logging.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logging.info("Prompt为:{}—Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    # The server reports an over-long context as HTTP 400.
                    if response.status_code==400 and "maximum context length" in d["message"]:
                        raise promptLen_Exception
                    result = d['choices'][0]['message']['content']
                    if result:
                        # fieldType: 0 = plain-text answer, 1 = JSON-structured answer.
                        fieldType = raw_data["input"]['fieldType']
                        if fieldType == 0:
                            res_tmp["content"] = result
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res=parse_gptResult(res_tmp,result)
                            if res:
                                res_tmp_json = json.dumps(res, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                            else:
                                # Model reply was not parseable JSON: status-2 failure record.
                                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回结果非json格式", "results": res_tmp_json,"status":2,"message":"GPT返回结果非json格式"}
                    else:
                        logging.info("模型返回值正常但为空-{}".format(raw_data))
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    logging.info(raw_data)
                    to_kafka.send_kafka(raw_data, logging)
            except promptLen_Exception:
                # Prompt exceeded the model limit: status-2 failure record.
                logging.info("文本长度超过模型限制-{}".format(raw_data))
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": res_tmp_json, "status": 2,"message": "文本长度超过模型限制"}
                to_kafka.send_kafka(raw_data, logging)
            except:
                # Any other failure: attach the traceback and still notify Kafka.
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": res_tmp_json,"status":2,"message":"异常"}
                logging.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
        else:
            # Idle: poll every 10s when the queue is empty.
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze and mirror pause/version updates into stop_dict.

    Blocks forever (1s sleep loop) so the DataWatch callback keeps firing; on
    interruption the ZK connection is shut down. Setup failures are logged.
    """
    try:
        # Production ensemble; the commented line below is the test ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to data-changed events; creation/deletion are ignored.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene_id = payload["scenes_id"]
            # The worker loop compares task versions against this entry to
            # drop tasks from paused scenes.
            stop_dict[scene_id] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # Keep the process alive so the watcher keeps receiving events.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())

149
text_analysis/bak/views.pybak

@ -0,0 +1,149 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it for the Qwen worker loop.

    Returns a JSON body with code 1 on success, code 0 on a bad method or an unparseable body.
    """
    def _json_response(code, msg):
        # Serialize without ASCII-escaping so Chinese messages stay readable.
        return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))

    # Guard clause: only POST is accepted.
    if request.method != 'POST':
        return _json_response(0, "请求方式错误,改为post请求")
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        return _json_response(1, "请求正常!")
    except:
        logging.error(traceback.format_exc())
        return _json_response(0, "请求json格式不正确!")
def Qwen():
    """Background worker loop: pull tasks from task_queue, call the Qwen chat
    completion service, and publish a result record to Kafka for each task.

    Runs forever; intended to be started once in a dedicated thread.
    Note: `logging` here is the module-level logger from set_logger, not the
    stdlib logging module.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Build a result skeleton keyed by the task's declared output fields.
                # NOTE(review): if raw_data lacks "output"/"scenes_id"/"version", the
                # KeyError fires before res_tmp is bound and the except handler below
                # then crashes on the unbound res_tmp — worth hardening.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                # stop_dict is maintained by the ZooKeeper watcher; a version
                # mismatch means the scene was paused/updated, so drop the task.
                if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                    logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                    continue
                data = get_content(raw_data, logging)
                # logging.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                url="http://10.0.32.225:9000/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8"
                }
                payload = json.dumps({
                    "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"])
                })
                # logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                logging.info("Prompt为:{}—Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                d = json.loads(response.text)
                result = d['choices'][0]['message']['content']
                # Earlier OpenAI-SDK-based call path, kept for reference:
                # response = client.chat.completions.create(
                #     model="Qwen2-72B-Instruct-GPTQ-Int4",
                #     messages=[{"role": "user", "content": data["prompt"]}],
                #     temperature=float(data["temperature"]),
                #     top_p=float(data["top_p"]),
                #     n=int(data["n"])
                #     # stream=True
                # )
                # logging.info("Qwen返回值:{}—请求信息:{}".format(response,data))
                # result = response.choices[0].message.content
                # fieldType: 0 = plain-text answer, 1 = JSON-structured answer.
                fieldType = raw_data["input"]['fieldType']
                if fieldType == 0:
                    res_tmp["content"] = result
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                else:
                    res=parse_gptResult(res_tmp,result)
                    if res:
                        res_tmp_json = json.dumps(res, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                    else:
                        # Model reply was not parseable JSON: status-2 failure record.
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回值不是json格式,无法解析!", "results": res_tmp_json,"status":2,"message":"GPT返回结果非json格式"}
                logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except:
                # Any failure: attach the traceback and still notify Kafka.
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "","status":2,"message":"异常"}
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
        else:
            # Idle: poll every 10s when the queue is empty.
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze and mirror pause/version updates into stop_dict.

    Blocks forever (1s sleep loop) so the DataWatch callback keeps firing; on
    interruption the ZK connection is shut down. Setup failures are logged.
    """
    try:
        # Production ensemble; the commented line below is the test ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to data-changed events; creation/deletion are ignored.
            if event is None or event.type != EventType.CHANGED:
                return
            data, stat = zk.get("/analyze")
            logging.info("执行删除操作:{}".format(data))
            payload = json.loads(data)
            scene_id = payload["scenes_id"]
            # The worker loop compares task versions against this entry to
            # drop tasks from paused scenes.
            stop_dict[scene_id] = {
                "version": payload["version"],
                "operation": payload["operation"],
            }

        # Keep the process alive so the watcher keeps receiving events.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
            zk.stop()
            zk.close()
    except:
        logging.error(traceback.format_exc())

101
text_analysis/bak/views_0107.py

@ -0,0 +1,101 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content
import uuid
import time
global task_queue
task_queue = queue.Queue()
@csrf_exempt
def chatGptNew(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it for the GPT worker loop.

    Returns a JSON body with code 1 on success, code 0 on a bad method or an unparseable body.
    """
    def _json_response(code, msg):
        # Serialize without ASCII-escaping so Chinese messages stay readable.
        return HttpResponse(json.dumps({"code": code, "msg": msg}, ensure_ascii=False))

    # Guard clause: only POST is accepted.
    if request.method != 'POST':
        return _json_response(0, "请求方式错误,改为post请求")
    try:
        task = json.loads(request.body)
        task_queue.put(task)
        return _json_response(1, "请求正常!")
    except:
        logging.error(traceback.format_exc())
        return _json_response(0, "请求json格式不正确!")
def chatgpt():
    """Background worker: pull tasks off task_queue, call the OpenAI chat
    completions API and push the outcome (success or failure) to Kafka.

    Runs forever; sleeps 10 seconds whenever the queue is empty.
    """
    while True:
        if task_queue.qsize() > 0:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            # Initialised up-front so the except-branch below can never hit a
            # NameError when raw_data["output"] itself is missing or invalid
            # (previously that turned a task error into a worker crash).
            res_tmp = {}
            try:
                output = raw_data["output"]
                # Result template: one empty field per requested output column.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"]
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"])
                })
                logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload)
                logging.info("GPT返回值:{}-{}".format(response, response.text))
                d = json.loads(response.text)
                result = d['choices'][0]['message']['content']
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
                logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except:
                # Any failure (bad task, HTTP error, unexpected reply shape)
                # is reported back through Kafka with successCode "0".
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("调用gpt失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
        else:
            time.sleep(10)

101
text_analysis/bak/views_0412.py

@ -0,0 +1,101 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content
import uuid
import time
global task_queue
task_queue = queue.Queue()
@csrf_exempt
def chatGptNew(request):
    """HTTP entry point: accept a POSTed JSON task and queue it for the
    background chatgpt() worker. Responds with code 1/0 status JSON.
    """
    if request.method == 'POST':
        try:
            # txt=request.body.encode("utf-8")
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except:
            # Body was not valid JSON (or the queue put failed).
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        # Only POST is supported.
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
def chatgpt():
    """Worker loop (0412 backup): take queued tasks, call the OpenAI chat
    API and push the outcome to Kafka. Sleeps 10s when idle.

    NOTE(review): if raw_data["output"] raises, res_tmp is still unbound
    when the except-branch runs, turning the failure into a NameError.
    """
    while True:
        if task_queue.qsize() > 0:
            # try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            try:
                output = raw_data["output"]
                # Result template: one empty field per requested output column.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"]
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"])
                })
                logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload)
                logging.info("GPT返回值:{}-{}".format(response,response.text))
                d = json.loads(response.text)
                result = d['choices'][0]['message']['content']
                res_tmp["content"] = result
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
                # print(raw_data)
                #logging.info(raw_data)
                to_kafka.send_kafka(raw_data, logging)
            except:
                # Report any failure back through Kafka with successCode "0".
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("调用gpt失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logging)
            # except:
            #     raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
            #     if response and response.text:
            #         raw_data["result"]["errorLog"] = response.text
            #     else:
            #         raw_data["result"]["errorLog"] = traceback.format_exc()
            #     res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
            #     raw_data["result"]["results"]=res_tmp_json
            #     logging.info("解析失败{}-{}".format(raw_data, traceback.format_exc()))
            #     to_kafka.send_kafka(raw_data, logging)
        else:
            # logging.info("暂无任务,进入休眠--")
            time.sleep(10)

101
text_analysis/bak/views_0415.py

@ -0,0 +1,101 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
global task_queue
task_queue = queue.Queue()
@csrf_exempt
def chatGptNew(request):
    """HTTP entry point (0415 backup): accept a POSTed JSON task and queue
    it for the background chatgpt() worker.
    """
    if request.method == 'POST':
        try:
            # txt=request.body.encode("utf-8")
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except:
            # Body was not valid JSON.
            logging.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        # Only POST is supported.
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
def chatgpt():
    """Worker loop (0415 backup): take queued tasks, call the OpenAI chat
    API and build raw_data["result"]. If the output template has no
    "content" column, the model reply is parsed as JSON via parse_gptResult.

    NOTE(review): Kafka publishing is commented out in this backup, so
    results are computed but never sent; res_tmp is also unbound in the
    except-branch if raw_data["output"] raises.
    """
    while True:
        if task_queue.qsize() > 0:
            # try:
            logging.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            try:
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                data = get_content(raw_data, logging)
                url = "https://api.openai.com/v1/chat/completions"
                headers = {
                    "Content-Type": "application/json;charset=UTF-8",
                    "Authorization": "Bearer " + data["authorization"]
                }
                payload = json.dumps({
                    "model": data["model"],
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"])
                })
                logging.info("prompt为{}".format(data["prompt"]))
                response = requests.request("POST", url, headers=headers, data=payload)
                logging.info("GPT返回值:{}-{}".format(response,response.text))
                d = json.loads(response.text)
                result = d['choices'][0]['message']['content']
                # Added: plain-text outputs go straight into "content";
                # otherwise the reply is expected to contain a JSON object.
                if "content" in res_tmp.keys():
                    res_tmp["content"] = result
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
                else:
                    res=parse_gptResult(res_tmp,result)
                    if res:
                        res_tmp_json = json.dumps(res, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json}
                    else:
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "0", "errorLog": "GPT返回值不是json格式,无法解析!", "results": res_tmp_json}
                # logging.info(raw_data)
                # to_kafka.send_kafka(raw_data, logging)
            except:
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": ""}
                raw_data["result"]["errorLog"] = traceback.format_exc()
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"]["results"] = res_tmp_json
                logging.info("调用gpt失败{}-{}".format(raw_data, traceback.format_exc()))
                # to_kafka.send_kafka(raw_data, logging)
        else:
            # logging.info("暂无任务,进入休眠--")
            time.sleep(10)

166
text_analysis/bak/views_20240925.py

@ -0,0 +1,166 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logger = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry point: accept a POSTed JSON task and queue it for the
    Qwen() background worker. Responds with code 1/0 status JSON.
    """
    if request.method == 'POST':
        try:
            # txt=request.body.encode("utf-8")
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except:
            # Body was not valid JSON.
            logger.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        # Only POST is supported.
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
def Qwen():
    """Background worker: consume tasks from task_queue, query the local
    Qwen chat-completion service and push results (success or failure) to
    Kafka. Tasks whose scenes_id has a newer version recorded in stop_dict
    are dropped. Runs forever; sleeps 10s when the queue is empty.
    """
    while True:
        if task_queue.qsize() > 0:
            logger.info("取任务队列长度{}".format(task_queue.qsize()))
            raw_data = task_queue.get()
            # Initialised before the try so the except-branches below can
            # never hit a NameError on res_tmp/raw_data when the task is
            # malformed (previously that crashed the worker thread).
            res_tmp = {}
            try:
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                res_tmp["isLast"]=1
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    logger.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logger)
                else:
                    logger.info("任务数据为:{}".format(raw_data))
                    logger.info("当前version信息为:{}".format(stop_dict))
                    # Drop tasks paused/superseded via the ZooKeeper watch.
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logger.info("已暂停任务,数据过滤掉")
                        continue
                    data = get_content(raw_data, logger)
                    prompt_len=len(data["prompt"])
                    # if len(data["prompt"])>=10000:
                    #     raise promptLen_Exception
                    # logger.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.123:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logger.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logger.info("Prompt为:{}***Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    # The server reports context overflow as HTTP 400.
                    if response.status_code==400 and "maximum context length" in d["message"]:
                        logger.info("文本长度超过模型限制-{}-{}".format(prompt_len, raw_data))
                        raise promptLen_Exception
                    result = d['choices'][0]['message']['content']
                    if result:
                        # fieldType: 0 = plain text output, 1 = JSON output.
                        fieldType = raw_data["input"]['fieldType']
                        if fieldType == 0:
                            res_tmp["content"] = result
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res=parse_gptResult(res_tmp,result)
                            if res:
                                res["isLast"] = 1
                                res_tmp_json = json.dumps(res, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                            else:
                                # Unparseable reply still counts as success with empty fields.
                                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    else:
                        logger.info("模型返回值为空")
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    to_kafka.send_kafka(raw_data, logger)
            except promptLen_Exception:
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": res_tmp_json, "status": 2,"message": "文本长度超过模型限制"}
                to_kafka.send_kafka(raw_data, logger)
            except:
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": res_tmp_json,"status":2,"message":"异常"}
                logger.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logger)
        else:
            logger.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze and record pause/version updates.

    On every CHANGED event the node payload (JSON with scenes_id, version,
    operation) is copied into the module-level stop_dict, which Qwen()
    consults to drop stale tasks. Blocks forever to keep the watch alive.
    """
    try:
        # Production ZooKeeper ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test ensemble.
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register the data watch.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to content changes; re-read to get the latest payload.
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logger.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logger.info("Stopping...")
            # Close the connection.
            zk.stop()
            zk.close()
    except:
        logger.error(traceback.format_exc())

166
text_analysis/bak/views_bak_20240806.py

@ -0,0 +1,166 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
# logging = set_logger('logs/results.log')
import logging
logger = logging.getLogger('text')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# openai_api_key = "EMPTY"
# openai_api_base = "http://10.0.32.225:9000/v1"
# client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
task_queue = queue.Queue()
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """HTTP entry point (2024-08-06 backup): accept a POSTed JSON task and
    queue it for the Qwen() background worker.
    """
    if request.method == 'POST':
        try:
            # txt=request.body.encode("utf-8")
            raw_data = json.loads(request.body)
            task_queue.put(raw_data)
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except:
            # Body was not valid JSON.
            logger.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        # Only POST is supported.
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
def Qwen():
    """Worker loop (2024-08-06 backup): consume tasks, call the Qwen service
    at 10.0.32.225 and push results to Kafka.

    Tasks whose scenes_id has a newer version in stop_dict are dropped;
    prompts of 10000+ characters raise promptLen_Exception up front.
    NOTE(review): res_tmp and raw_data are unbound in the except-branches
    if the first statements of the try raise.
    """
    while True:
        if task_queue.qsize() > 0:
            try:
                logger.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                output = raw_data["output"]
                # Result template: one empty field per requested output column.
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id=raw_data["scenes_id"]
                task_version=raw_data["version"]
                if "data" not in raw_data.keys():
                    logger.info("任务缺少data—{}".format(raw_data))
                    raw_data["result"] = {"successCode": "0", "errorLog": "", "results": "", "status": 2,"message": "未配置data内容"}
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"]["results"] = res_tmp_json
                    to_kafka.send_kafka(raw_data, logger)
                else:
                    logger.info("任务数据为:{}".format(raw_data))
                    logger.info("当前version信息为:{}".format(stop_dict))
                    # Drop tasks paused/superseded via the ZooKeeper watch.
                    if task_id in stop_dict.keys() and task_version!=stop_dict[task_id]["version"]:
                        logger.info("已暂停任务,数据过滤掉")
                        continue
                    data = get_content(raw_data, logger)
                    if len(data["prompt"])>=10000:
                        raise promptLen_Exception
                    # logger.info("请求信息为{},解析后模型请求为{}".format(raw_data,data))
                    url="http://10.0.32.225:9000/v1/chat/completions"
                    headers = {
                        "Content-Type": "application/json;charset=UTF-8"
                    }
                    payload = json.dumps({
                        "model":"Qwen2-72B-Instruct-GPTQ-Int4",
                        "messages": [{"role": "user", "content": data["prompt"]}],
                        "temperature": float(data["temperature"]),
                        "top_p": float(data["top_p"]),
                        "n": int(data["n"])
                    })
                    # logger.info("prompt为{}".format(data["prompt"]))
                    response = requests.request("POST", url, headers=headers, data=payload,timeout=180)
                    logger.info("Prompt为:{}***Qwen返回值:{}-{}".format(data["prompt"],response,response.text))
                    d = json.loads(response.text)
                    # The server reports context overflow as HTTP 400.
                    if response.status_code==400 and "maximum context length" in d["message"]:
                        raise promptLen_Exception
                    result = d['choices'][0]['message']['content']
                    if result:
                        # fieldType: 0 = plain text output, 1 = JSON output.
                        fieldType = raw_data["input"]['fieldType']
                        if fieldType == 0:
                            res_tmp["content"] = result
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                        else:
                            res=parse_gptResult(res_tmp,result)
                            if res:
                                res_tmp_json = json.dumps(res, ensure_ascii=False)
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json,"status":1,"message":"成功"}
                            else:
                                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                                # raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回结果不符合预期", "results": res_tmp_json,"status":2,"message":"Qwen返回结果不符合预期"}
                                raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    else:
                        logger.info("模型返回值为空")
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1,"message": "成功"}
                    # logger.info(raw_data)
                    to_kafka.send_kafka(raw_data, logger)
            except promptLen_Exception:
                logger.info("文本长度超过模型限制-{}".format(raw_data))
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": "", "results": res_tmp_json, "status": 2,"message": "文本长度超过模型限制"}
                to_kafka.send_kafka(raw_data, logger)
            except:
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": res_tmp_json,"status":2,"message":"异常"}
                logger.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                to_kafka.send_kafka(raw_data, logger)
        else:
            logger.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the ZooKeeper node /analyze (2024-08-06 backup) and record
    pause/version updates into the module-level stop_dict consumed by Qwen().
    Blocks forever to keep the watch registered.
    """
    try:
        # Production ZooKeeper ensemble.
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test ensemble.
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register the data watch.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            # Only react to content changes; re-read to get the latest payload.
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                logger.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logger.info("Stopping...")
            # Close the connection.
            zk.stop()
            zk.close()
    except:
        logger.error(traceback.format_exc())

5
text_analysis/cusException.py

@ -0,0 +1,5 @@
# -*- coding:utf-8 -*-
class promptLen_Exception(Exception):
    """Raised when a prompt exceeds the model's maximum input length."""

    # Fixed human-readable message (Chinese, matching the service logs).
    _MESSAGE = 'prompt长度超过模型处理最大值'

    def __str__(self):
        return self._MESSAGE

10
text_analysis/read_config.py

@ -0,0 +1,10 @@
import configparser
#加载配置文件
def load_config():
    """Read ./config.ini (UTF-8) and return the populated ConfigParser.

    ConfigParser.read silently skips a missing file, so the returned
    parser is simply empty when config.ini is absent.
    """
    parser = configparser.ConfigParser()
    parser.read('./config.ini', encoding='utf-8')
    return parser

14
text_analysis/request.py

@ -0,0 +1,14 @@
#coding:utf8
# Ad-hoc smoke test: POST a fixed payload to the leidaduikang service and
# print the raw response. Executes immediately when the module is run.
# import leida_ner_bert_crf
import requests
url = "http://172.18.1.166:9000/leidaduikang"
payload = "{\"inputUrl\":\"/home/bfdadmin/leidabert/Project_leidaduikang/AInputdata/content_100.xlsx\"}"
# NOTE(review): 'header name': "header value" is a leftover REST-client
# template placeholder — presumably harmless, but confirm before relying on it.
headers = {'user-agent': "vscode-restclient",'header name': "header value"}
response = requests.request("POST", url, timeout=1000000,data=payload, headers=headers)
print(response.text)

148
text_analysis/settings.py

@ -0,0 +1,148 @@
"""
Django settings for Zhijian_Project_WebService project.
Generated by 'django-admin startproject' using Django 1.8.
For more information on this file, see
https://docs.djangoproject.com/en/1.8/topics/settings/
For the full list of settings and their values, see
https://docs.djangoproject.com/en/1.8/ref/settings/
"""
# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
import os
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.8/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = '330r)_!^qhd7$!w4)$y@4=p2bd*vlxf%4z(bx-fx-1i3txagvz'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
ALLOWED_HOSTS = ['*']
# Application definition
INSTALLED_APPS = (
'django.contrib.admin',
'django.contrib.auth',
'django.contrib.contenttypes',
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
)
MIDDLEWARE = [
'django.contrib.sessions.middleware.SessionMiddleware',
'django.middleware.common.CommonMiddleware',
'django.middleware.csrf.CsrfViewMiddleware',
'django.contrib.auth.middleware.AuthenticationMiddleware',
# 'django.contrib.auth.middleware.SessionAuthenticationMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'django.middleware.clickjacking.XFrameOptionsMiddleware',
'django.middleware.security.SecurityMiddleware',
]
ROOT_URLCONF = 'text_analysis.urls'
TEMPLATES = [
{
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [],
'APP_DIRS': True,
'OPTIONS': {
'context_processors': [
'django.template.context_processors.debug',
'django.template.context_processors.request',
'django.contrib.auth.context_processors.auth',
'django.contrib.messages.context_processors.messages',
],
},
},
]
WSGI_APPLICATION = 'text_analysis.wsgi.application'
# Database
# https://docs.djangoproject.com/en/1.8/ref/settings/#databases
# DATABASES = {
# 'default': {
# 'ENGINE': 'django.db.backends.sqlite3',
# 'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
# }
# }
# Internationalization
# https://docs.djangoproject.com/en/1.8/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'Asia/Shanghai'
USE_I18N = True
USE_L10N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# https://docs.djangoproject.com/en/1.8/howto/static-files/
STATIC_URL = '/static/'
# U_LOGFILE_SIZE = 1 * 1024 * 1024 # 单日志文件最大100M
# U_LOGFILE_COUNT = 7 # 保留10个日志文件
#
# LOGGING = {
# 'version': 1,
# 'disable_existing_loggers': True, # 禁用所有已经存在的日志配置
# 'filters': {
# 'require_debug_false': {
# '()': 'django.utils.log.RequireDebugFalse'
# }
# },
# 'formatters': {
# 'verbose': {
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] %(module)s %(process)d %(thread)d %(message)s'
# },
# 'simple': {
# 'format': '%(levelname)s %(asctime)s @ %(process)d %(message)s'
# },
# 'complete': {
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] (%(pathname)s/%(funcName)s:%(lineno)d) - %(message)s'
# },
# 'online': {
# 'format': '[%(levelname)s %(asctime)s @ %(process)d] - %(message)s'
# }
# },
# 'handlers': {
# 'text': {
# 'level': 'DEBUG',
# #'class': 'logging.handlers.RotatingFileHandler',
# 'class': 'logging.handlers.TimedRotatingFileHandler',
# 'when': 'H',
# 'interval': 1,
# 'backupCount': U_LOGFILE_COUNT,
# 'formatter': 'complete',
# 'filename': os.path.join(BASE_DIR, 'logs/resultNew.log').replace('\\', '/'),
# }
# },
# 'loggers': {
# 'text': {
# 'handlers': ['text'],
# 'level': 'DEBUG',
# 'propagate': False,
# }
# }
# }

18
text_analysis/src.py

@ -0,0 +1,18 @@
# coding:utf8
# Scratch module: a commented-out MySQL connectivity check kept for
# reference, plus a print so the module does something when executed.
# def mySql():
#     try:
#         db = pymysql.connect(host='172.26.28.30', user='crawl', passwd='crawl13', db='test', port=3306,
#                              charset='utf8', cursorclass=pymysql.cursors.DictCursor)
#         if db.open:
#             print("MySQL连接成功!")
#         else:
#             print("MySQL连接失败!")
#         db.close()
#     except:
#         print(traceback.format_exc())
print("这是一个测试!!")

BIN
text_analysis/tools/__pycache__/cusException.cpython-36.pyc

BIN
text_analysis/tools/__pycache__/mysql_helper.cpython-36.pyc

BIN
text_analysis/tools/__pycache__/process.cpython-36.pyc

BIN
text_analysis/tools/__pycache__/to_kafka.cpython-310.pyc

BIN
text_analysis/tools/__pycache__/to_kafka.cpython-36.pyc

BIN
text_analysis/tools/__pycache__/to_kafka.cpython-38.pyc

BIN
text_analysis/tools/__pycache__/tool.cpython-310.pyc

BIN
text_analysis/tools/__pycache__/tool.cpython-36.pyc

BIN
text_analysis/tools/__pycache__/tool.cpython-38.pyc

BIN
text_analysis/tools/__pycache__/tools.cpython-36.pyc

74
text_analysis/tools/bak/to_kafka.py

@ -0,0 +1,74 @@
#coding:utf8
import traceback
from pykafka import KafkaClient
# from pykafka import partitioners
# from pykafka.simpleconsumer import OwnedPartition, OffsetType
import json
from tqdm import tqdm
# from kafka import KafkaProducer
from pykafka.simpleconsumer import OwnedPartition, OffsetType
def send_kafka(data,logging):
    """Serialise `data` to JSON (UTF-8) and publish it to the 'analyze'
    Kafka topic. Failures are logged, never raised, so a Kafka outage
    cannot kill the calling worker loop.

    :param data: JSON-serialisable result payload
    :param logging: logger-like object with .info()
    """
    producer = None
    try:
        client = KafkaClient(hosts='172.26.28.30:9092', socket_timeout_ms=10 * 1000)
        topic = 'analyze'
        producer = client.topics[topic].get_producer()
        data1 = json.dumps(data, ensure_ascii=False)
        producer.produce(bytes(data1, encoding='utf-8'))
        logging.info("数据推入kafka!")
    except Exception:
        logging.info(traceback.format_exc())
        logging.info('写入kafka失败')
    finally:
        # get_producer() delivers asynchronously by default; stop() flushes
        # pending messages so they are not silently dropped when this
        # per-call producer goes out of scope.
        if producer is not None:
            producer.stop()
# def kafkaProduce(topic,resultData):
# producer = KafkaProducer(bootstrap_servers = '{}'.format("172.26.28.30:9092"))
# topics = topic.split(',')
# for tc in topics:
# future = producer.send(tc,resultData)
# producer.flush()
def consumer():
    """Debug helper: tail the 'analyze' topic from the latest offset and
    print every message's offset and decoded value. Blocks forever.
    """
    # topic = 'ais_caiji_kg_210'.encode('utf-8')
    # client = KafkaClient(hosts='172.16.3.153:9092,172.16.3.154:9092,172.16.3.155:9092')
    # topic = 'test_mysql_topic'.encode('utf-8')
    # client = KafkaClient(hosts='localhost:9092')
    # topic = client.topics[topic]
    # consumer = topic.get_simple_consumer(consumer_group='test1',
    #                                      auto_commit_enable=True,  # 去重消费
    #                                      auto_commit_interval_ms=1000,
    #                                      # consumer_id='test1',  # 消费者ID
    #                                      reset_offset_on_start=True,
    #                                      # auto_offset_reset=OffsetType.LATEST,
    #                                      consumer_timeout_ms=100000)
    # c = 0
    # for msg in consumer:
    #     c += 1
    #     if msg:
    #         val = msg.value.decode('utf-8')
    #         print(c,val)
    # client = KafkaClient(hosts='localhost:9092')
    # topic = client.topics['test_mysql_topic']
    client = KafkaClient(hosts='172.26.28.30:9092')
    topic = client.topics['analyze']
    consumer = topic.get_simple_consumer(consumer_group='my_consumer_group',
                                         auto_offset_reset=OffsetType.LATEST,
                                         reset_offset_on_start=True)
    # Print messages as they arrive.
    for message in consumer:
        if message is not None:
            print(message.offset, message.value.decode())
if __name__=="__main__":
    # send_kafka()
    consumer()

105
text_analysis/tools/bak/tool.py

@ -0,0 +1,105 @@
#coding:utf8
import re
def get_content(inputdata,logging):
    """
    Build the model request from the task JSON: substitute the {{...}}
    placeholder with a user variable and the @@...@@ placeholder with a
    data field, then collect the call parameters.

    :param inputdata: task JSON with metadata.admin, metadata.user and data
    :param logging: logger (unused here, kept for interface compatibility)
    :return: dict with prompt, authorization, model, temperature, top_p, n
    """
    res = {}
    admin = inputdata["metadata"]["admin"]
    data = inputdata["data"]
    prompt = admin["prompt"]
    if_user = re.findall("{{(.*)}}", prompt)
    if_data = re.findall("@@(.*)@@", prompt)
    user_data = inputdata["metadata"]["user"]
    if if_user != [] and if_user[0] in user_data.keys():
        tmp = user_data[if_user[0]]
        # Use a callable replacement so backslashes in the substituted text
        # are inserted literally instead of being parsed as regex escapes
        # (a plain string repl raises re.error on e.g. Windows paths).
        prompt = re.sub("{{(.*)}}", lambda m: tmp, prompt)
    if if_data != [] and if_data[0] in data.keys():
        tmp1 = data[if_data[0]]
        prompt = re.sub("@@(.*)@@", lambda m: tmp1, prompt)
    res["prompt"] = prompt
    res["authorization"] = admin["authorization"]
    res["model"] = admin["model"]
    res["temperature"] = admin["temperature"]
    res["top_p"] = admin["top_p"]
    res["n"] = admin["n"]
    return res
if __name__=="__main__":
inputdata={
"id":1,
"module":"ChatGPT",
"version":1,
"name":"信息抽取",
"describe":"此步骤进行相关信息抽取",
"metadata":{
"position":[
100,
200
],
"output":{
"output_type":"table",
"label_col":[
"文件名称",
"识别内容",
"文件路径",
"文件大小",
"上传时间",
"GPT处理结果"
]
},
"input":{
"input_type":"text",
"label":[
"3_文件名称",
"3_识别内容",
"3_文件路径",
"3_文件大小",
"3_上传时间"
]
},
"admin":{
"prompt":"下面我给出一段数据,请抽取相关内容。需抽取的内容是{{tag}}。数据为@@3_识别内容@@",
"authorization":"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",
"model":"gpt-3.5-turbo",
"temperature":"0.2",
"top_p":"1",
"N":"1",
"user_input":[
{
"keyname":"tag",
"keydesc":"需抽取内容"
}
]
},
"user":{
"tag":"专利号,专利名称,申请人"
}
},
"data":{
"3_文件名称":"测试的专利文档.pdf",
"3_识别内容":"\n证书号第2353566号\n发明专利证书\n发明名称:一种浅海大型复杂沙波区地形重构方法\n发 明 人:张华国;傅斌;何谢错;厉冬玲;史爱琴;楼璘林\n专 利 号:ZL 2015 1 0071764.4\n专利申请日:2015年02月11日 专利权人:国家海洋局第二海洋研究所 授权公告日:2017年01月18日\n本发明经过本局依照中华人民共和国专利法进行审查,决定授予专利权,颁发本证书 并在专利登记簿上予以登记-专利权自授权公告之日起生效。\n本专利的专利权期限为二十年,自申请日起算。专利权人应当依照专利法及其实施细 则规定缴纳年费。本专利的年费应当在每年02月11日前缴纳。未按照规定缴纳年费的, 专利权自应当缴纳年费期满之日起终止„\n专利证书记载专利权登记时的法律状况。专利权的转移、质押、无效、终止、恢复和 专利权人的姓名或名称、国籍、地址变更等事项记载在专利登记簿上。 \n",
"3_文件路径":"http://10.0.32.50:/data2/lybtmp/install/知识包专利/测试的专利文档.pdf",
"3_文件大小":"250KB",
"3_上传时间":1687835515
},
"next_app_id":[
],
"wait_condition":[
],
"start_tag":"false"
}
a=get_content(inputdata)
print(a)

114
text_analysis/tools/bak/tool.py0821

@ -0,0 +1,114 @@
#coding:utf8
import re
def get_content(inputdata,logging):
    """
    Rebuild request parameters (0821 backup): substitute the {{...}}
    placeholder with a user variable and the @@...@@ placeholder with a
    data field inside the admin prompt, then collect the call settings.

    :param inputdata: original task JSON
    :return: dict with the assembled prompt and model parameters
    """
    res={}
    admin=inputdata["metadata"]["admin"]
    data=inputdata["data"]
    prompt=admin["prompt"]
    if_user=re.findall("{{(.*)}}",prompt)
    if_data=re.findall("@@(.*)@@",prompt)
    if if_user != []:
        user_data=inputdata["metadata"]["user"]
        if if_user[0] in user_data.keys():
            tmp=user_data[if_user[0]]
            # NOTE(review): tmp/tmp1 are used as re.sub replacement templates,
            # so backslashes in the substituted content are interpreted as
            # escape sequences and can raise re.error.
            prompt=re.sub("{{(.*)}}",tmp,prompt)
    if if_data!=[] and if_data[0] in data.keys():
        tmp1=data[if_data[0]]
        prompt=re.sub("@@(.*)@@",tmp1,prompt)
    res["prompt"]=prompt
    res["authorization"]=admin["authorization"]
    res["model"]=admin["model"]
    res["temperature"]=admin["temperature"]
    # NOTE(review): "authorization" is assigned twice; the duplicate below
    # is redundant.
    res["authorization"]=admin["authorization"]
    res["top_p"]=admin["top_p"]
    res["n"]=admin["n"]
    return res
if __name__=="__main__":
inputdata={
"metadata":{
"output":{
"output_type":"table",
"label_col":[
"软件著作抽取结果"
]
},
"input":{
"input_type":"text",
"label":[
"7_软件著作过滤器"
]
},
"address":"http://172.18.1.181:9011/chatGpt/",
"admin":{
"authorization":"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",
"top_p":"1",
"user_input":[
{
"keyname":"tag",
"keydesc":""
}
],
"temperature":"0.2",
"model":"gpt-3.5-turbo-16k",
"prompt":"请在下面这句话中提取出:证书号、软件名称、著作权人,以json格式输出,找不到的字段赋值为空字符串,不要有多余的文字输出,只输出json结构。@@7_软件著作过滤器@@",
"n":"1"
},
"index":1
},
"data":{
"1_项目文件上传":"[{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/05/1/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1-基于时间序列遥感 影像洪涝检测系统.jpg\",\"fileId\":\"cd6592f0389bb1da25afbb44901f9cde\",\"fileName\":\"1-基于时间序列遥感 影像洪涝检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/08/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/1/3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\",\"fileId\":\"944eec1cf98f216ea953459dac4dd505\",\"fileName\":\"3-基于时空模型的遥感时间序列森林火灾检测系统.jpg\" },{ \"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/09/1/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileType\":\"jpg\", \"filePath\":\"/软件著作/4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\",\"fileId\":\"eb378cb9ee914323f601500378dfad76\",\"fileName\":\"4-基于隐马尔可夫模型的遥感时间序列分类系统.jpg\" }]",
"2_文件分类信息":"{\"软件著作\":4}",
"3_OCR识别内容":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}",
"businessKey":"185aef3b1c810799a6be8314abf6512c",
"7_软件著作过滤器":"{\"content\":\" 22222222222222222222222222222222222222222222222222\\n中华人民共和国国家版权局\\n计算机软件著作权登记证书\\n证书号:软著登字第1623261号\\n软件名称:\\n基于遥感影像的快速变化检测系统\\nV1.0\\n著作权人:中国科学院遥感与数字地球研究所\\n开发完成日期:2016年08月01日\\n首次发表日期:未发表\\n权利取得方式:原始取得\\n权利范围:全部权利\\n登记号:2017SR037977\\n根据《计算机软件保护条例》和《计算机软件著作权登记办法》的\\n规定,经中国版权保护中心审核,对以上事项予以登记\\n计算机软件著作权\\n登记专用章\\n2017年02月10日\\nNo.01433672\",\"fileId\":\"338847e34904fa96e8834cb220667db8\",\"fileName\":\"2-基于遥感影像的快速变化检测系统.jpg\",\"filePath\":\"/软件著作/2-基于遥感影像的快速变化检测系统.jpg\",\"fileType\":\"jpg\",\"fileUrl\":\"http://172.18.1.130:9985/group33/default/20230816/16/06/1/2-基于遥感影像的快速变化检测系统.jpg\",\"pageNum\":1}"
},
"created":1691004265000,
"module":"OCR",
"start_tag":"false",
"last_edit":1692464331000,
"next_app_id":[
{
"start_id":86,
"edge_id":49,
"end_id":90
}
],
"transfer_id":11,
"blueprint_id":3,
"scenes_id":3,
"scenario":{
"dataloss":1,
"autoCommitTriggerLast":1,
"maxErrors":3,
"autoCommit":1,
"freshVariables":1
},
"wait_condition":[
],
"scheduling":{
"interval":-1,
"type":"single"
},
"name":"软件著作抽取",
"businessKey":"185aef3b1c810799a6be8314abf6512c",
"id":86,
"describe":"软件著作抽取"
}
a=get_content(inputdata,"")
print(a)

116
text_analysis/tools/bak/tool.py_20240801

@ -0,0 +1,116 @@
#coding:utf8
import re
from jsonpath_ng import parse
import json
import traceback
import regex
from log_util.set_logger import set_logger
def parse_data(data, para):
    """Resolve a "<data key>:<JsonPath>" reference against the task payload.

    ``data[<data key>]`` must hold a JSON string; it is parsed and queried
    with the JsonPath expression.  Returns the first match (IndexError when
    nothing matches).
    """
    parts = str(para).split(":")
    raw_payload = data[parts[0]]
    payload = json.loads(raw_payload)
    # Build the JsonPath expression and collect every match; first hit wins.
    matches = [m.value for m in parse(parts[1]).find(payload)]
    return matches[0]
def parse_gptResult(output, gptContent):
    """Extract the keys listed in ``output`` from a model reply.

    The reply is expected to be JSON, possibly wrapped in ```json fences.
    If direct parsing fails, fall back to locating the outermost {...} with
    a recursive regex.  Returns the filtered dict, or None on failure.
    """
    wanted = output.keys()
    try:
        cleaned = str(gptContent).replace("```json", "").replace("```", "")
        candidate = json.loads(cleaned)
        return {key: candidate[key] for key in wanted if key in candidate.keys()}
    except Exception:
        try:
            # Recursive pattern: matches balanced braces of the outermost object.
            found = regex.search(r'\{(?:[^{}]*|(?R))*\}', gptContent, flags=regex.DOTALL)
            if not found:
                return None
            candidate = json.loads(found.group())
            return {key: candidate[key] for key in wanted if key in candidate.keys()}
        except:
            traceback.print_exc()
            return None
def get_content(inputdata, logging):
    """Assemble the model request parameters from the raw task JSON.

    Concatenates the segments of input["prompt"]:
      * type "1" -> literal text
      * type "2" -> dynamic "key:jsonpath" lookup into the task's "data"
    and returns {"prompt", "temperature", "top_p", "n"}.
    """
    res = {}
    input = inputdata["input"]
    # Bug fix: "data" is optional on some tasks (the newer tool.py guards it);
    # fall back to "" so dynamic segments fail soft instead of raising KeyError.
    data = inputdata.get("data", "")
    prompts = input["prompt"]
    prompt_res = ""
    if prompts:
        for prompt in prompts:
            if str(prompt["type"]) == "1":
                prompt_res += prompt["value"]
            elif str(prompt["type"]) == "2":
                try:
                    tmp = parse_data(data, prompt["value"])
                    prompt_res += tmp
                except:
                    # Bug fix: callers sometimes pass "" as the logger; guard
                    # so the error path does not raise AttributeError itself.
                    if logging:
                        logging.info("动态字段获取数据失败。{}-{}".format(prompt, traceback.format_exc()))
    res["prompt"] = prompt_res
    res["temperature"] = input["temperature"]
    res["top_p"] = input["top_p"]
    res["n"] = input["n"]
    return res
# def get_content(inputdata,logging):
# """
# 重新组装参数
# :param inputdata:原json数据
# :return: 组装的prompt及其他参数
# """
# res={}
# admin=inputdata["metadata"]["admin"]
# data=inputdata["data"]
# prompt=admin["prompt"]
# if_user=re.findall("{{(.*)}}",prompt)
# if_data=re.findall("@@(.*)@@",prompt)
# if if_user != []:
# user_data=inputdata["metadata"]["user"]
# if if_user[0] in user_data.keys():
# tmp=user_data[if_user[0]]
# prompt=re.sub("{{(.*)}}",tmp,prompt)
# if if_data!=[] and if_data[0] in data.keys():
# tmp1=data[if_data[0]]
# prompt=re.sub("@@(.*)@@",tmp1,prompt)
# res["prompt"]=prompt
# res["authorization"]=admin["authorization"]
# res["model"]=admin["model"]
# res["temperature"]=admin["temperature"]
# res["authorization"]=admin["authorization"]
# res["top_p"]=admin["top_p"]
# res["n"]=admin["n"]
# return res
if __name__ == "__main__":
    # Bug fix: parse_gptResult() takes (output, gptContent); the bare call
    # always raised TypeError.  Run a minimal smoke sample instead.
    demo = parse_gptResult({"title": "", "content": ""},
                           '```json{"title": "t", "content": "c"}```')
    print(demo)

172
text_analysis/tools/bak/tool.py_bak
File diff suppressed because it is too large
View File

170
text_analysis/tools/bak/tool_1107_final.py

@ -0,0 +1,170 @@
#coding:utf8
import re
from jsonpath_ng import parse
import json
import traceback
def parse_data(raw_data, para):
    """Resolve a "<data key>:<JsonPath>" reference against raw_data['data'].

    The value stored under <data key> is itself a JSON string; it is parsed
    and queried with the JsonPath expression.  Returns the first match.
    """
    parts = str(para).split(":")
    payload = json.loads(raw_data['data'][parts[0]])
    hits = [hit.value for hit in parse(parts[1]).find(payload)]
    return hits[0]
def get_content(inputdata,logging):
    """
    Build the model request parameters from a raw task payload.

    input["prompt"] may embed dynamic references wrapped in @@...@@ markers:
      * "key:jsonpath#json#jsonpath2" - two-level lookup: resolve key:jsonpath,
        parse the result as JSON, then apply jsonpath2
      * "key:jsonpath"               - lookup into inputdata["data"][key]
      * "key"                        - plain field of inputdata["data"]
    Each reference is substituted into the prompt; on failure the marker is
    stripped and the error logged.
    :param inputdata: raw task JSON
    :return: dict with prompt plus authorization/model/temperature/top_p/n
    """
    res={}
    input=inputdata["input"]
    data=inputdata["data"]
    prompt=input["prompt"]
    # Collect every @@...@@ dynamic reference (non-greedy: several per prompt).
    if_data=re.findall("@@(.*?)@@",prompt)
    # Older {{user}} substitution kept for reference:
    # if_user=re.findall("{{(.*)}}",prompt)
    # if if_user != []:
    #     user_data=inputdata["metadata"]["user"]
    #     if if_user[0] in user_data.keys():
    #         tmp=user_data[if_user[0]]
    #         prompt=re.sub("{{(.*)}}",tmp,prompt)
    if if_data!=[] :
        for rule in if_data:
            try:
                if "#json#" in rule:
                    # Two-stage lookup: first jsonpath yields a JSON string,
                    # second jsonpath is applied to that parsed string.
                    parm = rule.split("#json#")
                    data1 = parse_data(inputdata, parm[0])
                    data1_json = json.loads(data1)
                    expr = parse(parm[1])
                    result = str([match.value for match in expr.find(data1_json)][0])
                    rep = "@@{}@@".format(rule)
                    # Escape the marker so jsonpath characters ($, [, ]) are
                    # treated literally by re.sub.
                    rep_escaped = re.escape(rep)
                    prompt = re.sub(rep_escaped, result, prompt)
                elif ":" in rule:
                    # Single "key:jsonpath" lookup.
                    result = parse_data(inputdata, rule)
                    rep = "@@{}@@".format(rule)
                    rep_escaped = re.escape(rep)
                    prompt = re.sub(rep_escaped, result, prompt)
                else:
                    # Bare key: substitute the raw field value if present.
                    if rule in data.keys():
                        tmp1=data[rule]
                        rep = "@@{}@@".format(rule)
                        prompt=re.sub(rep,tmp1,prompt)
            except:
                # On any lookup failure, drop the marker so the prompt stays usable.
                # print(traceback.format_exc())
                rep = "@@{}@@".format(rule)
                prompt = prompt.replace(rep,'')
                logging.info("动态字段获取数据失败。{}-{}".format(rule, traceback.format_exc()))
    # NOTE(review): this call assumes ``logging`` is a real logger; passing ""
    # (as the __main__ demo below does) raises AttributeError here.
    logging.info("拼接后的问题:{}".format(prompt))
    res["prompt"]=prompt
    res["authorization"]=input["authorization"]
    res["model"]=input["model"]
    res["temperature"]=input["temperature"]
    # NOTE(review): duplicate assignment of "authorization" (harmless, but redundant).
    res["authorization"]=input["authorization"]
    res["top_p"]=input["top_p"]
    res["n"]=input["n"]
    return res
# def get_content(inputdata,logging):
# """
# 重新组装参数
# :param inputdata:原json数据
# :return: 组装的prompt及其他参数
# """
# res={}
# admin=inputdata["metadata"]["admin"]
# data=inputdata["data"]
# prompt=admin["prompt"]
# if_user=re.findall("{{(.*)}}",prompt)
# if_data=re.findall("@@(.*)@@",prompt)
# if if_user != []:
# user_data=inputdata["metadata"]["user"]
# if if_user[0] in user_data.keys():
# tmp=user_data[if_user[0]]
# prompt=re.sub("{{(.*)}}",tmp,prompt)
# if if_data!=[] and if_data[0] in data.keys():
# tmp1=data[if_data[0]]
# prompt=re.sub("@@(.*)@@",tmp1,prompt)
# res["prompt"]=prompt
# res["authorization"]=admin["authorization"]
# res["model"]=admin["model"]
# res["temperature"]=admin["temperature"]
# res["authorization"]=admin["authorization"]
# res["top_p"]=admin["top_p"]
# res["n"]=admin["n"]
# return res
if __name__=="__main__":
    # NOTE(review): this sample payload embeds what looks like a real OpenAI
    # API key under "authorization" - rotate it and remove it from version control.
    inputdata={
        "output":{
            "id":"id",
            "content":"content"
        },
        "address":"http://172.18.1.181:9011/chatGpt/",
        "input":{
            "authorization":"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",
            "top_p":"1",
            "temperature":"1",
            "model":"gpt-3.5-turbo-16k",
            "prompt":"根据下面内容:@@1_Youtube采集:$['content']#json#$['test1']@@。生成一条@@1_Youtube采集:$['Count']@@字的关于中国正面的新闻,标题用title,内容用content,以json格式输出。",
            "n":"1"
        },
        "data":{
            "1_Youtube采集":"{\"isDownload\":\"true\",\"content\":\"{\\\"test1\\\":\\\"22222\\\"}\",\"Count\":\"555\"}"
        },
        "created":1691004265000,
        "module":"ChatGPT",
        "start_tag":"false",
        "multi_branch":0,
        "last_edit":1698927821000,
        "next_app_id":[
            {
                "start_id":316,
                "edge_id":200,
                "end_id":317
            }
        ],
        "transfer_id":3,
        "version":1,
        "blueprint_id":12,
        "scenes_id":12,
        "scenario":{
            "dataloss":1,
            "autoCommitTriggerLast":1,
            "maxErrors":3,
            "autoCommit":1,
            "freshVariables":1
        },
        "wait_condition":[
        ],
        "scheduling":{
            "interval":-1,
            "type":"single"
        },
        "name":"正面引导",
        "businessKey":"78278a5168e45304",
        "id":316,
        "position":[
            100,
            200
        ],
        "describe":"正面引导"
    }
    # Bug fix: get_content() unconditionally calls logging.info(...), so the old
    # get_content(inputdata, "") crashed with AttributeError; pass a real logger.
    import logging as _stdlog
    a = get_content(inputdata, _stdlog.getLogger(__name__))
    print(a)

25
text_analysis/tools/cusException.py

@ -0,0 +1,25 @@
# -*- coding:utf-8 -*-
class pt_v_Exception(Exception):
    """Raised when the pt rule misses the cache."""
    _MESSAGE = 'pt规则未在缓存中命中'

    def __str__(self):
        return self._MESSAGE


class dt_v_Exception(Exception):
    """Raised when the dt rule misses the cache."""
    _MESSAGE = 'dt规则未在缓存中命中'

    def __str__(self):
        return self._MESSAGE


class dt_v_attr_Exception(Exception):
    """Raised when the dt_attrcode rule misses the cache."""
    _MESSAGE = 'dt_attrcode规则未在缓存中命中'

    def __str__(self):
        return self._MESSAGE


class dt_v_codeid_Exception(Exception):
    """Raised when the dt_codeid rule misses the cache."""
    _MESSAGE = 'dt_codeid规则未在缓存中命中'

    def __str__(self):
        return self._MESSAGE


class dt_v_senti_Exception(Exception):
    """Raised when the dt_senti rule misses the cache."""
    _MESSAGE = 'dt_senti规则未在缓存中命中'

    def __str__(self):
        return self._MESSAGE


class dt_v_res_Exception(Exception):
    """Raised when the dt_resverse rule misses the cache."""
    _MESSAGE = 'dt_resverse规则未在缓存中命中'

    def __str__(self):
        return self._MESSAGE

65
text_analysis/tools/kakfa_util.py

@ -0,0 +1,65 @@
# coding=utf-8
from kafka import KafkaProducer
from kafka import KafkaConsumer
import json
import traceback
import time
import traceback
import datetime
import queue
from logUtil import get_logger
"""
kafka
"""
def kafkaProduce(topic, resultData, address):
    """Publish resultData (bytes) to every comma-separated topic, blocking
    up to 60s per send for broker acknowledgement."""
    producer = KafkaProducer(bootstrap_servers='{}'.format(address), request_timeout_ms=120000)
    try:
        for tc in topic.split(','):
            future = producer.send(tc, resultData)
            # Synchronous behaviour preserved: wait for the ack, then flush.
            result = future.get(timeout=60)
            producer.flush()
            print(result)
    finally:
        # Bug fix: the producer (and its broker connections) was leaked on
        # every call; always close it.
        producer.close()
# Append one line to a file (UTF-8).
def writeTxt(filePath, result):
    """Append ``result`` plus a newline to filePath.

    NOTE(review): the historical encode('utf-8')/decode('unicode_escape')
    round-trip is kept for caller compatibility; it un-escapes \\uXXXX
    sequences but can mangle non-ASCII text - confirm callers rely on it.
    """
    # Bug fix: the original `f.close` (without parentheses) never closed the
    # handle; `with` guarantees it even when write() raises.
    with open(filePath, 'a', encoding='utf-8') as f:
        f.write(result.encode('utf-8').decode('unicode_escape') + '\n')
def KafkaConsume(topic,address,group_id,task_queue,logger):
    '''
    Consume messages from a Kafka topic forever and push each payload
    (decoded as UTF-8 text) onto ``task_queue``.

    :param topic: topic name to subscribe to
    :param address: bootstrap server list
    :param group_id: consumer group id (enables offset tracking)
    :param task_queue: queue.Queue receiving the decoded messages
    :param logger: unused here; kept for interface compatibility
    :return: never returns normally; exits only on exception
    '''
    try:
        consumer = KafkaConsumer(topic, auto_offset_reset='earliest',fetch_max_bytes=1024768000,fetch_max_wait_ms=5000, bootstrap_servers=address,group_id = group_id)
        i = 1
        while True:
            # NOTE(review): iterating a KafkaConsumer normally blocks forever,
            # so the for/else "idle" branch below is only reached if the
            # iterator ever ends (e.g. consumer_timeout configured) - confirm.
            for msg in consumer:
                print('第{}条数据'.format(i))
                data = str(msg.value, encoding = "utf-8")
                print(data)
                task_queue.put(data)
                i = i+1
            else:
                # for/else: runs when the for loop completes without break.
                print('暂无任务------')
                time.sleep(10)
    except Exception as e:
        # Any consumer failure terminates the loop; caller sees no exception.
        print('kafka未知异常----')
        traceback.print_exc()
def writeTxt(filePath, result):
    """Append ``result`` plus a newline to filePath (platform default encoding).

    NOTE(review): this redefinition shadows the earlier writeTxt in this
    module; only this variant is in effect after import.
    """
    # Bug fix: `f.close` without parentheses never closed the handle.
    with open(filePath, 'a') as f:
        f.write(result + '\n')
if __name__ == '__main__':
    # Smoke test: publish one sample record to the local 'test' topic.
    # The encode/decode('unicode_escape') round-trip un-escapes \uXXXX sequences.
    resultData = {'id': '中文', 'url': 'https://zh.wikipedia.org/zh/%E8%94%A1%E8%8B%B1%E6%96%87'}
    kafkaProduce('test', json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),'172.26.28.30:9092')
    # Earlier consumer experiments kept for reference:
    #task_queue = queue.Queue()
    #KafkaConsume('fq-Taobao-eccontent','39.129.129.172:6666,39.129.129.172:6668,39.129.129.172:6669,39.129.129.172:6670,39.129.129.172:6671','news_sche_8',task_queue,logger)
    # KafkaConsume('zxbnewstopic','120.133.14.71:9992','group3',task_queue,logger)

0
text_analysis/tools/logs/results.log

338
text_analysis/tools/mysql_helper.py

@ -0,0 +1,338 @@
# coding:utf8
import os, sys
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
import re
# from log_util.set_logger import set_logger
# logging = set_logger('logs/error.log')
import pymysql.cursors
import traceback
def mysqlConn(data, logging):
    """Probe a MySQL connection and list the tables of the database.

    :param data: dict with Host/Port/Database/User/Password
    :param logging: logger used for error reporting
    :return: {"successCode": "1"|"0", "errorLog": str, "results": [table, ...]}
             successCode "0" on connect/query failure or an empty database.
    """
    res = {"successCode": "1", "errorLog": "", "results": ""}
    p_host = data["Host"]
    p_port = int(data["Port"])
    p_db = data["Database"]
    p_user = data["User"]
    p_password = data["Password"]
    db = None
    cursor = None
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        cursor.execute("SHOW TABLES")
        tables = cursor.fetchall()
        if tables:
            # Each row is {"Tables_in_<db>": <name>}; keep only the names.
            res["results"] = [list(row.values())[0] for row in tables]
        else:
            res["successCode"] = "0"
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"] = traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
    finally:
        # Bug fix: cursor/connection were leaked whenever the query raised.
        if cursor is not None:
            cursor.close()
        if db is not None:
            db.close()
def getTableColumnNames(data,logging):
    """List the column names of data["Table"] via DESCRIBE.

    :return: {"successCode": "1"|"0", "errorLog": str, "results": [column, ...]}
    NOTE(review): the table name is concatenated into the SQL unescaped -
    safe only for trusted internal input.  The connection/cursor are leaked
    when execute() raises (no finally); see mysqlConn for the fixed pattern.
    """
    res={"successCode":"1","errorLog":"","results":""}
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        sql = "DESCRIBE "+p_table
        cursor.execute(sql)
        tables = cursor.fetchall()
        if tables:
            # DESCRIBE rows expose the column name under 'Field'.
            table_names = list(map(lambda x: x['Field'], tables))
            res["results"] = table_names
        else:
            res["successCode"] = "0"
        cursor.close()
        db.close()
        return res
    except:
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
def mysqlInsert(input,logging):
    """Insert one row described by input["metadata"]["admin"].

    columnName supplies the column list, values the row tuple.  Values are
    bound with %s placeholders (safe); table/column names are concatenated
    raw - trusted input only.
    :return: {"successCode": "1"|"0", "errorLog": str, "results": ""}
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    p_columnName=data["columnName"]
    # "(col1,col2) " - trailing space separates it from "values".
    cN='('+','.join(p_columnName)+') '
    p_values=data["values"]
    val=tuple(p_values)
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        # One %s placeholder per value; pymysql escapes the bound values.
        sql = "insert into " + p_table + cN + "values ("+ ','.join(['%s'] * len(val)) + ")"
        cursor.execute(sql,val)
        db.commit()
        cursor.close()
        db.close()
        return res
    except:
        # NOTE(review): cursor/connection leak on failure (no finally).
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
def mysqlUpdate(input,logging):
    """Run an UPDATE assembled from metadata.admin: the SET clause comes from
    get_updateSet(), the WHERE clause from get_filter().

    :return: {"successCode": "1"|"0", "errorLog": str, "results": ""}
    NOTE(review): the statement is built by string concatenation without
    escaping - trusted internal input only.
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    # p_set=data["Set"]
    p_set=get_updateSet(input)
    # where=process_where(data["Filter"])
    where=get_filter(data["Filter"])
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        sql = "UPDATE " + p_table + p_set + where
        # Debug print of the final statement, left in place.
        print(sql)
        cursor.execute(sql)
        db.commit()
        cursor.close()
        db.close()
        return res
    except:
        # NOTE(review): cursor/connection leak on failure (no finally).
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
def mysqlExecute(input,logging):
    """Execute an arbitrary SQL string from metadata.admin["Execute"].

    SELECT-like statements (detected by 'select' substring) return rows as a
    JSON string in results; everything else is committed.
    :return: {"successCode": "1"|"0", "errorLog": str, "results": str}
    NOTE(review): executes caller-supplied SQL verbatim - this endpoint must
    never be exposed to untrusted input.
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    execute=data["Execute"]
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        cursor.execute(execute)
        # Substring check: also matches e.g. "select" inside other words - a
        # statement like INSERT ... SELECT will skip the commit branch.
        if 'select' in execute.lower():
            result = cursor.fetchall()
            res["results"]=json.dumps(result,ensure_ascii=False)
        else:
            db.commit()
        cursor.close()
        db.close()
        return res
    except:
        # NOTE(review): cursor/connection leak on failure (no finally).
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
# def process_where(data):
# '''
# 组装where
# :param data: data["Filter"],{"key":"age","value":"20","operator":">"},{"logicalSymbol":"and"},{"key":"weight","value":"50","operator":"<"}
# :return: WHERE age>20 and weight<50
# '''
# if data=="" or data==[]:
# return ""
# where = " WHERE "
# for line in data:
# if "key" in line.keys():
# val = line["value"]
# if isinstance(val, str):
# val = "\'" + val + "\'"
# tmp = str(line["key"]) + " " + line["operator"] + " " + str(val)
# where += tmp
# else:
# where += " " + line["logicalSymbol"] + " "
# return where
#
# def process_filter(data):
# '''
# 组装key,value,operator
# :param data: data["Filter"],{"key":"age",value:"20","operator":"="}
# :return: age=20
# '''
# if data=="" or data==[]:
# return ""
# res=data["key"]+" "+data["operator"]+" "+data["value"]
# return res
def get_updateSet(input):
    """Build the " SET a=b,..." clause used by mysqlUpdate.

    Each entry of metadata.admin.Set looks like "##tag##=##value##"; a
    ##name## placeholder is replaced by metadata.user[name].  Sides without
    a placeholder are now kept verbatim (previously such entries raised
    IndexError because only matched sides were collected).
    """
    user = input["metadata"]["user"]
    clauses = []
    for line in input["metadata"]["admin"]["Set"]:
        sides = []
        for part in line.split("="):
            placeholders = re.findall('##(.*?)##', part)
            if placeholders:
                sides.append(str(user[placeholders[0]]))
            else:
                # Robustness fix: keep literal sides instead of crashing.
                sides.append(str(part))
        # Original contract: join the first two sides as "left=right".
        clauses.append("=".join(sides[:2]) if len(sides) >= 2 else sides[0])
    return " SET " + ",".join(clauses)
def get_filter(data):
    """Build the " WHERE ..." clause from a Filter structure.

    ``data`` has the shape {"OR": [{"AND": [{"key","value","operator"}, ...]}, ...]}.
    AND terms are joined with "and", OR groups with "or"; string values are
    single-quoted.  Returns "" when "OR" is missing or empty.

    Fix: the original duplicated the whole token-building loop across
    len==1 / len>1 branches (with typo'd variable names); the single loop
    below produces identical output for both cases.
    """
    if "OR" not in data.keys():
        return ""
    groups = data["OR"]
    if not groups:
        return ""
    tokens = []
    for group in groups:
        conditions = group["AND"]
        for cond in conditions:
            val = cond["value"]
            if isinstance(val, str):
                # Quote string literals; numbers pass through unquoted.
                val = "'" + val + "'"
            tokens.append(str(cond["key"]) + cond["operator"] + str(val))
            if cond != conditions[-1]:
                tokens.append("and")
        if group != groups[-1]:
            tokens.append("or")
    return " WHERE " + " ".join(tokens)
def mysqlQuery(input,logging):
    """SELECT columnNames (or *) from metadata.admin["Table"], filtered by
    get_filter(); rows are returned as a JSON string in results.

    :return: {"successCode": "1"|"0", "errorLog": str, "results": str}
    NOTE(review): table/column names are concatenated raw into the SQL -
    trusted internal input only.
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    p_columnNames=data["columnNames"]
    # p_filter=data["Filter"]
    # Empty column list means select everything.
    column='*'
    if len(p_columnNames)==1:
        column=p_columnNames[0]
    elif len(p_columnNames)>1:
        column=','.join(p_columnNames)
    where=get_filter(data["Filter"])
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        # ``where`` already starts with " WHERE " (or is empty).
        sql = "SELECT " + column +" From "+ p_table + where
        # print(sql)
        cursor.execute(sql)
        result = cursor.fetchall()
        res["results"]=json.dumps(result,ensure_ascii=False)
        cursor.close()
        db.close()
        return res
    except:
        # NOTE(review): cursor/connection leak on failure (no finally).
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
def mysqlDelete(input,logging):
    """DELETE from metadata.admin["Table"] with a get_filter() WHERE clause.

    :return: {"successCode": "1"|"0", "errorLog": str, "results": ""}
    NOTE(review): an empty Filter yields an unconditioned DELETE (wipes the
    whole table) - callers must guarantee a filter is present.
    """
    res={"successCode":"1","errorLog":"","results":""}
    data=input["metadata"]["admin"]
    p_host=data["Host"]
    p_port=int(data["Port"])
    p_db=data["Database"]
    p_user=data["User"]
    p_password=data["Password"]
    p_table=data["Table"]
    # where=process_where(data["Filter"])
    where=get_filter(data["Filter"])
    try:
        db = pymysql.connect(host=p_host, user=p_user, passwd=p_password, db=p_db, port=p_port,
                             charset='utf8', cursorclass=pymysql.cursors.DictCursor)
        db.ping(reconnect=True)
        cursor = db.cursor()
        sql = "DELETE From "+ p_table + where
        cursor.execute(sql)
        db.commit()
        cursor.close()
        db.close()
        return res
    except:
        # NOTE(review): cursor/connection leak on failure (no finally).
        res["successCode"] = "0"
        res["errorLog"]=traceback.format_exc()
        logging.error(traceback.format_exc())
        return res
if __name__=="__main__":
    # Demo payload for mysqlUpdate.
    # Bug fix: "user" was a sibling of "metadata", but get_updateSet reads
    # input["metadata"]["user"], so the demo crashed with KeyError before any
    # SQL ran.  "user" now lives inside "metadata".
    # NOTE(review): credentials are hard-coded test values.
    input={"metadata":{"admin":{
        "type":"query",
        "Table":"student",
        "columnNames":["name","age"],
        "Set":["##tag1##=##value1##","##tag2##=##value2##"],
        "Filter":{
            "OR":[
                {
                    "AND":[{"key":"age","value":20,"operator":">"},{"key":"weight","value":50,"operator":"<"}]
                },
                {
                    "AND":[{"key":"name","value":"ff","operator":"="}]
                }
            ]
        },
        "Host":"172.26.28.30",
        "Port":"3306",
        "Database":"test",
        "User":"crawl",
        "Password":"crawl123"
    },
        "user": {
            "tag1": "age",
            "tag2": "weight",
            "value1": 2,
            "value2": 100
        }
    }}
    res=mysqlUpdate(input,"")
    print(res)

51
text_analysis/tools/process.py

@ -0,0 +1,51 @@
#coding:utf8
import os, sys
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from text_analysis.tools import to_kafka
from tools.mysql_helper import mysqlConn,mysqlInsert,mysqlQuery,mysqlExecute,mysqlUpdate,mysqlDelete,getTableColumnNames
import traceback
import time
from log_util.set_logger import set_logger
logging=set_logger('results.log')
from views import task_queue
def process_data():
    """Worker loop: pull MySQL tasks from task_queue, dispatch on
    metadata.admin.type (insert/execute/update/query/delete) and push the
    result to Kafka.  Runs forever; sleeps 10s when the queue is empty.
    """
    # Dispatch table replaces the if/elif chain.
    handlers = {
        'insert': mysqlInsert,
        'execute': mysqlExecute,
        'update': mysqlUpdate,
        'query': mysqlQuery,
        'delete': mysqlDelete,
    }
    while True:
        try:
            if task_queue.qsize() > 0:
                raw_data = None
                try:
                    raw_data = task_queue.get()
                    logging.info("启动数据处理线程——")
                    logging.info(raw_data)
                    flag = raw_data["metadata"]["admin"]["type"]
                    # Unknown types keep the original behaviour: empty result.
                    res = handlers[flag](raw_data, logging) if flag in handlers else ""
                    raw_data["result"] = res
                    logging.info("************写入kafka***********")
                    # Bug fix: to_kafka.send_kafka takes (data, logging); the
                    # one-argument call raised TypeError on every task.
                    to_kafka.send_kafka(raw_data, logging)
                except:
                    # Bug fix: raw_data was referenced unconditionally here and
                    # could be unbound if task_queue.get() itself failed.
                    logging.error(traceback.format_exc())
                    if raw_data is not None:
                        raw_data["result"] = {"successCode": "0",
                                              "errorLog": traceback.format_exc(),
                                              "results": ""}
                        to_kafka.send_kafka(raw_data, logging)
            else:
                logging.info("暂无任务,进入休眠--")
                time.sleep(10)
        except:
            logging.error(traceback.format_exc())

171
text_analysis/tools/seleniumTest.py

@ -0,0 +1,171 @@
# -*- coding: utf-8 -*-
import time
import threading
from selenium import webdriver
import json
from urllib.parse import urljoin
from kakfa_util import KafkaConsume
from kakfa_util import kafkaProduce
from logUtil import get_logger
from Go_fastDfs import uploadFile
import traceback
import queue
import configparser
import os, sys
import re
logger = get_logger("./logs/crawlWebsrcCode.log")
# Load the runtime configuration from config.ini.
configFile = './config.ini'
# Create the parser object...
con = configparser.ConfigParser()
# ...and read the file as UTF-8.
con.read(configFile, encoding='utf-8')
kafkaConfig = dict(con.items('kafka'))  # Kafka connection settings
goFastdfsConfig = dict(con.items('goFastdfs'))  # go-fastdfs upload settings
class Spider(object):
    """Headless-Chrome page snapshotter: loads ``url``, scrolls down to force
    lazy-loaded content, and saves the rendered page as an .mhtml snapshot."""
    def __init__(self,url):
        # Build the Chrome options first; the driver is bound to them.
        self.chromeOptions = self.get_profile()
        self.browser = self.get_browser()
        self.url = url
    def get_profile(self):
        """Headless Chrome profile (no GPU, no sandbox - container friendly)."""
        chromeOptions = webdriver.ChromeOptions()
        chromeOptions.add_argument('--headless') # headless mode
        chromeOptions.add_argument('--disable-gpu') # disable GPU rendering
        # chromeOptions.add_argument('window-size=1280,800') # fixed viewport (disabled)
        chromeOptions.add_argument("--no-sandbox")
        return chromeOptions
    def get_browser(self):
        # NOTE(review): hard-coded Windows chromedriver path; the positional
        # executable path and chrome_options kwarg are deprecated in Selenium 4.
        browser = webdriver.Chrome("D:\\工作使用\\zhaoshang\\chromedriver.exe",chrome_options=self.chromeOptions)
        return browser
    def _get_page(self,path):
        '''
        Load self.url, scroll 9 times (10000px steps, 5s pauses) so lazy
        content renders, then snapshot the page via the DevTools protocol.
        :param path: directory prefix for the output file
        :return: (pathName, title) - the .mhtml file path and the page title
        '''
        self.browser.get(self.url)
        time.sleep(5)
        logger.info("休眠结束")
        # Scroll down in 10000px increments to reach the bottom.
        scrollTop = 10000
        for num in range(1,10):
            js = "var q=document.documentElement.scrollTop={}".format(scrollTop*num)
            logger.info("第{}次滚动".format(num))
            self.browser.execute_script(js)
            time.sleep(5)
        # DevTools command returning the full page as MHTML.
        res = self.browser.execute_cdp_cmd('Page.captureSnapshot', {})
        # Page title is used as the output file name; default if unavailable.
        title = '无标题'
        try:
            # NOTE(review): find_element_by_css_selector is removed in Selenium 4.
            title = self.browser.find_element_by_css_selector("title").get_attribute("textContent")
        except Exception as e:
            logger.error('获取标题异常----')
            traceback.print_exc()
        pathName = '{}{}.mhtml'.format(path,title)
        with open(pathName, 'w',newline='') as f:
            f.write(res['data'])
        return pathName,title
if __name__ == '__main__':
    # Task queue fed by the Kafka reader thread below.
    task_queue = queue.Queue()
    logger.info("开启读取kafka线程---")
    t = threading.Thread(target=KafkaConsume, name='LoopThread',
                         args=(kafkaConfig['read_topic'], kafkaConfig['address'],
                               kafkaConfig['group_id'], task_queue, logger))
    t.daemon = True
    t.start()
    # Main loop: snapshot each URL task as .mhtml, upload it, report via Kafka.
    while True:
        # Bug fix: `task` was referenced in the generic exception handler even
        # when json.loads/get had not run yet; keep a safe default per iteration.
        task = {}
        try:
            if task_queue.qsize() > 0:
                taskStr = task_queue.get()
                logger.info('当前任务:{}'.format(taskStr))
                task = json.loads(taskStr)
                # Accept only http/https/ftp/file URLs.
                p1 = u'(https?|ftp|file)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]'
                pattern1 = re.compile(p1)
                matcher1 = re.search(p1, task['url'])
                if matcher1:
                    l = Spider(task['url'])
                    pathName, title = l._get_page(goFastdfsConfig['path'])
                    l.browser.quit()
                    # Upload the snapshot to go-fastdfs and report via Kafka.
                    if '404 Not Found' in title:
                        logger.error('页面404,无效')
                        resultData = {
                            'code': 500,
                            'id': task['id'],
                            'message': '页面404'
                        }
                        kafkaProduce(kafkaConfig['data_topics'],
                                     json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),
                                     kafkaConfig['address'])
                        time.sleep(2)
                        continue
                    try:
                        uploadStr = uploadFile('{}upload'.format(goFastdfsConfig['uploadaddress']), pathName, logger)
                        uploadJson = json.loads(uploadStr)
                    except Exception as e:
                        logger.error('文件上传异常----')
                        traceback.print_exc()
                        resultData = {
                            'code': 500,
                            'id': task['id'],
                            'message': '文件上传失败'
                        }
                        kafkaProduce(kafkaConfig['data_topics'],
                                     json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),
                                     kafkaConfig['address'])
                        time.sleep(2)
                        continue
                    resultData = {
                        'code': 200,
                        'id': task['id'],
                        'url': goFastdfsConfig['downloadaddress'] + uploadJson['path'],
                        'title': title,
                        'delMd5': uploadJson['md5'],
                        'uploadTime': uploadJson['mtime'],
                        'message': '成功'
                    }
                    kafkaProduce(kafkaConfig['data_topics'],
                                 json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),
                                 kafkaConfig['address'])
                    logger.info('数据写入成功')
                    # Remove the local snapshot once uploaded.
                    if (os.path.exists(pathName)):
                        os.remove(pathName)
                        logger.info('清除文件:{}'.format(pathName))
                    else:
                        logger.info('要删除的文件不存在:{}'.format(pathName))
                else:
                    # Bug fix: the original '非正确url:'.format(task['url']) has no
                    # {} placeholder, so the offending URL was never logged.
                    logger.error('非正确url:{}'.format(task['url']))
                    resultData = {
                        'code': 500,
                        'id': task['id'],
                        'message': '非正确url'
                    }
                    kafkaProduce(kafkaConfig['data_topics'],
                                 json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),
                                 kafkaConfig['address'])
                    time.sleep(2)
                    continue
            else:
                logger.info("暂无任务,进入休眠--")
                time.sleep(10)
        except Exception as e:
            logger.error('未知异常----')
            traceback.print_exc()
            resultData = {
                'code': 500,
                # Bug fix: task may lack 'id' when parsing failed; .get avoids a
                # secondary KeyError/NameError inside this handler.
                'id': task.get('id'),
                'message': '未知异常'
            }
            kafkaProduce(kafkaConfig['data_topics'],
                         json.dumps(resultData).encode('utf-8').decode('unicode_escape').encode(),
                         kafkaConfig['address'])
            time.sleep(2)

25
text_analysis/tools/to_kafka.py

@ -0,0 +1,25 @@
#coding:utf8
import traceback
import json
from kafka import KafkaProducer
from text_analysis.read_config import load_config
config=load_config()
def send_kafka(data, logging):
    """Serialize ``data`` to JSON (non-ASCII preserved) and publish it to the
    topic(s) configured under config["kafka"]["topic"]."""
    try:
        topic = config["kafka"]["topic"]
        payload = json.dumps(data, ensure_ascii=False)
        kafkaProduce(topic, bytes(payload, encoding='utf-8'))
        logging.info("数据推入kafka!")
    except Exception:
        # Fixes: dead `producer = None` removed; failures were logged at INFO
        # level, hiding delivery problems - use ERROR.
        logging.error(traceback.format_exc())
        logging.error('写入kafka失败')
def kafkaProduce(topic, resultData):
    """Send resultData (bytes) to each comma-separated topic and flush once."""
    producer = KafkaProducer(bootstrap_servers='{}'.format(config["kafka"]["bootstrap_servers"]),
                             max_request_size=52428800)
    try:
        for tc in topic.split(','):
            producer.send(tc, resultData)
        # Single flush after all sends (was flushed per topic).
        producer.flush()
    finally:
        # Bug fix: the producer instance was never closed (connection leak
        # on every call).
        producer.close()

74
text_analysis/tools/to_kafka_pykafka.py

@ -0,0 +1,74 @@
#coding:utf8
import traceback
from pykafka import KafkaClient
# from pykafka import partitioners
# from pykafka.simpleconsumer import OwnedPartition, OffsetType
import json
from tqdm import tqdm
# from kafka import KafkaProducer
from pykafka.simpleconsumer import OwnedPartition, OffsetType
def send_kafka(data,logging):
    """pykafka variant: serialize ``data`` to JSON and publish it synchronously
    to the hard-coded 'analyze' topic.

    NOTE(review): broker address and topic are hard-coded; a new client and
    producer are created per call (expensive) and never stopped.
    """
    try:
        producer = None
        # Earlier client/producer experiments kept for reference:
        # client = KafkaClient(hosts='172.26.28.30:9092', socket_timeout_ms=10 * 1000)
        topic = 'analyze'
        # producer = client.topics[topic].get_sync_producer(**{'max_request_size': 3000012 * 5})
        #producer = client.topics[topic].get_producer(sync=True)
        client = KafkaClient(hosts='172.26.28.30:9092', socket_timeout_ms=10 * 1000)
        # topic = client.topics['analyze']
        producer = client.topics[topic].get_producer()
        data1=json.dumps(data,ensure_ascii=False)
        producer.produce(bytes(data1, encoding='utf-8'))
        # kafkaProduce(topic,bytes(data1, encoding='utf-8'))
        logging.info("数据推入kafka!")
    except Exception as e:
        # NOTE(review): failures are logged at INFO level only.
        logging.info(traceback.format_exc())
        logging.info('写入kafka失败')
# def kafkaProduce(topic,resultData):
# producer = KafkaProducer(bootstrap_servers = '{}'.format("172.26.28.30:9092"))
# topics = topic.split(',')
# for tc in topics:
# future = producer.send(tc,resultData)
# producer.flush()
def consumer():
    """Debug helper: tail the 'produce_analyze' topic from the latest offset
    and print every message (offset + decoded payload).  Blocks forever."""
    # Earlier cluster/topic experiments kept for reference:
    # topic = 'ais_caiji_kg_210'.encode('utf-8')
    # client = KafkaClient(hosts='172.16.3.153:9092,172.16.3.154:9092,172.16.3.155:9092')
    # topic = 'test_mysql_topic'.encode('utf-8')
    # client = KafkaClient(hosts='localhost:9092')
    # topic = client.topics[topic]
    # consumer = topic.get_simple_consumer(consumer_group='test1',
    #                                      auto_commit_enable=True,  # deduplicated consumption
    #                                      auto_commit_interval_ms=1000,
    #                                      # consumer_id='test1',  # consumer ID
    #                                      reset_offset_on_start=True,
    #                                      # auto_offset_reset=OffsetType.LATEST,
    #                                      consumer_timeout_ms=100000)
    # c = 0
    # for msg in consumer:
    #     c += 1
    #     if msg:
    #         val = msg.value.decode('utf-8')
    #         print(c,val)
    # client = KafkaClient(hosts='localhost:9092')
    # topic = client.topics['test_mysql_topic']
    client = KafkaClient(hosts='172.18.1.146:9092,172.18.1.147:9092,172.18.1.148:9092')
    topic = client.topics['produce_analyze']
    # Start from the latest offset; previous messages are skipped.
    consumer = topic.get_simple_consumer(consumer_group='my_consumer_group',
                                         auto_offset_reset=OffsetType.LATEST,
                                         reset_offset_on_start=True)
    # Consume and print messages as they arrive.
    for message in consumer:
        if message is not None:
            print(message.offset, message.value.decode())

119
text_analysis/tools/tool.py

@ -0,0 +1,119 @@
#coding:utf8
import re
from jsonpath_ng import parse
import json
import traceback
import regex
from log_util.set_logger import set_logger
def parse_data(data, para):
    """Resolve a "<data key>:<JsonPath>" reference.

    ``data[<data key>]`` must hold a JSON string; it is parsed and queried
    with the JsonPath expression.  The first match is returned (IndexError
    when nothing matches).
    """
    key, expr_text = str(para).split(":")[0], str(para).split(":")[1]
    document = json.loads(data[key])
    selected = parse(expr_text).find(document)
    return [m.value for m in selected][0]
def parse_gptResult(output, gptContent):
    """Pull the keys named in ``output`` out of a model reply.

    The reply should be JSON (possibly inside ```json fences); if direct
    parsing fails, the outermost {...} is located with a recursive regex.
    Returns the filtered dict, or None when no JSON object can be recovered.
    """
    def _filter(parsed):
        # Keep only the keys the caller asked for.
        return {k: parsed[k] for k in output.keys() if k in parsed.keys()}

    try:
        stripped = str(gptContent).replace("```json", "").replace("```", "")
        return _filter(json.loads(stripped))
    except Exception:
        try:
            # Recursive pattern matching the outermost balanced braces.
            hit = regex.search(r'\{(?:[^{}]*|(?R))*\}', gptContent, flags=regex.DOTALL)
            if not hit:
                return None
            return _filter(json.loads(hit.group()))
        except:
            traceback.print_exc()
            return None
def get_content(inputdata, logging):
    """Build the model request parameters from the raw task JSON.

    input["prompt"] is a list of segments: type "1" is literal text and
    type "2" is a dynamic "key:jsonpath" lookup into inputdata["data"].
    Returns {"prompt", "temperature", "top_p", "n"}.
    """
    params = inputdata["input"]
    # "data" is optional; dynamic lookups then fail soft via the except below.
    source = inputdata["data"] if "data" in inputdata.keys() else ""
    pieces = []
    for segment in (params["prompt"] or []):
        kind = str(segment["type"])
        if kind == "1":
            pieces.append(segment["value"])
        elif kind == "2":
            try:
                pieces.append(parse_data(source, segment["value"]))
            except:
                logging.info("动态字段获取数据失败。{}-{}".format(segment, traceback.format_exc()))
    return {
        "prompt": "".join(pieces),
        "temperature": params["temperature"],
        "top_p": params["top_p"],
        "n": params["n"],
    }
# def get_content(inputdata,logging):
# """
# 重新组装参数
# :param inputdata:原json数据
# :return: 组装的prompt及其他参数
# """
# res={}
# admin=inputdata["metadata"]["admin"]
# data=inputdata["data"]
# prompt=admin["prompt"]
# if_user=re.findall("{{(.*)}}",prompt)
# if_data=re.findall("@@(.*)@@",prompt)
# if if_user != []:
# user_data=inputdata["metadata"]["user"]
# if if_user[0] in user_data.keys():
# tmp=user_data[if_user[0]]
# prompt=re.sub("{{(.*)}}",tmp,prompt)
# if if_data!=[] and if_data[0] in data.keys():
# tmp1=data[if_data[0]]
# prompt=re.sub("@@(.*)@@",tmp1,prompt)
# res["prompt"]=prompt
# res["authorization"]=admin["authorization"]
# res["model"]=admin["model"]
# res["temperature"]=admin["temperature"]
# res["authorization"]=admin["authorization"]
# res["top_p"]=admin["top_p"]
# res["n"]=admin["n"]
# return res
if __name__=="__main__":
    # Bug fix: parse_gptResult() takes (output, gptContent); the bare call
    # always raised TypeError.  Exercise it with a minimal sample instead.
    sample = parse_gptResult({"id": "", "content": ""},
                             '```json{"id": "1", "content": "ok"}```')
    print(sample)

44
text_analysis/tools/zk_util.py

@ -0,0 +1,44 @@
'''
"{
    "scenes_id":2222,
    "operation":"stop",
    "version":5
}"
scenes_id=2222
version!=0
'''
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
import time
# Connect to the ZooKeeper ensemble.
# NOTE(review): hosts are hard-coded; consider moving them to config.ini.
zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
zk.start()
# Callback invoked on watch events for the /analyze node.
def data_change_listener(event):
    """Print the new node payload on CHANGED events, a notice on DELETED."""
    if event.type == EventType.CHANGED:
        data, stat = zk.get("/analyze")
        # Bug fix: the message was a plain string literal, so the payload was
        # printed verbatim as "{data.decode('utf-8')}"; it needs the f-prefix.
        print(f"Data changed on node /analyze: {data.decode('utf-8')}")
    elif event.type == EventType.DELETED:
        print("Node /analyze has been deleted")
# Register a persistent data watch; kazoo re-arms it after every event.
@zk.DataWatch("/analyze")
def watch_node(data, stat, event):
    # event is None on the initial registration call - react to real events only.
    if event is not None:
        data_change_listener(event)
# Keep the process alive so the watch keeps firing; Ctrl-C exits cleanly.
try:
    while True:
        print("ok")
        time.sleep(1)
except KeyboardInterrupt:
    print("Stopping...")
# Tear down the ZooKeeper session.
zk.stop()
zk.close()

13
text_analysis/urls.py

@ -0,0 +1,13 @@
from django.conf.urls import include, url
from django.contrib import admin
from text_analysis import views
# URL routing: POST /QwenModel enqueues a model task (handled by views.QwenModel).
urlpatterns = [
    url(r'^QwenModel',views.QwenModel, name='QwenModel'),
    # Legacy MySQL endpoints, currently disabled:
    # url(r'^mysqlConnection',views.mysqlConnection, name='mysqlConnection'),
    # url(r'^mysqlField', views.mysqlField, name='mysqlField')
]

164
text_analysis/views.py

@ -0,0 +1,164 @@
# coding:utf8
import os, sys
import io
import time
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from text_analysis.cusException import promptLen_Exception
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
logger = set_logger('logs/results.log')
import traceback
# import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
import queue
task_queue = queue.PriorityQueue()
stop_dict={}
from text_analysis.read_config import load_config
config=load_config()
@csrf_exempt
def QwenModel(request):
    """HTTP entry point: accept a JSON task via POST and enqueue it for the
    Qwen worker.

    Tasks with "trace": true are queued with priority -1 so the worker (a
    PriorityQueue pops the smallest tuple first) handles them before normal
    priority-1 tasks; time.time() breaks ties FIFO.  The response is returned
    immediately - processing is asynchronous.
    """
    if request.method == 'POST':
        try:
            raw_data = json.loads(request.body)
            if "trace" in raw_data.keys() and raw_data["trace"]==True:
                task_queue.put((-1, time.time(),raw_data))
            else:
                task_queue.put((1,time.time(), raw_data))
            return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
        except:
            # Any failure (typically malformed JSON) is reported as a bad request body.
            logger.error(traceback.format_exc())
            return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
    else:
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
def Qwen():
    """Worker loop: pull tasks off task_queue, call the Qwen HTTP endpoint,
    and push the (parsed) result to Kafka.

    Runs forever. Every dequeued task produces exactly one Kafka message,
    including on failure, so the pipeline always receives an answer.
    """
    while True:
        # BUG FIX: pre-bind these so the except handlers below can never hit
        # a NameError when a failure occurs before they were assigned (e.g.
        # a missing "output" key); previously that NameError killed the
        # worker thread silently.
        raw_data = None
        res_tmp = {}
        try:
            if task_queue.qsize() > 0:
                p, t, raw_data = task_queue.get(timeout=1)
                logger.info("当前任务队列长度{}".format(task_queue.qsize() + 1))
                # Pre-build the result skeleton from the requested output fields.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                res_tmp["isLast"] = 1
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                logger.info("任务数据为:{}".format(raw_data))
                logger.info("当前version信息为:{}".format(stop_dict))
                # Drop tasks whose scene version was superseded (paused via ZooKeeper).
                if task_id in stop_dict.keys() and task_version != stop_dict[task_id]["version"]:
                    logger.info("已暂停任务,数据过滤掉")
                    continue
                data = get_content(raw_data, logger)
                prompt_len = len(data["prompt"])
                url = config['qwenmodel']['url']
                headers = {
                    "Content-Type": "application/json;charset=UTF-8"
                }
                payload = json.dumps({
                    "model": "Qwen2-72B-Instruct-GPTQ-Int4",
                    "messages": [{"role": "user", "content": data["prompt"]}],
                    "temperature": float(data["temperature"]),
                    "top_p": float(data["top_p"]),
                    "n": int(data["n"])
                })
                response = requests.request("POST", url, headers=headers, data=payload, timeout=180)
                logger.info("Prompt为:{}***Qwen返回值:{}-{}".format(data["prompt"], response, response.text))
                d = json.loads(response.text)
                # The model server answers 400 with a "maximum context length"
                # message when the prompt exceeds the context window.
                if response.status_code == 400 and "maximum context length" in d["message"]:
                    logger.info("文本长度超过模型限制-{}-{}".format(prompt_len, raw_data))
                    raise promptLen_Exception
                result = d['choices'][0]['message']['content']
                if result:
                    # fieldType: 0 -> plain-text answer, 1 -> JSON answer to parse.
                    fieldType = raw_data["input"]['fieldType']
                    if fieldType == 0:
                        res_tmp["content"] = result
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                    else:
                        res = parse_gptResult(res_tmp, result)
                        if res:
                            res["isLast"] = 1
                            res_tmp_json = json.dumps(res, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                        else:
                            # Model output was not parseable: fall back to the
                            # empty skeleton (still reported as success).
                            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                            raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    logger.info("模型返回值为空")
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                to_kafka.send_kafka(raw_data, logger)
            else:
                # Nothing queued: idle before polling again.
                time.sleep(10)
        except queue.Empty:
            # Another worker drained the queue between qsize() and get().
            logger.info("该线程任务队列为空,等待新任务")
        except promptLen_Exception:
            res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
            raw_data["result"] = {"successCode": "0", "errorLog": "", "results": res_tmp_json, "status": 2, "message": "文本长度超过模型限制"}
            to_kafka.send_kafka(raw_data, logger)
        except:
            logger.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
            # Only report back if a task was actually dequeued.
            if raw_data is not None:
                res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": res_tmp_json, "status": 2, "message": "异常"}
                to_kafka.send_kafka(raw_data, logger)
def zk_monitoring():
    """Watch the configured ZooKeeper node and record pause/stop commands.

    Each change to the node carries JSON with scenes_id / version / operation;
    this thread updates stop_dict so the Qwen workers can filter out tasks
    belonging to a superseded scene version.
    """
    try:
        node_path = config['zookeeper']['node']
        # Production ensemble (from config.ini).
        zk = KazooClient(hosts=config['zookeeper']['zkhost'])
        # Test ensemble
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()

        @zk.DataWatch(node_path)
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                # BUG FIX: re-read the node that is actually being watched;
                # the read path was previously hard-coded to "/analyze" while
                # the watch itself used config['zookeeper']['node'].
                data, stat = zk.get(node_path)
                d = json.loads(data)
                id = d["scenes_id"]
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]

        # Block forever so the watch stays registered.
        try:
            while True:
                time.sleep(1)
        except:
            logger.info("Stopping...")
        # Close the ZooKeeper session.
        zk.stop()
        zk.close()
    except:
        logger.error(traceback.format_exc())

134
text_analysis/views.py_openai

@ -0,0 +1,134 @@
# coding:utf8
import os, sys
import io
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
cur_dir = os.path.dirname(os.path.abspath(__file__)) or os.getcwd()
par_dir = os.path.abspath(os.path.join(cur_dir, os.path.pardir))
sys.path.append(cur_dir)
sys.path.append(par_dir)
import json
from django.http import HttpResponse
from text_analysis.tools import to_kafka
from django.views.decorators.csrf import csrf_exempt
from log_util.set_logger import set_logger
from openai import OpenAI
logging = set_logger('logs/results.log')
import traceback
import queue
import requests
from text_analysis.tools.tool import get_content,parse_gptResult
import uuid
import time
from kazoo.client import KazooClient
from kazoo.protocol.states import EventType
# Local OpenAI-compatible endpoint; the key is unused ("EMPTY") but the
# OpenAI client constructor requires one.
openai_api_key = "EMPTY"
openai_api_base = "http://10.0.32.225:9000/v1"
client = OpenAI(api_key=openai_api_key,base_url=openai_api_base)
# FIFO queue of pending request payloads fed by the QwenModel view.
task_queue = queue.Queue()
# scenes_id -> {"version": ..., "operation": ...}; populated by zk_monitoring.
stop_dict={}
@csrf_exempt
def QwenModel(request):
    """Accept a POST task payload and enqueue it for the Qwen worker loop.

    Returns JSON with code 1 on success, code 0 on a malformed body or a
    non-POST request.
    """
    if request.method != 'POST':
        return HttpResponse(json.dumps({"code": 0, "msg": "请求方式错误,改为post请求"}, ensure_ascii=False))
    try:
        payload = json.loads(request.body)
        task_queue.put(payload)
        return HttpResponse(json.dumps({"code": 1, "msg": "请求正常!"}, ensure_ascii=False))
    except:
        logging.error(traceback.format_exc())
        return HttpResponse(json.dumps({"code": 0, "msg": "请求json格式不正确!"}, ensure_ascii=False))
def Qwen():
    """Worker loop (OpenAI-client variant): pull tasks off task_queue, call
    the model via the OpenAI-compatible client, and send results to Kafka."""
    while True:
        if task_queue.qsize() > 0:
            # BUG FIX: pre-bind these so the except handler below can never
            # hit a NameError when a failure occurs before they were assigned
            # (previously that NameError killed the worker thread).
            raw_data = None
            res_tmp = {}
            try:
                logging.info("取任务队列长度{}".format(task_queue.qsize()))
                raw_data = task_queue.get()
                # Pre-build the result skeleton from the requested output fields.
                output = raw_data["output"]
                res_tmp = {key: "" for key in output}
                if "id" in res_tmp.keys():
                    res_tmp["id"] = str(uuid.uuid4())
                task_id = raw_data["scenes_id"]
                task_version = raw_data["version"]
                logging.info("任务数据为:{}".format(raw_data))
                logging.info("当前version信息为:{}".format(stop_dict))
                # Drop tasks whose scene version was superseded (paused via ZooKeeper).
                if task_id in stop_dict.keys() and task_version != stop_dict[task_id]["version"]:
                    logging.info("已暂停任务,过滤掉。{}".format(raw_data))
                    continue
                data = get_content(raw_data, logging)
                response = client.chat.completions.create(
                    model="Qwen2-72B-Instruct-GPTQ-Int4",
                    messages=[{"role": "user", "content": data["prompt"]}],
                    temperature=float(data["temperature"]),
                    top_p=float(data["top_p"]),
                    n=int(data["n"])
                    # stream=True
                )
                logging.info("Qwen返回值:{}—请求信息:{}".format(response, data))
                result = response.choices[0].message.content
                # fieldType: 0 -> plain-text answer, 1 -> JSON answer to parse.
                fieldType = raw_data["input"]['fieldType']
                if fieldType == 0:
                    res_tmp["content"] = result
                    res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                    raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                else:
                    res = parse_gptResult(res_tmp, result)
                    if res:
                        res_tmp_json = json.dumps(res, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "1", "errorLog": "", "results": res_tmp_json, "status": 1, "message": "成功"}
                    else:
                        res_tmp_json = json.dumps(res_tmp, ensure_ascii=False)
                        raw_data["result"] = {"successCode": "0", "errorLog": "Qwen返回值不是json格式,无法解析!", "results": res_tmp_json, "status": 2, "message": "GPT返回结果非json格式"}
                to_kafka.send_kafka(raw_data, logging)
            except:
                logging.info("调用Qwen失败{}-{}".format(raw_data, traceback.format_exc()))
                # Only report back if a task was actually dequeued.
                if raw_data is not None:
                    raw_data["result"] = {"successCode": "0", "errorLog": traceback.format_exc(), "results": json.dumps(res_tmp, ensure_ascii=False), "status": 2, "message": "异常"}
                    to_kafka.send_kafka(raw_data, logging)
        else:
            logging.info("暂无任务,进入休眠--")
            time.sleep(10)
def zk_monitoring():
    """Watch the /analyze ZooKeeper node and record pause/stop commands in stop_dict."""
    try:
        # Production ensemble
        zk = KazooClient(hosts='172.18.1.146:2181,172.18.1.147:2181,172.18.1.148:2181')
        # Test ensemble
        # zk = KazooClient(hosts='172.16.12.55:2181,172.16.12.56:2181,172.16.12.57:2181')
        zk.start()
        # Register the watch; kazoo re-arms DataWatch after every trigger.
        @zk.DataWatch("/analyze")
        def watch_node(data, stat, event):
            if event is not None and event.type == EventType.CHANGED:
                data, stat = zk.get("/analyze")
                # NOTE(review): the log message says "delete operation" but the
                # code records a version/operation update — presumably a
                # pause/stop command; confirm intent against the ZK publisher.
                logging.info("执行删除操作:{}".format(data))
                d = json.loads(data)
                id = d["scenes_id"]
                stop_dict[id] = {}
                stop_dict[id]["version"] = d["version"]
                stop_dict[id]["operation"] = d["operation"]
        # Keep the thread alive so the watch keeps firing.
        try:
            while True:
                time.sleep(1)
        except:
            logging.info("Stopping...")
        # Close the ZooKeeper session.
        zk.stop()
        zk.close()
    except:
        logging.error(traceback.format_exc())

16
text_analysis/wsgi.py

@ -0,0 +1,16 @@
"""
WSGI config for Zhijian_Project_WebService project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/
"""
import os
from django.core.wsgi import get_wsgi_application

# Point Django at this project's settings before building the WSGI callable.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings")
application = get_wsgi_application()

9
uwsgi.ini

@ -0,0 +1,9 @@
[uwsgi]
; HTTP entry point for the QwenModel Django service.
http = 0.0.0.0:9050
chdir = ../QwenModel
wsgi-file = ../QwenModel/wsgi.py
; Single process, 5 threads (background worker threads are spawned in wsgi.py).
processes = 1
threads = 5
listen = 1024
; 6-hour request timeout to accommodate long-running model calls.
http-timeout=21600

39
wsgi.log

@ -0,0 +1,39 @@
*** Starting uWSGI 2.0.21 (64bit) on [Fri Jan 3 10:52:39 2025] ***
compiled with version: 11.2.0 on 24 October 2023 19:53:56
os: Linux-3.10.0-1127.19.1.el7.x86_64 #1 SMP Tue Aug 25 17:23:54 UTC 2020
nodename: node-04
machine: x86_64
clock source: unix
pcre jit disabled
detected number of CPU cores: 64
current working directory: /opt/analyze/apps/QwenModel
detected binary path: /opt/analyze/environment/python3.8/bin/uwsgi
uWSGI running as root, you can use --uid/--gid/--chroot options
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) ***
chdir() to ../QwenModel
*** WARNING: you are running uWSGI without its master process manager ***
your processes number limit is 1031041
your memory page size is 4096 bytes
detected max file descriptor number: 65535
lock engine: pthread robust mutexes
thunder lock: disabled (you can enable it with --thunder-lock)
uWSGI http bound on 0.0.0.0:9050 fd 4
spawned uWSGI http 1 (pid: 58853)
uwsgi socket 0 bound to TCP address 127.0.0.1:35537 (port auto-assigned) fd 3
uWSGI running as root, you can use --uid/--gid/--chroot options
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) ***
Python version: 3.8.16 (default, Jun 12 2023, 18:09:05) [GCC 11.2.0]
Python main interpreter initialized at 0x1eb7220
uWSGI running as root, you can use --uid/--gid/--chroot options
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) ***
python threads support enabled
your server socket listen backlog is limited to 1024 connections
your mercy for graceful operations on workers is 60 seconds
mapped 114744 bytes (112 KB) for 5 cores
*** Operational MODE: threaded ***
WSGI app 0 (mountpoint='') ready in 0 seconds on interpreter 0x1eb7220 pid: 58852 (default app)
uWSGI running as root, you can use --uid/--gid/--chroot options
*** WARNING: you are running uWSGI as root !!! (use the --uid flag) ***
*** uWSGI is running in multiple interpreter mode ***
spawned uWSGI worker 1 (and the only) (pid: 58852, cores: 5)
[pid: 58852|app: 0|req: 1/1] 192.168.0.44 () {34 vars in 532 bytes} [Fri Jan 3 17:37:47 2025] POST /QwenModel => generated 37 bytes in 8 msecs (HTTP/1.1 200) 3 headers in 108 bytes (1 switches on core 1)

35
wsgi.py

@ -0,0 +1,35 @@
"""
WSGI config for Zhijian_Project_WebService project.
It exposes the WSGI callable as a module-level variable named ``application``.
For more information on this file, see
https://docs.djangoproject.com/en/1.8/howto/deployment/wsgi/
"""
import os
import threading

# BUG FIX: configure Django settings BEFORE importing the views module.
# text_analysis.views imports django.http and other Django machinery, which
# can require DJANGO_SETTINGS_MODULE to already be set at import time.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "text_analysis.settings")

from text_analysis.views import Qwen,zk_monitoring
# t = threading.Thread(target=chatgpt, name='chatgpt')
# t.daemon = True
# t.start()
# Start 5 daemon worker threads that drain the shared task queue.
num_threads = 5
chatgpt_threads = [threading.Thread(target=Qwen) for _ in range(num_threads)]
for thread in chatgpt_threads:
    thread.daemon = True
    thread.start()
# Start the ZooKeeper listener thread that maintains stop_dict.
t = threading.Thread(target=zk_monitoring, name='zk_monitoring')
t.daemon = True
t.start()

from django.core.wsgi import get_wsgi_application
application = get_wsgi_application()

100
文档/gpt.txt

@ -0,0 +1,100 @@
{
"createUserId": "9999",
"address": "http://172.18.1.181:9012/chatGptNew/",
"data": {
"businessKey": "331223fe-42b6-4d2f-a9c4-bcb284e2b136",
"98acfda4-91fa-4db6-a092-d9deb60d3ff1": "{\"path\":\"http://172.18.1.146:8080/group1/CDAService/20240412/09/53/5/政策测试.docx\",\"createTime\":\"2024-04-12 09:56:33\",\"title\":\"政策测试.docx\",\"content\":\"深圳市发展和改革委员会关于申报深圳市 2024年重大项目计划的通知\\n各有关单位:\\n\\n\\n  根据市委、市政府工作部署,为保障和推动市重大项目顺利实施,持续扩大有效投资,现启动2024年市重大项目申报工作,有关事项通知如下:\\n  一、申报要求\\n  (一)申报项目条件\\n  申报项目应当符合市重大项目的基本条件、规模要求及其他要求,具体详见《深圳市2024年重大项目计划申报指南》。\\n  (二)申报项目分类\\n  本次申报项目按重大建设项目、重大前期项目两类进行申报,竣工项目不在申报范围。具体要求如下:\\n  1.重大建设项目。包括重大续建项目和重大新开工项目。目前已经开工建设的项目,可申报2024年重大续建项目;项目用地已落实(取得《建设用地规划许可证》)且2024年底前具备开工条件的,年内可形成一定实物工作量的,可申报2024年重大新开工项目。\\n  2.重大前期项目。已取得政府投资项目赋码、社会投资项目核准或备案批复(城市更新项目还须列入深圳市城市更新单元计划并已完成实施主体确认),正在开展前期各项筹备、2024年内不具备开工条件的项目,可申报2024年重大前期项目。\\n  二、申报方式\\n  本次重大项目计划申报采取自行申报与相关单位推荐相结合方式。原则上,由项目单位自行通过市投资项目在线平台自行申报;市、区政府投资项目可由市本级、各区政府投资主管部门直接推荐;社会投资重大项目,亦可经市直行业主管部门、各区推荐。\\n  经审核符合要求的推荐项目也应及时登录市投资项目在线平台完善项目信息。\\n  三、在线登记要求\\n  本次重大项目计划申报采取线上方式,所有申报项目均应及时通过市投资项目在线平台录入建设单位、投资计划等信息,便于后续协调调度,具体如下:\\n  (一)登记入口\\n  项目单位登录广东政务服务网—深圳市发展和改革委员会窗口(http://203.91.46.83:8031/FGWPM/sfg/wstb_qybs),进行账号注册和登陆,搜索查询“深圳市年度重大项目计划确认”事项进行在线登记。\\n  (二)申报时间\\n  本次重大项目计划申报时间自发文之日起至2023年9月15日18:00。\\n  (三)咨询电话\\n  现代产业:88127306、88127513,\\n  基础设施:88127561、88127563,\\n  民生改善:88127015、88127562。\\n  系统操作问题请咨询技术人员:刘工,88121147、88127037。\\n\\n\\n  特此通知。\\n\\n\"}"
},
"dataProcessId": "32ac5fc5-3aeb-4a11-8592-ef932d9e7061",
"module": "ChatGPT",
"start_tag": false,
"next_app_id": [
{
"start_id": "ce567a6c-7cfb-44ff-8bea-4628c48013ba",
"edge_id": 4534,
"end_id": "18e40208-b82b-4afd-82e4-cafa965733f9"
}
],
"transfer_id": 1,
"output": {
"申报条件": "申报条件",
"申报时间": "申报时间",
"扶持标准": "扶持标准",
"受理部门": "受理部门",
"申报材料": "申报材料"
},
"input": {
"authorization": "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",
"top_p": "1",
"address": "http://172.18.1.181:9012/chatGptNew/",
"temperature": "1",
"model": "gpt-3.5-turbo-16k",
"defaultForm": [
{
"field": "result",
"dataType": "string",
"label": "输出内容"
}
],
"prompt": [
{
"type": "1",
"value": "请在下面内容中抽取出以下字段,字段名称分别是:"
},
{
"type": "1",
"value": "受理部门、申报时间、申报条件、扶持标准、申报材料 。并且以json的格式输出,只返回json信息即可不要多余的话术。内容如下:"
},
{
"type": "1",
"value": "\n"
},
{
"type": "2",
"value": "98acfda4-91fa-4db6-a092-d9deb60d3ff1:$['content']"
},
{
"type": "1",
"value": "\n"
}
],
"fieldType": 1,
"n": "1"
},
"scenes_id": 2089,
"dataId": "a1f163b77451226a5e6b7c476f2113cb",
"name": "ChatGPT",
"businessKey": "331223fe-42b6-4d2f-a9c4-bcb284e2b136",
"source_data_id": "305818",
"id": 6556,
"describe": "ChatGPT",
"relations": [
{
"createUserId": "9999",
"startCode": "98acfda4-91fa-4db6-a092-d9deb60d3ff1",
"endCode": "ce567a6c-7cfb-44ff-8bea-4628c48013ba",
"createTime": 1712915946000,
"scenesId": "2089",
"updateUserId": "9999",
"updateUser": "演示账号",
"createUser": "演示账号",
"del": 0,
"updateTime": 1712915946000,
"id": 4533
},
{
"createUserId": "9999",
"startCode": "ce567a6c-7cfb-44ff-8bea-4628c48013ba",
"endCode": "18e40208-b82b-4afd-82e4-cafa965733f9",
"createTime": 1712915946000,
"scenesId": "2089",
"updateUserId": "9999",
"updateUser": "演示账号",
"createUser": "演示账号",
"del": 0,
"updateTime": 1712915946000,
"id": 4534
}
],
"app_code": "ce567a6c-7cfb-44ff-8bea-4628c48013ba"
}

75
文档/gpt参数0905.txt

@ -0,0 +1,75 @@
{
"metadata":{
"output":{
"output_type":"table",
"label_col":[
"相似内容抽取"
]
},
"input":{
"input_type":"text",
"label":[
"3_相似内容过滤器"
]
},
"address":"http://172.18.1.181:9011/chatGpt/",
"admin":{
"authorization":"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",
"top_p":"1",
"user_input":[
{
"keyname":"tag",
"keydesc":""
}
],
"temperature":"1",
"model":"gpt-3.5-turbo-16k",
"prompt":"以JSON数组泛型是String类型的格式进行输出,不用多余的文字。参考”@@11_任务拆分:$.content#json#$['test1']@@不仅仅是一种工具,更是一种改变世界的力量“生成@@11_任务拆分:$.quantity@@条@@11_任务拆分:$.lang#json#$['test1']@@",
"n":"1"
},
"index":3,
"user":{
"tag":""
}
},
"data":{
"11_任务拆分":"{\"tenant_id\":237,\"content\":{\"test1\":\"22222\"},\"create_user_id\":\"652468062228768915\",\"collection_quantity\":1253,\"quantity\":10,\"lang\":{\"test1\":\"333333\"}}"
},
"created":1691004265000,
"module":"ChatGPT",
"start_tag":false,
"multi_branch":0,
"last_edit":1693932236000,
"next_app_id":[
{
"start_id":188,
"edge_id":92,
"end_id":190
}
],
"transfer_id":5,
"version":1,
"blueprint_id":6,
"scenes_id":7,
"scenario":{
"dataloss":1,
"autoCommitTriggerLast":1,
"maxErrors":3,
"autoCommit":1,
"freshVariables":1
},
"wait_condition":[
],
"scheduling":{
"interval":-1,
"type":"single"
},
"name":"相似内容抽取",
"id":188,
"position":[
100,
200
],
"describe":"相似内容抽取"
}

58
文档/gpt参数结构调整.txt
File diff suppressed because it is too large
View File

65
文档/gpt输入样例.txt

@ -0,0 +1,65 @@
http://172.18.1.181:9011/chatGpt/
{
"id":1,
"module":"ChatGPT",
"version":1,
"name":"信息抽取",
"describe":"此步骤进行相关信息抽取",
"metadata":{
"position":[
100,
200
],
"output":{
"output_type":"table",
"label_col":[
"文件名称",
"识别内容",
"文件路径",
"文件大小",
"上传时间",
"GPT处理结果"
]
},
"input":{
"input_type":"text",
"label":[
"3_文件名称",
"3_识别内容",
"3_文件路径",
"3_文件大小",
"3_上传时间"
]
},
"admin":{
"prompt":"下面我给出一段数据,请抽取相关内容。需抽取的内容是{{tag}}。数据为@@3_识别内容@@",
"authorization":"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",
"model":"gpt-3.5-turbo",
"temperature":"0.2",
"top_p":"1",
"n":"1",
"user_input":[
{
"keyname":"tag",
"keydesc":"需抽取内容"
}
]
},
"user":{
"tag":"专利号,专利名称,申请人"
}
},
"data":{
"3_文件名称":"测试的专利文档.pdf",
"3_识别内容":"\n证书号第2353566号\n发明专利证书\n发明名称:一种浅海大型复杂沙波区地形重构方法\n发 明 人:张华国;傅斌;何谢错;厉冬玲;史爱琴;楼璘林\n专 利 号:ZL 2015 1 0071764.4\n专利申请日:2015年02月11日 专利权人:国家海洋局第二海洋研究所 授权公告日:2017年01月18日\n本发明经过本局依照中华人民共和国专利法进行审查,决定授予专利权,颁发本证书 并在专利登记簿上予以登记-专利权自授权公告之日起生效。\n本专利的专利权期限为二十年,自申请日起算。专利权人应当依照专利法及其实施细 则规定缴纳年费。本专利的年费应当在每年02月11日前缴纳。未按照规定缴纳年费的, 专利权自应当缴纳年费期满之日起终止„\n专利证书记载专利权登记时的法律状况。专利权的转移、质押、无效、终止、恢复和 专利权人的姓名或名称、国籍、地址变更等事项记载在专利登记簿上。 \n",
"3_文件路径":"http://10.0.32.50:/data2/lybtmp/install/知识包专利/测试的专利文档.pdf",
"3_文件大小":"250KB",
"3_上传时间":1687835515
},
"next_app_id":[
],
"wait_condition":[
],
"start_tag":false
}

66
文档/gpt输入样例2.txt

@ -0,0 +1,66 @@
http://172.18.1.181:9011/chatGpt/
{
"id":1,
"module":"ChatGPT",
"version":1,
"name":"信息抽取",
"describe":"此步骤进行相关信息抽取",
"metadata":{
"position":[
100,
200
],
"output":{
"output_type":"table",
"label_col":[
"文件名称",
"识别内容",
"文件路径",
"文件大小",
"上传时间",
"GPT处理结果"
]
},
"input":{
"input_type":"text",
"label":[
"3_文件名称",
"3_识别内容",
"3_文件路径",
"3_文件大小",
"3_上传时间"
]
},
"admin":{
"prompt":"下面我给出一段数据,请抽取相关内容。需抽取的内容是{{tag}}。数据为@@3_识别内容:$.store.book[0].title@@",
"authorization":"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",
"model":"gpt-3.5-turbo",
"temperature":"0.2",
"top_p":"1",
"n":"1",
"user_input":[
{
"keyname":"tag",
"keydesc":"需抽取内容"
}
]
},
"user":{
"tag":"专利号,专利名称,申请人"
}
},
"data":{
"3_文件名称":"测试的专利文档.pdf",
"3_识别内容":"{\"store\": {\"book\": [{\"title\": \"证书号第2353566号。发明专利证书。发明名称:一种浅海大型复杂沙波区地形重构方法 发 明 人:张华国;傅斌;何谢错;厉冬玲;史爱琴;楼璘林。专 利 号:ZL 2015 1 0071764.4 专利申请日:2015年02月11日 专利权人:国家海洋局第二海洋研究所 授权公告日:2017年01月18日。本发明经过本局依照中华人民共和国专利法进行审查,决定授予专利权,颁发本证书 并在专利登记簿上予以登记-专利权自授权公告之日起生效。本专利的专利权期限为二十年,自申请日起算。专利权人应当依照专利法及其实施细 则规定缴纳年费。本专利的年费应当在每年02月11日前缴纳。未按照规定缴纳年费的, 专利权自应当缴纳年费期满之日起终止„。专利证书记载专利权登记时的法律状况。专利权的转移、质押、无效、终止、恢复和 专利权人的姓名或名称、国籍、地址变更等事项记载在专利登记簿上。\", \"price\": 10}, {\"title\": \"Book 2\", \"price\": 15}]}}",
"3_文件路径":"http://10.0.32.50:/data2/lybtmp/install/知识包专利/测试的专利文档.pdf",
"3_文件大小":"250KB",
"3_上传时间":1687835515
},
"next_app_id":[
],
"wait_condition":[
],
"start_tag":false
}

58
文档/gpt输入样例最新-1129
File diff suppressed because it is too large
View File

BIN
文档/最新调整-1127.png

After

Width: 477  |  Height: 387  |  Size: 8.3 KiB

Loading…
Cancel
Save