Bladeren bron

要素提取补充AI提取

luojiehua 2 maanden geleden
bovenliggende commit 3a2e1413cf

+ 3 - 0
.idea/encodings.xml

@@ -1,6 +1,9 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
   <component name="Encoding">
+    <file url="file://$PROJECT_DIR$/BaseDataMaintenance/AIUtils/ChatAgent.py" charset="GBK" />
+    <file url="file://$PROJECT_DIR$/BaseDataMaintenance/AIUtils/DoubaoUtils.py" charset="GBK" />
+    <file url="file://$PROJECT_DIR$/BaseDataMaintenance/AIUtils/deepseek_request.py" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/attachmentProcessTime.xlsx" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/chat/chatUtil.py" charset="GBK" />
     <file url="file://$PROJECT_DIR$/BaseDataMaintenance/dataSource/searchPaddle.py" charset="GBK" />

+ 1 - 1
BaseDataMaintenance/dataSource/setttings.py

@@ -61,7 +61,7 @@ smtp_password = "Biaoxun66-"
 
 
 activateMQ_host = "192.168.0.109"
-# activateMQ_host = "120.132.118.205"
+# activateMQ_host = "121.46.18.113"
 activateMQ_port = 61613
 activateMQ_user = "admin"
 activateMQ_pswd = "admin"

+ 9 - 1
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -1047,6 +1047,14 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
             message_id = frame.headers["message-id"]
             subscription = frame.headers.setdefault('subscription', None)
             item = json.loads(frame.body)
+
+            for k,v in item.items():
+                try:
+                    if isinstance(v,bytes):
+                        item[k] = v.decode("utf-8")
+                except Exception as e:
+                    log("docid %d types bytes can not decode"%(item.get("docid")))
+                    item[k] = ""
             dtmp = Document_tmp(item)
 
             dhtml = Document_html({"partitionkey":item.get("partitionkey"),
@@ -1333,7 +1341,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
             if len(_extract_ai.keys())>0:
                 _new_json,_changed = self.merge_json(_extract_json,_json)
                 if _changed:
-                    dtmp.setValue("extract_json_AI",json.dumps(_extract_ai,ensure_ascii=False))
+                    dtmp.setValue("extract_json_ai",json.dumps(_extract_ai,ensure_ascii=False))
                     dtmp.setValue(document_tmp_dochtmlcon,"",False)
                     dtmp.setValue(document_tmp_status,random.randint(*flow_extract_status_succeed_to),True)
                     dtmp.update_row(self.ots_client)