Bläddra i källkod

要素提取补充AI提取

luojiehua 2 månader sedan
förälder
incheckning
d085e37a07

+ 2 - 2
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -1285,7 +1285,8 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
         has_entity = False
         has_tenderee = False
         has_win_tenderer = False
-        docchannel = _extract.get("docchannel",{}).get("life_docchannel","")
+
+        docchannel = _extract.get("docchannel",{}).get("docchannel","")
         if len(_extract.get("dict_enterprise",{}).keys())>0:
             has_entity = True
         prem = _extract.get("prem",{})
@@ -1296,7 +1297,6 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                 if role.get("role_name")=="win_tenderer":
                     has_win_tenderer = True
 
-
         has_tenderee = False
         has_win_tenderer = False
         has_budget = False

+ 47 - 1
BaseDataMaintenance/model/ots/document_tmp.py

@@ -395,8 +395,54 @@ def turn_document_tmp_status():
     mt = MultiThreadHandler(task_queue,_handle,None,30,ots_client=ots_client)
     mt.run()
 
+def export_extract_ai():  # Export up to 200 AI-extraction results, for docids listed in a text file, to an Excel sheet.
+    filename = r"C:\Users\Administrator\Desktop\extract_ai.txt"  # hard-coded input path; the output path is derived from it below
+    list_docid = []
+    from BaseDataMaintenance.dataSource.source import getConnect_ots  # function-scope imports, used only by this helper
+    from BaseDataMaintenance.model.ots.document import Document
+    ots_client = getConnect_ots()
+    with open(filename,"r",encoding="utf8") as f:
+        while 1:
+            line = f.readline()
+            if not line:  # readline() returns "" at end of file
+                break
+            line = line.strip()
+            if line!="":
+                try:
+                    docid = line.split(":")[-1]  # keep only the text after the last ":" on the line
+                    print(docid)
+                    list_docid.append(int(docid))
+                except Exception as e:  # NOTE(review): lines whose tail is not an integer are skipped silently
+                    pass
+    list_docid.reverse()  # process the ids in the reverse of file order
+    import pandas as pd
+    list_data = []
+    for docid in list_docid:
+        partitionkey = docid%500+1  # partition key is computed from the docid and always falls in 1..500
+        _d = {document_tmp_partitionkey:partitionkey,  # document_tmp_* are not defined in this block; presumably module-level column-name constants
+              document_tmp_docid:docid}
+        dtmp = Document(_d)
+        if dtmp.fix_columns(ots_client,["extract_json_ai","docchannel"],True):  # presumably fetches these two columns and is truthy on success; confirm against Document.fix_columns
+            docchannel = dtmp.getProperties().get("docchannel",0)
+            extract_json_ai = dtmp.getProperties().get("extract_json_ai")
+            if docchannel in (52,101,118,119,120,121,122) and extract_json_ai is not None and extract_json_ai!="":  # export only these docchannel codes, and only when an AI result is present
+                extract_ai = json.loads(extract_json_ai)  # json is not imported in this block; relies on a module-level import
+                data_d = {
+                    "docid":docid,
+                    "招标人":extract_ai.get("招标信息",{}).get("招标人名称",""),
+                    "项目预算":extract_ai.get("招标信息",{}).get("项目预算",""),
+                    "招标人联系方式":extract_ai.get("招标信息",{}).get("招标人联系方式",""),
+                    "中标信息":extract_ai.get("中标信息","[]")  # NOTE(review): the default is the string "[]", not an empty list
+                }
+                list_data.append(data_d)
+                if len(list_data)>=200:  # stop scanning once 200 rows have been collected
+                    break
+    df = pd.DataFrame(list_data)
+    df.to_excel("%s.xlsx"%(filename))  # output path is the input path with ".xlsx" appended, i.e. "...extract_ai.txt.xlsx"
+
 
 
 if __name__=="__main__":
     # turn_extract_status()
-    turn_document_tmp_status()
+    # turn_document_tmp_status()
+    export_extract_ai()