
Develop deduplication-and-merge interface

luojiehua 2 months ago
parent
commit
db1f62f342

+ 46 - 0
BaseDataMaintenance/interface/project_merge_interface.py

@@ -112,6 +112,36 @@ def embedding():
 
     return jsonify(_r)
 
+@app.route("/dump_and_merge",methods=["POST"])
+def dump_and_merge():
+    _r = {"success": True}
+    try:
+        item = request.json
+        docid = item.get("docid")
+        document_table = item.get("document_table")
+        document_table_index = item.get("document_table_index")
+        project_table = item.get("project_table")
+        project_table_index = item.get("project_table_index")
+        if docid is None:
+            raise RuntimeError("docid不能为空")
+        if document_table is None:
+            raise RuntimeError("document_table不能为空")
+        if document_table_index is None:
+            raise RuntimeError("document_table_index不能为空")
+        if project_table is None:
+            raise RuntimeError("project_table不能为空")
+        if project_table_index is None:
+            raise RuntimeError("project_table_index不能为空")
+
+        _result = flow.dumplicate_comsumer_handle_interface(docid,document_table,document_table_index,project_table,project_table_index)
+        _r.update(_result)
+    except Exception as e:
+        traceback.print_exc()
+        _r["success"] = False
+        _r["msg"] = str(e)
+
+    return jsonify(_r)
+
 def start_project_merge_server():
     app.run(host="0.0.0.0",port="15010",debug=False)
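
For reference, a minimal client sketch for the new /dump_and_merge route, assuming the Flask app started by start_project_merge_server() is reachable at some host on port 15010 (the helper name and timeout below are illustrative, not part of the commit); the payload fields mirror the request parsing above, and the success/msg handling mirrors the response built in dump_and_merge():

import requests

def call_dump_and_merge(host, docid, document_table, document_table_index,
                        project_table, project_table_index, timeout=60):
    # POST the five required fields as JSON; the route rejects any missing
    # field with {"success": False, "msg": "...不能为空"}.
    payload = {
        "docid": docid,
        "document_table": document_table,
        "document_table_index": document_table_index,
        "project_table": project_table,
        "project_table_index": project_table_index,
    }
    resp = requests.post("http://%s:15010/dump_and_merge" % host, json=payload, timeout=timeout)
    result = resp.json()
    if not result.get("success"):
        raise RuntimeError(result.get("msg", "dump_and_merge failed"))
    return result
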
 
@@ -145,10 +175,26 @@ def test():
     # print(merge_document_interface(_proj,b_log=True))
 
 
+def test_dump_and_merge():
+    _d = {
+        "docid":603504420,
+        "document_table":"document_0000",
+        "document_table_index":"document_0000_index",
+        "project_table":"project_0000",
+        "project_table_index":"project_0000_index_formerge"
+    }
+    import requests
+    resp = requests.post("http://8.149.136.64:15010/dump_and_merge",json=_d)
+    print(resp.content.decode("utf-8"))
+
+    #
+    # print(merge_document_interface(_proj,b_log=True))
+
 if __name__ == '__main__':
 
     # start_project_merge_server()
     test()
+    test_dump_and_merge()
 
 
 

+ 42 - 25
BaseDataMaintenance/maintenance/dataflow.py

File diff suppressed because it is too large
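
The dataflow.py changes themselves are not shown here, but the call site in project_merge_interface.py fixes the calling convention of the new entry point. A minimal sketch of the presumed interface, assuming it is a method on the dataflow object held in `flow` and that it returns a dict which /dump_and_merge merges into its JSON response (the class name and body below are placeholders, not the committed implementation):

class DataflowDedup:  # placeholder name; the real class in dataflow.py is not shown in this diff
    def dumplicate_comsumer_handle_interface(self, docid, document_table, document_table_index,
                                             project_table, project_table_index):
        # Deduplicate the document identified by docid against document_table /
        # document_table_index, then merge it into project_table / project_table_index.
        # The returned dict is folded into the endpoint's {"success": ...} response.
        result = {}
        # ... actual dedup-and-merge logic lives in dataflow.py ...
        return result
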


+ 99 - 15
BaseDataMaintenance/maintenance/dataflow_mq.py

File diff suppressed because it is too large


+ 1 - 116
BaseDataMaintenance/model/ots/document_tmp.py

@@ -395,123 +395,8 @@ def turn_document_tmp_status():
     mt = MultiThreadHandler(task_queue,_handle,None,30,ots_client=ots_client)
     mt.run()
 
-def export_extract_ai():
-    filename = r"C:\Users\Administrator\Desktop\extract_ai.txt"
-    list_docid = []
-    from BaseDataMaintenance.dataSource.source import getConnect_ots
-    from BaseDataMaintenance.model.ots.document import Document
-    ots_client = getConnect_ots()
-    with open(filename,"r",encoding="utf8") as f:
-        while 1:
-            line = f.readline()
-            if not line:
-                break
-            line = line.strip()
-            if line!="":
-                try:
-                    docid = line.split(":")[-1]
-                    print(docid)
-                    list_docid.append(int(docid))
-                except Exception as e:
-                    pass
-    list_docid.reverse()
-    import pandas as pd
-    list_data = []
-    for docid in list_docid:
-        partitionkey = docid%500+1
-        _d = {document_tmp_partitionkey:partitionkey,
-              document_tmp_docid:docid}
-        dtmp = Document(_d)
-        if dtmp.fix_columns(ots_client,["extract_json_ai","docchannel"],True):
-            docchannel = dtmp.getProperties().get("docchannel",0)
-            extract_json_ai = dtmp.getProperties().get("extract_json_ai")
-            if docchannel in (52,101,118,119,120,121,122) and extract_json_ai is not None and extract_json_ai!="":
-                extract_ai = json.loads(extract_json_ai)
-                data_d = {
-                    "docid":docid,
-                    "招标人":extract_ai.get("招标信息",{}).get("招标人名称",""),
-                    "项目预算":extract_ai.get("招标信息",{}).get("项目预算",""),
-                    "招标人联系方式":extract_ai.get("招标信息",{}).get("招标人联系方式",""),
-                    "中标信息":extract_ai.get("中标信息","[]")
-                }
-                list_data.append(data_d)
-                if len(list_data)>=200:
-                    break
-    df = pd.DataFrame(list_data)
-    df.to_excel("%s.xlsx"%(filename))
-
-def export_extract_ai1():
-    filename = r"C:\Users\Administrator\Desktop\extract_ai.txt"
-    list_docid = []
-    from BaseDataMaintenance.dataSource.source import getConnect_ots
-    from BaseDataMaintenance.model.ots.document import Document
-    ots_client = getConnect_ots()
-    with open(filename,"r",encoding="utf8") as f:
-        while 1:
-            line = f.readline()
-            if not line:
-                break
-            line = line.strip()
-            if line!="":
-                try:
-                    docid = line.split(":")[-1]
-                    print(docid)
-                    list_docid.append(int(docid))
-                except Exception as e:
-                    pass
-    list_docid.reverse()
-    import pandas as pd
-    list_data = []
-    _count = 0
-    for docid in list_docid:
-        _count += 1
-        print("%d/%d"%(_count,len(list_docid)))
-        partitionkey = docid%500+1
-        _d = {document_tmp_partitionkey:partitionkey,
-              document_tmp_docid:docid}
-        dtmp = Document(_d)
-        if dtmp.fix_columns(ots_client,["extract_json","docchannel"],True):
-            docchannel = dtmp.getProperties().get("docchannel",0)
-            extract_json = dtmp.getProperties().get("extract_json")
-            _extract = json.loads(extract_json)
-            if docchannel in (52,101,118,119,120,121,122):
-                docchannel_dict = {52:"招标公告",
-                                   101:"中标公告",
-                                   118:"废标公告",
-                                   119:"候选人公示",
-                                   120:"合同公告",
-                                   121:"开标记录",
-                                   122:"验收合同"}
-                changed_tenderee = ""
-                changed_win_tenderer = ""
-                changed_win_price = ""
-                prem = _extract.get("prem",{})
-                for pack,pack_value in prem.items():
-                    rolelist = pack_value.get("roleList",[])
-                    for _role in rolelist:
-                        if _role.get("address") is None:
-                            if _role.get("role_name","")=="tenderee":
-                                changed_tenderee = _role.get("role_text","")
-                            if _role.get("role_name","")=="win_tenderer":
-                                changed_win_tenderer = _role.get("role_text","")
-                                changed_win_price = _role.get("role_money",{}).get("money")
-                if changed_tenderee!="" or changed_win_tenderer!="" or changed_win_price!="":
-                    data_d = {
-                        "docid":docid,
-                        "公告类型":docchannel_dict.get(docchannel,""),
-                        "招标人":changed_tenderee,
-                        "中标人":changed_win_tenderer,
-                        "中标金额":changed_win_price,
-                    }
-                    list_data.append(data_d)
-                    # if len(list_data)>=200:
-                    #     break
-    df = pd.DataFrame(list_data)
-    df.to_excel("%s.xlsx"%(filename))
 
 
 if __name__=="__main__":
     # turn_extract_status()
-    # turn_document_tmp_status()
-    # export_extract_ai()
-    export_extract_ai1()
+    turn_document_tmp_status()

Some files were not shown because too many files changed in this diff