luojiehua 2 месяцев назад
Родитель
Коммит
f5521ca4f9
1 изменённый файл: 130 добавлений и 1 удаление
  1. 130 1
      BaseDataMaintenance/maintenance/dataflow_mq.py

+ 130 - 1
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -854,7 +854,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
 
 
 
-    def start_extract_AI_listener(self,_count=3):
+    def start_extract_AI_listener(self,_count=2):
 
         self.list_extract_ai_comsumer = []
 
@@ -1338,6 +1338,130 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
             return True
 
 
+    def extractCount(self,extract_dict,page_attachments,web_source_name):
+        """Score how much structured information an extraction result carries.
+
+        The score is a sum of heuristic bonuses (body length, project
+        code/name, budget, roles and their amounts, linklist entries,
+        attachments, approval/punish fields, products) minus a fixed penalty
+        when ``web_source_name`` is in the externally defined ``set_login_web``.
+        Malformed amounts, short role rows and missing role keys are ignored
+        rather than raising.
+
+        :param extract_dict: extraction result dict; ``None`` or empty is
+            scored like ``{}``.
+        :param page_attachments: JSON text holding a list of attachment
+            dicts (``classification``, ``fileMd5``); ``None``/``''`` is skipped.
+        :param web_source_name: source site name looked up in
+            ``set_login_web``.
+        :return: integer score; it can be negative after the penalty.
+        """
+
+        def _is_positive(value):
+            # Amounts may be str/int/float; a non-numeric or missing amount
+            # counts as "no amount" instead of aborting the whole scoring.
+            try:
+                return value!="" and float(value)>0
+            except (TypeError,ValueError):
+                return False
+
+        _extract = extract_dict or {}
+        extract_count = 0
+
+        # A "正文" (body text) word_count above 500 earns a flat bonus.
+        if _extract.get("word_count",{}).get("正文",0)>500:
+            extract_count += 3
+
+        # Only the first project code is considered.
+        list_code = _extract.get("code",[])
+        project_code = list_code[0] if len(list_code)>0 else ""
+        if project_code!="":
+            extract_count += 1
+        if _extract.get("name","")!="":
+            extract_count += 1
+
+        # Each value of "prem" may carry "tendereeMoney" and a "roleList".
+        linklist_count = 0
+        for _pack in _extract.get("prem",{}).values():
+            if _is_positive(_pack.get("tendereeMoney","")):
+                extract_count += 1
+            for _role in _pack.get("roleList",[]):
+                if isinstance(_role,list):
+                    # List form is [role_name, role_text, money]; short rows
+                    # are padded so indexing cannot raise.
+                    role_name,role_text,money = (_role+["","",""])[:3]
+                    linklist = []
+                elif isinstance(_role,dict):
+                    role_name = _role.get("role_name")
+                    role_text = _role.get("role_text")
+                    money = (_role.get("role_money") or {}).get("money","")
+                    linklist = _role.get("linklist",[])
+                else:
+                    continue
+
+                # Every role counts once, plus once more for a positive amount.
+                extract_count += 1
+                if _is_positive(money):
+                    extract_count += 1
+                # The win_tenderer role earns extra points for name and amount.
+                if role_name=="win_tenderer":
+                    if role_text is not None and role_text!="":
+                        extract_count += 2
+                    if _is_positive(money):
+                        extract_count += 2
+                for link in linklist:
+                    for l in link:
+                        if l!="":
+                            linklist_count += 1
+
+        # Two non-empty linklist entries are worth one point.
+        extract_count += linklist_count//2
+
+        if page_attachments is not None and page_attachments!='':
+            try:
+                _attachments = json.loads(page_attachments)
+                set_md5 = set()
+                has_zhaobiao = False
+                has_qingdan = False
+                for _atta in _attachments:
+                    classification = str(_atta.get("classification",""))
+                    set_md5.add(_atta.get("fileMd5"))
+                    # '招标文件' = tender document, '采购清单' = purchase list.
+                    if classification=='招标文件':
+                        has_zhaobiao = True
+                    if classification=='采购清单':
+                        has_qingdan = True
+
+                if len(_attachments)>0:
+                    extract_count += len(set_md5)//2+1
+                if has_zhaobiao:
+                    extract_count += 2
+                if has_qingdan:
+                    extract_count += 1
+            except Exception:
+                # Best effort: malformed attachment data must not fail scoring.
+                traceback.print_exc()
+
+        # Each filled approval/punish field counts; "未知" means "unknown".
+        for _dict in _extract.get("approval",[]):
+            for v in _dict.values():
+                if v is not None and v!='' and v!="未知":
+                    extract_count += 1
+
+        for v in _extract.get("punish",{}).values():
+            if v is not None and v!='' and v!="未知":
+                extract_count += 1
+
+        # Sources listed in set_login_web receive a fixed penalty.
+        if web_source_name in set_login_web:
+            extract_count -= 3
+
+        # One point per five comma-separated items of the stringified product.
+        product = _extract.get("product","")
+        extract_count += len(str(product).split(","))//5
+
+        return extract_count
+
     def extract_ai_handle(self,_dict,result_queue):
         frame = _dict["frame"]
         conn = _dict["conn"]
@@ -1419,6 +1543,8 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                     _extract = extract_json
             except Exception as e:
                 pass
+        if "extract_count" not in _extract:
+            _extract["extract_count"] = 0
         _extract_ai = {}
         if extract_ai_json is not None:
             try:
@@ -1490,6 +1616,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                     _role_dict["linklist"] = _linklist
                 Project_rolelist.append(_role_dict)
                 _changed = True
+                _extract["extract_count"] += 1
 
         if not has_budget or budget_unexpected:
             _budget = _extract_ai.get("招标信息",{}).get("项目预算","")
@@ -1498,6 +1625,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                 if _budget>0:
                     Project["tendereeMoney"] = str(float(_budget))
                     _changed = True
+                    _extract["extract_count"] += 1
         if not has_win_tenderer or winprice_unexpected:
             list_win = _extract_ai.get("中标信息",[])
             if len(list_win)>0:
@@ -1535,6 +1663,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                             }
 
                         _changed = True
+                        _extract["extract_count"] += 2
                         if _pack=="Project":
                             Project_rolelist.append(_role_dict)
                         else: