|
@@ -854,7 +854,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
|
|
|
|
|
|
|
|
|
|
|
|
- def start_extract_AI_listener(self,_count=3):
|
|
|
+ def start_extract_AI_listener(self,_count=2):
|
|
|
|
|
|
self.list_extract_ai_comsumer = []
|
|
|
|
|
@@ -1338,6 +1338,130 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
|
|
|
return True
|
|
|
|
|
|
|
|
|
def extractCount(self,extract_dict,page_attachments,web_source_name):
    """Score how much structured information was extracted for a document.

    The score is a plain integer built from independent bonuses:

    * +3 when ``word_count["正文"]`` (main-text word count) exceeds 500.
    * +1 for every package in ``prem`` whose ``tendereeMoney`` is a
      positive number.
    * +1 for every role in a package's ``roleList`` (list-shaped
      ``[name, text, money]`` or dict-shaped with ``role_name`` /
      ``role_text`` / ``role_money["money"]``), +1 more when the role has a
      positive amount, and for ``win_tenderer`` roles +2 for a non-empty
      text and +2 for a positive amount.
    * +(number of non-empty ``linklist`` cells across all dict roles) // 2.
    * +1 for a non-empty first project code, +1 for a non-empty name.
    * attachments: +(distinct ``fileMd5`` values // 2 + 1), +2 when any
      attachment is classified ``招标文件`` (bidding document), +1 when
      any is classified ``采购清单`` (procurement list).
    * +1 for every filled value in each ``approval`` dict and in the
      ``punish`` dict (``None``, ``''`` and ``"未知"`` count as unfilled).
    * -3 when ``web_source_name`` is in the module-level ``set_login_web``.
    * +(number of comma-separated pieces of ``str(product)``) // 5.

    Malformed input (missing keys, short role lists, non-numeric amounts,
    ``None`` instead of a container) is treated as "nothing extracted" for
    that field instead of raising.

    :param extract_dict: extraction result dict; ``None`` or empty is
        treated as an empty result.
    :param page_attachments: JSON text of a list of attachment dicts, or
        ``None`` / ``''`` when there are none.
    :param web_source_name: source-site name checked against
        ``set_login_web``.
    :return: the integer score (may be negative).
    """

    def _is_positive_amount(value):
        # '' / None / non-numeric text all mean "no amount".
        try:
            return float(value) > 0
        except (TypeError, ValueError):
            return False

    def _count_filled(mapping):
        # Number of values that carry real information.
        if not isinstance(mapping, dict):
            return 0
        return sum(
            1 for v in mapping.values()
            if v is not None and v != '' and v != "未知"
        )

    _extract = extract_dict if extract_dict else {}
    extract_count = 0

    word_count = _extract.get("word_count") or {}
    if (word_count.get("正文") or 0) > 500:
        extract_count += 3

    linklist_count = 0
    for _pack in (_extract.get("prem") or {}).values():
        if not isinstance(_pack, dict):
            continue
        if _is_positive_amount(_pack.get("tendereeMoney")):
            extract_count += 1

        for _role in _pack.get("roleList") or []:
            # Normalise both role layouts to (name, text, money).
            if isinstance(_role, list):
                role_name, role_text, role_money = (list(_role) + [None] * 3)[:3]
            elif isinstance(_role, dict):
                role_name = _role.get("role_name")
                role_text = _role.get("role_text")
                money_info = _role.get("role_money")
                role_money = money_info.get("money") if isinstance(money_info, dict) else None
                for link in _role.get("linklist") or []:
                    for cell in link:
                        if cell != "":
                            linklist_count += 1
            else:
                continue

            extract_count += 1
            has_money = _is_positive_amount(role_money)
            if has_money:
                extract_count += 1
            if role_name == "win_tenderer":
                if role_text is not None and role_text != "":
                    extract_count += 2
                if has_money:
                    extract_count += 2

    # NOTE(review): the source this was recovered from had lost its
    # indentation; the link bonus is applied once after all packages, which
    # matches linklist_count being initialised before the loop - confirm.
    extract_count += linklist_count // 2

    list_code = _extract.get("code") or []
    if len(list_code) > 0 and list_code[0] is not None and list_code[0] != "":
        extract_count += 1
    project_name = _extract.get("name", "")
    if project_name is not None and project_name != "":
        extract_count += 1

    if page_attachments is not None and page_attachments != '':
        try:
            _attachments = json.loads(page_attachments)
            set_md5 = set()
            has_zhaobiao = False
            has_qingdan = False
            for _atta in _attachments:
                classification = str(_atta.get("classification", ""))
                set_md5.add(_atta.get("fileMd5"))
                if classification == '招标文件':
                    has_zhaobiao = True
                if classification == '采购清单':
                    has_qingdan = True

            # NOTE(review): placed at try level, so a parsable but empty
            # attachment list still earns the "+1" - confirm against the
            # original indentation.
            extract_count += len(set_md5) // 2 + 1
            if has_zhaobiao:
                extract_count += 2
            if has_qingdan:
                extract_count += 1
        except Exception:
            # Best effort: a broken attachment payload must not abort scoring.
            traceback.print_exc()

    for _approval in _extract.get("approval") or []:
        extract_count += _count_filled(_approval)
    extract_count += _count_filled(_extract.get("punish"))

    # set_login_web is defined elsewhere in this module (not visible here);
    # presumably the sites that require a login - verify at its definition.
    if web_source_name in set_login_web:
        extract_count -= 3

    product = _extract.get("product", "")
    extract_count += len(str(product).split(",")) // 5

    return extract_count
|
|
|
+
|
|
|
def extract_ai_handle(self,_dict,result_queue):
|
|
|
frame = _dict["frame"]
|
|
|
conn = _dict["conn"]
|
|
@@ -1419,6 +1543,8 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
|
|
|
_extract = extract_json
|
|
|
except Exception as e:
|
|
|
pass
|
|
|
+ if "extract_count" not in _extract:
|
|
|
+ _extract["extract_count"] = 0
|
|
|
_extract_ai = {}
|
|
|
if extract_ai_json is not None:
|
|
|
try:
|
|
@@ -1490,6 +1616,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
|
|
|
_role_dict["linklist"] = _linklist
|
|
|
Project_rolelist.append(_role_dict)
|
|
|
_changed = True
|
|
|
+ _extract["extract_count"] += 1
|
|
|
|
|
|
if not has_budget or budget_unexpected:
|
|
|
_budget = _extract_ai.get("招标信息",{}).get("项目预算","")
|
|
@@ -1498,6 +1625,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
|
|
|
if _budget>0:
|
|
|
Project["tendereeMoney"] = str(float(_budget))
|
|
|
_changed = True
|
|
|
+ _extract["extract_count"] += 1
|
|
|
if not has_win_tenderer or winprice_unexpected:
|
|
|
list_win = _extract_ai.get("中标信息",[])
|
|
|
if len(list_win)>0:
|
|
@@ -1535,6 +1663,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
|
|
|
}
|
|
|
|
|
|
_changed = True
|
|
|
+ _extract["extract_count"] += 2
|
|
|
if _pack=="Project":
|
|
|
Project_rolelist.append(_role_dict)
|
|
|
else:
|