|
@@ -62,6 +62,7 @@ def extractCount(extract_dict):
|
|
|
bidding_budget = ""
|
|
|
win_tenderer = ""
|
|
|
win_bid_price = ""
|
|
|
+ linklist_count = 0
|
|
|
for _key in dict_pack.keys():
|
|
|
if "tendereeMoney" in dict_pack[_key] and dict_pack[_key]["tendereeMoney"]!='' and float(dict_pack[_key]["tendereeMoney"])>0:
|
|
|
extract_count += 1
|
|
@@ -100,6 +101,13 @@ def extractCount(extract_dict):
|
|
|
win_bid_price = str(float(_role["role_money"]["money"]))
|
|
|
if _role["role_name"]=="agency":
|
|
|
agency = _role["role_text"]
|
|
|
+ linklist = _role.get("linklist",[])
|
|
|
+ for link in linklist:
|
|
|
+ for l in link:
|
|
|
+ if l!="":
|
|
|
+ linklist_count += 1
|
|
|
+
|
|
|
+ extract_count += linklist_count//2
|
|
|
|
|
|
if project_code!="":
|
|
|
extract_count += 1
|
|
@@ -198,6 +206,9 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
|
|
|
log("get prem done of doc_id%s"%(doc_id))
|
|
|
cost_time["prem"] = round(time.time()-start_time,2)
|
|
|
|
|
|
+ # roles_l = get_role_context(doc_id, list_sentences, list_entitys)
|
|
|
+ # return roles_l
|
|
|
+
|
|
|
# start_time = time.time() # 产品名称及废标原因提取 此处作废 换到后面预测 2022/4/29
|
|
|
# fail = channel_dic['docchannel']['docchannel'] == "废标公告"
|
|
|
# fail_reason = predictor.getPredictor("product").predict(list_sentences,list_entitys,list_articles, fail) #只返回失败原因,产品已加入到Entity类
|
|
@@ -329,7 +340,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
|
|
|
|
|
|
# data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
|
|
|
# data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
|
|
|
- version_date = {'version_date': '2023-07-04'}
|
|
|
+ version_date = {'version_date': '2023-09-13'}
|
|
|
data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date)
|
|
|
|
|
|
'''最终检查修正招标、中标金额'''
|
|
@@ -382,6 +393,20 @@ def get_ent_context(list_sentences, list_entitys):
|
|
|
rs_list.append("%s %d %.4f; %s ## %s ## %s"%(_entity.entity_type, _entity.label, _entity.values[_entity.label], s[max(0, b-10):b], _entity.entity_text, s[e:e+10]))
|
|
|
return '\n'.join(rs_list)
|
|
|
|
|
|
def get_role_context(docid, list_sentences, list_entitys):
    """Collect the text around every org/company entity, one row per entity.

    Args:
        docid: Identifier copied unchanged into the first field of every row.
        list_sentences: Sequence whose first element is the collection of
            sentence objects to search; each sentence exposes
            ``sentence_index`` and ``sentence_text``. Only element 0 is read.
        list_entitys: Iterable of entity collections. Each entity exposes
            ``entity_type``, ``sentence_index``, ``label``, ``values``,
            ``entity_text``, ``wordOffset_begin`` and ``wordOffset_end``.

    Returns:
        A list of tuples ``(docid, entity_type, label, score, before,
        entity_text, after)`` where ``score`` is ``values[label]`` formatted
        with four decimals and ``before`` / ``after`` are items 0 and 1 of
        what ``get_context`` returns.

    Raises:
        KeyError: If an org/company entity refers to a ``sentence_index``
            that no sentence in ``list_sentences[0]`` carries.
    """
    rows = []
    # Look sentences up by their own sentence_index instead of by position in
    # a sorted list: positional indexing only hits the right sentence when the
    # indices happen to be a dense 0-based sequence.
    sentence_by_index = {s.sentence_index: s for s in list_sentences[0]}
    for entities in list_entitys:
        for entity in entities:
            if entity.entity_type not in ('org', 'company'):
                continue
            sentence = sentence_by_index[entity.sentence_index]
            # get_context is defined elsewhere in the project; presumably it
            # returns the text to the left and right of the given offsets,
            # excluding the entity itself -- TODO confirm.
            context = get_context(
                sentence.sentence_text,
                entity.wordOffset_begin,
                entity.wordOffset_end,
                size=20,
                center_include=False,
            )
            score = '%.4f' % entity.values[entity.label]
            rows.append((
                docid,
                entity.entity_type,
                entity.label,
                score,
                context[0],
                entity.entity_text,
                context[1],
            ))
    return rows
|
|
|
+
|
|
|
if __name__=="__main__":
|
|
|
import pandas as pd
|
|
|
t1 = time.time()
|