|
@@ -250,6 +250,10 @@ def repair_entity(prem,district_dict,list_articles):
|
|
|
|
|
|
def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="",original_docchannel='',page_attachments='[]',**kwargs):
|
|
def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="",original_docchannel='',page_attachments='[]',**kwargs):
|
|
cost_time = dict()
|
|
cost_time = dict()
|
|
|
|
+ if web_source_no == None:
|
|
|
|
+ web_source_no = ''
|
|
|
|
+ if web_source_name == None:
|
|
|
|
+ web_source_name = ''
|
|
|
|
|
|
start_time = time.time()
|
|
start_time = time.time()
|
|
log("start process doc %s"%(str(doc_id)))
|
|
log("start process doc %s"%(str(doc_id)))
|
|
@@ -446,7 +450,7 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
|
|
|
|
|
|
# data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
|
|
# data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
|
|
# data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
|
|
# data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
|
|
- version_date = {'version_date': '2024-09-26'}
|
|
|
|
|
|
+ version_date = {'version_date': '2024-09-29'}
|
|
data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date, **all_moneys, **pb_json)
|
|
data_res = dict(codeName[0], **prem[0], **channel_dic, **product_attrs[0], **product_attrs[1], **payment_way_dic, **fail_reason, **industry, **district, **candidate_dic, **version_date, **all_moneys, **pb_json)
|
|
|
|
|
|
if original_docchannel == 302:
|
|
if original_docchannel == 302:
|
|
@@ -503,12 +507,13 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
|
|
# 投标地址
|
|
# 投标地址
|
|
data_res['addr_dic']['addr_bidsend'] = addr_bidsend_text
|
|
data_res['addr_dic']['addr_bidsend'] = addr_bidsend_text
|
|
# 字数
|
|
# 字数
|
|
- if '##attachment##' in list_articles[0].content:
|
|
|
|
- text_main, text_attn = list_articles[0].content.split('##attachment##')
|
|
|
|
- else:
|
|
|
|
- text_main = list_articles[0].content
|
|
|
|
- text_attn = ""
|
|
|
|
- data_res['word_count'] = {'正文': len(text_main), '附件': len(text_attn)}
|
|
|
|
|
|
+ text_main, text_attn = 0, 0
|
|
|
|
+ for sentence in list_sentences[0]:
|
|
|
|
+ if sentence.in_attachment:
|
|
|
|
+ text_attn += len(sentence.sentence_text)
|
|
|
|
+ else:
|
|
|
|
+ text_main += len(sentence.sentence_text)
|
|
|
|
+ data_res['word_count'] = {'正文': text_main, '附件': text_attn}
|
|
# 限制产品数量
|
|
# 限制产品数量
|
|
data_res['product'] = data_res['product'][:500]
|
|
data_res['product'] = data_res['product'][:500]
|
|
data_res['product_attrs']['data'] = data_res['product_attrs']['data'][:500]
|
|
data_res['product_attrs']['data'] = data_res['product_attrs']['data'][:500]
|