|
@@ -1267,7 +1267,9 @@ class Dataflow():
|
|
|
dict_source_count = {}
|
|
|
for _item in base_list:
|
|
|
_web_source = _item.get(document_tmp_web_source_no)
|
|
|
+ _web_source_name = _item.get(document_tmp_web_source_name)
|
|
|
_fingerprint = _item.get(document_tmp_fingerprint)
|
|
|
+ _item['from_bidi'] = 1 if _web_source_name=="比地招标" else 0 # 是否为比地收录的公告
|
|
|
if _web_source is not None:
|
|
|
if _web_source not in dict_source_count:
|
|
|
dict_source_count[_web_source] = set()
|
|
@@ -1292,6 +1294,7 @@ class Dataflow():
|
|
|
base_list.sort(key=lambda x:x["docid"],reverse=False)
|
|
|
base_list.sort(key=lambda x:x.get(document_attachment_extract_status,0),reverse=True)
|
|
|
base_list.sort(key=lambda x:x["extract_count"],reverse=True)
|
|
|
+ base_list.sort(key=lambda x:x["from_bidi"],reverse=False)
|
|
|
return base_list[0]["docid"]
|
|
|
|
|
|
def save_dumplicate(self,base_list,best_docid,status_from,status_to):
|