瀏覽代碼

去重时,若重复公告中有比地收录的,优先取原网站的公告

znj 2 周之前
父節點
當前提交
50be98ab73
共有 1 個文件被更改,包括 3 次插入和 0 次刪除
  1. 3 0
      BaseDataMaintenance/maintenance/dataflow.py

+ 3 - 0
BaseDataMaintenance/maintenance/dataflow.py

@@ -1267,7 +1267,9 @@ class Dataflow():
         dict_source_count = {}
         for _item in base_list:
             _web_source = _item.get(document_tmp_web_source_no)
+            _web_source_name = _item.get(document_tmp_web_source_name)
             _fingerprint = _item.get(document_tmp_fingerprint)
+            _item['from_bidi'] = 1 if _web_source_name=="比地招标" else 0 # 是否为比地收录的公告
             if _web_source is not None:
                 if _web_source not in dict_source_count:
                     dict_source_count[_web_source] = set()
@@ -1292,6 +1294,7 @@ class Dataflow():
             base_list.sort(key=lambda x:x["docid"],reverse=False)
             base_list.sort(key=lambda x:x.get(document_attachment_extract_status,0),reverse=True)
             base_list.sort(key=lambda x:x["extract_count"],reverse=True)
+            base_list.sort(key=lambda x:x["from_bidi"],reverse=False)
             return base_list[0]["docid"]
 
     def save_dumplicate(self,base_list,best_docid,status_from,status_to):