浏览代码

公告去重合并规则优化

znj 1 周之前
父节点
当前提交
676ce31535
共有 2 个文件被更改，包括 5 次插入和 4 次删除
  1. 3 2
      BaseDataMaintenance/maintenance/dataflow.py
  2. 2 2
      BaseDataMaintenance/maxcompute/documentMerge.py

+ 3 - 2
BaseDataMaintenance/maintenance/dataflow.py

@@ -5,6 +5,7 @@ from BaseDataMaintenance.common.multiThread import MultiThreadHandler
 from BaseDataMaintenance.common.multiProcess import MultiHandler
 from queue import Queue
 from multiprocessing import Queue as PQueue
+from multiprocessing import Process
 
 from BaseDataMaintenance.model.ots.document_tmp import *
 from BaseDataMaintenance.model.ots.attachment import *
@@ -4032,8 +4033,8 @@ class Dataflow_dumplicate(Dataflow):
                 for _data in list_merge_data:
                     _time = time.time()
                     _check,_prob = check_merge_rule(_proj,_data,b_log=b_log,return_prob=True,project_uuids=project_uuids)
-                    # if b_log:
-                    #     log("merge rule res: %s prob: %s"%(str(_check),str(_prob)))
+                    if b_log:
+                        log("merge rule res: %s prob: %s"%(str(_check),str(_prob)))
                     projects_check_rule_time += time.time()-_time
                     if _check:
                         list_check_data.append([_data,_prob])

+ 2 - 2
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -2687,10 +2687,10 @@ def check_project_codes_merge(list_code,list_code_to_merge,b_log):
     has_same = False
     has_similar = False
     for _c in list_code[:100]:
-        _c = str(_c).replace("【", "[").replace("】", "]")
+        _c = str(_c).replace("【", "[").replace("】", "]").replace("(","(").replace(")",")")
         _c = "".join(re.findall("[\u4e00-\u9fa5a-zA-Z\d]+", _c.upper()))
         for _c1 in list_code_to_merge[:100]:
-            _c1 = str(_c1).replace("【","[").replace("】","]")
+            _c1 = str(_c1).replace("【","[").replace("】","]").replace("(","(").replace(")",")")
             _c1 = "".join(re.findall("[\u4e00-\u9fa5a-zA-Z\d]+", _c1.upper()))
             _simi = getSimilarityOfString(_c,_c1,3)
             if _simi==1: