Browse Source

公告去重合并规则优化

znj 1 week ago
parent
commit
676ce31535

+ 3 - 2
BaseDataMaintenance/maintenance/dataflow.py

@@ -5,6 +5,7 @@ from BaseDataMaintenance.common.multiThread import MultiThreadHandler
 from BaseDataMaintenance.common.multiProcess import MultiHandler
 from BaseDataMaintenance.common.multiProcess import MultiHandler
 from queue import Queue
 from queue import Queue
 from multiprocessing import Queue as PQueue
 from multiprocessing import Queue as PQueue
+from multiprocessing import Process
 
 
 from BaseDataMaintenance.model.ots.document_tmp import *
 from BaseDataMaintenance.model.ots.document_tmp import *
 from BaseDataMaintenance.model.ots.attachment import *
 from BaseDataMaintenance.model.ots.attachment import *
@@ -4032,8 +4033,8 @@ class Dataflow_dumplicate(Dataflow):
                 for _data in list_merge_data:
                 for _data in list_merge_data:
                     _time = time.time()
                     _time = time.time()
                     _check,_prob = check_merge_rule(_proj,_data,b_log=b_log,return_prob=True,project_uuids=project_uuids)
                     _check,_prob = check_merge_rule(_proj,_data,b_log=b_log,return_prob=True,project_uuids=project_uuids)
-                    # if b_log:
-                    #     log("merge rule res: %s prob: %s"%(str(_check),str(_prob)))
+                    if b_log:
+                        log("merge rule res: %s prob: %s"%(str(_check),str(_prob)))
                     projects_check_rule_time += time.time()-_time
                     projects_check_rule_time += time.time()-_time
                     if _check:
                     if _check:
                         list_check_data.append([_data,_prob])
                         list_check_data.append([_data,_prob])

+ 2 - 2
BaseDataMaintenance/maxcompute/documentMerge.py

@@ -2687,10 +2687,10 @@ def check_project_codes_merge(list_code,list_code_to_merge,b_log):
     has_same = False
     has_same = False
     has_similar = False
     has_similar = False
     for _c in list_code[:100]:
     for _c in list_code[:100]:
-        _c = str(_c).replace("【", "[").replace("】", "]")
+        _c = str(_c).replace("【", "[").replace("】", "]").replace("（","(").replace("）",")")
         _c = "".join(re.findall("[\u4e00-\u9fa5a-zA-Z\d]+", _c.upper()))
         _c = "".join(re.findall("[\u4e00-\u9fa5a-zA-Z\d]+", _c.upper()))
         for _c1 in list_code_to_merge[:100]:
         for _c1 in list_code_to_merge[:100]:
-            _c1 = str(_c1).replace("【","[").replace("】","]")
+            _c1 = str(_c1).replace("【","[").replace("】","]").replace("（","(").replace("）",")")
             _c1 = "".join(re.findall("[\u4e00-\u9fa5a-zA-Z\d]+", _c1.upper()))
             _c1 = "".join(re.findall("[\u4e00-\u9fa5a-zA-Z\d]+", _c1.upper()))
             _simi = getSimilarityOfString(_c,_c1,3)
             _simi = getSimilarityOfString(_c,_c1,3)
             if _simi==1:
             if _simi==1: