|
@@ -1048,6 +1048,7 @@ def edit_distance_with_diff(s1, s2):
|
|
|
return dp[m][n], diff[::-1] # 将差异部分反转,因为我们是从后往前回溯的
|
|
|
|
|
|
package_number_pattern = re.compile(r"(?P<name>(((([^承]|^)包|标[段号的包]|分?包|包组|包件)编?号?|子项目|项目类型|项目)[::]?[0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]{1,4}[^\.]?)[^至]?|((?![\.])第?[ⅠⅡⅢⅣⅤⅥⅦ0-9A-Za-z一二三四五六七八九十]{1,4}(包号|标[段号的包]|分?包)))")  # Raw string avoids invalid-escape warnings for "\."; group "name" captures the package/lot label. Original remark: drop the "?" after 第 so text like "纯木浆8包/箱复印" is not taken as a package number (pattern still contains 第? - TODO confirm)
|
|
|
+package_number_pattern2 = re.compile("[0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]+") # extracts the lot/package number token; keep its character set in sync with package_number_pattern above
|
|
|
code_pattern = re.compile(r"[A-Za-z0-9\-\(\)()【】\.-]+")  # runs of ASCII letters/digits, hyphens, dots, parentheses and 【】 brackets; raw string avoids invalid-escape warnings
|
|
|
num_pattern = re.compile(r"^\d+(?:\.\d+)?$")  # whole string is digits with an optional ".digits" fraction; raw string avoids invalid-escape warnings
|
|
|
num1_pattern = re.compile(r"[一二三四五六七八九十A-Za-z]+")  # runs of Chinese numerals (一..十) and/or ASCII letters
|
|
@@ -1065,8 +1066,13 @@ def check_doctitle(doctitle_refind_less, doctitle_refind_greater,docchannel_less
|
|
|
code_greater = []
|
|
|
doctitle_refind_less = str(doctitle_refind_less).replace("(","(").replace(")",")")
|
|
|
doctitle_refind_greater = str(doctitle_refind_greater).replace("(","(").replace(")",")")
|
|
|
+ if doctitle_refind_less==doctitle_refind_greater:
|
|
|
+ return True
|
|
|
+
|
|
|
+ codes_less.sort(key=lambda x:len(x),reverse=True)
|
|
|
for _c in codes_less:
|
|
|
doctitle_refind_less = str(doctitle_refind_less).replace(_c,"")
|
|
|
+ code_greater.sort(key=lambda x:len(x), reverse=True)
|
|
|
for _c in code_greater:
|
|
|
doctitle_refind_greater = str(doctitle_refind_greater).replace(_c,"")
|
|
|
|
|
@@ -1094,8 +1100,19 @@ def check_doctitle(doctitle_refind_less, doctitle_refind_greater,docchannel_less
|
|
|
if _match is not None:
|
|
|
_pack2 = _match.groupdict()["name"]
|
|
|
if _pack1 is not None and _pack2 is not None:
|
|
|
- if _pack1!=_pack2:
|
|
|
- return False
|
|
|
+ # if _pack1!=_pack2:
|
|
|
+ # return False
|
|
|
+ if _pack1 != _pack2:
|
|
|
+ _pack1_num = re.search(package_number_pattern2,_pack1)
|
|
|
+ _pack1_num = _pack1_num.group() if _pack1_num else ""
|
|
|
+ _pack2_num = re.search(package_number_pattern2,_pack2)
|
|
|
+ _pack2_num = _pack2_num.group() if _pack2_num else ""
|
|
|
+ if _pack1_num and _pack2_num:
|
|
|
+ if _pack1_num != _pack2_num:
|
|
|
+ return False
|
|
|
+ else:
|
|
|
+ return False
|
|
|
+
|
|
|
|
|
|
#check the nums in title
|
|
|
doctitle_refind_less = re.sub(package_number_pattern,"",doctitle_refind_less)
|