|
@@ -530,11 +530,11 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
PackageSet = set()
|
|
PackageSet = set()
|
|
dict_packageCode = dict()
|
|
dict_packageCode = dict()
|
|
|
|
|
|
- package_name_pattern = re.compile("((标[段号的包]|分包)的?(名[称单]?|包名))[::]?([^::]{3,30}?),{1}")
|
|
|
|
- package_N_name_pattern = re.compile("[^承](分?包|标段|标包|标|包|包组|子项目|包件|项目类型)编?号?[::]?[\((]?([0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]){1,2},{1}")
|
|
|
|
- package_number_pattern = re.compile("(([^承](包|标[段号的包]|分?包|包组|包件)编?号?|子项目|项目类型)[::]?[0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]{1,4}[^\.])[^至]?|([^\.]?第?[ⅠⅡⅢⅣⅤⅥⅦ0-9A-Za-z一二三四五六七八九十]{1,4}(包号|标[段号的包]|分?包))")
|
|
|
|
|
|
+ package_name_pattern = re.compile("((标[段号的包]|分包)的?(名[称单]?|包名))[::]*([^,。]{3,30})")
|
|
|
|
+ package_N_name_pattern = re.compile("[^承](分?包|标段|标包|标|包|包组|子项目|包件|项目类型)编?号?[::]?[\((]?([0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]){1,2}")
|
|
|
|
+ package_number_pattern = re.compile("(([^承]*(包|标[段号的包]|分?包|包组|包件)编?号?|子项目|项目类型)[::]?[0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]{1,4}[^\.])[^至]?|([^\.]?第?[ⅠⅡⅢⅣⅤⅥⅦ0-9A-Za-z一二三四五六七八九十]{1,4}(包号|标[段号的包]|分?包))")
|
|
# other_package_pattern = re.compile('(项目名称|物资名称|设备名称|场次名称|标段名称)[::](.{,20}?)(,|项目)') # 新正则识别标段
|
|
# other_package_pattern = re.compile('(项目名称|物资名称|设备名称|场次名称|标段名称)[::](.{,20}?)(,|项目)') # 新正则识别标段
|
|
- other_package_pattern = re.compile('((项目|物资|设备|场次|标段|标的|产品)(名称))[::]([^,。]{,50}?)(,|。)') # # 2020/11/23 大网站规则 调整 package_N_name_pattern, package_N_name_pattern 中的项目 改为 子项目
|
|
|
|
|
|
+ other_package_pattern = re.compile('((项目|物资|设备|场次|标段|标的|产品)(名称))[::]*([^,。]{3,50})') # # 2020/11/23 大网站规则 调整 package_N_name_pattern, package_N_name_pattern 中的项目 改为 子项目
|
|
win_tenderer_pattern = re.compile('(中标人|供应商)[::](.{,25})(,|。)') # 2020/11/23 大网站规则 调整
|
|
win_tenderer_pattern = re.compile('(中标人|供应商)[::](.{,25})(,|。)') # 2020/11/23 大网站规则 调整
|
|
model_pattern = re.compile('(型号|序号)[::]([^,。]{,20})(,|。)') # 2020/11/23 大网站规则 调整
|
|
model_pattern = re.compile('(型号|序号)[::]([^,。]{,20})(,|。)') # 2020/11/23 大网站规则 调整
|
|
number_pattern = re.compile("[0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]{1,4}")
|
|
number_pattern = re.compile("[0-9A-Za-z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦ]{1,4}")
|
|
@@ -594,7 +594,9 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
names = re.findall(other_package_pattern, content)
|
|
names = re.findall(other_package_pattern, content)
|
|
N_names = re.findall(package_N_name_pattern,content)
|
|
N_names = re.findall(package_N_name_pattern,content)
|
|
if len(names)==1 and len(N_names)==1:
|
|
if len(names)==1 and len(N_names)==1:
|
|
|
|
+ print("=====",names,N_names)
|
|
package_names.append([names[0][-1],N_names[0][-1]])
|
|
package_names.append([names[0][-1],N_names[0][-1]])
|
|
|
|
+ print("=====",package_names)
|
|
for i in range(len(list_sentence)):
|
|
for i in range(len(list_sentence)):
|
|
PackageList_item = []
|
|
PackageList_item = []
|
|
PackageList_item_scope = []
|
|
PackageList_item_scope = []
|
|
@@ -627,6 +629,7 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
PackageList_scope = PackageList_scope+PackageList_item_scope
|
|
PackageList_scope = PackageList_scope+PackageList_item_scope
|
|
PackageList_item.sort(key=lambda x:x["sentence_index"])
|
|
PackageList_item.sort(key=lambda x:x["sentence_index"])
|
|
#PackageList = PackageList+PackageList_item
|
|
#PackageList = PackageList+PackageList_item
|
|
|
|
+ print("=====",PackageList_scope)
|
|
#不作为包
|
|
#不作为包
|
|
# if len(PackageSet)==0:
|
|
# if len(PackageSet)==0:
|
|
# for i in range(len(list_sentence)):
|
|
# for i in range(len(list_sentence)):
|
|
@@ -683,6 +686,7 @@ def getPackagesFromArticle(list_sentence,list_entity):
|
|
PackageList_scope = PackageList_scope+PackageList_item_scope
|
|
PackageList_scope = PackageList_scope+PackageList_item_scope
|
|
PackageList_item.sort(key=lambda x:x["sentence_index"])
|
|
PackageList_item.sort(key=lambda x:x["sentence_index"])
|
|
|
|
|
|
|
|
+
|
|
pattern_punctuation = "[::()\(\),,。;;]"
|
|
pattern_punctuation = "[::()\(\),,。;;]"
|
|
for i in range(len(list_sentence)):
|
|
for i in range(len(list_sentence)):
|
|
for j in range(len(PackageList_scope)):
|
|
for j in range(len(PackageList_scope)):
|