|
@@ -57,11 +57,13 @@ def extract_sentence_list(sentence_list):
|
|
|
requirement_pattern = "(采购需求|需求分析|项目说明|(采购|合同|招标|询比?价|项目|服务|工程|标的|需求|建设)(的?(主要|简要|基本|具体|名称及))?" \
|
|
|
"(内容|概况|概述|范围|信息|规模|简介|介绍|说明|摘要|情况)([及与和]((其它|\w{,2})[要需]求|发包范围|数量))?" \
|
|
|
"|招标项目技术要求|服务要求|服务需求|项目目标|需求内容如下|建设规模)为?([::,]|$)"
|
|
|
-aptitude_pattern = "(资格要求|资质要求)([::,]|$)"
|
|
|
+aptitude_pattern = "((资格|资质)[的及]?要求|竞买资格及要求|供应商报价须知)([::,]|$)|(竞买|竞买人|竞投人)?资格(条件)?:"
|
|
|
addr_bidopen_pattern = "([开评]标|开启|评选|比选|磋商|遴选|寻源|采购|招标|竞价|议价|委托|询比?价|比价|谈判|邀标|邀请|洽谈|约谈|选取|抽取|抽选|递交\w{,4}文件)[))]?(时间[与及和、])?(地址|地点)([与及和、]时间)?([::,]|$)|开启([::,]|$)"
|
|
|
addr_bidsend_pattern = "((\w{,4}文件)?(提交|递交)(\w{,4}文件)?|投标)(截止时间[与及和、])?地[点址]([与及和、]截止时间)?([::,]|$)"
|
|
|
-pinmu_name_pattern = "采购品目名称:(\w{2,50})[,。]"
|
|
|
+pinmu_name_pattern = "采购品目名称([::,]|$)"
|
|
|
out_lines = []
|
|
|
+policy_pattern = "《.+?(通知|办法|条例|规定|规程|规范|须知|规则|标准|细则|意见|协议|条件|要求|手册|法典|方案|指南|指引|法)》"
|
|
|
+not_policy_pattern = "(表|函|书|证|\d页|公告|合同|文件|清单)》$|采购合同|响应方须知|响应文件格式|营业执照|开标一览|采购需求"
|
|
|
|
|
|
def extract_parameters(parse_document):
|
|
|
'''
|
|
@@ -76,6 +78,7 @@ def extract_parameters(parse_document):
|
|
|
addr_bidsend_text = '' # 投标地址
|
|
|
requirement_scope = [] # 采购内容始末位置
|
|
|
pinmu_name = '' # 品目名称
|
|
|
+ list_policy = [] # 政策法规
|
|
|
|
|
|
_find_count = 0
|
|
|
_data_i = -1
|
|
@@ -161,7 +164,23 @@ def extract_parameters(parse_document):
|
|
|
_data_i += len(childs)
|
|
|
_data_i -= 1
|
|
|
elif re.search(pinmu_name_pattern, _text):
|
|
|
- pinmu_name += re.search(pinmu_name_pattern, _text).group(1)
|
|
|
+ childs = get_childs([_data], max_depth=1)
|
|
|
+ for c in childs:
|
|
|
+ pinmu_name += c["text"]
|
|
|
+ _data_i += len(childs)
|
|
|
+ _data_i -= 1
|
|
|
+ _data_i = -1
|
|
|
+ while _data_i<len(list_data)-1:
|
|
|
+ _data_i += 1
|
|
|
+ _data = list_data[_data_i]
|
|
|
+ _type = _data["type"]
|
|
|
+ _text = _data["text"].strip()
|
|
|
+ # print(_data.keys())
|
|
|
+ if _type=="sentence":
|
|
|
+ for it in re.finditer(policy_pattern, _text):
|
|
|
+ if it not in list_policy:
|
|
|
+ list_policy.append(it.group(0))
|
|
|
+
|
|
|
if re.search('时间:', addr_bidopen_text) and re.search('([开评]标|开启|评选|比选|递交\w{,4}文件)?地[点址]([((]网址[))])?:[^,;。]{2,100}[,;。]', addr_bidopen_text):
|
|
|
for ser in re.finditer('([开评]标|开启|评选|比选|递交\w{,4}文件)?地[点址]([((]网址[))])?:[^,;。]{2,100}[,;。]', addr_bidopen_text):
|
|
|
b, e = ser.span()
|
|
@@ -172,7 +191,12 @@ def extract_parameters(parse_document):
|
|
|
for ser in re.finditer('((\w{,4}文件)?(提交|递交)(\w{,4}文件)?|投标)?地[点址]([((]网址[))])?:[^,;。]{2,100}[,;。]', addr_bidsend_text):
|
|
|
b, e = ser.span()
|
|
|
addr_bidsend_text = addr_bidsend_text[b:e]
|
|
|
- return requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name
|
|
|
+ ser = re.search(pinmu_name_pattern, pinmu_name)
|
|
|
+ if ser:
|
|
|
+ pinmu_name = pinmu_name[ser.end():]
|
|
|
+ if re.search('[^\w]$', pinmu_name):
|
|
|
+ pinmu_name = pinmu_name[:-1]
|
|
|
+ return requirement_text, aptitude_text, addr_bidopen_text, addr_bidsend_text, out_lines, requirement_scope, pinmu_name, list_policy
|
|
|
|
|
|
def extract_addr(content):
|
|
|
'''
|