|
@@ -683,6 +683,8 @@ class PREMPredict():
|
|
text = text_list[i]
|
|
text = text_list[i]
|
|
if label in [0, 1, 2, 3, 4] and values[label] < 0.5: # 小于阈值的设为其他,让后面的规则召回重新判断
|
|
if label in [0, 1, 2, 3, 4] and values[label] < 0.5: # 小于阈值的设为其他,让后面的规则召回重新判断
|
|
label = 5
|
|
label = 5
|
|
|
|
+ elif label in [2,3,4] and re.search('序号:\d+,', text):
|
|
|
|
+ label = 5
|
|
elif label == 2:
|
|
elif label == 2:
|
|
if re.search('中标单位和.{,25}签订合同', text):
|
|
if re.search('中标单位和.{,25}签订合同', text):
|
|
label = 0
|
|
label = 0
|
|
@@ -1348,7 +1350,7 @@ class RoleRulePredictor():
|
|
_weight = _group.split("_")[2] if len(_group.split("_"))==3 else ""
|
|
_weight = _group.split("_")[2] if len(_group.split("_"))==3 else ""
|
|
# _label = {"tenderee": 0, "agency": 1, "winTenderer": 2,
|
|
# _label = {"tenderee": 0, "agency": 1, "winTenderer": 2,
|
|
# "secondTenderer": 3, "thirdTenderer": 4}.get(_role)
|
|
# "secondTenderer": 3, "thirdTenderer": 4}.get(_role)
|
|
- if _i_span == 0 and _direct == "left" and re.search('各.{,5}供应商|尊敬的供应商|业绩|拟招|(交易|采购|招标)服务(单位|机构)|第[四五六七4567]|是否中标:否', #135463002 拟招一家供应商为宜宾市第三人民医院、李庄同济医院院区提供消防维保服务
|
|
|
|
|
|
+ if _i_span == 0 and _direct == "left" and re.search('各.{,5}供应商|尊敬的供应商|业绩|拟招|(交易|采购|招标)服务(单位|机构)|第[四五六七4567]|是否中标:否|序号:\d+', #135463002 拟招一家供应商为宜宾市第三人民医院、李庄同济医院院区提供消防维保服务
|
|
list_spans[0]) == None: # 2021/12/22 修正错误中标召回 例子208668937
|
|
list_spans[0]) == None: # 2021/12/22 修正错误中标召回 例子208668937
|
|
_flag = True
|
|
_flag = True
|
|
_label = {"tenderee": 0, "agency": 1, "winTenderer": 2,
|
|
_label = {"tenderee": 0, "agency": 1, "winTenderer": 2,
|
|
@@ -2248,6 +2250,7 @@ class ProductAttributesPredictor():
|
|
for td in tds:
|
|
for td in tds:
|
|
td_text = re.sub('\s', '', td.get_text())
|
|
td_text = re.sub('\s', '', td.get_text())
|
|
td_text = td_text.replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '/').replace('"', '') # 修复272144312 # 产品单价数量提取结果有特殊符号\ 气动执行装置备件\密封组件\NBR+PT
|
|
td_text = td_text.replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '/').replace('"', '') # 修复272144312 # 产品单价数量提取结果有特殊符号\ 气动执行装置备件\密封组件\NBR+PT
|
|
|
|
+ td_text = td_text.replace("(", "(").replace(")", ")").replace(':', ':')
|
|
tr_line.append(td_text)
|
|
tr_line.append(td_text)
|
|
inner_table.append(tr_line)
|
|
inner_table.append(tr_line)
|
|
return inner_table
|
|
return inner_table
|
|
@@ -2459,9 +2462,10 @@ class ProductAttributesPredictor():
|
|
:return: 表头所在列序号,是否表头,表头内容
|
|
:return: 表头所在列序号,是否表头,表头内容
|
|
'''
|
|
'''
|
|
flag = False
|
|
flag = False
|
|
- header_dic = {'名称': '', '数量': '', '单价': '', '品牌': '', '规格': '', '需求': '', '预算': '', '时间': ''}
|
|
|
|
|
|
+ header_dic = {'名称': '', '数量': '', '单位': '', '单价': '', '品牌': '', '规格': '', '需求': '', '预算': '', '时间': ''}
|
|
product = "" # 产品
|
|
product = "" # 产品
|
|
quantity = "" # 数量
|
|
quantity = "" # 数量
|
|
|
|
+ quantity_unit = "" # 数量单位
|
|
unitPrice = "" # 单价
|
|
unitPrice = "" # 单价
|
|
brand = "" # 品牌
|
|
brand = "" # 品牌
|
|
specs = "" # 规格
|
|
specs = "" # 规格
|
|
@@ -2489,9 +2493,12 @@ class ProductAttributesPredictor():
|
|
for j in range(i + 1, len(items)):
|
|
for j in range(i + 1, len(items)):
|
|
if len(items[j]) > 20 and len(re.sub('[\((].*[)\)]|[^\u4e00-\u9fa5]', '', items[j])) > 10:
|
|
if len(items[j]) > 20 and len(re.sub('[\((].*[)\)]|[^\u4e00-\u9fa5]', '', items[j])) > 10:
|
|
continue
|
|
continue
|
|
- if header_dic['数量']=="" and re.search('数量|采购量', items[j]):
|
|
|
|
|
|
+ if header_dic['数量']=="" and re.search('数量|采购量', items[j]) and re.search('单价|用途|要求|规格|型号|运输|承运', items[j])==None:
|
|
header_dic['数量'] = j
|
|
header_dic['数量'] = j
|
|
quantity = items[j]
|
|
quantity = items[j]
|
|
|
|
+ elif header_dic['单位']=="" and re.search('^(数量单位|计量单位|单位)$', items[j]):
|
|
|
|
+ header_dic['单位'] = j
|
|
|
|
+ quantity_unit = items[j]
|
|
elif re.search('单价', items[j]):
|
|
elif re.search('单价', items[j]):
|
|
header_dic['单价'] = j
|
|
header_dic['单价'] = j
|
|
unitPrice = items[j]
|
|
unitPrice = items[j]
|
|
@@ -2518,9 +2525,9 @@ class ProductAttributesPredictor():
|
|
if it != "":
|
|
if it != "":
|
|
num += 1
|
|
num += 1
|
|
if num >=2:
|
|
if num >=2:
|
|
- return header_dic, flag, (product, quantity, unitPrice, brand, specs), (product, demand, budget, order_time)
|
|
|
|
|
|
+ return header_dic, flag, (product, quantity, quantity_unit, unitPrice, brand, specs), (product, demand, budget, order_time)
|
|
flag = False
|
|
flag = False
|
|
- return header_dic, flag, (product, quantity, unitPrice, brand, specs), (product, demand, budget, order_time)
|
|
|
|
|
|
+ return header_dic, flag, (product, quantity, quantity_unit, unitPrice, brand, specs), (product, demand, budget, order_time)
|
|
|
|
|
|
def predict(self, docid='', html='', page_time=""):
|
|
def predict(self, docid='', html='', page_time=""):
|
|
'''
|
|
'''
|
|
@@ -2553,6 +2560,7 @@ class ProductAttributesPredictor():
|
|
# print(inner_table)
|
|
# print(inner_table)
|
|
i = 0
|
|
i = 0
|
|
found_header = False
|
|
found_header = False
|
|
|
|
+ header_quan_unit = "" # 数量表头 包含单位
|
|
header_colnum = 0
|
|
header_colnum = 0
|
|
if flag_yx:
|
|
if flag_yx:
|
|
col0_l = []
|
|
col0_l = []
|
|
@@ -2613,6 +2621,7 @@ class ProductAttributesPredictor():
|
|
continue
|
|
continue
|
|
product = "" # 产品
|
|
product = "" # 产品
|
|
quantity = "" # 数量
|
|
quantity = "" # 数量
|
|
|
|
+ quantity_unit = "" # 数量单位
|
|
unitPrice = "" # 单价
|
|
unitPrice = "" # 单价
|
|
brand = "" # 品牌
|
|
brand = "" # 品牌
|
|
specs = "" # 规格
|
|
specs = "" # 规格
|
|
@@ -2625,6 +2634,13 @@ class ProductAttributesPredictor():
|
|
if len(set([re.sub('[::]','',td) for td in tds]) & self.header_set) > len(tds) * 0.2:
|
|
if len(set([re.sub('[::]','',td) for td in tds]) & self.header_set) > len(tds) * 0.2:
|
|
# if len(set(tds) & self.header_set) > len(tds) * 0.2:
|
|
# if len(set(tds) & self.header_set) > len(tds) * 0.2:
|
|
header_dic, found_header, header_list, header_list2 = self.find_header(tds, self.p1, self.p2)
|
|
header_dic, found_header, header_list, header_list2 = self.find_header(tds, self.p1, self.p2)
|
|
|
|
+ if found_header and isinstance(header_list, tuple) and len(header_list) > 2: # 获取表头中的 数量单位
|
|
|
|
+ quantity_header = header_list[1].replace('单位:', '')
|
|
|
|
+ if re.search('(([\w/]{,5}))', quantity_header):
|
|
|
|
+ header_quan_unit = re.search('(([\w/]{,5}))', quantity_header).group(1)
|
|
|
|
+ else:
|
|
|
|
+ header_quan_unit = ""
|
|
|
|
+
|
|
if found_header and len(headers)<1: # 只保留出现的第一个表头
|
|
if found_header and len(headers)<1: # 只保留出现的第一个表头
|
|
headers.append('_'.join(header_list))
|
|
headers.append('_'.join(header_list))
|
|
headers_demand.append('_'.join(header_list2))
|
|
headers_demand.append('_'.join(header_list2))
|
|
@@ -2638,6 +2654,7 @@ class ProductAttributesPredictor():
|
|
continue
|
|
continue
|
|
id1 = header_dic.get('名称', "")
|
|
id1 = header_dic.get('名称', "")
|
|
id2 = header_dic.get('数量', "")
|
|
id2 = header_dic.get('数量', "")
|
|
|
|
+ id2_2 = header_dic.get('单位', "")
|
|
id3 = header_dic.get('单价', "")
|
|
id3 = header_dic.get('单价', "")
|
|
id4 = header_dic.get('品牌', "")
|
|
id4 = header_dic.get('品牌', "")
|
|
id5 = header_dic.get('规格', "")
|
|
id5 = header_dic.get('规格', "")
|
|
@@ -2651,8 +2668,20 @@ class ProductAttributesPredictor():
|
|
if id2 != "":
|
|
if id2 != "":
|
|
if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', tds[id2]):
|
|
if re.search('\d+|[壹贰叁肆伍陆柒捌玖拾一二三四五六七八九十]', tds[id2]):
|
|
quantity = tds[id2]
|
|
quantity = tds[id2]
|
|
- else:
|
|
|
|
- quantity = ""
|
|
|
|
|
|
+ quantity = re.sub('[()(),,约]', '', quantity)
|
|
|
|
+ quantity = re.sub('[一壹]', '1', quantity)
|
|
|
|
+ ser = re.search('^(\d+\.?\d*)([㎡\w/]{,5})', quantity)
|
|
|
|
+ if ser:
|
|
|
|
+ quantity = str(ser.group(1))
|
|
|
|
+ quantity_unit = ser.group(2)
|
|
|
|
+ if quantity_unit == "" and header_quan_unit != "":
|
|
|
|
+ quantity_unit = header_quan_unit
|
|
|
|
+ else:
|
|
|
|
+ quantity = ""
|
|
|
|
+ quantity_unit = ""
|
|
|
|
+ if id2_2 != "":
|
|
|
|
+ if re.search('^\w{1,4}$', tds[id2_2]):
|
|
|
|
+ quantity_unit = tds[id2_2]
|
|
if id3 != "":
|
|
if id3 != "":
|
|
if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', tds[id3]):
|
|
if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', tds[id3]):
|
|
_unitPrice = tds[id3]
|
|
_unitPrice = tds[id3]
|
|
@@ -2697,7 +2726,7 @@ class ProductAttributesPredictor():
|
|
if len(unitPrice) > 15 or len(product)>100: # 单价大于15位数或 产品名称长于100字
|
|
if len(unitPrice) > 15 or len(product)>100: # 单价大于15位数或 产品名称长于100字
|
|
i += 1
|
|
i += 1
|
|
continue
|
|
continue
|
|
- link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
|
|
|
|
|
|
+ link = {'product': product, 'quantity': quantity, 'quantity_unit': quantity_unit, 'unitPrice': unitPrice,
|
|
'brand': brand[:50], 'specs':specs}
|
|
'brand': brand[:50], 'specs':specs}
|
|
if link not in product_link:
|
|
if link not in product_link:
|
|
product_link.append(link)
|
|
product_link.append(link)
|
|
@@ -4238,7 +4267,7 @@ class DistrictPredictor():
|
|
self.short2id = short2id
|
|
self.short2id = short2id
|
|
self.full2id = full2id
|
|
self.full2id = full2id
|
|
|
|
|
|
- def predict(self, project_name, prem, title, list_articles, web_source_name = ""):
|
|
|
|
|
|
+ def predict(self, project_name, prem, title, list_articles, web_source_name = "", list_entitys=""):
|
|
'''
|
|
'''
|
|
先匹配 project_name+tenderee+tenderee_address, 如果缺少省或市 再匹配 title+content
|
|
先匹配 project_name+tenderee+tenderee_address, 如果缺少省或市 再匹配 title+content
|
|
:param project_name:
|
|
:param project_name:
|
|
@@ -4271,8 +4300,8 @@ class DistrictPredictor():
|
|
for _id in self.full2id[name]:
|
|
for _id in self.full2id[name]:
|
|
area = self.dist_dic[_id]['area'] + [''] * (3 - len(self.dist_dic[_id]['area']))
|
|
area = self.dist_dic[_id]['area'] + [''] * (3 - len(self.dist_dic[_id]['area']))
|
|
# score_l.append([_id, score] + area)
|
|
# score_l.append([_id, score] + area)
|
|
- w = self.dist_dic[_id]['权重']
|
|
|
|
- score_l.append([_id, score + w] + area)
|
|
|
|
|
|
+ # w = self.dist_dic[_id]['权重']
|
|
|
|
+ score_l.append([_id, score + 1] + area) # 匹配全称的加1 ,不加权重,因为权重某些赋值不好
|
|
|
|
|
|
flag = 0
|
|
flag = 0
|
|
for it in re.finditer(self.short_name, text):
|
|
for it in re.finditer(self.short_name, text):
|
|
@@ -4286,14 +4315,22 @@ class DistrictPredictor():
|
|
area = self.dist_dic[_id]['area'] + [''] * (3 - len(self.dist_dic[_id]['area']))
|
|
area = self.dist_dic[_id]['area'] + [''] * (3 - len(self.dist_dic[_id]['area']))
|
|
if area[0] in ['2', '16', '20', '30']:
|
|
if area[0] in ['2', '16', '20', '30']:
|
|
_type += 10
|
|
_type += 10
|
|
|
|
+ if w < 1 and it.end() < len(text) and text[it.end()] in ['省', '市', '县']: # 如果简称后面 有省市县权重改为1
|
|
|
|
+ w = 1
|
|
score2 += w
|
|
score2 += w
|
|
if _id not in id_set:
|
|
if _id not in id_set:
|
|
if _type == 20:
|
|
if _type == 20:
|
|
type_w = 3
|
|
type_w = 3
|
|
elif _type == 30:
|
|
elif _type == 30:
|
|
- type_w = 2
|
|
|
|
|
|
+ if it.start()>3 and text[it.start()-1] == '市': # 城市后面 简称不能作为市
|
|
|
|
+ type_w = 0
|
|
|
|
+ else:
|
|
|
|
+ type_w = 2
|
|
else:
|
|
else:
|
|
- type_w = 1
|
|
|
|
|
|
+ if it.end()<len(text) and text[it.end()] == '市': # 简称后面 有市字 改为市级
|
|
|
|
+ type_w = 2
|
|
|
|
+ else:
|
|
|
|
+ type_w = 1
|
|
id_set.add(_id)
|
|
id_set.add(_id)
|
|
score2 += w * type_w
|
|
score2 += w * type_w
|
|
score_l.append([_id, score * w + score2] + area)
|
|
score_l.append([_id, score * w + score2] + area)
|
|
@@ -4344,29 +4381,53 @@ class DistrictPredictor():
|
|
3:地址直接在招标人后面 招标人:xxx,地址:xxx
|
|
3:地址直接在招标人后面 招标人:xxx,地址:xxx
|
|
4:招标、代理一起,两个地址一起 招标人:xxx, 代理人:xxx, 地址:xxx, 地址:xxx.
|
|
4:招标、代理一起,两个地址一起 招标人:xxx, 代理人:xxx, 地址:xxx, 地址:xxx.
|
|
'''
|
|
'''
|
|
- p3 = '(招标|采购)(人|单位)(信息:)?(名称)?:[\w()]{4,15},(联系)?地址:(?P<addr>(\w{2,8}[省市州区县][^\w]*)+)'
|
|
|
|
- p4 = '(招标|采购)(人|单位)(名称)?:[\w()]{4,15},(招标|采购)?代理(人|机构)(名称)?:[\w()]{4,15},(联系)?地址:(?P<addr>(\w{2,8}[省市州区县][^\w]*)+)'
|
|
|
|
|
|
+ p3 = '(招标|采购|甲)(人|方|单位)(信息:|(甲方))?(名称)?:[\w()]{4,15},(联系)?地址:(?P<addr>(\w{2,8}[省市州区县][^\w]*)+)'
|
|
|
|
+ p4 = '(招标|采购|甲)(人|方|单位)(信息:|(甲方))?(名称)?:[\w()]{4,15},(招标|采购)?代理(人|机构)(名称)?:[\w()]{4,15},(联系)?地址:(?P<addr>(\w{2,8}[省市州区县][^\w]*)+)'
|
|
|
|
+ p5 = '(采购|招标)(人|单位)(联系)?地址:(?P<addr>(\w{2,8}[省市州区县][^\w]*)+)'
|
|
if re.search(p3, text):
|
|
if re.search(p3, text):
|
|
return re.search(p3, text).group('addr')
|
|
return re.search(p3, text).group('addr')
|
|
elif re.search(p4, text):
|
|
elif re.search(p4, text):
|
|
return re.search(p4, text).group('addr')
|
|
return re.search(p4, text).group('addr')
|
|
|
|
+ elif re.search(p5, text):
|
|
|
|
+ return re.search(p5, text).group('addr')
|
|
else:
|
|
else:
|
|
return ''
|
|
return ''
|
|
|
|
|
|
def get_project_addr(text):
    """Return the first project-location phrase found in *text*.

    The phrase must start with one of the label prefixes
    (项目/建设/工程/服务/交货/送货/收货), followed by an address keyword
    (地址/地点/位置/所在地/所在地区), a colon, and one or more region
    segments that each end in 省/市/州/区/县.

    :param text: document text to scan.
    :return: the whole matched phrase (label included), or '' when
        nothing matches.
    """
    # A label prefix is mandatory: a bare "地址:" is not accepted here.
    # NOTE(review): the pattern uses a half-width colon; presumably the
    # text is normalised upstream -- confirm against the caller.
    p1 = r'(项目|建设|工程|服务|交货|送货|收货)(地址|地点|位置|所在地区?):(\w{2,8}[省市州区县][^\w]*)+'
    # Search once and reuse the match object instead of matching twice.
    match = re.search(p1, text)
    return match.group(0) if match else ''
|
|
|
|
|
|
def get_bid_addr(text):
    """Return the first bidding-procedure address phrase found in *text*.

    The phrase must start with one of the procedure labels
    (磋商/谈判/开标/投标/评标/报名/递交/评审/发售), followed by an address
    keyword (地址/地点/所在地/所在地区), a colon, and one or more region
    segments that each end in 省/市/州/区/县.

    :param text: document text to scan.
    :return: the whole matched phrase (label included), or '' when
        nothing matches.
    """
    # Purchaser/tenderee labels (采购人/招标单位 ...) are not part of this
    # pattern, so such addresses are not returned by this helper.
    p2 = r'(磋商|谈判|开标|投标|评标|报名|递交|评审|发售)(地址|地点|所在地区?):(\w{2,8}[省市州区县][^\w]*)+'
    # Search once and reuse the match object instead of matching twice.
    match = re.search(p2, text)
    return match.group(0) if match else ''
|
|
|
|
|
|
|
|
def get_all_addr(list_entitys):
    """Collect location and organisation entity texts from the first entity list.

    Only ``list_entitys[0]`` is inspected. Entities are sorted into three
    buckets by their ``entity_type`` and ``label`` attributes:

    * ``entity_type == 'location'``            -> addresses
    * ``'org'``/``'company'`` with ``label == 0`` -> tenderees
    * ``'org'``/``'company'`` with other labels  -> other roles

    :param list_entitys: sequence whose first element is an iterable of
        entity objects exposing ``entity_type``, ``entity_text`` and
        ``label``.
    :return: tuple ``(addresses, tenderees, other_roles)``; each item is
        the bucket's texts joined with a single space ('' when empty).
    """
    # The enclosing predict() declares list_entitys="" as its default, so
    # an empty value must not be indexed (it used to raise IndexError).
    if not list_entitys:
        return '', '', ''

    tenderee_l = []
    other_roles = []
    addr_l = []
    for ent in list_entitys[0]:
        if ent.entity_type == 'location':
            addr_l.append(ent.entity_text)
        elif ent.entity_type in ('org', 'company'):
            # label 0 marks the tenderee role; every other label is
            # pooled into "other roles".
            if ent.label == 0:
                tenderee_l.append(ent.entity_text)
            else:
                other_roles.append(ent.entity_text)
    return ' '.join(addr_l), ' '.join(tenderee_l), ' '.join(other_roles)
|
|
|
|
+
|
|
|
|
def get_title_addr(text):
    """Return the first run of region segments found in *text*.

    A segment is 2-8 word characters followed by one of 省/市/州/区/县 and
    any trailing non-word characters; consecutive segments are returned
    together as one string.

    :param text: text to scan (the caller passes the document title).
    :return: the matched run of segments, or '' when nothing matches.
    """
    p1 = r'(\w{2,8}[省市州区县][^\w]*)+'
    # Search once and reuse the match object instead of matching twice.
    match = re.search(p1, text)
    return match.group(0) if match else ''
|
|
|
|
+
|
|
if '##attachment##' in list_articles[0].content:
|
|
if '##attachment##' in list_articles[0].content:
|
|
content, attachment = list_articles[0].content.split('##attachment##')
|
|
content, attachment = list_articles[0].content.split('##attachment##')
|
|
if len(content) < 200:
|
|
if len(content) < 200:
|
|
@@ -4384,9 +4445,13 @@ class DistrictPredictor():
|
|
tenderee_address = role_addr
|
|
tenderee_address = role_addr
|
|
|
|
|
|
if tenderee_address == "":
|
|
if tenderee_address == "":
|
|
- bid_addr = get_bid_addr(content)
|
|
|
|
- if bid_addr != "":
|
|
|
|
- tenderee_address = bid_addr
|
|
|
|
|
|
+ title_addr = get_title_addr(title)
|
|
|
|
+ if title_addr != "":
|
|
|
|
+ tenderee_address = title_addr
|
|
|
|
+ else:
|
|
|
|
+ bid_addr = get_bid_addr(content)
|
|
|
|
+ if bid_addr != "":
|
|
|
|
+ tenderee_address = bid_addr
|
|
|
|
|
|
project_name = str(project_name)
|
|
project_name = str(project_name)
|
|
tenderee = str(tenderee)
|
|
tenderee = str(tenderee)
|
|
@@ -4397,15 +4462,22 @@ class DistrictPredictor():
|
|
project_name = project_name.replace(tenderee, '')
|
|
project_name = project_name.replace(tenderee, '')
|
|
|
|
|
|
text1 = "{0} {1} {2}".format(project_name, tenderee, tenderee_address)
|
|
text1 = "{0} {1} {2}".format(project_name, tenderee, tenderee_address)
|
|
- # print('text1:', text1)
|
|
|
|
|
|
|
|
web_source_name = str(web_source_name) # 修复某些不是字符串类型造成报错
|
|
web_source_name = str(web_source_name) # 修复某些不是字符串类型造成报错
|
|
text1 = re.sub('复合肥|铁路|公路|新会计', ' ', text1) #预防提取错 合肥 路南 新会 等地区
|
|
text1 = re.sub('复合肥|铁路|公路|新会计', ' ', text1) #预防提取错 合肥 路南 新会 等地区
|
|
|
|
+ # print('text1:', text1)
|
|
rs = get_area(text1, web_source_name)
|
|
rs = get_area(text1, web_source_name)
|
|
|
|
|
|
if rs['district']['province'] == '全国' or rs['district']['city'] == '未知':
|
|
if rs['district']['province'] == '全国' or rs['district']['city'] == '未知':
|
|
- text2 = title + content if len(content)<2000 else title + content[:1000] + content[-1000:]
|
|
|
|
|
|
+ all_addr, tenderees, other_roles = get_all_addr(list_entitys)
|
|
|
|
+ if tenderees != "":
|
|
|
|
+ text2 = tenderees + " " + all_addr
|
|
|
|
+ # print('所有地址:', all_addr)
|
|
|
|
+ else:
|
|
|
|
+ text2 = other_roles + " " + all_addr
|
|
|
|
+ # text2 = title + content if len(content)<2000 else title + content[:1000] + content[-1000:]
|
|
text2 = re.sub('复合肥|铁路|公路|新会计', ' ', text2)
|
|
text2 = re.sub('复合肥|铁路|公路|新会计', ' ', text2)
|
|
|
|
+ # print('text2:', text2)
|
|
rs2 = get_area(text2, web_source_name, not_in_content=False)
|
|
rs2 = get_area(text2, web_source_name, not_in_content=False)
|
|
rs2['district']['is_in_text'] = True
|
|
rs2['district']['is_in_text'] = True
|
|
if rs['district']['province'] == '全国' and rs2['district']['province'] != '全国':
|
|
if rs['district']['province'] == '全国' and rs2['district']['province'] != '全国':
|