|
@@ -64,8 +64,7 @@ class enterprise2Redis():
|
|
http_auth=('elastic', 'WWBu9#1HWHo$$gJm'),
|
|
http_auth=('elastic', 'WWBu9#1HWHo$$gJm'),
|
|
port=9200)
|
|
port=9200)
|
|
body = {
|
|
body = {
|
|
- "_source": ["name", "history_names", 'legal_person', 'reg_capital', 'credit_code', 'tax_number',
|
|
|
|
- 'reg_number', 'org_number',
|
|
|
|
|
|
+ "_source": ["name", 'credit_code',
|
|
"zhao_biao_number", "zhong_biao_number", "dai_li_number", "bid_number"],
|
|
"zhao_biao_number", "zhong_biao_number", "dai_li_number", "bid_number"],
|
|
'query': { # 查询命令
|
|
'query': { # 查询命令
|
|
"bool": {
|
|
"bool": {
|
|
@@ -114,21 +113,34 @@ class enterprise2Redis():
|
|
for item in result:
|
|
for item in result:
|
|
item = item['_source']
|
|
item = item['_source']
|
|
name = item['name']
|
|
name = item['name']
|
|
- history_names = item.get("history_names", "")
|
|
|
|
- legal_person = item.get("legal_person", "")
|
|
|
|
- reg_capital = item.get("reg_capital", "")
|
|
|
|
|
|
+ # history_names = item.get("history_names", "")
|
|
|
|
+ # legal_person = item.get("legal_person", "")
|
|
|
|
+ # reg_capital = item.get("reg_capital", "")
|
|
credit_code = item.get("credit_code", "")
|
|
credit_code = item.get("credit_code", "")
|
|
- tax_number = item.get("tax_number", "")
|
|
|
|
- reg_number = item.get("reg_number", "")
|
|
|
|
- org_number = item.get("org_number", "")
|
|
|
|
|
|
+ credit_code = re.sub("\s","",credit_code)
|
|
|
|
+ credit_code = credit_code if re.search("^[\dA-Z]{2}\d{6}[\dA-Z]{10}$",credit_code) else ""
|
|
|
|
+ # tax_number = item.get("tax_number", "")
|
|
|
|
+ # tax_number = re.sub("\s","",tax_number)
|
|
|
|
+ # tax_number = tax_number if len(tax_number)>=15 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",tax_number) else ""
|
|
|
|
+ # reg_number = item.get("reg_number", "")
|
|
|
|
+ # reg_number = re.sub("\s","",reg_number)
|
|
|
|
+ # reg_number = reg_number if len(reg_number)>=10 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",reg_number) else ""
|
|
|
|
+ # org_number = item.get("org_number", "") # 已弃用,统一社会信用代码取代组织机构代码
|
|
|
|
+ # org_number = org_number if not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",org_number) else ""
|
|
zhao_biao_number = item.get("zhao_biao_number", 0)
|
|
zhao_biao_number = item.get("zhao_biao_number", 0)
|
|
|
|
+ zhao_biao_number = zhao_biao_number if zhao_biao_number else 0
|
|
zhong_biao_number = item.get("zhong_biao_number", 0)
|
|
zhong_biao_number = item.get("zhong_biao_number", 0)
|
|
|
|
+ zhong_biao_number = zhong_biao_number if zhong_biao_number else 0
|
|
dai_li_number = item.get("dai_li_number", 0)
|
|
dai_li_number = item.get("dai_li_number", 0)
|
|
|
|
+ dai_li_number = dai_li_number if dai_li_number else 0
|
|
bid_number = item.get("bid_number", 0)
|
|
bid_number = item.get("bid_number", 0)
|
|
|
|
+ bid_number = bid_number if bid_number else 0
|
|
|
|
|
|
num = 0
|
|
num = 0
|
|
- for business in [history_names, legal_person, reg_capital, credit_code, tax_number, reg_number, org_number]:
|
|
|
|
- if len(str(business).replace("-", "")) > 1:
|
|
|
|
|
|
+ for business in [credit_code]: # 新增实体只判断credit_code
|
|
|
|
+ business = re.sub("\s-","",str(business))
|
|
|
|
+ business = re.sub("^nan$","",business)
|
|
|
|
+ if len(business) > 1:
|
|
num += 1
|
|
num += 1
|
|
isLegal = isLegalNewName(name)
|
|
isLegal = isLegalNewName(name)
|
|
if isLegal >= 0:
|
|
if isLegal >= 0:
|
|
@@ -136,14 +148,16 @@ class enterprise2Redis():
|
|
legal_name_num += 1
|
|
legal_name_num += 1
|
|
_json = {"have_business": 1, "zhao_biao_number": zhao_biao_number,
|
|
_json = {"have_business": 1, "zhao_biao_number": zhao_biao_number,
|
|
"zhong_biao_number": zhong_biao_number,
|
|
"zhong_biao_number": zhong_biao_number,
|
|
- "dai_li_number": dai_li_number, "bid_number": bid_number}
|
|
|
|
|
|
+ "dai_li_number": dai_li_number, "bid_number": bid_number,
|
|
|
|
+ "credit_code":credit_code}
|
|
_json = json.dumps(_json, ensure_ascii=False)
|
|
_json = json.dumps(_json, ensure_ascii=False)
|
|
add_redis_list.append((name, _json))
|
|
add_redis_list.append((name, _json))
|
|
elif num == 0 and bid_number > 0 and len(name) > 4:
|
|
elif num == 0 and bid_number > 0 and len(name) > 4:
|
|
legal_name_num += 1
|
|
legal_name_num += 1
|
|
_json = {"have_business": 0, "zhao_biao_number": zhao_biao_number,
|
|
_json = {"have_business": 0, "zhao_biao_number": zhao_biao_number,
|
|
"zhong_biao_number": zhong_biao_number,
|
|
"zhong_biao_number": zhong_biao_number,
|
|
- "dai_li_number": dai_li_number, "bid_number": bid_number}
|
|
|
|
|
|
+ "dai_li_number": dai_li_number, "bid_number": bid_number,
|
|
|
|
+ "credit_code":credit_code}
|
|
_json = json.dumps(_json, ensure_ascii=False)
|
|
_json = json.dumps(_json, ensure_ascii=False)
|
|
add_redis_list.append((name, _json))
|
|
add_redis_list.append((name, _json))
|
|
|
|
|
|
@@ -259,13 +273,13 @@ def isLegalNewName(enterprise_name):
|
|
return -1
|
|
return -1
|
|
if re.search("[区市镇乡县洲州路街]$", enterprise_name) and not re.search("(超市|门市|保护区|园区|景区|校区|社区|服务区|工区|小区|集市|花市|夜市|学区|旅游区|矿区|林区|度假区|示范区|菜市)$", enterprise_name):
|
|
if re.search("[区市镇乡县洲州路街]$", enterprise_name) and not re.search("(超市|门市|保护区|园区|景区|校区|社区|服务区|工区|小区|集市|花市|夜市|学区|旅游区|矿区|林区|度假区|示范区|菜市)$", enterprise_name):
|
|
return -1
|
|
return -1
|
|
- if re.search("^个人|^个体|测试$", enterprise_name):
|
|
|
|
|
|
+ if re.search("^.?(个人|个体|测试)|(个人|个体|测试).?$", enterprise_name):
|
|
return -1
|
|
return -1
|
|
if re.search("个人|个体", enterprise_name):
|
|
if re.search("个人|个体", enterprise_name):
|
|
_split = re.split("个人|个体", enterprise_name)
|
|
_split = re.split("个人|个体", enterprise_name)
|
|
if len(_split[0]) <= 5:
|
|
if len(_split[0]) <= 5:
|
|
return -1
|
|
return -1
|
|
- if re.search("测试", enterprise_name) and len(enterprise_name) < 8:
|
|
|
|
|
|
+ if (re.search("测试", enterprise_name) and len(enterprise_name) < 8) or len(re.findall("测试", enterprise_name))>1:
|
|
return -1
|
|
return -1
|
|
if re.search("^(省|自治[县州区]|市|县|区|镇|乡|街道)", enterprise_name) and not re.search(
|
|
if re.search("^(省|自治[县州区]|市|县|区|镇|乡|街道)", enterprise_name) and not re.search(
|
|
"^(镇江|乡宁|镇原|镇海|镇安|镇巴|镇坪|镇赉|镇康|镇沅|镇雄|镇远|镇宁|乡城|镇平|市中|市南|市北)", enterprise_name):
|
|
"^(镇江|乡宁|镇原|镇海|镇安|镇巴|镇坪|镇赉|镇康|镇沅|镇雄|镇远|镇宁|乡城|镇平|市中|市南|市北)", enterprise_name):
|