|
@@ -64,8 +64,7 @@ class enterprise2Redis():
|
|
|
http_auth=('elastic', 'WWBu9#1HWHo$$gJm'),
|
|
|
port=9200)
|
|
|
body = {
|
|
|
- "_source": ["name", "history_names", 'legal_person', 'reg_capital', 'credit_code', 'tax_number',
|
|
|
- 'reg_number', 'org_number',
|
|
|
+ "_source": ["name", 'credit_code',
|
|
|
"zhao_biao_number", "zhong_biao_number", "dai_li_number", "bid_number"],
|
|
|
'query': { # 查询命令
|
|
|
"bool": {
|
|
@@ -114,23 +113,34 @@ class enterprise2Redis():
|
|
|
for item in result:
|
|
|
item = item['_source']
|
|
|
name = item['name']
|
|
|
- history_names = item.get("history_names", "")
|
|
|
- legal_person = item.get("legal_person", "")
|
|
|
- reg_capital = item.get("reg_capital", "")
|
|
|
+ # history_names = item.get("history_names", "")
|
|
|
+ # legal_person = item.get("legal_person", "")
|
|
|
+ # reg_capital = item.get("reg_capital", "")
|
|
|
credit_code = item.get("credit_code", "")
|
|
|
credit_code = re.sub("\s","",credit_code)
|
|
|
credit_code = credit_code if re.search("^[\dA-Z]{2}\d{6}[\dA-Z]{10}$",credit_code) else ""
|
|
|
- tax_number = item.get("tax_number", "")
|
|
|
- reg_number = item.get("reg_number", "")
|
|
|
- org_number = item.get("org_number", "")
|
|
|
+ # tax_number = item.get("tax_number", "")
|
|
|
+ # tax_number = re.sub("\s","",tax_number)
|
|
|
+ # tax_number = tax_number if len(tax_number)>=15 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",tax_number) else ""
|
|
|
+ # reg_number = item.get("reg_number", "")
|
|
|
+ # reg_number = re.sub("\s","",reg_number)
|
|
|
+ # reg_number = reg_number if len(reg_number)>=10 and not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",reg_number) else ""
|
|
|
+ # org_number = item.get("org_number", "") # 已弃用,统一社会信用代码取代组织机构代码
|
|
|
+ # org_number = org_number if not re.search("@|\d{4}-\d{2}-\d{2}|\.(com|cn|COM|CN)",org_number) else ""
|
|
|
zhao_biao_number = item.get("zhao_biao_number", 0)
|
|
|
+ zhao_biao_number = zhao_biao_number if zhao_biao_number else 0
|
|
|
zhong_biao_number = item.get("zhong_biao_number", 0)
|
|
|
+ zhong_biao_number = zhong_biao_number if zhong_biao_number else 0
|
|
|
dai_li_number = item.get("dai_li_number", 0)
|
|
|
+ dai_li_number = dai_li_number if dai_li_number else 0
|
|
|
bid_number = item.get("bid_number", 0)
|
|
|
+ bid_number = bid_number if bid_number else 0
|
|
|
|
|
|
num = 0
|
|
|
- for business in [history_names, legal_person, reg_capital, credit_code, tax_number, reg_number, org_number]:
|
|
|
- if len(str(business).replace("-", "")) > 1:
|
|
|
+ for business in [credit_code]: # 新增实体只判断credit_code
|
|
|
+ business = re.sub("\s-","",str(business))
|
|
|
+ business = re.sub("^nan$","",business)
|
|
|
+ if len(business) > 1:
|
|
|
num += 1
|
|
|
isLegal = isLegalNewName(name)
|
|
|
if isLegal >= 0:
|
|
@@ -263,13 +273,13 @@ def isLegalNewName(enterprise_name):
|
|
|
return -1
|
|
|
if re.search("[区市镇乡县洲州路街]$", enterprise_name) and not re.search("(超市|门市|保护区|园区|景区|校区|社区|服务区|工区|小区|集市|花市|夜市|学区|旅游区|矿区|林区|度假区|示范区|菜市)$", enterprise_name):
|
|
|
return -1
|
|
|
- if re.search("^个人|^个体|测试$", enterprise_name):
|
|
|
+ if re.search("^.?(个人|个体|测试)|(个人|个体|测试).?$", enterprise_name):
|
|
|
return -1
|
|
|
if re.search("个人|个体", enterprise_name):
|
|
|
_split = re.split("个人|个体", enterprise_name)
|
|
|
if len(_split[0]) <= 5:
|
|
|
return -1
|
|
|
- if re.search("测试", enterprise_name) and len(enterprise_name) < 8:
|
|
|
+ if (re.search("测试", enterprise_name) and len(enterprise_name) < 8) or len(re.findall("测试", enterprise_name))>1:
|
|
|
return -1
|
|
|
if re.search("^(省|自治[县州区]|市|县|区|镇|乡|街道)", enterprise_name) and not re.search(
|
|
|
"^(镇江|乡宁|镇原|镇海|镇安|镇巴|镇坪|镇赉|镇康|镇沅|镇雄|镇远|镇宁|乡城|镇平|市中|市南|市北)", enterprise_name):
|