|
@@ -2444,7 +2444,7 @@ def check_project_codes_merge(list_code,list_code_to_merge,b_log):
|
|
return 1
|
|
return 1
|
|
return 0
|
|
return 0
|
|
|
|
|
|
-def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=False):
|
|
|
|
|
|
+def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=False,simple_check=False):
|
|
docids = _proj.get(project_docids,"")
|
|
docids = _proj.get(project_docids,"")
|
|
page_time = _proj.get(project_page_time,"")
|
|
page_time = _proj.get(project_page_time,"")
|
|
project_codes = _proj.get(project_project_codes,"")
|
|
project_codes = _proj.get(project_project_codes,"")
|
|
@@ -2538,14 +2538,6 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
|
|
return False,0
|
|
return False,0
|
|
return False
|
|
return False
|
|
|
|
|
|
- #事件判断-编号
|
|
|
|
- _codes_check = check_project_codes_merge(list_code,list_code_to_merge,b_log)
|
|
|
|
- check_dict[_codes_check] += 1
|
|
|
|
- if check_dict[-1]>0:
|
|
|
|
- if return_prob:
|
|
|
|
- return False,0
|
|
|
|
- return False
|
|
|
|
-
|
|
|
|
_product_check = check_product_merge(product,product_to_merge,b_log)
|
|
_product_check = check_product_merge(product,product_to_merge,b_log)
|
|
|
|
|
|
|
|
|
|
@@ -2553,8 +2545,11 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
|
|
|
|
|
|
_title_check = check_dynamics_title_merge(project_dynamics,project_dynamics_to_merge,b_log)
|
|
_title_check = check_dynamics_title_merge(project_dynamics,project_dynamics_to_merge,b_log)
|
|
|
|
|
|
|
|
+ min_count = 2
|
|
|
|
+ if product=="" or product_to_merge=="":
|
|
|
|
+ min_count = 1
|
|
#事件判断--产品和名称、标题需要满足两个个
|
|
#事件判断--产品和名称、标题需要满足两个个
|
|
- if _project_name_check+_product_check+_title_check<2:
|
|
|
|
|
|
+ if _project_name_check+_product_check+_title_check<min_count:
|
|
if b_log:
|
|
if b_log:
|
|
log("project_name,project_name_to_merge %s %s"%(project_name,project_name_to_merge))
|
|
log("project_name,project_name_to_merge %s %s"%(project_name,project_name_to_merge))
|
|
log("product,product_to_merge %s %s"%(product,product_to_merge))
|
|
log("product,product_to_merge %s %s"%(product,product_to_merge))
|
|
@@ -2566,6 +2561,19 @@ def check_merge_rule(_proj,_dict,b_log=False,time_limit=86400*200,return_prob=Fa
|
|
check_dict[1] += 1
|
|
check_dict[1] += 1
|
|
check_dict[1] += 1
|
|
check_dict[1] += 1
|
|
|
|
|
|
|
|
+ if simple_check:
|
|
|
|
+ if return_prob:
|
|
|
|
+ _prob = check_dict[1]/(check_dict[-1]+check_dict[0]+check_dict[1])
|
|
|
|
+ return True,_prob
|
|
|
|
+ return True
|
|
|
|
+
|
|
|
|
+ #事件判断-编号
|
|
|
|
+ _codes_check = check_project_codes_merge(list_code,list_code_to_merge,b_log)
|
|
|
|
+ check_dict[_codes_check] += 1
|
|
|
|
+ if check_dict[-1]>0:
|
|
|
|
+ if return_prob:
|
|
|
|
+ return False,0
|
|
|
|
+ return False
|
|
|
|
|
|
#时间判断-其他时间
|
|
#时间判断-其他时间
|
|
_time_check = check_time_merge(_proj,_dict,b_log)
|
|
_time_check = check_time_merge(_proj,_dict,b_log)
|
|
@@ -2641,7 +2649,7 @@ class f_group_merge_projects(BaseUDAF):
|
|
for _j in range(_i+1,len(_group)):
|
|
for _j in range(_i+1,len(_group)):
|
|
_p_uuid,_,_p = _group[_i]
|
|
_p_uuid,_,_p = _group[_i]
|
|
_pp_uuid,_,_pp = _group[_j]
|
|
_pp_uuid,_,_pp = _group[_j]
|
|
- if check_merge_rule(_p,_pp,False):
|
|
|
|
|
|
+ if check_merge_rule(_p,_pp,False,simple_check=True):
|
|
list_group_pair.append([_p_uuid,_pp_uuid])
|
|
list_group_pair.append([_p_uuid,_pp_uuid])
|
|
if len(list_group_pair)>0:
|
|
if len(list_group_pair)>0:
|
|
list_group_data.append(list_group_pair)
|
|
list_group_data.append(list_group_pair)
|
|
@@ -2973,18 +2981,28 @@ class f_check_projects_by_num(BaseUDTF):
|
|
if _num>=len_start and _num<=len_end:
|
|
if _num>=len_start and _num<=len_end:
|
|
self.forward(json.dumps(_proj,ensure_ascii=False))
|
|
self.forward(json.dumps(_proj,ensure_ascii=False))
|
|
|
|
|
|
-@annotate('string->string')
|
|
|
|
|
|
+@annotate('string->string,string')
|
|
class f_check_projects_by_time(BaseUDTF):
|
|
class f_check_projects_by_time(BaseUDTF):
|
|
|
|
|
|
def process(self,json_projects):
|
|
def process(self,json_projects):
|
|
if json_projects is not None:
|
|
if json_projects is not None:
|
|
list_projects = json.loads(json_projects)
|
|
list_projects = json.loads(json_projects)
|
|
|
|
+ _type = ""
|
|
for _proj in list_projects:
|
|
for _proj in list_projects:
|
|
zhaobiao = _proj.get(project_zhao_biao_page_time)
|
|
zhaobiao = _proj.get(project_zhao_biao_page_time)
|
|
zhongbiao = _proj.get(project_zhong_biao_page_time)
|
|
zhongbiao = _proj.get(project_zhong_biao_page_time)
|
|
- if (zhongbiao is None or zhongbiao=="") and zhaobiao is not None and zhaobiao!="":
|
|
|
|
|
|
+ if getLength(zhaobiao)>0 and getLength(zhongbiao)>0:
|
|
|
|
+ _type = "招中标"
|
|
|
|
+ elif getLength(zhaobiao)>0 and getLength(zhongbiao)==0:
|
|
|
|
+ _type = "招标"
|
|
|
|
+ elif getLength(zhaobiao)==0 and getLength(zhongbiao)>0:
|
|
|
|
+ _type = "中标"
|
|
|
|
+ else:
|
|
|
|
+ _type = "其他"
|
|
|
|
+ self.forward(json.dumps(_proj,ensure_ascii=False),_type)
|
|
|
|
+ # if (zhongbiao is None or zhongbiao=="") and zhaobiao is not None and zhaobiao!="":
|
|
# if zhaobiao is not None and zhongbiao is not None and zhaobiao!="" and zhongbiao!="":
|
|
# if zhaobiao is not None and zhongbiao is not None and zhaobiao!="" and zhongbiao!="":
|
|
- self.forward(json.dumps(_proj,ensure_ascii=False))
|
|
|
|
|
|
+ # self.forward(json.dumps(_proj,ensure_ascii=False))
|
|
|
|
|
|
@annotate('string->string,string,double')
|
|
@annotate('string->string,string,double')
|
|
class f_extract_year_win_and_price(BaseUDTF):
|
|
class f_extract_year_win_and_price(BaseUDTF):
|