|
@@ -86,7 +86,10 @@ def getSet(list_dict,key):
|
|
_set.add(str(item[key]))
|
|
_set.add(str(item[key]))
|
|
return _set
|
|
return _set
|
|
|
|
|
|
-def split_with_time(list_dict,sort_key,timedelta=86400*120):
|
|
|
|
|
|
+def split_with_time(list_dict,sort_key,timedelta=86400*120,more_than_one=True):
|
|
|
|
+ group_num = 1
|
|
|
|
+ if more_than_one:
|
|
|
|
+ group_num = 2
|
|
if len(list_dict)>0:
|
|
if len(list_dict)>0:
|
|
if sort_key in list_dict[0]:
|
|
if sort_key in list_dict[0]:
|
|
list_dict.sort(key=lambda x:x[sort_key])
|
|
list_dict.sort(key=lambda x:x[sort_key])
|
|
@@ -102,7 +105,7 @@ def split_with_time(list_dict,sort_key,timedelta=86400*120):
|
|
if len(_group)>1:
|
|
if len(_group)>1:
|
|
list_group.append(_group)
|
|
list_group.append(_group)
|
|
_begin = i + 1
|
|
_begin = i + 1
|
|
- if len(list_dict)>1:
|
|
|
|
|
|
+ if len(list_dict)>=group_num:
|
|
_group = []
|
|
_group = []
|
|
for j in range(_begin,len(list_dict)):
|
|
for j in range(_begin,len(list_dict)):
|
|
_group.append(list_dict[j])
|
|
_group.append(list_dict[j])
|
|
@@ -442,12 +445,6 @@ class f_remege_limit_num_contain_bychannel(BaseUDAF):
|
|
return _result
|
|
return _result
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
def terminate(self, buffer):
|
|
def terminate(self, buffer):
|
|
list_group = []
|
|
list_group = []
|
|
the_group = buffer[0]
|
|
the_group = buffer[0]
|
|
@@ -461,6 +458,7 @@ class f_remege_limit_num_contain_bychannel(BaseUDAF):
|
|
re_merge = False
|
|
re_merge = False
|
|
for _key in keys:
|
|
for _key in keys:
|
|
if len(getSet(the_group,_key))>1:
|
|
if len(getSet(the_group,_key))>1:
|
|
|
|
+ log("has_more_than_one:%s"%str(getSet(the_group,_key)))
|
|
re_merge = True
|
|
re_merge = True
|
|
break
|
|
break
|
|
#判断是否相似而不相同
|
|
#判断是否相似而不相同
|
|
@@ -565,8 +563,7 @@ class f_remege_limit_num_contain_bychannel(BaseUDAF):
|
|
# final_group.append(list(set(_group["docid"])))
|
|
# final_group.append(list(set(_group["docid"])))
|
|
else:
|
|
else:
|
|
final_group = [list(set([item["docid"] for item in the_group]))]
|
|
final_group = [list(set([item["docid"] for item in the_group]))]
|
|
- log(str(final_group))
|
|
|
|
-
|
|
|
|
|
|
+ log("%s--%s"%("final_group",str(final_group)))
|
|
|
|
|
|
#每个channel选择一篇公告
|
|
#每个channel选择一篇公告
|
|
final_group_channel = []
|
|
final_group_channel = []
|
|
@@ -586,16 +583,16 @@ class f_remege_limit_num_contain_bychannel(BaseUDAF):
|
|
|
|
|
|
#根据日期进行切分
|
|
#根据日期进行切分
|
|
new_dict_channel_id = {}
|
|
new_dict_channel_id = {}
|
|
- print(dict_channel_id)
|
|
|
|
|
|
+ log("%s:%s"%("dict_channel_id",str(dict_channel_id)))
|
|
for k,v in dict_channel_id.items():
|
|
for k,v in dict_channel_id.items():
|
|
- list_time_docids = split_with_time(v,"page_time_stamp",86400*6)
|
|
|
|
- print(list_time_docids)
|
|
|
|
|
|
+ list_time_docids = split_with_time(v,"page_time_stamp",86400*6,more_than_one=False)
|
|
|
|
+ log(list_time_docids)
|
|
for _l in list_time_docids:
|
|
for _l in list_time_docids:
|
|
list_t = self.splitByTimezone(_l,"json_dicttime")
|
|
list_t = self.splitByTimezone(_l,"json_dicttime")
|
|
for _t in list_t:
|
|
for _t in list_t:
|
|
otherChannel += 1
|
|
otherChannel += 1
|
|
new_dict_channel_id[otherChannel] = _t
|
|
new_dict_channel_id[otherChannel] = _t
|
|
- print(new_dict_channel_id)
|
|
|
|
|
|
+ log("%s:%s"%("new_dict_channel_id",str(new_dict_channel_id)))
|
|
channel_dict = {}
|
|
channel_dict = {}
|
|
for k,v in new_dict_channel_id.items():
|
|
for k,v in new_dict_channel_id.items():
|
|
v.sort(key=lambda x:x["docid"])
|
|
v.sort(key=lambda x:x["docid"])
|
|
@@ -1231,11 +1228,36 @@ class f_encode_time(object):
|
|
|
|
|
|
return _encode
|
|
return _encode
|
|
|
|
|
|
|
|
+
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
a = f_remege_limit_num_contain_bychannel()
|
|
a = f_remege_limit_num_contain_bychannel()
|
|
buffer = a.new_buffer()
|
|
buffer = a.new_buffer()
|
|
- a.iterate(buffer,1,1,86400*1,"1","1","1","1","1","1","1",5,5,None)
|
|
|
|
- a.iterate(buffer,3,1,86400*4,"1","1","1","1","1","1","1",5,5,'{"a":"dbb"}')
|
|
|
|
- a.iterate(buffer,5,1,86400*10,"1","1","1","1","1","1","1",5,5,"{}")
|
|
|
|
|
|
+ tmp_s = '''
|
|
|
|
+ 225405503 230202661 2022-04-02 1648828800 TZTX-2022-GK005 生活家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 10 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 226411495 2022-03-16 1647360000 TZTX-2022-GK005 生活家具采购项目 台州天兴工程管理咨询有限公司关于生活家具采购项目的更正公告 台州天兴管理咨询有限公司关于生活家具项目更正 台州市机关事务管理局 台州天兴工程管理咨询有限公司 10000.0 51 1 5 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 230202661 2022-04-02 1648828800 TZTX-2022-GK005 生活家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 10 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 231350581 2022-04-07 1649260800 TZTX-2022-GK005 生活家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 10 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 225405503 2022-03-10 1646841600 TZTX-2022-GK005 台州市机关事务管理局家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 1730000.0 52 1 5 "{"time_bidclose": "2022-03-30", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 231350581 2022-04-07 1649260800 TZTX-2022-GK005 生活家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 10 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 231350581 2022-04-07 1649260800 TZTX-2022-GK005 生活家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 10 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 225405503 2022-03-10 1646841600 TZTX-2022-GK005 台州市机关事务管理局家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 1730000.0 52 1 5 "{"time_bidclose": "2022-03-30", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 225405503 2022-03-10 1646841600 TZTX-2022-GK005 台州市机关事务管理局家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 1730000.0 52 1 5 "{"time_bidclose": "2022-03-30", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 230101787 2022-03-31 1648656000 TZTX-2022-GK005 生活家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 10 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 230101787 2022-03-31 1648656000 TZTX-2022-GK005 生活家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 10 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 230093569 2022-03-31 1648656000 TZTX-2022-GK005 台州市机关事务管理局家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 7 "{"time_bidclose": "", "time_bidopen": "", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "", "time_get_file_start": "", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 226411495 2022-03-16 1647360000 TZTX-2022-GK005 生活家具采购项目 台州天兴工程管理咨询有限公司关于生活家具采购项目的更正公告 台州天兴管理咨询有限公司关于生活家具项目更正 台州市机关事务管理局 台州天兴工程管理咨询有限公司 10000.0 51 1 5 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 230093569 2022-03-31 1648656000 TZTX-2022-GK005 台州市机关事务管理局家具采购项目 台州市机关事务管理局 台州天兴工程管理咨询有限公司 浙江华泰办公家具有限公司 1412700.0 101 1 7 "{"time_bidclose": "", "time_bidopen": "", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "", "time_get_file_start": "", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+ 225405503 226411495 2022-03-16 1647360000 TZTX-2022-GK005 生活家具采购项目 台州天兴工程管理咨询有限公司关于生活家具采购项目的更正公告 台州天兴管理咨询有限公司关于生活家具项目更正 台州市机关事务管理局 台州天兴工程管理咨询有限公司 10000.0 51 1 5 "{"time_bidclose": "", "time_bidopen": "2022-03-30", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "2022-03-30", "time_get_file_start": "2022-03-10", "time_publicity_end": "", "time_publicity_start": "", "time_registration_end": "", "time_registration_start": "", "time_release": ""}"
|
|
|
|
+
|
|
|
|
+ '''
|
|
|
|
+ for _s in tmp_s.split("\n"):
|
|
|
|
+ ls = _s.split("\t")
|
|
|
|
+ if len(ls)!=17:
|
|
|
|
+ continue
|
|
|
|
+ _confid = 1 if ls[14] =="" else ls[14]
|
|
|
|
+ a.iterate(buffer,ls[1],ls[13],int(ls[3]),ls[8],ls[10],ls[11],ls[12],ls[7],ls[5],ls[4],_confid,ls[15],ls[16][1:-1])
|
|
|
|
+ # a.iterate(buffer,219957825,101,86400*4,"1","1","1","1","1","1","1",0,5,'{"time_bidclose": "", "time_bidopen": "2022-02-10", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "", "time_get_file_start": "", "time_publicity_end": "2022-02-21", "time_publicity_start": "2022-02-11", "time_registration_end": "", "time_registration_start": "", "time_release": ""}')
|
|
|
|
+ # a.iterate(buffer,219957825,101,86400*4,"1","1","1","1","1","1","1",0,5,'{"time_bidclose": "", "time_bidopen": "2022-02-10", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "", "time_get_file_start": "", "time_publicity_end": "2022-02-21", "time_publicity_start": "2022-02-11", "time_registration_end": "", "time_registration_start": "", "time_release": ""}')
|
|
|
|
+ # a.iterate(buffer,219957825,101,86400*4,"1","1","1","1","1","1","1",0,5,'{"time_bidclose": "", "time_bidopen": "2022-02-10", "time_bidstart": "", "time_commencement": "", "time_completion": "", "time_earnest_money_end": "", "time_earnest_money_start": "", "time_get_file_end": "", "time_get_file_start": "", "time_publicity_end": "2022-02-22", "time_publicity_start": "2022-02-11", "time_registration_end": "", "time_registration_start": "", "time_release": ""}')
|
|
print(a.terminate(buffer))
|
|
print(a.terminate(buffer))
|
|
print(1)
|
|
print(1)
|