|
@@ -479,7 +479,7 @@ class f_set_docid(BaseUDAF):
|
|
|
defind_count = list_docs[0]["defind_count"]
|
|
|
print(defind_count)
|
|
|
for i in range(len(list_docs)-1):
|
|
|
- if abs(list_docs[i]["page_time_stamp"]-list_docs[i+1]["page_time_stamp"])<=86400*2:
|
|
|
+ if abs(list_docs[i]["page_time_stamp"]-list_docs[i+1]["page_time_stamp"])<=86400*7:
|
|
|
continue
|
|
|
else:
|
|
|
_group = []
|
|
@@ -590,10 +590,10 @@ class f_group_fingerprint(BaseUDAF):
|
|
|
buffer[0].append(docid)
|
|
|
|
|
|
def merge(self, buffer, pbuffer):
|
|
|
- buffer[0].extend(pbuffer[0])
|
|
|
+ buffer[0].extend(pbuffer[0][:100000])
|
|
|
|
|
|
def terminate(self, buffer):
|
|
|
- list_docid = buffer[0]
|
|
|
+ list_docid = buffer[0][:100000]
|
|
|
list_docid.sort(key=lambda x:x)
|
|
|
return ",".join([str(a) for a in list_docid])
|
|
|
|
|
@@ -635,7 +635,7 @@ class f_dump_probability(BaseUDAF):
|
|
|
list_data.append(_dict)
|
|
|
if len(list_data)>10000:
|
|
|
break
|
|
|
- list_group = split_with_time(list_data,sort_key="page_time_stamp",timedelta=86400*2)
|
|
|
+ list_group = split_with_time(list_data,sort_key="page_time_stamp",timedelta=86400*7)
|
|
|
return json.dumps(list_group)
|
|
|
|
|
|
|
|
@@ -1958,7 +1958,7 @@ class f_set_docid_binaryChart(BaseUDAF):
|
|
|
|
|
|
def terminate(self, buffer):
|
|
|
list_docs = buffer[0]
|
|
|
- list_timeGroups = split_with_time(list_docs,"page_time_stamp",86400*2)
|
|
|
+ list_timeGroups = split_with_time(list_docs,"page_time_stamp",86400*7)
|
|
|
|
|
|
list_group = []
|
|
|
|
|
@@ -2001,7 +2001,7 @@ class f_set_docid_binaryChart(BaseUDAF):
|
|
|
|
|
|
|
|
|
|
|
|
-def split_with_time(list_dict,sort_key,timedelta=86400*2):
|
|
|
+def split_with_time(list_dict,sort_key,timedelta=86400*7):
|
|
|
if len(list_dict)>0:
|
|
|
if sort_key in list_dict[0]:
|
|
|
list_dict.sort(key=lambda x:x[sort_key])
|
|
@@ -2116,7 +2116,7 @@ class f_stamp_squence(BaseUDAF):
|
|
|
list_stamp.sort(key=lambda x:x)
|
|
|
list_stamp_final = []
|
|
|
_begin = 0
|
|
|
- _time_decase = 86400*2
|
|
|
+ _time_decase = 86400*7
|
|
|
logging.info(str(list_stamp))
|
|
|
for _index in range(len(list_stamp)-1):
|
|
|
if list_stamp[_index+1]-list_stamp[_index]<_time_decase:
|