Browse Source

各要素指标

luojiehua 3 năm trước cách đây
mục cha
commit
9c43bb9e76
2 tập tin đã thay đổi với 12 bổ sung7 xóa
  1. 1 1
      BiddingKG/dl/interface/Preprocessing.py
  2. 11 6
      BiddingKG/maxcompute/cycleRec.py

+ 1 - 1
BiddingKG/dl/interface/Preprocessing.py

@@ -1577,7 +1577,7 @@ def get_preprocessed_sentences(list_articles,useselffool=True,cost_time=dict()):
             tokens_all = getTokens(sentences,useselffool=useselffool)
             if key_nerToken not in cost_time:
                 cost_time[key_nerToken] = 0
-            cost_time[key_nerToken] += time.time()-start_time
+            cost_time[key_nerToken] += round(time.time()-start_time,2)
 
 
             for sentence_index in range(len(sentences)):

+ 11 - 6
BiddingKG/maxcompute/cycleRec.py

@@ -134,8 +134,7 @@ def getDistanceOfCluster(aint_center):
 
 
 def getPeriod(aint_timestamp):
-
-    aint_center = clusterTimestamp(aint_timestamp)#聚类
+    aint_center = clusterTimestamp(aint_timestamp,distance=29*24*60*60)#聚类
     aint_dis,int_avgD,int_minD,int_maxD,cluster_d = getDistanceOfCluster(aint_center)
     if cluster_d is not None:
         aint_center = clusterTimestamp(aint_center,distance=(cluster_d-1)*24*60*60)
@@ -154,7 +153,7 @@ def getPeriod(aint_timestamp):
             base_prob = 0.9
         _prob = round(base_prob-(flt_powD/len(aint_dis)/int_avgD**2),4)
         # if flt_powD/len(aint_dis)<30:
-        if _prob>0.5:
+        if _prob>0.3:
             return last_time,_prob,int(int_avgD),int(int_minD),int(int_maxD),len(aint_dis)
     return None,_prob,None,None,None,None
 
@@ -192,21 +191,27 @@ class f_getTendererCompany(BaseUDTF):
                 self.forward(_third)
 
 
-@annotate('string->string')
+@annotate('string,bigint->string')
 class f_concatstr(BaseUDAF):
 
     def new_buffer(self):
         return [[]]
 
-    def iterate(self,buffer, _str):
+    def iterate(self,buffer, _str,signal):
+        self.signal = signal
         if _str is not None and _str!="":
             buffer[0].append(str(_str))
             buffer[0] = buffer[0][:10000]
 
 
+
     def merge(self, buffer, pbuffer):
         buffer[0].extend(pbuffer[0])
         buffer[0] = buffer[0][:10000]
 
     def terminate(self, buffer):
-        return ",".join(buffer[0])
+        _s = ",".join(buffer[0])
+        _s1 = set(_s.split(","))
+        if "" in _s1:
+            _s1.remove("")
+        return ",".join(list(_s1))