Pārlūkot izejas kodu

产品字段提取

rogel 4 gadi atpakaļ
vecāks
revīzija
05e70becc9

+ 28 - 23
BiddingKG/dl/complaint/punish_predictor.py

@@ -68,7 +68,7 @@ class Punish_Extract():
                     count += 1
                     # print(count)
                     sentence_len = [len(sentence.sentence_text) for sentence in sentences]
-                    maxlen = max(sentence_len)
+                    maxlen = min(max(sentence_len),MAXlLEN)
                     sentences_x = []
                     for sentence in sentences:
                         sentence = sentence.sentence_text
@@ -386,29 +386,34 @@ class Punish_Extract():
             title = article.title
             text=article.content
             keyword, punishType = self.get_punishType(title, text)
-            if punishType == "未知类别":
-                list_result.append({"punish":{}})
-            else:
-                # print('处罚类型:',punishType)
-                punish_code = self.predict_punishCode(list_sentences)
-                # print('处罚编号: ',punish_code)
-                institutions, punishTimes = self.get_institution(title, list_sentence, list_entity)
-                # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
-                punishDecision = self.get_punishDecision(text, punishType)
-                # print('处罚决定:',punishDecision)
-                punishWhether= self.get_punishWhether(punishDecision, text, punishType)
-                # print('投诉是否成立:',punishWhether)
-                complainants, punishPeople = self.get_complainant(punishType, list_sentence, list_entity)
-                # print('投诉人:%s  被投诉人:%s'%(complainants, punishPeople))
-                punish_dic = {'punish_code':punish_code,
-                              'punishType':punishType,
-                              'punishDecision':punishDecision,
-                             'complainants':complainants,
-                             'punishPeople':punishPeople,
-                             'punishWhether':punishWhether,
-                             'institutions':institutions,
-                             'punishTimes':punishTimes}
+
+            # print('处罚类型:',punishType)
+            punish_code = self.predict_punishCode(list_sentences)
+            # print('处罚编号: ',punish_code)
+            institutions, punishTimes = self.get_institution(title, list_sentence, list_entity)
+            # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
+            punishDecision = self.get_punishDecision(text, punishType)
+            # print('处罚决定:',punishDecision)
+            punishWhether= self.get_punishWhether(punishDecision, text, punishType)
+            # print('投诉是否成立:',punishWhether)
+            complainants, punishPeople = self.get_complainant(punishType, list_sentence, list_entity)
+            # print('投诉人:%s  被投诉人:%s'%(complainants, punishPeople))
+            punish_dic = {'punish_code':punish_code,
+                          'punishType':punishType,
+                          'punishDecision':punishDecision,
+                         'complainants':complainants,
+                         'punishPeople':punishPeople,
+                         'punishWhether':punishWhether,
+                         'institutions':institutions,
+                         'punishTimes':punishTimes}
+            _count = 0
+            for k,v in punish_dic.items():
+                if v!="":
+                    _count += 1
+            if _count>=2 and punish_dic["punishType"]!="未知类别":
                 list_result.append({"punish":punish_dic})
+            else:
+                list_result.append({"punish":{}})
         return list_result
 
 

+ 2 - 0
BiddingKG/dl/interface/Preprocessing.py

@@ -1917,6 +1917,8 @@ def union_result(codeName,prem):
     @return:拼接起来的字典
     '''
     result = []
+    print(codeName)
+    print(prem)
     assert len(codeName)==len(prem)
     for item_code,item_prem in zip(codeName,prem):
         result.append(dict(item_code,**item_prem))

+ 3 - 0
BiddingKG/dl/interface/getAttributes.py

@@ -1177,6 +1177,9 @@ def getOtherAttributes(list_entity):
             dict_other["time_bidclose"] = timeFormat(entity.entity_text)
         elif entity.entity_type=="person" and entity.label ==4:
             dict_other["person_review"].append(entity.entity_text)
+        elif entity.entity_type=='product':
+            dict_other["product"].append(entity.entity_text)
+    dict_other["product"] = list(set(dict_other["product"]))
     return dict_other