|
@@ -143,7 +143,7 @@ class Punish_Extract():
|
|
|
# ner_list.append((n, start, end))
|
|
|
ner_list.append(n) # 改为只返回实体字符
|
|
|
# article_ner_list.append(ner_list)
|
|
|
- article_ner_list.append(';'.join(set(ner_list)))
|
|
|
+ article_ner_list.append(';'.join(set(ner_list)))
|
|
|
return article_ner_list[0]
|
|
|
|
|
|
# 处罚类型
|
|
@@ -261,7 +261,7 @@ class Punish_Extract():
|
|
|
elif re.search(rule4, x[-int(len(x)*0.4):]):
|
|
|
return re.search(rule4, x[-int(len(x)*0.4):]).group(0)
|
|
|
else:
|
|
|
- return ' '
|
|
|
+ return ''
|
|
|
|
|
|
# 投诉是否成立
|
|
|
def get_punishWhether(self, x1, x2, x3):
|
|
@@ -278,7 +278,7 @@ class Punish_Extract():
|
|
|
'|予以驳回|不予受理|继续开展采购|被投诉人不存在违法违规行为|中标结果有效|投诉[^,。]{,10}不成立'
|
|
|
'|维持被投诉人|不支持[^,。]{,20}投诉|无确凿证据')
|
|
|
if x3 != '投诉处理':
|
|
|
- return ' '
|
|
|
+ return ''
|
|
|
elif re.search(p1, x1):
|
|
|
return '投诉成立'
|
|
|
elif re.search(p2, x1):
|
|
@@ -287,7 +287,7 @@ class Punish_Extract():
|
|
|
return '投诉成立'
|
|
|
elif re.search(p2, x2):
|
|
|
return '投诉无效'
|
|
|
- return ' '
|
|
|
+ return ''
|
|
|
|
|
|
# 执法机构、处罚时间
|
|
|
def get_institution(self, title, sentences_l, entity_l):
|
|
@@ -296,7 +296,7 @@ class Punish_Extract():
|
|
|
:param title: 文章标题
|
|
|
:param sentences_l: 单篇公告句子列表
|
|
|
:param entity_l: 单篇公告实体列表
|
|
|
- :return: 执法机构及处罚时间字符串,多个的用;号隔开
|
|
|
+ :return: 执法机构及处罚时间字符串,多个的用;号隔开
|
|
|
'''
|
|
|
institutions = []
|
|
|
punishTimes = []
|
|
@@ -359,7 +359,7 @@ class Punish_Extract():
|
|
|
institutions.append(ins)
|
|
|
if punishTimes == [] and ptime != "":
|
|
|
punishTimes.append(ptime)
|
|
|
- return ";".join(institutions), ";".join(punishTimes)
|
|
|
+ return ";".join(institutions), ";".join(punishTimes)
|
|
|
|
|
|
# 投诉人、被投诉人、被处罚人
|
|
|
def get_complainant(self, punishType, sentences_l, entity_l):
|
|
@@ -426,7 +426,7 @@ class Punish_Extract():
|
|
|
punishPeople.append(ner_l)
|
|
|
complainants = set([it.entity_text for l in complainants for it in l])
|
|
|
punishPeople = set([it.entity_text for l in punishPeople for it in l])
|
|
|
- return ';'.join(complainants), ';'.join(punishPeople)
|
|
|
+ return ';'.join(complainants), ';'.join(punishPeople)
|
|
|
|
|
|
def get_punish_extracts_backup(self, doc_id=' ', title=' ', text=' '):
|
|
|
list_articles, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed([[doc_id, text, "", "", ""]],
|
|
@@ -459,30 +459,35 @@ class Punish_Extract():
|
|
|
for article,list_sentence,list_entity in zip(list_articles,list_sentences,list_entitys):
|
|
|
title = article.title
|
|
|
text=article.content
|
|
|
+
|
|
|
keyword, punishType = self.get_punishType(title, text)
|
|
|
- if punishType == "未知类别":
|
|
|
- list_result.append({"punish":{}})
|
|
|
- else:
|
|
|
- # print('处罚类型:',punishType)
|
|
|
- punish_code = self.predict_punishCode(list_sentences)
|
|
|
- # print('处罚编号: ',punish_code)
|
|
|
- institutions, punishTimes = self.get_institution(title, list_sentence, list_entity)
|
|
|
- # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
|
|
|
- punishDecision = self.get_punishDecision(text, punishType)
|
|
|
- # print('处罚决定:',punishDecision)
|
|
|
- punishWhether= self.get_punishWhether(punishDecision, text, punishType)
|
|
|
- # print('投诉是否成立:',punishWhether)
|
|
|
- complainants, punishPeople = self.get_complainant(punishType, list_sentence, list_entity)
|
|
|
- # print('投诉人:%s 被投诉人:%s'%(complainants, punishPeople))
|
|
|
- punish_dic = {'punish_code':punish_code,
|
|
|
- 'punishType':punishType,
|
|
|
- 'punishDecision':punishDecision,
|
|
|
- 'complainants':complainants,
|
|
|
- 'punishPeople':punishPeople,
|
|
|
- 'punishWhether':punishWhether,
|
|
|
- 'institutions':institutions,
|
|
|
- 'punishTimes':punishTimes}
|
|
|
+ # print('处罚类型:',punishType)
|
|
|
+ punish_code = self.predict_punishCode(list_sentences)
|
|
|
+ # print('处罚编号: ',punish_code)
|
|
|
+ institutions, punishTimes = self.get_institution(title, list_sentence, list_entity)
|
|
|
+ # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
|
|
|
+ punishDecision = self.get_punishDecision(text, punishType)
|
|
|
+ # print('处罚决定:',punishDecision)
|
|
|
+ punishWhether= self.get_punishWhether(punishDecision, text, punishType)
|
|
|
+ # print('投诉是否成立:',punishWhether)
|
|
|
+ complainants, punishPeople = self.get_complainant(punishType, list_sentence, list_entity)
|
|
|
+ # print('投诉人:%s 被投诉人:%s'%(complainants, punishPeople))
|
|
|
+ punish_dic = {'punish_code':punish_code,
|
|
|
+ 'punishType':punishType,
|
|
|
+ 'punishDecision':punishDecision,
|
|
|
+ 'complainants':complainants,
|
|
|
+ 'punishPeople':punishPeople,
|
|
|
+ 'punishWhether':punishWhether,
|
|
|
+ 'institutions':institutions,
|
|
|
+ 'punishTimes':punishTimes}
|
|
|
+ _count = 0
|
|
|
+ for k,v in punish_dic.items():
|
|
|
+ if v!="":
|
|
|
+ _count += 1
|
|
|
+ if _count>=2 and punish_dic["punishType"]!="未知类别":
|
|
|
list_result.append({"punish":punish_dic})
|
|
|
+ else:
|
|
|
+ list_result.append({"punish":{}})
|
|
|
return list_result
|
|
|
|
|
|
if __name__ == "__main__":
|