|
@@ -3,6 +3,7 @@
|
|
|
# @Author : bidikeji
|
|
|
# @Time : 2020/12/24 0024 15:23
|
|
|
import re
|
|
|
+import os
|
|
|
import time
|
|
|
import tensorflow as tf
|
|
|
from BiddingKG.dl.common.Utils import *
|
|
@@ -85,7 +86,8 @@ def decode(logits, trans, sequence_lengths, tag_num):
|
|
|
return viterbi_sequences
|
|
|
|
|
|
class Punish_Extract():
|
|
|
- def __init__(self, model_file = "models/21-0.9990081295021194-0.3647936/model.ckpt"):
|
|
|
+ def __init__(self, model_file = os.path.dirname(__file__)+"/models/21-0.9990081295021194-0.3647936/model.ckpt"):
|
|
|
+ print('model_file_path:',model_file)
|
|
|
self.sess = tf.Session(graph=tf.Graph())
|
|
|
self.code = ""
|
|
|
self.punish_dicition = ""
|
|
@@ -325,7 +327,7 @@ class Punish_Extract():
|
|
|
ins = ""
|
|
|
ptime = ""
|
|
|
# 如果前面步骤找不到处罚机构则在标题找实体,并正则检查是否有关键词
|
|
|
- if institutions == []:
|
|
|
+ if institutions == [] and len(title)>10:
|
|
|
title_ners = getNers([title], useselffool=True)
|
|
|
if title_ners[0]:
|
|
|
for title_ner in title_ners[0]:
|
|
@@ -426,22 +428,56 @@ class Punish_Extract():
|
|
|
punishPeople = set([it.entity_text for l in punishPeople for it in l])
|
|
|
return ';'.join(complainants), ';'.join(punishPeople)
|
|
|
|
|
|
-def get_punish_extracts(doc_id=' ', title=' ', text=' '):
|
|
|
- list_articles, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed([[doc_id, text, "", "", ""]],
|
|
|
- useselffool=True)
|
|
|
- punish_code = punish.predict_punishCode(list_sentences)
|
|
|
- # print('处罚编号: ',punish_code)
|
|
|
- institutions, punishTimes = punish.get_institution(title, list_sentences[0], list_entitys[0])
|
|
|
- # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
|
|
|
- keyword, punishType = punish.get_punishType(title, text)
|
|
|
- # print('处罚类型:',punishType)
|
|
|
- punishDecision = punish.get_punishDecision(text, punishType)
|
|
|
- # print('处罚决定:',punishDecision)
|
|
|
- punishWhether= punish.get_punishWhether(punishDecision, text, punishType)
|
|
|
- # print('投诉是否成立:',punishWhether)
|
|
|
- complainants, punishPeople = punish.get_complainant(punishType, list_sentences[0], list_entitys[0])
|
|
|
- # print('投诉人:%s 被投诉人:%s'%(complainants, punishPeople))
|
|
|
- return punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether,institutions, punishTimes
|
|
|
def get_punish_extracts_backup(self, doc_id=' ', title=' ', text=' '):
    """Preprocess one raw document and run every punishment-extraction step.

    Unlike ``get_punish_extracts``, this variant performs the preprocessing
    itself (via ``Preprocessing.get_preprocessed``) and always runs all
    steps, whatever punishment type is detected.

    Args:
        doc_id: Identifier handed to the preprocessing step.
        title: Document title, used when locating the institution and type.
        text: Document body.

    Returns:
        dict with the keys ``punish_code``, ``punishType``,
        ``punishDecision``, ``complainants``, ``punishPeople``,
        ``punishWhether``, ``institutions`` and ``punishTimes``.
    """
    _, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed(
        [[doc_id, text, "", "", ""]], useselffool=True)

    # Call the steps on this instance. The previous code went through a
    # module-level ``punish`` object that only exists when the file is run
    # as a script, so importing the module and calling this method failed
    # with NameError.
    punish_code = self.predict_punishCode(list_sentences)  # punishment number
    # Enforcement institution and punishment time.
    institutions, punishTimes = self.get_institution(
        title, list_sentences[0], list_entitys[0])
    # Punishment type; the matched keyword is not needed here.
    _, punishType = self.get_punishType(title, text)
    punishDecision = self.get_punishDecision(text, punishType)  # punishment decision
    # Whether the complaint is upheld.
    punishWhether = self.get_punishWhether(punishDecision, text, punishType)
    # Complainant(s) and the complained-about party.
    complainants, punishPeople = self.get_complainant(
        punishType, list_sentences[0], list_entitys[0])

    return {
        'punish_code': punish_code,
        'punishType': punishType,
        'punishDecision': punishDecision,
        'complainants': complainants,
        'punishPeople': punishPeople,
        'punishWhether': punishWhether,
        'institutions': institutions,
        'punishTimes': punishTimes,
    }
|
|
|
+
|
|
|
def get_punish_extracts(self, list_sentences, list_entitys, title=' ', text=' '):
    """Extract punishment fields from an already-preprocessed document.

    The punishment type is determined first. When it is "未知类别"
    (unknown category) the remaining steps are skipped and the result
    holds only the ``punishType`` key.

    Args:
        list_sentences: Preprocessed sentences; the whole list is passed to
            ``predict_punishCode`` and element 0 to the other steps.
        list_entitys: Preprocessed entities; element 0 is used.
        title: Document title.
        text: Document body.

    Returns:
        dict. Either ``{'punishType': "未知类别"}``, or a dict with the keys
        ``punish_code``, ``punishType``, ``punishDecision``,
        ``complainants``, ``punishPeople``, ``punishWhether``,
        ``institutions`` and ``punishTimes``.
    """
    # Punishment type; the matched keyword is not needed here.
    _, punishType = self.get_punishType(title, text)
    if punishType == "未知类别":
        # Unknown category: skip the remaining extraction steps.
        return {'punishType': punishType}

    punish_code = self.predict_punishCode(list_sentences)  # punishment number
    # Enforcement institution and punishment time.
    institutions, punishTimes = self.get_institution(
        title, list_sentences[0], list_entitys[0])
    punishDecision = self.get_punishDecision(text, punishType)  # punishment decision
    # Whether the complaint is upheld.
    punishWhether = self.get_punishWhether(punishDecision, text, punishType)
    # Complainant(s) and the complained-about party.
    complainants, punishPeople = self.get_complainant(
        punishType, list_sentences[0], list_entitys[0])

    return {
        'punish_code': punish_code,
        'punishType': punishType,
        'punishDecision': punishDecision,
        'complainants': complainants,
        'punishPeople': punishPeople,
        'punishWhether': punishWhether,
        'institutions': institutions,
        'punishTimes': punishTimes,
    }
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
punish = Punish_Extract(model_file = "models/21-0.9990081295021194-0.3647936/model.ckpt")
|
|
@@ -478,11 +514,12 @@ if __name__ == "__main__":
|
|
|
# 'DETAILLINK', 'sentences', 'PAGE_TIME'])
|
|
|
# t3 = time.time()
|
|
|
# print('处理耗时:%.4f, 保存耗时:%.4f'%(t2-t1, t3-t2))
|
|
|
- s = '厦财企〔2020〕12号,各有关单位:341号。厦财企〔2020〕12号,各有关单位:行政处罚厦建招诉决【2019】342号。行政处罚厦建招诉决【2019】343号。行政处罚厦建招诉决【2019】344号,'
|
|
|
+ s = '编号:厦财企〔2020〕12号,各有关单位:341号。处罚编号:厦财企〔2020〕12号,文章编号:京财采投字(2018)第42号。公告编号:闽建筑招〔2018〕5号。处罚编号:松公管监[2020]2号,'
|
|
|
# list_sentences = [s.split('。')]
|
|
|
# punish_code= punish.predict_punishCode( list_sentences)
|
|
|
# print(punish_code)
|
|
|
|
|
|
- punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether, institutions, punishTimes = \
|
|
|
- get_punish_extracts(text=s)
|
|
|
- print(punish_code)
|
|
|
+ # punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether, institutions, punishTimes = \
|
|
|
+ # get_punish_extracts(text=s)
|
|
|
+ punish_dic = punish.get_punish_extracts_backup(text=s)
|
|
|
+ print(punish_dic)
|