#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Author : bidikeji
# @Time : 2020/12/24 0024 15:23
import re
import os
import time
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import graph_util
from BiddingKG.dl.common.Utils import *
from tensorflow.contrib.crf import crf_log_likelihood, viterbi_decode
from tensorflow.contrib.layers.python.layers import initializers
from keras.preprocessing.sequence import pad_sequences
import BiddingKG.dl.interface.Preprocessing as Preprocessing
from BiddingKG.dl.interface.Preprocessing import *

def BiLSTM_CRF_tfmodel(sess, weights):
    BiRNN_Units = 140
    # BMEO tagging scheme: PN_B/PN_M/PN_E mark the beginning/middle/end
    # characters of a punishment document number, O is outside any number
    chunk_tags = {
        'O': 0,
        'PN_B': 1,
        'PN_M': 2,
        'PN_E': 3
    }

    def embedding_layer(input):
        # look up pretrained character embeddings; weights is a (vocab_size, embedding_size) matrix
        embedding = tf.get_variable("embedding", initializer=np.array(weights, dtype=np.float32) if weights is not None else None, dtype=tf.float32)
        return tf.nn.embedding_lookup(params=embedding, ids=input)

    def BiLSTM_Layer(input, length):
        with tf.variable_scope("BiLSTM"):
            forward_cell = tf.contrib.rnn.BasicLSTMCell(BiRNN_Units//2, state_is_tuple=True)
            backward_cell = tf.contrib.rnn.BasicLSTMCell(BiRNN_Units//2, state_is_tuple=True)
            output, _ = tf.nn.bidirectional_dynamic_rnn(forward_cell, backward_cell, input, dtype=tf.float32, sequence_length=length)
            output = tf.concat(output, 2)
        return output

    def CRF_layer(input, num_tags, BiRNN_Units, time_step):
        # two-layer projection from the BiLSTM outputs to per-tag emission scores
        with tf.variable_scope("CRF"):
            with tf.variable_scope("hidden"):
                w_hidden = tf.get_variable(name='w_hidden', shape=(BiRNN_Units, BiRNN_Units//2), dtype=tf.float32,
                                           initializer=initializers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(0.001))
                b_hidden = tf.get_variable(name='b_hidden', shape=(BiRNN_Units//2), dtype=tf.float32, initializer=tf.zeros_initializer())
                # print(input)
                input_reshape = tf.reshape(input, shape=(-1, BiRNN_Units))
                hidden = tf.tanh(tf.nn.xw_plus_b(input_reshape, w_hidden, b_hidden))
            with tf.variable_scope("output"):
                w_output = tf.get_variable(name='w_output', shape=(BiRNN_Units//2, num_tags), dtype=tf.float32,
                                           initializer=initializers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(0.001))
                b_output = tf.get_variable(name='b_output', shape=(num_tags), dtype=tf.float32, initializer=tf.zeros_initializer())
                pred = tf.nn.xw_plus_b(hidden, w_output, b_output)
                logits_ = tf.reshape(pred, shape=(-1, time_step, num_tags), name='logits')
                return logits_

    def layer_loss(input, true_target, num_tags, length):
        with tf.variable_scope("crf_loss"):
            # note: the 'transitons' spelling must stay as-is; it matches the saved
            # checkpoints and the frozen-graph node name in save_punish_code_model()
            trans = tf.get_variable(name='transitons', shape=(num_tags, num_tags), dtype=tf.float32, initializer=initializers.xavier_initializer())
            log_likelihood, trans = crf_log_likelihood(inputs=input, tag_indices=true_target, transition_params=trans, sequence_lengths=length)
            return tf.reduce_mean(-log_likelihood), trans

    with sess.graph.as_default():
        char_input = tf.placeholder(name='char_input', shape=(None, None), dtype=tf.int32)
        target = tf.placeholder(name='target', shape=(None, None), dtype=tf.int32)
        length = tf.placeholder(name='length', shape=(None,), dtype=tf.int32)
        # keepprob = tf.placeholder(name='keepprob',dtype=tf.float32)

        _embedding = embedding_layer(char_input)
        _shape = tf.shape(char_input)
        batch_size = _shape[0]
        step_size = _shape[-1]
        bilstm = BiLSTM_Layer(_embedding, length)
        _logits = CRF_layer(bilstm, num_tags=len(chunk_tags), BiRNN_Units=BiRNN_Units, time_step=step_size)
        crf_loss, trans = layer_loss(_logits, true_target=target, num_tags=len(chunk_tags), length=length)
        global_step = tf.Variable(0, trainable=False)
        with tf.variable_scope("optimizer"):
            opt = tf.train.AdamOptimizer(0.002)
            grads_vars = opt.compute_gradients(crf_loss)
            # clip gradients to [-5, 5] to stabilise training
            capped_grads_vars = [[tf.clip_by_value(g, -5, 5), v] for g, v in grads_vars]
            train_op = opt.apply_gradients(capped_grads_vars, global_step)
        print('tensor: ', char_input, length, trans, _logits)
        return char_input, _logits, target, length, crf_loss, trans, train_op
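
# A minimal training-loop sketch for the graph built above (illustrative only;
# `w2v_matrix` and the `batches` iterator are assumptions, the real data pipeline
# lives elsewhere in BiddingKG):
#
#     sess = tf.Session(graph=tf.Graph())
#     char_input, logits, target, length, crf_loss, trans, train_op = BiLSTM_CRF_tfmodel(sess, w2v_matrix)
#     with sess.as_default():
#         sess.run(tf.global_variables_initializer())
#         for batch_x, batch_y, batch_len in batches:
#             loss, _ = sess.run([crf_loss, train_op],
#                                feed_dict={char_input: batch_x, target: batch_y, length: batch_len})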

def decode(logits, trans, sequence_lengths, tag_num):
    # Viterbi-decode each sentence's emission scores with the learned transition
    # matrix; tag_num is unused but kept for interface compatibility
    viterbi_sequences = []
    for logit, length in zip(logits, sequence_lengths):
        score = logit[:length]
        viterbi_seq, viterbi_score = viterbi_decode(score, trans)
        viterbi_sequences.append(viterbi_seq)
    return viterbi_sequences
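
# Usage sketch for decode() with dummy shapes (values are illustrative):
#
#     logits = np.zeros((2, 10, 4), dtype=np.float32)   # (batch, time, num_tags)
#     trans = np.zeros((4, 4), dtype=np.float32)        # CRF transition matrix
#     paths = decode(logits, trans, sequence_lengths=[10, 7], tag_num=4)
#     # paths[0] has length 10, paths[1] has length 7; entries are tag ids 0..3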

class Punish_Extract():
    def __init__(self, model_file=os.path.dirname(__file__) + "/models/21-0.9990081295021194-0.3647936/model.ckpt"):
        print('model_file_path:', model_file)
        self.sess = tf.Session(graph=tf.Graph())
        self.code = ""
        self.punish_dicition = ""
        self.model_file = model_file  # model for predicting punishment codes
        self.load_model()

    # load the punishment-code prediction model
    def load_model(self):
        with self.sess.as_default() as sess:
            with sess.graph.as_default():
                vocab_model = getModel_word()
                vocab, w2v_matrix = getVocabAndMatrix(vocab_model, Embedding_size=60)
                self.char_input, self.logits, self.target, self.length, self.crf_loss, self.trans, self.train_op = BiLSTM_CRF_tfmodel(sess, w2v_matrix)
                sess.run(tf.global_variables_initializer())
                saver = tf.train.Saver()
                saver.restore(sess, self.model_file)

    # predict punishment codes
    def predict_punishCode(self, list_sentences):
        # the decoded tag ids of each sentence form a digit string; "12+?3" matches
        # the id sequence PN_B (1), PN_M (2)+, PN_E (3), i.e. one predicted code span
        re_ner = re.compile("12+?3")
        article_ner_list = []
        count = 0
        with self.sess.as_default():
            with self.sess.graph.as_default():
                for sentences in list_sentences:
                    count += 1
                    # print(count)
                    sentence_len = [len(sentence.sentence_text) for sentence in sentences]
                    maxlen = max(sentence_len)
                    sentences_x = []
                    for sentence in sentences:
                        sentence = sentence.sentence_text
                        sentence = list(sentence)
                        sentence2id = [getIndexOfWord(word) for word in sentence]
                        sentences_x.append(sentence2id)
                    sentences_x = pad_sequences(sentences_x, maxlen=maxlen, padding="post", truncating="post")
                    sentences_x = [np.array(x) for x in sentences_x]
                    print('punish tensor: ', self.logits, self.trans, self.char_input, self.length)
                    _logits, _trans = self.sess.run([self.logits, self.trans],
                                                    feed_dict={self.char_input: np.array(sentences_x), self.length: sentence_len})
                    viterbi_sequence = decode(logits=_logits, trans=_trans, sequence_lengths=sentence_len, tag_num=4)

                    ner_list = []
                    for _seq, sentence in zip(viterbi_sequence, sentences):
                        sentence = sentence.sentence_text
                        seq_id = ''.join([str(s) for s in _seq])
                        if re_ner.search(seq_id):
                            # print("sentence: ",sentence)
                            for _ner in re_ner.finditer(seq_id):
                                start = _ner.start()
                                end = _ner.end()
                                n = sentence[start:end]
                                # print(n,'<==>',start,end)
                                # ner_list.append((n, start, end))
                                ner_list.append(n)  # changed to return only the entity text
                    # article_ner_list.append(ner_list)
                    article_ner_list.append(';'.join(set(ner_list)))
        # only the first article's codes are returned; callers pass one article at a time
        return article_ner_list[0]
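
    # Example of the tag-to-span matching above (illustrative): for a sentence like
    # "处罚编号:厦财企〔2020〕12号,..." the model should emit ids such as
    # "0000012222222223..."; re_ner then recovers the span "厦财企〔2020〕12号".
    # Actual ids depend on the trained checkpoint.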

    # punishment type
    def get_punishType(self, x1, x2):
        '''Classify the announcement by its title and body text.
        x1: title
        x2: body text
        return: (matched keyword, category)'''
        # x1 = x1.replace('(','(').replace(')', ')').replace(' ','')
        # x2 = x2.replace('(', '(').replace(')', ')').replace(' ', '')
        '''title patterns'''
        # unknown announcements
        unknow = re.compile('采购方式|采购公告|磋商公告|谈判公告|交易公告$|征集|征求|招标公告|竞标公告|中标公告|'
                            '成交公告|成交信息|流标公告|废标公告|城市管理考评|决算表|决算|预算|资格考试|招聘|选聘'
                            '|聘请|拟录用|无违规违法|无此项信息|暂无工程投标违法|管理办法|指导意见|无投诉|投诉办法'
                            '|公共资源交易情况|绩效评价|考试成绩|付息公告|不动产|办证|印发|转发')  # '结果公示' only partly belongs here
        # complaint handling
        tscl = re.compile('投诉不予[处受]理|投诉不成立|终止投诉|投诉终止|不予受理|投诉事?项?的?处理')
        # administrative penalty
        xzcf = re.compile('行政处罚|行政处理|政处罚|行政裁决|防罚|公罚|医罚|环罚|政罚|文罚|局罚|旅罚|财罚|运罚')
        # supervision and inspection
        jdjc = re.compile('(监督检查的?问?题?(处理|整改|记分|结果|决定|处罚))|监督处罚|调查处理|监督处理')
        # serious violations
        yzwf = re.compile('严重违法失信|黑名单|失信名单')
        # misconduct
        blxw = re.compile('((不良|失信|不诚信|差错|不规范|违规|违约|处罚|违法)(行为|记录|信息))|((违约|违规|违法)(处理|操作|情况|问题))'
                          '|通报批评|记分管理|迟到|早退|缺席|虚假材料|弄虚作假|履职不到位|诚信考核扣分|串通投标'
                          '|审核不通过|码一致|地址一致|扣分处理|扣分通知|扣[0-9]+分|责令整改|信用信息认定书$'
                          '|关于.{,30}的处罚|关于.{,10}的?考评通报|关于.{,30}扣分情况|不规范代理行为'
                          '|(取消|暂停|限制).{,50}((专家|评标|评委|投标|竞价|被抽取|中标|供应商|候选人)资格)'
                          '|(代理服?务?机构).{,10}(扣分)|(专家).{,30}(扣分|记分|处罚)|对.{,30}处理|冻结.{,30}账号')
        # other misconduct
        other = re.compile('质疑|代理机构进场交易情况|网上投诉办理|信用奖惩|信用奖罚|进场工作.{,5}考核'
                           '|举报处理|结果无效|成交无效|行政复议')

        '''body-text patterns'''
        # complaint handling
        tscl_c = re.compile('(投诉(人|单位)[1-9]?(名称)?[::])|(投诉事项[1-5一二三四五、]*部?分?(成立|予以受理))'
                            '|((驳回|撤回|撤销|终止)[^,。]{,60}(投诉|质疑))')
        # administrative penalty
        xzcf_c = re.compile('((处理依据及结果|处理结果|处罚结果)).*行政处罚|如下行政处罚|行政处罚决定')
        # integrity bonus points
        cxjf_c = re.compile('处罚结果.*诚信加分')
        # serious violation / loss of credit
        yzwf_c = re.compile('工商部门严重违法失信起名单|严重违法失信的具体情形')
        # misconduct
        blxw_c = re.compile('(取消|暂停|限制).{,30}((专家|评标|评委|投标|采购|竞价|被抽取|中标|供应商)的?资格)'
                            '|(处罚结果|处罚情况).*(扣[1-9]*分|记分|不良行为|不良记录|不良信用|不诚信|扣除信用'
                            '|诚信档案|信用信息|取消.*资格|口头警告|处罚机关|责令改正|罚款|限制投标|暂扣|禁止'
                            '|暂停|封禁|暂无|行政处罚)|处罚结果'
                            '|处罚主题|禁止参与.{,10}政府采购活动|列入不良行为|处罚如下|如下处罚|违规处罚|处罚违规'
                            '|责令改正|责令整改|处罚依据|进行以下处理|处理依据及结果|处理结果|处罚决定书|'
                            '(不规范|不良|不诚信)行为记录')
        # other misconduct
        other_c = re.compile('质疑(人|单位)[1-9]?(名称)?:|公告期内受质疑')

        if re.search(unknow, x1):
            return re.search(unknow, x1).group(0), '未知类别'
        elif re.search(yzwf, x1):
            return re.search(yzwf, x1).group(0), '严重违法'
        elif re.search(yzwf_c, x2):
            return re.search(yzwf_c, x2).group(0), '严重违法'

        elif re.search(tscl, x1):
            return re.search(tscl, x1).group(0), '投诉处理'
        elif re.search(xzcf, x1):
            return re.search(xzcf, x1).group(0), '行政处罚'
        elif re.search(jdjc, x1):
            return re.search(jdjc, x1).group(0), '监督检查'
        elif re.search(blxw, x1):
            return re.search(blxw, x1).group(0), '不良行为'
        elif re.search(other, x1):
            return re.search(other, x1).group(0), '其他不良行为'

        elif re.search(tscl_c, x2):
            return re.search(tscl_c, x2).group(0), '投诉处理'
        elif re.search(xzcf_c, x2):
            return re.search(xzcf_c, x2).group(0), '行政处罚'
        elif re.search(cxjf_c, x2):
            return re.search(cxjf_c, x2).group(0), '诚信加分'

        elif re.search(blxw_c, x2):
            return re.search(blxw_c, x2).group(0), '不良行为'
        elif re.search(other_c, x2):
            return re.search(other_c, x2).group(0), '其他不良行为'

        return ' ', '未知类别'
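
    # Usage sketch (illustrative title; the category strings are the ones returned above):
    #
    #     keyword, ptype = punish.get_punishType('关于对某代理机构的行政处罚决定', '...')
    #     # -> ('行政处罚', '行政处罚')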

    # punishment decision
    def get_punishDecision(self, x, x2):
        '''Extract the handling decision from the body text by regex,
        searching progressively larger tail windows of the article.
        x: body text
        x2: punishment category
        return: decision string'''
        rule1 = re.compile(
            '(((如下|以下|处理|研究|本机关|我机关|本局|我局)决定)|((决定|处理|处理意见|行政处罚|处罚)(如下|如下))'
            '|((以下|如下)(决定|处理|处理意见|行政处罚|处罚))|处理依据及结果|处理结果|处罚结果|处罚情况|限制行为'
            '|整改意见)[::].{5,}')
        rule2 = re.compile(
            '(((如下|以下|处理|研究|本机关|我机关|本局|我局)决定)|((决定|处理|处罚|处理意见)(如下|如下))'
            '|((以下|如下)(决定|处理|处理意见|处罚))|处理依据及结果|处理结果|处罚结果|处罚情况|限制行为'
            '|处罚内容)[:,,].{10,}')
        rule3 = re.compile('考评结果:?.*')
        rule4 = re.compile('(依据|根据)《.*》.*')
        if x2 == '未知类别':
            return ' '
        # decisions usually sit near the end, so search the last 40%/60%/70% of the text
        elif re.search(rule1, x[-int(len(x)*0.4):]):
            return re.search(rule1, x[-int(len(x)*0.4):]).group(0)
        elif re.search(rule1, x[-int(len(x)*0.6):]):
            return re.search(rule1, x[-int(len(x)*0.6):]).group(0)
        elif re.search(rule2, x[-int(len(x)*0.7):]):
            return re.search(rule2, x[-int(len(x)*0.7):]).group(0)
        elif re.search(rule3, x[-int(len(x)*0.6):]):
            return re.search(rule3, x[-int(len(x)*0.6):]).group(0)
        elif re.search(rule4, x[-int(len(x)*0.4):]):
            return re.search(rule4, x[-int(len(x)*0.4):]).group(0)
        else:
            return ''
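
    # Usage sketch (illustrative; the decision clause must fall inside the tail
    # windows searched above):
    #
    #     d = punish.get_punishDecision('……' * 10 + '经研究,本机关决定:给予警告并罚款5000元。', '行政处罚')
    #     # -> '本机关决定:给予警告并罚款5000元。'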

    # whether the complaint is upheld
    def get_punishWhether(self, x1, x2, x3):
        '''Decide by regex over the handling decision whether the complaint is upheld.
        x1: decision string
        x2: body text
        x3: punishment category
        return: '投诉成立' (upheld) / '投诉无效' (rejected) / empty string'''
        p1 = re.compile('(投诉|投拆|质疑|举报)(事项|内容|事实)?[^不,。]{,10}(成立|属实|予以受理|予以支持)|责令|废标|(中标|成交)[^,。]{,10}无效'
                        '|取消[^,。]{,60}资格|罚款|重新(组织|开展)?(招标|采购)|投诉成立|被投诉人存在违法违规行为'
                        '|采购活动违法|(中标|评标|成交)结果无效')
        p2 = re.compile('投诉不予[处受]理|((投诉|投拆|质疑|举报)(事项|内容|事实)?[^,。]{,10}(不成立|情?况?不属实|不予支持|缺乏事实依据))'
                        '|((驳回|撤回|撤销|终止)[^,。]*(投诉|质疑|诉求))|终止[^,。]{,20}(行政裁决|投诉处理|采购活动)|投诉终止|投诉无效'
                        '|予以驳回|不予受理|继续开展采购|被投诉人不存在违法违规行为|中标结果有效|投诉[^,。]{,10}不成立'
                        '|维持被投诉人|不支持[^,。]{,20}投诉|无确凿证据')
        if x3 != '投诉处理':
            return ''
        elif re.search(p1, x1):
            return '投诉成立'
        elif re.search(p2, x1):
            return '投诉无效'
        elif re.search(p1, x2):
            return '投诉成立'
        elif re.search(p2, x2):
            return '投诉无效'
        return ''
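
    # Usage sketch (decision text is illustrative):
    #
    #     w = punish.get_punishWhether('投诉事项成立,责令重新开展采购活动', '...', '投诉处理')
    #     # -> '投诉成立'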

    # enforcement agency and punishment time
    def get_institution(self, title, sentences_l, entity_l):
        '''
        Decide from the text preceding an entity whether it is the enforcement agency.
        :param title: article title
        :param sentences_l: sentence list of one announcement
        :param entity_l: entity list of one announcement
        :return: enforcement agencies and punishment times, multiple values joined by ";"
        '''
        institutions = []
        punishTimes = []
        institution_1 = re.compile("(?:处罚执行部门|认定部门|执法机关名称|执法单位|通报部门|处罚机关|处罚部门)[::]")
        punishTimes_1 = re.compile("(?:处罚日期|限制行为开始时间|曝光开始日期|处罚决定日期|处罚期限|处罚时间|处理日期|公告开始时间)[::]")
        # use the keywords before an entity to decide whether it is an agency or a punishment time
        for ner in entity_l:
            if ner.entity_type == 'org':
                left = sentences_l[ner.sentence_index].sentence_text[
                       max(0, ner.wordOffset_begin - 15):ner.wordOffset_begin]
                if institution_1.search(left):
                    institutions.append(ner)
                elif institutions != [] and ner.sentence_index == institutions[-1].sentence_index and \
                        ner.wordOffset_begin - institutions[-1].wordOffset_end < 2 and \
                        sentences_l[ner.sentence_index].sentence_text[
                        institutions[-1].wordOffset_end:ner.wordOffset_begin] \
                        in ['', '、', '和', '及']:
                    # an adjacent org separated only by a list delimiter joins the agency list
                    institutions.append(ner)
            elif ner.entity_type == 'time':
                left = sentences_l[ner.sentence_index].sentence_text[
                       max(0, ner.wordOffset_begin - 15):ner.wordOffset_begin]
                if punishTimes_1.search(left):
                    punishTimes.append(ner)

        institution_title = re.compile("财政局|财政厅|监督管理局|公管局|公共资源局|委员会")
        institution_time = re.compile(
            "(^,?[\d一二三四五六七八九十]{4},?[/年-][\d一二三四五六七八九十]{1,2},?[/月-][\d一二三四五六七八九十]{1,2},?[/日-]?)")
        ins = ""
        ptime = ""
        # if no agency was found above, look for entities in the title and check them against the keywords
        if institutions == [] and len(title) > 10:
            title_ners = getNers([title], useselffool=True)
            if title_ners[0]:
                for title_ner in title_ners[0]:
                    if title_ner[2] == 'org' and institution_title.search(title_ner[3]):
                        ins = title_ner[3]
                        break
        if punishTimes == [] or institutions == []:
            # if elements are still missing, check whether a date follows one of the last
            # org entities; if so, take that entity and date as agency and punishment time
            for ner in [ner for ner in entity_l if ner.entity_type == 'org'][-5:][::-1]:
                right = sentences_l[ner.sentence_index].sentence_text[ner.wordOffset_end:ner.wordOffset_end + 16]
                if institution_time.search(right):
                    if ins == '':
                        ins = ner.entity_text
                    if ptime == '':
                        ptime = institution_time.search(right).group(1)
                    break
            # as a last resort, take the final time entity as the punishment time
            # if it sits at the very end of the article
            if ptime == '':
                n_time = [ner for ner in entity_l if ner.entity_type == 'time']
                if len(n_time) != 0:
                    ner = n_time[-1]
                    if ner.sentence_index == len(sentences_l) - 1:
                        textLong = len(sentences_l[ner.sentence_index].sentence_text)
                        if ner.wordOffset_end > textLong - 3 and len(ner.entity_text) > 3:
                            ptime = ner.entity_text
        institutions = [ner.entity_text for ner in institutions]
        punishTimes = [ner.entity_text for ner in punishTimes]
        if institutions == [] and ins != "":
            institutions.append(ins)
        if punishTimes == [] and ptime != "":
            punishTimes.append(ptime)
        return ";".join(institutions), ";".join(punishTimes)

    # complainant, respondent, punished party
    def get_complainant(self, punishType, sentences_l, entity_l):
        '''
        Find complainants and respondents/punished parties by regex over the
        announcement category, sentence list and entity list.
        :param punishType: announcement punishment category
        :param sentences_l: sentence list of one announcement
        :param entity_l: entity list of one announcement
        :return: complainants, respondents/punished parties
        '''
        complainants = []  # complainants
        punishPeople = []  # respondents / punished parties
        size = 16
        # complainant / challenger
        complainants_rule1 = re.compile(
            "(?:[^被]|^)(?:投[诉拆][人方]|质疑[人方]|质疑供应商|质疑单位|疑问[人方]|检举[人方]|举报[人方])[\d一二三四五六七八九十]?(\(.+?\))?(:?,?名称[\d一二三四五六七八九十]?)?(?:[::,]+.{0,3}$|$)")
        # punished party / respondent
        punishPeople_rule1 = re.compile(
            "(被投[诉拆][人方]|被检举[人方]|被举报[人方]|被处罚人|被处罚单位|行政相对人|单位名称|不良行为单位或个人|被查单位|处罚主题|企业|主体|违规对象|违规单位|当事人)[\d一二三四五六七八九十]?(\(.+?\))?(:?,?名称[\d一二三四五六七八九十]?)?(?:[::,]+.{0,3}$|$)")
        punishPeople_rule2_1 = re.compile(",$")
        punishPeople_rule2_2 = re.compile("^[::]")
        punishPeople_rule3_1 = re.compile("(?:关于|对)[^,。]*$")
        punishPeople_rule3_2 = re.compile("^[^,。]*(?:通报|处罚|披露|处理|信用奖惩|不良行为|不良记录)")

        punish_l = []  # grouped candidate entities
        tmp = []
        for ner in [ner for ner in entity_l if ner.entity_type in ['org', 'company', 'person']]:
            if tmp == []:
                tmp.append(ner)
            elif ner.entity_type == tmp[-1].entity_type and ner.sentence_index == tmp[-1].sentence_index and \
                    ner.wordOffset_begin - tmp[-1].wordOffset_end < 2 \
                    and sentences_l[ner.sentence_index].sentence_text[tmp[-1].wordOffset_end:ner.wordOffset_begin] in ['', '、', '和', '及']:
                # a same-type entity directly adjacent or separated by a list delimiter
                tmp.append(ner)
            elif ner.entity_type in ['org', 'company'] and tmp[-1].entity_type in ['org', 'company'] and \
                    ner.sentence_index == tmp[-1].sentence_index and ner.wordOffset_begin - tmp[-1].wordOffset_end < 2 \
                    and sentences_l[ner.sentence_index].sentence_text[tmp[-1].wordOffset_end:ner.wordOffset_begin] in ['', '、', '和', '及']:
                # org and company entities may also be grouped together
                tmp.append(ner)
            else:
                punish_l.append(tmp)
                tmp = [ner]
        if tmp:
            punish_l.append(tmp)  # keep the final group as well
        for ner_l in punish_l:
            begin_index = ner_l[0].wordOffset_begin
            end_index = ner_l[-1].wordOffset_end
            left = sentences_l[ner_l[0].sentence_index].sentence_text[max(0, begin_index - size):begin_index]
            right = sentences_l[ner_l[0].sentence_index].sentence_text[end_index:end_index + size]
            if complainants_rule1.search(left):
                complainants.append(ner_l)
            elif punishPeople_rule1.search(left):
                punishPeople.append(ner_l)
            elif punishPeople_rule2_1.search(left) and punishPeople_rule2_2.search(right):
                # a bare "...," before and a ":" after reads as a label line
                if punishType == '投诉处理':
                    complainants.append(ner_l)
                else:
                    punishPeople.append(ner_l)
            elif punishPeople_rule3_1.search(left) and punishPeople_rule3_2.search(right):
                punishPeople.append(ner_l)
        complainants = set([it.entity_text for l in complainants for it in l])
        punishPeople = set([it.entity_text for l in punishPeople for it in l])
        return ';'.join(complainants), ';'.join(punishPeople)

    def get_punish_extracts_backup(self, doc_id=' ', title=' ', text=' '):
        list_articles, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed([[doc_id, text, "", "", ""]],
                                                                                        useselffool=True)
        punish_code = self.predict_punishCode(list_sentences)
        # print('处罚编号: ',punish_code)
        institutions, punishTimes = self.get_institution(title, list_sentences[0], list_entitys[0])
        # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
        keyword, punishType = self.get_punishType(title, text)
        # print('处罚类型:',punishType)
        punishDecision = self.get_punishDecision(text, punishType)
        # print('处罚决定:',punishDecision)
        punishWhether = self.get_punishWhether(punishDecision, text, punishType)
        # print('投诉是否成立:',punishWhether)
        complainants, punishPeople = self.get_complainant(punishType, list_sentences[0], list_entitys[0])
        # print('投诉人:%s 被投诉人:%s'%(complainants, punishPeople))
        punish_dic = {'punish_code': punish_code,
                      'punishType': punishType,
                      'punishDecision': punishDecision,
                      'complainants': complainants,
                      'punishPeople': punishPeople,
                      'punishWhether': punishWhether,
                      'institutions': institutions,
                      'punishTimes': punishTimes}
        return punish_dic
        # return punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether, institutions, punishTimes

    def get_punish_extracts(self, list_articles, list_sentences, list_entitys):
        list_result = []
        for article, list_sentence, list_entity in zip(list_articles, list_sentences, list_entitys):
            title = article.title
            text = article.content

            keyword, punishType = self.get_punishType(title, text)
            # print('处罚类型:',punishType)
            punish_code = self.predict_punishCode([list_sentence])  # predict on this article's sentences only
            # print('处罚编号: ',punish_code)
            institutions, punishTimes = self.get_institution(title, list_sentence, list_entity)
            # print('执法机构:',institutions, '\n 处罚时间:', punishTimes)
            punishDecision = self.get_punishDecision(text, punishType)
            # print('处罚决定:',punishDecision)
            punishWhether = self.get_punishWhether(punishDecision, text, punishType)
            # print('投诉是否成立:',punishWhether)
            complainants, punishPeople = self.get_complainant(punishType, list_sentence, list_entity)
            # print('投诉人:%s 被投诉人:%s'%(complainants, punishPeople))
            punish_dic = {'punish_code': punish_code,
                          'punishType': punishType,
                          'punishDecision': punishDecision,
                          'complainants': complainants,
                          'punishPeople': punishPeople,
                          'punishWhether': punishWhether,
                          'institutions': institutions,
                          'punishTimes': punishTimes}
            # keep the extraction only if at least two fields are filled and the type is known
            _count = 0
            for k, v in punish_dic.items():
                if v != "":
                    _count += 1
            if _count >= 2 and punish_dic["punishType"] != "未知类别":
                list_result.append({"punish": punish_dic})
            else:
                list_result.append({"punish": {}})
        return list_result
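
# End-to-end usage sketch (illustrative; the Preprocessing call mirrors
# get_punish_extracts_backup above and the commented examples under __main__):
#
#     punish = Punish_Extract()
#     list_articles, list_sentences, list_entitys, _ = Preprocessing.get_preprocessed(
#         [['doc_id', '公告正文...', "", "", ""]], useselffool=True)
#     results = punish.get_punish_extracts(list_articles, list_sentences, list_entitys)
#     # -> [{'punish': {...}}] or [{'punish': {}}] if too few fields were extracted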

def save_punish_code_model():
    # freeze the trained checkpoint into a single .pb graph for serving
    model_folder = os.path.dirname(__file__) + "/models/21-0.9990081295021194-0.3647936"
    output_graph = os.path.dirname(__file__) + "/models/punish_code.pb"
    ckpt = tf.train.get_checkpoint_state(model_folder)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        input_checkpoint = ckpt.model_checkpoint_path
        saver = tf.train.import_meta_graph(input_checkpoint + ".meta", clear_devices=True)
        graph = tf.get_default_graph()
        input_graph_def = graph.as_graph_def()
        with tf.Session() as sess:
            saver.restore(sess, input_checkpoint)
            output_graph_def = graph_util.convert_variables_to_constants(
                sess=sess,
                input_graph_def=input_graph_def,
                output_node_names=["char_input", "length", "crf_loss/transitons", "CRF/output/logits"]
            )
            with tf.gfile.GFile(output_graph, "wb") as f:
                f.write(output_graph_def.SerializeToString())
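
# Loading the frozen graph back (a sketch, assuming the node names exported above):
#
#     with tf.gfile.GFile(os.path.dirname(__file__) + "/models/punish_code.pb", "rb") as f:
#         graph_def = tf.GraphDef()
#         graph_def.ParseFromString(f.read())
#     with tf.Graph().as_default() as g:
#         tf.import_graph_def(graph_def, name="")
#         char_input = g.get_tensor_by_name("char_input:0")
#         length = g.get_tensor_by_name("length:0")
#         logits = g.get_tensor_by_name("CRF/output/logits:0")
#         trans = g.get_tensor_by_name("crf_loss/transitons:0")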


if __name__ == "__main__":
    save_punish_code_model()
    # punish = Punish_Extract(model_file = "models/21-0.9990081295021194-0.3647936/model.ckpt")
    #
    # import pandas as pd
    # # with open('G:/失信数据/ALLDATA_re2-3.xlsx') as f:
    # df = pd.read_excel('G:/失信数据/ALLDATA_re2-3.xlsx', index=0)[2:10]
    # # i = 89
    # # predict('2', df.loc[i, 'PAGE_TITLE'],df.loc[i, 'PAGE_CONTENT'])
    # # i = 92
    # # predict('2', df.loc[i, 'PAGE_TITLE'],df.loc[i, 'PAGE_CONTENT'])
    #
    # # t1 = time.time()
    # # for i in df.index:
    # #     punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether, institutions, punishTimes = \
    # #         get_punish_extracts(i, df.loc[i, 'PAGE_TITLE'], df.loc[i, 'PAGE_CONTENT'])
    # #     df.loc[i, '投诉人'] = complainants
    # #     df.loc[i, '被投诉人'] = punishPeople
    # #     df.loc[i, '执法机构'] = institutions
    # #     df.loc[i, '处罚时间'] = punishTimes
    # #     df.loc[i, '处罚编号'] = punish_code
    # #     print('完成第%d篇'%i)
    # # t2 = time.time()
    # # df.to_excel('G:/失信数据/ALLDATA_re2-4.xlsx', encoding='utf-8',columns=['PAGE_TITLE', 'PAGE_CONTENT',
    # #             '关键词', '类别', '处理决定', '投诉是否成立', '投诉人', '被投诉人', '执法机构', '处罚时间', '处罚编号',
    # #             'DETAILLINK', 'sentences', 'PAGE_TIME'])
    # # t3 = time.time()
    # # print('处理耗时:%.4f, 保存耗时:%.4f'%(t2-t1, t3-t2))
    # s = '编号:厦财企〔2020〕12号,各有关单位:341号。处罚编号:厦财企〔2020〕12号,文章编号:京财采投字(2018)第42号。公告编号:闽建筑招〔2018〕5号。处罚编号:松公管监[2020]2号,'
    # # list_sentences = [s.split('。')]
    # # punish_code = punish.predict_punishCode(list_sentences)
    # # print(punish_code)
    #
    # # punish_code, punishType, punishDecision, complainants, punishPeople, punishWhether, institutions, punishTimes = \
    # #     get_punish_extracts(text=s)
    # punish_dic = punish.get_punish_extracts_backup(text=s)
    # print(punish_dic)