import json
import os
import pickle

import psycopg2
import requests
from DBUtils.PooledDB import PooledDB
from IPython.display import display,clear_output
from ipywidgets import widgets

# Pin device enumeration order and expose no CUDA devices to this process.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = ""

guardian_base = 'http://127.0.0.1:15010'
myheaders = {'Content-Type': 'application/json'}
source_data_file = "data.pk"

# Lazily created connection pool; see getConnection().
pool = None

# Maps entity_type -> label index (as a string) -> human-readable description
# shown to the annotator.
BS_dic = {
    "org": {
        "0": "角色-招标人",
        "1": "角色-代理人",
        "2": "角色-中标/第一候选人",
        "3": "角色-第二候选人",
        "4": "角色-第三候选人",
        "5": "角色-无",
    },
    "company": {
        "0": "角色-招标人",
        "1": "角色-代理人",
        "2": "角色-中标/第一候选人",
        "3": "角色-第二候选人",
        "4": "角色-第三候选人",
        "5": "角色-无",
    },
    "money": {
        "0": "金额-招标金额",
        "1": "金额-中投标金额",
        "2": "金额-其他金额",
    },
    "person": {
        "0": "联系人-非目标联系人",
        "1": "联系人-招标联系人",
        "2": "联系人-代理联系人",
        "3": "联系人-联系人",
    },
}


def getHbox(entity):
    """Build one annotation row (an ``HBox``) for a single entity.

    ``entity`` is a record as produced by ``getEntitys``:
    ``[left_context, mention, right_context, entity_type, label, handlabel,
    entity_id]``.

    The toggle button starts pressed unless ``handlabel`` (``entity[5]``)
    equals ``"1"``.
    """
    check = entity[5] != "1"
    return widgets.HBox([
        widgets.ToggleButton(
            value=check,
            description='表述错误',
            disabled=False,
            layout=widgets.Layout(width="100px", height="100px"),
            icon='check',
        ),
        widgets.Label(value="表述:", layout=widgets.Layout(width="60px", height="100px")),
        widgets.Textarea(value=getBS(entity), layout=widgets.Layout(width="170px", height="100px")),
        widgets.Label(value="前后文:", layout=widgets.Layout(width="100px", height="100px")),
        widgets.Textarea(value="".join(entity[0]), layout=widgets.Layout(width="170px", height="100px")),
        widgets.Textarea(value="".join(entity[1]), layout=widgets.Layout(width="170px", height="100px")),
        widgets.Textarea(value="".join(entity[2]), layout=widgets.Layout(width="170px", height="100px")),
    ])


def save(object_to_save, path):
    """Pickle ``object_to_save`` to the file at ``path``.

    Args:
        object_to_save: the object to serialize.
        path: destination file path (opened in binary write mode).
    """
    with open(path, 'wb') as f:
        pickle.dump(object_to_save, f)


def load(path):
    """Unpickle and return the object stored in the file at ``path``.

    Args:
        path: file to read (opened in binary read mode).

    Returns:
        The deserialized object.
    """
    # WARNING: pickle.load can execute arbitrary code; only use this on files
    # that come from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)


def getConnection():
    """Return a connection from the module-wide pool, creating the pool on first use."""
    global pool
    if pool is None:
        # The two positional 5s are PooledDB's mincached and maxcached.
        # NOTE(review): host and credentials are hard-coded here.
        pool = PooledDB(psycopg2, 5, 5, dbname="article_label", host="192.168.2.101",
                        user="postgres", password="postgres", port="5432")
    return pool.connection()


def make(index_, source_data):
    """POST article ``source_data[index_]`` to the extraction service.

    Sends ``{"id": ..., "content": ...}`` built from columns 0 and 1 of the
    selected row to ``guardian_base + '/article_extract'``.

    Returns:
        True only when the JSON response's ``"success"`` field is exactly ``True``.
    """
    payload = {
        "id": source_data[index_][0],
        "content": source_data[index_][1],
    }
    _resp = requests.post(guardian_base + '/article_extract', json=payload,
                          headers=myheaders, verify=True)
    return json.loads(_resp.content.decode("utf-8"))["success"] is True


def getBS(entity):
    """Return the display description for an entity's (type, label) pair.

    Looks up ``BS_dic[entity[3]][entity[4]]``; raises ``KeyError`` when the
    entity type or label is not present in ``BS_dic``.
    """
    return BS_dic[entity[3]][entity[4]]


def getEntitys(index_, source_data):
    """Fetch the labelled entity mentions of one document for review.

    Args:
        index_: index of the document row inside ``source_data``.
        source_data: sequence of rows whose first column is the document id.

    Returns:
        A list of ``[left_context, mention, right_context, entity_type, label,
        handlabel, entity_id]`` records. Mentions whose probability for their
        own label is below 0.5 are skipped.
    """
    doc_id = source_data[index_][0]
    # The document id is passed as a bound parameter rather than concatenated
    # into the statement, so quotes inside it cannot alter the query.
    sql = (" select B.tokens,A.entity_text,A.entity_type,A.label,A.handlabel,A.entity_id,"
           "A.begin_index,A.end_index,A.values from entity_mention A,sentences B"
           " where A.doc_id=B.doc_id and A.sentence_index=B.sentence_index and A.label !='None' "
           " and B.doc_id=%s order by A.label,A.entity_type ")

    conn = getConnection()
    try:
        cursor = conn.cursor()
        cursor.execute(sql, (doc_id,))
        rows = cursor.fetchall()
    finally:
        # Always hand the connection back, even if the query fails.
        conn.close()

    data = []
    for row in rows:
        (tokens, _entity_text, entity_type, label, handlabel,
         entity_id, begin_index, end_index, values) = row
        # `values` is sliced and split as a bracketed, comma-separated string;
        # the entry at position int(label) is compared against 0.5.
        prob = values[1:-1].split(",")[int(label)]
        if float(prob) < 0.5:
            continue
        span = spanWindow(tokens, begin_index, end_index, 10)
        data.append([span[0], span[1], span[2], entity_type, label, handlabel, entity_id])
    return data


def spanWindow(tokens, begin_index, end_index, size):
    """Return the context window around an entity inside a tokenized sentence.

    Args:
        tokens: the sentence as a list of tokens.
        begin_index: index of the entity's first token.
        end_index: index of the entity's last token.
        size: how many tokens to take on each side of the entity.

    Returns:
        A three-element list: tokens left of the entity, the entity's own
        tokens, and tokens right of the entity.
    """
    length_tokens = len(tokens)
    if begin_index > size:
        begin = begin_index - size
    else:
        begin = 0
    # NOTE(review): the original source is truncated from this comparison
    # onwards (it reads `if end_index+size0:`). Everything below in this
    # function is reconstructed from the docstring and from how getEntitys and
    # getHbox consume span[0], span[1] and span[2]. Confirm against version
    # control, in particular that end_index is inclusive.
    if end_index + size < length_tokens:
        end = end_index + size + 1
    else:
        end = length_tokens
    return [
        tokens[begin:begin_index],
        tokens[begin_index:end_index + 1],
        tokens[end_index + 1:end],
    ]


# NOTE(review): a function definition was lost from the source at this point.
# Its header and most of its body are missing; only its final statements
# survive:
#
#     ...0:                      (start of the condition is lost)
#         return rows[0][0],rows[0][1]
#     else:
#         return "",""
#
# It is deliberately not reconstructed here. Restore it from version control.


def saveData(datas, out_code, begin_index, source_data, out_name, out_vbox):
    """Persist the annotator's input for one document.

    Writes the code and name typed into ``out_code`` / ``out_name`` to
    ``articles_processed`` and, for every entity row whose toggle button is
    pressed, sets ``handlabel`` to ``'0'`` in ``entity_mention``.

    Args:
        datas: entity records as returned by ``getEntitys``.
        out_code: widget whose ``value`` holds the project code.
        begin_index: index of the current document inside ``source_data``.
        source_data: sequence of rows whose first column is the document id.
        out_name: widget whose ``value`` holds the project name.
        out_vbox: container whose i-th child is the row for ``datas[i]``, with
            the toggle button as that row's first child.

    Returns:
        1 when both code and name are empty (nothing is saved), otherwise 0.
    """
    # NOTE(review): only rejects when BOTH fields are empty; confirm whether
    # `or` was intended.
    if out_code.value == "" and out_name.value == "":
        print("请标注编号名称")
        return 1

    conn = getConnection()
    try:
        cursor = conn.cursor()
        # Widget values are free text typed by the user, so they are passed as
        # bound parameters instead of being concatenated into the SQL.
        cursor.execute(
            " update articles_processed set code=%s,name=%s where id=%s",
            (out_code.value, out_name.value, source_data[begin_index][0]),
        )
        for i, entity in enumerate(datas):
            handlabel = "0" if out_vbox.children[i].children[0].value else "1"
            # NOTE(review): as in the original, only handlabel '0' is ever
            # written; rows that would get '1' are left untouched.
            if handlabel == "0":
                cursor.execute(
                    " update entity_mention set handlabel=%s where entity_id=%s and entity_type=%s",
                    (handlabel, entity[6], entity[3]),
                )
        conn.commit()
    finally:
        # Return the connection to the pool whether or not the updates succeeded.
        conn.close()
    return 0