123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- import pickle
- import requests
- import json
- from ipywidgets import widgets
- from IPython.display import display,clear_output
- import os
# Configure CUDA through environment variables before any GPU-aware library
# reads them: devices are ordered by PCI bus id, and the visible-device list
# is set to the empty string.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "",
})
def getHbox(entity):
    """Build the horizontal row of widgets used to review one entity.

    Args:
        entity: Indexable record. Items 0, 1 and 2 are joined with ``""`` to
            fill the three context text areas, items 3 and 4 are passed to
            ``getBS`` to obtain the displayed description, and item 5 is the
            stored hand label.

    Returns:
        A ``widgets.HBox`` containing, in order: the toggle button, the
        description caption and text area, the context caption, and the
        three context text areas.
    """
    row_height = "100px"

    def sized(width):
        # Every cell in the row shares the same height; only widths vary.
        return widgets.Layout(width=width, height=row_height)

    # The toggle starts pressed unless the stored hand label is exactly "1".
    pressed = entity[5] != "1"

    toggle = widgets.ToggleButton(
        value=pressed,
        description='表述错误',
        disabled=False,
        layout=sized("100px"),
        icon='check',
    )
    description_caption = widgets.Label(value="表述:", layout=sized("60px"))
    description_text = widgets.Textarea(value=getBS(entity), layout=sized("170px"))
    context_caption = widgets.Label(value="前后文:", layout=sized("100px"))
    context_texts = [
        widgets.Textarea(value="".join(entity[part]), layout=sized("170px"))
        for part in (0, 1, 2)
    ]

    return widgets.HBox(
        [toggle, description_caption, description_text, context_caption]
        + context_texts
    )
def save(object_to_save, path):
    """Pickle an object to a file.

    Args:
        object_to_save: The object to serialize.
        path: Destination file path; the file is opened in binary write mode,
            so an existing file is overwritten.

    Returns:
        ``path``, the location the object was written to. The previous
        implementation documented this return value but returned ``None``.
    """
    with open(path, 'wb') as f:
        pickle.dump(object_to_save, f)
    return path
def load(path):
    """Read a pickled object back from a file.

    Args:
        path: Path of the file to read; it is opened in binary read mode.

    Returns:
        The object reconstructed by ``pickle.load``.
    """
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # come from a trusted source.
    with open(path, 'rb') as source:
        return pickle.load(source)
-
# Base URL of the HTTP service; request paths are appended to it.
guardian_base = "http://127.0.0.1:15010"

# Headers sent with requests to that service (JSON payloads).
myheaders = {"Content-Type": "application/json"}

# File name of the source data set.
# NOTE(review): presumably a pickle file read with `load` -- confirm with the caller.
source_data_file = "data.pk"
- import psycopg2
- from DBUtils.PooledDB import PooledDB
# Module-wide connection pool; stays None until getConnection() is first called.
pool = None


def getConnection():
    """Return a database connection taken from the shared pool.

    The pool is created on the first call and stored in the module-level
    ``pool`` variable; later calls reuse it.

    Returns:
        The object returned by ``pool.connection()``.
    """
    global pool
    if pool is None:
        connect_kwargs = {
            "dbname": "article_label",
            "host": "192.168.2.101",
            "user": "postgres",
            "password": "postgres",
            "port": "5432",
        }
        # mincached / maxcached are the 2nd and 3rd positional parameters of
        # PooledDB; they are named here for readability.
        pool = PooledDB(psycopg2, mincached=5, maxcached=5, **connect_kwargs)
    return pool.connection()
def make(index_, source_data):
    """Post one record to the ``/article_extract`` endpoint and report success.

    Args:
        index_: Position of the record inside ``source_data``.
        source_data: Indexable collection of records; item 0 of the selected
            record is sent as ``"id"`` and item 1 as ``"content"``.

    Returns:
        ``True`` only when the ``"success"`` field of the JSON response is
        the boolean ``True``; ``False`` for any other value.
    """
    endpoint = guardian_base + '/article_extract'
    payload = {
        "id": source_data[index_][0],
        "content": source_data[index_][1],
    }
    response = requests.post(endpoint, json=payload, headers=myheaders, verify=True)
    # The body is decoded explicitly as UTF-8 before being parsed.
    reply = json.loads(response.content.decode("utf-8"))
    return reply["success"] is True
def _numbered(names):
    """Map "0", "1", ... (as strings) to the given names, in order."""
    return {str(position): name for position, name in enumerate(names)}


# Role descriptions shared by the "org" and "company" entity types.
_ROLE_NAMES = (
    "角色-招标人",
    "角色-代理人",
    "角色-中标/第一候选人",
    "角色-第二候选人",
    "角色-第三候选人",
    "角色-无",
)

# Lookup table: entity type -> label (string digit) -> display description.
# "org" and "company" receive separate, equal dictionaries.
BS_dic = {
    "org": _numbered(_ROLE_NAMES),
    "company": _numbered(_ROLE_NAMES),
    "money": _numbered(("金额-招标金额", "金额-中投标金额", "金额-其他金额")),
    "person": _numbered((
        "联系人-非目标联系人",
        "联系人-招标联系人",
        "联系人-代理联系人",
        "联系人-联系人",
    )),
}


def getBS(entity):
    """Return the display description for an entity.

    Args:
        entity: Indexable record whose item 3 is the entity type and item 4
            the label; both are used as keys into ``BS_dic``.

    Returns:
        The description string stored in ``BS_dic`` for that type and label.

    Raises:
        KeyError: If the type or the label is not present in ``BS_dic``.
    """
    descriptions = BS_dic[entity[3]]
    return descriptions[entity[4]]
def getEntitys(index_, source_data):
    """Fetch the entity mentions of one document together with their context.

    Args:
        index_: Position of the document record inside ``source_data``.
        source_data: Indexable collection of records; item 0 of the selected
            record is the document id used in the query.

    Returns:
        A list with one entry per kept mention, each of the form
        ``[left_context, entity_tokens, right_context, entity_type, label,
        handlabel, entity_id]``. The three token slices come from
        ``spanWindow`` with a window of 10 tokens per side. Mentions whose
        probability for their own label is below 0.5 are dropped.
    """
    doc_id = source_data[index_][0]
    # The document id is bound as a query parameter instead of being
    # concatenated into the SQL text, so quotes in it cannot alter the query.
    sql = (
        " select B.tokens,A.entity_text,A.entity_type,A.label,A.handlabel,"
        "A.entity_id,A.begin_index,A.end_index,A.values"
        " from entity_mention A,sentences B"
        " where A.doc_id=B.doc_id and A.sentence_index=B.sentence_index"
        " and A.label !='None' "
        " and B.doc_id=%s order by A.label,A.entity_type "
    )

    conn = getConnection()
    try:
        cursor = conn.cursor()
        cursor.execute(sql, (doc_id,))
        rows = cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    data = []
    for (tokens, _entity_text, entity_type, label, handlabel,
         entity_id, begin_index, end_index, values) in rows:
        # `values` has its first and last character stripped and is split on
        # commas; the item at position int(label) is read as this label's
        # probability (presumably a bracketed list such as "[0.1,0.9]").
        prob = values[1:-1].split(",")[int(label)]
        if float(prob) < 0.5:
            continue
        left, mention, right = spanWindow(tokens, begin_index, end_index, 10)
        data.append([left, mention, right, entity_type, label, handlabel, entity_id])
    return data
def spanWindow(tokens, begin_index, end_index, size):
    """Split the tokens around an entity into left context, entity and right context.

    Args:
        tokens: Token list of the sentence.
        begin_index: Index of the entity's first token.
        end_index: Index of the entity's last token (inclusive).
        size: Number of context tokens to take on each side.

    Returns:
        A list of three slices of ``tokens``: up to ``size`` tokens before
        the entity, the entity tokens themselves, and up to ``size`` tokens
        after the entity. The window is clamped to the sentence boundaries.
    """
    window_start = max(begin_index - size, 0)
    window_stop = min(end_index + size + 1, len(tokens))
    after_entity = end_index + 1
    return [
        tokens[window_start:begin_index],
        tokens[begin_index:after_entity],
        tokens[after_entity:window_stop],
    ]
def getCodeName(index_, source_data):
    """Look up the stored code and name of one article.

    Args:
        index_: Position of the article record inside ``source_data``.
        source_data: Indexable collection of records; item 0 of the selected
            record is the article id used in the query.

    Returns:
        A ``(code, name)`` tuple taken from the first matching row of
        ``articles_processed``, or ``("", "")`` when no row matches.
    """
    article_id = source_data[index_][0]
    conn = getConnection()
    try:
        cursor = conn.cursor()
        # The id is bound as a parameter rather than concatenated into the SQL.
        cursor.execute(
            " select code,name from articles_processed where id=%s ",
            (article_id,),
        )
        rows = cursor.fetchall()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    if rows:
        return rows[0][0], rows[0][1]
    return "", ""
def saveData(datas, out_code, begin_index, source_data, out_name, out_vbox):
    """Write the reviewer's input for one article back to the database.

    Args:
        datas: Entity records for the article; item 6 of each record is the
            entity id and item 3 the entity type.
        out_code: Object whose ``value`` attribute holds the article code.
        begin_index: Position of the article record inside ``source_data``.
        source_data: Indexable collection of records; item 0 of the selected
            record is the article id.
        out_name: Object whose ``value`` attribute holds the article name.
        out_vbox: Container whose ``children[i].children[0].value`` is the
            toggle state for ``datas[i]``.

    Returns:
        ``1`` when both the code and the name are empty (nothing is written),
        ``0`` after the updates have been committed.
    """
    # NOTE(review): input is rejected only when BOTH fields are empty --
    # confirm whether a single empty field should also be rejected.
    if out_code.value == "" and out_name.value == "":
        print("请标注编号名称")
        return 1

    conn = getConnection()
    try:
        cursor = conn.cursor()
        # Widget values are typed by the user, so they are bound as query
        # parameters instead of being concatenated into the SQL text.
        cursor.execute(
            " update articles_processed set code=%s,name=%s where id=%s",
            (out_code.value, out_name.value, source_data[begin_index][0]),
        )
        for position, entity in enumerate(datas):
            # Only rows whose toggle is pressed are written, with handlabel
            # "0"; rows with the toggle released are left untouched.
            if out_vbox.children[position].children[0].value:
                cursor.execute(
                    " update entity_mention set handlabel=%s"
                    " where entity_id=%s and entity_type=%s",
                    ("0", entity[6], entity[3]),
                )
        conn.commit()
    finally:
        # Always release the connection, even when an update fails.
        conn.close()
    return 0
-
|