{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import pickle\n",
    "\n",
    "import psycopg2\n",
    "import requests\n",
    "\n",
    "try:\n",
    "    from DBUtils.PooledDB import PooledDB  # DBUtils 1.x module path\n",
    "except ImportError:\n",
    "    from dbutils.pooled_db import PooledDB  # DBUtils 2.x renamed its modules\n",
    "\n",
    "\n",
    "def save(object_to_save, path):\n",
    "    '''\n",
    "    Pickle an object to disk.\n",
    "\n",
    "    @Args:\n",
    "        object_to_save: the object to serialize\n",
    "        path: destination file path (opened in binary write mode)\n",
    "\n",
    "    @Return:\n",
    "        the path the object was written to\n",
    "    '''\n",
    "    with open(path, 'wb') as f:\n",
    "        pickle.dump(object_to_save, f)\n",
    "    return path\n",
    "\n",
    "\n",
    "def load(path):\n",
    "    '''\n",
    "    Read back an object that was written with pickle.\n",
    "\n",
    "    SECURITY: pickle.load can execute arbitrary code embedded in the file,\n",
    "    so only call this on files you produced yourself.\n",
    "\n",
    "    @Args:\n",
    "        path: path of the pickle file to read\n",
    "\n",
    "    @Return:\n",
    "        the unpickled object\n",
    "    '''\n",
    "    with open(path, 'rb') as f:\n",
    "        return pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "guardian_base = 'http://192.168.2.8:15010'\n",
    "myheaders = {'Content-Type': 'application/json'}\n",
    "source_data_file = \"data.pk\"\n",
    "# Indexable collection of records; record[0] is the article id and record[1] its content.\n",
    "source_data = load(source_data_file)\n",
    "\n",
    "label_begin = 0\n",
    "label_size = 300\n",
    "\n",
    "# NOTE: begin and size are concatenated without a separator (0 and 300 -> \"label_0300.pk\").\n",
    "# The name is kept unchanged so previously saved label files are still found.\n",
    "save_data_file = \"label/label_\"+str(label_begin)+str(label_size)+\".pk\"\n",
    "\n",
    "# Index into source_data of the article currently being labelled; -1 means none loaded yet.\n",
    "begin_index = -1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [],
   "source": [
    "pool = None\n",
    "\n",
    "\n",
    "def getConnection():\n",
    "    '''\n",
    "    Return a pooled connection to the article_label database.\n",
    "\n",
    "    The pool is created on first use and reused afterwards. Connection\n",
    "    settings can be overridden with the ARTICLE_DB_* environment variables;\n",
    "    the defaults are the values this notebook has always used.\n",
    "    '''\n",
    "    global pool\n",
    "    if pool is None:\n",
    "        pool = PooledDB(\n",
    "            psycopg2, 10,\n",
    "            dbname=os.environ.get(\"ARTICLE_DB_NAME\", \"article_label\"),\n",
    "            host=os.environ.get(\"ARTICLE_DB_HOST\", \"192.168.2.101\"),\n",
    "            user=os.environ.get(\"ARTICLE_DB_USER\", \"postgres\"),\n",
    "            password=os.environ.get(\"ARTICLE_DB_PASSWORD\", \"postgres\"),\n",
    "            port=os.environ.get(\"ARTICLE_DB_PORT\", \"5432\"),\n",
    "        )\n",
    "    return pool.connection()\n",
    "\n",
    "\n",
    "def _fetch_all(sql, params):\n",
    "    '''Run a parameterized query and return every row, always releasing the connection.'''\n",
    "    conn = getConnection()\n",
    "    try:\n",
    "        cursor = conn.cursor()\n",
    "        try:\n",
    "            cursor.execute(sql, params)\n",
    "            return cursor.fetchall()\n",
    "        finally:\n",
    "            cursor.close()\n",
    "    finally:\n",
    "        # Runs even when the query raises, so a failed query cannot leak a connection.\n",
    "        conn.close()\n",
    "\n",
    "\n",
    "def make(index_, source_data):\n",
    "    '''\n",
    "    Send one article to the extraction service.\n",
    "\n",
    "    @Args:\n",
    "        index_: position of the article in source_data\n",
    "        source_data: records where [0] is the article id and [1] its content\n",
    "\n",
    "    @Return:\n",
    "        True only when the `success` field of the JSON response is exactly true\n",
    "    '''\n",
    "    article = {\n",
    "        \"id\": source_data[index_][0],\n",
    "        \"content\": source_data[index_][1]\n",
    "    }\n",
    "    _resp = requests.post(guardian_base + '/article_extract', json=article, headers=myheaders, verify=True)\n",
    "    return json.loads(_resp.content.decode(\"utf-8\"))[\"success\"] is True\n",
    "\n",
    "\n",
    "def getEntitys(index_):\n",
    "    '''\n",
    "    Fetch the labelled entity mentions of one article.\n",
    "\n",
    "    @Args:\n",
    "        index_: position of the article in the global source_data\n",
    "\n",
    "    @Return:\n",
    "        rows of (tokens before, entity_text, tokens after, entity_type,\n",
    "        label, handlabel, entity_id) for mentions whose label is not 'None',\n",
    "        ordered by label and then entity_type\n",
    "    '''\n",
    "    doc_id = source_data[index_][0]\n",
    "    # The id is passed as a bound parameter instead of being spliced into the SQL text.\n",
    "    sql = (\n",
    "        \" select B.tokens[A.begin_index-10:A.begin_index] as before,A.entity_text,\"\n",
    "        \"B.tokens[A.end_index:A.end_index+10],A.entity_type,A.label,A.handlabel,A.entity_id\"\n",
    "        \" from entity_mention A,sentences B\"\n",
    "        \" where A.doc_id=B.doc_id and A.sentence_index=B.sentence_index and A.label !='None' \"\n",
    "        \" and B.doc_id=%s order by A.label,A.entity_type \"\n",
    "    )\n",
    "    return _fetch_all(sql, (str(doc_id),))\n",
    "\n",
    "\n",
    "def getCodeName(index_):\n",
    "    '''\n",
    "    Look up the code and name stored for one article in articles_processed.\n",
    "\n",
    "    @Args:\n",
    "        index_: position of the article in the global source_data\n",
    "\n",
    "    @Return:\n",
    "        (code, name) of the first matching row\n",
    "\n",
    "    @Raises:\n",
    "        IndexError: when no row exists for the article id\n",
    "    '''\n",
    "    doc_id = source_data[index_][0]\n",
    "    rows = _fetch_all(\" select code,name from articles_processed where id=%s \", (str(doc_id),))\n",
    "    if not rows:\n",
    "        raise IndexError(\"no articles_processed row for id %r\" % (doc_id,))\n",
    "    return rows[0][0], rows[0][1]\n",
    "\n",
    "\n",
    "entity_data = []\n",
    "\n",
    "# org and company share the same role table; each gets its own copy.\n",
    "_ROLE_LABELS = {\"0\":\"角色-招标人\",\"1\":\"角色-代理人\",\"2\":\"角色-中标/第一候选人\",\"3\":\"角色-第二候选人\",\"4\":\"角色-第三候选人\",\"5\":\"角色-无\"}\n",
    "\n",
    "# entity_type -> label code -> display text\n",
    "BS_dic = {\"org\": dict(_ROLE_LABELS),\n",
    "          \"company\": dict(_ROLE_LABELS),\n",
    "          \"money\":{\"0\":\"金额-招标金额\",\"1\":\"金额-中投标金额\",\"2\":\"金额-其他金额\"},\n",
    "          \"person\":{\"0\":\"联系人-非目标联系人\",\"1\":\"联系人-招标联系人\",\"2\":\"联系人-代理联系人\",\"3\":\"联系人-联系人\"}}\n",
    "\n",
    "\n",
    "def getBS(entity):\n",
    "    '''\n",
    "    Translate an entity row into its display label.\n",
    "\n",
    "    @Args:\n",
    "        entity: a row as returned by getEntitys ([3] is entity_type, [4] is label)\n",
    "\n",
    "    @Return:\n",
    "        the display text from BS_dic; raises KeyError for an unknown type or label\n",
    "    '''\n",
    "    return BS_dic[entity[3]][entity[4]]\n",
    "\n",
    "\n",
    "def next_article(b):\n",
    "    '''\n",
    "    Save the labels of the current article, then move on to the next one.\n",
    "\n",
    "    Relies on out_code, saveData and getOutput, which are defined elsewhere\n",
    "    in this notebook. Nothing is loaded when saveData returns a non-zero\n",
    "    status or when the current article is already the last one.\n",
    "\n",
    "    @Args:\n",
    "        b: unused; presumably the widget that triggered the callback\n",
    "    '''\n",
    "    global entity_data, begin_index\n",
    "\n",
    "    save_status = 0\n",
    "    if out_code is not None:\n",
    "        save_status = saveData(entity_data)\n",
    "    if save_status != 0:\n",
    "        return\n",
    "\n",
    "    next_index = begin_index + 1\n",
    "    if next_index >= len(source_data):\n",
    "        print(\"已经是最后一篇\")\n",
    "        return\n",
    "\n",
    "    # The result of make() is ignored, as before; entities are read back either way.\n",
    "    make(next_index, source_data)\n",
    "    entity_data = getEntitys(next_index)\n",
    "    # Only commit the new position once the article was fetched successfully.\n",
    "    begin_index = next_index\n",
    "    getOutput(entity_data)\n",
    "\n",
    "\n",
    "def last_article(b):\n",
    "    '''\n",
    "    Go back to the previous article without saving the current labels.\n",
    "\n",
    "    Relies on getOutput, which is defined elsewhere in this notebook.\n",
    "\n",
    "    @Args:\n",
    "        b: unused; presumably the widget that triggered the callback\n",
    "    '''\n",
    "    global entity_data, begin_index\n",
    "\n",
    "    # At index 0 there is no previous article; stepping to -1 would wrap\n",
    "    # around to the last record of source_data.\n",
    "    if begin_index <= 0:\n",
    "        print(\"已经是第一篇\")\n",
    "        return\n",
    "\n",
    "    prev_index = begin_index - 1\n",
    "    make(prev_index, source_data)\n",
    "    entity_data = getEntitys(prev_index)\n",
    "    begin_index = prev_index\n",
    "    getOutput(entity_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1"
      ]
     },
     "execution_count": 77,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "begin_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3d3f6505f2ed4afca36098960c0ebf6d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HTML(value='