{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the wildcard import is presumably what provides load, source_data_file,\n",
    "# make, saveData, getEntitys, getCodeName, getHbox, widgets, display and clear_output\n",
    "# -- confirm against definition.py and import those names explicitly.\n",
    "from definition import *\n",
    "import gc\n",
    "import codecs\n",
    "# os is used below for the checkpoint lookup; import it explicitly instead of\n",
    "# relying on the wildcard import to leak it into the namespace.\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Slice of the loaded data handled in this session: up to label_size items,\n",
    "# starting at position label_begin.\n",
    "label_begin = 400\n",
    "label_size = 400\n",
    "source_data = load(source_data_file)[label_begin:label_begin+label_size]\n",
    "\n",
    "# Checkpoint file that remembers which article of this slice is currently open.\n",
    "index_file = \"index_\"+str(label_begin)+\"_\"+str(label_size)+\".txt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Resume from the checkpoint written by next_article; -1 means no article is open yet.\n",
    "begin_index = -1\n",
    "if os.path.exists(index_file):\n",
    "    with codecs.open(index_file, \"r\") as f:\n",
    "        saved_index = f.read().strip()\n",
    "    # An empty checkpoint file is treated as \"no checkpoint\" instead of crashing in int('').\n",
    "    if saved_index:\n",
    "        # Step back by one so that the first next_article() call reopens the saved article.\n",
    "        begin_index = int(saved_index) - 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "entity_data = []\n",
    "\n",
    "\n",
    "def next_article(b):\n",
    "    \"\"\"Save the labels of the open article, then open the next one.\n",
    "\n",
    "    Used as a button click callback; the argument ``b`` is not used.\n",
    "    Nothing is saved while ``out_code`` is still None (no article shown yet).\n",
    "    The cursor only advances when ``saveData`` returns 0 and another article is\n",
    "    left in ``source_data``; the new position is then written to ``index_file``.\n",
    "    \"\"\"\n",
    "    global entity_data, begin_index\n",
    "\n",
    "    status = 0\n",
    "    if out_code is not None:\n",
    "        status = saveData(entity_data, out_code, begin_index, source_data, out_name, out_vbox)\n",
    "\n",
    "    if status == 0:\n",
    "        if begin_index + 1 >= len(source_data):\n",
    "            # Advancing here would index past the end of source_data and raise.\n",
    "            print(\"已经是最后一篇\")\n",
    "        else:\n",
    "            begin_index += 1\n",
    "            make(begin_index, source_data)\n",
    "            entity_data = getEntitys(begin_index, source_data)\n",
    "            getOutput(entity_data)\n",
    "\n",
    "            # Persist the cursor; the with-block closes (and flushes) the file.\n",
    "            with codecs.open(index_file, \"w\") as f:\n",
    "                f.write(str(begin_index))\n",
    "\n",
    "    print(\"回收\", gc.collect())\n",
    "\n",
    "\n",
    "def last_article(b):\n",
    "    \"\"\"Reopen the previous article without saving the one currently shown.\n",
    "\n",
    "    Used as a button click callback; the argument ``b`` is not used.\n",
    "    The checkpoint file is not rewritten here.\n",
    "    \"\"\"\n",
    "    global entity_data, begin_index\n",
    "\n",
    "    # -1 means nothing is open yet and 0 is the first article. Decrementing from 0\n",
    "    # would give -1, which Python indexing resolves to the LAST article of the slice.\n",
    "    if begin_index <= 0:\n",
    "        print(\"已经是第一篇\")\n",
    "        return\n",
    "\n",
    "    begin_index -= 1\n",
    "    make(begin_index, source_data)\n",
    "    entity_data = getEntitys(begin_index, source_data)\n",
    "    getOutput(entity_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id":
"bef4d8bd2963427ebffa800901e750b3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='关于温州大学暂停2019—2020年度采购代理机构服务招标项目的通知
\\r\\n
\\r…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2788d403a7724e72a2fa5c8fe928c973",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(Text(value='', description='项目编号'), Text(value='温州大学暂停2019—2020年度采购代理机构服务招标项目', description='项目…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "559291a5179f46659849f5a900882601",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "VBox(children=(HBox(children=(ToggleButton(value=False, description='表述错误', icon='check', layout=Layout(height…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a79c1a3ebad24009bf985a140cdea19d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Button(description='此篇完成', style=ButtonStyle())"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "c60fc9b9f9d44d30a57a6fd57bae5b3b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Button(description='返回上篇', style=ButtonStyle())"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "回收 949\n"
     ]
    }
   ],
   "source": [
    "# Widgets of the article currently on screen; None until the first article is shown.\n",
    "# next_article hands them to saveData.\n",
    "out_code = None\n",
    "out_name = None\n",
    "out_vbox = None\n",
    "\n",
    "\n",
    "def getOutput(data):\n",
    "    \"\"\"Redraw the labelling UI for the article at ``begin_index``.\n",
    "\n",
    "    Clears the cell output, then displays, in this order: the HTML widget filled\n",
    "    from ``source_data[begin_index][1]``, the project code/name text boxes, one row\n",
    "    per entry of ``data`` (built by ``getHbox``) and the two navigation buttons.\n",
    "    The text boxes and the row container are stored in the globals ``out_code``,\n",
    "    ``out_name`` and ``out_vbox``.\n",
    "    \"\"\"\n",
    "    global out_code, out_name, out_vbox\n",
    "\n",
    "    clear_output()\n",
    "    code, name = getCodeName(begin_index, source_data)\n",
    "    textarea = widgets.HTML(value=source_data[begin_index][1], layout=widgets.Layout(width='1200px', height='400px'))\n",
    "    out_code = widgets.Text(value=code, description=\"项目编号\")\n",
    "    out_name = widgets.Text(value=name, description=\"项目名称\")\n",
    "    hbox = widgets.HBox([out_code, out_name])\n",
    "    out_vbox = widgets.VBox([getHbox(item) for item in data])\n",
    "    display(textarea)\n",
    "    display(hbox)\n",
    "    display(out_vbox)\n",
    "\n",
    "    # Save-and-advance button.\n",
    "    okText = widgets.Button(description=\"此篇完成\")\n",
    "    okText.on_click(next_article)\n",
    "    display(okText)\n",
    "    # Go-back button.\n",
    "    backText_2 = widgets.Button(description=\"返回上篇\")\n",
    "    backText_2.on_click(last_article)\n",
    "    display(backText_2)\n",
    "\n",
    "\n",
    "# Entry point: a single start button whose first click opens the first article.\n",
    "okText = widgets.Button(description=\"开始\")\n",
    "okText.on_click(next_article)\n",
    "display(okText)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Manual jump helper: prints source_data[begin_index][0], then moves the cursor to 223.\n",
    "# NOTE(review): next_article increments the cursor first, so the next click opens index 224;\n",
    "# on Restart & Run All this also overrides the checkpoint read from the index file --\n",
    "# confirm this cell is still wanted.\n",
    "print(source_data[begin_index][0])\n",
    "begin_index =223"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Disabled database probe, kept for reference.\n",
    "# conn = getConnection()\n",
    "# cursor = conn.cursor()\n",
    "# sql = \" select * from articles_processed limit 3\"\n",
    "# cursor.execute(sql)\n",
    "# rows = cursor.fetchall()\n",
    "# for row in rows:\n",
    "#     print(row)\n",
    "print(source_data[begin_index][0])\n",
    "# Preview the widget row of the first entity of the current article. The entities live in\n",
    "# the global entity_data; `data` only exists as getOutput's parameter, so the previous\n",
    "# getHbox(data[0]) raised NameError. Skipped while no entities are loaded.\n",
    "if entity_data:\n",
    "    display(getHbox(entity_data[0]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}