{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from definition import *\n",
    "import gc\n",
    "import codecs\n",
    "# Imported explicitly (and after the star import) so os.path below does not\n",
    "# depend on whatever names `definition` happens to export.\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Window of the source data handled by this notebook:\n",
    "# label_size items starting at offset label_begin.\n",
    "label_begin = 400\n",
    "label_size = 400\n",
    "source_data = load(source_data_file)[label_begin:label_begin + label_size]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Progress file for this label window; next_article writes the index of the\n",
    "# article currently shown into it.\n",
    "index_path = \"index_{}_{}.txt\".format(label_begin, label_size)\n",
    "\n",
    "# Resume one step before the saved index so the first call to next_article\n",
    "# shows the saved article again. -1 means nothing has been shown yet.\n",
    "if os.path.exists(index_path):\n",
    "    with codecs.open(index_path, \"r\") as f:\n",
    "        begin_index = int(f.read().strip()) - 1\n",
    "else:\n",
    "    begin_index = -1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "entity_data = []\n",
    "\n",
    "\n",
    "def next_article(b):\n",
    "    \"\"\"Save the labels of the current article, then show the next one.\n",
    "\n",
    "    The position advances only when there is nothing to save (out_code is\n",
    "    None) or saveData returns 0, and only while another article exists in\n",
    "    source_data. The current position is then written to index_path and a\n",
    "    garbage collection is run.\n",
    "\n",
    "    b is unused; presumably the widget passed in by the click handler --\n",
    "    confirm against the cell that registers this callback.\n",
    "    \"\"\"\n",
    "    global entity_data, begin_index\n",
    "\n",
    "    # NOTE(review): out_code, out_name and out_vbox are read from this\n",
    "    # notebook's namespace; confirm they are rebound here and not only inside\n",
    "    # the `definition` module.\n",
    "    status = 0\n",
    "    if out_code is not None:\n",
    "        status = saveData(entity_data, out_code, begin_index, source_data, out_name, out_vbox)\n",
    "\n",
    "    if status == 0:\n",
    "        if begin_index + 1 < len(source_data):\n",
    "            begin_index += 1\n",
    "            make(begin_index, source_data)\n",
    "            entity_data = getEntitys(begin_index, source_data)\n",
    "            getOutput(entity_data)\n",
    "        else:\n",
    "            # Already on the last article: stay put instead of stepping past\n",
    "            # the end of source_data.\n",
    "            print(\"已经是最后一篇\")\n",
    "\n",
    "    # Leaving the with-block closes (and therefore flushes) the file.\n",
    "    with codecs.open(index_path, \"w\") as f:\n",
    "        f.write(str(begin_index))\n",
    "\n",
    "    print(\"回收\", gc.collect())\n",
    "\n",
    "\n",
    "def last_article(b):\n",
    "    \"\"\"Show the previous article; nothing is saved and index_path is not updated.\n",
    "\n",
    "    b is unused; presumably the widget passed in by the click handler --\n",
    "    confirm against the cell that registers this callback.\n",
    "    \"\"\"\n",
    "    global entity_data, begin_index\n",
    "\n",
    "    # begin_index is -1 before anything is shown and 0 on the first article.\n",
    "    # Stepping back from either would hand a negative index to make and\n",
    "    # getEntitys, which for a Python sequence addresses items from the end.\n",
    "    if begin_index <= 0:\n",
    "        print(\"已经是第一篇\")\n",
    "        return\n",
    "\n",
    "    begin_index -= 1\n",
    "    make(begin_index, source_data)\n",
    "    entity_data = getEntitys(begin_index, source_data)\n",
    "    getOutput(entity_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id":
"bef4d8bd2963427ebffa800901e750b3", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='