{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Baidu OCR quick start\n",
    "\n",
    "Fetches an OAuth access token from Baidu AI and sends local images to the OCR REST API.\n",
    "\n",
    "Before running, export the key pair that Baidu issued when the application was registered.\n",
    "The keys must never be stored in the notebook itself:\n",
    "\n",
    "- `BAIDU_OCR_CLIENT_ID`\n",
    "- `BAIDU_OCR_CLIENT_SECRET`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import json\n",
    "import os\n",
    "\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Names of the environment variables holding the key pair issued by Baidu.\n",
    "CLIENT_ID_ENV = 'BAIDU_OCR_CLIENT_ID'\n",
    "CLIENT_SECRET_ENV = 'BAIDU_OCR_CLIENT_SECRET'\n",
    "\n",
    "TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'\n",
    "# Endpoint names: 'general' (standard), 'accurate_basic' (high accuracy),\n",
    "# 'accurate' (high accuracy with text positions).\n",
    "OCR_URL_TEMPLATE = 'https://aip.baidubce.com/rest/2.0/ocr/v1/{endpoint}'\n",
    "REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the kernel"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get an access token"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_access_token(client_id, client_secret):\n",
    "    \"\"\"Exchange the application's key pair for an OAuth access token.\n",
    "\n",
    "    Args:\n",
    "        client_id: API key issued by Baidu for the registered application.\n",
    "        client_secret: Secret key issued together with ``client_id``.\n",
    "\n",
    "    Returns:\n",
    "        The ``access_token`` string from the token endpoint's JSON reply.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If the reply is not JSON or carries no ``access_token``.\n",
    "    \"\"\"\n",
    "    response = requests.get(\n",
    "        TOKEN_URL,\n",
    "        params={\n",
    "            'grant_type': 'client_credentials',\n",
    "            'client_id': client_id,\n",
    "            'client_secret': client_secret\n",
    "        },\n",
    "        headers={'Content-Type': 'application/json;charset=UTF-8'},\n",
    "        timeout=REQUEST_TIMEOUT\n",
    "    )\n",
    "    try:\n",
    "        payload = response.json()\n",
    "    except ValueError:\n",
    "        payload = {}\n",
    "    token = payload.get('access_token')\n",
    "    if not token:\n",
    "        # raise_for_status() is avoided on purpose: its message embeds the\n",
    "        # request URL, and that URL contains the client secret.\n",
    "        raise RuntimeError('Baidu token request failed (HTTP {}): {}'.format(\n",
    "            response.status_code,\n",
    "            payload.get('error_description', 'no access_token in response')))\n",
    "    return token"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# os.environ[...] raises a KeyError naming the variable when it is not set.\n",
    "# The token is a credential, so it is deliberately not printed.\n",
    "access_token = get_access_token(os.environ[CLIENT_ID_ENV], os.environ[CLIENT_SECRET_ENV])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## OCR helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def baidu_ocr_response(imgpath, token=None, endpoint='accurate'):\n",
    "    \"\"\"Send one image to a Baidu OCR endpoint and return the decoded JSON reply.\n",
    "\n",
    "    Args:\n",
    "        imgpath: Path of the image file to recognise.\n",
    "        token: OAuth access token; defaults to the notebook-level ``access_token``.\n",
    "        endpoint: Last path segment of the OCR URL, e.g. 'general',\n",
    "            'accurate_basic' or 'accurate'.\n",
    "\n",
    "    Returns:\n",
    "        The reply parsed into a dict.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If the reply is not JSON or contains an ``error_code``.\n",
    "    \"\"\"\n",
    "    if token is None:\n",
    "        token = access_token\n",
    "    # The context manager closes the file even when reading fails.\n",
    "    with open(imgpath, 'rb') as image_file:\n",
    "        image_b64 = base64.b64encode(image_file.read())\n",
    "    response = requests.post(\n",
    "        OCR_URL_TEMPLATE.format(endpoint=endpoint),\n",
    "        params={'access_token': token},\n",
    "        # The image belongs in the form-encoded body; placing it in the query\n",
    "        # string makes the URL as long as the image itself.\n",
    "        data={'image': image_b64},\n",
    "        headers={'Content-Type': 'application/x-www-form-urlencoded'},\n",
    "        timeout=REQUEST_TIMEOUT\n",
    "    )\n",
    "    try:\n",
    "        reply = json.loads(response.content.decode('utf-8'))\n",
    "    except ValueError:\n",
    "        raise RuntimeError('Baidu OCR returned a non-JSON reply (HTTP {})'.format(\n",
    "            response.status_code))\n",
    "    if 'error_code' in reply:\n",
    "        raise RuntimeError('Baidu OCR error {}: {}'.format(\n",
    "            reply['error_code'], reply.get('error_msg')))\n",
    "    return reply\n",
    "\n",
    "\n",
    "def baidu_ocr(imgpath, token=None, endpoint='accurate'):\n",
    "    \"\"\"Recognise the text in an image and return it as a list of strings.\n",
    "\n",
    "    Args:\n",
    "        imgpath: Path of the image file to recognise.\n",
    "        token: OAuth access token; defaults to the notebook-level ``access_token``.\n",
    "        endpoint: OCR variant to call; defaults to 'accurate'.\n",
    "\n",
    "    Returns:\n",
    "        The ``words`` value of every entry in the reply's ``words_result``.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: Propagated from ``baidu_ocr_response`` on an API error.\n",
    "    \"\"\"\n",
    "    reply = baidu_ocr_response(imgpath, token=token, endpoint=endpoint)\n",
    "    return [item['words'] for item in reply.get('words_result', [])]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Standard endpoint: raw reply and parsed words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ocr_reply = baidu_ocr_response(r'test.jpg', endpoint='general')\n",
    "print(json.dumps(ocr_reply, ensure_ascii=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pull the recognised text out of the reply.\n",
    "for item in ocr_reply['words_result']:\n",
    "    print(item['words'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## High-accuracy endpoint with positions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pic = '../FileInfo1031/057b6bb5-fbce-11e9-9bc7-408d5cd36814_四仪乐印.jpg'\n",
    "# Another sample image: '../FileInfo1031/0cd3bc65-fbc6-11e9-9bc7-408d5cd36814_5302.jpg'\n",
    "print(baidu_ocr(pic))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}