"""Flask OCR interface.

Exposes a ``POST /ocr`` endpoint that receives a base64-encoded image in the
form field ``data`` and returns the recognised text and bounding boxes as a
JSON string of the form ``{"text": "<str(list)>", "bbox": "<str(list)>"}``.

Error conditions are reported in-band with a single-element list in both
fields:

* ``[-9]`` - the request carried no form data
* ``[-5]`` - a ``TimeoutError`` was raised while processing
* ``[-1]`` - any other exception was raised while processing
"""
import base64
import io
import json
import multiprocessing as mp
import socket
import sys
import os
from PIL import Image
# Make the parent directory importable; this has to run before the
# ``format_convert`` imports below.
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
import time
import traceback
from multiprocessing.context import Process
import cv2
import requests
import logging
import numpy as np
# NOTE(review): kept ahead of the format_convert imports exactly as in the
# original ordering - presumably so the flag is visible when Paddle is
# loaded; confirm before moving it.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
from format_convert.utils import request_post, test_gpu, get_intranet_ip, log, get_md5_from_bytes, bytes2np
from flask import Flask, request
from format_convert import _global


# Interface configuration
app = Flask(__name__)

# Passed both to the PaddleOCR constructor and to every ``ocr_model.ocr`` call.
use_angle_cls = False

# Sample image (as a data URI) used by ``test_ocr_model``.
_SAMPLE_IMAGE_DATA_URI = """
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAASwAAAAeCAYAAACWuCNnAAAE3ElEQVR42u2dQWjUQBSGi4gnEURERKQgIiIiggdP4sWDiPTgvSAK4sFDEe8iIohHEW8iUsSLFBERQaSI9CBIERGRgpQiHtpuMkl2Pa/vH2fW2ZhssttkO8v+AyHTzCT53nT23zdvZpOJIAh2TEhqNpu7VaS+THiWWq3WHuFaQV7279bW1rarWP2y5eRnYv8ZI36l1PF2u71N52P11V7Ap+QaKbwPGo3GPudv8jOx/4wLv5xw2+ajKDrhY4cT5Z23xkKJhfOG5KeSJDlEfib2H/L7Y2ysHotRp62xsp8J4/Bc2ljyM7H/kN+LFEbhGxipjU3C86LYiRy7R34m9h/ye2s0DJZtEsaK0aujFO8ZdX72H/Kz/zup3W5vsfmgGRyJ4/hkug7cSBnn3nHrkZ+J/Yf8/8agkZoXRfyOPKYoaxnnRuqHbMti5EH9N1Q4DE91KXQYHnWNJf+IxJDY/uQfFj8qQzn1tCSglLpfm8v4d2y7omJ1E4bLvWa7WKQRBjGW/JsrVmx/8lfOf/HSlbatZPO4gVTeKhUjZxx6FdOR6XPSKa8s77hc8wIM1flYfcD4VrYW6kOpJf9etre97plxzZ78lX4wc/jlG+WZy9/nNYfGX4tYsf3JXwP/f2KCfd4GY+H6bUSwsrb13+t7jaLOWaMxBkZZEAT7scfisrKChelSaaxvmE6F2ouhd8M4vObyV5ny+PHNAH6U28VxPvLX0IHZ/uSvhb8jIlZQ5GbTUvmyzkdqCSeZ/CrGoQBBsAzHAFRWsNz7ZNWT63+Se10P4uCsLbOBO5ev0FjDb66p+c0Yu4sf5Xn8Aw5/uvjtP63v62wSf2XtwPYnf438mQJjhOsJ9nLya5xkXb8sDynPe8oSpqxyd02G68FJfjHPKytovDnwgzvNL2WfcQ/TGAuVjN8z+E1+MUmSXQP884fKX0Psiu1P/lr482NYcpJUPuMMB/VJ5keKjwYZEmYNPdPlcAVdYXLXavQVwzL8thHT/HV9WMHf+W3UBtaabBZ/laLF9id/1fyd9RO9YlcG5LVZYj/Vy9MZ9LgrZJi+xB7rNoqE0F3/Iar8Aq5pl1IbxXf5G0njcI3xmwU7/Zrmz0q+8feb2P7kHyY/Mk/hHloxgAvnBshc7wqPh8BUY1pgsrynokB7nujBjRSmnWUmAYwhmh8zEln82ugUf82ehebv4xvJK/4BvlHZ/uQfGn/noIlbvbRDMkwpOrGkn0Y9Z/MC4Hkxq6LydF3X4ys71HRmJzQ/HlNh+U15h1/KDwzL4+jH/fWFf1D3ne1P/mHw60pZImIj9WYt1LH0MvpeolM09CsTQC8rWJZfjProuKVTll8HBTP4vfmwk5/85C/PLwUPpcItu/zdDAFnigLcvQRlIx5WmetjOhZKnMWvV9wKv88BafKTn/wb4O9aHBqraayhQMCsKLheVmjq8LDMbMcS8piJ0NO1WBjn8Lvupm+J/OQn/wD8GC+aRz9Muk/8w0mIzPuq0pia1UaTn/zkHx9+85D45bxAmGdGzpOf/OQfY34peI7f+kDpfDQWv0XSY1tRYROQe0V+8pN/TPn5miPyk5/8I8PP1xyRn/zkHxl+viaI/OQnP/mrMpavOSI/+ck/SomvOSI/+ck/cqLF1xyRn/zk9yLxNUfkJz/5i9If9M5atZCy5xcAAAAASUVORK5CYII=
"""


def _error_result(code):
    """Build the in-band error payload ``{"text": "[code]", "bbox": "[code]"}``."""
    return {"text": str([code]), "bbox": str([code])}


@app.route('/ocr', methods=['POST'])
def _ocr():
    """Handle ``POST /ocr``.

    Reads the form fields ``data`` (base64 image), ``md5`` and the optional
    ``only_rec`` (converted with ``int``; defaults to 0), runs OCR and
    returns the result as a JSON string.

    The OCR model is created on the first request and cached in the module
    globals under ``"global_ocr_model"``.

    Returns:
        A JSON string with the keys ``"text"`` and ``"bbox"``. On failure
        both hold ``"[-9]"`` (no form data), ``"[-5]"`` (``TimeoutError``)
        or ``"[-1]"`` (any other exception).
    """
    _global._init()
    # ``port`` is looked up in the module globals; it is None unless some
    # other code has set it there.
    _global.update({"port": globals().get("port")})
    start_time = time.time()
    log("into ocr_interface _ocr")
    try:
        if not request.form:
            log("ocr no data!")
            return json.dumps(_error_result(-9))
        data = request.form.get("data")
        _md5 = request.form.get("md5")
        only_rec = request.form.get("only_rec")
        # A non-numeric value raises ValueError here and is reported as -1.
        only_rec = 0 if only_rec is None else int(only_rec)
        _global.update({"md5": _md5})

        ocr_model = globals().get("global_ocr_model")
        if ocr_model is None:
            log("----------- init ocr_model ------------")
            ocr_model = OcrModels().get_model()
            globals().update({"global_ocr_model": ocr_model})
        text = ocr(data, ocr_model, only_rec)
        return json.dumps(text)
    except TimeoutError:
        return json.dumps(_error_result(-5))
    except Exception:
        # Request boundary: report every ordinary failure in-band, but let
        # SystemExit / KeyboardInterrupt propagate.
        traceback.print_exc()
        return json.dumps(_error_result(-1))
    finally:
        log("ocr interface finish time " + str(time.time()-start_time))


def ocr(data, ocr_model, only_rec=0):
    """Decode a base64 image and run OCR on it.

    Args:
        data: Base64-encoded image (anything ``base64.b64decode`` accepts).
        ocr_model: Object exposing ``ocr(img, det=..., rec=..., cls=...)``.
        only_rec: Truthy to skip detection and only run recognition.

    Returns:
        The dict produced by ``picture2text``, or the ``[-5]`` error payload
        when a ``TimeoutError`` is raised.
    """
    log("into ocr_interface ocr")
    try:
        img_data = base64.b64decode(data)
        return picture2text(img_data, ocr_model, only_rec)
    except TimeoutError:
        return _error_result(-5)


def picture2text(img_data, ocr_model, only_rec=0):
    """Run the OCR model on raw image bytes.

    Args:
        img_data: Encoded image bytes, converted with ``bytes2np``.
        ocr_model: Object exposing ``ocr(img, det=..., rec=..., cls=...)``.
        only_rec: Truthy to call the model with ``det=False``; the text list
            then holds the single value ``results[0][0]`` and the bbox list
            stays empty.

    Returns:
        ``{"text": str(text_list), "bbox": str(bbox_list)}``. If any
        exception other than ``TimeoutError`` occurs, both lists are empty.

    Raises:
        TimeoutError: Re-raised unchanged so callers can report it.
    """
    log("into ocr_interface picture2text")
    try:
        # Convert the binary stream to an np.ndarray (np.uint8: 8-bit pixels)
        img = bytes2np(img_data)
        # cv2.imwrite('ocr.jpg', img)

        # Predict: detection is skipped in recognition-only mode.
        results = ocr_model.ocr(img, det=not only_rec, rec=True, cls=use_angle_cls)

        # Collect the recognition results.
        if only_rec:
            text_list = [results[0][0]]
            bbox_list = []
        else:
            text_list = [line[-1][0] for line in results]
            bbox_list = [line[0] for line in results]
        return {"text": str(text_list), "bbox": str(bbox_list)}
    except TimeoutError:
        # Bare raise keeps the original message and traceback.
        raise
    except Exception:
        log("picture2text error!")
        traceback.print_exc()
        return {"text": str([]), "bbox": str([])}


def get_best_predict_size(image_np):
    """Return the candidate sizes closest to the image's height and width.

    Args:
        image_np: Object whose ``shape[0]`` is the height and ``shape[1]``
            the width.

    Returns:
        ``(best_height, best_width)``, each picked from the fixed candidate
        list. On a tie the larger candidate wins because the list is in
        descending order and ``min`` keeps the first minimum.
    """
    sizes = [1280, 1152, 1024, 896, 768, 640, 512, 384, 256, 128]

    def closest(length):
        return min(sizes, key=lambda size: abs(length - size))

    return closest(image_np.shape[0]), closest(image_np.shape[1])


class OcrModels:
    """Holder that constructs the PaddleOCR model once on instantiation."""

    def __init__(self):
        """Create the PaddleOCR model.

        Raises:
            RuntimeError: If constructing ``PaddleOCR`` fails; the original
                exception is attached as ``__cause__``.
        """
        # Imported lazily, so the module can be imported without loading it.
        from ocr.paddleocr import PaddleOCR
        try:
            log('----------- init ocr model ---------------')
            self.ocr_model = PaddleOCR(use_angle_cls=use_angle_cls, lang="ch")
        except Exception as err:
            traceback.print_exc()
            raise RuntimeError("failed to initialise the PaddleOCR model") from err

    def get_model(self):
        """Return the model created in ``__init__``."""
        return self.ocr_model


def test_ocr_model(from_remote=True):
    """Manual smoke test: run OCR on the embedded sample image and print it.

    Args:
        from_remote: When true, post the image to the remote ``/ocr``
            endpoint with ``request_post``; otherwise build the model in
            this process and call ``ocr`` directly.
    """
    image_data = _SAMPLE_IMAGE_DATA_URI.split('data:image/png;base64,')[1]

    # Decode the base64 string, then re-encode it to get a clean payload.
    file_bytes = base64.b64decode(image_data)
    file_base64 = base64.b64encode(file_bytes)
    _md5 = get_md5_from_bytes(file_bytes)[0]

    only_rec = 0

    _global._init()
    _global.update({"port": 15010, "md5": _md5})

    if from_remote:
        file_json = {"data": file_base64, "md5": _md5, 'only_rec': only_rec}
        # Alternative endpoints used during development:
        # _url = "http://192.168.2.102:17000/ocr"
        # _url = "http://127.0.0.1:17000/ocr"
        _url = "http://120.132.118.205:17000/ocr"
        print(json.loads(request_post(_url, file_json)))
    else:
        ocr_model = OcrModels().get_model()
        result = ocr(file_base64, ocr_model, only_rec=only_rec)
        print('bbox', result.get('bbox'))
        print(result)


if __name__ == '__main__':
    test_ocr_model()