ocr_interface.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
import base64
import json
import multiprocessing as mp
import sys
import os
# Put the parent of this file's directory on the import path. This runs before
# the `ocr.paddleocr` import below -- presumably so that package resolves;
# confirm against the repository layout.
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
import time
import traceback
from multiprocessing.context import Process
import cv2
import requests
import logging
import numpy as np
# Set before PaddleOCR is imported.
# NOTE(review): looks like a Paddle runtime flag that has to be in the
# environment before the framework loads -- confirm its meaning.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
from ocr.paddleocr import PaddleOCR
# Root logger: INFO level, "time - logger name - level - message" format.
logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger used by log().
logger = logging.getLogger(__name__)
  18. def log(msg):
  19. '''
  20. @summary:打印信息
  21. '''
  22. logger.info(msg)
  23. def ocr(data, ocr_model):
  24. logging.info("into ocr_interface ocr")
  25. try:
  26. img_data = base64.b64decode(data)
  27. text = picture2text(img_data, ocr_model)
  28. return text
  29. except TimeoutError:
  30. raise TimeoutError
# NOTE(review): module-level value that nothing in the visible part of this
# file reads or writes -- check for external users before removing.
flag = 0
  32. def picture2text(img_data, ocr_model):
  33. logging.info("into ocr_interface picture2text")
  34. try:
  35. start_time = time.time()
  36. # 二进制数据流转np.ndarray [np.uint8: 8位像素]
  37. img = cv2.imdecode(np.frombuffer(img_data, np.uint8), cv2.IMREAD_COLOR)
  38. # 将bgr转为rbg
  39. try:
  40. np_images = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
  41. except cv2.error as e:
  42. if "src.empty()" in str(e):
  43. logging.info("ocr_interface picture2text image is empty!")
  44. return {"text": str([]), "bbox": str([])}
  45. # resize
  46. # cv2.imshow("before resize", np_images)
  47. # print("np_images.shape", np_images.shape)
  48. # best_h, best_w = get_best_predict_size(np_images)
  49. # np_images = cv2.resize(np_images, (best_w, best_h), interpolation=cv2.INTER_AREA)
  50. # cv2.imshow("after resize", np_images)
  51. # print("np_images.shape", np_images.shape)
  52. # cv2.waitKey(0)
  53. # 预测
  54. results = ocr_model.ocr(np_images, det=True, rec=True, cls=True)
  55. # 循环每张图片识别结果
  56. text_list = []
  57. bbox_list = []
  58. for line in results:
  59. # print("ocr_interface line", line)
  60. text_list.append(line[-1][0])
  61. bbox_list.append(line[0])
  62. # 查看bbox
  63. # img = np.zeros((np_images.shape[1], np_images.shape[0]), np.uint8)
  64. # img.fill(255)
  65. # for box in bbox_list:
  66. # print(box)
  67. # cv2.rectangle(img, (int(box[0][0]), int(box[0][1])),
  68. # (int(box[2][0]), int(box[2][1])), (0, 0, 255), 1)
  69. # cv2.imshow("bbox", img)
  70. # cv2.waitKey(0)
  71. logging.info("ocr model use time: " + str(time.time()-start_time))
  72. return {"text": str(text_list), "bbox": str(bbox_list)}
  73. except TimeoutError:
  74. raise TimeoutError
  75. except Exception as e:
  76. logging.info("picture2text error!")
  77. print("picture2text", traceback.print_exc())
  78. return {"text": str([]), "bbox": str([])}
  79. def get_best_predict_size(image_np):
  80. sizes = [1280, 1152, 1024, 896, 768, 640, 512, 384, 256, 128]
  81. min_len = 10000
  82. best_height = sizes[0]
  83. for height in sizes:
  84. if abs(image_np.shape[0] - height) < min_len:
  85. min_len = abs(image_np.shape[0] - height)
  86. best_height = height
  87. min_len = 10000
  88. best_width = sizes[0]
  89. for width in sizes:
  90. if abs(image_np.shape[1] - width) < min_len:
  91. min_len = abs(image_np.shape[1] - width)
  92. best_width = width
  93. return best_height, best_width
  94. class OcrModels:
  95. def __init__(self):
  96. try:
  97. self.ocr_model = PaddleOCR(use_angle_cls=True, lang="ch")
  98. except:
  99. print(traceback.print_exc())
  100. raise RuntimeError
  101. def get_model(self):
  102. return self.ocr_model
  103. if __name__ == '__main__':
  104. # if len(sys.argv) == 2:
  105. # port = int(sys.argv[1])
  106. # else:
  107. # port = 15011
  108. #
  109. # app.run(host='0.0.0.0', port=port, threaded=False, debug=False)
  110. # log("OCR running")
  111. file_path = "C:/Users/Administrator/Desktop/error1.png"
  112. # file_path = "1.png"
  113. with open(file_path, "rb") as f:
  114. file_bytes = f.read()
  115. file_base64 = base64.b64encode(file_bytes)
  116. ocr_model = OcrModels().get_model()
  117. result = ocr(file_base64, ocr_model)
  118. result = ocr(file_base64, ocr_model)
  119. text_list = eval(result.get("text"))
  120. box_list = eval(result.get("bbox"))
  121. new_list = []
  122. for i in range(len(text_list)):
  123. new_list.append([text_list[i], box_list[i]])
  124. # print(new_list[0][1])
  125. new_list.sort(key=lambda x: (x[1][1][0], x[1][0][0]))
  126. for t in new_list:
  127. print(t[0])