# my_infer.py
  1. import cv2
  2. from PIL import Image
  3. from paddleocr import PaddleOCR
  4. from tools.infer.utility import draw_ocr
  5. import numpy as np
  6. # path = "../temp/complex/710.png"
  7. # path = "../test_files/开标记录表3_page_0.png"
  8. # path = "D:\\Project\\format_conversion\\appendix_test\\temp\\00e959a0bc9011ebaf5a00163e0ae709" + \
  9. # "\\00e95f7cbc9011ebaf5a00163e0ae709_pdf_page0.png"
  10. # path = "../去章文字.jpg"
  11. # path = "../1.jpg"
  12. # path = "../real1.png"
  13. path = "../temp/f1fe9c4ac8e511eb81d700163e0857b6/f1fea1e0c8e511eb81d700163e0857b6.png"
  14. path = "../翻转1.jpg"
  15. # 去掉公章
  16. # image_np = cv2.imread(path)
  17. # cv2.imshow("origin image", image_np)
  18. # cv2.waitKey(0)
  19. # image_np = remove_red_seal(image_np)
  20. # cv2.imwrite("../去章文字.jpg", image_np)
  21. # 去掉下划线
  22. # image_np = cv2.imread(path)
  23. # remove_underline(image_np)
  24. with open(path, "rb") as f:
  25. image = f.read()
  26. ocr_model = PaddleOCR(use_angle_cls=True, lang="ch")
  27. image = cv2.imdecode(np.frombuffer(image, np.uint8), cv2.IMREAD_COLOR)
  28. # # 将bgr转为rbg
  29. np_images = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
  30. # np_images = [cv2.imread(img_data)]
  31. results = ocr_model.ocr(np_images, det=True, rec=True, cls=True)
  32. bbox_list = []
  33. text_list = []
  34. score_list = []
  35. for line in results:
  36. text_list.append(line[-1][0])
  37. bbox_list.append(line[0])
  38. score_list.append(line[-1][1])
  39. # print("len(text_list)", len(text_list))
  40. # print("len(bbox_list)", len(bbox_list))
  41. # print("score_list", score_list)
  42. image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  43. boxes = bbox_list
  44. image = draw_ocr(image, boxes, text_list, score_list, drop_score=0.2)
  45. print(type(image))
  46. image = Image.fromarray(image)
  47. image.show("image")