123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
# Scratch script: run PaddleOCR on a single image file, then draw the detected
# boxes, texts and scores onto the image and open it in the default viewer.
import cv2
from PIL import Image
from paddleocr import PaddleOCR
from tools.infer.utility import draw_ocr
import numpy as np
from format_convert.convert import remove_red_seal, remove_underline

# Alternative sample inputs kept for manual testing; only the last
# uncommented assignment to `path` is used.
# path = "../temp/complex/710.png"
# path = "../test_files/开标记录表3_page_0.png"
# path = "D:\\Project\\format_conversion\\appendix_test\\temp\\00e959a0bc9011ebaf5a00163e0ae709" + \
#        "\\00e95f7cbc9011ebaf5a00163e0ae709_pdf_page0.png"
# path = "../去章文字.jpg"
# path = "../1.jpg"
# path = "../real1.png"
# path = "../temp/f1fe9c4ac8e511eb81d700163e0857b6/f1fea1e0c8e511eb81d700163e0857b6.png"
path = "../翻转1.jpg"

# Optional pre-processing: remove the red official seal.
# image_np = cv2.imread(path)
# cv2.imshow("origin image", image_np)
# cv2.waitKey(0)
# image_np = remove_red_seal(image_np)
# cv2.imwrite("../去章文字.jpg", image_np)

# Optional pre-processing: remove underlines.
# image_np = cv2.imread(path)
# remove_underline(image_np)

# Read the raw bytes and decode them in memory instead of calling
# cv2.imread(path) directly.
with open(path, "rb") as f:
    image_bytes = f.read()

ocr_model = PaddleOCR(use_angle_cls=True, lang="ch")

# cv2.imdecode yields a BGR array, or None when the bytes are not a
# decodable image; fail here with a clear message rather than inside cvtColor.
bgr_image = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
if bgr_image is None:
    raise ValueError(f"could not decode image file: {path}")

# Swap the channel order BGR -> RGB before recognition.
# NOTE(review): the swap is kept exactly as the script always did it; confirm
# which channel order the OCR model actually expects.
np_images = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
# np_images = [cv2.imread(img_data)]
results = ocr_model.ocr(np_images, det=True, rec=True, cls=True)

# Each entry is indexed as line[0] -> box and line[-1] -> (text, score).
# NOTE(review): assumes `results` is a flat list of such entries; some
# PaddleOCR releases wrap the entries in one extra list per image — confirm
# against the installed version.
bbox_list = [line[0] for line in results]
text_list = [line[-1][0] for line in results]
score_list = [line[-1][1] for line in results]
# print("len(text_list)", len(text_list))
# print("len(bbox_list)", len(bbox_list))
# print("score_list", score_list)

# draw_ocr is given an RGB PIL image; its result is wrapped with
# Image.fromarray again before being shown.
pil_image = Image.fromarray(cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB))
annotated = draw_ocr(pil_image, bbox_list, text_list, score_list, drop_score=0.2)
print(type(annotated))
Image.fromarray(annotated).show("image")
|