"""Ad-hoc script: run PaddleOCR on one image and display the annotated result.

The image at ``path`` is read from disk, passed through the detector,
angle classifier and recognizer, and the detected boxes/texts/scores are
drawn with ``draw_ocr`` and shown in the default image viewer.
"""
import cv2
import numpy as np
from paddleocr import PaddleOCR
from PIL import Image
from tools.infer.utility import draw_ocr

# Alternative sample inputs; assign one of them to `path` to try it:
# path = "../temp/complex/710.png"
# path = "../test_files/开标记录表3_page_0.png"
# path = "D:\\Project\\format_conversion\\appendix_test\\temp\\00e959a0bc9011ebaf5a00163e0ae709" + \
#        "\\00e95f7cbc9011ebaf5a00163e0ae709_pdf_page0.png"
# path = "../去章文字.jpg"
# path = "../1.jpg"
# path = "../real1.png"
# path = "../temp/f1fe9c4ac8e511eb81d700163e0857b6/f1fea1e0c8e511eb81d700163e0857b6.png"
path = "../翻转1.jpg"

# Optional preprocessing, currently disabled (the helpers are not imported
# in this script):
#
# Remove the red official seal:
# image_np = cv2.imread(path)
# cv2.imshow("origin image", image_np)
# cv2.waitKey(0)
# image_np = remove_red_seal(image_np)
# cv2.imwrite("../去章文字.jpg", image_np)
#
# Remove underlines:
# image_np = cv2.imread(path)
# remove_underline(image_np)

# Read raw bytes and decode in memory instead of calling cv2.imread(path).
with open(path, "rb") as f:
    image_bytes = f.read()

ocr_model = PaddleOCR(use_angle_cls=True, lang="ch")

image_bgr = cv2.imdecode(np.frombuffer(image_bytes, np.uint8), cv2.IMREAD_COLOR)
if image_bgr is None:
    # imdecode signals failure by returning None; stop here with a clear
    # message rather than failing later inside cvtColor.
    raise ValueError(f"could not decode image file: {path!r}")

# Convert BGR to RGB. The original used cv2.COLOR_RGB2BGR, which performs
# the same swap of the first and third channels; the code is named here
# for the direction actually applied to the decoded (BGR) image.
np_images = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
# np_images = [cv2.imread(img_data)]

results = ocr_model.ocr(np_images, det=True, rec=True, cls=True)

# Each entry is indexed as line[0] -> box and line[-1] -> (text, score).
# NOTE(review): this assumes `results` is a flat list of detections; some
# PaddleOCR releases appear to return one such list per input image --
# confirm against the installed version.
bbox_list = [line[0] for line in results]
text_list = [line[-1][0] for line in results]
score_list = [line[-1][1] for line in results]
# print("len(text_list)", len(text_list))
# print("len(bbox_list)", len(bbox_list))
# print("score_list", score_list)

# draw_ocr is handed a PIL image in RGB channel order.
image = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
boxes = bbox_list
image = draw_ocr(image, boxes, text_list, score_list, drop_score=0.2)
print(type(image))

# The drawn result is wrapped back into a PIL image for display.
image = Image.fromarray(image)
image.show("image")