# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys

# Directory that contains this file; BASE_DIR further down is derived from it.
__dir__ = os.path.dirname(__file__)
# Put this directory and its parent on sys.path before the project imports
# below (`ocr.*`, `format_convert.*`) are executed.
# NOTE(review): presumably the parent directory is the repository root --
# confirm against the project layout.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
project_path = os.path.abspath(__dir__)
# project_path = ""
import cv2
import numpy as np
from pathlib import Path
import tarfile
import requests
from tqdm import tqdm

# Set before the inference code is imported.
# NOTE(review): this looks like a Paddle runtime flag controlling eager
# tensor deletion -- confirm its exact meaning in the Paddle documentation.
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
from ocr.tools.infer import predict_system
from ocr.ppocr.utils.logging import get_logger
from format_convert.max_compute_config import max_compute

# Module-wide logger shared by every function in this file.
logger = get_logger()
from ocr.ppocr.utils.utility import check_and_read_gif, get_image_file_list

# Only the PaddleOCR class is exported by `from ... import *`.
__all__ = ['PaddleOCR']
  34. model_urls = {
  35. 'det':
  36. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
  37. 'rec': {
  38. 'ch': {
  39. 'url':
  40. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
  41. 'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
  42. },
  43. 'en': {
  44. 'url':
  45. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
  46. 'dict_path': './ppocr/utils/dict/en_dict.txt'
  47. },
  48. 'french': {
  49. 'url':
  50. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
  51. 'dict_path': './ppocr/utils/dict/french_dict.txt'
  52. },
  53. 'german': {
  54. 'url':
  55. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
  56. 'dict_path': './ppocr/utils/dict/german_dict.txt'
  57. },
  58. 'korean': {
  59. 'url':
  60. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
  61. 'dict_path': './ppocr/utils/dict/korean_dict.txt'
  62. },
  63. 'japan': {
  64. 'url':
  65. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
  66. 'dict_path': './ppocr/utils/dict/japan_dict.txt'
  67. }
  68. },
  69. 'cls':
  70. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
  71. }
  72. SUPPORT_DET_MODEL = ['DB']
  73. VERSION = 2.0
  74. SUPPORT_REC_MODEL = ['CRNN']
  75. # BASE_DIR = os.path.expanduser("~/.paddleocr/")
  76. BASE_DIR = project_path + "/model/"
  77. def download_with_progressbar(url, save_path):
  78. response = requests.get(url, stream=True)
  79. total_size_in_bytes = int(response.headers.get('content-length', 0))
  80. block_size = 1024 # 1 Kibibyte
  81. progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
  82. with open(save_path, 'wb') as file:
  83. for data in response.iter_content(block_size):
  84. progress_bar.update(len(data))
  85. file.write(data)
  86. progress_bar.close()
  87. if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes:
  88. logger.error("Something went wrong while downloading models")
  89. sys.exit(0)
  90. def maybe_download(model_storage_directory, url):
  91. # using custom model
  92. tar_file_name_list = [
  93. 'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
  94. ]
  95. if not os.path.exists(
  96. os.path.join(model_storage_directory, 'inference.pdiparams')
  97. ) or not os.path.exists(
  98. os.path.join(model_storage_directory, 'inference.pdmodel')):
  99. tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
  100. print('download {} to {}'.format(url, tmp_path))
  101. os.makedirs(model_storage_directory, exist_ok=True)
  102. download_with_progressbar(url, tmp_path)
  103. with tarfile.open(tmp_path, 'r') as tarObj:
  104. for member in tarObj.getmembers():
  105. filename = None
  106. for tar_file_name in tar_file_name_list:
  107. if tar_file_name in member.name:
  108. filename = tar_file_name
  109. if filename is None:
  110. continue
  111. file = tarObj.extractfile(member)
  112. with open(
  113. os.path.join(model_storage_directory, filename),
  114. 'wb') as f:
  115. f.write(file.read())
  116. os.remove(tmp_path)
  117. def parse_args(mMain=True, add_help=True):
  118. import argparse
  119. def str2bool(v):
  120. return v.lower() in ("true", "t", "1")
  121. if mMain:
  122. parser = argparse.ArgumentParser(add_help=add_help)
  123. # params for prediction engine
  124. parser.add_argument("--use_gpu", type=str2bool, default=True)
  125. parser.add_argument("--ir_optim", type=str2bool, default=True)
  126. parser.add_argument("--use_tensorrt", type=str2bool, default=False)
  127. parser.add_argument("--gpu_mem", type=int, default=8000)
  128. # params for text detector
  129. parser.add_argument("--image_dir", type=str)
  130. parser.add_argument("--det_algorithm", type=str, default='DB')
  131. parser.add_argument("--det_model_dir", type=str, default=None)
  132. parser.add_argument("--det_limit_side_len", type=float, default=960)
  133. parser.add_argument("--det_limit_type", type=str, default='max')
  134. # DB parmas
  135. parser.add_argument("--det_db_thresh", type=float, default=0.1)
  136. parser.add_argument("--det_db_box_thresh", type=float, default=0.1)
  137. parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
  138. parser.add_argument("--use_dilation", type=bool, default=False)
  139. # EAST parmas
  140. parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
  141. parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
  142. parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
  143. # params for text recognizer
  144. parser.add_argument("--rec_algorithm", type=str, default='CRNN')
  145. parser.add_argument("--rec_model_dir", type=str, default=None)
  146. parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
  147. parser.add_argument("--rec_char_type", type=str, default='ch')
  148. parser.add_argument("--rec_batch_num", type=int, default=30)
  149. parser.add_argument("--max_text_length", type=int, default=25)
  150. parser.add_argument("--rec_char_dict_path", type=str, default=None)
  151. parser.add_argument("--use_space_char", type=bool, default=True)
  152. parser.add_argument("--drop_score", type=float, default=0.5)
  153. # params for text classifier
  154. parser.add_argument("--cls_model_dir", type=str, default=None)
  155. parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
  156. parser.add_argument("--label_list", type=list, default=['0', '180'])
  157. parser.add_argument("--cls_batch_num", type=int, default=30)
  158. parser.add_argument("--cls_thresh", type=float, default=0.9)
  159. parser.add_argument("--enable_mkldnn", type=bool, default=False)
  160. parser.add_argument("--use_zero_copy_run", type=bool, default=False)
  161. parser.add_argument("--use_pdserving", type=str2bool, default=False)
  162. parser.add_argument("--lang", type=str, default='ch')
  163. parser.add_argument("--det", type=str2bool, default=True)
  164. parser.add_argument("--rec", type=str2bool, default=True)
  165. parser.add_argument("--use_angle_cls", type=str2bool, default=False)
  166. return parser.parse_args()
  167. else:
  168. if max_compute:
  169. use_gpu = False
  170. else:
  171. use_gpu = True
  172. return argparse.Namespace(
  173. use_gpu=use_gpu,
  174. ir_optim=True,
  175. use_tensorrt=False,
  176. gpu_mem=8000,
  177. image_dir='',
  178. det_algorithm='DB',
  179. det_model_dir=None,
  180. det_limit_side_len=1280,
  181. det_limit_type='max',
  182. det_db_thresh=0.1,
  183. # det_db_box_thresh 漏行 调小
  184. det_db_box_thresh=0.1,
  185. # det_db_unclip_ratio 检测框的贴近程度
  186. det_db_unclip_ratio=2.5,
  187. # 对文字膨胀操作
  188. use_dilation=False,
  189. det_east_score_thresh=0.8,
  190. det_east_cover_thresh=0.1,
  191. det_east_nms_thresh=0.2,
  192. rec_algorithm='CRNN',
  193. rec_model_dir=None,
  194. rec_image_shape="3, 32, 1000",
  195. rec_char_type='ch',
  196. rec_batch_num=30,
  197. max_text_length=128,
  198. rec_char_dict_path='ocr/ppocr/utils/ppocr_keys_v1.txt',
  199. use_space_char=True,
  200. drop_score=0.5,
  201. cls_model_dir=None,
  202. cls_image_shape="3, 32, 1000",
  203. label_list=['0', '180'],
  204. cls_batch_num=30,
  205. cls_thresh=0.9,
  206. enable_mkldnn=False,
  207. use_zero_copy_run=True,
  208. use_pdserving=False,
  209. lang='ch',
  210. det=True,
  211. rec=True,
  212. use_angle_cls=False)
  213. class PaddleOCR(predict_system.TextSystem):
  214. def __init__(self, **kwargs):
  215. """
  216. paddleocr package
  217. args:
  218. **kwargs: other params show in paddleocr --help
  219. """
  220. postprocess_params = parse_args(mMain=False, add_help=False)
  221. postprocess_params.__dict__.update(**kwargs)
  222. self.use_angle_cls = postprocess_params.use_angle_cls
  223. lang = postprocess_params.lang
  224. assert lang in model_urls[
  225. 'rec'], 'param lang must in {}, but got {}'.format(
  226. model_urls['rec'].keys(), lang)
  227. if postprocess_params.rec_char_dict_path is None:
  228. postprocess_params.rec_char_dict_path = model_urls['rec'][lang][
  229. 'dict_path']
  230. # init model dir
  231. if postprocess_params.det_model_dir is None:
  232. postprocess_params.det_model_dir = os.path.join(
  233. BASE_DIR, '{}/det'.format(VERSION))
  234. if postprocess_params.rec_model_dir is None:
  235. postprocess_params.rec_model_dir = os.path.join(
  236. BASE_DIR, '{}/rec/{}'.format(VERSION, lang))
  237. if postprocess_params.cls_model_dir is None:
  238. postprocess_params.cls_model_dir = os.path.join(
  239. BASE_DIR, '{}/cls'.format(VERSION))
  240. logger.info(postprocess_params)
  241. # download model
  242. maybe_download(postprocess_params.det_model_dir, model_urls['det'])
  243. maybe_download(postprocess_params.rec_model_dir,
  244. model_urls['rec'][lang]['url'])
  245. maybe_download(postprocess_params.cls_model_dir, model_urls['cls'])
  246. if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
  247. logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
  248. sys.exit(0)
  249. if postprocess_params.rec_algorithm not in SUPPORT_REC_MODEL:
  250. logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
  251. sys.exit(0)
  252. postprocess_params.rec_char_dict_path = str(
  253. Path(__file__).parent.parent / postprocess_params.rec_char_dict_path)
  254. # init det_model and rec_model
  255. super().__init__(postprocess_params)
  256. def ocr(self, img, det=True, rec=True, cls=False):
  257. """
  258. ocr with paddleocr
  259. args:
  260. img: img for ocr, support ndarray, img_path and list or ndarray
  261. det: use text detection or not, if false, only rec will be exec. default is True
  262. rec: use text recognition or not, if false, only det will be exec. default is True
  263. """
  264. assert isinstance(img, (np.ndarray, list, str))
  265. if isinstance(img, list) and det == True:
  266. logger.error('When input a list of images, det must be false')
  267. exit(0)
  268. self.use_angle_cls = cls
  269. if isinstance(img, str):
  270. # download net image
  271. if img.startswith('http'):
  272. download_with_progressbar(img, 'tmp.jpg')
  273. img = 'tmp.jpg'
  274. image_file = img
  275. img, flag = check_and_read_gif(image_file)
  276. if not flag:
  277. with open(image_file, 'rb') as f:
  278. np_arr = np.frombuffer(f.read(), dtype=np.uint8)
  279. img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
  280. if img is None:
  281. logger.error("error in loading image:{}".format(image_file))
  282. return None
  283. if isinstance(img, np.ndarray) and len(img.shape) == 2:
  284. img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  285. if det and rec:
  286. dt_boxes, rec_res = self.__call__(img)
  287. # print("paddleocr.py dt_boxes", len(dt_boxes))
  288. # print("paddleocr.py rec_res", len(rec_res))
  289. return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
  290. elif det and not rec:
  291. dt_boxes, elapse = self.text_detector(img)
  292. if dt_boxes is None:
  293. return None
  294. return [box.tolist() for box in dt_boxes]
  295. else:
  296. if not isinstance(img, list):
  297. img = [img]
  298. if self.use_angle_cls:
  299. img, cls_res, elapse = self.text_classifier(img)
  300. if not rec:
  301. return cls_res
  302. rec_res, elapse = self.text_recognizer(img)
  303. return rec_res
  304. def main(mMain=True):
  305. # for cmd
  306. args = parse_args(mMain)
  307. # args = parse_args(mMain=True)
  308. # 图片是网络的还是本地路径
  309. image_dir = args.image_dir
  310. if image_dir.startswith('http'):
  311. download_with_progressbar(image_dir, 'tmp.jpg')
  312. image_file_list = ['tmp.jpg']
  313. else:
  314. image_file_list = get_image_file_list(args.image_dir)
  315. if len(image_file_list) == 0:
  316. logger.error('no images find in {}'.format(args.image_dir))
  317. return
  318. ocr_engine = PaddleOCR(**(args.__dict__))
  319. for img_path in image_file_list:
  320. logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
  321. result = ocr_engine.ocr(img_path,
  322. det=args.det,
  323. rec=args.rec,
  324. cls=args.use_angle_cls)
  325. if result is not None:
  326. for line in result:
  327. logger.info(line)
# Script entry point.
# NOTE(review): main(False) makes parse_args() return its built-in defaults,
# so command-line options are not read when this file is run directly --
# confirm that this is intended.
if __name__ == '__main__':
    main(False)