# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import tarfile
import tempfile
from pathlib import Path

# Make this package directory and its parent importable before the
# project-local imports further down are resolved.
__dir__ = os.path.dirname(__file__)
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
project_path = os.path.abspath(__dir__)
# project_path = ""

import cv2
import numpy as np
import requests
from tqdm import tqdm

# Set ahead of the inference imports below (original ordering preserved).
os.environ['FLAGS_eager_delete_tensor_gb'] = '0'

from ocr.tools.infer import predict_system
from ocr.ppocr.utils.logging import get_logger
from config.max_compute_config import MAX_COMPUTE

logger = get_logger()

from ocr.ppocr.utils.utility import check_and_read_gif, get_image_file_list

__all__ = ['PaddleOCR']
  34. model_urls = {
  35. 'det':
  36. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
  37. 'rec': {
  38. 'ch': {
  39. 'url':
  40. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
  41. 'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
  42. },
  43. 'en': {
  44. 'url':
  45. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
  46. 'dict_path': './ppocr/utils/dict/en_dict.txt'
  47. },
  48. 'french': {
  49. 'url':
  50. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
  51. 'dict_path': './ppocr/utils/dict/french_dict.txt'
  52. },
  53. 'german': {
  54. 'url':
  55. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
  56. 'dict_path': './ppocr/utils/dict/german_dict.txt'
  57. },
  58. 'korean': {
  59. 'url':
  60. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
  61. 'dict_path': './ppocr/utils/dict/korean_dict.txt'
  62. },
  63. 'japan': {
  64. 'url':
  65. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
  66. 'dict_path': './ppocr/utils/dict/japan_dict.txt'
  67. }
  68. },
  69. 'cls':
  70. 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
  71. }
  72. SUPPORT_DET_MODEL = ['DB']
  73. VERSION = 2.0
  74. SUPPORT_REC_MODEL = ['CRNN']
  75. # BASE_DIR = os.path.expanduser("~/.paddleocr/")
  76. BASE_DIR = project_path + "/model/"
  77. def download_with_progressbar(url, save_path):
  78. response = requests.get(url, stream=True)
  79. total_size_in_bytes = int(response.headers.get('content-length', 0))
  80. block_size = 1024 # 1 Kibibyte
  81. progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
  82. with open(save_path, 'wb') as file:
  83. for data in response.iter_content(block_size):
  84. progress_bar.update(len(data))
  85. file.write(data)
  86. progress_bar.close()
  87. if total_size_in_bytes == 0 or progress_bar.n != total_size_in_bytes:
  88. logger.error("Something went wrong while downloading models")
  89. sys.exit(0)
  90. def maybe_download(model_storage_directory, url):
  91. # using custom model
  92. tar_file_name_list = [
  93. 'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
  94. ]
  95. if not os.path.exists(
  96. os.path.join(model_storage_directory, 'inference.pdiparams')
  97. ) or not os.path.exists(
  98. os.path.join(model_storage_directory, 'inference.pdmodel')):
  99. tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
  100. print('download {} to {}'.format(url, tmp_path))
  101. os.makedirs(model_storage_directory, exist_ok=True)
  102. download_with_progressbar(url, tmp_path)
  103. with tarfile.open(tmp_path, 'r') as tarObj:
  104. for member in tarObj.getmembers():
  105. filename = None
  106. for tar_file_name in tar_file_name_list:
  107. if tar_file_name in member.name:
  108. filename = tar_file_name
  109. if filename is None:
  110. continue
  111. file = tarObj.extractfile(member)
  112. with open(
  113. os.path.join(model_storage_directory, filename),
  114. 'wb') as f:
  115. f.write(file.read())
  116. os.remove(tmp_path)
  117. def parse_args(mMain=True, add_help=True):
  118. import argparse
  119. def str2bool(v):
  120. return v.lower() in ("true", "t", "1")
  121. if mMain:
  122. parser = argparse.ArgumentParser(add_help=add_help)
  123. # params for prediction engine
  124. parser.add_argument("--use_gpu", type=str2bool, default=True)
  125. parser.add_argument("--ir_optim", type=str2bool, default=True)
  126. parser.add_argument("--use_tensorrt", type=str2bool, default=False)
  127. parser.add_argument("--gpu_mem", type=int, default=8000)
  128. # params for text detector
  129. parser.add_argument("--image_dir", type=str)
  130. parser.add_argument("--det_algorithm", type=str, default='DB')
  131. parser.add_argument("--det_model_dir", type=str, default=None)
  132. parser.add_argument("--det_limit_side_len", type=float, default=960)
  133. parser.add_argument("--det_limit_type", type=str, default='max')
  134. # DB parmas
  135. parser.add_argument("--det_db_thresh", type=float, default=0.1)
  136. parser.add_argument("--det_db_box_thresh", type=float, default=0.1)
  137. parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
  138. parser.add_argument("--use_dilation", type=bool, default=False)
  139. # EAST parmas
  140. parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
  141. parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
  142. parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)
  143. # params for text recognizer
  144. parser.add_argument("--rec_algorithm", type=str, default='CRNN')
  145. parser.add_argument("--rec_model_dir", type=str, default=None)
  146. parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
  147. parser.add_argument("--rec_char_type", type=str, default='ch')
  148. parser.add_argument("--rec_batch_num", type=int, default=30)
  149. parser.add_argument("--max_text_length", type=int, default=25)
  150. parser.add_argument("--rec_char_dict_path", type=str, default=None)
  151. parser.add_argument("--use_space_char", type=bool, default=True)
  152. parser.add_argument("--drop_score", type=float, default=0.5)
  153. # params for text classifier
  154. parser.add_argument("--cls_model_dir", type=str, default=None)
  155. parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
  156. parser.add_argument("--label_list", type=list, default=['0', '180'])
  157. parser.add_argument("--cls_batch_num", type=int, default=30)
  158. parser.add_argument("--cls_thresh", type=float, default=0.9)
  159. parser.add_argument("--enable_mkldnn", type=bool, default=False)
  160. parser.add_argument("--use_zero_copy_run", type=bool, default=False)
  161. parser.add_argument("--use_pdserving", type=str2bool, default=False)
  162. parser.add_argument("--lang", type=str, default='ch')
  163. parser.add_argument("--det", type=str2bool, default=True)
  164. parser.add_argument("--rec", type=str2bool, default=True)
  165. parser.add_argument("--use_angle_cls", type=str2bool, default=False)
  166. return parser.parse_args()
  167. else:
  168. if MAX_COMPUTE:
  169. use_gpu = False
  170. else:
  171. use_gpu = True
  172. # return argparse.Namespace(
  173. # use_gpu=use_gpu,
  174. # ir_optim=True,
  175. # use_tensorrt=False,
  176. # gpu_mem=8000,
  177. # image_dir='',
  178. # det_algorithm='DB',
  179. # det_model_dir=None,
  180. # det_limit_side_len=1280,
  181. # det_limit_type='max',
  182. # det_db_thresh=0.1,
  183. # # det_db_box_thresh 漏行 调小
  184. # det_db_box_thresh=0.1,
  185. # # det_db_unclip_ratio 检测框的贴近程度
  186. # det_db_unclip_ratio=2.5,
  187. # # 对文字膨胀操作
  188. # use_dilation=False,
  189. # det_east_score_thresh=0.8,
  190. # det_east_cover_thresh=0.1,
  191. # det_east_nms_thresh=0.2,
  192. # rec_algorithm='CRNN',
  193. # rec_model_dir=None,
  194. # rec_image_shape="3, 32, 1000",
  195. # rec_char_type='ch',
  196. # rec_batch_num=30,
  197. # max_text_length=128,
  198. # rec_char_dict_path='ocr/ppocr/utils/ppocr_keys_v1.txt',
  199. # use_space_char=True,
  200. # drop_score=0.5,
  201. # cls_model_dir=None,
  202. # cls_image_shape="3, 32, 1000",
  203. # label_list=['0', '180'],
  204. # cls_batch_num=30,
  205. # cls_thresh=0.9,
  206. # enable_mkldnn=False,
  207. # use_zero_copy_run=True,
  208. # use_pdserving=False,
  209. # lang='ch',
  210. # det=True,
  211. # rec=True,
  212. # use_angle_cls=False)
  213. return argparse.Namespace(
  214. use_gpu=use_gpu,
  215. ir_optim=True,
  216. use_tensorrt=False,
  217. gpu_mem=8000,
  218. image_dir='',
  219. det_algorithm='DB',
  220. det_model_dir=None,
  221. det_limit_side_len=1280,
  222. det_limit_type='max',
  223. # det_db_thresh=0.1,
  224. det_db_thresh=0.2,# torch
  225. # det_db_box_thresh 漏行 调小
  226. # det_db_box_thresh=0.1,
  227. det_db_box_thresh=0.5,# torch
  228. # det_db_unclip_ratio 检测框的贴近程度
  229. # det_db_unclip_ratio=2.5,
  230. det_db_unclip_ratio=2.5,# torch
  231. # 对文字膨胀操作
  232. use_dilation=False,
  233. det_east_score_thresh=0.8,
  234. det_east_cover_thresh=0.1,
  235. det_east_nms_thresh=0.2,
  236. rec_algorithm='CRNN',
  237. rec_model_dir=None,
  238. rec_image_shape="3, 32, 1000",
  239. rec_char_type='ch',
  240. rec_batch_num=30,
  241. max_text_length=128,
  242. # rec_char_dict_path='ocr/ppocr/utils/ppocr_keys_v1.txt',
  243. # use_space_char=True,
  244. rec_char_dict_path='ocr/ppocr/utils/char_std_7551.txt',
  245. use_space_char=False,
  246. drop_score=0.5,
  247. cls_model_dir=None,
  248. cls_image_shape="3, 32, 1000",
  249. label_list=['0', '180'],
  250. cls_batch_num=30,
  251. cls_thresh=0.9,
  252. enable_mkldnn=False,
  253. use_zero_copy_run=True,
  254. use_pdserving=False,
  255. lang='ch',
  256. det=True,
  257. rec=True,
  258. use_angle_cls=False)
  259. class PaddleOCR(predict_system.TextSystem):
  260. def __init__(self, **kwargs):
  261. """
  262. paddleocr package
  263. args:
  264. **kwargs: other params show in paddleocr --help
  265. """
  266. postprocess_params = parse_args(mMain=False, add_help=False)
  267. postprocess_params.__dict__.update(**kwargs)
  268. self.use_angle_cls = postprocess_params.use_angle_cls
  269. lang = postprocess_params.lang
  270. assert lang in model_urls[
  271. 'rec'], 'param lang must in {}, but got {}'.format(
  272. model_urls['rec'].keys(), lang)
  273. if postprocess_params.rec_char_dict_path is None:
  274. postprocess_params.rec_char_dict_path = model_urls['rec'][lang][
  275. 'dict_path']
  276. # init model dir
  277. if postprocess_params.det_model_dir is None:
  278. # postprocess_params.det_model_dir = os.path.join(
  279. # BASE_DIR, '{}/det'.format(VERSION))
  280. postprocess_params.det_model_dir = os.path.join(
  281. BASE_DIR, 'pytorch/det/det.pth') # torch
  282. if postprocess_params.rec_model_dir is None:
  283. # postprocess_params.rec_model_dir = os.path.join(
  284. # BASE_DIR, '{}/rec/{}'.format(VERSION, lang))
  285. postprocess_params.rec_model_dir = os.path.join(
  286. BASE_DIR, 'pytorch/rec/rec.pth')# torch
  287. if postprocess_params.cls_model_dir is None:
  288. postprocess_params.cls_model_dir = os.path.join(
  289. BASE_DIR, '{}/cls'.format(VERSION))
  290. logger.info(postprocess_params)
  291. # download model
  292. # maybe_download(postprocess_params.det_model_dir, model_urls['det'])
  293. # maybe_download(postprocess_params.rec_model_dir,
  294. # model_urls['rec'][lang]['url'])
  295. maybe_download(postprocess_params.cls_model_dir, model_urls['cls'])
  296. if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
  297. logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
  298. sys.exit(0)
  299. if postprocess_params.rec_algorithm not in SUPPORT_REC_MODEL:
  300. logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
  301. sys.exit(0)
  302. postprocess_params.rec_char_dict_path = str(
  303. Path(__file__).parent.parent / postprocess_params.rec_char_dict_path)
  304. # init det_model and rec_model
  305. super().__init__(postprocess_params)
  306. def ocr(self, img, det=True, rec=True, cls=False):
  307. """
  308. ocr with paddleocr
  309. args:
  310. img: img for ocr, support ndarray, img_path and list or ndarray
  311. det: use text detection or not, if false, only rec will be exec. default is True
  312. rec: use text recognition or not, if false, only det will be exec. default is True
  313. """
  314. assert isinstance(img, (np.ndarray, list, str))
  315. if isinstance(img, list) and det == True:
  316. logger.error('When input a list of images, det must be false')
  317. exit(0)
  318. self.use_angle_cls = cls
  319. if isinstance(img, str):
  320. # download net image
  321. if img.startswith('http'):
  322. download_with_progressbar(img, 'tmp.jpg')
  323. img = 'tmp.jpg'
  324. image_file = img
  325. img, flag = check_and_read_gif(image_file)
  326. if not flag:
  327. with open(image_file, 'rb') as f:
  328. np_arr = np.frombuffer(f.read(), dtype=np.uint8)
  329. img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
  330. if img is None:
  331. logger.error("error in loading image:{}".format(image_file))
  332. return None
  333. if isinstance(img, np.ndarray) and len(img.shape) == 2:
  334. img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  335. if det and rec:
  336. dt_boxes, rec_res = self.__call__(img)
  337. # print("paddleocr.py dt_boxes", len(dt_boxes))
  338. # print("paddleocr.py rec_res", len(rec_res))
  339. return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
  340. elif det and not rec:
  341. dt_boxes, elapse = self.text_detector(img)
  342. if dt_boxes is None:
  343. return None
  344. return [box.tolist() for box in dt_boxes]
  345. else:
  346. if not isinstance(img, list):
  347. img = [img]
  348. if self.use_angle_cls:
  349. img, cls_res, elapse = self.text_classifier(img)
  350. if not rec:
  351. return cls_res
  352. rec_res, elapse = self.text_recognizer(img)
  353. return rec_res
  354. def main(mMain=True):
  355. # for cmd
  356. args = parse_args(mMain)
  357. # args = parse_args(mMain=True)
  358. # 图片是网络的还是本地路径
  359. image_dir = args.image_dir
  360. if image_dir.startswith('http'):
  361. download_with_progressbar(image_dir, 'tmp.jpg')
  362. image_file_list = ['tmp.jpg']
  363. else:
  364. image_file_list = get_image_file_list(args.image_dir)
  365. if len(image_file_list) == 0:
  366. logger.error('no images find in {}'.format(args.image_dir))
  367. return
  368. ocr_engine = PaddleOCR(**(args.__dict__))
  369. for img_path in image_file_list:
  370. logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
  371. result = ocr_engine.ocr(img_path,
  372. det=args.det,
  373. rec=args.rec,
  374. cls=args.use_angle_cls)
  375. if result is not None:
  376. for line in result:
  377. logger.info(line)
  378. if __name__ == '__main__':
  379. main(False)