convert_need_interface.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. import base64
  2. import logging
  3. import os
  4. import sys
  5. sys.path.append(os.path.dirname(__file__) + "/../")
  6. import traceback
  7. import requests
  8. from format_convert import get_memory_info
  9. from format_convert.utils import get_platform, get_sequential_data, judge_error_code
  10. from ocr.ocr_interface import ocr, OcrModels
  11. from otr.otr_interface import otr, OtrModels
  12. from format_convert.libreoffice_interface import office_convert
  def from_office_interface(src_path, dest_path, target_format, retry_times=1):
      """Convert an office document by delegating to ``office_convert``.

      Args:
          src_path: Forwarded unchanged to ``office_convert``.
          dest_path: Forwarded unchanged to ``office_convert``.
          target_format: Forwarded unchanged to ``office_convert``.
          retry_times: Forwarded unchanged to ``office_convert``.

      Returns:
          The value returned by ``office_convert`` (presumably a converted
          file path or one of its own error codes -- confirm against
          ``office_convert``), ``[-5]`` when a ``TimeoutError`` is raised, or
          ``[-1]`` when any other exception is raised.
      """
      try:
          # NOTE: earlier revisions bypassed a timeout decorator on Windows 10
          # and, elsewhere, wrapped that decorator in a class so it stayed
          # picklable under multiprocessing ("it's not the same object as
          # xxx"). Both platform branches had become the same direct call,
          # so they are collapsed into one here.
          return office_convert(src_path, dest_path, target_format, retry_times)
      except TimeoutError:
          logging.info("from_office_interface timeout error!")
          return [-5]
      except Exception:
          logging.info("from_office_interface error!")
          # format_exc() returns the traceback text; print_exc() returns None,
          # which made the old message read "from_office_interface None".
          print("from_office_interface", traceback.format_exc())
          return [-1]
  def _ocr_error_result(code, is_table):
      """Build the error payload in the shape the caller asked for."""
      return ([code], [code]) if is_table else [code]


  @get_memory_info.memory_decorator
  def from_ocr_interface(image_stream, is_table=False):
      """Run OCR on an image and return its text.

      The OCR model is created on first use and cached in the module globals
      under ``"global_ocr_model"``.

      Args:
          image_stream: Bytes-like image content; it is base64-encoded before
              being handed to ``ocr``.
          is_table: When true, return the raw ``(text_list, bbox_list)`` pair
              instead of a single assembled text value.

      Returns:
          * ``is_table`` true: ``(text_list, bbox_list)``, with ``None``
            results replaced by empty lists.
          * ``is_table`` false: the result of
            ``get_sequential_data(text_list, bbox_list, html=True)`` (which
            may itself be an error code), or ``""`` when either list is empty.
          * On failure: ``[-5]`` for ``TimeoutError``, ``[-2]`` for
            ``requests.exceptions.ConnectionError``, ``[-1]`` for anything
            else. With ``is_table`` true the code is returned twice, as
            ``([code], [code])``.
      """
      logging.info("into from_ocr_interface")
      try:
          base64_stream = base64.b64encode(image_stream)

          # Call the interface
          try:
              model = globals().get("global_ocr_model")
              if model is None:
                  model = OcrModels().get_model()
                  globals()["global_ocr_model"] = model
                  print("=========== init ocr model ===========")
              r = ocr(data=base64_stream, ocr_model=model)
          except TimeoutError:
              return _ocr_error_result(-5, is_table)
          except requests.exceptions.ConnectionError:
              return _ocr_error_result(-2, is_table)

          # SECURITY(review): eval() executes whatever string the OCR service
          # returned. If that service can ever be influenced by untrusted
          # input, switch to ast.literal_eval once the payload is confirmed
          # to contain only Python literals.
          text_list = eval(r.get("text"))
          bbox_list = eval(r.get("bbox"))
          if text_list is None:
              text_list = []
          if bbox_list is None:
              bbox_list = []

          if is_table:
              return text_list, bbox_list
          if text_list and bbox_list:
              # An error code from get_sequential_data is passed through as-is.
              return get_sequential_data(text_list, bbox_list, html=True)
          return ""
      except Exception:
          logging.info("from_ocr_interface error!")
          # Surface the traceback so failures here are diagnosable.
          print("from_ocr_interface", traceback.format_exc())
          return _ocr_error_result(-1, is_table)
  def _otr_error_result(code):
      """Build the five-element error payload, one ``[code]`` list per field."""
      return tuple([code] for _ in range(5))


  @get_memory_info.memory_decorator
  def from_otr_interface(image_stream):
      """Run the OTR model on an image and return its five result fields.

      The OTR model is created on first use and cached in the module globals
      under ``"global_otr_model"``.

      Args:
          image_stream: Bytes-like image content; it is base64-encoded before
              being handed to ``otr``.

      Returns:
          A 5-tuple ``(points, split_lines, bboxes, outline_points, lines)``
          read from the response, with ``None`` fields replaced by empty
          lists. On failure every element is the same one-item error list:
          ``[-5]`` for ``TimeoutError``, ``[-2]`` for
          ``requests.exceptions.ConnectionError``, ``[-1]`` for anything else.
      """
      logging.info("into from_otr_interface")
      try:
          base64_stream = base64.b64encode(image_stream)

          # Call the interface
          try:
              model = globals().get("global_otr_model")
              if model is None:
                  model = OtrModels().get_model()
                  globals()["global_otr_model"] = model
                  print("=========== init otr model ===========")
              r = otr(data=base64_stream, otr_model=model)
          except TimeoutError:
              return _otr_error_result(-5)
          except requests.exceptions.ConnectionError:
              logging.info("from_otr_interface")
              # format_exc() returns the traceback text; print_exc() returns
              # None, which is what used to be printed here.
              print("from_otr_interface", traceback.format_exc())
              return _otr_error_result(-2)

          # Process the result
          # SECURITY(review): eval() executes whatever string the OTR service
          # returned. If that service can ever be influenced by untrusted
          # input, switch to ast.literal_eval once the payload is confirmed
          # to contain only Python literals.
          fields = []
          for key in ("points", "split_lines", "bboxes", "outline_points", "lines"):
              value = eval(r.get(key))
              fields.append([] if value is None else value)
          return tuple(fields)
      except Exception:
          logging.info("from_otr_interface error!")
          print("from_otr_interface", traceback.format_exc())
          return _otr_error_result(-1)