fangjiasheng hace 3 años
padre
commit
63617ff686
Se han modificado 48 ficheros con 2722 adiciones y 2581 borrados
  1. 1 0
      .gitignore
  2. 16 0
      format_convert/_global.py
  3. 11 2148
      format_convert/convert.py
  4. 4 3
      format_convert/convert_doc.py
  5. 17 14
      format_convert/convert_docx.py
  6. 16 12
      format_convert/convert_image.py
  7. 304 35
      format_convert/convert_need_interface.py
  8. 124 70
      format_convert/convert_pdf.py
  9. 15 12
      format_convert/convert_rar.py
  10. 6 5
      format_convert/convert_swf.py
  11. 2 1
      format_convert/convert_tree.py
  12. 9 7
      format_convert/convert_txt.py
  13. 6 5
      format_convert/convert_xls.py
  14. 68 7
      format_convert/convert_xlsx.py
  15. 12 11
      format_convert/convert_zip.py
  16. BIN
      format_convert/get_points.jpg
  17. 62 0
      format_convert/interface.yml
  18. 39 0
      format_convert/kill_all.py
  19. 26 0
      format_convert/kill_office.py
  20. 112 22
      format_convert/libreoffice_interface.py
  21. 87 0
      format_convert/monitor_process.py
  22. 134 0
      format_convert/monitor_process2.py
  23. 104 0
      format_convert/monitor_process3.py
  24. 124 0
      format_convert/monitor_process_config.py
  25. 124 0
      format_convert/schedule_interface.py
  26. 4 1
      format_convert/table_correct.py
  27. BIN
      format_convert/test1.doc
  28. BIN
      format_convert/test1.pdf
  29. BIN
      format_convert/test1.xls
  30. BIN
      format_convert/test2.doc
  31. 44 0
      format_convert/timeout_decorator.py
  32. 249 17
      format_convert/utils.py
  33. 13 0
      format_convert/wrapt_timeout_decorator/__init__.py
  34. 24 0
      format_convert/wrapt_timeout_decorator/__init__conf__.py
  35. 0 0
      format_convert/wrapt_timeout_decorator/py.typed
  36. 91 0
      format_convert/wrapt_timeout_decorator/wrap_function_multiprocess.py
  37. 195 0
      format_convert/wrapt_timeout_decorator/wrap_helper.py
  38. 184 0
      format_convert/wrapt_timeout_decorator/wrapt_timeout_decorator.py
  39. 55 0
      format_convert/wrapt_timeout_decorator/wrapt_timeout_decorator_cli.py
  40. 90 36
      ocr/ocr_interface.py
  41. 3 2
      ocr/paddleocr.py
  42. 110 41
      otr/otr_interface.py
  43. 236 70
      otr/table_line.py
  44. BIN
      package_2022_03_22/convert_otr.zip
  45. BIN
      package_2022_04_11/convert_format_convert.zip
  46. BIN
      package_2022_04_11/convert_ocr.zip
  47. BIN
      package_2022_04_11/convert_otr.zip
  48. 1 62
      result.html

+ 1 - 0
.gitignore

@@ -26,3 +26,4 @@
 /package_env/
 /package_2022_03_22/
 /package_env/
+/package_*

+ 16 - 0
format_convert/_global.py

@@ -0,0 +1,16 @@
+import logging
+
+
+def _init():
+    global global_dict
+    global_dict = {}
+
+
+def update(_dict):
+    # 定义一个全局变量
+    global_dict.update(_dict)
+
+
+def get(key):
+    # 获得一个全局变量
+    return global_dict.get(key)

La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 11 - 2148
format_convert/convert.py


+ 4 - 3
format_convert/convert_doc.py

@@ -1,3 +1,4 @@
+import inspect
 import os
 import sys
 sys.path.append(os.path.dirname(__file__) + "/../")
@@ -7,12 +8,12 @@ import traceback
 from format_convert import get_memory_info
 from format_convert.convert_docx import docx2text, DocxConvert
 from format_convert.convert_need_interface import from_office_interface
-from format_convert.utils import judge_error_code
+from format_convert.utils import judge_error_code, get_logger, log
 
 
 @get_memory_info.memory_decorator
 def doc2text(path, unique_type_dir):
-    logging.info("into doc2text")
+    log("into doc2text")
     try:
         # 调用office格式转换
         file_path = from_office_interface(path, unique_type_dir, 'docx')
@@ -22,7 +23,7 @@ def doc2text(path, unique_type_dir):
         text = docx2text(file_path, unique_type_dir)
         return text
     except Exception as e:
-        logging.info("doc2text error!")
+        log("doc2text error!")
         print("doc2text", traceback.print_exc())
         return [-1]
 

+ 17 - 14
format_convert/convert_docx.py

@@ -1,3 +1,4 @@
+import inspect
 import os
 import sys
 sys.path.append(os.path.dirname(__file__) + "/../")
@@ -11,19 +12,19 @@ import docx
 import timeout_decorator
 from format_convert import get_memory_info
 from format_convert.convert_image import picture2text
-from format_convert.utils import judge_error_code, add_div
+from format_convert.utils import judge_error_code, add_div, get_logger, log
 
 
 @get_memory_info.memory_decorator
 def docx2text(path, unique_type_dir):
-    logging.info("into docx2text")
+    log("into docx2text")
     try:
         try:
             doc = docx.Document(path)
         except Exception as e:
             print("docx format error!", e)
             print(traceback.print_exc())
-            logging.info("docx format error!")
+            log("docx format error!")
             return [-3]
 
         # 遍历段落
@@ -110,14 +111,14 @@ def docx2text(path, unique_type_dir):
                     text += table_text_list.pop(0)
         return [text]
     except Exception as e:
-        logging.info("docx2text error!")
+        log("docx2text error!")
         print("docx2text", traceback.print_exc())
         return [-1]
 
 
 @get_memory_info.memory_decorator
 def read_xml_order(path, save_path):
-    logging.info("into read_xml_order")
+    log("into read_xml_order")
     try:
         try:
             f = zipfile.ZipFile(path)
@@ -126,13 +127,13 @@ def read_xml_order(path, save_path):
                     f.extract(file, save_path)
             f.close()
         except Exception as e:
-            logging.info("docx format error!")
+            log("docx format error!")
             return [-3]
 
         try:
             collection = xml_analyze(save_path + "word/document.xml")
         except TimeoutError:
-            logging.info("read_xml_order timeout")
+            log("read_xml_order timeout")
             return [-4]
 
         body = collection.getElementsByTagName("w:body")[0]
@@ -167,7 +168,7 @@ def read_xml_order(path, save_path):
         read_xml_table(path, save_path)
         return [order_list, text_list]
     except Exception as e:
-        logging.info("read_xml_order error!")
+        log("read_xml_order error!")
         print("read_xml_order", traceback.print_exc())
         # log_traceback("read_xml_order")
         return [-1]
@@ -175,7 +176,7 @@ def read_xml_order(path, save_path):
 
 @get_memory_info.memory_decorator
 def read_xml_table(path, save_path):
-    logging.info("into read_xml_table")
+    log("into read_xml_table")
     try:
         try:
             f = zipfile.ZipFile(path)
@@ -185,13 +186,13 @@ def read_xml_table(path, save_path):
             f.close()
         except Exception as e:
             # print("docx format error!", e)
-            logging.info("docx format error!")
+            log("docx format error!")
             return [-3]
 
         try:
             collection = xml_analyze(save_path + "word/document.xml")
         except TimeoutError:
-            logging.info("read_xml_table timeout")
+            log("read_xml_table timeout")
             return [-4]
 
         body = collection.getElementsByTagName("w:body")[0]
@@ -267,7 +268,7 @@ def read_xml_table(path, save_path):
         return table_text_list
 
     except Exception as e:
-        logging.info("read_xml_table error")
+        log("read_xml_table error")
         print("read_xml_table", traceback.print_exc())
         return [-1]
 
@@ -309,7 +310,7 @@ class DocxConvert:
             self.docx = docx.Document(self.path)
             self.zip = zipfile.ZipFile(self.path)
         except:
-            logging.info("cannot open docx!")
+            log("cannot open docx!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 
@@ -348,7 +349,9 @@ class DocxConvert:
                     _image = image_list.pop(0)
                     with open(temp_image_path, "wb") as f:
                         f.write(_image)
-                    self._page.add_child(_Image(_image, temp_image_path, bbox))
+                    _img = _Image(_image, temp_image_path, bbox)
+                    _img.is_from_docx = True
+                    self._page.add_child(_img)
                     doc_pr_cnt += 1
 
             if tag == "w:tbl":

+ 16 - 12
format_convert/convert_image.py

@@ -1,3 +1,4 @@
+import inspect
 import logging
 import os
 import sys
@@ -6,12 +7,12 @@ from pdfminer.layout import LTLine
 import traceback
 import cv2
 from format_convert import get_memory_info
-from format_convert.utils import judge_error_code, add_div, LineTable, get_table_html
+from format_convert.utils import judge_error_code, add_div, LineTable, get_table_html, get_logger, log
 from format_convert.table_correct import get_rotated_image
 from format_convert.convert_need_interface import from_otr_interface, from_ocr_interface
 
 
-def image_process(image_np, image_path, is_from_pdf, use_ocr=True):
+def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False, use_ocr=True):
     from format_convert.convert_tree import _Table, _Sentence
 
     def get_cluster(t_list, b_list, axis):
@@ -71,13 +72,16 @@ def image_process(image_np, image_path, is_from_pdf, use_ocr=True):
                 textbox_list.remove(_obj)
         return textbox_list
 
-    logging.info("into image_preprocess")
+    log("into image_preprocess")
     try:
         # 图片倾斜校正,写入原来的图片路径
-        print("image_process", image_path)
+        # print("image_process", image_path)
         g_r_i = get_rotated_image(image_np, image_path)
-        if g_r_i == [-1]:
-            return [-1]
+        if judge_error_code(g_r_i):
+            if is_from_docx:
+                return []
+            else:
+                return g_r_i
 
         image_np = cv2.imread(image_path)
         if image_np is None:
@@ -114,7 +118,7 @@ def image_process(image_np, image_path, is_from_pdf, use_ocr=True):
         # 调用ocr模型接口
         with open(image_resize_path, "rb") as f:
             image_bytes = f.read()
-        text_list, bbox_list = from_ocr_interface(image_bytes, True)
+        text_list, bbox_list = from_ocr_interface(image_bytes, is_table=True)
         if judge_error_code(text_list):
             return text_list
 
@@ -163,14 +167,14 @@ def image_process(image_np, image_path, is_from_pdf, use_ocr=True):
             return [-8]
 
     except Exception as e:
-        logging.info("image_preprocess error")
-        print("image_preprocess", traceback.print_exc())
+        log("image_preprocess error")
+        traceback.print_exc()
         return [-1]
 
 
 @get_memory_info.memory_decorator
 def picture2text(path, html=False):
-    logging.info("into picture2text")
+    log("into picture2text")
     try:
         # 判断图片中表格
         img = cv2.imread(path)
@@ -185,7 +189,7 @@ def picture2text(path, html=False):
             text = add_div(text)
         return [text]
     except Exception as e:
-        logging.info("picture2text error!")
+        log("picture2text error!")
         print("picture2text", traceback.print_exc())
         return [-1]
 
@@ -235,7 +239,7 @@ class ImageConvert:
             with open(self.path, "rb") as f:
                 self.image = f.read()
         except:
-            logging.info("cannot open image!")
+            log("cannot open image!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 

+ 304 - 35
format_convert/convert_need_interface.py

@@ -1,55 +1,165 @@
 import base64
+import inspect
+import json
 import logging
 import os
+import random
 import sys
-sys.path.append(os.path.dirname(__file__) + "/../")
+
+from werkzeug.exceptions import NotFound
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 import traceback
 import requests
-from format_convert import get_memory_info
-from format_convert.utils import get_platform, get_sequential_data, judge_error_code
+from format_convert import get_memory_info, _global
+from format_convert.utils import get_platform, get_sequential_data, judge_error_code, request_post, get_ip_port, \
+    get_intranet_ip, get_logger, log
 from ocr.ocr_interface import ocr, OcrModels
 from otr.otr_interface import otr, OtrModels
 from format_convert.libreoffice_interface import office_convert
 
 
-def from_office_interface(src_path, dest_path, target_format, retry_times=1):
+# 远程GPU接口
+# # interface_ip_list = ['http://192.168.2.102', 'http://192.168.2.103']
+# # interface_ip_list = ['http://172.16.160.65', 'http://172.16.160.64', 'http://172.16.160.66', 'http://172.16.160.67']
+# interface_ip_list = ['http://172.16.160.65', 'http://172.16.160.65']
+# # ocr_port_list = ["15011", "15013", "15015"]
+# # ocr_port_list = ["15011", "15013", "15015", "15017", "15019"]
+# # otr_port_list = ["15012", "15014", "15016", "15018", "15020"]
+# ocr_port_list = ["15011", "15013", "15015"]
+# otr_port_list = ["15012", "15014", "15016"]
+# # ocr_port_list = ["15011", "15013", "15015", "15017", "15019", "15021"]
+# # otr_port_list = ["15012", "15014", "15016", "15018", "15020", "15022"]
+# soffice_port_list = ["16000", "16001", "16002", "16003", "16004", "16005",
+#                      "16006", "16007", "16008", "16009"]
+# # ocr_port_list = ["15011", "15013"]
+# # otr_port_list = ["15012"]
+
+if get_platform() == "Windows":
+    FROM_REMOTE = False
+else:
+    FROM_REMOTE = True
+
+
+def from_office_interface(src_path, dest_path, target_format, retry_times=1, from_remote=FROM_REMOTE):
     try:
         # Win10跳出超时装饰器
-        if get_platform() == "Windows":
-            # origin_office_convert = office_convert.__wrapped__
-            # file_path = origin_office_convert(src_path, dest_path, target_format, retry_times)
-            file_path = office_convert(src_path, dest_path, target_format, retry_times)
-        else:
-            # 将装饰器包装为一个类,否则多进程Pickle会报错 it's not the same object as xxx 问题,
-            # timeout_decorator_obj = my_timeout_decorator.TimeoutClass(office_convert, 180, TimeoutError)
-            # file_path = timeout_decorator_obj.run(src_path, dest_path, target_format, retry_times)
+        # if get_platform() == "Windows":
+        #     # origin_office_convert = office_convert.__wrapped__
+        #     # file_path = origin_office_convert(src_path, dest_path, target_format, retry_times)
+        #     file_path = office_convert(src_path, dest_path, target_format, retry_times)
+        # else:
+        #     # 将装饰器包装为一个类,否则多进程Pickle会报错 it's not the same object as xxx 问题,
+        #     # timeout_decorator_obj = my_timeout_decorator.TimeoutClass(office_convert, 180, TimeoutError)
+        #     # file_path = timeout_decorator_obj.run(src_path, dest_path, target_format, retry_times)
+        #
+        #     file_path = office_convert(src_path, dest_path, target_format, retry_times)
+
+        if from_remote:
+            # 重试
+            retry_times_1 = 1
+            retry_times_2 = 2
+            while retry_times_1 and retry_times_2:
+                # _ip = ip_pool("soffice", _random=True)
+                # _port = port_pool("soffice", _random=True)
+                # _ip = interface_ip_list[0]
+                # _port = "16002"
+                # _ip, _port = interface_pool("soffice")
+                # ip_port = from_schedule_interface("office")
+                ip_port = interface_pool("office")
+                if judge_error_code(ip_port):
+                    return ip_port
+                _url = ip_port + "/soffice"
 
+                with open(src_path, "rb") as f:
+                    file_bytes = f.read()
+                base64_stream = base64.b64encode(file_bytes)
+                r = json.loads(request_post(_url, {"src_path": src_path,
+                                                   "dest_path": dest_path,
+                                                   "file": base64_stream,
+                                                   "target_format": target_format,
+                                                   "retry_times": retry_times}, time_out=15))
+                if type(r) == list:
+                    # 接口连不上换个端口重试
+                    if retry_times_1 <= 1:
+                        return r
+                    else:
+                        retry_times_1 -= 1
+                        log("retry post office_interface... left times " + str(retry_times_1))
+                        continue
+                file_str = r.get("data")
+                if judge_error_code(file_str):
+                    if retry_times_2 <= 1:
+                        return file_str
+                    else:
+                        retry_times_2 -= 1
+                        continue
+                file_bytes = eval(file_str)
+                uid1 = src_path.split(os.sep)[-1].split(".")[0]
+                file_path = dest_path + uid1 + "." + target_format
+                if not os.path.exists(os.path.dirname(file_path)):
+                    os.makedirs(os.path.dirname(file_path), mode=0o777)
+                with open(file_path, "wb") as f:
+                    f.write(file_bytes)
+                break
+        else:
             file_path = office_convert(src_path, dest_path, target_format, retry_times)
 
         if judge_error_code(file_path):
             return file_path
         return file_path
     except TimeoutError:
-        logging.info("from_office_interface timeout error!")
+        log("from_office_interface timeout error!")
         return [-5]
     except:
-        logging.info("from_office_interface error!")
+        log("from_office_interface error!")
         print("from_office_interface", traceback.print_exc())
         return [-1]
 
 
 @get_memory_info.memory_decorator
-def from_ocr_interface(image_stream, is_table=False):
-    logging.info("into from_ocr_interface")
+def from_ocr_interface(image_stream, is_table=False, from_remote=FROM_REMOTE):
+    log("into from_ocr_interface")
     try:
         base64_stream = base64.b64encode(image_stream)
 
         # 调用接口
         try:
-            if globals().get("global_ocr_model") is None:
-                globals().update({"global_ocr_model": OcrModels().get_model()})
-                print("=========== init ocr model ===========")
-            r = ocr(data=base64_stream, ocr_model=globals().get("global_ocr_model"))
+            if from_remote:
+                retry_times_1 = 3
+                # 重试
+                while retry_times_1:
+                    # _ip = ip_pool("ocr", _random=True)
+                    # _port = port_pool("ocr", _random=True)
+                    # if _ip == interface_ip_list[1]:
+                    #     _port = ocr_port_list[0]
+                    # _ip, _port = interface_pool("ocr")
+                    # ip_port = _ip + ":" + _port
+                    # ip_port = from_schedule_interface("ocr")
+                    ip_port = interface_pool("ocr")
+                    if judge_error_code(ip_port):
+                        return ip_port
+                    _url = ip_port + "/ocr"
+                    r = json.loads(request_post(_url, {"data": base64_stream}, time_out=60))
+                    if type(r) == list:
+                        # 接口连不上换个端口重试
+                        if retry_times_1 <= 1:
+                            if is_table:
+                                return r, r
+                            else:
+                                return r
+                        else:
+                            retry_times_1 -= 1
+                            log("retry post ocr_interface... left times " + str(retry_times_1))
+                            continue
+                    if judge_error_code(r):
+                        return r
+                    break
+            else:
+                if globals().get("global_ocr_model") is None:
+                    globals().update({"global_ocr_model": OcrModels().get_model()})
+                    print("=========== init ocr model ===========")
+                r = ocr(data=base64_stream, ocr_model=globals().get("global_ocr_model"))
         except TimeoutError:
             if is_table:
                 return [-5], [-5]
@@ -76,13 +186,11 @@ def from_ocr_interface(image_stream, is_table=False):
                 text = get_sequential_data(text_list, bbox_list, html=True)
                 if judge_error_code(text):
                     return text
-                # if text == [-1]:
-                #     return [-1]
             else:
                 text = ""
             return text
     except Exception as e:
-        logging.info("from_ocr_interface error!")
+        log("from_ocr_interface error!")
         # print("from_ocr_interface", e, global_type)
         if is_table:
             return [-1], [-1]
@@ -92,7 +200,7 @@ def from_ocr_interface(image_stream, is_table=False):
 
 @get_memory_info.memory_decorator
 def from_otr_interface2(image_stream):
-    logging.info("into from_otr_interface")
+    log("into from_otr_interface")
     try:
         base64_stream = base64.b64encode(image_stream)
 
@@ -105,7 +213,7 @@ def from_otr_interface2(image_stream):
         except TimeoutError:
             return [-5], [-5], [-5], [-5], [-5]
         except requests.exceptions.ConnectionError as e:
-            logging.info("from_otr_interface")
+            log("from_otr_interface")
             print("from_otr_interface", traceback.print_exc())
             return [-2], [-2], [-2], [-2], [-2]
 
@@ -129,26 +237,52 @@ def from_otr_interface2(image_stream):
             lines = []
         return points, split_lines, bboxes, outline_points, lines
     except Exception as e:
-        logging.info("from_otr_interface error!")
+        log("from_otr_interface error!")
         print("from_otr_interface", traceback.print_exc())
         return [-1], [-1], [-1], [-1], [-1]
 
 
-def from_otr_interface(image_stream, is_from_pdf=False):
-    logging.info("into from_otr_interface")
+def from_otr_interface(image_stream, is_from_pdf=False, from_remote=FROM_REMOTE):
+    log("into from_otr_interface")
     try:
         base64_stream = base64.b64encode(image_stream)
 
         # 调用接口
         try:
-            if globals().get("global_otr_model") is None:
-                globals().update({"global_otr_model": OtrModels().get_model()})
-                print("=========== init otr model ===========")
-            r = otr(data=base64_stream, otr_model=globals().get("global_otr_model"), is_from_pdf=is_from_pdf)
+            if from_remote:
+                retry_times_1 = 3
+                # 重试
+                while retry_times_1:
+                    # _ip = ip_pool("otr", _random=True)
+                    # _port = port_pool("otr", _random=True)
+                    # if _ip == interface_ip_list[1]:
+                    #     _port = otr_port_list[0]
+                    ip_port = interface_pool("otr")
+                    # ip_port = from_schedule_interface("otr")
+                    if judge_error_code(ip_port):
+                        return ip_port
+                    _url = ip_port + "/otr"
+                    r = json.loads(request_post(_url, {"data": base64_stream, "is_from_pdf": is_from_pdf}, time_out=60))
+                    if type(r) == list:
+                        # 接口连不上换个端口重试
+                        if retry_times_1 <= 1:
+                            return r
+                        else:
+                            retry_times_1 -= 1
+                            log("retry post otr_interface... left times " + str(retry_times_1))
+                            continue
+                    if judge_error_code(r):
+                        return r
+                    break
+            else:
+                if globals().get("global_otr_model") is None:
+                    globals().update({"global_otr_model": OtrModels().get_model()})
+                    print("=========== init otr model ===========")
+                r = otr(data=base64_stream, otr_model=globals().get("global_otr_model"), is_from_pdf=is_from_pdf)
         except TimeoutError:
             return [-5]
         except requests.exceptions.ConnectionError as e:
-            logging.info("from_otr_interface")
+            log("from_otr_interface")
             print("from_otr_interface", traceback.print_exc())
             return [-2]
 
@@ -157,6 +291,141 @@ def from_otr_interface(image_stream, is_from_pdf=False):
         list_line = eval(_dict.get("list_line"))
         return list_line
     except Exception as e:
-        logging.info("from_otr_interface error!")
+        log("from_otr_interface error!")
         print("from_otr_interface", traceback.print_exc())
-        return [-1]
+        return [-1]
+
+
+# def from_schedule_interface(interface_type):
+#     try:
+#         _ip = "http://" + get_intranet_ip()
+#         _port = ip_port_dict.get(_ip).get("schedule")[0]
+#         _url = _ip + ":" + _port + "/schedule"
+#         data = {"interface_type": interface_type}
+#         result = json.loads(request_post(_url, data, time_out=10)).get("data")
+#         if judge_error_code(result):
+#             return result
+#         _ip, _port = result
+#         log("from_schedule_interface " + _ip + " " + _port)
+#         return _ip + ":" + _port
+#     except requests.exceptions.ConnectionError as e:
+#         log("from_schedule_interface ConnectionError")
+#         return [-2]
+#     except:
+#         log("from_schedule_interface error!")
+#         traceback.print_exc()
+#         return [-1]
+
+
+def interface_pool(interface_type):
+    ip_port_flag = _global.get("ip_port_flag")
+    ip_port_dict = _global.get("ip_port")
+    log(str(_global.get("ip_port_flag")))
+
+    try:
+        # 负载均衡, 选取ip
+        interface_load_list = []
+        for _ip in ip_port_flag.keys():
+            if ip_port_dict.get(_ip).get(interface_type):
+                load_scale = ip_port_flag.get(_ip).get(interface_type) / len(ip_port_dict.get(_ip).get(interface_type))
+                interface_load_list.append([_ip, load_scale])
+
+        if not interface_load_list:
+            raise NotFound
+        interface_load_list.sort(key=lambda x: x[-1])
+        _ip = interface_load_list[0][0]
+
+        # 负载均衡, 选取port
+        ip_type_cnt = ip_port_flag.get(_ip).get(interface_type)
+        ip_type_total = len(ip_port_dict.get(_ip).get(interface_type))
+        if ip_type_cnt == 0:
+            ip_type_cnt = random.randint(0, ip_type_total-1)
+        port_index = ip_type_cnt % ip_type_total
+        _port = ip_port_dict.get(_ip).get(interface_type)[port_index]
+
+        # 更新flag
+        current_flag = ip_type_cnt
+        if current_flag >= 10000:
+            ip_port_flag[_ip][interface_type] = 0
+        else:
+            ip_port_flag[_ip][interface_type] = current_flag + 1
+        _global.update({"ip_port_flag": ip_port_flag})
+        log(str(_global.get("ip_port_flag")))
+
+        ip_port = _ip + ":" + str(_port)
+        log(ip_port)
+        return ip_port
+    except NotFound:
+        log("cannot read ip from config! checkout config")
+        return [-2]
+    except:
+        traceback.print_exc()
+        return [-1]
+
+
+# def ip_pool(interface_type, _random=False):
+#     ip_flag_name = interface_type + '_ip_flag'
+#     ip_flag = globals().get(ip_flag_name)
+#     if ip_flag is None:
+#         if _random:
+#             _r = random.randint(0, len(interface_ip_list)-1)
+#             ip_flag = _r
+#             globals().update({ip_flag_name: ip_flag})
+#             ip_index = _r
+#         else:
+#             ip_flag = 0
+#             globals().update({ip_flag_name: ip_flag})
+#             ip_index = 0
+#     else:
+#         ip_index = ip_flag % len(interface_ip_list)
+#     ip_flag += 1
+#
+#     if ip_flag >= 10000:
+#         ip_flag = 0
+#     globals().update({ip_flag_name: ip_flag})
+#
+#     log("ip_pool " + interface_type + " " + str(ip_flag) + " " + str(interface_ip_list[ip_index]))
+#     return interface_ip_list[ip_index]
+#
+#
+# def port_pool(interface_type, _random=False):
+#     port_flag_name = interface_type + '_port_flag'
+#
+#     port_flag = globals().get(port_flag_name)
+#     if port_flag is None:
+#         if _random:
+#             if interface_type == "ocr":
+#                 _r = random.randint(0, len(ocr_port_list)-1)
+#             elif interface_type == "otr":
+#                 _r = random.randint(0, len(otr_port_list)-1)
+#             else:
+#                 _r = random.randint(0, len(soffice_port_list)-1)
+#             port_flag = _r
+#             globals().update({port_flag_name: port_flag})
+#             port_index = _r
+#         else:
+#             port_flag = 0
+#             globals().update({port_flag_name: port_flag})
+#             port_index = 0
+#     else:
+#         if interface_type == "ocr":
+#             port_index = port_flag % len(ocr_port_list)
+#         elif interface_type == "otr":
+#             port_index = port_flag % len(otr_port_list)
+#         else:
+#             port_index = port_flag % len(soffice_port_list)
+#     port_flag += 1
+#
+#     if port_flag >= 10000:
+#         port_flag = 0
+#     globals().update({port_flag_name: port_flag})
+#
+#     if interface_type == "ocr":
+#         log("port_pool " + interface_type + " " + str(port_flag) + " " + ocr_port_list[port_index])
+#         return ocr_port_list[port_index]
+#     elif interface_type == "otr":
+#         log("port_pool " + interface_type + " " + str(port_flag) + " " + otr_port_list[port_index])
+#         return otr_port_list[port_index]
+#     else:
+#         log("port_pool " + interface_type + " " + str(port_flag) + " " + soffice_port_list[port_index])
+#         return soffice_port_list[port_index]

+ 124 - 70
format_convert/convert_pdf.py

@@ -1,3 +1,4 @@
+import inspect
 import io
 import logging
 import os
@@ -10,7 +11,7 @@ from pdfplumber.page import Page as pdfPage
 from format_convert.convert_tree import _Document, _Page, _Image, _Sentence, _Table
 import time
 import pdfminer
-import timeout_decorator
+from format_convert import timeout_decorator
 from PIL import Image
 from format_convert.convert_image import image_process
 from format_convert.convert_need_interface import from_ocr_interface, from_office_interface
@@ -26,18 +27,19 @@ from pdfminer.converter import PDFPageAggregator
 from pdfminer.layout import LTTextBoxHorizontal, LAParams, LTFigure, LTImage, LTCurve, LTText, LTChar, LTRect, \
     LTTextBoxVertical, LTLine
 from format_convert import get_memory_info
-from format_convert.utils import judge_error_code, add_div, get_platform, get_html_p, string_similarity, LineTable
+from format_convert.utils import judge_error_code, add_div, get_platform, get_html_p, string_similarity, LineTable, \
+    get_logger, log
 import fitz
 
 
 @get_memory_info.memory_decorator
 def pdf2Image(path, save_dir):
-    logging.info("into pdf2Image")
+    log("into pdf2Image")
     try:
         try:
             doc = fitz.open(path)
         except Exception as e:
-            logging.info("pdf format error!")
+            log("pdf format error!")
             # print("pdf format error!", e)
             return [-3]
 
@@ -48,7 +50,7 @@ def pdf2Image(path, save_dir):
             # 限制pdf页数,只取前10页后10页
             if page_count > 20:
                 if 10 <= page_no < page_count - 10:
-                    # logging.info("pdf2Image: pdf pages count " + str(doc.page_count)
+                    # log("pdf2Image: pdf pages count " + str(doc.page_count)
                     #              + ", only get 70 pages")
                     continue
 
@@ -73,14 +75,14 @@ def pdf2Image(path, save_dir):
             except ValueError as e:
                 traceback.print_exc()
                 if str(e) == "page not in document":
-                    logging.info("pdf2Image page not in document! continue..." + str(page_no))
+                    log("pdf2Image page not in document! continue..." + str(page_no))
                     continue
                 elif "encrypted" in str(e):
-                    logging.info("pdf2Image document need password " + str(page_no))
+                    log("pdf2Image document need password " + str(page_no))
                     return [-7]
             except RuntimeError as e:
                 if "cannot find page" in str(e):
-                    logging.info("pdf2Image page {} not in document! continue... ".format(str(page_no)) + str(e))
+                    log("pdf2Image page {} not in document! continue... ".format(str(page_no)) + str(e))
                     continue
                 else:
                     traceback.print_exc()
@@ -88,28 +90,27 @@ def pdf2Image(path, save_dir):
         return [output_image_dict]
 
     except Exception as e:
-        logging.info("pdf2Image error!")
+        log("pdf2Image error!")
         print("pdf2Image", traceback.print_exc())
         return [-1]
 
 
 @get_memory_info.memory_decorator
-@timeout_decorator.timeout(300, timeout_exception=TimeoutError)
-def pdf_analyze(interpreter, page, device):
-    logging.info("into pdf_analyze")
-    # 解析pdf中的不含表格的页
+@timeout_decorator.timeout(10, timeout_exception=TimeoutError)
+def pdf_analyze(interpreter, page, device, page_no):
+    log("into pdf_analyze")
     pdf_time = time.time()
     print("pdf_analyze interpreter process...")
     interpreter.process_page(page)
     print("pdf_analyze device get_result...")
     layout = device.get_result()
-    logging.info("pdf2text read time " + str(time.time() - pdf_time))
+    log("pdf2text page " + str(page_no) + " read time " + str(time.time() - pdf_time))
     return layout
 
 
 @get_memory_info.memory_decorator
 def pdf2text(path, unique_type_dir):
-    logging.info("into pdf2text")
+    log("into pdf2text")
     try:
         # pymupdf pdf to image
         save_dir = path.split(".")[-2] + "_" + path.split(".")[-1]
@@ -133,7 +134,7 @@ def pdf2text(path, unique_type_dir):
                 img = cv2.imread(img_path)
                 img_size = img.shape
             except:
-                logging.info("pdf2text read image in page fail! continue...")
+                log("pdf2text read image in page fail! continue...")
                 continue
 
             # 每张图片处理
@@ -185,10 +186,10 @@ def pdf2text(path, unique_type_dir):
                 break
         except pdfminer.psparser.PSEOF as e:
             # pdfminer 读不了空白页的对象,直接使用pymupdf转换出的图片进行ocr识别
-            logging.info("pdf2text " + str(e) + " use ocr read pdf!")
+            log("pdf2text " + str(e) + " use ocr read pdf!")
             text_list = []
             for page_no in page_no_list:
-                logging.info("pdf2text ocr page_no " + str(page_no))
+                log("pdf2text ocr page_no " + str(page_no))
                 page_info = page_info_dict.get(page_no)
                 # 表格
                 if page_info[3]:
@@ -224,7 +225,7 @@ def pdf2text(path, unique_type_dir):
                 text += t[0]
             return [text]
         except Exception as e:
-            logging.info("pdf format error!")
+            log("pdf format error!")
             traceback.print_exc()
             return [-3]
 
@@ -234,10 +235,10 @@ def pdf2text(path, unique_type_dir):
         pages = list(pages)
         page_count = len(pages)
         for page in pages:
-            logging.info("pdf2text pymupdf page_no " + str(page_no))
+            log("pdf2text pymupdf page_no " + str(page_no))
             # 限制pdf页数,只取前100页
             # if page_no >= 70:
-            #     logging.info("pdf2text: pdf pages only get 70 pages")
+            #     log("pdf2text: pdf pages only get 70 pages")
             #     break
             if page_count > 20:
                 if 10 <= page_no < page_count - 10:
@@ -276,7 +277,7 @@ def pdf2text(path, unique_type_dir):
                         interpreter.process_page(page)
                         layout = device.get_result()
                     except Exception:
-                        logging.info("pdf2text pdfminer read pdf page error! continue...")
+                        log("pdf2text pdfminer read pdf page error! continue...")
                         continue
 
                 else:
@@ -287,12 +288,12 @@ def pdf2text(path, unique_type_dir):
                             origin_pdf_analyze = pdf_analyze.__wrapped__
                             layout = origin_pdf_analyze(interpreter, page, device)
                         else:
-                            layout = pdf_analyze(interpreter, page, device)
+                            layout = pdf_analyze(interpreter, page, device, page_no)
                     except TimeoutError as e:
-                        logging.info("pdf2text pdfminer read pdf page time out!")
+                        log("pdf2text pdfminer read pdf page time out!")
                         return [-4]
                     except Exception:
-                        logging.info("pdf2text pdfminer read pdf page error! continue...")
+                        log("pdf2text pdfminer read pdf page error! continue...")
                         continue
 
                 # 判断该页有没有文字对象,没有则有可能是有水印
@@ -305,7 +306,7 @@ def pdf2text(path, unique_type_dir):
                         image_count += 1
 
                 # 如果该页图片数量过多,直接ocr整页识别
-                logging.info("pdf2text image_count " + str(image_count))
+                log("pdf2text image_count " + str(image_count))
                 if image_count >= 3:
                     image_text = page_info_dict.get(page_no)[0]
                     if image_text is None:
@@ -387,7 +388,7 @@ def pdf2text(path, unique_type_dir):
                                 #     with open(output_image_list[page_no], "rb") as ff:
                                 #         image_stream = ff.read()
                                 except Exception:
-                                    logging.info("pdf2text pdfminer read image in page " + str(page_no) +
+                                    log("pdf2text pdfminer read image in page " + str(page_no) +
                                                  "  fail! use pymupdf read image...")
                                     # print(traceback.print_exc())
                                     image_text = page_info_dict.get(page_no)[0]
@@ -404,11 +405,11 @@ def pdf2text(path, unique_type_dir):
                                 if image_text == "" and only_image:
                                     # 拆出该页pdf
                                     try:
-                                        logging.info("pdf2text guess pdf has watermark")
+                                        log("pdf2text guess pdf has watermark")
                                         split_path = get_single_pdf(path, page_no)
                                     except:
                                         # 如果拆分抛异常,则大概率不是水印图,用ocr识别图片
-                                        logging.info("pdf2text guess pdf has no watermark")
+                                        log("pdf2text guess pdf has no watermark")
                                         image_text = page_info_dict.get(page_no)[0]
                                         if image_text is None:
                                             with open(output_image_dict.get(page_no), "rb") as ff:
@@ -471,16 +472,16 @@ def pdf2text(path, unique_type_dir):
                 text += t[0]
         return [text]
     except UnicodeDecodeError as e:
-        logging.info("pdf2text pdfminer create pages failed! " + str(e))
+        log("pdf2text pdfminer create pages failed! " + str(e))
         return [-3]
     except Exception as e:
-        logging.info("pdf2text error!")
+        log("pdf2text error!")
         print("pdf2text", traceback.print_exc())
         return [-1]
 
 
 def get_single_pdf(path, page_no):
-    logging.info("into get_single_pdf")
+    log("into get_single_pdf")
     try:
         # print("path, ", path)
         pdf_origin = PdfFileReader(path, strict=False)
@@ -495,13 +496,13 @@ def get_single_pdf(path, page_no):
     except PyPDF2.utils.PdfReadError as e:
         raise e
     except Exception as e:
-        logging.info("get_single_pdf error! page " + str(page_no))
+        log("get_single_pdf error! page " + str(page_no))
         print("get_single_pdf", traceback.print_exc())
         raise e
 
 
 def page_table_connect(has_table_dict):
-    logging.info("into page_table_connect")
+    log("into page_table_connect")
     if not has_table_dict:
         return [], []
 
@@ -576,7 +577,7 @@ def page_table_connect(has_table_dict):
         return table_connect_list, connect_text_list
     except Exception as e:
         # print("page_table_connect", e)
-        logging.info("page_table_connect error!")
+        log("page_table_connect error!")
         print("page_table_connect", traceback.print_exc())
         return [-1], [-1]
 
@@ -601,7 +602,7 @@ class PDFConvert:
                 self.doc_pdfminer = PDFDocument(parser)
                 rsrcmgr = PDFResourceManager()
                 self.laparams = LAParams(line_overlap=0.01,
-                                         char_margin=0.05,
+                                         char_margin=0.3,
                                          line_margin=0.01,
                                          word_margin=0.01,
                                          boxes_flow=0.1,)
@@ -628,7 +629,7 @@ class PDFConvert:
                 print("Only Support Packages", str(self.packages))
                 raise Exception
         except:
-            logging.info(package_name + " cannot open pdf!")
+            log(package_name + " cannot open pdf!")
             self._doc.error_code = [-3]
 
     def convert(self):
@@ -642,11 +643,23 @@ class PDFConvert:
 
         # 判断是否能读pdf
         try:
-            for page in PDFPage.create_pages(self.doc_pdfminer):
+            pages = PDFPage.create_pages(self.doc_pdfminer)
+            for page in pages:
                 break
-        except pdfminer.psparser.PSEOF as e:
+            pages = list(pages)
+        # except pdfminer.psparser.PSEOF as e:
+        except:
             # pdfminer 读不了空白页的对象,直接使用pymupdf转换出的图片进行ocr识别
-            logging.info("pdf2text " + str(e) + " use ocr read pdf!")
+            log("pdf2text pdfminer read failed! read by pymupdf!")
+            traceback.print_exc()
+            try:
+                self.get_all_page_image()
+                return
+            except:
+                traceback.print_exc()
+                log("pdf2text use pymupdf read failed!")
+                self._doc.error_code = [-3]
+                return
 
         # 每一页进行处理
         pages = PDFPage.create_pages(self.doc_pdfminer)
@@ -664,44 +677,75 @@ class PDFConvert:
             # 解析单页
             self.convert_page(page, page_no)
 
+            # print("+"*30, page.resources)
+
             if self._doc.error_code is None and self._page.error_code is not None:
-                self._doc.error_code = self._page.error_code
-                break
+                if self._page.error_code[0] in [-4, -3, 0]:
+                    page_no += 1
+                    continue
+                else:
+                    self._doc.error_code = self._page.error_code
+                    break
             self._doc.add_child(self._page)
             page_no += 1
 
     def convert_page(self, page, page_no):
-        layout = self.get_layout(page)
+        # pdf page.annots为None,不经过get_layout,直接ocr
+        # if page.annots is None:
+        #     lt_image_list = []
+        #     lt_text_list = []
+        #     # 设置只有图片,可跳到ocr
+        #     only_image = 1
+        #     image_count = 1
+        # else:
+        layout = self.get_layout(page, page_no)
+        if self._doc.error_code is not None:
+            return
         if judge_error_code(layout):
             self._page.error_code = layout
             return
 
         # 判断该页的对象类型,并存储
-        only_image = 1
-        image_count = 0
+        # only_image = 1
+        # image_count = 0
         lt_text_list = []
         lt_image_list = []
         for x in layout:
             if isinstance(x, (LTTextBoxHorizontal, LTTextBoxVertical)):
-                only_image = 0
+                # only_image = 0
                 lt_text_list.append(x)
             if isinstance(x, LTFigure):
                 for y in x:
                     if isinstance(y, LTImage):
                         lt_image_list.append(y)
-                        image_count += 1
+                        # image_count += 1
         lt_text_list = self.delete_water_mark(lt_text_list, layout.bbox, 15)
         print("convert_pdf page", page_no)
         print("len(lt_image_list), len(lt_text_list)", len(lt_image_list), len(lt_text_list))
 
         # 若只有文本且图片数为0,直接提取文字及表格
-        if only_image == 0 and image_count == 0:
+        # if only_image == 0 and image_count == 0:
+        if len(lt_image_list) == 0 and len(lt_text_list) > 0:
             # PDFPlumber
             if self.has_init_pdf[3] == 0:
                 self.init_package("pdfplumber")
             if self._doc.error_code is not None:
                 return
 
+            # 无法识别pdf字符编码,整页用ocr
+            text_temp = ""
+            for _t in lt_text_list:
+                text_temp += _t.get_text()
+
+            if re.search('[(]cid:[0-9]+[)]', text_temp):
+                page_image = self.get_page_image(page_no)
+                if judge_error_code(page_image):
+                    self._page.error_code = page_image
+                else:
+                    _image = _Image(page_image[1], page_image[0])
+                    self._page.add_child(_image)
+                return
+
             try:
                 lt_line_list = []
                 page_plumber = pdfPage(self.doc_pdfplumber, page, page_number=page_no, initial_doctop=self.doc_top)
@@ -722,7 +766,6 @@ class PDFConvert:
                 list_sentences = ParseUtils.recognize_sentences(lt_text_list, filter_objs,
                                                                 layout.bbox, page_no)
 
-
                 for sentence in list_sentences:
                     _sen = _Sentence(sentence.text, sentence.bbox)
                     self._page.add_child(_sen)
@@ -733,7 +776,8 @@ class PDFConvert:
                 self._page.error_code = [-8]
 
         # 若该页图片数量过多,或无文本,则直接ocr整页识别
-        elif image_count > 3 or only_image == 1:
+        # elif image_count > 3 or only_image == 1:
+        elif len(lt_image_list) > 3 or len(lt_text_list) == 0:
             page_image = self.get_page_image(page_no)
             if judge_error_code(page_image):
                 self._page.error_code = page_image
@@ -794,37 +838,33 @@ class PDFConvert:
                         _image = _Image(image_stream, temp_path, image.bbox)
                         self._page.add_child(_image)
                 except Exception:
-                    logging.info("pdf2text pdfminer read image in page " + str(page_no) +
+                    log("pdf2text pdfminer read image in page " + str(page_no) +
                                  "  fail! use pymupdf read image...")
                     print(traceback.print_exc())
             # pdf对象需反向排序
             self._page.is_reverse = True
 
-    def get_layout(self, page):
+    def get_layout(self, page, page_no):
         if self.has_init_pdf[0] == 0:
             self.init_package("pdfminer")
         if self._doc.error_code is not None:
             return
 
         # 获取该页layout
+        start_time = time.time()
         try:
             if get_platform() == "Windows":
-                self.interpreter.process_page(page)
-                layout = self.device.get_result()
+                # origin_pdf_analyze = pdf_analyze.__wrapped__
+                # layout = origin_pdf_analyze(self.interpreter, page, self.device)
+                layout = pdf_analyze(self.interpreter, page, self.device, page_no)
             else:
-                # 设置超时时间
-                try:
-                    # 解析pdf中的不含表格的页
-                    if get_platform() == "Windows":
-                        origin_pdf_analyze = pdf_analyze.__wrapped__
-                        layout = origin_pdf_analyze(self.interpreter, page, self.device)
-                    else:
-                        layout = pdf_analyze(self.interpreter, page, self.device)
-                except TimeoutError as e:
-                    logging.info("pdf2text pdfminer read pdf page time out!")
-                    layout = [-4]
+                layout = pdf_analyze(self.interpreter, page, self.device, page_no)
+        except TimeoutError as e:
+            log("pdf2text pdfminer read pdf page " + str(page_no) + " time out! " + str(time.time() - start_time))
+            layout = [-4]
         except Exception:
-            logging.info("pdf2text pdfminer read pdf page error! continue...")
+            traceback.print_exc()
+            log("pdf2text pdfminer read pdf page " + str(page_no) + " error! continue...")
             layout = [-3]
         return layout
 
@@ -844,21 +884,22 @@ class PDFConvert:
             mat = fitz.Matrix(zoom_x, zoom_y).preRotate(rotate)
             pix = page.getPixmap(matrix=mat, alpha=False)
             pix.writePNG(output)
-            # pdf_image = cv2.imread(output)
+            # 输出图片resize
+            self.resize_image(output)
             with open(output, "rb") as f:
                 pdf_image = f.read()
             return [output, pdf_image]
         except ValueError as e:
             traceback.print_exc()
             if str(e) == "page not in document":
-                logging.info("pdf2Image page not in document! continue..." + str(page_no))
+                log("pdf2Image page not in document! continue... page " + str(page_no))
                 return [0]
             elif "encrypted" in str(e):
-                logging.info("pdf2Image document need password " + str(page_no))
+                log("pdf2Image document need password " + str(page_no))
                 return [-7]
         except RuntimeError as e:
             if "cannot find page" in str(e):
-                logging.info("pdf2Image page {} not in document! continue... ".format(str(page_no)) + str(e))
+                log("pdf2Image page {} not in document! continue... ".format(str(page_no)) + str(e))
                 return [0]
             else:
                 traceback.print_exc()
@@ -923,6 +964,19 @@ class PDFConvert:
                 temp_text_list.append(_obj)
         return temp_text_list
 
+    def resize_image(self, img_path, max_size=2000):
+        _img = cv2.imread(img_path)
+        if _img.shape[0] <= max_size or _img.shape[1] <= max_size:
+            return
+        else:
+            resize_axis = 0 if _img.shape[0] >= _img.shape[1] else 1
+            ratio = max_size / _img.shape[resize_axis]
+            new_shape = [0, 0]
+            new_shape[resize_axis] = max_size
+            new_shape[1-resize_axis] = int(_img.shape[1-resize_axis] * ratio)
+            _img = cv2.resize(_img, (new_shape[1], new_shape[0]))
+            cv2.imwrite(img_path, _img)
+
 
 # 以下为现成pdf单页解析接口
 class ParseSentence:

+ 15 - 12
format_convert/convert_rar.py

@@ -1,3 +1,4 @@
+import inspect
 import os
 import sys
 sys.path.append(os.path.dirname(__file__) + "/../")
@@ -5,24 +6,26 @@ from format_convert.convert_tree import _Document, _Table, _Page, _Sentence
 import logging
 import traceback
 from format_convert import get_memory_info
-from format_convert.utils import get_platform, rename_inner_files, judge_error_code, judge_format, slash_replace
+from format_convert.utils import get_platform, rename_inner_files, judge_error_code, judge_format, slash_replace, \
+    my_subprocess_call, get_logger, log
 
 
 @get_memory_info.memory_decorator
 def rar2text(path, unique_type_dir):
     from format_convert.convert import getText
-    logging.info("into rar2text")
+    log("into rar2text")
     try:
         rar_path = unique_type_dir
         try:
             # shell调用unrar解压
-            _signal = os.system("unrar x " + path + " " + rar_path)
+            # _signal = os.system("unrar x " + path + " " + rar_path)
+            pid, _signal = my_subprocess_call(["unrar x ", path, rar_path])
             print("rar2text _signal", _signal)
             # =0, 解压成功
             if _signal != 0:
                 raise Exception
         except Exception as e:
-            logging.info("rar format error!")
+            log("rar format error!")
             print("rar format error!", e)
             return [-3]
 
@@ -49,13 +52,13 @@ def rar2text(path, unique_type_dir):
 
             # 无文件后缀,猜格式
             if len(file.split(".")) <= 1:
-                logging.info(str(file) + " has no type! Guess type...")
+                log(str(file) + " has no type! Guess type...")
                 _type = judge_format(file)
                 if _type is None:
-                    logging.info(str(file) + "cannot guess type!")
+                    log(str(file) + "cannot guess type!")
                     sub_text = [""]
                 else:
-                    logging.info(str(file) + " guess type: " + _type)
+                    log(str(file) + " guess type: " + _type)
                     new_file = str(file) + "." + _type
                     os.rename(file, new_file)
                     file = new_file
@@ -74,7 +77,7 @@ def rar2text(path, unique_type_dir):
             text = text + sub_text
         return text
     except Exception as e:
-        logging.info("rar2text error!")
+        log("rar2text error!")
         print("rar2text", traceback.print_exc())
         return [-1]
 
@@ -95,7 +98,7 @@ class RarConvert:
             if _signal != 0:
                 raise Exception
         except:
-            logging.info("cannot open rar!")
+            log("cannot open rar!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 
@@ -120,13 +123,13 @@ class RarConvert:
             bbox = (0, file_no, 0, 0)
             # 无文件后缀,猜格式
             if len(file.split(".")) <= 1:
-                logging.info(str(file) + " has no type! Guess type...")
+                log(str(file) + " has no type! Guess type...")
                 _type = judge_format(file)
                 if _type is None:
-                    logging.info(str(file) + "cannot guess type!")
+                    log(str(file) + "cannot guess type!")
                     continue
                 else:
-                    logging.info(str(file) + " guess type: " + _type)
+                    log(str(file) + " guess type: " + _type)
                     new_file = str(file) + "." + _type
                     os.rename(file, new_file)
                     file = new_file

+ 6 - 5
format_convert/convert_swf.py

@@ -1,3 +1,4 @@
+import inspect
 import os
 import sys
 import time
@@ -13,12 +14,12 @@ from format_convert import get_memory_info, timeout_decorator
 from format_convert.convert_image import picture2text
 from format_convert.swf.export import SVGExporter
 from format_convert.swf.movie import SWF
-from format_convert.utils import judge_error_code
+from format_convert.utils import judge_error_code, get_logger, log
 
 
 @get_memory_info.memory_decorator
 def swf2text(path, unique_type_dir):
-    logging.info("into swf2text")
+    log("into swf2text")
     try:
         try:
             with open(path, 'rb') as f:
@@ -27,7 +28,7 @@ def swf2text(path, unique_type_dir):
                 svg = swf_file.export(svg_exporter)
             swf_str = str(svg.getvalue(), encoding='utf-8')
         except Exception as e:
-            logging.info("swf format error!")
+            log("swf format error!")
             traceback.print_exc()
             return [-3]
 
@@ -86,7 +87,7 @@ def swf2text(path, unique_type_dir):
 
         return [text]
     except Exception as e:
-        logging.info("swf2text error!")
+        log("swf2text error!")
         print("swf2text", traceback.print_exc())
         return [-1]
 
@@ -105,7 +106,7 @@ class SwfConvert:
                 svg = swf_file.export(svg_exporter)
             self.swf_str = str(svg.getvalue(), encoding='utf-8')
         except:
-            logging.info("cannot open swf!")
+            log("cannot open swf!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 

+ 2 - 1
format_convert/convert_tree.py

@@ -77,6 +77,7 @@ class _Image:
         self.path = path
         # 来源
         self.is_from_pdf = False
+        self.is_from_docx = False
         # 位置
         self.bbox = bbox
         self.x = bbox[0]
@@ -123,7 +124,7 @@ class _Image:
         # image_np = cv2.cvtColor(np.asarray(image_np), cv2.COLOR_RGB2BGR)
         image_np = cv2.imread(self.path)
 
-        obj_list = image_process(image_np, self.path, self.is_from_pdf, use_ocr=True)
+        obj_list = image_process(image_np, self.path, self.is_from_pdf, self.is_from_docx, use_ocr=True)
         if judge_error_code(obj_list):
             self.error_code = obj_list
             return

+ 9 - 7
format_convert/convert_txt.py

@@ -1,16 +1,18 @@
+import inspect
 import os
 import sys
-sys.path.append(os.path.dirname(__file__) + "/../")
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 from format_convert.convert_tree import _Document, _Page, _Sentence
 import logging
 import traceback
 import chardet
 from format_convert import get_memory_info
+from format_convert.utils import get_logger, log
 
 
 @get_memory_info.memory_decorator
 def txt2text(path):
-    logging.info("into txt2text")
+    log("into txt2text")
     try:
         # 判断字符编码
         with open(path, "rb") as ff:
@@ -20,17 +22,17 @@ def txt2text(path):
 
         try:
             if encode is None:
-                logging.info("txt2text cannot judge file code!")
+                log("txt2text cannot judge file code!")
                 return [-3]
             with open(path, "r", encoding=encode) as ff:
                 txt_text = ff.read()
             return [txt_text]
         except:
-            logging.info("txt2text cannot open file with code " + encode)
+            log("txt2text cannot open file with code " + encode)
             return [-3]
     except Exception as e:
         print("txt2text", traceback.print_exc())
-        logging.info("txt2text error!")
+        log("txt2text error!")
         return [-1]
 
 
@@ -48,12 +50,12 @@ class TxtConvert:
             encode = chardet.detect(data).get("encoding")
             print("txt2text judge code is", encode)
             if encode is None:
-                logging.info("txt2text cannot judge file code!")
+                log("txt2text cannot judge file code!")
                 raise Exception
             with open(self.path, "r", encoding=encode) as ff:
                 self.txt_text = ff.read()
         except:
-            logging.info("cannot open txt!")
+            log("cannot open txt!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 

+ 6 - 5
format_convert/convert_xls.py

@@ -1,18 +1,19 @@
+import inspect
 import os
 import sys
-sys.path.append(os.path.dirname(__file__) + "/../")
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 from format_convert.convert_tree import _Document
 import logging
 import traceback
 from format_convert import get_memory_info
 from format_convert.convert_need_interface import from_office_interface
 from format_convert.convert_xlsx import xlsx2text, XlsxConvert
-from format_convert.utils import judge_error_code
+from format_convert.utils import judge_error_code, get_logger, log
 
 
 @get_memory_info.memory_decorator
 def xls2text(path, unique_type_dir):
-    logging.info("into xls2text")
+    log("into xls2text")
     try:
         # 调用libreoffice格式转换
         file_path = from_office_interface(path, unique_type_dir, 'xlsx')
@@ -25,8 +26,8 @@ def xls2text(path, unique_type_dir):
 
         return text
     except Exception as e:
-        logging.info("xls2text error!")
-        print("xls2text", traceback.print_exc())
+        log("xls2text error!")
+        traceback.print_exc()
         return [-1]
 
 

+ 68 - 7
format_convert/convert_xlsx.py

@@ -1,22 +1,27 @@
+import inspect
 import os
 import sys
+
+from format_convert.utils import get_logger, log
+
 sys.path.append(os.path.dirname(__file__) + "/../")
 from format_convert.convert_tree import _Document, _Page, _Table
 import logging
 import traceback
 import pandas
+import numpy as np
 from format_convert import get_memory_info
 
 
 @get_memory_info.memory_decorator
 def xlsx2text(path, unique_type_dir):
-    logging.info("into xlsx2text")
+    log("into xlsx2text")
     try:
         try:
             # sheet_name=None, 即拿取所有sheet,存为dict
             df_dict = pandas.read_excel(path, header=None, keep_default_na=False, sheet_name=None)
         except Exception as e:
-            logging.info("xlsx format error!")
+            log("xlsx format error!")
             return [-3]
 
         df_list = [sheet for sheet in df_dict.values()]
@@ -34,8 +39,8 @@ def xlsx2text(path, unique_type_dir):
 
         return [sheet_text]
     except Exception as e:
-        logging.info("xlsx2text error!")
-        print("xlsx2text", traceback.print_exc())
+        log("xlsx2text error!")
+        traceback.print_exc()
         return [-1]
 
 
@@ -49,8 +54,39 @@ class XlsxConvert:
         # 各个包初始化
         try:
             self.df = pandas.read_excel(self.path, header=None, keep_default_na=False, sheet_name=None)
+            self.sheet_list = [sheet for sheet in self.df.values()]
+
+            # 防止读太多空列空行
+            self.col_limit = 100
+            self.row_limit = 2000
+            self.re_read = 0
+            for s in self.sheet_list:
+                if s.shape[1] > self.col_limit and s.shape[0] > self.row_limit:
+                    self.re_read = 3
+                    break
+                elif s.shape[0] > self.row_limit:
+                    self.re_read = 2
+                    break
+                elif s.shape[1] > self.col_limit:
+                    self.re_read = 1
+                    break
+
+            if self.re_read == 3:
+                self.df = pandas.read_excel(self.path, header=None, keep_default_na=False,
+                                            sheet_name=None, usecols=[x for x in range(self.col_limit)],
+                                            nrows=self.row_limit)
+            if self.re_read == 2:
+                self.df = pandas.read_excel(self.path, header=None, keep_default_na=False,
+                                            sheet_name=None, nrows=self.row_limit)
+            elif self.re_read == 1:
+                self.df = pandas.read_excel(self.path, header=None, keep_default_na=False,
+                                            sheet_name=None, usecols=[x for x in range(self.col_limit)])
+            if self.re_read > 0:
+                self.sheet_list = [sheet for sheet in self.df.values()]
+
+            print(self.sheet_list[0].shape)
         except:
-            logging.info("cannot open xlsx!")
+            log("cannot open xlsx!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 
@@ -59,9 +95,8 @@ class XlsxConvert:
         if self._doc.error_code is not None:
             return
 
-        sheet_list = [sheet for sheet in self.df.values()]
         sheet_no = 0
-        for sheet in sheet_list:
+        for sheet in self.sheet_list:
             self._page = _Page(None, sheet_no)
             self.convert_page(sheet)
 
@@ -72,8 +107,34 @@ class XlsxConvert:
 
     def convert_page(self, sheet):
         text = '<table border="1">' + "\n"
+
+        # 剔除多余空列
+        max_row_len = 0
+        max_col_len = 0
+        if self.re_read:
+            for index, row in sheet.iterrows():
+                col_len = 0
+                row_empty_flag = 1
+                for i in range(len(row)):
+                    if row[i] not in [None, "", np.nan]:
+                        row_empty_flag = 0
+                        col_len = i
+
+                if self.re_read == 3 or self.re_read == 1:
+                    if col_len > max_col_len:
+                        max_col_len = col_len
+
+                if self.re_read == 3 or self.re_read == 2:
+                    if row_empty_flag == 0:
+                        max_row_len = index
+
         for index, row in sheet.iterrows():
+            if self.re_read == 3 or self.re_read == 2:
+                if index > max_row_len:
+                    break
             text = text + "<tr>"
+            if self.re_read == 3 or self.re_read == 1:
+                row = row[:max_col_len+1]
             for r in row:
                 text = text + "<td>" + str(r) + "</td>" + "\n"
                 # print(text)

+ 12 - 11
format_convert/convert_zip.py

@@ -1,3 +1,4 @@
+import inspect
 import os
 import sys
 sys.path.append(os.path.dirname(__file__) + "/../")
@@ -6,13 +7,13 @@ import logging
 import traceback
 import zipfile
 from format_convert import get_memory_info
-from format_convert.utils import get_platform, rename_inner_files, judge_error_code, judge_format
+from format_convert.utils import get_platform, rename_inner_files, judge_error_code, judge_format, get_logger, log
 
 
 @get_memory_info.memory_decorator
 def zip2text(path, unique_type_dir):
     from format_convert.convert import getText
-    logging.info("into zip2text")
+    log("into zip2text")
     try:
         zip_path = unique_type_dir
 
@@ -53,7 +54,7 @@ def zip2text(path, unique_type_dir):
             # file_list = temp_list
 
         except Exception as e:
-            logging.info("zip format error!")
+            log("zip format error!")
             print("zip format error!", traceback.print_exc())
             return [-3]
 
@@ -74,13 +75,13 @@ def zip2text(path, unique_type_dir):
 
             # 无文件后缀,猜格式
             if len(file.split(".")) <= 1:
-                logging.info(str(file) + " has no type! Guess type...")
+                log(str(file) + " has no type! Guess type...")
                 _type = judge_format(file)
                 if _type is None:
-                    logging.info(str(file) + "cannot guess type!")
+                    log(str(file) + "cannot guess type!")
                     sub_text = [""]
                 else:
-                    logging.info(str(file) + " guess type: " + _type)
+                    log(str(file) + " guess type: " + _type)
                     new_file = str(file) + "." + _type
                     os.rename(file, new_file)
                     file = new_file
@@ -98,7 +99,7 @@ def zip2text(path, unique_type_dir):
             text = text + sub_text
         return text
     except Exception as e:
-        logging.info("zip2text error!")
+        log("zip2text error!")
         print("zip2text", traceback.print_exc())
         return [-1]
 
@@ -136,7 +137,7 @@ class ZipConvert:
                 file_list.append(zip_file.extract(f, path=self.zip_path))
             zip_file.close()
         except:
-            logging.info("cannot open zip!")
+            log("cannot open zip!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 
@@ -161,13 +162,13 @@ class ZipConvert:
             bbox = (0, file_no, 0, 0)
             # 无文件后缀,猜格式
             if len(file.split(".")) <= 1:
-                logging.info(str(file) + " has no type! Guess type...")
+                log(str(file) + " has no type! Guess type...")
                 _type = judge_format(file)
                 if _type is None:
-                    logging.info(str(file) + "cannot guess type!")
+                    log(str(file) + "cannot guess type!")
                     continue
                 else:
-                    logging.info(str(file) + " guess type: " + _type)
+                    log(str(file) + " guess type: " + _type)
                     new_file = str(file) + "." + _type
                     os.rename(file, new_file)
                     file = new_file

BIN
format_convert/get_points.jpg


+ 62 - 0
format_convert/interface.yml

@@ -0,0 +1,62 @@
+MASTER:
+#  windows: 'http://192.168.2.104',
+#  product: 'http://172.16.160.65'
+#  local-102: 'http://192.168.2.102'
+#  local-103: 'http://192.168.2.103'
+  ip: ['http://172.16.160.65']
+
+  PATH:
+#  65: /root/miniconda3/bin/python
+#  102: /home/python/anaconda3/envs/convert/bin/python
+#  103: /home/yons/anaconda3/envs/tf1.5/bin/python
+    python: '/root/miniconda3/bin/python'
+#  65: /data/format_conversion_maxcompute/
+#  102: /data/fangjiasheng/format_conversion_maxcompute/
+#  103: /data/python/fangjiasheng/format_conversion_maxcompute/
+    project: '/data/format_conversion_maxcompute/'
+
+  CONVERT:
+    port: 15010
+    processes: 25
+
+  SCHEDULE:
+    port:
+
+  OCR:
+    port_start: 17000
+    port_no: 6
+
+  OTR:
+    port_start: 18000
+    port_no: 6
+
+  OFFICE:
+    port_start: 16000
+    port_no: 24
+
+
+SLAVE:
+  ip:
+
+  PATH:
+    python:
+    project:
+
+  CONVERT:
+    port:
+    processes:
+
+  SCHEDULE:
+    port:
+
+  OCR:
+    port_start: 17000
+    port_no: 1
+
+  OTR:
+    port_start: 18000
+    port_no: 1
+
+  OFFICE:
+    port_start:
+    port_no:

+ 39 - 0
format_convert/kill_all.py

@@ -0,0 +1,39 @@
+import os
+import re
+import sys
+import psutil
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from format_convert.utils import get_ip_port, get_intranet_ip
+
+
+ip_port_dict = get_ip_port()
+ip = "http://" + get_intranet_ip()
+python_path = ip_port_dict.get(ip).get("python_path")
+project_path = ip_port_dict.get(ip).get("project_path")
+
+
+def kill():
+    if python_path and project_path:
+        pid_list = psutil.pids()
+        for pid in pid_list:
+            process = psutil.Process(pid)
+            process_cmd = ''
+            for c in process.cmdline():
+                process_cmd += c + " "
+            if process_cmd.strip() == "":
+                continue
+            if "monitor" in process_cmd or "kill" in process_cmd:
+                continue
+
+            if re.search(project_path, process_cmd):
+                comm = "kill -9 " + str(pid)
+                print(comm, process_cmd)
+                os.system(comm)
+    else:
+        print("cannot kill! checkout config...")
+        print(ip_port_dict)
+        print(ip, python_path, project_path)
+
+
+if __name__ == "__main__":
+    kill()

+ 26 - 0
format_convert/kill_office.py

@@ -0,0 +1,26 @@
+import logging
+import os
+import re
+import time
+import psutil
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+
+def kill_soffice(limit_sec=12):
+    pid_list = psutil.pids()
+    for pid in pid_list:
+        process = psutil.Process(pid)
+        if re.search("soffice", process.exe()):
+            start_time = process.create_time()
+            now_time = time.time()
+            run_time = now_time-start_time
+            # logging.info("pid " + str(run_time))
+            if run_time >= limit_sec:
+                comm = "kill -9 " + str(pid)
+                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
+                os.system("echo $(date +%F%n%T)")
+                os.system(comm)
+
+
+if __name__ == "__main__":
+    kill_soffice()

+ 112 - 22
format_convert/libreoffice_interface.py

@@ -1,16 +1,17 @@
+import base64
+import json
 import os
 import re
-import signal
+import shutil
 import subprocess
 import sys
-import time
 import traceback
 import psutil
-from format_convert import timeout_decorator
-
-from format_convert import get_memory_info
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 from format_convert.judge_platform import get_platform
 import logging
+from format_convert.utils import my_subprocess_call
+from flask import Flask, request
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
 
@@ -29,17 +30,17 @@ def monitor_libreoffice():
             try:
                 process = psutil.Process(pid)
                 # if process.username() == "appuser":
-                if re.search("soffice|unrar", process.exe()):
-                    # if time.time() - process.create_time() >= 120:
-
-                    # logging.info("---------------------------killed soffice")
-                    # print("process", pid, process.exe())
+                if re.search("soffice", process.exe()):
+                    if str(pid) == str(globals().get("soffice_pid")):
+                        logging.info("process " + str(pid) + str(process.exe()))
+                        comm = "kill -9 " + str(pid)
+                        os.system(comm)
+                        logging.info("killed soffice" + str(pid))
+                elif re.search("unrar", process.exe()):
                     logging.info("process " + str(pid) + str(process.exe()))
                     comm = "kill -9 " + str(pid)
-                    # subprocess.call(comm, shell=True)
                     os.system(comm)
-                    # print("killed", pid)
-                    logging.info("killed " + str(pid))
+                    logging.info("killed unrar" + str(pid))
 
             except TimeoutError:
                 raise TimeoutError
@@ -54,7 +55,7 @@ def monitor_libreoffice():
 def office_convert(src_path, dest_path, target_format, retry_times=1):
     try:
         logging.info("into office_convert")
-        print("src_path", src_path)
+        # print("src_path", src_path)
         uid1 = src_path.split(os.sep)[-1].split(".")[0]
         dest_file_path = dest_path + uid1 + "." + target_format
         src_format = src_path.split(".")[-1]
@@ -69,14 +70,13 @@ def office_convert(src_path, dest_path, target_format, retry_times=1):
 
                 try:
                     p = subprocess.call(comm_list, timeout=30*(i+2))
-
                 except:
                     continue
 
             # 调用Linux下的libreoffice子进程
             else:
                 # 先杀libreoffice进程
-                monitor_libreoffice()
+                # monitor_libreoffice()
 
                 # 再调用转换
                 libreoffice_dir = 'soffice'
@@ -89,17 +89,20 @@ def office_convert(src_path, dest_path, target_format, retry_times=1):
                 # logging.info("office_convert command" + comm)
                 try:
                     # p = subprocess.call(comm_list, timeout=30*(i+2))
-                    os.system(comm)
+                    # os.system(comm)
+                    pid, p_code = my_subprocess_call(comm_list, timeout=30*(i+1))
+                    logging.info("subprocess code " + str(p_code))
+                    globals().update({"soffice_pid": pid})
                 except TimeoutError:
                     return [-5]
                 except Exception as e:
-                    print(src_format + ' to ' + target_format + ' Failed! Retry...', i, 'times')
-                    print(traceback.print_exc())
+                    print(1, src_format + ' to ' + target_format + ' Failed! Retry...', i, 'times')
+                    traceback.print_exc()
                     continue
 
             # 执行失败,重试
             if not os.path.exists(dest_file_path):
-                print(src_format + ' to ' + target_format + ' Failed! Retry...', i, 'times')
+                print(2, src_format + ' to ' + target_format + ' Failed! Retry...', i, 'times')
                 continue
             # 执行成功,跳出循环
             else:
@@ -107,11 +110,98 @@ def office_convert(src_path, dest_path, target_format, retry_times=1):
 
         # 重试后还未成功
         if not os.path.exists(dest_file_path):
-            # print(src_format + ' to ' + target_format + ' failed!')
-            logging.info(src_format + ' to ' + target_format + " failed!")
+            logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
             return [-3]
 
         logging.info("out office_convert")
         return dest_file_path
     except TimeoutError:
         return [-5]
+
+
+# 接口配置
+app = Flask(__name__)
+
+
+@app.route('/soffice', methods=['POST'])
+def _office_convert():
+    src_path = None
+    try:
+        logging.info("into office_convert")
+
+        if not request.form:
+            logging.info("office_convert no data!")
+            return {"data": []}
+
+        src_path = request.form.get("src_path")
+        dest_path = request.form.get("dest_path")
+        file_b64 = request.form.get("file")
+        file_bytes = base64.b64decode(file_b64)
+        target_format = request.form.get("target_format")
+        # retry_times = int(request.form.get("retry_times"))
+
+        uid1 = src_path.split(os.sep)[-1].split(".")[0]
+        dest_file_path = dest_path + uid1 + "." + target_format
+        src_format = src_path.split(".")[-1]
+
+        if not os.path.exists(os.path.dirname(src_path)):
+            os.makedirs(os.path.dirname(src_path), mode=0o777)
+        with open(src_path, "wb") as f:
+            f.write(file_bytes)
+
+        # 调用Win下的libreoffice子进程
+        if get_platform() == "Windows":
+            soffice = 'C:\\Program Files\\LibreOfficeDev 5\\program\\soffice.exe'
+            comm_list = [soffice, '--headless', '--convert-to', target_format, src_path,
+                         '--outdir', dest_path+os.sep]
+            p = subprocess.call(comm_list, timeout=10)
+
+        # 调用Linux下的libreoffice子进程
+        else:
+            # 再调用转换
+            libreoffice_dir = 'soffice'
+            comm_list = [libreoffice_dir, '--headless', '--convert-to', target_format, src_path,
+                         '--outdir', dest_path+os.sep]
+            comm = ''
+            for c in comm_list:
+                comm += c + ' '
+            logging.info("office_convert command" + comm)
+
+            # p = subprocess.call(comm_list, timeout=30*(i+2))
+            # os.system(comm)
+            pid, p_code = my_subprocess_call(comm_list, timeout=10)
+            logging.info("subprocess code " + str(p_code))
+
+        # 重试后还未成功
+        if not os.path.exists(dest_file_path):
+            logging.info(str(3) + src_format + ' to ' + target_format + " failed!")
+            return {"data": [-3]}
+
+        logging.info("out office_convert")
+        with open(dest_file_path, "rb") as f:
+            file_bytes = f.read()
+        base64_stream = base64.b64encode(file_bytes)
+
+        # temp_dir = "/data/fangjiasheng/format_conversion_maxcompute/format_convert/temp/"
+        # if os.path.exists(temp_dir):
+        #     shutil.rmtree(temp_dir)
+
+        print("base64_stream", type(base64_stream))
+        return {"data": str(file_bytes)}
+    except TimeoutError:
+        return {"data": [-5]}
+    except:
+        traceback.print_exc()
+        return {"data": [-1]}
+    finally:
+        if src_path is not None:
+            file_dir = os.path.dirname(src_path)
+            if os.path.exists(file_dir):
+                logging.info("delete " + str(file_dir))
+                shutil.rmtree(file_dir)
+
+
+if __name__ == "__main__":
+    port = 16000
+    os.system("service cron start")
+    app.run(host='0.0.0.0', port=port, threaded=False, debug=False)

+ 87 - 0
format_convert/monitor_process.py

@@ -0,0 +1,87 @@
+import logging
+import os
+import re
+
+import psutil
+
+
+convert_port_list = ["15010"]
+# ocr_port_list = ["15011", "15013", "15015"]
+ocr_port_list = ["15011", "15013"]
+otr_port_list = ["15012", "15014"]
+soffice_port_list = ["16000", "16001", "16002", "16003"]
+
+
+python_path = "/home/python/anaconda3/envs/convert/bin/python"
+interface_path = "/data/fangjiasheng/format_conversion_maxcompute"
+std_out = " >>/convert.out 2>&1 &"
+convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
+ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_interface.py #" + std_out
+otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_interface.py #" + std_out
+soffice_comm = "docker run -itd -p #:16000 soffice:v1 bash"
+
+
+def get_port():
+    net_conn = psutil.net_connections()
+    current_port_list = []
+    for conn in net_conn:
+        current_port_list.append(str(conn.laddr.port))
+    current_port_list = list(set(current_port_list))
+    current_port_list.sort(key=lambda x: x)
+    # print(current_port_list)
+    return current_port_list
+
+
+def restart(process_type, port):
+    if process_type == "convert":
+        _comm = re.sub("#", port, convert_comm)
+    elif process_type == "ocr":
+        _comm = re.sub("#", port, ocr_comm)
+    elif process_type == "otr":
+        _comm = re.sub("#", port, otr_comm)
+    elif process_type == "soffice":
+        _comm = re.sub("#", port, soffice_comm)
+    else:
+        _comm = "netstat -nltp"
+        print("no process_type", process_type)
+    print(_comm)
+    # os.system("netstat -nltp")
+    os.system(_comm)
+
+
+def kill_soffice(limit_sec=12):
+    pid_list = psutil.pids()
+    for pid in pid_list:
+        process = psutil.Process(pid)
+        if re.search("soffice", process.exe()):
+            run_time = process.cpu_times().user
+            if run_time >= limit_sec:
+                comm = "kill -9 " + str(pid)
+                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
+                os.system(comm)
+
+
+def monitor():
+    current_port_list = get_port()
+
+    for p in convert_port_list:
+        if p not in current_port_list:
+            restart("convert", p)
+
+    for p in ocr_port_list:
+        if p not in current_port_list:
+            restart("ocr", p)
+
+    for p in otr_port_list:
+        if p not in current_port_list:
+            restart("otr", p)
+
+    for p in soffice_port_list:
+        if p not in current_port_list:
+            restart("soffice", p)
+
+    kill_soffice()
+
+
+if __name__ == "__main__":
+    monitor()

+ 134 - 0
format_convert/monitor_process2.py

@@ -0,0 +1,134 @@
+import logging
+import os
+import re
+import time
+
+import psutil
+
+
+convert_port_list = ["15010"]
+# ocr_port_list = ["15011", "15013", "15015"]
+# ocr_port_list = ["15011", "15013", "15015", "15017", "15019"]
+# otr_port_list = ["15012", "15014", "15016", "15018", "15020"]
+ocr_port_list = ["15011", "15013", "15015", "15017", "15019", "15021"]
+otr_port_list = ["15012", "15014", "15016", "15018", "15020", "15022"]
+soffice_port_list = ["16000", "16001", "16002", "16003", "16004", "16005",
+                     "16006", "16007", "16008", "16009"]
+
+
+python_path = "/root/miniconda3/bin/python"
+interface_path = "/data/format_conversion_maxcompute"
+std_out = " >>/convert.out 2>&1 &"
+std_out_gpu = " >>/gpu.out 2>&1 &"
+convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
+ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_interface.py # 0" + std_out_gpu
+otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_interface.py # 0" + std_out_gpu
+soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
+
+
+def get_port():
+    net_conn = psutil.net_connections()
+    current_port_list = []
+    for conn in net_conn:
+        current_port_list.append(str(conn.laddr.port))
+    current_port_list = list(set(current_port_list))
+    current_port_list.sort(key=lambda x: x)
+    # print(current_port_list)
+    return current_port_list
+
+
+def restart(process_type, port):
+    if process_type == "convert":
+        _comm = re.sub("#", port, convert_comm)
+    elif process_type == "ocr":
+        _comm = re.sub("#", port, ocr_comm)
+    elif process_type == "otr":
+        _comm = re.sub("#", port, otr_comm)
+    elif process_type == "soffice":
+        _comm = re.sub("#", port, soffice_comm)
+    else:
+        _comm = "netstat -nltp"
+        print("no process_type", process_type)
+
+    # os.system("netstat -nltp")
+    os.system("echo $(date +%F%n%T)")
+    print("restart comm", _comm)
+    os.system(_comm)
+
+
+def kill_soffice(limit_sec=20):
+    pid_list = psutil.pids()
+    for pid in pid_list:
+        process = psutil.Process(pid)
+
+        process_cmd = ''
+        for c in process.cmdline():
+            process_cmd += c + " "
+        if process_cmd.strip() == "":
+            continue
+
+        if process.status() == "zombie":
+            print("zombie cmd", process_cmd)
+
+        if re.search("soffice", process.exe()):
+            if process.status() == "zombie":
+                ppid = process.ppid
+                comm = "kill -9 " + str(ppid)
+                print("kill defunct process ", str(ppid), str(process.exe()))
+                os.system("echo $(date +%F%n%T)")
+                os.system(comm)
+
+            start_time = process.create_time()
+            now_time = time.time()
+            run_time = now_time-start_time
+            if run_time >= limit_sec:
+                comm = "kill -9 " + str(pid)
+                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
+                os.system("echo $(date +%F%n%T)")
+                os.system(comm)
+
+
+def kill_defunct():
+    pid_list = psutil.pids()
+    for pid in pid_list:
+        process = psutil.Process(pid)
+        if process.status() == "zombie":
+            ppid = process.ppid
+            process = psutil.Process(ppid)
+            process.kill()
+            process.send_signal(9)
+            break
+            # comm = "kill -9 " + str(ppid)
+            # print("kill process ", str(ppid))
+            # os.system("echo $(date +%F%n%T)")
+            # os.system(comm)
+
+
+def monitor():
+    current_port_list = get_port()
+
+    for p in convert_port_list:
+        if p not in current_port_list:
+            restart("convert", p)
+
+    for p in ocr_port_list:
+        if p not in current_port_list:
+            restart("ocr", p)
+
+    for p in otr_port_list:
+        if p not in current_port_list:
+            restart("otr", p)
+
+    for p in soffice_port_list:
+        if p not in current_port_list:
+            restart("soffice", p)
+
+    kill_soffice()
+
+
+if __name__ == "__main__":
+    for i in range(6):
+        # os.system("echo $(date +%F%n%T)")
+        monitor()
+        time.sleep(10)
+    # kill_defunct()

+ 104 - 0
format_convert/monitor_process3.py

@@ -0,0 +1,104 @@
+import logging
+import os
+import re
+import sys
+import time
+import psutil
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from format_convert.utils import get_ip_port
+
+
+# convert_port_list = ["15010"]
+# ocr_port_list = ["15011", "15013", "15015"]
+# ocr_port_list = ["15011", "15013", "15015", "15017", "15019"]
+# otr_port_list = ["15012", "15014", "15016", "15018", "15020"]
+# ocr_port_list = ["15011", "15013", "15015", "15017", "15019", "15021"]
+# otr_port_list = ["15012", "15014", "15016", "15018", "15020", "15022"]
+# soffice_port_list = ["16000", "16001", "16002", "16003", "16004", "16005",
+#                      "16006", "16007", "16008", "16009"]
+
+convert_port_list = get_ip_port("convert")
+ocr_port_list = get_ip_port("ocr")
+otr_port_list = get_ip_port("otr")
+soffice_port_list = get_ip_port("office")
+
+
+python_path = "/root/miniconda3/bin/python"
+interface_path = "/data/format_conversion_maxcompute"
+std_out = " >>/convert.out 2>&1 &"
+std_out_gpu = " >>/gpu.out 2>&1 &"
+convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
+ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_interface.py # 0" + std_out + std_out_gpu
+otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_interface.py # 0" + std_out + std_out_gpu
+soffice_comm = "docker run -itd -p #:16000 soffice:v1 bash"
+
+
+def get_port():
+    net_conn = psutil.net_connections()
+    current_port_list = []
+    for conn in net_conn:
+        current_port_list.append(str(conn.laddr.port))
+    current_port_list = list(set(current_port_list))
+    current_port_list.sort(key=lambda x: x)
+    # print(current_port_list)
+    return current_port_list
+
+
+def restart(process_type, port):
+    if process_type == "convert":
+        _comm = re.sub("#", port, convert_comm)
+    elif process_type == "ocr":
+        _comm = re.sub("#", port, ocr_comm)
+    elif process_type == "otr":
+        _comm = re.sub("#", port, otr_comm)
+    elif process_type == "soffice":
+        _comm = re.sub("#", port, soffice_comm)
+    else:
+        _comm = "netstat -nltp"
+        print("no process_type", process_type)
+    print(_comm)
+    # os.system("netstat -nltp")
+    os.system("echo $(date +%F%n%T)")
+    os.system(_comm)
+
+
+def kill_soffice(limit_sec=12):
+    pid_list = psutil.pids()
+    for pid in pid_list:
+        process = psutil.Process(pid)
+        if re.search("soffice", process.exe()):
+            start_time = process.create_time()
+            now_time = time.time()
+            # run_time = process.cpu_times().user
+            run_time = now_time-start_time
+            if run_time >= limit_sec:
+                comm = "kill -9 " + str(pid)
+                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
+                os.system("echo $(date +%F%n%T)")
+                os.system(comm)
+
+
+def monitor():
+    current_port_list = get_port()
+
+    # for p in convert_port_list:
+    #     if p not in current_port_list:
+    #         restart("convert", p)
+
+    for p in ocr_port_list:
+        if p not in current_port_list:
+            restart("ocr", p)
+
+    for p in otr_port_list:
+        if p not in current_port_list:
+            restart("otr", p)
+
+    # for p in soffice_port_list:
+    #     if p not in current_port_list:
+    #         restart("soffice", p)
+    #
+    # kill_soffice()
+
+
+if __name__ == "__main__":
+    monitor()

+ 124 - 0
format_convert/monitor_process_config.py

@@ -0,0 +1,124 @@
+import logging
+import os
+import re
+import sys
+import time
+import psutil
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from format_convert.utils import get_ip_port, get_intranet_ip
+
+
+ip_port_dict = get_ip_port()
+ip = "http://" + get_intranet_ip()
+convert_port_list = ip_port_dict.get(ip).get("convert")
+ocr_port_list = ip_port_dict.get(ip).get("ocr")
+otr_port_list = ip_port_dict.get(ip).get("otr")
+soffice_port_list = ip_port_dict.get(ip).get("office")
+schedule_port_list = ip_port_dict.get(ip).get("schedule")
+python_path = ip_port_dict.get(ip).get("python_path")
+project_path = ip_port_dict.get(ip).get("project_path")
+
+
+interface_path = project_path[:-1]
+std_out = " >>/convert.out 2>&1 &"
+std_out_gpu = " >>/gpu.out 2>&1 &"
+std_out_schedule = " >>/schedule.out 2>&1 &"
+convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
+ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_interface.py # 0" + std_out_gpu
+otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_interface.py # 0" + std_out_gpu
+schedule_comm = "nohup " + python_path + " " + interface_path + "/format_convert/schedule_interface.py #" + std_out_schedule
+soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
+
+
+def get_port():
+    net_conn = psutil.net_connections()
+    current_port_list = []
+    for conn in net_conn:
+        current_port_list.append(str(conn.laddr.port))
+    current_port_list = list(set(current_port_list))
+    current_port_list.sort(key=lambda x: x)
+    # print(current_port_list)
+    return current_port_list
+
+
+def restart(process_type, port):
+    if process_type == "convert":
+        _comm = re.sub("#", port, convert_comm)
+    elif process_type == "ocr":
+        _comm = re.sub("#", port, ocr_comm)
+    elif process_type == "otr":
+        _comm = re.sub("#", port, otr_comm)
+    elif process_type == "soffice":
+        _comm = re.sub("#", port, soffice_comm)
+    elif process_type == "schedule":
+        _comm = re.sub("#", port, schedule_comm)
+    else:
+        _comm = "netstat -nltp"
+        print("no process_type", process_type)
+    os.system("echo $(date +%F%n%T)")
+    print("restart comm", _comm)
+    # os.system("netstat -nltp")
+    os.system(_comm)
+
+
+def kill_soffice(limit_sec=15):
+    pid_list = psutil.pids()
+    for pid in pid_list:
+        process = psutil.Process(pid)
+
+        process_cmd = ''
+        for c in process.cmdline():
+            process_cmd += c + " "
+        if process_cmd.strip() == "":
+            continue
+
+        if process.status() == "zombie":
+            print("zombie cmd", process_cmd)
+
+        if re.search("soffice", process.exe()):
+            start_time = process.create_time()
+            now_time = time.time()
+            run_time = now_time-start_time
+            if run_time >= limit_sec:
+                comm = "kill -9 " + str(pid)
+                os.system("echo $(date +%F%n%T)")
+                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
+                os.system(comm)
+
+
+def monitor():
+    current_port_list = get_port()
+
+    if convert_port_list:
+        for p in convert_port_list:
+            if p not in current_port_list:
+                restart("convert", p)
+
+    if ocr_port_list:
+        for p in ocr_port_list:
+            if p not in current_port_list:
+                restart("ocr", p)
+
+    if otr_port_list:
+        for p in otr_port_list:
+            if p not in current_port_list:
+                restart("otr", p)
+
+    if soffice_port_list:
+        for p in soffice_port_list:
+            if p not in current_port_list:
+                restart("soffice", p)
+
+    kill_soffice()
+
+    # if schedule_port_list:
+    #     for p in schedule_port_list:
+    #         if p not in current_port_list:
+    #             restart("schedule", p)
+
+
+if __name__ == "__main__":
+    for i in range(6):
+        # os.system("echo $(date +%F%n%T)")
+        monitor()
+        time.sleep(10)

+ 124 - 0
format_convert/schedule_interface.py

@@ -0,0 +1,124 @@
+import base64
+import json
+import logging
+import os
+import sys
+import time
+import traceback
+from multiprocessing import Process, RLock
+from flask import Flask, request
+from werkzeug.exceptions import NotFound
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from format_convert.utils import get_platform, get_ip_port, request_post, get_intranet_ip
+
+# 接口配置
+app = Flask(__name__)
+
+
+@app.route('/schedule', methods=['POST'])
+def _schedule():
+    logging.info("into _schedule")
+    _lock = globals().get("lock")
+    start_time = time.time()
+    try:
+        _lock.acquire()
+
+        if not request.form:
+            logging.info("_schedule no data!")
+            return {"data": [-9]}
+
+        interface_type = request.form.get("interface_type")
+        _ip, _port = interface_pool(interface_type)
+        logging.info("_schedule " + _ip + " " + _port)
+        return {"data": [_ip, _port]}
+    except NotFound:
+        logging.info("_schedule cannot find " + interface_type + " 's interfaces! Please Checkout")
+        return {"data": [-2]}
+    except:
+        traceback.print_exc()
+        logging.info("_schedule failed!")
+        return {"data": [-1]}
+    finally:
+        _lock.release()
+        logging.info("_schedule cost " + str(time.time()-start_time))
+
+
+def interface_pool(interface_type):
+    ip_port_flag_dict = globals().get("ip_port_flag")
+    ip_port_dict = globals().get("ip_port")
+    # print(ip_port_flag_dict)
+    # print(ip_port_dict)
+
+    # 负载均衡, 选取ip
+    interface_load_list = []
+    for _ip in ip_port_flag_dict.keys():
+        if ip_port_dict.get(_ip).get(interface_type):
+            load_scale = ip_port_flag_dict.get(_ip).get(interface_type) / len(ip_port_dict.get(_ip).get(interface_type))
+            interface_load_list.append([_ip, load_scale])
+
+    if not interface_load_list:
+        raise NotFound
+    interface_load_list.sort(key=lambda x: x[-1])
+    _ip = interface_load_list[0][0]
+
+    # 负载均衡, 选取port
+    port_index = ip_port_flag_dict.get(_ip).get(interface_type) % len(ip_port_dict.get(_ip).get(interface_type))
+    _port = ip_port_dict.get(_ip).get(interface_type)[port_index]
+
+    # 更新flag
+    current_flag = globals().get("ip_port_flag").get(_ip).get(interface_type)
+    if current_flag >= 10000:
+        globals()["ip_port_flag"][_ip][interface_type] = 0
+    else:
+        globals()["ip_port_flag"][_ip][interface_type] = current_flag + 1
+    return _ip, _port
+
+
+def set_flask_global():
+    # 接口轮询所需锁、参数
+    globals().update({"lock": RLock()})
+    ip_port_flag = {}
+    ip_port_dict = get_ip_port()
+    for _k in ip_port_dict.keys():
+        ip_port_flag.update({_k: {"ocr": 0,
+                                  "otr": 0,
+                                  "convert": 0,
+                                  "office": 0
+                                  }})
+    globals().update({"ip_port_flag": ip_port_flag})
+    globals().update({"ip_port": ip_port_dict})
+    # print(globals().get("ip_port"))
+
+
+def test_schedule(interface_type):
+    _url = 'http://127.0.0.1:15011/schedule'
+    # _url = 'http://192.168.2.102:15011/schedule'
+    # _url = 'http://172.16.160.65:15011/schedule'
+    data = {"interface_type": interface_type}
+    result = json.loads(request_post(_url, data, time_out=10000)).get("data")
+    print(result)
+
+
+if __name__ == "__main__":
+    set_flask_global()
+    if len(sys.argv) == 2:
+        port = int(sys.argv[1])
+    else:
+        port = 15011
+
+    ip = get_intranet_ip()
+    logging.basicConfig(level=logging.INFO,
+                        format='%(asctime)s - %(name)s - %(levelname)s - '
+                               + ip + ' - ' + str(port) + ' - %(message)s')
+
+    app.run(host='0.0.0.0', port=port, threaded=True, debug=False)
+    logging.info("Schedule running "+str(port))
+
+    # for i in range(10):
+    #     p = Process(target=test_schedule, args=("ocr", ))
+    #     p.start()
+    #     p = Process(target=test_schedule, args=("otr", ))
+    #     p.start()
+    #     p = Process(target=test_schedule, args=("office", ))
+    #     p.start()
+    # p.join()

+ 4 - 1
format_convert/table_correct.py

@@ -264,8 +264,11 @@ def get_rotated_image(image, output_path):
         # cv2.imshow("output", rotated)
         # cv2.waitKey(0)
         return True
+    except cv2.error:
+        traceback.print_exc()
+        return [-3]
     except Exception as e:
-        print("get_rotated_image", e)
+        traceback.print_exc()
         return [-1]
 
 

BIN
format_convert/test1.doc


BIN
format_convert/test1.pdf


BIN
format_convert/test1.xls


BIN
format_convert/test2.doc


+ 44 - 0
format_convert/timeout_decorator.py

@@ -102,6 +102,50 @@ def timeout(seconds=None, use_signals=True, timeout_exception=TimeoutError, exce
     return decorate
 
 
+def timeout4class(cls, seconds=None, use_signals=True, timeout_exception=TimeoutError, exception_message=None):
+    """Add a timeout parameter to a function and return it.
+
+    """
+    def decorate(function):
+        if get_platform() == "Windows":
+            @wraps(function)
+            def new_function(*args, **kwargs):
+                return function(*args, **kwargs)
+            return new_function
+
+        else:
+            if use_signals:
+                def handler(signum, frame):
+                    _raise_exception(timeout_exception, exception_message)
+
+                @wraps(function)
+                def new_function(*args, **kwargs):
+                    new_seconds = kwargs.pop('timeout', seconds)
+                    if new_seconds:
+                        old = signal.signal(signal.SIGALRM, handler)
+                        signal.setitimer(signal.ITIMER_REAL, new_seconds)
+
+                    if not seconds:
+                        return function(*args, **kwargs)
+
+                    try:
+                        return function(*args, **kwargs)
+                    finally:
+                        if new_seconds:
+                            signal.setitimer(signal.ITIMER_REAL, 0)
+                            signal.signal(signal.SIGALRM, old)
+                return new_function
+            else:
+                @wraps(function)
+                def new_function(*args, **kwargs):
+                    timeout_wrapper = _Timeout(function, timeout_exception, exception_message, seconds)
+                    return timeout_wrapper(*args, **kwargs)
+                return new_function
+
+    return decorate
+
+
+
 # 装饰器包装为类,方便Pickle
 class TimeoutClass:
     def __init__(self, func, seconds, timeout_exception):

+ 249 - 17
format_convert/utils.py

@@ -1,6 +1,14 @@
+import hashlib
+import inspect
+import json
 import os
+import socket
+import subprocess
 import sys
-sys.path.append(os.path.dirname(__file__) + "/../")
+from io import BytesIO
+from subprocess import Popen
+import requests
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 import difflib
 import logging
 import mimetypes
@@ -9,10 +17,12 @@ import re
 import traceback
 import filetype
 from bs4 import BeautifulSoup
+import yaml
 from pdfminer.layout import *
+from format_convert import _global
 
 
-def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8]):
+def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8, -9]):
     """
     [0] : continue
     [-1]: 逻辑处理错误
@@ -23,6 +33,7 @@ def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8]):
     [-6]: 阿里云UDF队列超时
     [-7]: 文件需密码,无法打开
     [-8]: 调用现成接口报错
+    [-9]: 接口接收数据为空
     """
     for c in code:
         if _list == [c]:
@@ -723,14 +734,14 @@ class LineTable:
                         exists,point = self.cross_point(line1,line2)
                         if exists:
                             list_crosspoints.append(point)
-                from matplotlib import pyplot as plt
-                plt.figure()
-                for _line in l_lines:
-                    x0,y0,x1,y1 = _line
-                    plt.plot([x0,x1],[y0,y1])
-                for point in list_crosspoints:
-                    plt.scatter(point.get("point")[0],point.get("point")[1])
-                plt.show()
+                # from matplotlib import pyplot as plt
+                # plt.figure()
+                # for _line in l_lines:
+                #     x0,y0,x1,y1 = _line
+                #     plt.plot([x0,x1],[y0,y1])
+                # for point in list_crosspoints:
+                #     plt.scatter(point.get("point")[0],point.get("point")[1])
+                # plt.show()
 
         # print(list_crosspoints)
         # print("points num",len(list_crosspoints))
@@ -1329,6 +1340,219 @@ def sort_object(obj_list, is_reverse=False):
         return obj_list
 
 
+def request_post(url, param, time_out=1000):
+    fails = 0
+    text = json.dumps([-2])
+    while True:
+        try:
+            if fails >= 1:
+                break
+
+            headers = {'content-type': 'application/json'}
+            result = requests.post(url, data=param, timeout=time_out)
+            # print('result.status_code', result.status_code)
+            # print('result.text', result.text)
+
+            if result.status_code == 200:
+                text = result.text
+                break
+            else:
+                fails += 1
+                continue
+        except:
+            fails += 1
+            print('fail! fail times:', fails)
+            traceback.print_exc()
+    return text
+
+
+def test_gpu():
+    print("="*30)
+    import paddle
+    paddle.utils.run_check()
+
+    # import tensorflow as tf
+    # print("tf gpu", tf.config.list_physical_devices('GPU'))
+    print("="*30)
+
+
+def my_subprocess_call(*popenargs, timeout=None):
+    logging.info("into my_subprocess_call")
+    with Popen(*popenargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
+        try:
+            for line in p.stdout:
+                print("stdout", line)
+            for line in p.stderr:
+                print("stderr", line)
+            p.wait(timeout=timeout)
+            # p.communicate()
+            return p.pid, p.returncode
+        except:  # Including KeyboardInterrupt, wait handled that.
+            p.kill()
+            # We don't call p.wait() again as p.__exit__ does that for us.
+            raise
+        finally:
+            logging.info("out my_subprocess_call")
+            p.kill()
+
+
+def parse_yaml():
+    yaml_path = os.path.dirname(os.path.abspath(__file__)) + "/interface.yml"
+    with open(yaml_path, "r", encoding='utf-8') as f:
+        cfg = f.read()
+
+    params = yaml.load(cfg, Loader=yaml.SafeLoader)
+    return params
+
+
+def get_ip_port(node_type=None, interface_type=None):
+    if node_type is None:
+        node_type_list = ["master", "slave"]
+    else:
+        node_type_list = [node_type]
+
+    if interface_type is None:
+        interface_type_list = ["convert", "ocr", "otr", "office", "path"]
+    else:
+        interface_type_list = [interface_type]
+
+    ip_port_dict = {}
+    params = parse_yaml()
+    for type1 in node_type_list:
+        node_type = type1.upper()
+        ip_list = params.get(node_type).get("ip")
+        for type2 in interface_type_list:
+            interface_type = type2.upper()
+            processes = 0
+            python_path = None
+            project_path = None
+            if interface_type in ["convert".upper()]:
+                _port = params.get(node_type).get(interface_type).get("port")
+                if _port is None:
+                    port_list = []
+                else:
+                    port_list = [str(_port)]
+                    if interface_type == "convert".upper():
+                        processes = params.get(node_type).get(interface_type).get("processes")
+            elif interface_type == "path".upper():
+                python_path = params.get(node_type).get(interface_type).get("python")
+                project_path = params.get(node_type).get(interface_type).get("project")
+            else:
+                port_start = params.get(node_type).get(interface_type).get("port_start")
+                port_no = params.get(node_type).get(interface_type).get("port_no")
+                if port_start is None or port_no is None:
+                    port_list = []
+                else:
+                    port_list = [str(x) for x in range(port_start, port_start+port_no, 1)]
+            if ip_list:
+                for _ip in ip_list:
+                    if _ip is None:
+                        continue
+                    if _ip in ip_port_dict.keys():
+                        if port_list:
+                            ip_port_dict.get(_ip).update({interface_type.lower(): port_list})
+                    else:
+                        if port_list:
+                            ip_port_dict[_ip] = {interface_type.lower(): port_list}
+                    if processes:
+                        ip_port_dict.get(_ip).update({interface_type.lower()+"_processes": processes})
+                    if project_path and python_path:
+                        ip_port_dict.get(_ip).update({"project_path": project_path,
+                                                      "python_path": python_path})
+    return ip_port_dict
+
+
+def get_intranet_ip():
+    try:
+        # Create a new socket using the given address family,
+        # socket type and protocol number.
+        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+
+        # Connect to a remote socket at address.
+        # (The format of address depends on the address family.)
+        address = ("8.8.8.8", 80)
+        s.connect(address)
+
+        # Return the socket’s own address.
+        # This is useful to find out the port number of an IPv4/v6 socket, for instance.
+        # (The format of the address returned depends on the address family.)
+        sockname = s.getsockname()
+        ip = sockname[0]
+        port = sockname[1]
+    finally:
+        s.close()
+    return ip
+
+
+def log(msg):
+    call_func_name = inspect.currentframe().f_back.f_code.co_name
+    logger = get_logger(call_func_name, {"md5": _global.get("md5"),
+                                         "port": _global.get("port")})
+    logger.info(msg)
+    # logging.info(msg)
+
+
+def get_logger(_name, _dict):
+    extra = _dict
+    _format = '%(asctime)s - %(name)s - %(levelname)s - %(md5)s - %(port)s - %(message)s'
+    logger = logging.getLogger(_name)
+
+    create_new_flag = 1
+    handlers = logger.handlers
+    if handlers:
+        for h in handlers:
+            if h.formatter.__dict__.get("_fmt") == _format:
+                create_new_flag = 0
+                break
+    if create_new_flag:
+        formatter = logging.Formatter(_format)
+        handler = logging.StreamHandler()
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
+    logger.setLevel(logging.INFO)
+    logger.propagate = False
+    logger = logging.LoggerAdapter(logger, extra)
+    return logger
+
+
+def set_flask_global():
+    # 接口轮询所需锁、参数
+    ip_port_flag = {}
+    ip_port_dict = get_ip_port()
+    for _k in ip_port_dict.keys():
+        ip_port_flag.update({_k: {"ocr": 0,
+                                  "otr": 0,
+                                  "convert": 0,
+                                  "office": 0
+                                  }})
+    _global.update({"ip_port_flag": ip_port_flag})
+    _global.update({"ip_port": ip_port_dict})
+    # print(globals().get("ip_port"))
+
+
+def get_md5_from_bytes(_bytes):
+    def generate_fp(_b):
+        bio = BytesIO()
+        bio.write(_b)
+        return bio
+    _length = 0
+    try:
+        _md5 = hashlib.md5()
+        ff = generate_fp(_bytes)
+        ff.seek(0)
+        while True:
+            data = ff.read(4096)
+            if not data:
+                break
+            _length += len(data)
+            _md5.update(data)
+        return _md5.hexdigest(), _length
+    except Exception as e:
+        traceback.print_exc()
+        return None, _length
+
+
 if __name__ == "__main__":
     # strs = r"D:\Project\temp\04384fcc9e8911ecbd2844f971944973\043876ca9e8911eca5e144f971944973_rar\1624114035529.jpeg"
     # print(slash_replace(strs))
@@ -1345,10 +1569,18 @@ if __name__ == "__main__":
     #                                   edgecolor=(random.randint(0,255)/255,random.randint(0,255)/255,random.randint(0,255)/255),
     #                                   fill=False, linewidth=2))
     #
-    # plt.show()
-    import cv2
-    import numpy as np
-    img = np.zeros(shape=(1800,1800),dtype=np.uint8)
-    img += 255
-    cv2.imshow("bbox", img)
-    cv2.waitKey(0)
+    # # plt.show()
+    # import cv2
+    # import numpy as np
+    # img = np.zeros(shape=(1800,1800),dtype=np.uint8)
+    # img += 255
+    # cv2.imshow("bbox", img)
+    # cv2.waitKey(0)
+
+    # print(json.dumps({"data":[1, 2]}))
+
+    # print(parse_yaml())
+
+    print(get_ip_port())
+
+    # print(get_intranet_ip())

+ 13 - 0
format_convert/wrapt_timeout_decorator/__init__.py

@@ -0,0 +1,13 @@
+from .wrapt_timeout_decorator import timeout
+from .wrap_helper import detect_unpickable_objects
+
+# this needs to come after the module imports, otherwise circular import under windows
+from . import __init__conf__
+
+__title__ = __init__conf__.title
+__version__ = __init__conf__.version
+__name__ = __init__conf__.name
+__url__ = __init__conf__.url
+__author__ = __init__conf__.author
+__author_email__ = __init__conf__.author_email
+__shell_command__ = __init__conf__.shell_command

+ 24 - 0
format_convert/wrapt_timeout_decorator/__init__conf__.py

@@ -0,0 +1,24 @@
+# CONF
+
+name = "wrapt_timeout_decorator"
+title = "The better timout decorator"
+version = "v1.3.8"
+url = "https://github.com/bitranox/wrapt_timeout_decorator"
+author = "Robert Nowotny"
+author_email = "bitranox@gmail.com"
+shell_command = "wrapt_timeout_decorator"
+
+
+def print_info() -> None:
+    print(
+        """\
+
+Info for wrapt_timeout_decorator:
+
+    The better timout decorator
+
+    Version : v1.3.8
+    Url     : https://github.com/bitranox/wrapt_timeout_decorator
+    Author  : Robert Nowotny
+    Email   : bitranox@gmail.com"""
+    )

+ 0 - 0
format_convert/wrapt_timeout_decorator/py.typed


+ 91 - 0
format_convert/wrapt_timeout_decorator/wrap_function_multiprocess.py

@@ -0,0 +1,91 @@
+# STDLIB
+import os
+import sys
+from typing import Any
+
+# EXT
+import multiprocess  # type: ignore
+
+# OWN
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from wrapt_timeout_decorator.wrap_helper import WrapHelper, raise_exception  # type: ignore # pragma: no cover
+
+
+class Timeout(object):
+    """Wrap a function and add a timeout (limit) attribute to it.
+    Instances of this class are automatically generated by the add_timeout
+    function defined above. Wrapping a function allows asynchronous calls
+    to be made and termination of execution after a timeout has passed.
+    """
+
+    def __init__(self, wrap_helper: WrapHelper) -> None:
+        """Initialize instance in preparation for being called."""
+        self.wrap_helper = wrap_helper
+        self.__name__ = self.wrap_helper.wrapped.__name__
+        self.__doc__ = self.wrap_helper.wrapped.__doc__
+        self.__process = None  # type: multiprocess.Process
+        self.__parent_conn = None  # type: multiprocess.Pipe
+
+    def __call__(self) -> Any:
+        """Execute the embedded function object asynchronously.
+        The function given to the constructor is transparently called and
+        requires that "ready" be intermittently polled. If and when it is
+        True, the "value" property may then be checked for returned data.
+        """
+        self.__parent_conn, self.wrap_helper.child_conn = multiprocess.Pipe(duplex=False)
+        self.__process = multiprocess.Process(target=_target, args=[self.wrap_helper])
+        # daemonic process must not have subprocess - we need that for nested decorators
+        self.__process.daemon = False
+        self.__process.start()
+        if not self.wrap_helper.dec_hard_timeout:
+            self.wait_until_process_started()
+        if self.__parent_conn.poll(self.wrap_helper.dec_timeout_float):
+            return self.value
+        else:
+            self.cancel()
+
+    def cancel(self) -> None:
+        """Terminate any possible execution of the embedded function."""
+        if self.__process.is_alive():  # pragma: no cover      # we can not produce that state - its just a security measure
+            # 不用terminate,可能会造成主进程崩溃
+            # self.__process.terminate()
+            self.__process.kill()
+        self.__process.join(timeout=1.0)
+        self.__parent_conn.close()
+        raise_exception(self.wrap_helper.timeout_exception, self.wrap_helper.exception_message)
+
+    def wait_until_process_started(self) -> None:
+        self.__parent_conn.recv()
+
+    @property
+    def value(self) -> Any:
+        exception_occured, result = self.__parent_conn.recv()
+        # when self.__parent_conn.recv() exits, maybe __process is still alive,
+        # then it might zombie the process. so join it explicitly
+        self.__process.join(timeout=1.0)
+        self.__parent_conn.close()
+
+        if exception_occured:
+            raise result
+        else:
+            return result
+
+
+def _target(wrap_helper: WrapHelper) -> None:
+    """Run a function with arguments and return output via a pipe.
+    This is a helper function for the Process created in Timeout. It runs
+    the function with positional arguments and keyword arguments and then
+    returns the function's output by way of a queue. If an exception gets
+    raised, it is returned to Timeout to be raised by the value property.
+    """
+    # noinspection PyBroadException
+    try:
+        if not wrap_helper.dec_hard_timeout:
+            wrap_helper.child_conn.send("started")
+        exception_occured = False
+        wrap_helper.child_conn.send((exception_occured, wrap_helper.wrapped(*wrap_helper.args, **wrap_helper.kwargs)))
+    except Exception:
+        exception_occured = True
+        wrap_helper.child_conn.send((exception_occured, sys.exc_info()[1]))
+    finally:
+        wrap_helper.child_conn.close()

+ 195 - 0
format_convert/wrapt_timeout_decorator/wrap_helper.py

@@ -0,0 +1,195 @@
+# STDLIB
+import logging
+import platform
+import signal
+import sys
+import threading
+from types import FrameType
+from typing import Any, Callable, Dict, List, Type, Union, Optional
+
+# EXT
+import dill  # type: ignore
+import multiprocess  # type: ignore
+
+# Types
+AlarmHandler = Union[Callable[[int, Optional[FrameType]], Any], int, signal.Handlers, None]
+
+logger = logging.getLogger("pickle_analyzer")
+
+
+class WrapHelper(object):
+    def __init__(
+        self,
+        dec_timeout: Union[None, float, str],
+        use_signals: bool,
+        timeout_exception: Type[BaseException],
+        exception_message: str,
+        dec_allow_eval: bool,
+        dec_hard_timeout: bool,
+        wrapped: Callable[..., Any],
+        instance: object,
+        args: Any,
+        kwargs: Any,
+    ) -> None:
+        self.dec_timeout = dec_timeout
+        self.use_signals = use_signals
+        self.timeout_exception = timeout_exception
+        self.exception_message = exception_message
+        self.dec_allow_eval = dec_allow_eval
+        self.dec_hard_timeout = dec_hard_timeout
+        self.wrapped = wrapped
+        self.instance = instance
+        self.args = args
+        self.kwargs = kwargs
+
+        self.dec_timeout_float = 0.0  # type: float
+        self.old_alarm_handler: AlarmHandler = None
+        self.child_conn: "multiprocess.Pipe" = None
+
+        self.pop_kwargs()
+        self.set_signals_to_false_if_not_possible()
+        self.eval_if_required()
+        self.convert_timeout_given_to_float()
+        self.format_exception_message()
+
+    def convert_timeout_given_to_float(self) -> None:
+        if self.dec_timeout is None:
+            self.dec_timeout_float = 0.0
+        else:
+            try:
+                self.dec_timeout_float = float(self.dec_timeout)
+            except ValueError:
+                raise ValueError(f'the given or evaluated value for the timeout can not be converted to float : "{self.dec_timeout}"')
+
+    def pop_kwargs(self) -> None:
+        self.dec_allow_eval = self.kwargs.pop("dec_allow_eval", self.dec_allow_eval)
+        self.dec_timeout = self.kwargs.pop("dec_timeout", self.dec_timeout)
+        self.use_signals = self.kwargs.pop("use_signals", self.use_signals)
+        self.dec_hard_timeout = self.kwargs.pop("dec_hard_timeout", self.dec_hard_timeout)
+
+    @property
+    def should_eval(self) -> bool:
+        if self.dec_allow_eval and isinstance(self.dec_timeout, str):
+            return True
+        else:
+            return False
+
+    def format_exception_message(self) -> None:
+        function_name = self.wrapped.__name__ or "(unknown name)"
+        if not self.exception_message:
+            self.exception_message = f"Function {function_name} timed out after {self.dec_timeout_float} seconds"
+
+    def new_alarm_handler(self, signum: signal.Signals, frame: FrameType) -> None:
+        raise_exception(self.timeout_exception, self.exception_message)
+
+    def save_old_and_set_new_alarm_handler(self) -> None:
+        self.old_alarm_handler = signal.signal(signal.SIGALRM, self.new_alarm_handler)  # type: ignore
+        signal.setitimer(signal.ITIMER_REAL, self.dec_timeout_float)  # type: ignore  # on windows we dont have signals
+
+    def restore_old_alarm_handler(self) -> None:
+        signal.setitimer(signal.ITIMER_REAL, 0)  # type: ignore  # on windows we dont have signals
+        signal.signal(signal.SIGALRM, self.old_alarm_handler)  # type: ignore  # on windows we dont have signals
+
+    def set_signals_to_false_if_not_possible(self) -> None:
+        if is_system_windows() or not is_in_main_thread():
+            self.use_signals = False
+
+    def eval_if_required(self) -> None:
+        # define local variables which then can be used in eval
+        wrapped = self.wrapped  # noqa
+        instance = self.instance  # noqa
+        args = self.args  # noqa
+        kwargs = self.kwargs  # noqa
+
+        if self.should_eval:
+            self.dec_timeout = eval(str(self.dec_timeout))
+
+
+def detect_unpickable_objects_and_reraise(object_to_pickle: Any) -> None:
+    # sometimes the detection detects unpickable objects but actually
+    # they can be pickled - so we just try to start the thread and report
+    # the unpickable objects if that fails
+    dict_result = detect_unpickable_objects(object_to_pickle, dill_trace=False, log_warning=False)
+    s_err = (
+        f"can not pickle {dict_result['object_name']}, bad items: {dict_result['bad_items']}, bad objects: {dict_result['bad_objects']}, "
+        f"bad types {dict_result['bad_types']}"
+    )
+    raise dill.PicklingError(s_err)
+
+
+def detect_unpickable_objects(object_to_pickle: Any, dill_trace: bool = True, log_warning: bool = True) -> Dict[str, Union[str, List[Any]]]:
+    if log_warning:
+        logger.warning('always remember that the "object_to_pickle" should not be defined within the main context')
+    dict_result = dict()  # type: Dict[str, Union[str, List[Any]]]
+    dict_result["object_name"] = ""
+    dict_result["bad_items"] = list()
+    dict_result["bad_objects"] = list()
+    dict_result["bad_types"] = list()
+    safe_status_of_dill_trace = dill.detect.trace
+    # noinspection PyBroadException
+    try:
+        if dill_trace:
+            dill.detect.trace = True
+        pickled_object = dill.dumps(object_to_pickle)
+        dill.loads(pickled_object)
+    except Exception:
+        dict_result["object_name"] = get_object_name(object_to_pickle)
+        dict_result["bad_objects"] = get_bad_pickling_objects(object_to_pickle)
+        dict_result["bad_types"] = get_bad_pickling_types(object_to_pickle)
+    finally:
+        dill.detect.trace = safe_status_of_dill_trace
+        return dict_result
+
+
+def get_object_name(object_to_pickle: object) -> str:
+    object_name = "object"
+    if hasattr(object_to_pickle, "__name__"):
+        if object_to_pickle.__name__:  # type: ignore
+            object_name = object_to_pickle.__name__  # type: ignore
+    return object_name
+
+
+def get_bad_pickling_types(object_to_pickle: object) -> List[Any]:
+    bad_types = list()  # type: List[Any]
+    # noinspection PyBroadException
+    try:
+        bad_types = dill.detect.badtypes(object_to_pickle)
+    except Exception:
+        bad_types = [sys.exc_info()[1]]
+    finally:
+        return bad_types
+
+
+def get_bad_pickling_objects(object_to_pickle: Any) -> Any:
+    bad_objects = list()  # type: List[object]
+    # noinspection PyBroadException
+    try:
+        bad_objects = dill.detect.badobjects(object_to_pickle)
+    except Exception:
+        bad_objects = [sys.exc_info()[1]]
+    finally:
+        return bad_objects
+
+
+def raise_exception(exception: Type[BaseException], exception_message: str) -> None:
+    """This function checks if a exception message is given.
+    If there is no exception message, the default behaviour is maintained.
+    If there is an exception message, the message is passed to the exception.
+    """
+    if not exception:
+        exception = TimeoutError
+    raise exception(exception_message)
+
+
+def is_in_main_thread() -> bool:
+    if threading.current_thread() == threading.main_thread():
+        return True
+    else:
+        return False
+
+
+def is_system_windows() -> bool:
+    if platform.system().lower().startswith("win"):
+        return True
+    else:
+        return False

+ 184 - 0
format_convert/wrapt_timeout_decorator/wrapt_timeout_decorator.py

@@ -0,0 +1,184 @@
+"""
+Timeout decorator.
+    :copyright: (c) 2017 by Robert Nowotny
+    :license: MIT, see LICENSE for more details.
+"""
+
+# STDLIB
+import os
+import sys
+from typing import Any, Callable, Type, Union
+
+# EXT
+from dill import PicklingError  # type: ignore
+import wrapt  # type: ignore
+
+# OWN
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from wrapt_timeout_decorator.wrap_helper import WrapHelper, detect_unpickable_objects_and_reraise
+from wrapt_timeout_decorator.wrap_function_multiprocess import Timeout
+
+
+def timeout(
+    dec_timeout: Union[None, float, str] = None,
+    use_signals: bool = True,
+    timeout_exception: Type[BaseException] = TimeoutError,
+    exception_message: str = "",
+    dec_allow_eval: bool = False,
+    dec_hard_timeout: bool = False,
+) -> Any:
+
+    """Add a timeout parameter to a function and return it.
+
+    ToDo :   not clear how to type a decorator factory,
+             tried:   ->  Callable[..., Any]
+                ...
+             return cast(Callable[..., Any], wrapped)
+             without success - so we stuck with any at the moment
+             ** see example on bottom of that file for correct annotation of a generic decorator
+
+    ToDo :   look at https://stackoverflow.com/questions/6126007/python-getting-a-traceback-from-a-multiprocessing-process
+
+
+    Windows remark : dont use the decorator on classes in the main.py because of Windows multiprocessing limitations
+                     read the README
+
+    Usage:
+
+    @timeout(3)
+    def foo():
+        pass
+
+    Overriding the timeout:
+
+    foo(dec_timeout=5)
+
+    Usage without decorating a function :
+
+    def test_method(a,b,c):
+        pass
+
+    timeout(3)(test_method)(1,2,c=3)
+
+    Usage with eval (beware, security hazard, no user input values here):
+        read : https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html before usage !
+
+    class ClassTest4(object):
+        def __init__(self,x):
+            self.x=x
+
+        @timeout('instance.x', dec_allow_eval=True)
+        def test_method(self):
+            print('swallow')
+
+        @timeout(1)
+        def foo3(self):
+            print('parrot')
+
+    # or override via kwarg :
+    my_foo = ClassTest4(3)
+    my_foo.test_method(dec_timeout='instance.x * 2.5 +1')
+    my_foo.foo3(dec_timeout='instance.x * 2.5 +1', dec_allow_eval=True)
+
+    :param dec_timeout: *       optional time limit in seconds or fractions of a second. If None is passed,
+                                no seconds is applied. This adds some flexibility to the usage: you can disable timing
+                                out depending on the settings. dec_timeout will always be overridden by a
+                                kwarg passed to the wrapped function, class or class method.
+    :param use_signals:         flag indicating whether signals should be used or the multiprocessing module
+    :param timeout_exception:   the Exception to be raised when timeout occurs, default = TimeoutError
+    :param exception_message:   the Message for the Exception. Default: 'Function {f} timed out after {s} seconds.'
+    :param dec_allow_eval: *    allows a string in parameter dec_timeout what will be evaluated. Beware this can
+                                be a security issue. This is very powerful, but is also very dangerous if you
+                                accept strings to evaluate from untrusted input.
+                                read: https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
+
+                                If enabled, the parameter of the function dec_timeout, or the parameter passed
+                                by kwarg dec_timeout will be evaluated if its type is string. You can access :
+                                wrapped (the function object and all their exposed objects)
+                                instance    Example: 'instance.x' - see example above or doku
+                                args        Example: 'args[0]' - the timeout is the first argument in args
+                                kwargs      Example: 'kwargs["max_time"] * 2'
+
+    :param dec_hard_timeout:    only considered when use_signals = True (Windows)
+                                if dec_hard_timeout = True, the decorator will timeout after dec_timeout after the
+                                decorated function is called by the main program.
+                                If You set up a small timeout value like 0.1 seconds, in windows that function might
+                                actually never run - because setting up the process will already take longer
+                                than 0.1 seconds - that means the decorated function will ALWAYS time out (and never run).
+
+                                if dec_hard_timeout = False, the decorator will timeout after the process is allowed to
+                                run for dec_timeout seconds, that means the time to set up the new process is not considered.
+                                If You set up a small timeout value like 0.1 seconds, in windows that function might now
+                                take something like 0.6 seconds to timeout - 0.5 seconds to set up the process, and
+                                allowing the function in the process to run for 0.1 seconds.
+                                Since You can not know how long the spawn() will take under Windows, this is the default setting.
+
+    * all parameters starting with dec_ can be overridden via kwargs passed to the wrapped function.
+
+    :raises:                    TimeoutError if time limit is reached
+    :returns:                   the Result of the wrapped function
+
+    It is illegal to pass anything other than a function as the first parameter.
+    The function is wrapped and returned to the caller.
+    """
+
+    @wrapt.decorator  # type: ignore
+    def wrapper(wrapped: Callable[..., Any], instance: object, args: Any, kwargs: Any) -> Any:
+        wrap_helper = WrapHelper(
+            dec_timeout, use_signals, timeout_exception, exception_message, dec_allow_eval, dec_hard_timeout, wrapped, instance, args, kwargs
+        )
+        if not wrap_helper.dec_timeout_float:
+            return wrapped(*wrap_helper.args, **wrap_helper.kwargs)
+        else:
+            return wrapped_with_timeout(wrap_helper)
+
+    return wrapper
+
+
+def wrapped_with_timeout(wrap_helper: WrapHelper) -> Any:
+    if wrap_helper.use_signals:
+        return wrapped_with_timeout_signals(wrap_helper)
+    else:
+        return wrapped_with_timeout_process(wrap_helper)
+
+
+def wrapped_with_timeout_signals(wrap_helper: WrapHelper) -> Any:
+    try:
+        wrap_helper.save_old_and_set_new_alarm_handler()
+        return wrap_helper.wrapped(*wrap_helper.args, **wrap_helper.kwargs)
+    finally:
+        wrap_helper.restore_old_alarm_handler()
+
+
+def wrapped_with_timeout_process(wrap_helper: WrapHelper) -> Any:
+    try:
+        timeout_wrapper = Timeout(wrap_helper)
+        return timeout_wrapper()
+    except PicklingError:
+        detect_unpickable_objects_and_reraise(wrap_helper.wrapped)
+
+
+"""
+
+# Example for generic decorator with does not destroy the signature of the wrapped function for mypy
+
+from typing import Any, Callable, TypeVar, cast
+
+F = TypeVar('F', bound=Callable[..., Any])
+
+
+def check_for_kwargs(f: F) -> F:
+    def wrapper(*args: Any, **kwargs: Any) -> Any:
+        if kwargs:
+            keys = ', '.join([key for key in kwargs.keys()])
+            raise TypeError("{fn}() got some positional-only arguments passed as keyword arguments: '{keys}'".format(fn=f.__name__, keys=keys))
+        return f(*args, **kwargs)
+    return cast(F, wrapper)
+
+"""
+
+if __name__ == "__main__":
+    print(
+        b'this is a library only, the executable is named "wrapt_timeout_decorator_cli.py"',
+        file=sys.stderr,
+    )

+ 55 - 0
format_convert/wrapt_timeout_decorator/wrapt_timeout_decorator_cli.py

@@ -0,0 +1,55 @@
+# STDLIB
+import os
+import sys
+from typing import Optional
+
+# EXT
+import click
+
+# OWN
+import cli_exit_tools
+
+# PROJ
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+from wrapt_timeout_decorator import __init__conf__
+from wrapt_timeout_decorator import wrapt_timeout_decorator
+
+
+# CONSTANTS
+CLICK_CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
+
+
+def info() -> None:
+    """
+    >>> info()
+    Info for ...
+
+    """
+    __init__conf__.print_info()
+
+
+@click.group(help=__init__conf__.title, context_settings=CLICK_CONTEXT_SETTINGS)
+@click.version_option(
+    version=__init__conf__.version, prog_name=__init__conf__.shell_command, message=f"{__init__conf__.shell_command} version {__init__conf__.version}"
+)
+@click.option("--traceback/--no-traceback", is_flag=True, type=bool, default=None, help="return traceback information on cli")
+def cli_main(traceback: Optional[bool] = None) -> None:
+    if traceback is not None:
+        cli_exit_tools.config.traceback = traceback
+
+
+@cli_main.command("info", context_settings=CLICK_CONTEXT_SETTINGS)  # type: ignore
+def cli_info() -> None:
+    """get program information"""
+    info()
+
+
+# entry point if main
+if __name__ == "__main__":
+    try:
+        cli_main()
+    except Exception as exc:
+        cli_exit_tools.print_exception_message()
+        sys.exit(cli_exit_tools.get_system_exit_code(exc))
+    finally:
+        cli_exit_tools.flush_streams()

+ 90 - 36
ocr/ocr_interface.py

@@ -1,6 +1,7 @@
 import base64
 import json
 import multiprocessing as mp
+import socket
 import sys
 import os
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
@@ -13,19 +14,43 @@ import logging
 import numpy as np
 os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
 from ocr.paddleocr import PaddleOCR
+from format_convert.utils import request_post, test_gpu, get_intranet_ip, log, get_md5_from_bytes
+from flask import Flask, request
+from format_convert import _global
 
 
-logging.basicConfig(level=logging.INFO,format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-def log(msg):
-    '''
-    @summary:打印信息
-    '''
-    logger.info(msg)
+# 接口配置
+app = Flask(__name__)
+
+
+@app.route('/ocr', methods=['POST'])
+def _ocr():
+    log("into ocr_interface _ocr")
+    try:
+        if not request.form:
+            log("ocr no data!")
+            return json.dumps({"text": str([-9]), "bbox": str([-9])})
+
+        ocr_model = globals().get("global_ocr_model")
+        if ocr_model is None:
+            ocr_model = OcrModels().get_model()
+            globals().update({"global_ocr_model": ocr_model})
+
+        data = request.form.get("data")
+        img_data = base64.b64decode(data)
+        _md5 = get_md5_from_bytes(img_data)[0]
+        _global.update({"md5": _md5})
+        text = picture2text(img_data, ocr_model)
+        return json.dumps(text)
+    except TimeoutError:
+        return json.dumps({"text": str([-5]), "bbox": str([-5])})
+    except:
+        traceback.print_exc()
+        return json.dumps({"text": str([-1]), "bbox": str([-1])})
 
 
 def ocr(data, ocr_model):
-    logging.info("into ocr_interface ocr")
+    log("into ocr_interface ocr")
     try:
         img_data = base64.b64decode(data)
         text = picture2text(img_data, ocr_model)
@@ -36,7 +61,7 @@ def ocr(data, ocr_model):
 
 flag = 0
 def picture2text(img_data, ocr_model):
-    logging.info("into ocr_interface picture2text")
+    log("into ocr_interface picture2text")
     try:
         start_time = time.time()
         # 二进制数据流转np.ndarray [np.uint8: 8位像素]
@@ -46,7 +71,7 @@ def picture2text(img_data, ocr_model):
             np_images = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
         except cv2.error as e:
             if "src.empty()" in str(e):
-                logging.info("ocr_interface picture2text image is empty!")
+                log("ocr_interface picture2text image is empty!")
                 return {"text": str([]), "bbox": str([])}
         # resize
         # cv2.imshow("before resize", np_images)
@@ -80,13 +105,13 @@ def picture2text(img_data, ocr_model):
         # cv2.imshow("bbox", img)
         # cv2.waitKey(0)
 
-        logging.info("ocr model use time: " + str(time.time()-start_time))
+        log("ocr model use time: " + str(time.time()-start_time))
         return {"text": str(text_list), "bbox": str(bbox_list)}
 
     except TimeoutError:
         raise TimeoutError
     except Exception as e:
-        logging.info("picture2text error!")
+        log("picture2text error!")
         print("picture2text", traceback.print_exc())
         return {"text": str([]), "bbox": str([])}
 
@@ -123,34 +148,63 @@ class OcrModels:
         return self.ocr_model
 
 
-if __name__ == '__main__':
-    # if len(sys.argv) == 2:
-    #     port = int(sys.argv[1])
-    # else:
-    #     port = 15011
-    #
-    # app.run(host='0.0.0.0', port=port, threaded=False, debug=False)
-    # log("OCR running")
-    file_path = "C:/Users/Administrator/Desktop/error1.png"
-    # file_path = "1.png"
-
+def test_ocr_model():
+    file_path = "C:/Users/Administrator/Desktop/error2.png"
     with open(file_path, "rb") as f:
         file_bytes = f.read()
     file_base64 = base64.b64encode(file_bytes)
+    file_json = {"data": file_base64}
 
-    ocr_model = OcrModels().get_model()
-    result = ocr(file_base64, ocr_model)
-    result = ocr(file_base64, ocr_model)
-
-    text_list = eval(result.get("text"))
-    box_list = eval(result.get("bbox"))
+    # _url = "http://192.168.2.102:17000/ocr"
+    _url = "http://127.0.0.1:17000/ocr"
+    print(json.loads(request_post(_url, file_json)))
 
-    new_list = []
-    for i in range(len(text_list)):
-        new_list.append([text_list[i], box_list[i]])
 
-    # print(new_list[0][1])
-    new_list.sort(key=lambda x: (x[1][1][0], x[1][0][0]))
+if __name__ == '__main__':
+    if len(sys.argv) == 2:
+        port = int(sys.argv[1])
+    elif len(sys.argv) == 3:
+        port = int(sys.argv[1])
+        using_gpu_index = int(sys.argv[2])
+    else:
+        port = 17000
+        using_gpu_index = 0
+    _global._init()
+    _global.update({"port": str(port)})
+
+    ip = get_intranet_ip()
+    logging.basicConfig(level=logging.INFO,
+                        format='%(asctime)s - %(name)s - %(levelname)s - '
+                               + ip + ' - ' + str(port) + ' - %(message)s')
+
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(using_gpu_index)
+
+    app.run(host='0.0.0.0', port=port, processes=1, threaded=False, debug=False)
+    log("OCR running "+str(port))
+
+    # test_ocr_model()
+    #
+    # log("OCR running")
+    # file_path = "C:/Users/Administrator/Desktop/error9.jpg"
+    # file_path = "error1.png"
+    #
+    # with open(file_path, "rb") as f:
+    #     file_bytes = f.read()
+    # file_base64 = base64.b64encode(file_bytes)
+    #
+    # ocr_model = OcrModels().get_model()
+    # result = ocr(file_base64, ocr_model)
+    # result = ocr(file_base64, ocr_model)
 
-    for t in new_list:
-        print(t[0])
+    # text_list = eval(result.get("text"))
+    # box_list = eval(result.get("bbox"))
+    #
+    # new_list = []
+    # for i in range(len(text_list)):
+    #     new_list.append([text_list[i], box_list[i]])
+    #
+    # # print(new_list[0][1])
+    # new_list.sort(key=lambda x: (x[1][1][0], x[1][0][0]))
+    #
+    # for t in new_list:
+    #     print(t[0])

+ 3 - 2
ocr/paddleocr.py

@@ -16,7 +16,8 @@ import os
 import sys
 
 __dir__ = os.path.dirname(__file__)
-sys.path.append(os.path.join(__dir__, ''))
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 project_path = os.path.abspath(__dir__)
 # project_path = ""
 
@@ -187,7 +188,7 @@ def parse_args(mMain=True, add_help=True):
         return parser.parse_args()
     else:
         return argparse.Namespace(
-            use_gpu=False,
+            use_gpu=True,
             ir_optim=True,
             use_tensorrt=False,
             gpu_mem=8000,

+ 110 - 41
otr/otr_interface.py

@@ -1,8 +1,10 @@
 import base64
+import json
 import multiprocessing as mp
 import os
 # os.environ['TF_XLA_FLAGS'] = '--tf_xla_cpu_global_jit'
 import sys
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 import time
 import traceback
 from multiprocessing.context import Process
@@ -12,19 +14,47 @@ import logging
 import cv2
 import numpy as np
 import tensorflow as tf
+from flask import Flask, request
 
+from format_convert.utils import request_post, judge_error_code, get_intranet_ip, log, get_md5_from_bytes
 from otr.table_line import get_best_predict_size, table_line, get_points, get_split_line, get_points_row, \
     get_points_col, \
     delete_close_points, fix_outline, get_bbox, get_outline_point, table_net, delete_contain_bbox, points_to_line, \
     fix_inner, merge_line, fix_corner, add_continue_bbox, delete_outline
+from format_convert import _global
 
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
-def log(msg):
-    """
-    @summary:打印信息
-    """
-    logger.info(msg)
+
+# 接口配置
+app = Flask(__name__)
+
+
+@app.route('/otr', methods=['POST'])
+def _otr():
+    try:
+        if not request.form:
+            log("otr no data!")
+            return json.dumps({"list_line": str([-9])})
+
+        otr_model = globals().get("global_otr_model")
+        if otr_model is None:
+            otr_model = OtrModels().get_model()
+            globals().update({"global_otr_model": otr_model})
+
+        data = request.form.get("data")
+        is_from_pdf = request.form.get("is_from_pdf")
+        img_data = base64.b64decode(data)
+        _md5 = get_md5_from_bytes(img_data)[0]
+        _global.update({"md5": _md5})
+        if is_from_pdf:
+            list_lines = line_detect(img_data, otr_model, prob=0.2)
+        else:
+            list_lines = line_detect(img_data, otr_model, prob=0.5)
+        return json.dumps(list_lines)
+    except TimeoutError:
+        return json.dumps({"list_line": str([-5])})
+    except:
+        traceback.print_exc()
+        return json.dumps({"list_line": str([-1])})
 
 
 def otr(data, otr_model, is_from_pdf):
@@ -39,33 +69,34 @@ def otr(data, otr_model, is_from_pdf):
     except TimeoutError:
         raise TimeoutError
 
+
 flag = 0
 # model_path = "models/table-line.h5"
 def table_detect2(img_data, otr_model):
-    logging.info("into otr_interface table_detect")
+    log("into otr_interface table_detect")
     start_time = time.time()
     try:
         start_time1 = time.time()
         # 二进制数据流转np.ndarray [np.uint8: 8位像素]
         img = cv2.imdecode(np.frombuffer(img_data, np.uint8), cv2.IMREAD_COLOR)
-        # logging.info("into otr_interface table_detect 1")
+        # log("into otr_interface table_detect 1")
         # cv2.imwrite("111111.jpg", img)
 
         # 将bgr转为rbg
         image_np = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        # logging.info("into otr_interface table_detect 2")
+        # log("into otr_interface table_detect 2")
 
         # 选择与图片最接近分辨率,以防失真
         # best_h, best_w = get_best_predict_size(img)
         print("image_np.shape", image_np.shape)
         best_h, best_w, _ = image_np.shape
-        logging.info("otr preprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
+        log("otr preprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
 
         # 调用模型
         # rows, cols = table_line(image_np, otr_model)
         start_time1 = time.time()
         rows, cols, image_np = table_line(image_np, otr_model, size=(best_w, best_h), hprob=0.5, vprob=0.5)
-        logging.info("otr model predict time: " + str(round(float(time.time()-start_time1), 4)) + "s")
+        log("otr model predict time: " + str(round(float(time.time()-start_time1), 4)) + "s")
 
         start_time1 = time.time()
         if not rows or not cols:
@@ -88,7 +119,7 @@ def table_detect2(img_data, otr_model):
 
         # 计算交点、分割线
         points = get_points(rows, cols, (image_np.shape[0], image_np.shape[1]))
-        # logging.info("into otr_interface table_detect 5")
+        # log("into otr_interface table_detect 5")
         if not points:
             print("points", 0, "split_lines", 0, "bboxes", 0)
             return {"points": str([]), "split_lines": str([]),
@@ -99,14 +130,14 @@ def table_detect2(img_data, otr_model):
         rows, cols = delete_outline(rows, cols, points)
 
         split_lines, split_y = get_split_line(points, cols, image_np)
-        # logging.info("into otr_interface table_detect 6")
+        # log("into otr_interface table_detect 6")
 
         # 计算交点所在行列,剔除相近交点
         row_point_list = get_points_row(points, split_y, 5)
         col_point_list = get_points_col(points, split_y, 5)
-        # logging.info("into otr_interface table_detect 7")
+        # log("into otr_interface table_detect 7")
         points = delete_close_points(points, row_point_list, col_point_list)
-        # logging.info("into otr_interface table_detect 8")
+        # log("into otr_interface table_detect 8")
 
         # 查看是否正确输出点
         # for p in points:
@@ -140,17 +171,17 @@ def table_detect2(img_data, otr_model):
             # 修复边框后重新计算交点、分割线
             points = get_points(rows, cols, (image_np.shape[0], image_np.shape[1]))
 
-            # logging.info("into otr_interface table_detect 10")
+            # log("into otr_interface table_detect 10")
             split_lines, split_y = get_split_line(points, cols, image_np)
 
             # 计算交点所在行列,剔除相近交点
             row_point_list = get_points_row(points, split_y, 0)
             col_point_list = get_points_col(points, split_y, 0)
-            # logging.info("into otr_interface table_detect 11")
+            # log("into otr_interface table_detect 11")
             points = delete_close_points(points, row_point_list, col_point_list)
             # row_point_list = get_points_row(points, split_y)
             # col_point_list = get_points_col(points, split_y)
-            # logging.info("into otr_interface table_detect 12")
+            # log("into otr_interface table_detect 12")
 
         # 查看是否正确输出rows,cols
         # for line in rows+cols:
@@ -200,7 +231,7 @@ def table_detect2(img_data, otr_model):
 
         # 获取bbox 单元格
         bboxes = get_bbox(image_np, row_point_list, col_point_list, split_y, rows, cols)
-        # logging.info("into otr_interface table_detect 13")
+        # log("into otr_interface table_detect 13")
 
         # 删除包含bbox
         if bboxes:
@@ -242,15 +273,15 @@ def table_detect2(img_data, otr_model):
 
         # 获取每个表格的左上右下两个点
         outline_points = get_outline_point(points, split_y)
-        # logging.info("into otr_interface table_detect 14")
+        # log("into otr_interface table_detect 14")
 
         if bboxes:
             print("bboxes number", len(bboxes))
             # print("bboxes", bboxes)
         else:
             print("bboxes number", "None")
-        logging.info("otr postprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
-        logging.info("use time: " + str(time.time()-start_time))
+        log("otr postprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
+        log("use time: " + str(time.time()-start_time))
         return {"points": str(points), "split_lines": str(split_lines),
                 "bboxes": str(bboxes), "outline_points": str(outline_points),
                 "lines": str(rows+cols)}
@@ -258,46 +289,46 @@ def table_detect2(img_data, otr_model):
     except TimeoutError:
         raise TimeoutError
     except Exception as e:
-        logging.info("otr_interface cannot detected table!")
+        log("otr_interface cannot detected table!")
         print("otr_interface cannot detected table!", traceback.print_exc())
         print("points", 0, "split_lines", 0, "bboxes", 0)
-        logging.info("otr postprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
+        log("otr postprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
         return {"points": str([]), "split_lines": str([]), "bboxes": str([]),
                 "outline_points": str([]), "lines": str([])}
 
 
 def line_detect(img_data, otr_model, prob=0.2):
-    logging.info("into otr_interface table_detect")
+    log("into otr_interface table_detect")
     start_time = time.time()
     try:
         start_time1 = time.time()
         # 二进制数据流转np.ndarray [np.uint8: 8位像素]
         img = cv2.imdecode(np.frombuffer(img_data, np.uint8), cv2.IMREAD_COLOR)
-        # logging.info("into otr_interface table_detect 1")
+        # log("into otr_interface table_detect 1")
         # cv2.imwrite("111111.jpg", img)
 
         # 将bgr转为rbg
         image_np = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        # logging.info("into otr_interface table_detect 2")
+        # log("into otr_interface table_detect 2")
 
         # 选择与图片最接近分辨率,以防失真
         # best_h, best_w = get_best_predict_size(img)
-        logging.info("image_np.shape" + str(image_np.shape))
+        log("image_np.shape" + str(image_np.shape))
         best_h, best_w, _ = image_np.shape
-        logging.info("otr preprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
+        log("otr preprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
 
         # 调用模型
         # rows, cols = table_line(image_np, otr_model)
         start_time1 = time.time()
         list_line = table_line(image_np, otr_model, size=(best_w, best_h), prob=prob)
-        logging.info("otr finish " + str(round(float(time.time()-start_time1), 4)) + "s")
+        log("otr finish " + str(round(float(time.time()-start_time1), 4)) + "s")
         return {"list_line": str(list_line)}
     except TimeoutError:
         raise TimeoutError
     except Exception as e:
-        logging.info("otr_interface cannot detected table!")
+        log("otr_interface cannot detected table!")
         print("otr_interface cannot detected table!", traceback.print_exc())
-        logging.info("otr postprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
+        log("otr postprocess time: " + str(round(float(time.time()-start_time1), 4)) + "s")
         return {"list_line": str([])}
 
 
@@ -313,14 +344,52 @@ class OtrModels:
         return self.otr_model
 
 
+def test_otr_model():
+    file_path = "C:/Users/Administrator/Desktop/error2.png"
+    with open(file_path, "rb") as f:
+        file_bytes = f.read()
+    file_base64 = base64.b64encode(file_bytes)
+    file_json = {"data": file_base64, "is_from_pdf": False}
+
+    _url = "http://192.168.2.103:18000/otr"
+    r = json.loads(request_post(_url, file_json))
+    print(r)
+
+
 # otr_model = table_net((None, None, 3), 2)
 # otr_model.load_weights(model_path)
 if __name__ == '__main__':
-    # if len(sys.argv) == 2:
-    #     port = int(sys.argv[1])
-    # else:
-    #     port = 15017
-    # app.run(host='0.0.0.0', port=port, threaded=False, debug=False)
-    # log("OTR running "+str(port))
-    otr_model = OtrModels().get_model()
-    otr("11", otr_model)
+    if len(sys.argv) == 2:
+        port = int(sys.argv[1])
+    elif len(sys.argv) == 3:
+        port = int(sys.argv[1])
+        using_gpu_index = int(sys.argv[2])
+    else:
+        port = 18000
+        using_gpu_index = 0
+    _global._init()
+    _global.update({"port": str(port)})
+
+    # 日志格式设置
+    # ip = get_intranet_ip()
+    # logging.basicConfig(level=logging.INFO,
+    #                     format='%(asctime)s - %(name)s - %(levelname)s - '
+    #                            + ip + ' - ' + str(port) + ' - %(message)s')
+
+    # 限制tensorflow显存
+    memory_limit_scale = 0.3
+    os.environ['CUDA_VISIBLE_DEVICES'] = str(using_gpu_index)
+    os.environ['CUDA_CACHE_MAXSIZE'] = str(2147483648)
+    os.environ['CUDA_CACHE_DISABLE'] = str(0)
+    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=memory_limit_scale)
+    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
+
+    app.run(host='0.0.0.0', port=port, processes=1, threaded=False, debug=False)
+    log("OTR running "+str(port))
+
+    # test_otr_model()
+
+    # print(json.dumps([-2]))
+
+    # otr_model = OtrModels().get_model()
+    # otr("11", otr_model)

+ 236 - 70
otr/table_line.py

@@ -485,29 +485,6 @@ def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
         elif line[1] == line[3]:
             list_rows.append(line)
 
-    # 删掉贴着边框的line
-    # temp_list = []
-    # threshold = 5
-    # for line in list_rows:
-    #     if line[1]-0 <= threshold or size[1]-line[1] <= threshold:
-    #         continue
-    #     # 内部排序
-    #     if line[0] > line[2]:
-    #         line = [line[2], line[3], line[0], line[1]]
-    #     temp_list.append(line)
-    # list_rows = temp_list
-    # temp_list = []
-    # for line in list_cols:
-    #     if line[0]-0 <= threshold or size[0]-line[0] <= threshold:
-    #         continue
-    #     # 内部排序
-    #     if line[1] > line[3]:
-    #         line = [line[2], line[3], line[0], line[1]]
-    #     temp_list.append(line)
-    # list_cols = temp_list
-    # if not list_rows or not list_cols:
-    #     return []
-
     # 合并错开线
     list_rows = merge_line(list_rows, axis=0)
     list_cols = merge_line(list_cols, axis=1)
@@ -519,10 +496,11 @@ def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
         return []
 
     # 清掉外围的没用的线
-    list_rows, list_cols = delete_outline(list_rows, list_cols, cross_points)
-    mat_plot(list_rows+list_cols, "delete_outline", is_test)
+    # list_rows, list_cols = delete_outline(list_rows, list_cols, cross_points)
+    # mat_plot(list_rows+list_cols, "delete_outline", is_test)
 
     # 多个表格分割线
+    list_rows, list_cols = fix_in_split_lines(list_rows, list_cols, img_new)
     split_lines, split_y = get_split_line(cross_points, list_cols, img_new)
 
     # 修复边框
@@ -541,9 +519,15 @@ def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
         if new_cols:
             list_cols += new_cols
 
+        list_rows, list_cols = fix_in_split_lines(list_rows, list_cols, img_new)
+
         # 修复边框后重新计算交点、分割线
         cross_points = get_points(list_rows, list_cols, (img_new.shape[0], img_new.shape[1]))
+        cv_plot(cross_points, img_new.shape, 0, is_test)
+
         split_lines, split_y = get_split_line(cross_points, list_cols, img_new)
+        print("fix new split_y", split_y)
+        print("fix new split_lines", split_lines)
 
         # 修复内部缺线
         # cross_points = fix_inner(list_rows, list_cols, cross_points, split_y)
@@ -551,16 +535,25 @@ def table_line(img, model, size=(512, 1024), prob=0.2, is_test=0):
         #     return []
     mat_plot(list_rows+list_cols, "fix_outline", is_test)
 
+    split_lines_show = []
+    for _l in split_lines:
+        split_lines_show.append([_l[0][0], _l[0][1], _l[1][0], _l[1][1]])
+    mat_plot(split_lines_show+list_cols,
+             "split_lines", is_test)
+
+    # 修复表格4个角
+    list_rows, list_cols = fix_corner(list_rows, list_cols, split_y, threshold=0)
+    mat_plot(list_rows+list_cols, "fix_corner", is_test)
+
     # 修复内部缺线
-    cross_points = fix_inner(list_rows, list_cols, cross_points, split_y)
-    if not cross_points:
-        return []
-    row_point_list = get_points_row(cross_points, split_y, 5)
-    col_point_list = get_points_col(cross_points, split_y, 5)
-    list_rows = points_to_line(row_point_list, axis=0)
-    list_cols = points_to_line(col_point_list, axis=1)
+    list_rows, list_cols = fix_inner(list_rows, list_cols, cross_points, split_y)
     mat_plot(list_rows+list_cols, "fix_inner", is_test)
 
+    # 合并错开线
+    list_rows = merge_line(list_rows, axis=0)
+    list_cols = merge_line(list_cols, axis=1)
+    mat_plot(list_rows+list_cols, "merge_line", is_test)
+
     list_line = list_rows + list_cols
 
     # 打印处理后线
@@ -642,6 +635,27 @@ def table_line2(img, model, size=(512, 1024), hprob=0.5, vprob=0.5, row=50, col=
     return rowboxes, colboxes, img_new
 
 
def fix_in_split_lines(_rows, _cols, _img, margin=5):
    """Pull table lines that hug the image border inward.

    A line touching the top/bottom edge prevents ``get_split_line`` from
    producing a ``split_y``, so the table regions cannot be separated.
    Any row, or vertical-line endpoint, within ``margin`` pixels of the
    top or bottom edge is moved to ``margin + 1`` pixels from that edge.

    Args:
        _rows: horizontal lines as ``[x1, y1, x2, y2]`` (y1 == y2 expected).
        _cols: vertical lines as ``[x1, y1, x2, y2]`` (y1 above y2).
        _img: image whose ``shape[0]`` (height) bounds the y coordinates.
        margin: pixel distance considered "touching" the border
            (default 5, matching the original hard-coded value).

    Returns:
        The (mutated in place) ``_rows`` and ``_cols`` lists.
    """
    height = _img.shape[0]
    inner = margin + 1  # clamp target: just inside the margin zone

    for _row in _rows:
        # Horizontal line: both y coordinates move together.
        if _row[1] >= height - margin:
            _row[1] = height - inner
            _row[3] = height - inner
        if _row[1] <= margin:
            _row[1] = inner
            _row[3] = inner

    for _col in _cols:
        # Vertical line: clamp the bottom end up and the top end down.
        if _col[3] >= height - margin:
            _col[3] = height - inner
        if _col[1] <= margin:
            _col[1] = inner

    return _rows, _cols
+
+
 def mat_plot(list_line, name="", is_test=1):
     if not is_test:
         return
@@ -654,14 +668,22 @@ def mat_plot(list_line, name="", is_test=1):
     plt.show()
 
 
def cv_plot(_list, img_shape, line_or_point=1, is_test=1):
    """Debug-draw lines or points on a blank white canvas with OpenCV.

    Only active when ``is_test`` is truthy (consistent with ``mat_plot``);
    blocks on ``cv2.waitKey`` until a key is pressed.

    Args:
        _list: lines ``[x1, y1, x2, y2]`` when ``line_or_point`` is truthy,
            otherwise points ``(x, y)``.
        img_shape: shape of the canvas to draw on.
        line_or_point: 1 to draw lines, 0 to draw points.
        is_test: draw only when truthy; no-op otherwise.
    """
    if not is_test:
        return
    img_print = np.zeros(img_shape, np.uint8)
    img_print.fill(255)
    if line_or_point:
        for line in _list:
            cv2.line(img_print,
                     (int(line[0]), int(line[1])),
                     (int(line[2]), int(line[3])),
                     (255, 0, 0))
    else:
        for point in _list:
            cv2.circle(img_print, (int(point[0]), int(point[1])), 1,
                       (255, 0, 0), 2)
    # Display once, regardless of what was drawn (was duplicated per branch).
    cv2.imshow("cv_plot", img_print)
    cv2.waitKey(0)
 
 
 def delete_no_cross_lines(list_lines):
@@ -725,7 +747,7 @@ def get_outline(points, image_np):
     return outline_img
 
 
-def get_split_line(points, col_lines, image_np):
+def get_split_line(points, col_lines, image_np, threshold=5):
     # print("get_split_line", image_np.shape)
     points.sort(key=lambda x: (x[1], x[0]))
     # 遍历y坐标,并判断y坐标与上一个y坐标是否存在连接线
@@ -734,10 +756,10 @@ def get_split_line(points, col_lines, image_np):
     for point in points:
         # 从已分开的线下面开始判断
         if split_line_y:
-            if point[1] <= split_line_y[-1] + 5:
+            if point[1] <= split_line_y[-1] + threshold:
                 last_y = point[1]
                 continue
-            if last_y <= split_line_y[-1] + 5:
+            if last_y <= split_line_y[-1] + threshold:
                 last_y = point[1]
                 continue
 
@@ -768,14 +790,14 @@ def get_split_line(points, col_lines, image_np):
     y_min = points[0][1]
     y_max = points[-1][1]
     # print("加上收尾分割线", y_min, y_max)
-    if y_min-5 < 0:
+    if y_min-threshold < 0:
         split_line_y.append(0)
     else:
-        split_line_y.append(y_min-5)
-    if y_max+5 > image_np.shape[0]:
+        split_line_y.append(y_min-threshold)
+    if y_max+threshold > image_np.shape[0]:
         split_line_y.append(image_np.shape[0])
     else:
-        split_line_y.append(y_max+5)
+        split_line_y.append(y_max+threshold)
     split_line_y = list(set(split_line_y))
 
     # 剔除两条相隔太近分割线
@@ -829,7 +851,8 @@ def get_points(row_lines, col_lines, image_size):
 
     # 求出交点
     point_img = np.bitwise_and(row_img, col_img)
-    # cv2.imshow("point_img", np.bitwise_not(point_img))
+    # cv2.imwrite("get_points.jpg", row_img+col_img)
+    # cv2.imshow("get_points", row_img+col_img)
     # cv2.waitKey(0)
 
     # 识别黑白图中的白色交叉点,将横纵坐标取出
@@ -998,7 +1021,7 @@ def fix_inner2(row_points, col_points, row_lines, col_lines, threshold=3):
     return row_lines, col_lines
 
 
-def fix_inner(row_lines, col_lines, points, split_y):
+def fix_inner1(row_lines, col_lines, points, split_y):
     def fix(fix_lines, assist_lines, split_points, axis):
         new_points = []
         for line1 in fix_lines:
@@ -1047,11 +1070,11 @@ def fix_inner(row_lines, col_lines, points, split_y):
                         line_distance = abs(min_col_point[i][axis] - line1_point[i][axis])
                         if bbox_len/3 <= line_distance <= bbox_len:
                             add_point = (line1_point[i][1-axis], min_assist_line[i][axis])
-                            # print("============================table line==")
-                            # print("fix_inner add point", add_point)
-                            # print(min_col_point[i][axis], line1_point[i][axis], min_col_point[i][axis], min_assist_line[i][axis])
-                            # print(abs(min_col_point[i][axis] - line1_point[i][axis]), abs(min_col_point[i][axis] - min_assist_line[i][axis])/3)
-                            # print("line1, line2", line1, min_assist_line[i])
+                            print("============================table line==")
+                            print("fix_inner add point", add_point)
+                            print(min_col_point[i][axis], line1_point[i][axis], min_col_point[i][axis], min_assist_line[i][axis])
+                            print(abs(min_col_point[i][axis] - line1_point[i][axis]), abs(min_col_point[i][axis] - min_assist_line[i][axis])/3)
+                            print("line1, line2", line1, min_assist_line[i])
                             new_points.append(add_point)
 
         return new_points
@@ -1144,7 +1167,138 @@ def fix_inner(row_lines, col_lines, points, split_y):
     return points+new_points
 
 
-def fix_corner(row_lines, col_lines, split_y):
def fix_inner(row_lines, col_lines, points, split_y):
    """Extend lines whose dangling ends almost reach a crossing line.

    For each table region between consecutive ``split_y`` values, a row or
    column whose endpoint sticks out toward a non-crossing perpendicular
    line — by between 1/3 and 1 of the nearest cell-edge length — is
    stretched so it meets that line.  ``row_lines`` and ``col_lines`` are
    mutated in place and also returned.

    Args:
        row_lines: horizontal lines as [x1, y1, x2, y2].
        col_lines: vertical lines as [x1, y1, x2, y2].
        points: intersection points (x, y) of rows and columns.
        split_y: sorted y coordinates separating table regions.

    Returns:
        The (mutated) ``row_lines`` and ``col_lines``.
    """
    def fix(fix_lines, assist_lines, split_points, axis):
        # axis=1: fix_lines are columns, assist_lines are rows (axis=0 is
        # the transpose case).  Returns [line, new_endpoint] pairs for the
        # caller to apply; does not mutate anything itself.
        new_points = []
        for line1 in fix_lines:
            min_assist_line = [[], []]
            # NOTE(review): 1000 acts as "infinity" — assumes image
            # coordinates stay below 1000; confirm for large pages.
            min_distance = [1000, 1000]
            if_find = [0, 0]

            # Collect the intersection points lying on fix_line.  The two
            # endpoints of the line may not be among them: an endpoint is
            # not necessarily an intersection.
            fix_line_points = []
            for point in split_points:
                if abs(point[1-axis] - line1[1-axis]) <= 2:
                    if line1[axis] <= point[axis] <= line1[axis+2]:
                        fix_line_points.append(point)

            # For each of the two endpoints, find the closest assist_line
            # that does NOT already cross fix_line.
            line1_point = [line1[:2], line1[2:]]
            for i in range(2):
                point = line1_point[i]
                for line2 in assist_lines:
                    if not if_find[i] and abs(point[axis] - line2[axis]) <= 2:
                        # NOTE(review): mixes line1's start with line2's end
                        # in one range test — reads like a crossing check,
                        # but confirm the intent.
                        if line1[1-axis] <= point[1-axis] <= line2[1-axis+2]:
                            # print("line1, match line2", line1, line2)
                            if_find[i] = 1
                            break
                    else:
                        if abs(point[axis] - line2[axis]) < min_distance[i] and line2[1-axis] <= point[1-axis] <= line2[1-axis+2]:
                            if line1[axis] <= line2[axis] <= line1[axis+2]:
                                continue
                            min_distance[i] = abs(line1[axis] - line2[axis])
                            min_assist_line[i] = line2

            # Among fix_line's intersection points, find the one closest to
            # each candidate assist_line.  The overhang (endpoint->crossing)
            # must exceed 1/3 of the cell edge (crossing->assist_line) for
            # the extension to be considered deliberate.
            min_distance = [1000, 1000]
            min_col_point = [[], []]
            for i in range(2):
                # print("endpoint", i, line1_point[i])
                if min_assist_line[i]:
                    for point in fix_line_points:
                        if abs(point[axis] - min_assist_line[i][axis]) < min_distance[i]:
                            min_distance[i] = abs(point[axis] - min_assist_line[i][axis])
                            min_col_point[i] = point

            # print("min_col_point", min_col_point)
            # print("min_assist_line", min_assist_line)
            # print("line1_point", line1_point)
            if min_assist_line[0] and min_assist_line[0] == min_assist_line[1]:
                # Both endpoints chose the same assist_line: extend only the
                # side that actually sticks out past it.
                if min_assist_line[0][axis] < line1_point[0][axis]:
                    bbox_len = abs(min_col_point[0][axis] - min_assist_line[0][axis])
                    line_distance = abs(min_col_point[0][axis] - line1_point[0][axis])
                    if bbox_len/3 <= line_distance <= bbox_len:
                        if axis == 1:
                            add_point = (line1_point[0][1-axis], min_assist_line[0][axis])
                        else:
                            add_point = (min_assist_line[0][axis], line1_point[0][1-axis])
                        new_points.append([line1, add_point])
                elif min_assist_line[1][axis] > line1_point[1][axis]:
                    bbox_len = abs(min_col_point[1][axis] - min_assist_line[1][axis])
                    line_distance = abs(min_col_point[1][axis] - line1_point[1][axis])
                    if bbox_len/3 <= line_distance <= bbox_len:
                        if axis == 1:
                            add_point = (line1_point[1][1-axis], min_assist_line[1][axis])
                        else:
                            add_point = (min_assist_line[1][axis], line1_point[1][1-axis])
                        new_points.append([line1, add_point])
            else:
                for i in range(2):
                    if min_col_point[i]:
                        bbox_len = abs(min_col_point[i][axis] - min_assist_line[i][axis])
                        line_distance = abs(min_col_point[i][axis] - line1_point[i][axis])
                        # print("bbox_len, line_distance", bbox_len, line_distance)
                        if bbox_len/3 <= line_distance <= bbox_len:
                            if axis == 1:
                                add_point = (line1_point[i][1-axis], min_assist_line[i][axis])
                            else:
                                add_point = (min_assist_line[i][axis], line1_point[i][1-axis])
                            # print("============================table line==")
                            # print("fix_inner add point", add_point)
                            # print(min_col_point[i][axis], line1_point[i][axis], min_col_point[i][axis], min_assist_line[i][axis])
                            # print(abs(min_col_point[i][axis] - line1_point[i][axis]), abs(min_col_point[i][axis] - min_assist_line[i][axis])/3)
                            # print("line1, line2", line1, min_assist_line[i])
                            # print("line1, add_point", [line1, add_point])
                            new_points.append([line1, add_point])

        return new_points

    # NOTE(review): unused — extension candidates are applied per-region
    # below; left for interface parity with the older implementation.
    new_points = []
    for i in range(1, len(split_y)):
        last_y = split_y[i-1]
        y = split_y[i]

        # Partition the lines and points into the current y-region first.
        split_row_lines = []
        split_col_lines = []
        split_points = []
        for row in row_lines:
            if last_y <= row[1] <= y:
                split_row_lines.append(row)
        for col in col_lines:
            if last_y <= col[1] <= y:
                split_col_lines.append(col)
        for point in points:
            if last_y <= point[1] <= y:
                split_points.append(point)

        # Stretch columns toward rows, replacing whichever endpoint the new
        # point extends past.
        new_point_list = fix(split_col_lines, split_row_lines, split_points, axis=1)
        for line, new_point in new_point_list:
            if line in col_lines:
                index = col_lines.index(line)
                point1 = line[:2]
                point2 = line[2:]
                if new_point[1] >= point2[1]:
                    col_lines[index] = [point1[0], point1[1], new_point[0], new_point[1]]
                elif new_point[1] <= point1[1]:
                    col_lines[index] = [new_point[0], new_point[1], point2[0], point2[1]]

        # Same for rows toward columns, keyed on x instead of y.
        new_point_list = fix(split_row_lines, split_col_lines, split_points, axis=0)
        for line, new_point in new_point_list:
            if line in row_lines:
                index = row_lines.index(line)
                point1 = line[:2]
                point2 = line[2:]
                if new_point[0] >= point2[0]:
                    row_lines[index] = [point1[0], point1[1], new_point[0], new_point[1]]
                elif new_point[0] <= point1[0]:
                    row_lines[index] = [new_point[0], new_point[1], point2[0], point2[1]]

    return row_lines, col_lines
+
+
+def fix_corner(row_lines, col_lines, split_y, threshold=0):
     new_row_lines = []
     new_col_lines = []
     last_y = split_y[0]
@@ -1155,10 +1309,11 @@ def fix_corner(row_lines, col_lines, split_y):
         split_row_lines = []
         split_col_lines = []
         for row in row_lines:
-            if last_y <= row[1] <= y or last_y <= row[3] <= y:
+            if last_y-threshold <= row[1] <= y+threshold or last_y-threshold <= row[3] <= y+threshold:
                 split_row_lines.append(row)
         for col in col_lines:
-            if last_y <= col[1] <= y or last_y <= col[3] <= y:
+            # fix corner 容易因split line 漏掉线
+            if last_y-threshold <= col[1] <= y+threshold or last_y-threshold <= col[3] <= y+threshold:
                 split_col_lines.append(col)
 
         if not split_row_lines or not split_col_lines:
@@ -1629,11 +1784,13 @@ def fix_outline2(image, row_lines, col_lines, points, split_y):
     return new_row_lines, new_col_lines, all_longer_row_lines, all_longer_col_lines
 
 
-def fix_outline(image, row_lines, col_lines, points, split_y, scale=20):
+def fix_outline(image, row_lines, col_lines, points, split_y, scale=25):
+    logging.info("into fix_outline")
     x_min_len = max(10, int(image.shape[0] / scale))
     y_min_len = max(10, int(image.shape[1] / scale))
+    # print("x_min_len", x_min_len, "y_min_len", y_min_len)
 
-    print("split_y", split_y)
+    # print("split_y", split_y)
     # 分割线纵坐标
     if len(split_y) < 2:
         return [], [], [], []
@@ -1735,13 +1892,16 @@ def fix_outline(image, row_lines, col_lines, points, split_y, scale=20):
             for j in range(len(split_row_list[i])):
                 if j + 1 > len(split_row_list[i]) - 1:
                     break
+                # print("height_dict", split_row_list[i][j], split_row_list[i][j+1])
                 height = abs(int(split_row_list[i][j][3] - split_row_list[i][j+1][3]))
-                if height in height_dict.keys():
-                    height_dict[height] = height_dict[height] + 1
-                else:
-                    height_dict[height] = 1
+                if height >= 10:
+                    if height in height_dict.keys():
+                        height_dict[height] = height_dict[height] + 1
+                    else:
+                        height_dict[height] = 1
             height_list = [[x, height_dict[x]] for x in height_dict.keys()]
             height_list.sort(key=lambda x: (x[1], -x[0]), reverse=True)
+            # print("box_height", height_list)
             box_height = height_list[0][0]
         else:
             box_height = y_min_len
@@ -1750,11 +1910,20 @@ def fix_outline(image, row_lines, col_lines, points, split_y, scale=20):
             box_width = abs(split_col_list[i][1][2] - split_col_list[i][0][2])
         else:
             box_width = x_min_len
-        print("box_height", box_height, "box_width", box_width)
+        # print("box_height", box_height, "box_width", box_width)
+
+        # 设置轮廓线需超出阈值
+        if box_height >= 2*y_min_len:
+            fix_h_len = y_min_len
+        else:
+            fix_h_len = box_height * 2/3
+        if box_width >= 2*x_min_len:
+            fix_w_len = x_min_len
+        else:
+            fix_w_len = box_width * 2/3
 
         # 补左右两条竖线超出来的线的row
-        if (up_line[1] - left_line[1] >= y_min_len and up_line[1] - right_line[1] >= y_min_len) or \
-                (up_line[1] - left_line[1] >= y_min_len and up_line[1] - right_line[1] >= y_min_len):
+        if up_line[1] - left_line[1] >= fix_h_len and up_line[1] - right_line[1] >= fix_h_len:
 
             if up_line[1] - left_line[1] >= up_line[1] - right_line[1]:
                 new_row_lines.append([left_line[0], left_line[1], right_line[0], left_line[1]])
@@ -1781,8 +1950,7 @@ def fix_outline(image, row_lines, col_lines, points, split_y, scale=20):
                     if abs(new_col_y - col[1]) <= box_height:
                         split_col_list[i][j][1] = min([new_col_y, col[1]])
 
-        if (left_line[3] - bottom_line[3] >= y_min_len and right_line[3] - bottom_line[3] >= y_min_len) or \
-                (left_line[3] - bottom_line[3] >= y_min_len and right_line[3] - bottom_line[3] >= y_min_len):
+        if left_line[3] - bottom_line[3] >= fix_h_len and right_line[3] - bottom_line[3] >= fix_h_len:
 
             if left_line[3] - bottom_line[3] >= right_line[3] - bottom_line[3]:
                 new_row_lines.append([left_line[2], left_line[3], right_line[2], left_line[3]])
@@ -1805,8 +1973,7 @@ def fix_outline(image, row_lines, col_lines, points, split_y, scale=20):
                         split_col_list[i][j][3] = max([new_col_y, col[3]])
 
         # 补上下两条横线超出来的线的col
-        if (left_line[0] - up_line[0] >= x_min_len and left_line[0] - bottom_line[0] >= x_min_len) or \
-                (left_line[0] - up_line[0] >= x_min_len and left_line[0] - bottom_line[0] >= x_min_len):
+        if left_line[0] - up_line[0] >= fix_w_len and left_line[0] - bottom_line[0] >= fix_w_len:
             if left_line[0] - up_line[0] >= left_line[0] - bottom_line[0]:
                 new_col_lines.append([up_line[0], up_line[1], up_line[0], bottom_line[1]])
                 new_row_x = up_line[0]
@@ -1826,8 +1993,7 @@ def fix_outline(image, row_lines, col_lines, points, split_y, scale=20):
                     if abs(new_row_x - row[0]) <= box_width:
                         split_row_list[i][j][0] = min([new_row_x, row[0]])
 
-        if (up_line[2] - right_line[2] >= x_min_len and bottom_line[2] - right_line[2] >= x_min_len) or \
-                (up_line[2] - right_line[2] >= x_min_len and bottom_line[2] - right_line[2] >= x_min_len):
+        if up_line[2] - right_line[2] >= fix_w_len and bottom_line[2] - right_line[2] >= fix_w_len:
             if up_line[2] - right_line[2] >= bottom_line[2] - right_line[2]:
                 new_col_lines.append([up_line[2], up_line[3], up_line[2], bottom_line[3]])
                 new_row_x = up_line[2]

BIN
package_2022_03_22/convert_otr.zip


BIN
package_2022_04_11/convert_format_convert.zip


BIN
package_2022_04_11/convert_ocr.zip


BIN
package_2022_04_11/convert_otr.zip


+ 1 - 62
result.html

@@ -1,62 +1 @@
-<!DOCTYPE HTML><head><meta charset="UTF-8"></head><body><div>评标结果</div>
-<div>项目名称:S206(14省道)临安段改建工程(一期)交、竣工质量检测评标日期:2021年2月3日</div>
-<table border="1">
-<tr>
-<td colspan=1 rowspan=1>标段</td>
-<td colspan=1 rowspan=1>推荐意见</td>
-<td colspan=1 rowspan=1>投标人名称</td>
-<td colspan=1 rowspan=1>评标价(元)</td>
-<td colspan=1 rowspan=1>项目负责人</td>
-<td colspan=1 rowspan=1>项目负责人</td>
-<td colspan=1 rowspan=1>项目负责人</td>
-<td colspan=1 rowspan=1>名次</td>
-<td colspan=1 rowspan=1>综合得分</td>
-</tr>
-<tr>
-<td colspan=1 rowspan=1>标段</td>
-<td colspan=1 rowspan=1>推荐意见</td>
-<td colspan=1 rowspan=1>投标人名称</td>
-<td colspan=1 rowspan=1>评标价(元)</td>
-<td colspan=1 rowspan=1>姓名</td>
-<td colspan=1 rowspan=1>职称</td>
-<td colspan=1 rowspan=1>证书编号</td>
-<td colspan=1 rowspan=1>名次</td>
-<td colspan=1 rowspan=1>综合得分</td>
-</tr>
-<tr>
-<td colspan=1 rowspan=1>第JC01标段</td>
-<td colspan=1 rowspan=1>推荐为中标候选人</td>
-<td colspan=1 rowspan=1>浙江爱丽智能检测技术集团有限公司</td>
-<td colspan=1 rowspan=1>1124934.22</td>
-<td colspan=1 rowspan=1>赵亮明</td>
-<td colspan=1 rowspan=1>高级工程师</td>
-<td colspan=1 rowspan=1>师1029561CG(公路)检</td>
-<td colspan=1 rowspan=1>1</td>
-<td colspan=1 rowspan=1>98.57</td>
-</tr>
-</table>
-<div>中标候选人相关业绩</div>
-<table border="1">
-<tr>
-<td colspan=1 rowspan=1>该业绩证明对象</td>
-<td colspan=1 rowspan=1>项目名称</td>
-<td colspan=1 rowspan=1>项目建设单位</td>
-<td colspan=1 rowspan=1>与评审相关指标</td>
-<td colspan=1 rowspan=1>证明材料</td>
-</tr>
-<tr>
-<td colspan=1 rowspan=1>浙江爱丽智能检测技术集团有限公司</td>
-<td colspan=1 rowspan=1>桐庐县疏港公路综合码头至深奥段工程(320国道复线)第一合同段交(竣)工检测</td>
-<td colspan=1 rowspan=1>桐庐县交通建设有限公司</td>
-<td colspan=1 rowspan=1>2020年10月、一级公路、检测内容包含路基、路面、桥梁等</td>
-<td colspan=1 rowspan=1>合同协议书、业绩正明</td>
-</tr>
-<tr>
-<td colspan=1 rowspan=1>赵亮明</td>
-<td colspan=1 rowspan=1>淳安县汾口镇汾口大道交(竣)工质量评定检测</td>
-<td colspan=1 rowspan=1>淳安县汾口镇人民政府</td>
-<td colspan=1 rowspan=1>2020年6月、一级公路、检测内容包括路基、路面、桥梁等</td>
-<td colspan=1 rowspan=1>合同协议书、业绩正明</td>
-</tr>
-</table>
-</body>
+<!DOCTYPE HTML><head><meta charset="UTF-8"></head><body></body>

Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio