Procházet zdrojové kódy

1. 调整目录结构
2. 优化doc、swf处理,使用tika处理doc,yaswfp处理swf
3. 新增监控
4. 优化图片方向识别前的ocr乱码判断

fangjiasheng před 1 rokem
rodič
revize
1940d1af19
52 změnil soubory, kde provedl 2964 přidání a 492 odebrání
  1. 1 0
      .gitignore
  2. 1 2
      atc/atc_interface.py
  3. 1 2
      botr/yolov8/yolo_interface.py
  4. 0 0
      config/interface.yml
  5. 2 0
      config/interface_list.py
  6. 6 0
      config/interface_new.yml
  7. 123 0
      config/interface_new_19022.yml
  8. 1 0
      config/max_compute_config.py
  9. 1 4
      format_convert/convert.py
  10. 73 4
      format_convert/convert_doc.py
  11. 10 5
      format_convert/convert_docx.py
  12. 4 3
      format_convert/convert_image.py
  13. 117 13
      format_convert/convert_need_interface.py
  14. 106 7
      format_convert/convert_swf.py
  15. 101 58
      format_convert/convert_test.py
  16. 5 5
      format_convert/convert_xls.py
  17. 151 43
      format_convert/convert_xlsx.py
  18. 0 1
      format_convert/max_compute_config.py
  19. 0 87
      format_convert/monitor_process.py
  20. 0 134
      format_convert/monitor_process2.py
  21. 0 104
      format_convert/monitor_process3.py
  22. 66 8
      format_convert/utils.py
  23. 0 0
      format_convert/yaswfp/__init__.py
  24. 173 0
      format_convert/yaswfp/helpers.py
  25. binární
      format_convert/yaswfp/images/0.png
  26. 0 0
      format_convert/yaswfp/images/0.txt
  27. binární
      format_convert/yaswfp/images/1.png
  28. 0 0
      format_convert/yaswfp/images/1.txt
  29. binární
      format_convert/yaswfp/images/2.png
  30. 0 0
      format_convert/yaswfp/images/2.txt
  31. binární
      format_convert/yaswfp/images/3.png
  32. 0 0
      format_convert/yaswfp/images/3.txt
  33. binární
      format_convert/yaswfp/images/4.png
  34. 0 0
      format_convert/yaswfp/images/4.txt
  35. binární
      format_convert/yaswfp/images/5.png
  36. 1733 0
      format_convert/yaswfp/swfparser.py
  37. 1 2
      idc/idc_interface.py
  38. 1 2
      isr/isr_interface.py
  39. 68 0
      monitor/monitor_main_interface.py
  40. 3 0
      monitor/watch_10_minutes_process.sh
  41. 2 2
      ocr/paddleocr.py
  42. 2 3
      ocr/tools/infer/predict_det_pytorch.py
  43. 1 2
      otr/otr_interface.py
  44. 0 0
      start_and_stop/kill_all.py
  45. 0 0
      start_and_stop/kill_all.sh
  46. 0 0
      start_and_stop/kill_main.sh
  47. 0 0
      start_and_stop/kill_office.py
  48. 5 1
      start_and_stop/start_all.py
  49. 47 0
      tika_/doc.html
  50. binární
      tika_/files/tika-server.jar
  51. 1 0
      tika_/files/tika-server.jar.md5
  52. 158 0
      tika_/tika_interface.py

+ 1 - 0
.gitignore

@@ -27,3 +27,4 @@
 /package_2022_03_22/
 /package_env/
 /package_*
+/html_output

+ 1 - 2
atc/atc_interface.py

@@ -6,9 +6,8 @@ import time
 import traceback
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
-from format_convert.max_compute_config import max_compute
+from config.max_compute_config import MAX_COMPUTE
 import tensorflow as tf
-MAX_COMPUTE = max_compute
 
 if not MAX_COMPUTE:
     # tensorflow 内存设置

+ 1 - 2
botr/yolov8/yolo_interface.py

@@ -7,8 +7,7 @@ import torch
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../../")
 from botr.yolov8.model import Predictor
 from botr.yolov8.predict import detect
-from format_convert.max_compute_config import max_compute
-MAX_COMPUTE = max_compute
+from config.max_compute_config import MAX_COMPUTE
 import time
 import cv2
 from flask import Flask, request

+ 0 - 0
format_convert/interface.yml → config/interface.yml


+ 2 - 0
config/interface_list.py

@@ -0,0 +1,2 @@
+# 所有接口
+INTERFACES = ["convert", "ocr", "otr", "idc", "isr", "atc", 'yolo', "office", 'tika']

+ 6 - 0
format_convert/interface_new.yml → config/interface_new.yml

@@ -54,6 +54,12 @@
       "port": [ 16000 ],
       "port_num": [ 25 ],
       "gpu": []
+    },
+
+    "tika": {
+      "port": [ 16020 ],
+      "port_num": [ 2 ],
+      "gpu": [ -1 ]
     }
   },
 

+ 123 - 0
config/interface_new_19022.yml

@@ -0,0 +1,123 @@
+{
+  "MASTER": {
+    "ip": "http://192.168.0.115",
+
+    "path": {
+      "python": "/data/anaconda3/envs/convert4/bin/python",
+      "gunicorn": "/data/anaconda3/envs/convert4/bin/gunicorn",
+      "project": "/data/fangjiasheng/format_conversion_maxcompute/"
+    },
+
+    "convert": {
+      "port": [15010],
+      "port_num": [30],
+      "gpu": [-1]
+    },
+
+    "ocr": {
+      "port": [17000, 17001],
+      "port_num": [4, 1],
+      "gpu": [0, 1]
+    },
+
+    "otr": {
+      "port": [ 18000, 18001 ],
+      "port_num": [ 0, 2 ],
+      "gpu": [ 0, 1 ]
+    },
+
+    "idc": {
+      "port": [ 18020 ],
+      "port_num": [ 1 ],
+      "gpu": [ 1 ]
+    },
+
+    "isr": {
+      "port": [ 18040, 18041 ],
+      "port_num": [ 2, 2 ],
+      "gpu": [ 0, 1 ]
+    },
+
+    "atc": {
+      "port": [ 18060, 18061 ],
+      "port_num": [ 1, 2 ],
+      "gpu": [ 0, 1 ]
+    },
+
+    "yolo": {
+      "port": [ 18080, 18081 ],
+      "port_num": [ 1, 1 ],
+      "gpu": [ 0, 1 ]
+    },
+
+    "office": {
+      "port": [ 16000 ],
+      "port_num": [ 20 ],
+      "gpu": []
+    },
+
+    "tika": {
+      "port": [ 16020 ],
+      "port_num": [ 5 ],
+      "gpu": [ -1 ]
+    }
+  },
+
+  "SLAVE": {
+    "ip": "http://192.168.0.114",
+
+    "path": {
+      "python": "/data/anaconda3/envs/convert4/bin/python",
+      "gunicorn": "/data/anaconda3/envs/convert4/bin/gunicorn",
+      "project": "/data/fangjiasheng/format_conversion_maxcompute/"
+    },
+
+    "convert": {
+      "port": [],
+      "port_num": [],
+      "gpu": []
+    },
+
+    "ocr": {
+      "port": [ 17000, 17001 ],
+      "port_num": [ 4, 1 ],
+      "gpu": [ 0, 1 ]
+    },
+
+    "otr": {
+      "port": [ 18000, 18001 ],
+      "port_num": [ 2, 1 ],
+      "gpu": [ 0, 1 ]
+    },
+
+    "idc": {
+      "port": [],
+      "port_num": [],
+      "gpu": []
+    },
+
+    "isr": {
+      "port": [],
+      "port_num": [],
+      "gpu": []
+    },
+
+    "atc": {
+      "port": [],
+      "port_num": [],
+      "gpu": []
+    },
+
+    "yolo": {
+      "port": [],
+      "port_num": [],
+      "gpu": []
+    },
+
+    "office": {
+      "port": [],
+      "port_num": [],
+      "gpu": []
+    }
+  }
+}

+ 1 - 0
config/max_compute_config.py

@@ -0,0 +1 @@
+MAX_COMPUTE = False

+ 1 - 4
format_convert/convert.py

@@ -37,10 +37,7 @@ logging.getLogger("pdfminer").setLevel(logging.WARNING)
 from format_convert.table_correct import *
 from format_convert.wrapt_timeout_decorator import *
 from format_convert import _global
-from format_convert.max_compute_config import max_compute
-
-
-MAX_COMPUTE = max_compute
+from config.max_compute_config import MAX_COMPUTE
 
 
 if get_platform() == "Windows":

+ 73 - 4
format_convert/convert_doc.py

@@ -11,7 +11,7 @@ import logging
 import traceback
 from format_convert import get_memory_info
 from format_convert.convert_docx import docx2text, DocxConvert
-from format_convert.convert_need_interface import from_office_interface
+from format_convert.convert_need_interface import from_office_interface, from_tika_interface
 from format_convert.utils import judge_error_code, get_logger, log
 
 
@@ -37,6 +37,7 @@ class DocConvert:
         self._doc = _Document(path)
         self.path = path
         self.unique_type_dir = unique_type_dir
+        self.tika_html = None
 
     def convert(self):
         # 先判断特殊doc文件,可能是html文本
@@ -66,7 +67,11 @@ class DocConvert:
             # 调用office格式转换
             file_path = from_office_interface(self.path, self.unique_type_dir, 'docx')
             if judge_error_code(file_path):
-                self._doc.error_code = file_path
+                # 调用tika提取
+                html = from_tika_interface(self.path)
+                if judge_error_code(html):
+                    self._doc.error_code = html
+                self.tika_html = html
                 return
             _docx = DocxConvert(file_path, self.unique_type_dir)
             _docx.convert()
@@ -80,10 +85,74 @@ class DocConvert:
             self._doc.error_code = [-1]
         if self._doc.error_code is not None:
             return self._doc.error_code
+        if self.tika_html is not None:
+            return [self.tika_html]
         # print(self._doc.children)
         return self._doc.get_html()
 
 
+def parse_summary_info(data):
+    # 解析 OLE 属性集格式
+    import olefile
+    from olefile import OleFileIO, OleMetadata
+    from io import BytesIO
+
+    ole_metadata = OleMetadata()
+    for prop in ole_metadata.parse_properties(data):
+        print(f"{prop}: {ole_metadata.properties[prop]}")
+
+
 if __name__ == '__main__':
-    c = DocConvert("C:/Users/Administrator/Downloads/-4274446916340743056.doc", "C:/Users/Administrator/Downloads/1")
-    print(c.get_html())
+    # c = DocConvert("C:/Users/Administrator/Downloads/-4274446916340743056.doc", "C:/Users/Administrator/Downloads/1")
+    # print(c.get_html())
+
+    _p = "C:/Users/Administrator/Downloads/1716253106319.doc"
+
+
+
+    # with open(_p, 'rb') as f:
+    #     _str = f.read()
+    # print(_str.decode("utf-16le"))
+
+    # import olefile
+    # import chardet
+    # # 打开 CFBF 格式文件
+    # ole = olefile.OleFileIO(_p)
+    #
+    # ole_meta = ole.get_metadata()
+    #
+    # for attr in dir(ole_meta):
+    #     if '__' in attr:
+    #         continue
+    #
+    #     print(attr, getattr(ole_meta, attr))
+    #
+    # # 获取根目录流
+    # root_stream = ole.root
+    #
+    # parse_summary_info(ole)
+    #
+    # # 获取根目录流中的目录项
+    # for files in ole.listdir():
+    #     for entry in files:
+    #         print(entry)
+    #         _stream = ole.openstream(entry).read()
+    #
+    #         encoding = chardet.detect(_stream).get('encoding')
+    #         print(chardet.detect(_stream))
+    #         print(len(_stream) / 4)
+            # print(parse_summary_info(_stream))
+            # if not encoding:
+            #     encoding = "utf-16-le"
+            # elif encoding in ['X-ISO-10646-UCS-4-3412']:
+            #     encoding = 'ISO-10646'
+            # print(_stream.decode(encoding))
+            # if encoding in ['ascii']:
+            #     print(_stream.decode('ascii'))
+
+            # 输出目录项的名称和大小
+            # print(f"名称:{entry.name}, 大小:{entry.stg_size} 字节")
+
+        # 如果是流,读取其内容
+        # if entry.is_stream():
+        #     data = root_stream.openstream(entry.name).read()

+ 10 - 5
format_convert/convert_docx.py

@@ -129,7 +129,8 @@ def read_p_text(unique_type_dir, p_node, _last_node_level, _num_pr_dict, numberi
                     node_level = int(node_level[0].getAttribute("w:val"))
                     # print('group_id', group_id, 'node_level', node_level, 'last_node_level', _last_node_level)
                     if group_id in _num_pr_dict.keys():
-                        if node_level == 0 and node_level not in _num_pr_dict[group_id].keys():
+                        # if node_level == 0 and node_level not in _num_pr_dict[group_id].keys():
+                        if node_level == 0 and _num_pr_dict.get(group_id) and node_level not in _num_pr_dict.get(group_id).keys():
                             _num_pr_dict[group_id][node_level] = 1
                         if _last_node_level != 0 and node_level < _last_node_level:
                             # print('重置', 'group_id', group_id, 'last_node_level', last_node_level)
@@ -141,7 +142,8 @@ def read_p_text(unique_type_dir, p_node, _last_node_level, _num_pr_dict, numberi
                             else:
                                 pass
                                 # print('group_id, node_level', group_id, node_level)
-                        elif node_level in _num_pr_dict[group_id].keys():
+                        # elif node_level in _num_pr_dict[group_id].keys():
+                        elif node_level in _num_pr_dict.get(group_id).keys():
                             _num_pr_dict[group_id][node_level] += 1
                         else:
                             _num_pr_dict[group_id][node_level] = 1
@@ -150,15 +152,17 @@ def read_p_text(unique_type_dir, p_node, _last_node_level, _num_pr_dict, numberi
                     # print(num_pr_dict[group_id])
                     for level in range(node_level+1):
                         # 当前level下有多少个node
-                        if level not in _num_pr_dict[group_id]:
-                            if level not in id_level_start_dict[group_id]:
+                        # if level not in _num_pr_dict[group_id]:
+                        if level not in _num_pr_dict.get(group_id):
+                            # if level not in id_level_start_dict[group_id]:
+                            if not id_level_start_dict.get(group_id) or level not in id_level_start_dict.get(group_id):
                                 continue
                             else:
                                 level_node_cnt = id_level_start_dict[group_id][level]
                         else:
                             level_node_cnt = _num_pr_dict[group_id][level]
 
-                        if id_level_start_dict.get(group_id) and id_level_start_dict.get(group_id).get(level) and _num_pr_dict.get(group_id).get(level):
+                        if id_level_start_dict.get(group_id) and _num_pr_dict.get(group_id) and id_level_start_dict.get(group_id).get(level) and _num_pr_dict.get(group_id).get(level):
                             start_no = id_level_start_dict.get(group_id).get(level)
                             level_node_cnt += start_no - 1
 
@@ -316,6 +320,7 @@ def read_xml_table(unique_type_dir, document_xml, numbering_xml, document_xml_re
         # 直接子节点用child表示,所有子节点用all表示
         for table_child in table.childNodes:
             if 'w:tr' in str(table_child):
+                table_text += "<tr>"
                 tr = table_child
                 tr_child_nodes = tr.childNodes
                 tc_index = 0

+ 4 - 3
format_convert/convert_image.py

@@ -560,7 +560,7 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False,
                     return text_list
 
                 # 判断ocr识别是否正确
-                print('ocr_cant_read(text_list, box_list)', ocr_cant_read(text_list, box_list), idc_flag)
+                # print('ocr_cant_read(text_list, box_list)', ocr_cant_read(text_list, box_list), idc_flag, text_list)
                 if ocr_cant_read(text_list, box_list) and not idc_flag:
                     # 方向分类
                     image_np, angle = idc_process(image_np, return_angle=True)
@@ -568,9 +568,10 @@ def image_process(image_np, image_path, is_from_pdf=False, is_from_docx=False,
                         return image_np
                     # 如果角度不变,旋转180
                     if angle in [0, 360]:
-                        print('ocr_cant_read image_rotate 180')
+                        pass
+                        # log('ocr_cant_read image_rotate 180')
                         # image_np = image_rotate(image_np, angle=180)
-                        reverse_flag = 1
+                        # reverse_flag = 1
                         # image_pil = Image.fromarray(image_np)
                         # image_np = np.array(image_pil.rotate(180, expand=1))
                     # cv2.imshow("idc_process", image_np)

+ 117 - 13
format_convert/convert_need_interface.py

@@ -11,6 +11,9 @@ import uuid
 import cv2
 import torch
 from werkzeug.exceptions import NotFound
+
+from tika_.tika_interface import tika_interface
+
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 from botr.yolov8.yolo_interface import yolo
 from botr.yolov8.model import Predictor
@@ -26,10 +29,8 @@ from ocr.ocr_interface import ocr, OcrModels
 from otr.otr_interface import otr, OtrModels
 from format_convert.libreoffice_interface import office_convert
 import numpy as np
-from format_convert.max_compute_config import max_compute
-
+from config.max_compute_config import MAX_COMPUTE
 
-MAX_COMPUTE = max_compute
 
 if get_platform() == "Windows":
     FROM_REMOTE = False
@@ -62,7 +63,7 @@ lock = multiprocessing.RLock()
 redis_db = None
 
 
-def from_office_interface(src_path, dest_path, target_format, retry_times=1, from_remote=FROM_REMOTE):
+def from_office_interface_240606(src_path, dest_path, target_format, retry_times=1, from_remote=FROM_REMOTE):
     try:
         # Win10跳出超时装饰器
         # if get_platform() == "Windows":
@@ -102,7 +103,7 @@ def from_office_interface(src_path, dest_path, target_format, retry_times=1, fro
                                                    "file": base64_stream,
                                                    "target_format": target_format,
                                                    "retry_times": retry_times}, time_out=25))
-                log("get interface return")
+                log("get office_interface return")
                 log("office use time " + str(time.time()-start_time))
                 if type(r) == list:
                     # 接口连不上换个端口重试
@@ -142,6 +143,111 @@ def from_office_interface(src_path, dest_path, target_format, retry_times=1, fro
         return [-1]
 
 
+def from_office_interface(src_path, dest_path, target_format, retry_times=1, from_remote=FROM_REMOTE):
+    try:
+        if from_remote:
+            # 重试
+            while retry_times >= 0:
+                ip_port = interface_pool_gunicorn("office")
+                if judge_error_code(ip_port):
+                    return ip_port
+                _url = ip_port + "/soffice"
+
+                with open(src_path, "rb") as f:
+                    file_bytes = f.read()
+                base64_stream = base64.b64encode(file_bytes)
+                start_time = time.time()
+                log('office _url ' + str(_url))
+                r = json.loads(request_post(_url, {"src_path": src_path,
+                                                   "dest_path": dest_path,
+                                                   "file": base64_stream,
+                                                   "target_format": target_format,
+                                                   "retry_times": retry_times}, time_out=25))
+                log("get office_interface return, use time " + str(time.time()-start_time))
+
+                # 报错信息
+                if type(r) == list:
+                    file_path = r
+                    # 拒绝连接,换个端口
+                    if r == [-22]:
+                        log("retry post office_interface... left times " + str(retry_times))
+                        retry_times -= 1
+                        continue
+                    else:
+                        return r
+
+                file_str = r.get("data")
+                if judge_error_code(file_str):
+                    return file_str
+                uid1 = src_path.split(os.sep)[-1].split(".")[0]
+                file_path = dest_path + uid1 + "." + target_format
+                file_bytes = eval(file_str)
+                if not os.path.exists(os.path.dirname(file_path)):
+                    os.makedirs(os.path.dirname(file_path), mode=0o777)
+                with open(file_path, "wb") as f:
+                    f.write(file_bytes)
+                break
+        else:
+            file_path = office_convert(src_path, dest_path, target_format, retry_times)
+
+        if judge_error_code(file_path):
+            return file_path
+        return file_path
+    except TimeoutError:
+        log("from_office_interface timeout error!")
+        return [-5]
+    except:
+        log("from_office_interface error!")
+        traceback.print_exc()
+        return [-1]
+
+
+def from_tika_interface(src_path, from_remote=FROM_REMOTE):
+    log("into from_tika_interface")
+    start_time = time.time()
+    try:
+        # 调用接口
+        try:
+            if from_remote:
+                retry_times_1 = 2
+                # 重试
+                while retry_times_1:
+                    ip_port = interface_pool_gunicorn("tika")
+                    if judge_error_code(ip_port):
+                        return ip_port
+                    _url = ip_port + "/tika"
+                    r = json.loads(request_post(_url, {"data": src_path,
+                                                       "md5": _global.get("md5")},
+                                                time_out=10))
+                    log("get tika_interface return " + _url)
+                    if type(r) == list:
+                        # 接口连不上换个端口重试
+                        if retry_times_1 <= 1:
+                            return r
+                        else:
+                            retry_times_1 -= 1
+                            log("retry post tika_interface... left times " + str(retry_times_1))
+                            continue
+                    if judge_error_code(r):
+                        return r
+                    break
+            else:
+                r = tika_interface(src_path)
+        except TimeoutError:
+            return [-5]
+        except requests.exceptions.ConnectionError as e:
+            return [-2]
+
+        _dict = r
+        html = _dict.get("html")
+        log("from_tika_interface cost time " + str(time.time()-start_time))
+        return html
+    except Exception as e:
+        log("from_tika_interface error!")
+        traceback.print_exc()
+        return [-11]
+
+
 def from_ocr_interface(image_stream, is_table=0, only_rec=0, from_remote=FROM_REMOTE):
     log("into from_ocr_interface")
     try:
@@ -162,7 +268,7 @@ def from_ocr_interface(image_stream, is_table=0, only_rec=0, from_remote=FROM_RE
                                                        "only_rec": only_rec
                                                        },
                                                 time_out=60))
-                    log("get ocr interface return")
+                    log("get ocr_interface return")
                     if type(r) == list:
                         # 接口连不上换个端口重试
                         if retry_times_1 <= 1:
@@ -282,7 +388,7 @@ def from_otr_interface(image_stream, is_from_pdf=False, from_remote=FROM_REMOTE)
                     r = json.loads(request_post(_url, {"data": base64_stream,
                                                        "is_from_pdf": is_from_pdf,
                                                        "md5": _global.get("md5")}, time_out=60))
-                    log("get interface return")
+                    log("get otr_interface return")
                     if type(r) == list:
                         # 接口连不上换个端口重试
                         if retry_times_1 <= 1:
@@ -340,7 +446,7 @@ def from_isr_interface(image_stream, from_remote=FROM_REMOTE):
                     r = json.loads(request_post(_url, {"data": base64_stream,
                                                        "md5": _global.get("md5")},
                                                 time_out=60))
-                    log("get interface return")
+                    log("get isr_interface return")
                     if type(r) == list:
                         # 接口连不上换个端口重试
                         if retry_times_1 <= 1:
@@ -411,7 +517,7 @@ def from_idc_interface(image_stream, from_remote=FROM_REMOTE):
                     r = json.loads(request_post(_url, {"data": base64_stream,
                                                        "md5": _global.get("md5")},
                                                 time_out=60))
-                    log("get interface return")
+                    log("get idc_interface return")
                     if type(r) == list:
                         # 接口连不上换个端口重试
                         if retry_times_1 <= 1:
@@ -462,7 +568,7 @@ def from_atc_interface(text, from_remote=FROM_REMOTE):
                     r = json.loads(request_post(_url, {"data": text,
                                                        "md5": _global.get("md5")},
                                                 time_out=60))
-                    log("get interface return")
+                    log("get atc_interface return")
                     if type(r) == list:
                         # 接口连不上换个端口重试
                         if retry_times_1 <= 1:
@@ -516,7 +622,7 @@ def from_yolo_interface(image_stream, from_remote=FROM_REMOTE):
                     r = json.loads(request_post(_url, {"data": base64_stream,
                                                        "md5": _global.get("md5")},
                                                 time_out=60))
-                    log("get interface return")
+                    log("get yolo_interface return")
                     if type(r) == list:
                         # 接口连不上换个端口重试
                         if retry_times_1 <= 1:
@@ -563,7 +669,6 @@ def interface_pool_gunicorn(interface_type):
 
     try:
         if ip_port_dict is None or ip_port_flag_dict is None:
-            print('_global', _global.get_dict())
             raise NotFound
 
         # 负载均衡, 选取有该接口的ip
@@ -576,7 +681,6 @@ def interface_pool_gunicorn(interface_type):
             # print('temp_port_list', temp_port_list)
             if not temp_port_list:
                 continue
-
             # 该ip下的该接口总数量(可能有多gpu接口)
             _port_list, _port_num_list, _ = temp_port_list[0]
             # print('_port_num_list', _port_num_list)

+ 106 - 7
format_convert/convert_swf.py

@@ -9,11 +9,13 @@ import codecs
 import logging
 import re
 import traceback
+from PIL import Image
 from format_convert.convert_image import picture2text
 from format_convert.swf.export import SVGExporter
 from format_convert.swf.movie import SWF
 from format_convert.utils import judge_error_code, get_logger, log, memory_decorator
 from format_convert.wrapt_timeout_decorator import timeout
+from format_convert.yaswfp.swfparser import parsefile
 
 
 @memory_decorator
@@ -91,7 +93,7 @@ def swf2text(path, unique_type_dir):
         return [-1]
 
 
-@timeout(20, timeout_exception=TimeoutError)
+@timeout(40, timeout_exception=TimeoutError)
 def read_swf(path):
     with open(path, 'rb') as f:
         swf_file = SWF(f)
@@ -108,16 +110,80 @@ class SwfConvert:
         self.unique_type_dir = unique_type_dir
 
     @memory_decorator
-    def init_package(self):
+    def init_package(self, package_name):
+        if package_name == 'yaswfp':
+            try:
+                # self.swf_str = read_swf(self.path)
+                self.swf_parser = parsefile(self.path)
+            except Exception as e:
+                log("cannot open swf!")
+                traceback.print_exc()
+                self._doc.error_code = [-3]
+        elif package_name == 'swf':
+            try:
+                self.swf_str = read_swf(self.path)
+            except Exception as e:
+                log("cannot open swf!")
+                traceback.print_exc()
+                self._doc.error_code = [-3]
+
+    def swf_to_images(self):
+        log('swf_to_images yaswfp')
+        image_no = 0
+        image_path_prefix = self.path.split(".")[-2] + "_" + self.path.split(".")[-1]
+        image_path_index_list = []
         try:
-            self.swf_str = read_swf(self.path)
-        except Exception as e:
-            log("cannot open swf!")
+            for tag in self.swf_parser.tags:
+                if not hasattr(tag, 'ImageData'):
+                    continue
+                byte_data = tag.ImageData
+
+                image_path = image_path_prefix + "_page_" + str(image_no) + ".png"
+                with open(image_path, 'wb') as f:
+                    f.write(byte_data)
+
+                image = Image.open(image_path)
+                if image.size[0] > 1000 and image.size[1] > 1000:
+                    image = image.resize((600, 1000), Image.BILINEAR)
+                image.save(image_path, quality=10)
+                image_path_index_list.append([image_path, image_no])
+                image_no += 1
+        except:
+            image_path_index_list = [-18]
             traceback.print_exc()
-            self._doc.error_code = [-3]
+        return image_path_index_list
+
+    def swf_to_images2(self):
+        log('swf_to_images swf')
+        # 正则匹配图片的信息位置
+        result0 = re.finditer('<image id=(.[^>]*)', self.swf_str)
+        image_no = 0
+        image_path_prefix = self.path.split(".")[-2] + "_" + self.path.split(".")[-1]
+        image_path_index_list = []
+        for r in result0:
+            # 截取图片信息所在位置
+            swf_str0 = self.swf_str[r.span()[0]:r.span()[1] + 1]
+
+            # 正则匹配得到图片的base64编码
+            result1 = re.search('xlink:href="data:(.[^>]*)', swf_str0)
+            swf_str1 = swf_str0[result1.span()[0]:result1.span()[1]]
+            reg1_prefix = 'b\''
+            result1 = re.search(reg1_prefix + '(.[^\']*)', swf_str1)
+            swf_str1 = swf_str1[result1.span()[0] + len(reg1_prefix):result1.span()[1]]
+
+            # base64_str -> base64_bytes -> no "\\" base64_bytes -> bytes -> image
+            base64_bytes_with_double = bytes(swf_str1, "utf-8")
+            base64_bytes = codecs.escape_decode(base64_bytes_with_double, "hex-escape")[0]
+            image_bytes = base64.b64decode(base64_bytes)
+            image_path = image_path_prefix + "_page_" + str(image_no) + ".png"
+            with open(image_path, "wb") as f:
+                f.write(image_bytes)
+            image_path_index_list.append([image_path, image_no])
+            image_no += 1
+        return image_path_index_list
 
     @memory_decorator
-    def convert(self):
+    def convert_old(self):
         self.init_package()
         if self._doc.error_code is not None:
             return
@@ -152,6 +218,31 @@ class SwfConvert:
             image_no += 1
         self._doc.add_child(self._page)
 
+    @memory_decorator
+    def convert(self):
+        self._page = _Page(None, 0)
+
+        self.init_package('yaswfp')
+        if self._doc.error_code is not None:
+            return
+        image_path_index_list = self.swf_to_images()
+        if judge_error_code(image_path_index_list):
+            self._doc.error_code = image_path_index_list
+            return
+        if image_path_index_list:
+            for image_path, image_no in image_path_index_list:
+                _image = _Image(None, image_path, (0, image_no, 0, 0))
+                self._page.add_child(_image)
+        else:
+            self.init_package('swf')
+            if self._doc.error_code is not None:
+                return
+            image_path_index_list = self.swf_to_images2()
+            for image_path, image_no in image_path_index_list:
+                _image = _Image(None, image_path, (0, image_no, 0, 0))
+                self._page.add_child(_image)
+        self._doc.add_child(self._page)
+
     def get_html(self):
         try:
             self.convert()
@@ -161,3 +252,11 @@ class SwfConvert:
         if self._doc.error_code is not None:
             return self._doc.error_code
         return self._doc.get_html()
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+    p = "C:/Users/Administrator/Downloads/1716617588175.swf"
+    obj = SwfConvert(p, 'temp/1/')
+    obj.convert()
+    print(time.time()-start_time)

+ 101 - 58
format_convert/convert_test.py

@@ -1,9 +1,11 @@
 import base64
+import concurrent.futures
 import json
 import os
 import random
 import sys
 import time
+import traceback
 from glob import glob
 
 import requests
@@ -25,7 +27,13 @@ from format_convert.convert import to_html
 import multiprocessing as mp
 
 
-def test_one(p, page_no_range=None, from_remote=False, timeout=300, save_middle=None):
+html_output_dir = os.path.dirname(os.path.abspath(__file__)) + "/../html_output/"
+
+
+def test_one(p, page_no_range=None, timeout=300, save_middle=None, save_html=False):
+    if type(p) == tuple:
+        p, page_no_range, timeout, save_middle, save_html = p
+
     start_time = time.time()
     with open(p, "rb") as f:
         file_bytes = f.read()
@@ -35,27 +43,43 @@ def test_one(p, page_no_range=None, from_remote=False, timeout=300, save_middle=
 
     data = {"file": file_base64, "type": p.split(".")[-1], "filemd5": _md5, 'page_no': page_no_range,
             'timeout': timeout, 'save_middle': save_middle}
-    if from_remote:
-        # _url = 'http://121.46.18.113:15010/convert'
-        # _url = 'http://192.168.2.103:15010/convert'
-        # _url = 'http://192.168.2.102:15011/convert'
-        # _url = 'http://172.16.160.65:15010/convert'
-        _url = 'http://127.0.0.1:15010/convert'
+
+    # _url = 'http://121.46.18.113:15010/convert'
+    # _url = 'http://192.168.2.103:15010/convert'
+    # _url = 'http://192.168.2.102:15010/convert'
+    # _url = 'http://172.16.160.65:15010/convert'
+    _url = 'http://127.0.0.1:15010/convert'
+
+    text_str = ""
+    try:
         result = json.loads(request_post(_url, data, time_out=timeout+20))
-        text_str = ""
+
         for t in result.get("result_html"):
             text_str += t
         to_html(os.path.dirname(os.path.abspath(__file__)) + "/../result.html",
                 text_str)
-    else:
-        print("only support remote!")
 
-    print(_md5)
-    print('第', page_no_range.split(',')[0], '页到第', page_no_range.split(',')[-1], '页')
-    print("result_text", result.get("result_text")[0][:20])
-    print("is_success", result.get("is_success"))
+        if save_html:
+            new_path = html_output_dir + p.split(os.sep)[-1].split('.')[0] + '.html'
+            if 0 < len(text_str) <= 3 and text_str[0] == '-':
+                print(new_path, text_str)
+            else:
+                to_html(new_path, text_str)
+
+        print(_md5)
+        print('第', page_no_range.split(',')[0], '页到第', page_no_range.split(',')[-1], '页')
+        print("result_text", result.get("result_text")[0][:20])
+        print("is_success", result.get("is_success"))
+    except:
+        traceback.print_exc()
+        print(_md5)
+        print("is_success", 0)
+
     print(time.time()-start_time)
 
+    return p, 1
+
+
 
 def test_path():
     # _url = 'http://121.46.18.113:15010/convert'
@@ -112,23 +136,75 @@ def test_maxcompute(p, page_no_range=None):
     print(time.time()-start_time)
 
 
+def run_files(thread_num=20):
+    paths = glob(r'C:\Users\Administrator\Downloads\招标文件内容提取\*')
+
+    temp_list = []
+    for _path in paths:
+        new_path = html_output_dir + _path.split(os.sep)[-1].split('.')[0] + '.html'
+        if os.path.exists(new_path):
+            continue
+        temp_list.append(_path)
+    paths = temp_list
+
+    print('len(paths)', len(paths))
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=thread_num) as executor:
+        tasks = []
+        for _path in paths:
+            tasks.append((_path, '1,-1', 10000, None, True))
+
+        # 提交任务给线程池
+        results = executor.map(test_one, tasks)
+
+        for result in results:
+            print(result)
+
+
+def test_kimi():
+    MOONSHOT_API_KEY = 'sk-ZqQBQfVBrs1lIilWVgggYqFwGcMu5pjlCeQf2SZL1KDlg1Pj'
+    paths = glob(html_output_dir + '*.html')
+    for p in paths[:100]:
+        with open(p, 'r', encoding='utf-8') as f:
+            _str = f.read()
+        print('len(_str)', len(_str))
+        data = {
+            'model': 'moonshot-v1-8k',
+            'messages': [
+                {
+                    "role": "user",
+                    "content": _str
+                }
+            ],
+        }
+        _url = 'https://api.moonshot.cn/v1/tokenizers/estimate-token-count'
+        headers = {'Content-Type': 'application/json',
+                   "Authorization": "Bearer " + MOONSHOT_API_KEY}
+        result = requests.post(_url, json=data, data=None, headers=headers, timeout=100)
+        print(result.text)
+
+
 if __name__ == '__main__':
     if get_platform() == "Windows":
-        # file_path = "C:/Users/Administrator/Desktop/2.png"
-        # file_path = "C:/Users/Administrator/Desktop/test_xls/error4.xls"
-        # file_path = "C:/Users/Administrator/Desktop/test_doc/error5.doc"
+        # file_path = "C:/Users/Administrator/Downloads/1672314827836.pdf"
         # file_path = "D:/BIDI_DOC/比地_文档/1677829036789.pdf"
-        # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_ODPS/1624325845476.pdf"
-        file_path = "C:/Users/Administrator/Downloads/d871aa30916ab23c7d91d34ebd40002a.jpg"
-        # file_path = "C:/Users/Administrator/Desktop/test_doc/error14.docx"
-        # file_path = "C:/Users/Administrator/Desktop/test_image/error9-1.png"
-        # file_path = "C:/Users/Administrator/Desktop/test_b_table/error1.png"
-        # file_path = "C:/Users/Administrator/Desktop/test_pdf/直接读表格线error/error62.pdf"
-        # file_path = "C:/save_b_table/0-0895e32470613dd7be1139eefd1342c4.png"
+
+        # file_path = "C:/Users/Administrator/Desktop/test_xls/error7.xls"
+        # file_path = "C:/Users/Administrator/Desktop/test_doc/error15.doc"
+        # file_path = "C:/Users/Administrator/Desktop/test_swf/error1.swf"
+        # file_path = "C:/Users/Administrator/Desktop/test_rar/error1.rar"
+        file_path = "C:/Users/Administrator/Desktop/test_image/error7.png"
+        # file_path = "C:/Users/Administrator/Desktop/test_b_table/error13.pdf"
+        # file_path = "C:/Users/Administrator/Desktop/test_pdf/表格连接error/error6.pdf"
+        # file_path = "C:/Users/Administrator/Desktop/test_table_head/error2.pdf"
     else:
         file_path = "1660296734009.pdf"
 
-    test_one(file_path, page_no_range='1,-1', from_remote=True, timeout=1000, save_middle=None)
+    test_one(file_path, page_no_range='1,-1', timeout=1000, save_middle=None)
+
+    # run_files()
+
+    # test_kimi()
 
     # test_path()
 
@@ -153,39 +229,6 @@ if __name__ == '__main__':
     index = 11
     # test_one(file_path+test_pdf_list[index][0], page_no_range=test_pdf_list[index][1], from_remote=True)
 
-    # from pdfplumber.table import TableFinder
-    # fp = open(file_path+test_pdf_list[index][0], 'rb')
-    # parser = PDFParser(fp)
-    # doc_pdfminer = PDFDocument(parser)
-    # rsrcmgr = PDFResourceManager()
-    # laparams = LAParams(line_overlap=0.01,
-    #                     char_margin=0.3,
-    #                     line_margin=0.01,
-    #                     word_margin=0.01,
-    #                     boxes_flow=0.1, )
-    # device = PDFPageAggregator(rsrcmgr, laparams=laparams)
-    # interpreter = PDFPageInterpreter(rsrcmgr, device)
-    # doc_top = 0
-    # doc_pdfplumber = PDF(fp)
-    # pages = PDFPage.create_pages(doc_pdfminer)
-    # from pdfplumber.page import Page as pdfPage
-    # for page in pages:
-    #     page_plumber = pdfPage(doc_pdfplumber, page, page_number=1, initial_doctop=doc_top)
-    #     table_finder = TableFinder(page_plumber)
-    #     all_width_zero = True
-    #     for _edge in table_finder.get_edges():
-    #         if _edge.get('linewidth') and _edge.get('linewidth') > 0:
-    #             all_width_zero = False
-    #             break
-    #     lt_line_list = []
-    #     for _edge in table_finder.get_edges():
-    #         # print(_edge)
-    #         if _edge.get('linewidth', 0.1) > 0 or all_width_zero:
-    #             lt_line_list.append(LTLine(1, (float(_edge["x0"]), float(_edge["y0"])),
-    #                                        (float(_edge["x1"]), float(_edge["y1"]))))
-    #     _plot(lt_line_list, 'table', 1, 1)
-
-
 
     # 测试maxcompute模式
     # _process = mp.Process(target=test_maxcompute, args=(file_path, '1,-1',))

+ 5 - 5
format_convert/convert_xls.py

@@ -57,11 +57,11 @@ class XlsConvert:
             self._doc.add_child(self._page)
         else:
             # 调用office格式转换
-            file_path = from_office_interface(self.path, self.unique_type_dir, 'xlsx')
-            if judge_error_code(file_path):
-                self._doc.error_code = file_path
-                return
-            _xlsx = XlsxConvert(file_path, self.unique_type_dir)
+            # file_path = from_office_interface(self.path, self.unique_type_dir, 'xlsx')
+            # if judge_error_code(file_path):
+            #     self._doc.error_code = file_path
+            #     return
+            _xlsx = XlsxConvert(self.path, self.unique_type_dir, is_xls=True)
             _xlsx.convert()
             self._doc = _xlsx._doc
 

+ 151 - 43
format_convert/convert_xlsx.py

@@ -5,7 +5,7 @@ sys.path.append(os.path.dirname(__file__) + "/../")
 from format_convert.convert_tree import _Document, _Page, _Table
 import logging
 import traceback
-import pandas
+import pandas as pd
 import numpy as np
 import xlrd
 from format_convert.utils import get_logger, log, memory_decorator
@@ -18,7 +18,7 @@ def xlsx2text(path, unique_type_dir):
     try:
         try:
             # sheet_name=None, 即拿取所有sheet,存为dict
-            df_dict = pandas.read_excel(path, header=None, keep_default_na=False, sheet_name=None)
+            df_dict = pd.read_excel(path, header=None, keep_default_na=False, sheet_name=None)
         except Exception as e:
             log("xlsx format error!")
             return [-3]
@@ -45,70 +45,108 @@ def xlsx2text(path, unique_type_dir):
 
 class XlsxConvert:
 
-    def __init__(self, path, unique_type_dir):
+    def __init__(self, path, unique_type_dir, is_xls=False):
         self._doc = _Document(path)
         self.path = path
         self.unique_type_dir = unique_type_dir
 
+        # xls直接用xlrd读取
+        self.is_xls = is_xls
+
+        self.workbook = None
+        self.sheet_list = []
+
+        # 防止读太多列行
+        self.col_limit = 100
+        self.row_limit = 2000
+
+        # 防止sheet太多
+        self.sheet_limit = 10
+
     @timeout(30, timeout_exception=TimeoutError, use_signals=False)
     def read(self):
-        # pandas
-        df = pandas.read_excel(self.path, header=None, keep_default_na=False, sheet_name=None)
-        # xlrd 为了读取合并单元格
+        # xlrd 为了读取合并单元格 或 直接读取xls
         workbook = xlrd.open_workbook(self.path)
-        return df, workbook
+
+        if not self.is_xls:
+            # pandas
+            # df = pd.read_excel(self.path, header=None, keep_default_na=False, sheet_name=None)
+            df = pd.read_excel(self.path, header=None, keep_default_na=False,
+                               sheet_name=None, usecols=[x for x in range(self.col_limit)],
+                               nrows=self.row_limit)
+            sheet_list = [sheet for sheet in df.values()]
+
+        else:
+            # xlrd -> pandas
+            data_list = []
+            for sheet in workbook.sheets():
+                data = []
+                # 读取工作表中的内容
+                for row_idx in range(sheet.nrows):
+                    if row_idx >= self.row_limit:
+                        break
+                    row = sheet.row_values(row_idx)[:self.col_limit]
+                    data.append(row)
+
+                # 将读取的数据转换为 pandas DataFrame
+                df = pd.DataFrame(data)
+                data_list.append(df)
+            sheet_list = data_list
+
+        # 使用了定时装饰器,需直接返回结果,直接赋值对象变量无效
+        # self.workbook = workbook
+        # self.sheet_list = self.sheet_list[:self.sheet_limit]
+        return workbook, sheet_list
 
     def init_package(self):
         # 各个包初始化
         try:
-            self.df, self.workbook = self.read()
-            self.sheet_list = [sheet for sheet in self.df.values()]
-
-            # 防止读太多空列空行
-            self.col_limit = 100
-            self.row_limit = 2000
-            self.re_read = 0
-            for s in self.sheet_list:
-                if s.shape[1] > self.col_limit and s.shape[0] > self.row_limit:
-                    self.re_read = 3
-                    break
-                elif s.shape[0] > self.row_limit:
-                    self.re_read = 2
-                    break
-                elif s.shape[1] > self.col_limit:
-                    self.re_read = 1
-                    break
+            self.workbook, self.sheet_list = self.read()
+            # self.df, self.workbook = self.read()
+            # self.sheet_list = [sheet for sheet in self.df.values()]
 
-            if self.re_read == 3:
-                self.df = pandas.read_excel(self.path, header=None, keep_default_na=False,
-                                            sheet_name=None, usecols=[x for x in range(self.col_limit)],
-                                            nrows=self.row_limit)
-            if self.re_read == 2:
-                self.df = pandas.read_excel(self.path, header=None, keep_default_na=False,
-                                            sheet_name=None, nrows=self.row_limit)
-            elif self.re_read == 1:
-                self.df = pandas.read_excel(self.path, header=None, keep_default_na=False,
-                                            sheet_name=None, usecols=[x for x in range(self.col_limit)])
-            if self.re_read > 0:
-                self.sheet_list = [sheet for sheet in self.df.values()]
+            # self.re_read = 0
+            # for s in self.sheet_list:
+            #     if s.shape[1] > self.col_limit and s.shape[0] > self.row_limit:
+            #         self.re_read = 3
+            #         break
+            #     elif s.shape[0] > self.row_limit:
+            #         self.re_read = 2
+            #         break
+            #     elif s.shape[1] > self.col_limit:
+            #         self.re_read = 1
+            #         break
+
+            # if self.re_read == 3:
+            #     self.df = pd.read_excel(self.path, header=None, keep_default_na=False,
+            #                                 sheet_name=None, usecols=[x for x in range(self.col_limit)],
+            #                                 nrows=self.row_limit)
+            # if self.re_read == 2:
+            #     self.df = pd.read_excel(self.path, header=None, keep_default_na=False,
+            #                                 sheet_name=None, nrows=self.row_limit)
+            # elif self.re_read == 1:
+            #     self.df = pd.read_excel(self.path, header=None, keep_default_na=False,
+            #                                 sheet_name=None, usecols=[x for x in range(self.col_limit)])
+            # if self.re_read > 0:
+            #     self.sheet_list = [sheet for sheet in self.df.values()]
 
             # print(self.sheet_list[0].shape)
         except:
-            log("cannot open xlsx!")
+            if self.is_xls:
+                log("cannot open xls!")
+            else:
+                log("cannot open xlsx!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 
     def convert(self):
+        log('into xlsx_convert')
         self.init_package()
         if self._doc.error_code is not None:
             return
 
         sheet_no = 0
         for sheet in self.sheet_list:
-            # 删除xlsx全为空的行列
-            sheet.dropna(how='all', axis=1, inplace=True)
-            sheet.dropna(how='all', axis=0, inplace=True)
-
             self._page = _Page(None, sheet_no)
             self.convert_page(sheet, sheet_no)
 
@@ -117,7 +155,7 @@ class XlsxConvert:
             self._doc.add_child(self._page)
             sheet_no += 1
 
-    def convert_page2(self, sheet):
+    def convert_page_230101(self, sheet):
         text = '<table border="1">' + "\n"
 
         # 剔除多余空列
@@ -156,7 +194,7 @@ class XlsxConvert:
         _table = _Table(text, (0, 0, 0, 0), is_html=True)
         self._page.add_child(_table)
 
-    def convert_page(self, sheet, sheet_no):
+    def convert_page_2405024(self, sheet, sheet_no):
         # 剔除多余空列
         max_row_len = 0
         max_col_len = 0
@@ -225,6 +263,76 @@ class XlsxConvert:
         _table = _Table(text, (0, 0, 0, 0), is_html=True)
         self._page.add_child(_table)
 
+    def convert_page(self, sheet, sheet_no):
+        row_list = self.delete_empty_row_col(sheet)
+
+        # xlrd 获取合并单元格位置
+        sheet_xlrd = self.workbook.sheet_by_index(sheet_no)
+        merged_cell_list = sheet_xlrd.merged_cells
+        merged_cell_list.sort(key=lambda x: (x[0], x[1], x[2], x[3]))
+        # print("merged_cell_list", merged_cell_list)
+
+        # 复制填充合并单元格
+        for row_start, row_end, col_start, col_end in merged_cell_list:
+            if row_start >= len(row_list) or row_end > len(row_list):
+                continue
+            if col_start >= len(row_list[row_start]) or col_end > len(row_list[row_start]):
+                continue
+            copy_cell = row_list[row_start][col_start]
+            for i in range(row_start, row_end):
+                row = row_list[i]
+                # 第一行补少一个,其他行需补多一个
+                if i == row_start:
+                    col_start_real = col_start+1
+                else:
+                    col_start_real = col_start
+                for j in range(col_start_real, col_end):
+                    if row[j] == "":
+                        row[j] = copy_cell
+
+        # 拼接html表格
+        text = '<table border="1">' + "\n"
+        for row in row_list:
+            text = text + "<tr>"
+            for col in row:
+                text = text + "<td>" + str(col) + "</td>" + "\n"
+            text = text + "</tr>" + "\n"
+        text = text + "</table>" + "\n"
+
+        _table = _Table(text, (0, 0, 0, 0), is_html=True)
+        self._page.add_child(_table)
+
+    def delete_empty_row_col(self, sheet):
+        # 删除xlsx全为空的行列
+        sheet.dropna(how='all', axis=1, inplace=True)
+        sheet.dropna(how='all', axis=0, inplace=True)
+
+        # 剔除多余空列
+        max_row_len = 0
+        max_col_len = 0
+        for index, row in sheet.iterrows():
+            col_len = 0
+            row_empty_flag = 1
+            for i in range(len(row)):
+                if row[i] not in [None, "", np.nan]:
+                    row_empty_flag = 0
+                    col_len = i
+            if col_len > max_col_len:
+                max_col_len = col_len
+            if row_empty_flag == 0:
+                max_row_len = index
+
+        row_list = []
+        for index, row in sheet.iterrows():
+            if index > max_row_len:
+                break
+            row = row[:max_col_len+1]
+            col_list = []
+            for r in row:
+                col_list.append(str(r))
+            row_list.append(col_list)
+        return row_list
+
     def get_html(self):
         try:
             self.convert()

+ 0 - 1
format_convert/max_compute_config.py

@@ -1 +0,0 @@
-max_compute = False

+ 0 - 87
format_convert/monitor_process.py

@@ -1,87 +0,0 @@
-import logging
-import os
-import re
-
-import psutil
-
-
-convert_port_list = ["15010"]
-# ocr_port_list = ["15011", "15013", "15015"]
-ocr_port_list = ["15011", "15013"]
-otr_port_list = ["15012", "15014"]
-soffice_port_list = ["16000", "16001", "16002", "16003"]
-
-
-python_path = "/home/python/anaconda3/envs/convert/bin/python"
-interface_path = "/data/fangjiasheng/format_conversion_maxcompute"
-std_out = " >>/convert.out 2>&1 &"
-convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
-ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_interface.py #" + std_out
-otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_interface.py #" + std_out
-soffice_comm = "docker run -itd -p #:16000 soffice:v1 bash"
-
-
-def get_port():
-    net_conn = psutil.net_connections()
-    current_port_list = []
-    for conn in net_conn:
-        current_port_list.append(str(conn.laddr.port))
-    current_port_list = list(set(current_port_list))
-    current_port_list.sort(key=lambda x: x)
-    # print(current_port_list)
-    return current_port_list
-
-
-def restart(process_type, port):
-    if process_type == "convert":
-        _comm = re.sub("#", port, convert_comm)
-    elif process_type == "ocr":
-        _comm = re.sub("#", port, ocr_comm)
-    elif process_type == "otr":
-        _comm = re.sub("#", port, otr_comm)
-    elif process_type == "soffice":
-        _comm = re.sub("#", port, soffice_comm)
-    else:
-        _comm = "netstat -nltp"
-        print("no process_type", process_type)
-    print(_comm)
-    # os.system("netstat -nltp")
-    os.system(_comm)
-
-
-def kill_soffice(limit_sec=12):
-    pid_list = psutil.pids()
-    for pid in pid_list:
-        process = psutil.Process(pid)
-        if re.search("soffice", process.exe()):
-            run_time = process.cpu_times().user
-            if run_time >= limit_sec:
-                comm = "kill -9 " + str(pid)
-                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
-                os.system(comm)
-
-
-def monitor():
-    current_port_list = get_port()
-
-    for p in convert_port_list:
-        if p not in current_port_list:
-            restart("convert", p)
-
-    for p in ocr_port_list:
-        if p not in current_port_list:
-            restart("ocr", p)
-
-    for p in otr_port_list:
-        if p not in current_port_list:
-            restart("otr", p)
-
-    for p in soffice_port_list:
-        if p not in current_port_list:
-            restart("soffice", p)
-
-    kill_soffice()
-
-
-if __name__ == "__main__":
-    monitor()

+ 0 - 134
format_convert/monitor_process2.py

@@ -1,134 +0,0 @@
-import logging
-import os
-import re
-import time
-
-import psutil
-
-
-convert_port_list = ["15010"]
-# ocr_port_list = ["15011", "15013", "15015"]
-# ocr_port_list = ["15011", "15013", "15015", "15017", "15019"]
-# otr_port_list = ["15012", "15014", "15016", "15018", "15020"]
-ocr_port_list = ["15011", "15013", "15015", "15017", "15019", "15021"]
-otr_port_list = ["15012", "15014", "15016", "15018", "15020", "15022"]
-soffice_port_list = ["16000", "16001", "16002", "16003", "16004", "16005",
-                     "16006", "16007", "16008", "16009"]
-
-
-python_path = "/root/miniconda3/bin/python"
-interface_path = "/data/format_conversion_maxcompute"
-std_out = " >>/convert.out 2>&1 &"
-std_out_gpu = " >>/gpu.out 2>&1 &"
-convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
-ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_interface.py # 0" + std_out_gpu
-otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_interface.py # 0" + std_out_gpu
-soffice_comm = "docker run --init -itd --log-opt max-size=10m --log-opt max-file=3 -p #:16000 soffice:v2 bash"
-
-
-def get_port():
-    net_conn = psutil.net_connections()
-    current_port_list = []
-    for conn in net_conn:
-        current_port_list.append(str(conn.laddr.port))
-    current_port_list = list(set(current_port_list))
-    current_port_list.sort(key=lambda x: x)
-    # print(current_port_list)
-    return current_port_list
-
-
-def restart(process_type, port):
-    if process_type == "convert":
-        _comm = re.sub("#", port, convert_comm)
-    elif process_type == "ocr":
-        _comm = re.sub("#", port, ocr_comm)
-    elif process_type == "otr":
-        _comm = re.sub("#", port, otr_comm)
-    elif process_type == "soffice":
-        _comm = re.sub("#", port, soffice_comm)
-    else:
-        _comm = "netstat -nltp"
-        print("no process_type", process_type)
-
-    # os.system("netstat -nltp")
-    os.system("echo $(date +%F%n%T)")
-    print("restart comm", _comm)
-    os.system(_comm)
-
-
-def kill_soffice(limit_sec=20):
-    pid_list = psutil.pids()
-    for pid in pid_list:
-        process = psutil.Process(pid)
-
-        process_cmd = ''
-        for c in process.cmdline():
-            process_cmd += c + " "
-        if process_cmd.strip() == "":
-            continue
-
-        if process.status() == "zombie":
-            print("zombie cmd", process_cmd)
-
-        if re.search("soffice", process.exe()):
-            if process.status() == "zombie":
-                ppid = process.ppid
-                comm = "kill -9 " + str(ppid)
-                print("kill defunct process ", str(ppid), str(process.exe()))
-                os.system("echo $(date +%F%n%T)")
-                os.system(comm)
-
-            start_time = process.create_time()
-            now_time = time.time()
-            run_time = now_time-start_time
-            if run_time >= limit_sec:
-                comm = "kill -9 " + str(pid)
-                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
-                os.system("echo $(date +%F%n%T)")
-                os.system(comm)
-
-
-def kill_defunct():
-    pid_list = psutil.pids()
-    for pid in pid_list:
-        process = psutil.Process(pid)
-        if process.status() == "zombie":
-            ppid = process.ppid
-            process = psutil.Process(ppid)
-            process.kill()
-            process.send_signal(9)
-            break
-            # comm = "kill -9 " + str(ppid)
-            # print("kill process ", str(ppid))
-            # os.system("echo $(date +%F%n%T)")
-            # os.system(comm)
-
-
-def monitor():
-    current_port_list = get_port()
-
-    for p in convert_port_list:
-        if p not in current_port_list:
-            restart("convert", p)
-
-    for p in ocr_port_list:
-        if p not in current_port_list:
-            restart("ocr", p)
-
-    for p in otr_port_list:
-        if p not in current_port_list:
-            restart("otr", p)
-
-    for p in soffice_port_list:
-        if p not in current_port_list:
-            restart("soffice", p)
-
-    kill_soffice()
-
-
-if __name__ == "__main__":
-    for i in range(6):
-        # os.system("echo $(date +%F%n%T)")
-        monitor()
-        time.sleep(10)
-    # kill_defunct()

+ 0 - 104
format_convert/monitor_process3.py

@@ -1,104 +0,0 @@
-import logging
-import os
-import re
-import sys
-import time
-import psutil
-sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
-from format_convert.utils import get_ip_port
-
-
-# convert_port_list = ["15010"]
-# ocr_port_list = ["15011", "15013", "15015"]
-# ocr_port_list = ["15011", "15013", "15015", "15017", "15019"]
-# otr_port_list = ["15012", "15014", "15016", "15018", "15020"]
-# ocr_port_list = ["15011", "15013", "15015", "15017", "15019", "15021"]
-# otr_port_list = ["15012", "15014", "15016", "15018", "15020", "15022"]
-# soffice_port_list = ["16000", "16001", "16002", "16003", "16004", "16005",
-#                      "16006", "16007", "16008", "16009"]
-
-convert_port_list = get_ip_port("convert")
-ocr_port_list = get_ip_port("ocr")
-otr_port_list = get_ip_port("otr")
-soffice_port_list = get_ip_port("office")
-
-
-python_path = "/root/miniconda3/bin/python"
-interface_path = "/data/format_conversion_maxcompute"
-std_out = " >>/convert.out 2>&1 &"
-std_out_gpu = " >>/gpu.out 2>&1 &"
-convert_comm = "nohup " + python_path + " " + interface_path + "/format_convert/convert.py #" + std_out
-ocr_comm = "nohup " + python_path + " " + interface_path + "/ocr/ocr_interface.py # 0" + std_out + std_out_gpu
-otr_comm = "nohup " + python_path + " " + interface_path + "/otr/otr_interface.py # 0" + std_out + std_out_gpu
-soffice_comm = "docker run -itd -p #:16000 soffice:v1 bash"
-
-
-def get_port():
-    net_conn = psutil.net_connections()
-    current_port_list = []
-    for conn in net_conn:
-        current_port_list.append(str(conn.laddr.port))
-    current_port_list = list(set(current_port_list))
-    current_port_list.sort(key=lambda x: x)
-    # print(current_port_list)
-    return current_port_list
-
-
-def restart(process_type, port):
-    if process_type == "convert":
-        _comm = re.sub("#", port, convert_comm)
-    elif process_type == "ocr":
-        _comm = re.sub("#", port, ocr_comm)
-    elif process_type == "otr":
-        _comm = re.sub("#", port, otr_comm)
-    elif process_type == "soffice":
-        _comm = re.sub("#", port, soffice_comm)
-    else:
-        _comm = "netstat -nltp"
-        print("no process_type", process_type)
-    print(_comm)
-    # os.system("netstat -nltp")
-    os.system("echo $(date +%F%n%T)")
-    os.system(_comm)
-
-
-def kill_soffice(limit_sec=12):
-    pid_list = psutil.pids()
-    for pid in pid_list:
-        process = psutil.Process(pid)
-        if re.search("soffice", process.exe()):
-            start_time = process.create_time()
-            now_time = time.time()
-            # run_time = process.cpu_times().user
-            run_time = now_time-start_time
-            if run_time >= limit_sec:
-                comm = "kill -9 " + str(pid)
-                print("kill process ", str(pid), str(process.exe()), str(run_time), ">", limit_sec)
-                os.system("echo $(date +%F%n%T)")
-                os.system(comm)
-
-
-def monitor():
-    current_port_list = get_port()
-
-    # for p in convert_port_list:
-    #     if p not in current_port_list:
-    #         restart("convert", p)
-
-    for p in ocr_port_list:
-        if p not in current_port_list:
-            restart("ocr", p)
-
-    for p in otr_port_list:
-        if p not in current_port_list:
-            restart("otr", p)
-
-    # for p in soffice_port_list:
-    #     if p not in current_port_list:
-    #         restart("soffice", p)
-    #
-    # kill_soffice()
-
-
-if __name__ == "__main__":
-    monitor()

+ 66 - 8
format_convert/utils.py

@@ -33,6 +33,7 @@ import psutil
 import time
 import numpy as np
 from format_convert.judge_platform import get_platform
+from config.interface_list import INTERFACES
 
 if get_platform() == "Linux":
     import resource
@@ -40,6 +41,8 @@ import math
 
 from shapely.geometry import Polygon
 
+config_file_path = os.path.dirname(os.path.abspath(__file__)) + "/../config/interface_new.yml"
+
 
 def has_intersection(poly1, poly2):
     """
@@ -58,7 +61,8 @@ def has_intersection(poly1, poly2):
     return polygon1.intersects(polygon2)
 
 
-def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16]):
+def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13,
+                                  -14, -15, -16, -17, -18, -19, -20, -21, -22]):
     """
     [0] : continue
     [-1]: 逻辑处理错误
@@ -77,6 +81,12 @@ def judge_error_code(_list, code=[0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -1
     [-14]: 指定页码报错
     [-15]: office转换接口未运行
     [-16]: idc方向分类错误导致ocr读取乱码
+    [-17]: tika接口报错
+    [-18]: 新的swf处理报错
+    [-19]: 动态获取端口报错
+    [-20]: requests请求超时
+    [-21]: requests请求返回错误状态码
+    [-22]: requests请求拒绝连接
     """
     for c in code:
         if isinstance(_list, list) and _list == [c]:
@@ -1526,7 +1536,7 @@ session_otr = requests.Session()
 session_all = requests.Session()
 
 
-def request_post(url, param, time_out=1000, use_zlib=False):
+def request_post_240606(url, param, time_out=1000, use_zlib=False):
     fails = 0
     text = json.dumps([-2])
     while True:
@@ -1564,6 +1574,25 @@ def request_post(url, param, time_out=1000, use_zlib=False):
     return text
 
 
+def request_post(url, param, time_out=1000):
+    try:
+        headers = {'content-type': 'application/json'}
+        result = session_all.post(url, data=param, timeout=time_out)
+
+        if result.status_code == 200:
+            text = result.text
+        else:
+            text = json.dumps([-21])
+    except socket.timeout:
+        text = json.dumps([-20])
+    except requests.exceptions.ConnectionError:
+        text = json.dumps([-22])
+    except:
+        text = json.dumps([-2])
+        traceback.print_exc()
+    return text
+
+
 def test_gpu():
     print("=" * 30)
     import paddle
@@ -1595,7 +1624,8 @@ def my_subprocess_call(*popenargs, timeout=None):
 
 
 def parse_yaml():
-    yaml_path = os.path.dirname(os.path.abspath(__file__)) + "/interface_new.yml"
+    # yaml_path = os.path.dirname(os.path.abspath(__file__)) + "/../config/interface_new.yml"
+    yaml_path = config_file_path
     # with open(yaml_path, "r", encoding='utf-8') as f:
     #     cfg = f.read()
     #
@@ -1613,7 +1643,8 @@ def get_ip_port(node_type=None, interface_type=None):
         node_type_list = [node_type]
 
     if interface_type is None:
-        interface_type_list = ["convert", "ocr", "otr", "office", "path", "isr", "idc", "atc", "yolo"]
+        # interface_type_list = ["convert", "ocr", "otr", "office", "path", "isr", "idc", "atc", "yolo", 'tika']
+        interface_type_list = INTERFACES + ["path"]
     else:
         interface_type_list = [interface_type]
 
@@ -1839,7 +1870,8 @@ def set_flask_global():
     for _k in ip_port_dict.keys():
         # print(_k)
         ip_port_flag.update({_k: {}})
-        for interface in ["ocr", "otr", "convert", "idc", "isr", "atc", 'yolo', "office"]:
+        interface_type_list = INTERFACES + ['path']
+        for interface in interface_type_list:
             if ip_port_dict.get(_k).get("MASTER") and ip_port_dict.get(_k).get("MASTER").get(interface):
                     ip_port_flag[_k][interface] = 0
             else:
@@ -2169,13 +2201,23 @@ def ocr_cant_read(text_list, box_list):
 
     # 每个格子的中文都小于2
     short_text_cnt = 0
+    single_text_cnt = 0
+    short_text_flag = 0
+    single_text_list = []
     for text in text_list:
-        if len(re.findall('[\u4e00-\u9fa5]', text)) <= 2:
+        ch_list = re.findall('[\u4e00-\u9fa5]', text)
+        ch_text_len = len(ch_list)
+        ch_text = ''.join(ch_list)
+        if ch_text_len <= 2:
+        # if len(re.findall('[\u4e00-\u9fa5]', text)) <= 2:
             short_text_cnt += 1
+        if len(text) == 1 and ch_text_len == 1 and ch_text not in single_text_list:
+            single_text_list.append(ch_text)
+            single_text_cnt += 1
     if short_text_cnt >= len(text_list):
         short_text_flag = 1
-    else:
-        short_text_flag = 0
+    if single_text_cnt >= 1/4 * len(text_list):
+        short_text_flag = 1
 
     # print('short_text_cnt', short_text_cnt)
     # print('box_cnt', box_cnt)
@@ -2287,6 +2329,22 @@ def image_rotate(image_np, angle):
     return image_np
 
 
def dynamic_get_port(start_port, mode='-1', num=10):
    """Probe up to `num` TCP ports on localhost, starting at `start_port`.

    After each failed bind the candidate moves down (mode '-1', the
    default) or up (mode '+1'); any other mode re-probes the same port.
    Returns the first bindable (free) port, or None if all probes fail.
    """
    step = {'-1': -1, '+1': 1}.get(mode, 0)
    candidate = start_port
    for _ in range(num):
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            probe.bind(('localhost', candidate))
            return candidate
        except socket.error:
            candidate += step
        finally:
            probe.close()
    return None
+
+
 if __name__ == "__main__":
     # strs = r"D:\Project\temp\04384fcc9e8911ecbd2844f971944973\043876ca9e8911eca5e144f971944973_rar\1624114035529.jpeg"
     # print(slash_replace(strs))

+ 0 - 0
format_convert/yaswfp/__init__.py


+ 173 - 0
format_convert/yaswfp/helpers.py

@@ -0,0 +1,173 @@
+# Copyright 2013-2014 Facundo Batista
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 3, as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranties of
+# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
+# PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# For further info, check  http://github.com/facundobatista/yaswfp
+
+"""Some helpers for the SWF parser."""
+
+import itertools
+import struct
+
+
def grouper(n, iterable, fillvalue=None):
    """Collect data into fixed-length chunks or blocks.

    Recipe from the itertools documentation:
    grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx
    """
    # n references to the *same* iterator advance it in lockstep
    chunks = [iter(iterable)] * n
    return itertools.zip_longest(*chunks, fillvalue=fillvalue)
+
+
def unpack_si16(src):
    """Read and unpack a signed little-endian 16 bit integer."""
    (value,) = struct.unpack("<h", src.read(2))
    return value
+
+
def unpack_ui8(src):
    """Read and unpack an unsigned 8 bit integer."""
    (value,) = struct.unpack("<B", src.read(1))
    return value
+
+
def unpack_ui16(src):
    """Read and unpack an unsigned little-endian 16 bit integer."""
    (value,) = struct.unpack("<H", src.read(2))
    return value
+
+
def unpack_ui32(src):
    """Read and unpack an unsigned little-endian 32 bit integer."""
    (value,) = struct.unpack("<I", src.read(4))
    return value
+
+
def unpack_fixed8(src):
    """Get a FIXED8 (8.8 fixed point) value.

    Stored fraction-byte first, then integer-byte.
    """
    fraction, integer = struct.unpack("<2B", src.read(2))
    return integer + fraction / 256
+
+
def unpack_fixed16(src):
    """Get a FIXED16 value (called plainly FIXED in the spec).

    Stored fraction-word first, then integer-word, little endian each.
    """
    fraction, integer = struct.unpack("<2H", src.read(4))
    return integer + fraction / 65536
+
+
def unpack_float16(src):
    """Read and unpack a 16b float.

    The structure is:
    - 1 bit for the sign
    - 5 bits for the exponent, with an exponent bias of 16
    - 10 bits for the mantissa

    Fixes over the previous version:
    - ``-1 ** sign`` parses as ``-(1 ** sign)`` and negated *every*
      value; the sign must be applied as ``(-1) ** sign``.
    - the exponent of a binary float scales by powers of 2, not 10.

    NOTE(review): like the original, the mantissa is treated as a pure
    fraction (no implicit leading 1, no subnormal handling) -- confirm
    against the SWF spec if exact float semantics matter.
    """
    # two bytes consumed MSB-first == one big-endian 16 bit word
    raw = struct.unpack(">H", src.read(2))[0]
    sign = raw >> 15
    exponent = ((raw >> 10) & 0x1F) - 16
    mantissa = (raw & 0x3FF) / 2 ** 10
    return ((-1) ** sign) * mantissa * (2 ** exponent)
+
+
def unpack_float(src):
    """Read and unpack a little-endian 32 bit float."""
    (value,) = struct.unpack("<f", src.read(4))
    return value
+
+
def unpack_double(src):
    """Read and unpack a little-endian 64 bit float."""
    (value,) = struct.unpack("<d", src.read(8))
    return value
+
+
class BitConsumer:
    """Wrap a byte source and serve its content bit by bit, MSB first."""

    def __init__(self, src):
        self.src = src
        self._bits = 0    # pending bits of the current byte, right-aligned
        self._count = 0   # how many pending bits are still unread

    def u_get(self, quant):
        """Return a number built from `quant` unsigned bits."""
        if not quant:
            return 0
        result = 0
        while quant:
            if self._count == 0:
                # buffer exhausted: pull the next byte from the source
                self._bits = struct.unpack("<B", self.src.read(1))[0]
                self._count = 8
            take = quant if quant <= self._count else self._count
            self._count -= take
            quant -= take
            # peel `take` bits off the top of the buffer
            chunk = self._bits >> self._count
            self._bits &= (1 << self._count) - 1
            result = (result << take) | chunk
        return result

    def s_get(self, quant):
        """Return a number built from `quant` signed bits."""
        if quant < 2:
            # 0 or 1 bit: nothing to sign-interpret, return as unsigned
            return self.u_get(quant)

        sign = self.u_get(1)
        magnitude = self.u_get(quant - 1)
        if not sign:
            return magnitude
        # negative: undo the two's complement encoding
        mask = 2 ** (quant - 1) - 1
        return -((magnitude ^ mask) + 1)

    def fb_get(self, quant, fb=16):
        """Return a fixed-point number.

        quant: number of bits to read
        fb: number of fractional bits in the result; the default of 16
            yields a 16.16 fixed-point value.
        """
        raw = self.s_get(quant)
        if quant == 1:
            # special case, just return that unsigned value
            return raw
        return raw / (1 << fb)
+
+
class ReadQuantityController:
    """Context manager that verifies how much was read from a source.

    Records the stream position on entry and raises ValueError on exit
    when the guarded block consumed a different amount than declared.
    """

    def __init__(self, src, should):
        self._src = src
        self._should = should
        self._started = None

    def __enter__(self):
        """Remember where the guarded block starts."""
        self._started = self._src.tell()

    def __exit__(self, *exc):
        """Compare the final position with the expected one."""
        ended = self._src.tell()
        if ended != self._started + self._should:
            message = "Bad reading quantity: started={} should={} ended={}".format(
                self._started, self._should, ended)
            raise ValueError(message)

binární
format_convert/yaswfp/images/0.png


Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 0 - 0
format_convert/yaswfp/images/0.txt


binární
format_convert/yaswfp/images/1.png


Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 0 - 0
format_convert/yaswfp/images/1.txt


binární
format_convert/yaswfp/images/2.png


Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 0 - 0
format_convert/yaswfp/images/2.txt


binární
format_convert/yaswfp/images/3.png


Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 0 - 0
format_convert/yaswfp/images/3.txt


binární
format_convert/yaswfp/images/4.png


Rozdílová data souboru nebyla zobrazena, protože soubor je příliš velký
+ 0 - 0
format_convert/yaswfp/images/4.txt


binární
format_convert/yaswfp/images/5.png


+ 1733 - 0
format_convert/yaswfp/swfparser.py

@@ -0,0 +1,1733 @@
+# Copyright 2013-2014 Facundo Batista
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 3, as published
+# by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranties of
+# MERCHANTABILITY, SATISFACTORY QUALITY, or FITNESS FOR A PARTICULAR
+# PURPOSE.  See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# For further info, check  http://github.com/facundobatista/yaswfp
+
+"""Parse a SWF file and expose all its internals.
+
+This follows the SWF FILE FORMAT SPECIFICATION VERSION 19 which is not
+included in this project for your easier finding because Adobe forbids
+the spec distribution.
+
+The attributes names are CamelCase to match as close as possible the
+spec.
+
+Note: not all the spec is covered (work in progress!), there's a flag
+in the SWFParser to change the behaviour when an still-not-done object
+is found.
+"""
+
+import collections
+import io
+import os
+import sys
+import warnings
+import zlib
+sys.path.append(os.path.dirname(__file__))
+sys.path.append(os.path.dirname(__file__) + '/../../')
+from helpers import (
+    BitConsumer,
+    ReadQuantityController,
+    unpack_si16,
+    unpack_ui16,
+    unpack_ui32,
+    unpack_ui8,
+    unpack_fixed8,
+    unpack_fixed16,
+    unpack_float16,
+    unpack_float,
+    unpack_double,
+)
+
+VERSION = "0.9.3"
+
+# name of each tag (as a dict, not a list, for easier human consumption)
+TAG_NAMES = {
+    0: "End",
+    1: "ShowFrame",
+    2: "DefineShape",
+    4: "PlaceObject",
+    5: "RemoveObject",
+    6: "DefineBits",
+    7: "DefineButton",
+    8: "JPEGTables",
+    9: "SetBackgroundColor",
+    10: "DefineFont",
+    11: "DefineText",
+    12: "DoAction",
+    13: "DefineFontInfo",
+    14: "DefineSound",
+    15: "StartSound",
+    17: "DefineButtonSound",
+    18: "SoundStreamHead",
+    19: "SoundStreamBlock",
+    20: "DefineBitsLossless",
+    21: "DefineBitsJPEG2",
+    22: "DefineShape2",
+    23: "DefineButtonCxform",
+    24: "Protect",
+    26: "PlaceObject2",
+    28: "RemoveObject2",
+    32: "DefineShape3",
+    33: "DefineText2",
+    34: "DefineButton2",
+    35: "DefineBitsJPEG3",
+    36: "DefineBitsLossless2",
+    37: "DefineEditText",
+    39: "DefineSprite",
+    43: "FrameLabel",
+    45: "SoundStreamHead2",
+    46: "DefineMorphShape",
+    48: "DefineFont2",
+    56: "ExportAssets",
+    57: "ImportAssets",
+    58: "EnableDebugger",
+    59: "DoInitAction",
+    60: "DefineVideoStream",
+    61: "VideoFrame",
+    62: "DefineFontInfo2",
+    64: "EnableDebugger2",
+    65: "ScriptLimits",
+    66: "SetTabIndex",
+    69: "FileAttributes",
+    70: "PlaceObject3",
+    71: "ImportAssets2",
+    73: "DefineFontAlignZones",
+    74: "CSMTextSettings",
+    75: "DefineFont3",
+    76: "SymbolClass",
+    77: "Metadata",
+    78: "DefineScalingGrid",
+    82: "DoABC",
+    83: "DefineShape4",
+    84: "DefineMorphShape2",
+    86: "DefineSceneAndFrameLabelData",
+    87: "DefineBinaryData",
+    88: "DefineFontName",
+    89: "StartSound2",
+    90: "DefineBitsJPEG4",
+    91: "DefineFont4",
+}
+
+LANGCODES = {
+    0: "Sys",
+    1: "Latin",
+    2: "Japanese",
+    3: "Korean",
+    4: "Simplified Chinese",
+    5: "Traditional Chinese",
+}
+
+ACTION_NAMES = {
+    0x04: 'ActionNextFrame',
+    0x05: 'ActionPrevFrame',
+    0x06: 'ActionPlay',
+    0x07: 'ActionStop',
+    0x08: 'ActionToggleQualty',
+    0x09: 'ActionStopSounds',
+    0x0A: 'ActionAdd',
+    0x0B: 'ActionSubtract',
+    0x0C: 'ActionMultiply',
+    0x0D: 'ActionDivide',
+    0x0E: 'ActionEquals',
+    0x0F: 'ActionLess',
+    0x10: 'ActionAnd',
+    0x11: 'ActionOr',
+    0x12: 'ActionNot',
+    0x13: 'ActionStringEquals',
+    0x14: 'ActionStringLength',
+    0x15: 'ActionStringExtract',
+    0x17: 'ActionPop',
+    0x18: 'ActionToInteger',
+    0x1C: 'ActionGetVariable',
+    0x1D: 'ActionSetVariable',
+    0x20: 'ActionSetTarget2',
+    0x21: 'ActionStringAdd',
+    0x22: 'ActionGetProperty',
+    0x23: 'ActionSetProperty',
+    0x24: 'ActionCloneSprite',
+    0x25: 'ActionRemoveSprite',
+    0x26: 'ActionTrace',
+    0x27: 'ActionStartDrag',
+    0x28: 'ActionEndDrag',
+    0x29: 'ActionStringLess',
+    0x2A: 'ActionThrow',
+    0x2B: 'ActionCastOp',
+    0x2C: 'ActionImplementsOp',
+    0x30: 'ActionRandomNumber',
+    0x31: 'ActionMBStringLength',
+    0x32: 'ActionCharToAscii',
+    0x33: 'ActionAsciiToChar',
+    0x34: 'ActionGetTime',
+    0x35: 'ActionMBStringExtract',
+    0x36: 'ActionMBCharToAscii',
+    0x37: 'ActionMBAsciiToChar',
+    0x3A: 'ActionDelete',
+    0x3B: 'ActionDelete2',
+    0x3C: 'ActionDefineLocal',
+    0x3D: 'ActionCallFunction',
+    0x3E: 'ActionReturn',
+    0x3F: 'ActionModulo',
+    0x40: 'ActionNewObject',
+    0x41: 'ActionDefineLocal2',
+    0x42: 'ActionInitArray',
+    0x43: 'ActionInitObject',
+    0x44: 'ActionTypeOf',
+    0x45: 'ActionTargetPath',
+    0x46: 'ActionEnumerate',
+    0x47: 'ActionAdd2',
+    0x48: 'ActionLess2',
+    0x49: 'ActionEquals2',
+    0x4A: 'ActionToNumber',
+    0x4B: 'ActionToString',
+    0x4C: 'ActionPushDuplicate',
+    0x4D: 'ActionStackSwap',
+    0x4E: 'ActionGetMember',
+    0x4F: 'ActionSetMember',
+    0x50: 'ActionIncrement',
+    0x51: 'ActionDecrement',
+    0x52: 'ActionCallMethod',
+    0x53: 'ActionNewMethod',
+    0x54: 'ActionInstanceOf',
+    0x55: 'ActionEnumerate2',
+    0x60: 'ActionBitAnd',
+    0x61: 'ActionBitOr',
+    0x62: 'ActionBitXor',
+    0x63: 'ActionBitLShift',
+    0x64: 'ActionBitRShift',
+    0x65: 'ActionBitURShift',
+    0x66: 'ActionStrictEquals',
+    0x67: 'ActionGreater',
+    0x68: 'ActionStringGreater',
+    0x69: 'ActionExtends',
+    0x81: 'ActionGotoFrame',
+    0x83: 'ActionGetURL',
+    0x87: 'ActionStoreRegister',
+    0x88: 'ActionConstantPool',
+    0x8A: 'ActionWaitForFrame',
+    0x8B: 'ActionSetTarget',
+    0x8C: 'ActionGoToLabel',
+    0x8D: 'ActionWaitForFrame2',
+    0x8E: 'ActionDefineFunction2',
+    0x8F: 'ActionTry',
+    0x94: 'ActionWith',
+    0x96: 'ActionPush',
+    0x99: 'ActionJump',
+    0x9A: 'ActionGetURL2',
+    0x9B: 'ActionDefineFunction',
+    0x9D: 'ActionIf',
+    0x9E: 'ActionCall',
+    0x9F: 'ActionGotoFrame2',
+}
+
+
+def _str(obj):
+    """Show nicely the generic object received."""
+    values = []
+    for name in obj._attribs:
+        val = getattr(obj, name)
+        if isinstance(val, str):
+            val = repr(val)
+        val = str(val) if len(str(val)) < 10 else "(...)"
+        values.append((name, val))
+    values = ", ".join("{}={}".format(k, v) for k, v in values)
+    return "{}({})".format(obj.__class__.__name__, values)
+
+
+def _repr(obj):
+    """Show the received object as precise as possible."""
+    vals = ", ".join("{}={!r}".format(
+        name, getattr(obj, name)) for name in obj._attribs)
+    if vals:
+        t = "{}(name={}, {})".format(obj.__class__.__name__, obj.name, vals)
+    else:
+        t = "{}(name={})".format(obj.__class__.__name__, obj.name)
+    return t
+
+
class SWFObject:
    """Base class for every object created by the parser.

    Records in `_attribs` the order in which attributes were first
    assigned, so the generic __str__/__repr__ helpers can walk them.
    """

    def __init__(self):
        self._attribs = []

    def __setattr__(self, name, value):
        # `_attribs` itself is bookkeeping and is never tracked
        if name != "_attribs" and name not in self._attribs:
            self._attribs.append(name)
        super().__setattr__(name, value)
+
+
def _make_object(name):
    """Create a generic object (a throwaway SWFObject subclass instance)
    to hold the fields of one tag."""
    attrs = {'__str__': _str, '__repr__': _repr, 'name': name}
    return type(name, (SWFObject,), attrs)()
+
+
+class SWFParser:
+    """Read (at a byte or bit level) the SWF structure from a fileobject.
+
+    When the parser finds a structure that still can't process (because more
+    programming is needed), will just return an UnknownObject object with
+    the unparsed bytes, or will raise an exception if you set
+    the unknown_alert flag::
+
+        SWFParser.unknown_alert = True
+    """
+
+    unknown_alert = False
+
    def __init__(self, src, read_twips=True):
        """Parse the whole SWF from `src` (a binary file-like object).

        Parsing happens eagerly: the header and all tags are read here.
        read_twips is only stored -- presumably it controls coordinate
        unit conversion elsewhere; TODO confirm against the full module.
        """
        self._src = src
        self._read_twips = read_twips
        self._version = None
        # populated later by font-related handlers (not visible here)
        self._last_defined_glyphs_quantity = None
        self.header = self._get_header()
        self.tags = self._process_tags()
+
    def _get_header(self):
        """Parse the SWF header.

        Reads signature/version/length, transparently decompresses a
        zlib-compressed body ('C' signature), then reads the frame
        geometry fields from the (possibly replaced) stream.
        """
        fh = self._src
        obj = _make_object("Header")

        # first part of the header (always uncompressed)
        obj.Signature = sign = "".join(chr(unpack_ui8(fh)) for _ in range(3))
        obj.Version = self._version = unpack_ui8(fh)
        obj.FileLength = file_length = unpack_ui32(fh)

        # deal with compressed content; NOTE(review): only zlib ('CWS')
        # is handled here, LZMA ('ZWS') files would fall through
        if sign[0] == 'C':
            uncompressed = zlib.decompress(fh.read())
            # FileLength counts the 8 header bytes already consumed
            if len(uncompressed) + 8 != file_length:
                raise ValueError("Problems dealing with compressed content")
            fh = self._src = io.BytesIO(uncompressed)

        # second part of the header, read from the decompressed stream
        obj.FrameSize = self._get_struct_rect()
        obj.FrameRate = unpack_ui16(fh)
        obj.FrameCount = unpack_ui16(fh)
        return obj
+
+    def _process_tags(self):
+        """Get a sequence of tags."""
+        tags = []
+
+        while True:
+            tag_bf = unpack_ui16(self._src)
+            tag_type = tag_bf >> 6   # upper 10 bits
+            if tag_type == 0:
+                # the end
+                break
+            tag_len = tag_bf & 0x3f  # last 6 bits
+            if tag_len == 0x3f:
+                # the length is the next four bytes!
+                tag_len = unpack_ui32(self._src)
+
+            try:
+                tag_name = TAG_NAMES[tag_type]
+            except KeyError:
+                warnings.warn('unkonwn tag type: {}'.format(tag_type))
+                # malformed SWF, create and unknown object with malformed tag
+                tag_payload = self._src.read(tag_len)
+                _dict = {
+                    '__str__': _repr,
+                    '__repr__': _repr,
+                    'name': 'UnspecifiedObject(tag={!r})'.format(tag_type),
+                }
+                tag = type("UnknownObject", (SWFObject,), _dict)()
+                tag.raw_payload = tag_payload
+                tags.append(tag)
+                continue
+
+            try:
+                tag_meth = getattr(self, "_handle_tag_" + tag_name.lower())
+            except AttributeError:
+                if self.unknown_alert:
+                    raise ValueError("Unknown tag: " + repr(tag_name))
+
+                warnings.warn('tag not supported: {}'.format(tag_name))
+                tag_payload = self._src.read(tag_len)
+                _dict = {'__str__': _repr, '__repr__': _repr, 'name': tag_name}
+                tag = type("UnknownObject", (SWFObject,), _dict)()
+                tag.raw_payload = tag_payload
+                tags.append(tag)
+                continue
+
+            # we know the tag type, and have the handler, let's process it
+            prev_pos = self._src.tell()
+            self._src.guard = tag_len
+            try:
+                with ReadQuantityController(self._src, tag_len):
+                    tag = tag_meth()
+                assert tag is not None, tag_name
+            except ValueError as e:
+                warnings.warn('processing {} tag: {}'.format(tag_name, e))
+                # an attempt to read too much happened; create a failing
+                # object with the raw payload
+                self._src.guard = None
+                self._src.seek(prev_pos)
+                tag_payload = self._src.read(tag_len)
+                _dict = {'__str__': _repr, '__repr__': _repr, 'name': tag_name}
+                tag = type("FailingObject", (SWFObject,), _dict)()
+                tag.raw_payload = tag_payload
+            tags.append(tag)
+        return tags
+
+    def _handle_tag_definebits(self):
+        """Handle the DefineBits tag."""
+        tag_end = self._src.tell() + self._src.guard
+        obj = _make_object("DefineBits")
+        obj.CharacterID = unpack_ui16(self._src)
+        obj.JPEGData = self._get_raw_bytes(-tag_end)
+        return obj
+
+    def _handle_tag_definebitsjpeg2(self):
+        """Handle the DefineBitsJPEG2 tag."""
+        tag_end = self._src.tell() + self._src.guard
+        obj = _make_object("DefineBitsJPEG2")
+        obj.CharacterID = unpack_ui16(self._src)
+        obj.ImageData = self._get_raw_bytes(-tag_end)
+        return obj
+
+    def _generic_definebitsjpeg_parser(self, obj, version):
+        """Handle the DefineBitsJPEGN tag."""
+        tag_end = self._src.tell() + self._src.guard
+        obj.CharacterID = unpack_ui16(self._src)
+        obj.AlphaDataOffset = unpack_ui32(self._src)
+        if 4 == version:
+            # FIXME: 8.8 fixed point format in Comment
+            obj.DeblockParam = unpack_ui16(self._src)
+        obj.ImageData = self._get_raw_bytes(obj.AlphaDataOffset)
+        obj.BitmapAlphaData = self._get_raw_bytes(-tag_end, unzip=True)
+
+    def _handle_tag_definebitsjpeg3(self):
+        """Handle the DefineBitsJPEG3 tag."""
+        obj = _make_object("DefineBitsJPEG3")
+        self._generic_definebitsjpeg_parser(obj, 3)
+        return obj
+
+    def _handle_tag_definebitsjpeg4(self):
+        """Handle the DefineBitsJPEG4 tag."""
+        obj = _make_object("DefineBitsJPEG4")
+        self._generic_definebitsjpeg_parser(obj, 4)
+        return obj
+
    def _generic_definebitslossless_parser(self, obj, version):
        """Generic parser for the DefineBitsLosslessN tags.

        version 1 reads RGB color-table entries, version 2 RGBA.
        """
        tag_end = self._src.tell() + self._src.guard
        obj.CharacterID = unpack_ui16(self._src)
        obj.BitmapFormat = unpack_ui8(self._src)
        obj.BitmapWidth = unpack_ui16(self._src)
        obj.BitmapHeight = unpack_ui16(self._src)
        if 3 == obj.BitmapFormat:
            # format 3 carries a color table; its size field follows
            obj.BitmapColorTableSize = unpack_ui8(self._src)

        # rest of the tag is one compressed blob (unzip=True suggests
        # zlib -- confirm in _get_raw_bytes)
        BitmapData = self._get_raw_bytes(-tag_end, unzip=True)
        _src = self._src
        try:
            # temporarily re-point self._src so the struct helpers read
            # from the decompressed blob instead of the main stream
            self._src = io.BytesIO(BitmapData)
            if 3 == obj.BitmapFormat:
                if 1 == version:
                    color = self._get_struct_rgb
                elif 2 == version:
                    color = self._get_struct_rgba
                else:
                    raise ValueError("unknown version: {}".format(version))
                # stored size is (number of entries - 1)
                obj.ColorTableRGB = [
                    color() for _ in range(obj.BitmapColorTableSize + 1)]
                obj.ColormapPixelData = self._get_raw_bytes(-len(BitmapData))
            elif obj.BitmapFormat in (4, 5):
                # direct pixel formats: keep the decompressed data as-is
                obj.BitmapPixelData = BitmapData
            else:
                raise ValueError("BitmapFormat: {}".format(obj.BitmapFormat))
        finally:
            # always restore the real source stream
            self._src = _src
+
+    def _handle_tag_definebitslossless(self):
+        """Handle the DefineBitsLossless tag."""
+        obj = _make_object("DefineBitsLossless")
+        self._generic_definebitslossless_parser(obj, 1)
+        return obj
+
+    def _handle_tag_definebitslossless2(self):
+        """Handle the DefineBitsLossless2 tag."""
+        obj = _make_object("DefineBitsLossless2")
+        self._generic_definebitslossless_parser(obj, 2)
+        return obj
+
    def _generic_definetext_parser(self, obj, rgb_struct):
        """Generic parser for the DefineTextN tags.

        rgb_struct: bound method used to read a text color, so that
        DefineText passes RGB and DefineText2 passes RGBA.
        """
        obj.CharacterID = unpack_ui16(self._src)
        obj.TextBounds = self._get_struct_rect()
        obj.TextMatrix = self._get_struct_matrix()
        obj.GlyphBits = glyph_bits = unpack_ui8(self._src)
        obj.AdvanceBits = advance_bits = unpack_ui8(self._src)

        # textrecords: a zero byte terminates the list
        obj.TextRecords = records = []
        while True:
            endofrecords_flag = unpack_ui8(self._src)
            if endofrecords_flag == 0:
                # all done
                obj.EndOfRecordsFlag = 0
                break

            # we have a TEXTRECORD, let's go back the 8 bits and set the obj
            self._src.seek(-1, io.SEEK_CUR)
            record = _make_object("TextRecord")
            records.append(record)

            bc = BitConsumer(self._src)
            record.TextRecordType = bc.u_get(1)
            record.StyleFlagsReserved = bc.u_get(3)
            record.StyleFlagsHasFont = bc.u_get(1)
            record.StyleFlagsHasColor = bc.u_get(1)
            record.StyleFlagsHasYOffset = bc.u_get(1)
            record.StyleFlagsHasXOffset = bc.u_get(1)

            # optional fields, present according to the flags just read
            if record.StyleFlagsHasFont:
                record.FontID = unpack_ui16(self._src)
            if record.StyleFlagsHasColor:
                record.TextColor = rgb_struct()
            if record.StyleFlagsHasXOffset:
                record.XOffset = unpack_si16(self._src)
            if record.StyleFlagsHasYOffset:
                record.YOffset = unpack_si16(self._src)
            if record.StyleFlagsHasFont:
                record.TextHeight = unpack_ui16(self._src)

            record.GlyphCount = unpack_ui8(self._src)
            # fresh BitConsumer: glyph entries start byte-aligned
            bc = BitConsumer(self._src)
            record.GlyphEntries = glyphs = []
            for _ in range(record.GlyphCount):
                glyph = _make_object("GlyphEntry")
                glyphs.append(glyph)
                glyph.GlyphIndex = bc.u_get(glyph_bits)
                glyph.GlyphAdvance = bc.u_get(advance_bits)
+
+    def _handle_tag_definetext(self):
+        """Handle the DefineText tag."""
+        obj = _make_object("DefineText")
+        self._generic_definetext_parser(obj, self._get_struct_rgb)
+        return obj
+
+    def _handle_tag_definetext2(self):
+        """Handle the DefineText2 tag."""
+        obj = _make_object("DefineText2")
+        self._generic_definetext_parser(obj, self._get_struct_rgba)
+        return obj
+
    def _handle_tag_defineedittext(self):
        """Handle the DefineEditText tag.

        Sixteen 1-bit flags are read first (in spec order); they decide
        which of the optional fields afterwards are present.
        """
        obj = _make_object("DefineEditText")
        obj.CharacterID = unpack_ui16(self._src)
        obj.Bounds = self._get_struct_rect()

        bc = BitConsumer(self._src)
        obj.HasText = bc.u_get(1)
        obj.WordWrap = bc.u_get(1)
        obj.Multiline = bc.u_get(1)
        obj.Password = bc.u_get(1)
        obj.ReadOnly = bc.u_get(1)
        obj.HasTextColor = bc.u_get(1)
        obj.HasMaxLength = bc.u_get(1)
        obj.HasFont = bc.u_get(1)
        obj.HasFontClass = bc.u_get(1)
        obj.AutoSize = bc.u_get(1)
        obj.HasLayout = bc.u_get(1)
        obj.NoSelect = bc.u_get(1)
        obj.Border = bc.u_get(1)
        obj.WasStatic = bc.u_get(1)
        obj.HTML = bc.u_get(1)
        obj.UseOutlines = bc.u_get(1)

        # FontID and FontHeight both depend on HasFont but FontClass may
        # sit between them, hence the repeated HasFont check
        if obj.HasFont:
            obj.FontID = unpack_ui16(self._src)
        if obj.HasFontClass:
            obj.FontClass = self._get_struct_string()
        if obj.HasFont:
            obj.FontHeight = unpack_ui16(self._src)
        if obj.HasTextColor:
            obj.TextColor = self._get_struct_rgba()
        if obj.HasMaxLength:
            obj.MaxLength = unpack_ui16(self._src)
        if obj.HasLayout:
            obj.Align = unpack_ui8(self._src)
            obj.LeftMargin = unpack_ui16(self._src)
            obj.RightMargin = unpack_ui16(self._src)
            obj.Indent = unpack_ui16(self._src)
            obj.Leading = unpack_ui16(self._src)

        obj.VariableName = self._get_struct_string()
        if obj.HasText:
            obj.InitialText = self._get_struct_string()
        return obj
+
    def _generic_placeobject_parser(self, obj, version):
        """A generic parser for several PlaceObjectX.

        version 3 reads an extra byte of flags and several extra
        optional fields; field presence is driven by the flags.
        """
        bc = BitConsumer(self._src)
        obj.PlaceFlagHasClipActions = bc.u_get(1)
        obj.PlaceFlagHasClipDepth = bc.u_get(1)
        obj.PlaceFlagHasName = bc.u_get(1)
        obj.PlaceFlagHasRatio = bc.u_get(1)
        obj.PlaceFlagHasColorTransform = bc.u_get(1)
        obj.PlaceFlagHasMatrix = bc.u_get(1)
        obj.PlaceFlagHasCharacter = bc.u_get(1)
        obj.PlaceFlagMove = bc.u_get(1)

        if version == 3:
            # second flag byte, PlaceObject3 only
            obj.Reserved = bc.u_get(1)
            obj.PlaceFlagOpaqueBackground = bc.u_get(1)
            obj.PlaceFlagHasVisible = bc.u_get(1)
            obj.PlaceFlagHasImage = bc.u_get(1)
            obj.PlaceFlagHasClassName = bc.u_get(1)
            obj.PlaceFlagHasCacheAsBitmap = bc.u_get(1)
            obj.PlaceFlagHasBlendMode = bc.u_get(1)
            obj.PlaceFlagHasFilterList = bc.u_get(1)

        obj.Depth = unpack_ui16(self._src)

        if version == 3:
            if obj.PlaceFlagHasClassName or (
                    obj.PlaceFlagHasImage and obj.PlaceFlagHasCharacter):
                obj.ClassName = self._get_struct_string()

        # optional fields, in spec order
        if obj.PlaceFlagHasCharacter:
            obj.CharacterId = unpack_ui16(self._src)
        if obj.PlaceFlagHasMatrix:
            obj.Matrix = self._get_struct_matrix()
        if obj.PlaceFlagHasColorTransform:
            obj.ColorTransform = self._get_struct_cxformwithalpha()
        if obj.PlaceFlagHasRatio:
            obj.Ratio = unpack_ui16(self._src)
        if obj.PlaceFlagHasName:
            obj.Name = self._get_struct_string()
        if obj.PlaceFlagHasClipDepth:
            obj.ClipDepth = unpack_ui16(self._src)

        if version == 3:
            if obj.PlaceFlagHasFilterList:
                obj.SurfaceFilterList = self._get_struct_filterlist()
            if obj.PlaceFlagHasBlendMode:
                obj.BlendMode = unpack_ui8(self._src)
            if obj.PlaceFlagHasCacheAsBitmap:
                obj.BitmapCache = unpack_ui8(self._src)
            if obj.PlaceFlagHasVisible:
                obj.Visible = unpack_ui8(self._src)
                obj.BackgroundColor = self._get_struct_rgba()

        if obj.PlaceFlagHasClipActions:
            obj.ClipActions = self._get_struct_clipactions()
+
+    def _handle_tag_placeobject2(self):
+        """Handle the PlaceObject2 tag."""
+        obj = _make_object("PlaceObject2")
+        self._generic_placeobject_parser(obj, 2)
+        return obj
+
+    def _handle_tag_placeobject3(self):
+        """Handle the PlaceObject3 tag."""
+        obj = _make_object("PlaceObject3")
+        self._generic_placeobject_parser(obj, 3)
+        return obj
+
+    def _handle_tag_definesprite(self):
+        """Handle the DefineSprite tag."""
+        obj = _make_object("DefineSprite")
+        obj.CharacterID = unpack_ui16(self._src)
+        obj.FrameCount = unpack_ui16(self._src)
+        tags = self._process_tags()
+        obj.ControlTags = tags
+        return obj
+
    def _generic_action_parser(self):
        """Generic parser for Actions.

        Reads action records until the ActionEndFlag (code 0) is found and
        returns the list of parsed action objects.  Payload handlers are
        generators and may yield more than one action each.
        """
        actions = []
        while True:
            action_code = unpack_ui8(self._src)
            if action_code == 0:
                break

            action_name = ACTION_NAMES[action_code]
            if action_code > 128:
                # have a payload!
                # NOTE(review): per the SWF spec, any code with the high bit
                # set (>= 0x80) carries a length; '>' differs only for code
                # 0x80 exactly, for which no action is defined.
                action_len = unpack_ui16(self._src)
                try:
                    action_meth = getattr(
                        self, "_handle_" + action_name.lower())
                except AttributeError:
                    if self.unknown_alert:
                        raise ValueError(
                            "Unknown action: " + repr(action_name))

                    # no specific handler: stash the raw payload in a
                    # dynamically built UnknownAction object
                    action_payload = self._src.read(action_len)
                    _dict = {'__str__': _repr, '__repr__': _repr,
                             'name': action_name}
                    action = type("UnknownAction", (SWFObject,), _dict)()
                    action.raw_payload = action_payload
                    actions.append(action)
                else:
                    # the handler must consume exactly the declared payload
                    # length; verify via stream positions
                    prev_pos = self._src.tell()
                    for action in action_meth(action_len):
                        assert action is not None, action_name
                        actions.append(action)

                    quant_read = self._src.tell() - prev_pos
                    if quant_read != action_len:
                        raise RuntimeError(
                            "Bad bytes consumption by action {!r} handler "
                            "(did {}, should {})".format(
                                action_name, quant_read, action_len))
            else:
                # simple action, no payload
                action = _make_object(action_name)
                actions.append(action)
        return actions
+
+    def _handle_tag_doaction(self):
+        """Handle the DoAction tag."""
+        obj = _make_object("DoAction")
+        obj.Actions = self._generic_action_parser()
+        return obj
+
+    def _handle_tag_fileattributes(self):
+        """Handle the FileAttributes tag."""
+        obj = _make_object("FileAttributes")
+        bc = BitConsumer(self._src)
+
+        bc.u_get(1)  # reserved
+        obj.UseDirectBlit = bc.u_get(1)
+        obj.UseGPU = bc.u_get(1)
+        obj.HasMetadata = bc.u_get(1)
+        obj.ActionScript3 = bc.u_get(1)
+        bc.u_get(2)  # reserved
+        obj.UseNetwork = bc.u_get(1)
+        bc.u_get(24)  # reserved
+        return obj
+
+    def _handle_tag_metadata(self):
+        """Handle the Metadata tag."""
+        obj = _make_object("Metadata")
+        obj.Metadata = self._get_struct_string()
+        return obj
+
+    def _handle_tag_setbackgroundcolor(self):
+        """Handle the SetBackgroundColor tag."""
+        obj = _make_object("SetBackgroundColor")
+        obj.BackgroundColor = self._get_struct_rgb()
+        return obj
+
+    def _handle_tag_definesceneandframelabeldata(self):
+        """Handle the DefineSceneAndFrameLabelData tag."""
+        obj = _make_object("DefineSceneAndFrameLabelData")
+        obj.SceneCount = self._get_struct_encodedu32()
+        for i in range(1, obj.SceneCount + 1):
+            setattr(obj, 'Offset{}'.format(i), self._get_struct_encodedu32())
+            setattr(obj, 'Name{}'.format(i), self._get_struct_string())
+        obj.FrameLabelCount = self._get_struct_encodedu32()
+        for i in range(1, obj.FrameLabelCount + 1):
+            setattr(obj, 'FrameNum{}'.format(i), self._get_struct_encodedu32())
+            setattr(obj, 'FrameLabel{}'.format(i), self._get_struct_string())
+        return obj
+
+    def _handle_tag_defineshape4(self):
+        """Handle the DefineShape4 tag."""
+        obj = _make_object("DefineShape4")
+        obj.ShapeId = unpack_ui16(self._src)
+        obj.ShapeBounds = self._get_struct_rect()
+        obj.EdgeBounds = self._get_struct_rect()
+
+        bc = BitConsumer(self._src)
+        bc.u_get(5)  # reserved
+        obj.UsesFillWindingRule = bc.u_get(1)
+        obj.UsesNonScalingStrokes = bc.u_get(1)
+        obj.UsesScalingStrokes = bc.u_get(1)
+        obj.Shapes = self._get_struct_shapewithstyle(4)
+        return obj
+
    def _handle_tag_definemorphshape2(self):
        """Handle the DefineMorphShape2 tag.

        Parsing is deliberately partial: the bytes between the header and
        the end edges (fill/line styles, start edges) are skipped using the
        Offset field; only EndEdges is actually decoded.
        """
        obj = _make_object("DefineMorphShape2")
        obj.CharacterId = unpack_ui16(self._src)
        obj.StartBounds = self._get_struct_rect()
        obj.EndBounds = self._get_struct_rect()
        obj.StartEdgeBounds = self._get_struct_rect()
        obj.EndEdgeBounds = self._get_struct_rect()

        bc = BitConsumer(self._src)
        bc.u_get(6)  # reserved
        obj.UsesNonScalingStrokes = bc.u_get(1)
        obj.UsesScalingStrokes = bc.u_get(1)

        # Offset counts the bytes from here to the EndEdges data
        obj.Offset = unpack_ui32(self._src)

        # FIXME: this tag needs more work; I'm skipping some attributes here
        self._src.read(obj.Offset)

        obj.EndEdges = self._get_struct_shape()
        return obj
+
+    def _handle_tag_showframe(self):
+        """Handle the ShowFrame tag."""
+        return _make_object("ShowFrame")
+
+    def _handle_tag_removeobject(self):
+        """Handle the RemoveObject tag."""
+        obj = _make_object("RemoveObject")
+        obj.CharacterId = unpack_ui16(self._src)
+        obj.Depth = unpack_ui16(self._src)
+        return obj
+
+    def _handle_tag_removeobject2(self):
+        """Handle the RemoveObject2 tag."""
+        obj = _make_object("RemoveObject2")
+        obj.Depth = unpack_ui16(self._src)
+        return obj
+
+    def _handle_tag_defineshape(self):
+        """Handle the DefineShape tag."""
+        obj = _make_object("DefineShape")
+        obj.ShapeId = unpack_ui16(self._src)
+        obj.ShapeBounds = self._get_struct_rect()
+        obj.Shapes = self._get_struct_shapewithstyle(1)
+        return obj
+
+    def _handle_tag_defineshape2(self):
+        """Handle the DefineShape2 tag."""
+        obj = _make_object("DefineShape2")
+        obj.ShapeId = unpack_ui16(self._src)
+        obj.ShapeBounds = self._get_struct_rect()
+        obj.Shapes = self._get_struct_shapewithstyle(2)
+        return obj
+
+    def _handle_tag_defineshape3(self):
+        """Handle the DefineShape3 tag."""
+        obj = _make_object("DefineShape3")
+        obj.ShapeId = unpack_ui16(self._src)
+        obj.ShapeBounds = self._get_struct_rect()
+        obj.Shapes = self._get_struct_shapewithstyle(3)
+        return obj
+
    def _generic_definefont_parser(self, obj):
        """A generic parser for several DefineFontX.

        Fills ``obj`` in place with the font header flags, glyph shape
        table, code table and (when FontFlagsHasLayout is set) the layout
        tables.  Also records the glyph count on the parser instance, as
        a later DefineFontAlignZones tag needs it.
        """
        obj.FontID = unpack_ui16(self._src)

        bc = BitConsumer(self._src)
        obj.FontFlagsHasLayout = bc.u_get(1)
        obj.FontFlagsShiftJIS = bc.u_get(1)
        obj.FontFlagsSmallText = bc.u_get(1)
        obj.FontFlagsANSI = bc.u_get(1)
        obj.FontFlagsWideOffsets = bc.u_get(1)
        obj.FontFlagsWideCodes = bc.u_get(1)
        obj.FontFlagsItalic = bc.u_get(1)
        obj.FontFlagsBold = bc.u_get(1)

        obj.LanguageCode = self._get_struct_langcode()
        obj.FontNameLen = unpack_ui8(self._src)
        obj.FontName = "".join(chr(unpack_ui8(self._src))
                               for i in range(obj.FontNameLen))
        if obj.FontName[-1] == '\x00':  # most probably ends in null, clean it
            obj.FontName = obj.FontName[:-1]

        # remember the glyph count: DefineFontAlignZones consumes it later
        obj.NumGlyphs = num_glyphs = unpack_ui16(self._src)
        self._last_defined_glyphs_quantity = num_glyphs
        # offsets are 32 bit when FontFlagsWideOffsets, else 16 bit
        getter_wide = unpack_ui32 if obj.FontFlagsWideOffsets else unpack_ui16
        obj.OffsetTable = [getter_wide(self._src) for _ in range(num_glyphs)]
        obj.CodeTableOffset = getter_wide(self._src)
        obj.GlyphShapeTable = [self._get_struct_shape()
                               for _ in range(num_glyphs)]
        obj.CodeTable = [unpack_ui16(self._src) for _ in range(num_glyphs)]

        if obj.FontFlagsHasLayout:
            obj.FontAscent = unpack_ui16(self._src)
            # NOTE(review): 'FontDecent' (sic) is the spec's FontDescent;
            # the attribute name is kept as-is for compatibility
            obj.FontDecent = unpack_ui16(self._src)
            obj.FontLeading = unpack_ui16(self._src)
            obj.FontAdvanceTable = [unpack_si16(self._src)
                                    for _ in range(num_glyphs)]
            obj.FontBoundsTable = [self._get_struct_rect()
                                   for _ in range(num_glyphs)]
            obj.KerningCount = unpack_ui16(self._src)
            obj.FontKerningTable = [
                self._get_struct_kerningrecord(obj.FontFlagsWideCodes)
                for _ in range(obj.KerningCount)]
+
+    def _handle_tag_definefont2(self):
+        """Handle the DefineFont2 tag."""
+        obj = _make_object("DefineFont2")
+        self._generic_definefont_parser(obj)
+        return obj
+
+    def _handle_tag_definefont3(self):
+        """Handle the DefineFont3 tag."""
+        obj = _make_object("DefineFont3")
+        self._generic_definefont_parser(obj)
+        return obj
+
+    def _handle_tag_definebutton2(self):
+        """Handle the DefineButton2 tag."""
+        obj = _make_object("DefineButton2")
+        obj.ButtonId = unpack_ui16(self._src)
+
+        bc = BitConsumer(self._src)
+        bc.ReservedFlags = bc.u_get(7)
+        bc.TrackAsMenu = bc.u_get(1)
+
+        obj.ActionOffset = unpack_ui16(self._src)
+
+        # characters
+        obj.Characters = characters = []
+        while True:
+            end_flag = unpack_ui8(self._src)
+            if end_flag == 0:
+                # all done
+                obj.CharacterEndFlag = 0
+                break
+
+            # we have a BUTTONRECORD, let's go back the 8 bits and set the obj
+            self._src.seek(-1, io.SEEK_CUR)
+            character = _make_object("ButtonRecord")
+            characters.append(character)
+
+            bc = BitConsumer(self._src)
+            character.ButtonReserved = bc.u_get(2)
+            character.ButtonHasBlendMode = bc.u_get(1)
+            character.ButtonHasFilterList = bc.u_get(1)
+            character.ButtonStateHitTest = bc.u_get(1)
+            character.ButtonStateDown = bc.u_get(1)
+            character.ButtonStateOver = bc.u_get(1)
+            character.ButtonStateUp = bc.u_get(1)
+
+            character.CharacterId = unpack_ui16(self._src)
+            character.PlaceDepth = unpack_ui16(self._src)
+            character.PlaceMatrix = self._get_struct_matrix()
+            character.ColorTransform = self._get_struct_cxformwithalpha()
+            if character.ButtonHasFilterList:
+                character.FilterList = self._get_struct_filterlist()
+            if character.ButtonHasBlendMode:
+                character.BlendMode = unpack_ui8(self._src)
+
+        obj.Actions = actions = []
+        still_have_actions = True
+        while still_have_actions:
+            end_flag = unpack_ui16(self._src)
+            if end_flag == 0:
+                # this is the last action, parse it and then exit
+                still_have_actions = False
+
+            bca = _make_object("ButtonCondAction")
+            actions.append(bca)
+            bca.CondActionSize = end_flag
+
+            bc = BitConsumer(self._src)
+            bca.CondIdleToOverDown = bc.u_get(1)
+            bca.CondOutDownToIdle = bc.u_get(1)
+            bca.CondOutDownToOverDown = bc.u_get(1)
+            bca.CondOverDownToOutDown = bc.u_get(1)
+            bca.CondOverDownToOverUp = bc.u_get(1)
+            bca.CondOverUpToOverDown = bc.u_get(1)
+            bca.CondOverUpToIdle = bc.u_get(1)
+            bca.CondIdleToOverUp = bc.u_get(1)
+
+            bca.CondKeyPress = bc.u_get(7)
+            bca.CondOverDownToIdle = bc.u_get(1)
+            bca.Actions = self._generic_action_parser()
+
+        return obj
+
+    def _handle_tag_enabledebugger2(self):
+        """Handle the EnableDebugger2 tag."""
+        obj = _make_object("EnableDebugger2")
+        obj.Reserved = unpack_ui16(self._src)
+        obj.Password = self._get_struct_string()
+        return obj
+
+    def _handle_tag_scriptlimits(self):
+        """Handle the ScriptLimits tag."""
+        obj = _make_object("ScriptLimits")
+        obj.MaxRecursionDepth = unpack_ui16(self._src)
+        obj.ScriptTimeoutSeconds = unpack_ui16(self._src)
+        return obj
+
+    def _handle_tag_framelabel(self):
+        """Handle the FrameLabel tag."""
+        obj = _make_object("FrameLabel")
+        obj.Name = self._get_struct_string()
+        return obj
+
    def _handle_tag_jpegtables(self):
        """Handle the JPEGTables tag.

        Scans byte by byte from the SOI marker (FF D8) until the EOI
        marker (FF D9), keeping everything except the end mark itself.
        """
        obj = _make_object("JPEGTables")
        assert self._src.read(2) == b'\xFF\xD8'  # SOI marker
        eoimark1 = eoimark2 = None
        allbytes = [b'\xFF\xD8']
        # a two-byte sliding window detects the EOI marker
        while not (eoimark1 == b'\xFF' and eoimark2 == b'\xD9'):
            newbyte = self._src.read(1)
            allbytes.append(newbyte)
            eoimark1 = eoimark2
            eoimark2 = newbyte

        # concatenate everything, removing the end mark
        obj.JPEGData = b"".join(allbytes[:-2])
        return obj
+
    def _handle_tag_definefontalignzones(self):
        """Handle the DefineFontAlignZones tag.

        The zone table length is not stored in the tag itself: it is the
        glyph count of the most recently parsed DefineFontX tag, saved on
        the parser instance by _generic_definefont_parser.
        """
        obj = _make_object("DefineFontAlignZones")
        obj.FontId = unpack_ui16(self._src)
        bc = BitConsumer(self._src)
        obj.CSMTableHint = bc.u_get(2)
        obj.Reserved = bc.u_get(6)

        obj.ZoneTable = zone_records = []
        # consume (and reset) the glyph count stored by the font parser
        glyph_count = self._last_defined_glyphs_quantity
        self._last_defined_glyphs_quantity = None
        for _ in range(glyph_count):
            zone_record = _make_object("ZoneRecord")
            zone_records.append(zone_record)
            zone_record.NumZoneData = unpack_ui8(self._src)
            zone_record.ZoneData = zone_data = []
            for _ in range(zone_record.NumZoneData):
                zone_datum = _make_object("ZoneData")
                zone_data.append(zone_datum)
                zone_datum.AlignmentCoordinate = unpack_float16(self._src)
                zone_datum.Range = unpack_float16(self._src)
            # fresh BitConsumer: the mask flags are byte aligned
            bc = BitConsumer(self._src)
            zone_record.Reserved = bc.u_get(6)
            zone_record.ZoneMaskY = bc.u_get(1)
            zone_record.ZoneMaskX = bc.u_get(1)
        return obj
+
+    def _handle_tag_definefontname(self):
+        """Handle the DefineFontName tag."""
+        obj = _make_object("DefineFontName")
+        obj.FontId = unpack_ui16(self._src)
+        obj.FontName = self._get_struct_string()
+        obj.FontCopyright = self._get_struct_string()
+        return obj
+
+    def _handle_tag_csmtextsettings(self):
+        """Handle the CSMTextSettings tag."""
+        obj = _make_object("CSMTextSettings")
+        obj.TextId = unpack_ui16(self._src)
+        bc = BitConsumer(self._src)
+        obj.UseFlashType = bc.u_get(2)
+        obj.GridFit = bc.u_get(3)
+        obj.Reserved1 = bc.u_get(3)
+        obj.Thickness = unpack_float(self._src)
+        obj.Sharpness = unpack_float(self._src)
+        obj.Reserved2 = unpack_ui8(self._src)
+        return obj
+
+    def _get_raw_bytes(self, size, unzip=False):
+        '''Get raw bytes data, optional uncompress with ZLIB'''
+        pos = self._src.tell()
+        try:
+            # < 0: read until this pos
+            if size < 0:
+                assert abs(size) > pos
+                size = abs(size) - pos
+            data = self._src.read(size)
+            if unzip:
+                return zlib.decompress(data)
+            else:
+                return data
+        except Exception:
+            self._src.seek(pos, io.SEEK_SET)
+            raise
+
+    def _get_struct_rect(self):
+        """Get the RECT structure."""
+        bc = BitConsumer(self._src)
+        nbits = bc.u_get(5)
+        if self._read_twips:
+            return tuple(bc.s_get(nbits) for _ in range(4))
+        else:
+            return tuple(bc.s_get(nbits) / 20.0 for _ in range(4))
+
+    def _get_struct_rgb(self):
+        """Get the RGB structure."""
+        return [unpack_ui8(self._src) for _ in range(3)]
+
+    def _get_struct_rgba(self):
+        """Get the RGBA structure."""
+        return [unpack_ui8(self._src) for _ in range(4)]
+
+    def _get_struct_langcode(self):
+        """Get the LANGCODE structure."""
+        code = unpack_ui8(self._src)
+        return LANGCODES[code]
+
+    def _get_struct_kerningrecord(self, font_flags_wide_codes):
+        """Get the KERNINGRECORD structure."""
+        getter = unpack_ui16 if font_flags_wide_codes else unpack_ui8
+        data = {}
+        data['FontKerningCode1'] = getter(self._src)
+        data['FontKerningCode2'] = getter(self._src)
+        data['FontKerningAdjustment'] = unpack_si16(self._src)
+        return data
+
+    def _get_struct_clipactions(self):
+        """Get the several CLIPACTIONRECORDs."""
+        obj = _make_object("ClipActions")
+
+        # In SWF 5 and earlier, these are 2 bytes wide; in SWF 6
+        # and later 4 bytes
+        clipeventflags_size = 2 if self._version <= 5 else 4
+        clipactionend_size = 2 if self._version <= 5 else 4
+        all_zero = b"\x00" * clipactionend_size
+
+        assert unpack_ui16(self._src) == 0  # reserved
+        obj.AllEventFlags = self._src.read(clipeventflags_size)
+
+        obj.ClipActionRecords = records = []
+        while True:
+            next_bytes = self._src.read(clipactionend_size)
+            if next_bytes == all_zero:
+                # was the ClipActionEndFlag
+                return
+
+            record = _make_object("ClipActionRecord")
+            records.append(record)
+
+            # as event flags and end flag has same size, we can do this trick
+            record.EventFlags = next_bytes
+            record.ActionRecordSize = unpack_ui32(self._src)
+            record.TheRestTODO = self._src.read(record.ActionRecordSize)
+
+            # FIXME: this struct needs more work; the EventFlags should be
+            # expanded and each ActionRecord(s) should be detailed more
+        return obj
+
+    def _get_struct_string(self):
+        """Get the STRING structure."""
+        data = []
+        while True:
+            t = self._src.read(1)
+            if t == b'\x00':
+                break
+            data.append(t)
+        val = b''.join(data)
+        return val.decode("utf8")
+
    def _get_struct_matrix(self):
        """Get the values for the MATRIX record.

        Scale and rotate/skew parts are optional (flag + field-size +
        fixed-point values); translation is always present and converted
        from twips unless configured otherwise.
        """
        obj = _make_object("Matrix")
        bc = BitConsumer(self._src)

        # scale
        obj.HasScale = bc.u_get(1)
        if obj.HasScale:
            obj.NScaleBits = n_scale_bits = bc.u_get(5)
            obj.ScaleX = bc.fb_get(n_scale_bits)
            obj.ScaleY = bc.fb_get(n_scale_bits)

        # rotate
        obj.HasRotate = bc.u_get(1)
        if obj.HasRotate:
            obj.NRotateBits = n_rotate_bits = bc.u_get(5)
            obj.RotateSkew0 = bc.fb_get(n_rotate_bits)
            obj.RotateSkew1 = bc.fb_get(n_rotate_bits)

        # translate
        obj.NTranslateBits = n_translate_bits = bc.u_get(5)
        obj.TranslateX = bc.s_get(n_translate_bits)
        obj.TranslateY = bc.s_get(n_translate_bits)
        if not self._read_twips:
            # twips to pixels
            obj.TranslateX /= 20.0
            obj.TranslateY /= 20.0
        return obj
+
    def _get_struct_cxformwithalpha(self):
        """Get the values for the CXFORMWITHALPHA record.

        Note the read order: although HasAddTerms is the first flag in
        the stream, the multiply terms come before the add terms.
        """
        obj = _make_object("CXformWithAlpha")
        bc = BitConsumer(self._src)

        obj.HasAddTerms = bc.u_get(1)
        obj.HasMultTerms = bc.u_get(1)
        obj.NBits = nbits = bc.u_get(4)

        if obj.HasMultTerms:
            obj.RedMultTerm = bc.s_get(nbits)
            obj.GreenMultTerm = bc.s_get(nbits)
            obj.BlueMultTerm = bc.s_get(nbits)
            obj.AlphaMultTerm = bc.s_get(nbits)

        if obj.HasAddTerms:
            obj.RedAddTerm = bc.s_get(nbits)
            obj.GreenAddTerm = bc.s_get(nbits)
            obj.BlueAddTerm = bc.s_get(nbits)
            obj.AlphaAddTerm = bc.s_get(nbits)

        return obj
+
    def _get_shaperecords(self, num_fill_bits,
                          num_line_bits, shape_number):
        """Return an array of SHAPERECORDS.

        Bit-level parsing: edge records (straight/curved) and style change
        records are interleaved until an EndShapeRecord (five zero bits)
        is found.  StateNewStyles records can change the fill/line field
        widths used for the remaining records.
        """
        shape_records = []
        bc = BitConsumer(self._src)

        while True:
            type_flag = bc.u_get(1)
            if type_flag:
                # edge record
                straight_flag = bc.u_get(1)
                num_bits = bc.u_get(4)
                if straight_flag:
                    record = _make_object('StraightEdgeRecord')
                    record.TypeFlag = 1
                    record.StraightFlag = 1
                    record.NumBits = num_bits
                    record.GeneralLineFlag = general_line_flag = bc.u_get(1)
                    if general_line_flag:
                        # deltas are stored with num_bits + 2 bits each
                        record.DeltaX = bc.s_get(num_bits + 2)
                        record.DeltaY = bc.s_get(num_bits + 2)
                    else:
                        # NOTE(review): s_get(1) yields -1/0 for this flag;
                        # only its truthiness is used, so behavior is fine
                        record.VertLineFlag = vert_line_flag = bc.s_get(1)
                        if vert_line_flag:
                            record.DeltaY = bc.s_get(num_bits + 2)
                        else:
                            record.DeltaX = bc.s_get(num_bits + 2)
                else:
                    record = _make_object('CurvedEdgeRecord')
                    record.TypeFlag = 1
                    record.StraightFlag = 0
                    record.NumBits = num_bits
                    record.ControlDeltaX = bc.s_get(num_bits + 2)
                    record.ControlDeltaY = bc.s_get(num_bits + 2)
                    record.AnchorDeltaX = bc.s_get(num_bits + 2)
                    record.AnchorDeltaY = bc.s_get(num_bits + 2)

            else:
                # non edge record
                record = _make_object('StyleChangeRecord')
                record.TypeFlag = 0

                five_bits = [bc.u_get(1) for _ in range(5)]
                if not any(five_bits):
                    # the five bits are zero, this is an EndShapeRecord
                    break

                # we're not done, store the proper flags
                (record.StateNewStyles, record.StateLineStyle,
                    record.StateFillStyle1, record.StateFillStyle0,
                    record.StateMoveTo) = five_bits

                if record.StateMoveTo:
                    record.MoveBits = move_bits = bc.u_get(5)
                    record.MoveDeltaX = bc.s_get(move_bits)
                    record.MoveDeltaY = bc.s_get(move_bits)
                if record.StateFillStyle0:
                    record.FillStyle0 = bc.u_get(num_fill_bits)
                if record.StateFillStyle1:
                    record.FillStyle1 = bc.u_get(num_fill_bits)
                if record.StateLineStyle:
                    record.LineStyle = bc.u_get(num_line_bits)

                if record.StateNewStyles:
                    record.FillStyles = self._get_struct_fillstylearray(
                        shape_number)
                    record.LineStyles = self._get_struct_linestylearray(
                        shape_number)
                    # these two not only belong to the record, but also
                    # modifies the number of bits read in the future
                    # if shape number bigs enough (didn't find this in the
                    # spec, but works for now, maybe '2' is not the limit...)
                    if shape_number > 2:
                        record.NumFillBits = num_fill_bits = bc.u_get(4)
                        record.NumLineBits = num_line_bits = bc.u_get(4)
                    else:
                        record.NumFillBits = bc.u_get(4)
                        record.NumLineBits = bc.u_get(4)

                    # reset the BC here, as the structures just read work at
                    # byte level
                    bc = BitConsumer(self._src)

            shape_records.append(record)
        return shape_records
+
+    def _get_struct_shape(self):
+        """Get the values for the SHAPE record."""
+        obj = _make_object("Shape")
+        bc = BitConsumer(self._src)
+        obj.NumFillBits = n_fill_bits = bc.u_get(4)
+        obj.NumLineBits = n_line_bits = bc.u_get(4)
+        obj.ShapeRecords = self._get_shaperecords(
+            n_fill_bits, n_line_bits, 0)
+        return obj
+
    def _get_struct_fillstyle(self, shape_number):
        """Get the values for the FILLSTYLE record.

        The fields present depend on FillStyleType; per the SWF spec:
        0x00 solid color, 0x10/0x12/0x13 gradients, 0x40-0x43 bitmaps.
        """
        obj = _make_object("FillStyle")
        obj.FillStyleType = style_type = unpack_ui8(self._src)

        if style_type == 0x00:
            # solid fill; alpha channel only from DefineShape3 on
            if shape_number <= 2:
                obj.Color = self._get_struct_rgb()
            else:
                obj.Color = self._get_struct_rgba()

        if style_type in (0x10, 0x12, 0x13):
            obj.GradientMatrix = self._get_struct_matrix()

        if style_type in (0x10, 0x12):
            obj.Gradient = self._get_struct_gradient(shape_number)
        if style_type == 0x13:
            obj.Gradient = self._get_struct_focalgradient(shape_number)

        if style_type in (0x40, 0x41, 0x42, 0x43):
            obj.BitmapId = unpack_ui16(self._src)
            obj.BitmapMatrix = self._get_struct_matrix()
        return obj
+
+    def _get_struct_fillstylearray(self, shape_number):
+        """Get the values for the FILLSTYLEARRAY record."""
+        obj = _make_object("FillStyleArray")
+        obj.FillStyleCount = count = unpack_ui8(self._src)
+        if count == 0xFF:
+            obj.FillStyleCountExtended = count = unpack_ui16(self._src)
+        obj.FillStyles = [self._get_struct_fillstyle(shape_number)
+                          for _ in range(count)]
+        return obj
+
    def _get_struct_linestylearray(self, shape_number):
        """Get the values for the LINESTYLEARRAY record.

        Shape versions up to 3 use the simple LINESTYLE record; version 4
        uses the extended LINESTYLE2 with cap/join/fill flags.  A count
        byte of 0xFF means the real count follows in 16 bits.
        """
        obj = _make_object("LineStyleArray")
        obj.LineStyleCount = count = unpack_ui8(self._src)
        if count == 0xFF:
            obj.LineStyleCountExtended = count = unpack_ui16(self._src)
        obj.LineStyles = line_styles = []

        for _ in range(count):
            if shape_number <= 3:
                record = _make_object("LineStyle")
                record.Width = unpack_ui16(self._src)
                # alpha channel only from DefineShape3 on
                if shape_number <= 2:
                    record.Color = self._get_struct_rgb()
                else:
                    record.Color = self._get_struct_rgba()
            else:
                record = _make_object("LineStyle2")
                record.Width = unpack_ui16(self._src)

                bc = BitConsumer(self._src)
                record.StartCapStyle = bc.u_get(2)
                record.JoinStyle = bc.u_get(2)
                record.HasFillFlag = bc.u_get(1)
                record.NoHScaleFlag = bc.u_get(1)
                record.NoVScaleFlag = bc.u_get(1)
                record.PixelHintingFlag = bc.u_get(1)

                bc.u_get(5)  # reserved
                record.NoClose = bc.u_get(1)
                record.EndCapStyle = bc.u_get(2)

                # JoinStyle 2 is a miter join and carries its limit factor
                if record.JoinStyle == 2:
                    record.MiterLimitFactor = unpack_ui16(self._src)
                # with no fill flag the color is plain RGBA, otherwise a
                # whole FILLSTYLE record follows
                if record.HasFillFlag == 0:
                    record.Color = self._get_struct_rgba()
                else:
                    record.Color = self._get_struct_fillstyle(shape_number)

            line_styles.append(record)

        return obj
+
+    def _get_struct_encodedu32(self):
+        """Get a EncodedU32 number."""
+        useful = []
+        while True:
+            byte = ord(self._src.read(1))
+            useful.append(byte)
+            if byte < 127:
+                # got all the useful bytes
+                break
+
+        # transform into bits reordering the bytes
+        useful = ['00000000' + bin(b)[2:] for b in useful[::-1]]
+
+        # get the top 7 (*seven*, as the eight one is the flag) and convert
+        return int(''.join([b[-7:] for b in useful]), 2)
+
+    def _get_struct_shapewithstyle(self, shape_number):
+        """Get the values for the SHAPEWITHSTYLE record."""
+        obj = _make_object("ShapeWithStyle")
+        obj.FillStyles = self._get_struct_fillstylearray(shape_number)
+        obj.LineStyles = self._get_struct_linestylearray(shape_number)
+        bc = BitConsumer(self._src)
+        obj.NumFillBits = n_fill_bits = bc.u_get(4)
+        obj.NumlineBits = n_line_bits = bc.u_get(4)
+        obj.ShapeRecords = self._get_shaperecords(
+            n_fill_bits, n_line_bits, shape_number)
+        return obj
+
+    def _get_struct_gradient(self, shape_number):
+        """Get the values for the GRADIENT record."""
+        obj = _make_object("Gradient")
+        bc = BitConsumer(self._src)
+        obj.SpreadMode = bc.u_get(2)
+        obj.InterpolationMode = bc.u_get(2)
+        obj.NumGradients = bc.u_get(4)
+        obj.GradientRecords = gradient_records = []
+
+        for _ in range(obj.NumGradients):
+            record = _make_object("GradRecord")
+            gradient_records.append(record)
+            record.Ratio = unpack_ui8(self._src)
+            if shape_number <= 2:
+                record.Color = self._get_struct_rgb()
+            else:
+                record.Color = self._get_struct_rgba()
+        return obj
+
+    def _get_struct_focalgradient(self, shape_number):
+        """Get the values for the FOCALGRADIENT record."""
+        obj = _make_object("FocalGradient")
+        bc = BitConsumer(self._src)
+        obj.SpreadMode = bc.u_get(2)
+        obj.InterpolationMode = bc.u_get(2)
+        obj.NumGradients = bc.u_get(4)
+        obj.GradientRecords = gradient_records = []
+
+        for _ in range(obj.NumGradients):
+            record = _make_object("GradRecord")
+            gradient_records.append(record)
+            record.Ratio = unpack_ui8(self._src)
+            if shape_number <= 2:
+                record.Color = self._get_struct_rgb()
+            else:
+                record.Color = self._get_struct_rgba()
+
+        obj.FocalPoint = unpack_fixed8(self._src)
+        return obj
+
+    def _get_struct_filterlist(self):
+        """Get the values for the FILTERLIST record."""
+        obj = _make_object("FilterList")
+        obj.NumberOfFilters = unpack_ui8(self._src)
+        obj.Filter = filters = []
+        # how to decode each filter type (and name), according to the filter id
+        filter_type = [
+            ("DropShadowFilter", self._get_struct_dropshadowfilter),  # 0
+            ("BlurFilter", self._get_struct_blurfilter),  # 1
+            ("GlowFilter", self._get_struct_glowfilter),  # 2...
+            ("BevelFilter", self._get_struct_bevelfilter),
+            ("GradientGlowFilter", self._get_struct_gradientglowfilter),
+            ("ConvolutionFilter", self._get_struct_convolutionfilter),
+            ("ColorMatrixFilter", self._get_struct_colormatrixfilter),
+            ("GradientBevelFilter", self._get_struct_gradientbevelfilter),  # 7
+        ]
+
+        for _ in range(obj.NumberOfFilters):
+            _filter = _make_object("Filter")
+            filters.append(_filter)
+
+            _filter.FilterId = unpack_ui8(self._src)
+            name, func = filter_type[_filter.FilterId]
+            setattr(_filter, name, func())
+
    def _get_struct_dropshadowfilter(self):
        """Get the values for the DROPSHADOWFILTER record.

        All reads are sequential from the SWF byte stream, so the
        statement order mirrors the spec's field order — do not reorder.
        """
        obj = _make_object("DropShadowFilter")
        obj.DropShadowColor = self._get_struct_rgba()
        obj.BlurX = unpack_fixed16(self._src)
        obj.BlurY = unpack_fixed16(self._src)
        obj.Angle = unpack_fixed16(self._src)
        obj.Distance = unpack_fixed16(self._src)
        obj.Strength = unpack_fixed8(self._src)
        # flag bits packed into a single byte: 3 flags + 5-bit pass count
        bc = BitConsumer(self._src)
        obj.InnerShadow = bc.u_get(1)
        obj.Knockout = bc.u_get(1)
        obj.CompositeSource = bc.u_get(1)
        obj.Passes = bc.u_get(5)
        return obj
+
+    def _get_struct_blurfilter(self):
+        """Get the values for the BLURFILTER record."""
+        obj = _make_object("BlurFilter")
+        obj.BlurX = unpack_fixed16(self._src)
+        obj.BlurY = unpack_fixed16(self._src)
+        bc = BitConsumer(self._src)
+        obj.Passes = bc.u_get(5)
+        obj.Reserved = bc.u_get(3)
+        return obj
+
    def _get_struct_glowfilter(self):
        """Get the values for the GLOWFILTER record.

        Reads are strictly sequential; the order matches the spec.
        """
        obj = _make_object("GlowFilter")
        obj.GlowColor = self._get_struct_rgba()
        obj.BlurX = unpack_fixed16(self._src)
        obj.BlurY = unpack_fixed16(self._src)
        obj.Strength = unpack_fixed8(self._src)
        # flag bits packed into a single byte: 3 flags + 5-bit pass count
        bc = BitConsumer(self._src)
        obj.InnerGlow = bc.u_get(1)
        obj.Knockout = bc.u_get(1)
        obj.CompositeSource = bc.u_get(1)
        obj.Passes = bc.u_get(5)
        return obj
+
    def _get_struct_bevelfilter(self):
        """Get the values for the BEVELFILTER record.

        Reads are strictly sequential; the order matches the spec.
        """
        obj = _make_object("BevelFilter")
        obj.ShadowColor = self._get_struct_rgba()
        obj.HighlightColor = self._get_struct_rgba()
        obj.BlurX = unpack_fixed16(self._src)
        obj.BlurY = unpack_fixed16(self._src)
        obj.Angle = unpack_fixed16(self._src)
        obj.Distance = unpack_fixed16(self._src)
        obj.Strength = unpack_fixed8(self._src)
        # flag bits packed into a single byte: 4 flags + 4-bit pass count
        bc = BitConsumer(self._src)
        obj.InnerShadow = bc.u_get(1)
        obj.Knockout = bc.u_get(1)
        obj.CompositeSource = bc.u_get(1)
        obj.OnTop = bc.u_get(1)
        obj.Passes = bc.u_get(4)
        return obj
+
    def _get_struct_gradientglowfilter(self):
        """Get the values for the GRADIENTGLOWFILTER record.

        NumColors RGBA entries are read first, then the matching ratio
        bytes; reads are strictly sequential per the spec.
        """
        obj = _make_object("GradientGlowFilter")
        obj.NumColors = num_colors = unpack_ui8(self._src)
        obj.GradientColors = [self._get_struct_rgba()
                              for _ in range(num_colors)]
        obj.GradientRatio = [unpack_ui8(self._src)
                             for _ in range(num_colors)]
        obj.BlurX = unpack_fixed16(self._src)
        obj.BlurY = unpack_fixed16(self._src)
        obj.Angle = unpack_fixed16(self._src)
        obj.Distance = unpack_fixed16(self._src)
        obj.Strength = unpack_fixed8(self._src)
        # flag bits packed into a single byte: 4 flags + 4-bit pass count
        bc = BitConsumer(self._src)
        obj.InnerShadow = bc.u_get(1)
        obj.Knockout = bc.u_get(1)
        obj.CompositeSource = bc.u_get(1)
        obj.OnTop = bc.u_get(1)
        obj.Passes = bc.u_get(4)
        return obj
+
    def _get_struct_convolutionfilter(self):
        """Get the values for the CONVOLUTIONFILTER record.

        The matrix is MatrixX * MatrixY floats, read row-major straight
        from the stream.
        """
        obj = _make_object("ConvolutionFilter")
        obj.MatrixX = unpack_ui8(self._src)
        obj.MatrixY = unpack_ui8(self._src)
        obj.Divisor = unpack_float(self._src)
        obj.Bias = unpack_float(self._src)

        _quant = obj.MatrixX * obj.MatrixY
        obj.Matrix = [unpack_float(self._src) for _ in range(_quant)]

        obj.DefaultColor = self._get_struct_rgba()
        # trailing byte: 6 reserved bits + clamp + preserve-alpha flags
        bc = BitConsumer(self._src)
        obj.Reserved = bc.u_get(6)
        obj.Clamp = bc.u_get(1)
        obj.PreserveAlpha = bc.u_get(1)
        return obj
+
+    def _get_struct_colormatrixfilter(self):
+        """Get the values for the COLORMATRIXFILTER record."""
+        obj = _make_object("ColorMatrixFilter")
+        obj.Matrix = [unpack_float(self._src) for _ in range(20)]
+        return obj
+
    def _get_struct_gradientbevelfilter(self):
        """Get the values for the GRADIENTBEVELFILTER record.

        Same layout as GRADIENTGLOWFILTER: colors, ratios, geometry,
        then a packed flag byte; reads are strictly sequential.
        """
        obj = _make_object("GradientBevelFilter")
        obj.NumColors = num_colors = unpack_ui8(self._src)
        obj.GradientColors = [self._get_struct_rgba()
                              for _ in range(num_colors)]
        obj.GradientRatio = [unpack_ui8(self._src)
                             for _ in range(num_colors)]
        obj.BlurX = unpack_fixed16(self._src)
        obj.BlurY = unpack_fixed16(self._src)
        obj.Angle = unpack_fixed16(self._src)
        obj.Distance = unpack_fixed16(self._src)
        obj.Strength = unpack_fixed8(self._src)
        # flag bits packed into a single byte: 4 flags + 4-bit pass count
        bc = BitConsumer(self._src)
        obj.InnerShadow = bc.u_get(1)
        obj.Knockout = bc.u_get(1)
        obj.CompositeSource = bc.u_get(1)
        obj.OnTop = bc.u_get(1)
        obj.Passes = bc.u_get(4)
        return obj
+
+    def _handle_actionconstantpool(self, _):
+        """Handle the ActionConstantPool action."""
+        obj = _make_object("ActionConstantPool")
+        obj.Count = count = unpack_ui16(self._src)
+        obj.ConstantPool = pool = []
+        for _ in range(count):
+            pool.append(self._get_struct_string())
+        yield obj
+
+    def _handle_actiongeturl(self, _):
+        """Handle the ActionGetURL action."""
+        obj = _make_object("ActionGetURL")
+        obj.UrlString = self._get_struct_string()
+        obj.TargetString = self._get_struct_string()
+        yield obj
+
+    def _handle_actionpush(self, length):
+        """Handle the ActionPush action."""
+        init_pos = self._src.tell()
+        while self._src.tell() < init_pos + length:
+            obj = _make_object("ActionPush")
+            obj.Type = unpack_ui8(self._src)
+            # name and how to read each type
+            push_types = {
+                0: ("String", self._get_struct_string),
+                1: ("Float", lambda: unpack_float(self._src)),
+                2: ("Null", lambda: None),
+                4: ("RegisterNumber", lambda: unpack_ui8(self._src)),
+                5: ("Boolean", lambda: unpack_ui8(self._src)),
+                6: ("Double", lambda: unpack_double(self._src)),
+                7: ("Integer", lambda: unpack_ui32(self._src)),
+                8: ("Constant8", lambda: unpack_ui8(self._src)),
+                9: ("Constant16", lambda: unpack_ui16(self._src)),
+            }
+            name, func = push_types[obj.Type]
+            setattr(obj, name, func())
+            yield obj
+
+    def _handle_actiondefinefunction(self, _):
+        """Handle the ActionDefineFunction action."""
+        obj = _make_object("ActionDefineFunction")
+        obj.FunctionName = self._get_struct_string()
+        obj.NumParams = unpack_ui16(self._src)
+        for i in range(1, obj.NumParams + 1):
+            setattr(obj, "param" + str(i), self._get_struct_string())
+        obj.CodeSize = unpack_ui16(self._src)
+        yield obj
+
+    def _handle_actionif(self, _):
+        """Handle the ActionIf action."""
+        obj = _make_object("ActionIf")
+        obj.BranchOffset = unpack_si16(self._src)
+        yield obj
+
    def _handle_actiondefinefunction2(self, _):
        """Handle the ActionDefineFunction2 action.

        The flag bits are read in strict spec order; do not reorder the
        u_get calls.  ("Supress" spelling is kept as-is: the attribute
        names are part of the parsed-object API.)
        """
        obj = _make_object("ActionDefineFunction2")
        obj.FunctionName = self._get_struct_string()
        obj.NumParams = unpack_ui16(self._src)
        obj.RegisterCount = unpack_ui8(self._src)
        bc = BitConsumer(self._src)
        obj.PreloadParentFlag = bc.u_get(1)
        obj.PreloadRootFlag = bc.u_get(1)
        obj.SupressSuperFlag = bc.u_get(1)
        obj.PreloadSuperFlag = bc.u_get(1)
        obj.SupressArgumentsFlag = bc.u_get(1)
        obj.PreloadArgumentsFlag = bc.u_get(1)
        obj.SupressThisFlag = bc.u_get(1)
        obj.PreloadThisFlag = bc.u_get(1)
        obj.Reserved = bc.u_get(7)
        obj.PreloadGlobalFlag = bc.u_get(1)
        # each parameter is a (register, name) pair
        obj.Parameters = parameters = []
        for _ in range(obj.NumParams):
            parameter = _make_object("Parameter")
            parameters.append(parameter)
            parameter.Register = unpack_ui8(self._src)
            parameter.ParamName = self._get_struct_string()
        obj.CodeSize = unpack_ui16(self._src)
        yield obj
+
+    def coverage(self):
+        """Calculate the coverage of a file."""
+        items_unk = collections.Counter()
+        items_ok = collections.Counter()
+
+        def _go_deep(obj):
+            """Recursive function to find internal attributes."""
+            if type(obj).__name__ in ('UnknownObject', 'UnknownAction'):
+                # blatantly unknown
+                items_unk[obj.name] += 1
+            elif obj.name in ('DefineMorphShape2', 'ClipActions'):
+                # these are incomplete, see FIXMEs in the code above
+                items_unk[obj.name] += 1
+            else:
+                # fully parsed
+                items_ok[obj.name] += 1
+
+            for name in obj._attribs:
+                attr = getattr(obj, name)
+                if isinstance(attr, SWFObject):
+                    _go_deep(attr)
+
+        for tag in self.tags:
+            _go_deep(tag)
+
+        full_count = sum(items_ok.values()) + sum(items_unk.values())
+        coverage = 100 * sum(items_ok.values()) / full_count
+        print("Coverage is {:.1f}% of {} total items".format(coverage,
+                                                             full_count))
+        print("Most common parsed objects:")
+        for k, v in items_ok.most_common(3):
+            print("{:5d} {}".format(v, k))
+        if items_unk:
+            print("Most common Unknown objects")
+            for k, v in items_unk.most_common(3):
+                print("{:5d} {}".format(v, k))
+
+
def parsefile(filename, read_twips=True):
    """Parse a SWF.

    If you have a file object already, just use SWFParser directly.

    read_twips: True  - return values as read from the SWF
                False - return values in pixels (at 100% zoom)
    """
    # NOTE(review): the handle is closed when this returns — presumably
    # SWFParser consumes the whole stream in its constructor; confirm.
    with open(filename, 'rb') as fh:
        return SWFParser(fh, read_twips)
+
+
if __name__ == "__main__":
    # Manual smoke test: parse a sample SWF and dump every image-bearing
    # tag into images/, shrinking very large pictures before re-saving.
    import cv2
    import numpy as np
    import time
    import traceback
    from PIL import Image
    from format_convert.utils import pil2np

    start_time = time.time()
    p = "C:/Users/Administrator/Downloads/13035f4a379c4d24b89835456e047c14.swf"
    # p = "C:/Users/Administrator/Desktop/test_swf/error1.swf"
    swf_parser = parsefile(p)

    index = 0
    for tag in swf_parser.tags:
        try:
            # only tags carrying embedded image bytes are of interest
            if not hasattr(tag, 'ImageData'):
                continue
            byte_data = tag.ImageData
            # NOTE(review): the raw bytes are written with a .png extension
            # regardless of their actual encoding; PIL re-opens the file
            # below, so other formats presumably still load — confirm.
            with open('images/' + str(index) + '.png', 'wb') as f:
                f.write(byte_data)
            # with open('images/' + str(index) + '.txt', 'w') as f:
            #     f.write(str(byte_data))

            image = Image.open('images/' + str(index) + '.png')
            # image_np = pil2np(image)
            print(index, image.size)
            # shrink very large images before re-saving at low quality
            if image.size[0] > 1000 and image.size[1] > 1000:
                image = image.resize((600, 1000), Image.BILINEAR)
            image.save('images/' + str(index) + '.png', quality=10, )
            #
            # with open('images/' + str(index) + '.png', 'rb') as f:
            #     byte_data = f.read()
            # with open('images/' + str(index) + '.txt', 'w') as f:
            #     f.write(str(byte_data))

        except:
            traceback.print_exc()
        index += 1
    print(time.time()-start_time)

+ 1 - 2
idc/idc_interface.py

@@ -8,10 +8,9 @@ import traceback
 from glob import glob
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
-from format_convert.max_compute_config import max_compute
+from config.max_compute_config import MAX_COMPUTE
 import tensorflow as tf
 
-MAX_COMPUTE = max_compute
 
 if not MAX_COMPUTE:
     # tensorflow 内存设置

+ 1 - 2
isr/isr_interface.py

@@ -6,10 +6,9 @@ import sys
 import traceback
 # os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
-from format_convert.max_compute_config import max_compute
+from config.max_compute_config import MAX_COMPUTE
 import tensorflow as tf
 tf.compat.v1.disable_eager_execution()
-MAX_COMPUTE = max_compute
 
 if not MAX_COMPUTE:
     # tensorflow 内存设置

+ 68 - 0
monitor/monitor_main_interface.py

@@ -0,0 +1,68 @@
+import os
+import re
+import time
+import psutil
+import subprocess
+from datetime import datetime, timedelta
+
+
def monitor():
    """Report per-worker idle time over the last 10 minutes.

    Finds every running gunicorn 'convert:app' worker, extracts the
    /convert.out log slice between (now - 10 min) and now, and sums the
    gaps between a worker finishing one request ('is_success') and
    picking up the next ('into convert').  Prints per-pid idle seconds
    and the average across workers.
    """
    pid_list = psutil.pids()
    main_pid_list = []
    for pid in pid_list:
        try:
            process = psutil.Process(pid)
        except:
            # process disappeared or is inaccessible
            continue
        process_cmd = ''
        for c in process.cmdline():
            process_cmd += c + " "
        if process_cmd.strip() == "":
            continue

        if re.search('convert:app', process_cmd):
            # print(pid, process_cmd)
            main_pid_list.append(pid)

    main_pid_list.sort(key=lambda x: x)
    print('main_pid_list', main_pid_list)

    # BUG FIX: with no matching workers the average below divided by zero
    if not main_pid_list:
        print('no convert:app process found')
        return

    now = datetime.now()
    last_10_min = now - timedelta(minutes=10)
    now = now.strftime("%Y-%m-%d %H:%M:%S")
    last_10_min = last_10_min.strftime("%Y-%m-%d %H:%M:%S")
    # round both window boundaries down to the nearest 10-minute mark
    now = now[:-4] + '0:00'
    last_10_min = last_10_min[:-4] + '0:00'

    command = "sed -n '/%s/,/%s/p' /convert.out" % (last_10_min, now)
    print('command', command)

    result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
    text = result.stdout
    text = str(text).split('\n')

    all_free_time = 0
    for pid in main_pid_list:
        print('pid', pid)
        time_len = len('2024-06-07 10:15:12')
        time_finish = None
        free_time = 0
        for line in text:
            line = str(line)
            try:
                # NOTE(review): re.search(str(pid), line) matches the pid as
                # a plain substring, so it can also hit timestamps or other
                # numbers on the line — confirm the log format guards this.
                if re.search(str(pid), line):
                    if time_finish is not None and re.search('into convert', line):
                        free_time += (datetime.strptime(line[:time_len], "%Y-%m-%d %H:%M:%S")-time_finish).seconds
                        # print('time_finish', str(time_finish), 'time_start', line[:time_len])
                        # print('add free time', free_time)
                    if re.search('is_success', line):
                        time_finish = datetime.strptime(line[:time_len], "%Y-%m-%d %H:%M:%S")
                        # print('set time_finish', line[:time_len])
            except:
                # lines without a leading timestamp are skipped
                continue
        all_free_time += free_time
        print(pid, 'free time in 10 min:', free_time)
    print(round(all_free_time / len(main_pid_list), 2))
+
+if __name__ == '__main__':
+    monitor()

+ 3 - 0
monitor/watch_10_minutes_process.sh

@@ -0,0 +1,3 @@
#!/bin/bash

# Count how many conversions finished ("is_success" lines) within a
# given 10-minute window of /convert.out; edit the timestamps as needed.
sed -n '/2024-05-29 17:30:00/,/2024-05-29 17:40:00/p' /convert.out | grep 'is_success' | wc -l

+ 2 - 2
ocr/paddleocr.py

@@ -31,7 +31,7 @@ from tqdm import tqdm
 os.environ['FLAGS_eager_delete_tensor_gb'] = '0'
 from ocr.tools.infer import predict_system
 from ocr.ppocr.utils.logging import get_logger
-from format_convert.max_compute_config import max_compute
+from config.max_compute_config import MAX_COMPUTE
 
 logger = get_logger()
 from ocr.ppocr.utils.utility import check_and_read_gif, get_image_file_list
@@ -188,7 +188,7 @@ def parse_args(mMain=True, add_help=True):
         parser.add_argument("--use_angle_cls", type=str2bool, default=False)
         return parser.parse_args()
     else:
-        if max_compute:
+        if MAX_COMPUTE:
             use_gpu = False
         else:
             use_gpu = True

+ 2 - 3
ocr/tools/infer/predict_det_pytorch.py

@@ -32,14 +32,13 @@ from ocr.ppocr.utils.logging import get_logger
 from ocr.ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ocr.ppocr.data import create_operators, transform
 from ocr.ppocr.postprocess import build_post_process
-from format_convert.max_compute_config import max_compute
+from config.max_compute_config import MAX_COMPUTE
 
 import torch
 from torch import nn
 from ocr.tools.infer.torch_det_model import DB_ResNet_18
 import gc
 
-MAX_COMPUTE = max_compute
 logger = get_logger()
 
 
@@ -196,7 +195,7 @@ class TextDetector(object):
         img = img.to(self.device)
         try:
             # 加锁,防止太多大图片同时预测,爆显存
-            if ori_im.shape[0] > 1024 and ori_im.shape[1] > 1024 and get_platform() != "Windows" and not max_compute:
+            if ori_im.shape[0] > 1024 and ori_im.shape[1] > 1024 and get_platform() != "Windows" and not MAX_COMPUTE:
                 time2 = time.time()
                 lock_file_sub = 'ocr'
                 lock_file = os.path.abspath(os.path.dirname(__file__)) + "/" + lock_file_sub + ".lock"

+ 1 - 2
otr/otr_interface.py

@@ -7,10 +7,9 @@ import traceback
 # os.environ['TF_XLA_FLAGS'] = '--tf_xla_cpu_global_jit'
 # os.environ['CUDA_VISIBLE_DEVICES'] = "0"
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
-from format_convert.max_compute_config import max_compute
+from config.max_compute_config import MAX_COMPUTE
 import tensorflow as tf
 
-MAX_COMPUTE = max_compute
 
 if not MAX_COMPUTE:
     # tensorflow 内存设置

+ 0 - 0
format_convert/kill_all.py → start_and_stop/kill_all.py


+ 0 - 0
format_convert/kill_all.sh → start_and_stop/kill_all.sh


+ 0 - 0
format_convert/kill_main.sh → start_and_stop/kill_main.sh


+ 0 - 0
format_convert/kill_office.py → start_and_stop/kill_office.py


+ 5 - 1
format_convert/monitor_process_config.py → start_and_stop/start_all.py

@@ -7,6 +7,7 @@ import time
 import psutil
 sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
 from format_convert.utils import get_ip_port, get_intranet_ip, get_args_from_config, get_all_ip, get_using_ip
+from config.interface_list import INTERFACES
 
 # 解析配置文件
 ip_port_dict = get_ip_port()
@@ -22,7 +23,8 @@ std_out_schedule = " >>/schedule.out 2>&1 &"
 python_path = get_args_from_config(ip_port_dict, ip, "python_path")[0]
 project_path = get_args_from_config(ip_port_dict, ip, "project_path")[0]
 gunicorn_path = get_args_from_config(ip_port_dict, ip, "gunicorn_path")[0]
-interface_list = ['convert', 'ocr', 'otr', 'idc', 'isr', 'atc', 'yolo', 'office']
+# interface_list = ['convert', 'ocr', 'otr', 'idc', 'isr', 'atc', 'yolo', 'office', 'tika']
+interface_list = INTERFACES
 comm_dict = {}
 interface_port_dict = {}
 for name in interface_list:
@@ -57,6 +59,8 @@ for name in interface_list:
             for office_port in range(port, port + port_num):
                 office_port_comm_list.append(re.sub("#", str(office_port), comm))
             comm_dict[name] = office_port_comm_list
+        elif name == 'tika':
+            comm = "nohup " + gunicorn_path + " -w " + str(port_num) + " -t 300 --keep-alive 600 -b 0.0.0.0:" + str(port) + " --chdir " + project_path + "/" + name + '_ ' + name + "_interface:app" + std_out_gpu
         else:
             comm = "nohup " + gunicorn_path + " -w " + str(port_num) + " -t 300 --keep-alive 600 -b 0.0.0.0:" + str(port) + " --chdir " + project_path + "/" + name + ' ' + name + "_interface:app" + std_out_gpu
 

+ 47 - 0
tika_/doc.html

@@ -0,0 +1,47 @@
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta charset="UTF-8">
+<title>投标供应商报名表</title>
+</head>
+<body><div class="header" />
+<p><b>投标供应商报名表</b></p>
+<table border="1"><tbody><tr>	<td><p>项目名称</p>
+</td>	<td><p>华丰村华中、华群小区邻里中心厨房设备采购项目</p>
+</td></tr>
+<tr>	<td><p>项目编号</p>
+</td>	<td><p>ZDGC2024-017</p>
+</td>	<td><p>所投标项</p>
+</td>	<td><p>/</p>
+</td></tr>
+<tr>	<td><p>投标单位全称</p>
+</td>	<td><p />
+</td></tr>
+<tr>	<td><p>通信地址</p>
+</td>	<td><p />
+</td></tr>
+<tr>	<td><p>投标人开票资料</p>
+</td>	<td><p />
+</td></tr>
+<tr>	<td><p>项目联系人</p>
+</td>	<td><p />
+</td>	<td><p>法人</p>
+</td>	<td><p />
+</td></tr>
+<tr>	<td><p>联系电话、手机</p>
+</td>	<td><p />
+</td></tr>
+<tr>	<td><p>电子邮箱</p>
+</td>	<td><p />
+</td></tr>
+<tr>	<td><p>投标人盖章:</p>
+</td>	<td><p />
+</td>	<td><p>日期:</p>
+</td></tr>
+<tr>	<td><p><b>报名表填写完整,随营业执照扫描件、报名费交纳凭证、符合供应商特定资格要求(如有)的有效证明材料扫描件,发送至电子邮箱:331747541@qq.com,电子邮件备注项目编号:ZDGC2024-017</b></p>
+</td></tr>
+</tbody></table>
+<p><b>报名费电子发票以电子邮件的形式,发送至报名时所留的电子邮箱里,请自行下载打印。如需开具专票的请备注。</b></p>
+<p>采购代理机构:浙江正大工程管理咨询有限公司
+</p>
+<p>联系电话:0573-87297016</p>
+</body></html>

binární
tika_/files/tika-server.jar


+ 1 - 0
tika_/files/tika-server.jar.md5

@@ -0,0 +1 @@
+a590c87fec77730e5c1e0757de4f49e5

+ 158 - 0
tika_/tika_interface.py

@@ -0,0 +1,158 @@
+import json
+import os
+import re
+import sys
+import time
+import traceback
+from glob import glob
+
+import psutil
+
+sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/../")
+_dir = os.path.abspath(os.path.dirname(__file__))
+os.environ["TIKA_SERVER_JAR"] = _dir + "/files/tika-server.jar"
+os.environ["TIKA_LOG_PATH"] = _dir + "/log/"
+os.environ["TIKA_PATH"] = _dir + "/files/"
+os.environ["TIKA_LOG_FILE"] = "tika.log"
+
+from format_convert import _global
+from format_convert.utils import log, request_post, dynamic_get_port
+import tika
+from tika import parser, config
+from tika.tika import runCommand
+from flask import Flask, request
+
+
+# 接口配置
+app = Flask(__name__)
+
+# tika.initVM()
+
+
@app.route('/tika', methods=['POST'])
def _tika():
    """Flask endpoint: run the file path posted as form field 'data'
    through Tika and return {"html": ...} as JSON.

    Error conventions for the html value: [-9] no form data, [-5]
    timeout, [-1] unexpected failure ([-17] comes from tika_interface).
    """
    _global._init()
    _global.update({"port": globals().get("port")})
    start_time = time.time()

    log("into tika_interface _tika")
    try:
        if not request.form:
            log("tika no data!")
            return json.dumps({"html": str([-9])})
        data = request.form.get("data")
        log("tika_interface get data time" + str(time.time()-start_time))

        # md5 is carried in the per-request globals for logging
        _md5 = request.form.get("md5")
        _global.update({"md5": _md5})

        html = tika_interface(data).get('html')
        return json.dumps({"html": html})
    except TimeoutError:
        return json.dumps({"html": [-5]})
    except:
        traceback.print_exc()
        return json.dumps({"html": [-1]})
    finally:
        log("tika interface finish time " + str(time.time()-start_time))
+
+
def tika_interface(_path, show=1):
    """Extract a document's content as XHTML via a local Apache Tika server.

    :param _path: path of the file to parse
    :param show: when truthy, also dump the resulting html to doc.html
    :return: dict with key "html": the cleaned html string, '' when the
             document is (nearly) empty, or [-17] on any failure
    """
    try:
        # extract through the Apache Tika server
        # text = runCommand('parse', 'all', _path, '9998', outDir='./files/')
        port = 9998
        pid = os.getpid()
        # the port resolved for this worker process is cached in globals()
        key = 'dynamic_port_' + str(pid)
        if globals().get(key):
            port = globals().get(key)
        else:
            port = dynamic_get_port(port)
            if port is None:
                kill_tika_java_server()
                # NOTE(review): when no port is found the code proceeds with
                # port=None, yielding url 'http://localhost:None' and relying
                # on the except below — confirm whether the commented early
                # return should be restored.
                # return {"html": [-19]}
            globals().update({key: port})

        url = 'http://localhost:' + str(port)
        log('tika ' + key + ' port: ' + str(port))
        parsed = parser.from_file(_path, xmlContent=True, serverEndpoint=url)
        html = parsed.get('content')

        # post-process the html
        html = html.split('\n')
        temp_list = []
        for line in html:
            # drop tika's own <meta> lines; a UTF-8 meta is re-inserted below
            if '<meta' in line:
                continue
            temp_list.append(line)
        html = temp_list
        # four or fewer remaining lines means an effectively empty document
        if len(html) <= 4:
            return {"html": ''}

        html = html[:2] + ['<meta charset="UTF-8">'] + html[2:]
        html = '\n'.join(html)
        html = re.sub('<table>', '<table border="1">', html)
        html = re.sub(' class="正文"', '', html)

        if show:
            with open(_dir + '/doc.html', 'w', encoding='utf-8') as f:
                f.write(html)
    except:
        traceback.print_exc()
        return {"html": [-17]}
    return {"html": html}
+
+
def kill_tika_java_server():
    """Kill every running java process launched from the tika_ directory."""
    java_path = 'format_conversion_maxcompute/tika_'
    for pid in psutil.pids():
        try:
            proc = psutil.Process(pid)
        except:
            continue
        # rebuild the full command line as one space-separated string
        cmd = ''.join(part + " " for part in proc.cmdline())
        if not cmd.strip():
            continue
        if re.search(java_path, cmd) and re.search('java', cmd):
            kill_cmd = "kill -9 " + str(pid)
            print(kill_cmd, cmd)
            os.system(kill_cmd)
+
+
def test_interface():
    """Send a sample .doc path to the remote /tika endpoint and print the reply."""
    sample_paths = ["files/1716253106319.doc"]
    for sample in sample_paths:
        payload = {"data": sample, "md5": '1'}
        endpoint = "http://192.168.2.102:5000/tika"
        print(json.loads(request_post(endpoint, payload)))
+
+
if __name__ == "__main__":
    # Manual entry point: the commented code below exercises
    # tika_interface() on local sample files or starts the Flask app;
    # as committed it only kills any stray Tika java servers.
    # linux_flag = 1
    # if not linux_flag:
    #     p_list = [
    #         "C:/Users/Administrator/Downloads/1716253106319.doc",
    #         # "C:/Users/Administrator/Downloads/1716255351142.doc",
    #         # "C:/Users/Administrator/Downloads/1637042763112.xls",
    #         # "C:/Users/Administrator/Desktop/test_doc/error5.doc",
    #     ]
    # else:
    #     p_list = [
    #         "files/1716253106319.doc",
    #         # "files/1716255351142.doc",
    #         # "files/1716255350191.doc",
    #     ]
    #
    # for _p in p_list:
    #     # _p = "C:/Users/Administrator/Downloads/1716253106319.doc"
    #     tika_interface(_p)

    # app.run(host='0.0.0.0', port=5000)
    # test_interface()
    kill_tika_java_server()

Některé soubory nejsou zobrazeny, neboť je v těchto rozdílových datech změněno mnoho souborů