|
@@ -1,12 +1,15 @@
|
|
|
import os
|
|
|
import sys
|
|
|
+import time
|
|
|
+
|
|
|
sys.path.append(os.path.dirname(__file__) + "/../")
|
|
|
+from format_convert.convert_tree import _Document, _Image, _Page
|
|
|
import base64
|
|
|
import codecs
|
|
|
import logging
|
|
|
import re
|
|
|
import traceback
|
|
|
-from format_convert import get_memory_info
|
|
|
+from format_convert import get_memory_info, timeout_decorator
|
|
|
from format_convert.convert_image import picture2text
|
|
|
from format_convert.swf.export import SVGExporter
|
|
|
from format_convert.swf.movie import SWF
|
|
@@ -85,4 +88,66 @@ def swf2text(path, unique_type_dir):
|
|
|
except Exception as e:
|
|
|
logging.info("swf2text error!")
|
|
|
print("swf2text", traceback.print_exc())
|
|
|
- return [-1]
|
|
|
+ return [-1]
|
|
|
+
|
|
|
+
|
|
|
class SwfConvert:
    """Convert an SWF file into the project's document tree and render it.

    The SWF is exported to SVG text; every ``<image>`` element carrying an
    inline base64 payload is decoded, and each decoded image is attached to a
    single ``_Page`` that is then added to a ``_Document``.

    Failures are reported through ``self._doc.error_code`` rather than by
    raising: ``[-3]`` when the SWF cannot be opened/exported, ``[-1]`` for any
    other error during conversion.
    """

    # An ``<image id=...`` tag, up to (but not including) its closing ``>``.
    _IMAGE_TAG_RE = re.compile(r'<image id=(.[^>]*)')
    # The inline ``data:`` URI inside an image tag.
    _DATA_HREF_RE = re.compile(r'xlink:href="data:(.[^>]*)')
    # The payload shows up in the SVG text as a bytes repr (``b'...'``);
    # group 1 is the text between the quotes.
    _BYTES_REPR_RE = re.compile(r"b'(.[^']*)")

    def __init__(self, path, unique_type_dir):
        """Remember the input location and create the empty document.

        Args:
            path: Path of the SWF file to convert.
            unique_type_dir: Stored on the instance; not used by this class
                itself.
        """
        self._doc = _Document(path)
        self.path = path
        self.unique_type_dir = unique_type_dir
        # Filled in by init_package() / convert(); declared here so the
        # attributes always exist.
        self.swf_str = None
        self._page = None

    def init_package(self):
        """Export the SWF at ``self.path`` to SVG text in ``self.swf_str``.

        On any failure the error is logged and ``self._doc.error_code`` is set
        to ``[-3]``; nothing is raised.
        """
        try:
            with open(self.path, 'rb') as f:
                swf_file = SWF(f)
                svg_exporter = SVGExporter()
                svg = swf_file.export(svg_exporter)
            self.swf_str = str(svg.getvalue(), encoding='utf-8')
        except Exception:
            logging.info("cannot open swf!")
            traceback.print_exc()
            self._doc.error_code = [-3]

    @staticmethod
    def _image_path_prefix(path):
        """Return ``<path without extension>_<extension>`` for naming images.

        ``os.path.splitext`` is used so that dots in directory names are kept
        intact and a path without any extension does not raise.
        """
        root, ext = os.path.splitext(path)
        return root + "_" + ext[1:]

    @classmethod
    def _decode_embedded_images(cls, svg_text):
        """Return the decoded bytes of every inline base64 image in *svg_text*.

        Image tags that carry no inline ``data:`` payload, or whose payload
        cannot be decoded, are logged and skipped instead of aborting the
        whole conversion.
        """
        images = []
        if not svg_text:
            return images

        for tag_match in cls._IMAGE_TAG_RE.finditer(svg_text):
            # Take one character past the match so the tag's closing ">" is
            # part of the text that is searched.
            tag_text = svg_text[tag_match.start():tag_match.end() + 1]

            # Locate the base64 payload inside the tag.
            href_match = cls._DATA_HREF_RE.search(tag_text)
            payload_match = None
            if href_match is not None:
                payload_match = cls._BYTES_REPR_RE.search(href_match.group(0))
            if payload_match is None:
                logging.info("swf image without embedded base64 data, skipped")
                continue

            # base64 text -> bytes -> bytes with repr escapes (e.g. "\\n")
            # resolved -> raw image bytes.
            try:
                escaped = payload_match.group(1).encode("utf-8")
                base64_bytes = codecs.escape_decode(escaped)[0]
                images.append(base64.b64decode(base64_bytes))
            except ValueError:
                # binascii.Error (bad padding) is a ValueError subclass too.
                logging.info("swf image with undecodable base64 data, skipped")
        return images

    def convert(self):
        """Build the document tree: one page holding every embedded image.

        Returns early, leaving the document empty, when ``init_package``
        recorded an error.
        """
        self.init_package()
        if self._doc.error_code is not None:
            return

        self._page = _Page(None, 0)
        image_path_prefix = self._image_path_prefix(self.path)
        decoded_images = self._decode_embedded_images(self.swf_str)
        for image_no, image_bytes in enumerate(decoded_images):
            image_path = image_path_prefix + "_page_" + str(image_no) + ".png"

            _image = _Image(image_bytes, image_path)
            # The running index is stored as the image's y value
            # (presumably its vertical order on the page - confirm in _Image).
            _image.y = image_no
            self._page.add_child(_image)
        self._doc.add_child(self._page)

    def get_html(self):
        """Run the conversion and return the document's HTML.

        Returns:
            ``self._doc.get_html()`` on success, otherwise the error code
            list stored on the document (``[-3]`` or ``[-1]``).
        """
        try:
            self.convert()
        except Exception:
            traceback.print_exc()
            self._doc.error_code = [-1]
        if self._doc.error_code is not None:
            return self._doc.error_code
        return self._doc.get_html()
|