# Repository: fangjiasheng / FORMAT_CONVERSION_MAXCOMPUTE

							import inspect
import os
import sys
import time
sys.path.append(os.path.dirname(__file__) + "/../")
from format_convert.convert_tree import _Document, _Image, _Page
import base64
import codecs
import logging
import re
import traceback
from PIL import Image
from format_convert.convert_image import picture2text
from format_convert.swf.export import SVGExporter
from format_convert.swf.movie import SWF
from format_convert.utils import judge_error_code, get_logger, log, memory_decorator
from format_convert.wrapt_timeout_decorator import timeout
from format_convert.yaswfp.swfparser import parsefile


@memory_decorator
def swf2text(path, unique_type_dir):
    """Extract text from a SWF file by running OCR on its embedded images.

    The SWF is exported to SVG text, every inline ``<image>`` payload found in
    that text is decoded and written to ``<root>_<ext>_page_<n>.png`` (where
    ``<root>``/``<ext>`` come from ``path``), and each written image is passed
    to ``picture2text``.

    Args:
        path: Path of the SWF file to read.
        unique_type_dir: Not used by this function; kept in the signature so
            existing callers keep working.

    Returns:
        ``[text]`` with the concatenated text of all images on success;
        ``[-3]`` when the file cannot be opened/exported as SWF;
        the value returned by ``picture2text`` when ``judge_error_code``
        flags it as an error other than ``-3``;
        ``[-1]`` on any other failure.
    """
    log("into swf2text")
    try:
        try:
            with open(path, 'rb') as f:
                svg = SWF(f).export(SVGExporter())
            swf_str = str(svg.getvalue(), encoding='utf-8')
        except Exception:
            log("swf format error!")
            traceback.print_exc()
            return [-3]

        # Split only at the final extension. Splitting on every "." mangles
        # paths such as "./dir/a.swf" or "dir.v2/a.swf" and fails outright
        # when the path contains no dot.
        path_root, path_ext = os.path.splitext(path)
        image_path_prefix = path_root + "_" + path_ext[1:]

        image_path_list = []
        # Locate every image element in the exported SVG.
        for i, tag_match in enumerate(re.finditer('<image id=(.[^>]*)', swf_str)):
            # Slice out the image element, including the character that
            # follows the match (the closing ">").
            image_tag = swf_str[tag_match.start():tag_match.end() + 1]

            # Find the inline data URI, then the payload that follows the
            # literal b' prefix inside it.
            href_match = re.search('xlink:href="data:(.[^>]*)', image_tag)
            if href_match is None:
                raise ValueError("swf image element has no inline data URI")
            payload_match = re.search("b'(.[^']*)", href_match.group(0))
            if payload_match is None:
                raise ValueError("swf image data URI has no b'...' payload")

            # text -> bytes -> escape sequences decoded -> base64-decoded image
            escaped_base64 = bytes(payload_match.group(1), "utf-8")
            base64_bytes = codecs.escape_decode(escaped_base64, "hex-escape")[0]
            image_bytes = base64.b64decode(base64_bytes)

            image_path = image_path_prefix + "_page_" + str(i) + ".png"
            with open(image_path, 'wb') as f:
                f.write(image_bytes)
            image_path_list.append(image_path)

        text_list = []
        for image_path in image_path_list:
            text = picture2text(image_path)
            # A -3 result for a single image is skipped; any other error
            # aborts the whole conversion and is returned to the caller.
            if judge_error_code(text, code=[-3]):
                continue
            if judge_error_code(text):
                return text
            text_list.append(text[0])

        return ["".join(text_list)]
    except Exception:
        log("swf2text error!")
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print().
        traceback.print_exc()
        return [-1]


@timeout(40, timeout_exception=TimeoutError)
def read_swf(path):
    """Export the SWF file at ``path`` to SVG and return it as UTF-8 text.

    The call is wrapped by ``timeout`` with a limit of 40 (presumably
    seconds -- confirm against the decorator) and ``TimeoutError`` as the
    timeout exception.

    Args:
        path: Path of the SWF file to read.

    Returns:
        The exported SVG content decoded as a ``str``.
    """
    with open(path, 'rb') as swf_handle:
        exported = SWF(swf_handle).export(SVGExporter())
    raw_svg = exported.getvalue()
    return str(raw_svg, encoding='utf-8')


class SwfConvert:
    """Convert a SWF file into a ``_Document`` with one ``_Image`` per picture.

    ``convert`` first reads embedded images through the ``yaswfp`` parser and,
    when that yields no images, falls back to exporting the SWF to SVG and
    decoding the inline image payloads. Images are written to disk as
    ``<root>_<ext>_page_<n>.png`` derived from the input path.

    Errors are reported through ``self._doc.error_code`` rather than raised:
    ``[-3]`` when the file cannot be opened, ``[-18]`` when image extraction
    via ``yaswfp`` fails, ``[-1]`` for anything else caught in ``get_html``.
    """

    # Patterns used to pull inline image payloads out of the exported SVG text.
    _IMAGE_TAG_PATTERN = '<image id=(.[^>]*)'
    _IMAGE_HREF_PATTERN = 'xlink:href="data:(.[^>]*)'
    _IMAGE_PAYLOAD_PATTERN = "b'(.[^']*)"

    def __init__(self, path, unique_type_dir):
        """Store the input path and create the result document.

        Args:
            path: Path of the SWF file to convert.
            unique_type_dir: Stored on the instance; not read by any method
                of this class.
        """
        self._doc = _Document(path)
        self.path = path
        self.unique_type_dir = unique_type_dir

    @memory_decorator
    def init_package(self, package_name):
        """Load the SWF with the requested backend.

        Args:
            package_name: ``'yaswfp'`` sets ``self.swf_parser`` via
                ``parsefile``; ``'swf'`` sets ``self.swf_str`` via
                ``read_swf``. Any other value does nothing.

        On failure the document's ``error_code`` is set to ``[-3]``.
        """
        try:
            if package_name == 'yaswfp':
                self.swf_parser = parsefile(self.path)
            elif package_name == 'swf':
                self.swf_str = read_swf(self.path)
        except Exception:
            log("cannot open swf!")
            traceback.print_exc()
            self._doc.error_code = [-3]

    def _image_path(self, image_no):
        """Return the output file path for image number ``image_no``."""
        # Split only at the final extension. Splitting on every "." mangles
        # paths such as "./dir/a.swf" or "dir.v2/a.swf" and fails outright
        # when the path contains no dot.
        root, ext = os.path.splitext(self.path)
        return root + "_" + ext[1:] + "_page_" + str(image_no) + ".png"

    def _iter_svg_image_bytes(self):
        """Yield the decoded bytes of every inline image in ``self.swf_str``.

        Raises:
            ValueError: If an image element carries no decodable inline
                payload.
        """
        for tag_match in re.finditer(self._IMAGE_TAG_PATTERN, self.swf_str):
            # Slice out the image element, including the character that
            # follows the match (the closing ">").
            image_tag = self.swf_str[tag_match.start():tag_match.end() + 1]

            # Find the inline data URI, then the payload that follows the
            # literal b' prefix inside it.
            href_match = re.search(self._IMAGE_HREF_PATTERN, image_tag)
            if href_match is None:
                raise ValueError("swf image element has no inline data URI")
            payload_match = re.search(self._IMAGE_PAYLOAD_PATTERN,
                                      href_match.group(0))
            if payload_match is None:
                raise ValueError("swf image data URI has no b'...' payload")

            # text -> bytes -> escape sequences decoded -> base64-decoded image
            escaped_base64 = bytes(payload_match.group(1), "utf-8")
            base64_bytes = codecs.escape_decode(escaped_base64, "hex-escape")[0]
            yield base64.b64decode(base64_bytes)

    def swf_to_images(self):
        """Write the images found by the ``yaswfp`` parser to disk.

        Every tag exposing ``ImageData`` is written out, re-opened with PIL,
        resized to 600x1000 when both dimensions exceed 1000, and saved again
        over the same path.

        Returns:
            A list of ``[image_path, image_no]`` pairs, or ``[-18]`` if
            anything fails while extracting.
        """
        log('swf_to_images yaswfp')
        image_path_index_list = []
        try:
            image_no = 0
            for tag in self.swf_parser.tags:
                if not hasattr(tag, 'ImageData'):
                    continue

                image_path = self._image_path(image_no)
                with open(image_path, 'wb') as f:
                    f.write(tag.ImageData)

                # The context manager guarantees the source file handle is
                # released even if resizing or saving fails.
                with Image.open(image_path) as opened:
                    image = opened
                    if opened.size[0] > 1000 and opened.size[1] > 1000:
                        image = opened.resize((600, 1000), Image.BILINEAR)
                    image.save(image_path, quality=10)

                image_path_index_list.append([image_path, image_no])
                image_no += 1
        except Exception:
            traceback.print_exc()
            image_path_index_list = [-18]
        return image_path_index_list

    def swf_to_images2(self):
        """Write the inline images of the SVG export to disk.

        Requires ``self.swf_str`` (set by ``init_package('swf')``).

        Returns:
            A list of ``[image_path, image_no]`` pairs.
        """
        log('swf_to_images swf')
        image_path_index_list = []
        for image_no, image_bytes in enumerate(self._iter_svg_image_bytes()):
            image_path = self._image_path(image_no)
            with open(image_path, "wb") as f:
                f.write(image_bytes)
            image_path_index_list.append([image_path, image_no])
        return image_path_index_list

    @memory_decorator
    def convert_old(self):
        """Legacy conversion that only uses the SVG export.

        Builds ``self._page`` with one ``_Image`` (carrying the decoded bytes)
        per inline image and adds the page to the document.
        """
        # This method reads self.swf_str, so the 'swf' backend must be loaded.
        # Calling init_package() without its required argument raised
        # TypeError before any work was done.
        self.init_package('swf')
        if self._doc.error_code is not None:
            return

        self._page = _Page(None, 0)
        for image_no, image_bytes in enumerate(self._iter_svg_image_bytes()):
            image_path = self._image_path(image_no)
            with open(image_path, "wb") as f:
                f.write(image_bytes)

            # NOTE(review): the 4-tuple is presumably a bbox whose second
            # entry orders the images vertically -- confirm against _Image.
            _image = _Image(image_bytes, image_path, (0, image_no, 0, 0))
            self._page.add_child(_image)
        self._doc.add_child(self._page)

    @memory_decorator
    def convert(self):
        """Populate the document with one page holding all extracted images.

        Tries the ``yaswfp`` backend first; if it produces no images, falls
        back to the ``swf`` (SVG export) backend. Returns early, leaving
        ``self._doc.error_code`` set, when a backend reports an error.
        """
        self._page = _Page(None, 0)

        self.init_package('yaswfp')
        if self._doc.error_code is not None:
            return
        image_path_index_list = self.swf_to_images()
        if judge_error_code(image_path_index_list):
            self._doc.error_code = image_path_index_list
            return

        if not image_path_index_list:
            # Nothing found via yaswfp: fall back to the SVG export.
            self.init_package('swf')
            if self._doc.error_code is not None:
                return
            image_path_index_list = self.swf_to_images2()

        for image_path, image_no in image_path_index_list:
            _image = _Image(None, image_path, (0, image_no, 0, 0))
            self._page.add_child(_image)
        self._doc.add_child(self._page)

    def get_html(self):
        """Run the conversion and return the document's HTML or an error code.

        Returns:
            ``self._doc.error_code`` when it is set (``[-1]`` if ``convert``
            raised), otherwise the result of ``self._doc.get_html()``.
        """
        try:
            self.convert()
        except Exception:
            traceback.print_exc()
            self._doc.error_code = [-1]
        if self._doc.error_code is not None:
            return self._doc.error_code
        return self._doc.get_html()


if __name__ == '__main__':
    # Manual smoke run: convert one local sample file and print the elapsed
    # wall-clock time.
    began_at = time.time()
    sample_swf = "C:/Users/Administrator/Downloads/1716617588175.swf"
    converter = SwfConvert(sample_swf, 'temp/1/')
    converter.convert()
    elapsed = time.time() - began_at
    print(elapsed)