|
@@ -10,12 +10,11 @@ from format_convert.convert_docx import docx2text, DocxConvert
|
|
from format_convert.convert_image import picture2text, ImageConvert
|
|
from format_convert.convert_image import picture2text, ImageConvert
|
|
from format_convert.convert_pdf import pdf2text, PDFConvert
|
|
from format_convert.convert_pdf import pdf2text, PDFConvert
|
|
from format_convert.convert_rar import rar2text, RarConvert
|
|
from format_convert.convert_rar import rar2text, RarConvert
|
|
-from format_convert.convert_swf import swf2text
|
|
|
|
|
|
+from format_convert.convert_swf import swf2text, SwfConvert
|
|
from format_convert.convert_txt import txt2text
|
|
from format_convert.convert_txt import txt2text
|
|
from format_convert.convert_xls import xls2text, XlsConvert
|
|
from format_convert.convert_xls import xls2text, XlsConvert
|
|
from format_convert.convert_xlsx import xlsx2text, XlsxConvert
|
|
from format_convert.convert_xlsx import xlsx2text, XlsxConvert
|
|
-from format_convert.convert_zip import zip2text
|
|
|
|
-
|
|
|
|
|
|
+from format_convert.convert_zip import zip2text, ZipConvert
|
|
|
|
|
|
import codecs
|
|
import codecs
|
|
import gc
|
|
import gc
|
|
@@ -2256,7 +2255,8 @@ def getText(_type, path_or_stream):
|
|
# return docx2text(path_or_stream, unique_type_dir)
|
|
# return docx2text(path_or_stream, unique_type_dir)
|
|
return DocxConvert(path_or_stream, unique_type_dir).get_html()
|
|
return DocxConvert(path_or_stream, unique_type_dir).get_html()
|
|
if _type == "zip":
|
|
if _type == "zip":
|
|
- return zip2text(path_or_stream, unique_type_dir)
|
|
|
|
|
|
+ # return zip2text(path_or_stream, unique_type_dir)
|
|
|
|
+ return ZipConvert(path_or_stream, unique_type_dir).get_html()
|
|
if _type == "rar":
|
|
if _type == "rar":
|
|
# return rar2text(path_or_stream, unique_type_dir)
|
|
# return rar2text(path_or_stream, unique_type_dir)
|
|
return RarConvert(path_or_stream, unique_type_dir).get_html()
|
|
return RarConvert(path_or_stream, unique_type_dir).get_html()
|
|
@@ -2273,7 +2273,8 @@ def getText(_type, path_or_stream):
|
|
# return picture2text(path_or_stream)
|
|
# return picture2text(path_or_stream)
|
|
return ImageConvert(path_or_stream, unique_type_dir).get_html()
|
|
return ImageConvert(path_or_stream, unique_type_dir).get_html()
|
|
if _type == "swf":
|
|
if _type == "swf":
|
|
- return swf2text(path_or_stream, unique_type_dir)
|
|
|
|
|
|
+ # return swf2text(path_or_stream, unique_type_dir)
|
|
|
|
+ return SwfConvert(path_or_stream, unique_type_dir).get_html()
|
|
if _type == "txt":
|
|
if _type == "txt":
|
|
return txt2text(path_or_stream)
|
|
return txt2text(path_or_stream)
|
|
|
|
|
|
@@ -2648,8 +2649,8 @@ else:
|
|
_path = os.path.dirname(os.path.abspath(__file__))
|
|
_path = os.path.dirname(os.path.abspath(__file__))
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
if get_platform() == "Windows":
|
|
if get_platform() == "Windows":
|
|
- file_path = "C:/Users/Administrator/Desktop/error6.jpg"
|
|
|
|
- # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/has-3.rar"
|
|
|
|
|
|
+ # file_path = "C:/Users/Administrator/Desktop/error2.swf"
|
|
|
|
+ file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/转账支付说明.txt"
|
|
# file_path = "C:/Users/Administrator/Desktop/Test_ODPS/1624875783055.pdf"
|
|
# file_path = "C:/Users/Administrator/Desktop/Test_ODPS/1624875783055.pdf"
|
|
else:
|
|
else:
|
|
file_path = "1.doc"
|
|
file_path = "1.doc"
|