|
@@ -12,8 +12,8 @@ from format_convert.convert_pdf import pdf2text, PDFConvert
|
|
from format_convert.convert_rar import rar2text
|
|
from format_convert.convert_rar import rar2text
|
|
from format_convert.convert_swf import swf2text
|
|
from format_convert.convert_swf import swf2text
|
|
from format_convert.convert_txt import txt2text
|
|
from format_convert.convert_txt import txt2text
|
|
-from format_convert.convert_xls import xls2text
|
|
|
|
-from format_convert.convert_xlsx import xlsx2text
|
|
|
|
|
|
+from format_convert.convert_xls import xls2text, XlsConvert
|
|
|
|
+from format_convert.convert_xlsx import xlsx2text, XlsxConvert
|
|
from format_convert.convert_zip import zip2text
|
|
from format_convert.convert_zip import zip2text
|
|
|
|
|
|
|
|
|
|
@@ -2260,9 +2260,11 @@ def getText(_type, path_or_stream):
|
|
if _type == "rar":
|
|
if _type == "rar":
|
|
return rar2text(path_or_stream, unique_type_dir)
|
|
return rar2text(path_or_stream, unique_type_dir)
|
|
if _type == "xlsx":
|
|
if _type == "xlsx":
|
|
- return xlsx2text(path_or_stream, unique_type_dir)
|
|
|
|
|
|
+ # return xlsx2text(path_or_stream, unique_type_dir)
|
|
|
|
+ return XlsxConvert(path_or_stream, unique_type_dir).get_html()
|
|
if _type == "xls":
|
|
if _type == "xls":
|
|
- return xls2text(path_or_stream, unique_type_dir)
|
|
|
|
|
|
+ # return xls2text(path_or_stream, unique_type_dir)
|
|
|
|
+ return XlsConvert(path_or_stream, unique_type_dir).get_html()
|
|
if _type == "doc":
|
|
if _type == "doc":
|
|
# return doc2text(path_or_stream, unique_type_dir)
|
|
# return doc2text(path_or_stream, unique_type_dir)
|
|
return DocConvert(path_or_stream, unique_type_dir).get_html()
|
|
return DocConvert(path_or_stream, unique_type_dir).get_html()
|
|
@@ -2644,8 +2646,8 @@ else:
|
|
_path = os.path.dirname(os.path.abspath(__file__))
|
|
_path = os.path.dirname(os.path.abspath(__file__))
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
if get_platform() == "Windows":
|
|
if get_platform() == "Windows":
|
|
- file_path = "C:/Users/Administrator/Desktop/error3.pdf"
|
|
|
|
- # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/招标公告--汾口镇汪家桥村村道硬化工程 - .doc"
|
|
|
|
|
|
+ # file_path = "C:/Users/Administrator/Desktop/error3.pdf"
|
|
|
|
+ file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/询价单(246514).xls"
|
|
# file_path = "C:/Users/Administrator/Desktop/Test_ODPS/1624875783055.pdf"
|
|
# file_path = "C:/Users/Administrator/Desktop/Test_ODPS/1624875783055.pdf"
|
|
else:
|
|
else:
|
|
file_path = "1.doc"
|
|
file_path = "1.doc"
|