Ver Fonte

xls、xlsx封装

fangjiasheng há 3 anos atrás
pai
commit
a8a06844da

+ 8 - 6
format_convert/convert.py

@@ -12,8 +12,8 @@ from format_convert.convert_pdf import pdf2text, PDFConvert
 from format_convert.convert_rar import rar2text
 from format_convert.convert_swf import swf2text
 from format_convert.convert_txt import txt2text
-from format_convert.convert_xls import xls2text
-from format_convert.convert_xlsx import xlsx2text
+from format_convert.convert_xls import xls2text, XlsConvert
+from format_convert.convert_xlsx import xlsx2text, XlsxConvert
 from format_convert.convert_zip import zip2text
 
 
@@ -2260,9 +2260,11 @@ def getText(_type, path_or_stream):
     if _type == "rar":
         return rar2text(path_or_stream, unique_type_dir)
     if _type == "xlsx":
-        return xlsx2text(path_or_stream, unique_type_dir)
+        # return xlsx2text(path_or_stream, unique_type_dir)
+        return XlsxConvert(path_or_stream, unique_type_dir).get_html()
     if _type == "xls":
-        return xls2text(path_or_stream, unique_type_dir)
+        # return xls2text(path_or_stream, unique_type_dir)
+        return XlsConvert(path_or_stream, unique_type_dir).get_html()
     if _type == "doc":
         # return doc2text(path_or_stream, unique_type_dir)
         return DocConvert(path_or_stream, unique_type_dir).get_html()
@@ -2644,8 +2646,8 @@ else:
         _path = os.path.dirname(os.path.abspath(__file__))
 if __name__ == '__main__':
     if get_platform() == "Windows":
-        file_path = "C:/Users/Administrator/Desktop/error3.pdf"
-        # file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/招标公告--汾口镇汪家桥村村道硬化工程 - .doc"
+        # file_path = "C:/Users/Administrator/Desktop/error3.pdf"
+        file_path = "D:/BIDI_DOC/比地_文档/2022/Test_Interface/询价单(246514).xls"
         # file_path = "C:/Users/Administrator/Desktop/Test_ODPS/1624875783055.pdf"
     else:
         file_path = "1.doc"

+ 3 - 3
format_convert/convert_docx.py

@@ -295,18 +295,18 @@ class DocxConvert:
         self.path = path
         self.unique_type_dir = unique_type_dir
 
-    def init_package(self, package_name):
+    def init_package(self):
         # 各个包初始化
         try:
             self.docx = docx.Document(self.path)
             self.zip = zipfile.ZipFile(self.path)
         except:
-            logging.info(package_name + " cannot open docx!")
+            logging.info("cannot open docx!")
             traceback.print_exc()
             self._doc.error_code = [-3]
 
     def convert(self):
-        self.init_package("docx")
+        self.init_package()
         if self._doc.error_code is not None:
             return
 

+ 33 - 2
format_convert/convert_xls.py

@@ -1,11 +1,14 @@
 import os
 import sys
+
+from format_convert.convert_tree import _Document
+
 sys.path.append(os.path.dirname(__file__) + "/../")
 import logging
 import traceback
 from format_convert import get_memory_info
 from format_convert.convert_need_interface import from_office_interface
-from format_convert.convert_xlsx import xlsx2text
+from format_convert.convert_xlsx import xlsx2text, XlsxConvert
 from format_convert.utils import judge_error_code
 
 
@@ -26,4 +29,32 @@ def xls2text(path, unique_type_dir):
     except Exception as e:
         logging.info("xls2text error!")
         print("xls2text", traceback.print_exc())
-        return [-1]
+        return [-1]
+
+
+class XlsConvert:
+    """Convert an .xls file to HTML via an intermediate .xlsx conversion.
+
+    The file at ``path`` is handed to ``from_office_interface`` with the
+    target format ``'xlsx'``; the result is then processed by ``XlsxConvert``.
+    """
+
+    def __init__(self, path, unique_type_dir):
+        # Document object that carries the error code and the converted pages.
+        self._doc = _Document(path)
+        self.path = path
+        self.unique_type_dir = unique_type_dir
+
+    def convert(self):
+        """Fill ``self._doc``; on conversion failure only set its error code."""
+        # Call the office format-conversion interface.
+        file_path = from_office_interface(self.path, self.unique_type_dir, 'xlsx')
+        if judge_error_code(file_path):
+            # Record the error on the document instead of replacing the
+            # document with the returned value: get_html() reads
+            # self._doc.error_code, which the returned value (presumably an
+            # error-code list such as [-1]) does not provide.
+            self._doc.error_code = file_path
+            return
+        _xlsx = XlsxConvert(file_path, self.unique_type_dir)
+        _xlsx.convert()
+        # Adopt the document built by the xlsx converter (including any
+        # error code it recorded).
+        self._doc = _xlsx._doc
+
+    def get_html(self):
+        """Run the conversion and return its result.
+
+        Returns ``self._doc.error_code`` when it is not None (``[-1]`` if
+        ``convert`` raised), otherwise ``self._doc.get_html()``.
+        """
+        try:
+            self.convert()
+        except Exception:
+            traceback.print_exc()
+            self._doc.error_code = [-1]
+        if self._doc.error_code is not None:
+            return self._doc.error_code
+        return self._doc.get_html()

+ 58 - 1
format_convert/convert_xlsx.py

@@ -1,6 +1,7 @@
 import os
 import sys
 sys.path.append(os.path.dirname(__file__) + "/../")
+from format_convert.convert_tree import _Document, _Page, _Table
 import logging
 import traceback
 import pandas
@@ -35,4 +36,60 @@ def xlsx2text(path, unique_type_dir):
     except Exception as e:
         logging.info("xlsx2text error!")
         print("xlsx2text", traceback.print_exc())
-        return [-1]
+        return [-1]
+
+
+class XlsxConvert:
+    """Convert an .xlsx workbook to HTML, one page with one table per sheet."""
+
+    def __init__(self, path, unique_type_dir):
+        # Document object that collects one page per sheet and the error code.
+        self._doc = _Document(path)
+        self.path = path
+        self.unique_type_dir = unique_type_dir
+
+    def init_package(self):
+        """Load the workbook into ``self.df``; set error code ``[-3]`` on failure."""
+        # Initialize the packages.
+        try:
+            # sheet_name=None loads every sheet; the result is iterated with
+            # .values() in convert(). header=None keeps the first row as data
+            # and keep_default_na=False leaves empty cells as empty strings.
+            self.df = pandas.read_excel(self.path, header=None, keep_default_na=False, sheet_name=None)
+        except Exception:
+            logging.info("cannot open xlsx!")
+            traceback.print_exc()
+            self._doc.error_code = [-3]
+
+    def convert(self):
+        """Build one ``_Page`` per sheet and attach it to ``self._doc``."""
+        self.init_package()
+        if self._doc.error_code is not None:
+            return
+
+        for sheet_no, sheet in enumerate(self.df.values()):
+            self._page = _Page(None, sheet_no)
+            self.convert_page(sheet)
+
+            # Only the first page-level error is propagated to the document.
+            if self._doc.error_code is None and self._page.error_code is not None:
+                self._doc.error_code = self._page.error_code
+            self._doc.add_child(self._page)
+
+    def convert_page(self, sheet):
+        """Render ``sheet`` as an HTML table and add it to ``self._page``.
+
+        ``sheet`` must provide ``iterrows()`` (a pandas DataFrame in
+        ``convert``); every cell is rendered with ``str()``.
+        """
+        # Collect fragments and join once rather than growing one string
+        # cell by cell.
+        # NOTE(review): cell text is inserted without HTML escaping, so cells
+        # containing '<' or '&' end up as raw markup -- confirm downstream
+        # consumers tolerate this.
+        parts = ['<table border="1">' + "\n"]
+        for _, row in sheet.iterrows():
+            parts.append("<tr>")
+            parts.extend("<td>" + str(cell) + "</td>" + "\n" for cell in row)
+            parts.append("</tr>" + "\n")
+        parts.append("</table>" + "\n")
+        text = "".join(parts)
+
+        # The table carries ready-made HTML; the bounding box is a
+        # placeholder of zeros.
+        _table = _Table(text, (0, 0, 0, 0))
+        _table.is_html = True
+        self._page.add_child(_table)
+
+    def get_html(self):
+        """Run the conversion and return its result.
+
+        Returns ``self._doc.error_code`` when it is not None (``[-1]`` if
+        ``convert`` raised), otherwise ``self._doc.get_html()``.
+        """
+        try:
+            self.convert()
+        except Exception:
+            traceback.print_exc()
+            self._doc.error_code = [-1]
+        if self._doc.error_code is not None:
+            return self._doc.error_code
+        return self._doc.get_html()

+ 206 - 62
result.html

@@ -1,69 +1,213 @@
-<!DOCTYPE HTML><head><meta charset="UTF-8"></head><body><div>华池县柔远镇李庄肉牛养殖场建设项目配</div>
-<div>套设备购置政府采购公开招标中标公告</div>
-<div>、项目编号</div>
-<div>HCZC2021-0001</div>
-<div>二、项目名称</div>
-<div>华池县柔远镇李庄肉牛养殖场建设项目配套设备购置</div>
-<div>三、中标(成交)信息</div>
-<table border="1">
-<tr>
-<td colspan=1 rowspan=1>供应商名称</td>
-<td colspan=1 rowspan=1>供应商联系地址</td>
-<td colspan=1 rowspan=1>中标金额(万元)</td>
+<!DOCTYPE HTML><head><meta charset="UTF-8"></head><body><table border="1">
+<tr><td>询价单</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
 </tr>
-<tr>
-<td colspan=1 rowspan=1>华池县卓泰机械设备租赁有限公司</td>
-<td colspan=1 rowspan=1>甘肃省庆阳市华池县柔远镇张川村</td>
-<td colspan=1 rowspan=1>72.3500</td>
+<tr><td></td>
+<td>询价标题:</td>
+<td>胥口南方雷达料位计询价</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td>询价单号:</td>
+<td>XJ2004130004</td>
+<td></td>
+<td></td>
+<td>报价截止时间:</td>
+<td>2020-04-16 09:45:23</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
 </tr>
-</table>
-<div>四、主要标的信息</div>
-<table border="1">
-<tr>
-<td colspan=1 rowspan=1>货物类</td>
-<td colspan=1 rowspan=1>货物类</td>
-<td colspan=1 rowspan=1>货物类</td>
-<td colspan=1 rowspan=1>货物类</td>
-<td colspan=1 rowspan=1>货物类</td>
-<td colspan=1 rowspan=1>货物类</td>
+<tr><td></td>
+<td>询价执行组织:</td>
+<td>杭州胥口南方水泥有限公司</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td>询价类型:</td>
+<td>普通合同</td>
+<td></td>
+<td></td>
+<td>报价模板:</td>
+<td>现金+承兑报价</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr><td></td>
+<td>采购员:</td>
+<td>沈超航</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td>联系电话:</td>
+<td>15967187961</td>
+<td></td>
+<td></td>
+<td>采购内容:</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr><td></td>
+<td>收货地址:</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td>备注:</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr><td></td>
+<td>对供应商要求:</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr><td></td>
+<td>付款方式:</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+</tr>
+<tr><td></td>
+<td>交货条件:</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
 </tr>
-<tr>
-<td colspan=1 rowspan=1>供应商名称</td>
-<td colspan=1 rowspan=1>名称</td>
-<td colspan=1 rowspan=1>品牌</td>
-<td colspan=1 rowspan=1>数量</td>
-<td colspan=1 rowspan=1>单价</td>
-<td colspan=1 rowspan=1>规格型号</td>
+<tr><td>行号</td>
+<td>物料编码</td>
+<td>物料名称</td>
+<td>项目名称</td>
+<td>采购数量</td>
+<td>单位</td>
+<td>物料需求描述</td>
+<td>需求组织</td>
+<td>需求部门</td>
+<td>需求人</td>
+<td>需求时间</td>
+<td>需求单号</td>
+<td>收货组织</td>
+<td>收货地址</td>
+<td>收货人</td>
+<td>收货人联系电话</td>
+<td>计划部门</td>
+<td>计划员</td>
+<td>备注</td>
+<td></td>
 </tr>
-<tr>
-<td colspan=1 rowspan=1>华池县卓泰机械设备租赁有限公司</td>
-<td colspan=1 rowspan=1>华池县柔远镇李庄肉牛养殖场建设项目配套设备购置</td>
-<td colspan=1 rowspan=1>详见附件</td>
-<td colspan=1 rowspan=1>详见附件</td>
-<td colspan=1 rowspan=1>详见附件</td>
-<td colspan=1 rowspan=1>详见附件</td>
+<tr><td>1</td>
+<td>0301010001</td>
+<td>备品备件专用物料</td>
+<td></td>
+<td>4.00</td>
+<td>个</td>
+<td>雷达料位计</td>
+<td>杭州胥口南方水泥有限公司</td>
+<td>杭州胥口南方水泥有限公司</td>
+<td></td>
+<td></td>
+<td>2020-04-16 00:00:00</td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td></td>
+<td>HZJT-2000/50M AC220V/4-20MA/四线制</td>
 </tr>
 </table>
-<div>五、评审专家(单一来源采购人员)名单:</div>
-<div>王正刚、段海龙、李鑫、刘翠平、张武峰</div>
-<div>六、代理服务收费标准及金额:</div>
-<div>收费标准:无</div>
-<div>收费金额:0万元</div>
-<div>七、公告期限</div>
-<div>自本公告发布之日起1个工作日。</div>
-<div>八、其他补充事宜</div>
-<div>无</div>
-<div>九、凡对本次公告内容提出询问,请按以下方式联系。</div>
-<div>1.采购人信息</div>
-<div>名称:华池县柔远镇人民政府</div>
-<div>地址:华池县东关街70号</div>
-<div>联系方式:0934-5952951</div>
-<div>2.采购代理机构信息</div>
-<div>名称:华池县公共资源交易中心</div>
-<div>地址:华池县东关街22号</div>
-<div>联系方式:0934-5953080</div>
-<div>3.项目联系方式</div>
-<div>项目联系人:孙治江</div>
-<div>电话:18793418165</div>
-<div>2</div>
 </body>