3 лет назад · eefeb6ecaf
--- a/BiddingKG/dl/interface/Preprocessing.py
+++ b/BiddingKG/dl/interface/Preprocessing.py
@@ -1,4 +1,4 @@
 
				-
			
 
				+# -*- coding: utf-8 -*-
			
 
				 
			
 
				 from bs4 import BeautifulSoup, Comment
			
 
				 import copy
			
@@ -752,7 +752,10 @@ def tableToText(soup):
 
				 
			
 
				                                 cell = table_occurence[i][j]
			
 
				                                 head = (cell["top_head"]+":") if len(cell["top_head"])>0 else ""
			
 
				-                                head += cell["left_head"]
			
 
				+                                if re.search("单报标限总]价|金额|成交报?价|报价", head):
			
 
				+                                    head = cell["left_head"] + head
			
 
				+                                else:
			
 
				+                                    head += cell["left_head"]
			
 
				                                 if str(head+cell["text"]) in text_set:
			
 
				                                     continue
			
 
				                                 if re.search(packPattern,head) is not None:
			
@@ -787,7 +790,10 @@ def tableToText(soup):
 
				 
			
 
				                                 cell = table_occurence[i][j]
			
 
				                                 head = (cell["left_head"]+"") if len(cell["left_head"])>0 else ""
			
 
				-                                head += cell["top_head"]
			
 
				+                                if re.search("单报标限总]价|金额|成交报?价|报价", head):
			
 
				+                                    head = cell["top_head"] + head
			
 
				+                                else:
			
 
				+                                    head += cell["top_head"]
			
 
				                                 if str(head+cell["text"]) in text_set:
			
 
				                                     continue
			
 
				                                 if re.search(packPattern,head) is not None:
			
@@ -796,7 +802,8 @@ def tableToText(soup):
 
				                                     #排名替换为同一种表达
			
 
				                                     rank_text += head+cell["text"]+"，"
			
 
				                                     #print(rank_text)
			
 
				-                                elif re.search(entityPattern,head) is not None:
			
 
				+                                elif re.search(entityPattern,head) is not None and \
			
 
				+                                        re.search('业绩|资格|条件',head)==None and re.search('业绩',cell["text"])==None : #2021/10/19 解决包含业绩的行调到前面问题
			
 
				                                     entity_text += head+cell["text"]+"，"
			
 
				                                     #print(entity_text)
			
 
				                                 else:
			
@@ -986,20 +993,22 @@ def tableToText(soup):
 
				     pat_value = re.compile("(\d{2,}.\d{1}|\d+年\d+月|\d{8,}|\d{3,}-\d{6,}|有限[责任]*公司|^\d+$)")
			
 
				 
			
 
				     list_innerTable = []
			
 
				-    tbodies = soup.find_all('table')
			
 
				+    tbodies = soup.find_all('tbody')
			
 
				     # 遍历表格中的每个tbody
			
 
				     #逆序处理嵌套表格
			
 
				     for tbody_index in range(1,len(tbodies)+1):
			
 
				         tbody = tbodies[len(tbodies)-tbody_index]
			
 
				         inner_table = trunTable(tbody)
			
 
				         list_innerTable.append(inner_table)
			
 
				-    tbodies = soup.find_all('tbody')
			
 
				+    '''2021/10/19先找tbody 再找table,避免一个table内多个tbody造成数据丢失'''
			
 
				+    tbodies = soup.find_all('table')
			
 
				     # 遍历表格中的每个tbody
			
 
				     #逆序处理嵌套表格
			
 
				     for tbody_index in range(1,len(tbodies)+1):
			
 
				         tbody = tbodies[len(tbodies)-tbody_index]
			
 
				         inner_table = trunTable(tbody)
			
 
				         list_innerTable.append(inner_table)
			
 
				+
			
 
				     return soup
			
 
				     # return list_innerTable
			
 
				 
			
@@ -1825,7 +1834,7 @@ def get_preprocessed_entitys(list_sentences,useselffool=True,cost_time=dict()):
 
				                                 re.search('\d{5,}',entity_text) and re.search('^0|1[3|4|5|6|7|8|9]\d{9}',entity_text)==None:
			
 
				                             unit = '元'
			
 
				                             # print('明显金额特征补充单位 元')
			
 
				-                        elif re.search('(^\d{,3}(,?\d{3})+(\.\d{2,7})$)|(^\d{,3}(,\d{3})+$)',entity_text):
			
 
				+                        elif re.search('(^\d{,3}(,?\d{3})+(\.\d{2,7}，?)$)|(^\d{,3}(,\d{3})+，?$)',entity_text):
			
 
				                             unit = '元'
			
 
				                             # print('明显金额特征补充单位 元')
			
 
				                     if unit.find("万") >= 0 and entity_text.find("万") >= 0:  #2021/7/19修改为金额文本有万，不计算单位
			
@@ -2107,6 +2116,12 @@ if __name__=="__main__":
 
				     '''        
			
 
				     # content = codecs.open("C:\\Users\\User\\Desktop\\2.html","r",encoding="utf8").read()
			
 
				     # print(segment(tableToText(BeautifulSoup(content,"lxml"))))
			
 
				-    getPredictTable()
			
 
				-    
			
 
				-        
			
 
				+    # getPredictTable()
			
 
				+    with open('D:/138786703.html', 'r', encoding='utf-8') as f:
			
 
				+        sourceContent = f.read()
			
 
				+        # article_processed = segment(tableToText(BeautifulSoup(sourceContent, "lxml")))
			
 
				+        # print(article_processed)
			
 
				+
			
 
				+        list_articles, list_sentences, list_entitys, _cost_time = get_preprocessed([['doc_id', sourceContent, "", "", '', '2021-02-01']], useselffool=True)
			
 
				+        for entity in list_entitys[0]:
			
 
				+            print(entity.entity_type, entity.entity_text)
			
--- a/BiddingKG/dl/interface/extract.py
+++ b/BiddingKG/dl/interface/extract.py
@@ -68,7 +68,7 @@ def predict(doc_id,text,title="",page_time="",**kwargs):
 
				     cost_time["product"] = round(time.time()-start_time,2)
			
 
				 
			
 
				     start_time = time.time()
			
 
				-    product_attrs = predictor.getPredictor("product_attrs").predict(doc_id, text)
			
 
				+    product_attrs = predictor.getPredictor("product_attrs").predict(doc_id, text, page_time)
			
 
				     log("get product attributes done of doc_id%s"%(doc_id))
			
 
				     cost_time["product_attrs"] = round(time.time()-start_time,2)
			
 
				 
			
@@ -102,10 +102,16 @@ def predict(doc_id,text,title="",page_time="",**kwargs):
 
				     list_punish_dic = predictor.getPredictor("punish").get_punish_extracts(list_articles,list_sentences, list_entitys)
			
 
				     cost_time["punish"] = round(time.time()-start_time,2)
			
 
				 
			
 
				+    if len(product_attrs[1]['demand_info']['data'])>0:
			
 
				+        for d in product_attrs[1]['demand_info']['data']:
			
 
				+            for product in set(prem[0]['product']):
			
 
				+                if product in d['project_name']:
			
 
				+                    d['product'].append(product)  #把产品在项目名称中的添加进需求要素中
			
 
				+
			
 
				     #print(prem)
			
 
				     # data_res = Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic)[0]
			
 
				     # data_res = Preprocessing.union_result(Preprocessing.union_result(Preprocessing.union_result(codeName, prem),list_punish_dic), list_channel_dic)[0]
			
 
				-    data_res = dict(codeName[0], **prem[0], **list_channel_dic[0], **product_attrs[0])
			
 
				+    data_res = dict(codeName[0], **prem[0], **list_channel_dic[0], **product_attrs[0], **product_attrs[1])
			
 
				     data_res["cost_time"] = cost_time
			
 
				     data_res["success"] = True
			
 
				 
			
@@ -139,7 +145,21 @@ if __name__=="__main__":
 
				     t1 = time.time()
			
 
				     text = '中标人：广州中医药有限公司，招标人：广州市第一人民医院， 代理机构：希达招标代理有限公司。招标金额：100万元， 手续费：100元，总投资：1亿元。中标金额：50000元。合同金额：50000万元。'
			
 
				     title = '合同公告'
			
 
				-    print(predict('',text,title))
			
 
				+    # df = pd.read_excel('E:/公告金额/产品名称采购需求预算金额采购时间等要素公告.xlsx')
			
 
				+    # # df = pd.read_excel('E:/公告金额/产品数量单价.xlsx')
			
 
				+    # for i in range(10):
			
 
				+    #     text = df.loc[i, 'dochtmlcon']
			
 
				+    #     rs = json.loads(predict('', text, ''))
			
 
				+    #     print(rs['demand_info'])
			
 
				+    #     print(rs['product'])
			
 
				+    #     print(rs['product_attrs'])
			
 
				+    # print(rs)
			
 
				+
			
 
				+    with open('D:/138786703.html', 'r', encoding='utf-8') as f:
			
 
				+        text = f.read()
			
 
				+        print(predict('', text, title))
			
 
				+
			
 
				+    # print(predict('',text,title))
			
 
				     # df = pd.read_excel('G:\公告金额/170角色金额原模型预测错误数据_new3为新预测中标金额_predict0812.xlsx')[:20]
			
 
				     # new_prem = []
			
 
				     # for i in range(len(df)):
			
--- a/BiddingKG/dl/interface/predictor.py
+++ b/BiddingKG/dl/interface/predictor.py
@@ -22,6 +22,8 @@ from BiddingKG.dl.interface.Entitys import Entity
 
				 from BiddingKG.dl.complaint.punish_predictor import Punish_Extract
			
 
				 from bs4 import BeautifulSoup
			
 
				 import copy
			
 
				+import calendar
			
 
				+import datetime
			
 
				 
			
 
				 from threading import RLock
			
 
				 dict_predictor = {"codeName":{"predictor":None,"Lock":RLock()},
			
@@ -1532,7 +1534,7 @@ class ProductPredictor():
 
				                     result.append(item) # 修正bug
			
 
				                 return result
			
 
				 
			
 
				-# 产品数量单价品牌规格提取
			
 
				+# 产品数量单价品牌规格提取 #2021/11/10 添加表格中的项目、需求、预算、时间要素提取
			
 
				 class ProductAttributesPredictor():
			
 
				     def __init__(self,):
			
 
				         self.p1 = '(设备|货物|商品|产品|物品|货品|材料|物资|物料|物件|耗材|备件|食材|食品|品目|标的|标的物|标项|资产|拍卖物|仪器|器材|器械|药械|药品|药材|采购品?|项目|招标|工程|服务)[\)）]?(名称|内容|描述)'
			
@@ -1653,6 +1655,67 @@ class ProductAttributesPredictor():
 
				                             elif len(tds1) > 0 and len(tds1) == indtd - 1:
			
 
				                                 tds1[indtd - 2].insert_after(copy.copy(td))
			
 
				 
			
 
    def get_monthlen(self, year, month):
        '''Return the number of days in the given month as a string.

        :param year: year as an int or a numeric string
        :param month: month (1-12) as an int or a numeric string
        :return: day count as a string, e.g. '28'; falls back to '30' when
                 the year/month cannot be converted or the month is out of range
        '''
        try:
            _, num = calendar.monthrange(int(year), int(month))
        except (TypeError, ValueError):
            # int() failures and calendar.IllegalMonthError (a ValueError
            # subclass) land here; keep the best-effort default of 30 days.
            num = 30
        return str(num)
			
 
    def fix_time(self, text):
        '''Normalize a date / date-range string into (order_begin, order_end).

        Three shapes are recognised, tried in this order:

        1. year + month, e.g. "2021年10月", "2021/10", "2021.10", "2021-10":
           begin is the first day of the month, end is its last day.
        2. a single full date, e.g. "2021年1月5日", "2021/1/5", "2021.1.5":
           begin and end are that same day.
        3. a range joined by 到 / 至 / -, e.g. "2021年1月至3月",
           "2021/10/1至2021/11/15": a missing end year defaults to the start
           year, a missing start day to 1 and a missing end day to the last
           day of the end month.

        :param text: raw date text
        :return: tuple (order_begin, order_end), both formatted "YYYY-MM-DD"
                 with zero-padded month and day; ("", "") when nothing matches
        '''
        def pad(value):
            # zero-pad a one-digit month/day to two digits
            return value.zfill(2)

        # 1. Year + month only. The dot is escaped so that it matches a
        #    literal "." rather than any character.
        month_only = re.search(r'^(\d{4})(?:年|/|\.|-)(\d{1,2})月?$', text)
        if month_only:
            year, month = month_only.groups()
            last_day = pad(self.get_monthlen(year, month))
            month = pad(month)
            return "%s-%s-01" % (year, month), "%s-%s-%s" % (year, month, last_day)

        # 2. Single full date; rebuilt from its parts so every separator
        #    (including ".") is normalized and the fields are padded.
        single_day = re.search(
            r'^(\d{4})(?:年|/|\.|-)(\d{1,2})(?:月|/|\.|-)(\d{1,2})日?$', text)
        if single_day:
            year, month, day = single_day.groups()
            day_text = "%s-%s-%s" % (year, pad(month), pad(day))
            return day_text, day_text

        # 3. Range. "-" is reserved as the range separator here, so the date
        #    parts themselves may only use 年/月, "/" or ".". The trailing 日
        #    is optional so that slash/dot dates can carry a day as well.
        span = re.search(
            r'^(?P<y1>\d{4})(?:年|/|\.)(?P<m1>\d{1,2})'
            r'(?:(?:月|/|\.)(?:(?P<d1>\d{1,2})日?)?)?'
            r'(?:到|至|-)'
            r'(?:(?P<y2>\d{4})(?:年|/|\.))?(?P<m2>\d{1,2})'
            r'(?:(?:月|/|\.)(?:(?P<d2>\d{1,2})日?)?)?$', text)
        if not span:
            return "", ""

        y1 = span.group('y1')
        m1 = span.group('m1')
        d1 = span.group('d1') or '1'
        y2 = span.group('y2') or y1
        m2 = span.group('m2')
        d2 = span.group('d2') or self.get_monthlen(y2, m2)

        order_begin = "%s-%s-%s" % (y1, pad(m1), pad(d1))
        order_end = "%s-%s-%s" % (y2, pad(m2), pad(d2))
        return order_begin, order_end
			
 
				+
			
 
				     def find_header(self, items, p1, p2):
			
 
				         '''
			
 
				         inner_table 每行正则检查是否为表头，是则返回表头所在列序号，及表头内容
			
@@ -1662,12 +1725,16 @@ class ProductAttributesPredictor():
 
				         :return: 表头所在列序号，是否表头，表头内容
			
 
				         '''
			
 
				         flag = False
			
 
				-        header_dic = {'名称': '', '数量': '', '单价': '', '品牌': '', '规格': ''}
			
 
				+        header_dic = {'名称': '', '数量': '', '单价': '', '品牌': '', '规格': '', '需求': '', '预算': '', '时间': ''}
			
 
				         product = ""  # 产品
			
 
				         quantity = ""  # 数量
			
 
				         unitPrice = ""  # 单价
			
 
				         brand = ""  # 品牌
			
 
				         specs = ""  # 规格
			
 
				+        demand = "" # 采购需求
			
 
				+        budget = "" # 预算金额
			
 
				+        order_time = "" # 采购时间
			
 
				+
			
 
				         for i in range(min(4, len(items))):
			
 
				             it = items[i]
			
 
				             if len(it) < 15 and re.search(p1, it) != None:
			
@@ -1700,15 +1767,28 @@ class ProductAttributesPredictor():
 
				                 elif re.search('规格', items[j]):
			
 
				                     header_dic['规格'] = j
			
 
				                     specs = items[j]
			
 
				-            if header_dic.get('名称', "") != "" and (header_dic.get('数量', "") != "" or header_dic.get('单价', "") != ""
			
 
				-                                                   or header_dic.get('品牌', "") != "" or header_dic.get('规格',
			
 
				-                                                                                                       "") != ""):
			
 
				-                return header_dic, flag, (product, quantity, unitPrice, brand, specs)
			
 
				 
			
 
				+                elif re.search('需求', items[j]):
			
 
				+                    header_dic['需求'] = j
			
 
				+                    demand = items[j]
			
 
				+                elif re.search('预算', items[j]):
			
 
				+                    header_dic['预算'] = j
			
 
				+                    budget = items[j]
			
 
				+                elif re.search('时间', items[j]):
			
 
				+                    header_dic['时间'] = j
			
 
				+                    order_time = items[j]
			
 
				+
			
 
				+            if header_dic.get('名称', "") != "" :
			
 
				+                num = 0
			
 
				+                for it in (quantity, unitPrice, brand, specs, product, demand, budget, order_time):
			
 
				+                    if it != "":
			
 
				+                        num  += 1
			
 
				+                if num >=2:
			
 
				+                    return header_dic, flag, (product, quantity, unitPrice, brand, specs), (product, demand, budget, order_time)
			
 
				         flag = False
			
 
				-        return header_dic, flag, (product, quantity, unitPrice, brand, specs)
			
 
				+        return header_dic, flag, (product, quantity, unitPrice, brand, specs), (product, demand, budget, order_time)
			
 
				 
			
 
				-    def predict(self, docid='', html=''):
			
 
				+    def predict(self, docid='', html='', page_time=""):
			
 
				         '''
			
 
				         正则寻找table表格内 产品相关信息
			
 
				         :param html:公告HTML原文
			
@@ -1719,9 +1799,16 @@ class ProductAttributesPredictor():
 
				         soup = BeautifulSoup(html, 'lxml')
			
 
				         tables = soup.find_all(['table'])
			
 
				         headers = []
			
 
				+        headers_demand = []
			
 
				         header_col = []
			
 
				         product_link = []
			
 
				-        for table in tables:
			
 
				+        demand_link = []
			
 
				+        for i in range(len(tables)-1, -1, -1):
			
 
				+            table = tables[i]
			
 
				+            if table.parent.name == 'td' and len(table.find_all('td')) == 1:
			
 
				+                table.string = table.get_text()
			
 
				+                table.name = 'turntable'
			
 
				+                continue
			
 
				             if not self.isTrueTable(table):
			
 
				                 continue
			
 
				             self.fixSpan(table)
			
@@ -1740,10 +1827,17 @@ class ProductAttributesPredictor():
 
				                 unitPrice = ""  # 单价
			
 
				                 brand = ""  # 品牌
			
 
				                 specs = ""  # 规格
			
 
				+                demand = ""  # 采购需求
			
 
				+                budget = ""  # 预算金额
			
 
				+                order_time = ""  # 采购时间
			
 
				+                order_begin = ""
			
 
				+                order_end = ""
			
 
				+
			
 
				                 if len(set(tds) & self.header_set) > len(tds) * 0.2:
			
 
				-                    header_dic, found_header, header_list = self.find_header(tds, self.p1, self.p2)
			
 
				+                    header_dic, found_header, header_list, header_list2 = self.find_header(tds, self.p1, self.p2)
			
 
				                     if found_header:
			
 
				                         headers.append('_'.join(header_list))
			
 
				+                        headers_demand.append('_'.join(header_list2))
			
 
				                         header_colnum = len(tds)
			
 
				                         header_col.append('_'.join(tds))
			
 
				                     i += 1
			
@@ -1757,6 +1851,10 @@ class ProductAttributesPredictor():
 
				                     id3 = header_dic.get('单价', "")
			
 
				                     id4 = header_dic.get('品牌', "")
			
 
				                     id5 = header_dic.get('规格', "")
			
 
				+
			
 
				+                    id6 = header_dic.get('需求', "")
			
 
				+                    id7 = header_dic.get('预算', "")
			
 
				+                    id8 = header_dic.get('时间', "")
			
 
				                     if re.search('[a-zA-Z\u4e00-\u9fa5]', tds[id1]) and tds[id1] not in self.header_set and \
			
 
				                             re.search('备注|汇总|合计|总价|价格|金额|公司|附件|详见|无$|xxx', tds[id1]) == None:
			
 
				                         product = tds[id1]
			
@@ -1768,8 +1866,10 @@ class ProductAttributesPredictor():
 
				                         if id3 != "":
			
 
				                             if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', tds[id3]):
			
 
				                                 unitPrice = tds[id3]
			
 
				-                                if '万元' in header_list[2] and '万元' not in unitPrice:
			
 
				+                                if '万元' in header_list[2] and '万' not in unitPrice:
			
 
				                                     unitPrice += '万元'
			
 
				+                                unitPrice = re.sub("[^0-9.零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]", "", unitPrice)
			
 
				+                                unitPrice = str(getUnifyMoney(unitPrice))
			
 
				                             else:
			
 
				                                 unitPrice = ""
			
 
				                         if id4 != "":
			
@@ -1782,16 +1882,74 @@ class ProductAttributesPredictor():
 
				                                 specs = tds[id5]
			
 
				                             else:
			
 
				                                 specs = ""
			
 
				+                        if id6 != "":
			
 
				+                            if re.search('\w', tds[id6]):
			
 
				+                                demand = tds[id6]
			
 
				+                            else:
			
 
				+                                demand = ""
			
 
				+                        if id7 != "":
			
 
				+                            if re.search('\d+|[零壹贰叁肆伍陆柒捌玖拾佰仟萬億十百千万亿元角分]{3,}', tds[id7]):
			
 
				+                                budget = tds[id7]
			
 
				+                                if '万元' in header_list2[2] and '万' not in budget:
			
 
				+                                    budget += '万元'
			
 
				+                                budget = re.sub("[^0-9.零壹贰叁肆伍陆柒捌玖拾佰仟萬億圆十百千万亿元角分]", "", budget)
			
 
				+                                budget = str(getUnifyMoney(budget))
			
 
				+                            else:
			
 
				+                                budget = ""
			
 
				+                        if id8 != "":
			
 
				+                            if re.search('\w', tds[id8]):
			
 
				+                                order_time = tds[id8].strip()
			
 
				+                                if re.search('^\d{1,2}月$', order_time):
			
 
				+                                    m = re.search('^(\d{1,2})月$', order_time).group(1)
			
 
				+                                    if len(m) < 2:
			
 
				+                                        m = '0'+m
			
 
				+                                    year = re.search('(\d{4})年(.{,12}采购意向)?', html)
			
 
				+                                    if year:
			
 
				+                                        y = year.group(1)
			
 
				+                                        num = self.get_monthlen(y, m)
			
 
				+                                        if len(num)<2:
			
 
				+                                            num = '0'+num
			
 
				+                                        order_begin = "%s.%s.01" % (y, m)
			
 
				+                                        order_end = "%s.%s.%s" % (y, m, num)
			
 
				+                                    elif page_time!="":
			
 
				+                                        year = re.search('\d{4}', page_time)
			
 
				+                                        if year:
			
 
				+                                            y = year.group(0)
			
 
				+                                            num = self.get_monthlen(y, m)
			
 
				+                                            if len(num) < 2:
			
 
				+                                                num = '0'+num
			
 
				+                                            order_begin = "%s.%s.01" % (y, m)
			
 
				+                                            order_end = "%s.%s.%s" % (y, m, num)
			
 
				+                                        else:
			
 
				+                                            y = str(datetime.datetime.now().year)
			
 
				+                                            num = self.get_monthlen(y, m)
			
 
				+                                            if len(num) < 2:
			
 
				+                                                num = '0'+num
			
 
				+                                            order_begin = "%s.%s.01" % (y, m)
			
 
				+                                            order_end = "%s.%s.%s" % (y, m, num)
			
 
				+                                else:
			
 
				+                                    order_begin, order_end = self.fix_time(order_time)
			
 
				                         if quantity != "" or unitPrice != "" or brand != "" or specs != "":
			
 
				-                            # link = "{0}\t{1}\t{2}\t{3}\t{4}".format(product, quantity, unitPrice, brand, specs)
			
 
				                             link = {'product': product, 'quantity': quantity, 'unitPrice': unitPrice,
			
 
				-                                                      'brand': brand[:50], 'speces': specs[:100]}
			
 
				+                                                      'brand': brand[:50], 'specs':specs}
			
 
				                             if link not in product_link:
			
 
				                                 product_link.append(link)
			
 
				+                        if budget != "" and order_time != "" :
			
 
				+                            link = {'project_name': product, 'product':[], 'demand': demand, 'budget': budget, 'order_begin':order_begin, 'order_end':order_end}
			
 
				+                            if link not in demand_link:
			
 
				+                                demand_link.append(link)
			
 
				                     i += 1
			
 
				                 else:
			
 
				                     i += 1
			
 
				-        return [{'product_attrs':{'data':product_link, 'header':headers, 'header_col':header_col}}]
			
 
				+        if len(product_link)>0:
			
 
				+            attr_dic = {'product_attrs':{'data':product_link, 'header':headers, 'header_col':header_col}}
			
 
				+        else:
			
 
				+            attr_dic = {'product_attrs': {'data': [], 'header': [], 'header_col': []}}
			
 
				+        if len(demand_link)>0:
			
 
				+            demand_dic = {'demand_info':{'data':demand_link, 'header':headers_demand, 'header_col':header_col}}
			
 
				+        else:
			
 
				+            demand_dic = {'demand_info':{'data':[], 'header':[], 'header_col':[]}}
			
 
				+        return [attr_dic, demand_dic]
			
 
				 
			
 
				 # docchannel类型提取
			
 
				 class DocChannel():