12345678910111213141516171819202122232425262728293031323334353637383940 |
- from odps.udf import annotate
- from odps.udf import BaseUDTF
@annotate('string -> string')
class f_analysis_type(BaseUDTF):
    """UDTF that labels a document text with the categories whose keywords it contains.

    For each input string, every category in ``_TYPE_KEYWORDS`` is tested
    against the text with regular-expression search.  The labels of all
    matching categories are joined with "," and forwarded as a single output
    row.  Nothing is forwarded when the input is ``None`` or when no category
    matches.
    """

    # Ordered (label, groups) pairs.  Each group is a tuple of regex patterns
    # that must ALL be found in the text; a label matches when ANY of its
    # groups matches.  A tuple (rather than a dict) keeps the order of the
    # emitted labels deterministic on every Python version.
    _TYPE_KEYWORDS = (
        # wind power
        ("风电", (('风电|风力发电',),)),
        # thermal power
        ("火电", (('煤电|煤发电|燃煤机组|燃气热电|焚烧发电|火电|火力发电|锅炉|燃机',),)),
        # hydropower
        ("水电", (('水电|水力发电',),)),
        # power transmission and transformation
        ("送变电", (('变电|送出|输送|架线|配电|电压穿越',),)),
        # nuclear power
        ("核电", (('核电|核能发电',),)),
        # photovoltaic power generation
        ("光伏发电", (('光伏|太阳能发电',),)),
        # commissioning / testing
        ("调试", (("整套启动|性能试验|调整试验|调试|试验|测试|检测|预试",),)),
        # supervision
        ("监理", (("监理",),)),
        # construction
        ("施工", (("施工|工程|建设",),)),
    )

    def __init__(self):
        """Import the helper modules, configure logging and compile the patterns."""
        # The `global` statement must precede the imports: Python 3 raises a
        # SyntaxError when a name is bound before its global declaration
        # (Python 2 only warned).  The modules are still published as
        # module-level globals, exactly as before.
        global json, logging, time, re
        import logging
        import json
        import time
        import re

        # Raw string: same pattern text, without invalid escape sequences.
        self.time_pattern = r"\d{4}\-\d{2}\-\d{2}.*"
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        )

        # Compile every pattern once here instead of on each process() call.
        self._compiled_keywords = [
            (label, [[re.compile(pattern) for pattern in group] for group in groups])
            for label, groups in self._TYPE_KEYWORDS
        ]

    def process(self, doctextcon):
        """Forward the comma-joined labels of all categories found in the text.

        Args:
            doctextcon: Text to classify; ``None`` is ignored.

        Emits (via ``self.forward``) one string such as ``"风电,施工"`` when at
        least one category matches; otherwise emits nothing.
        """
        if doctextcon is None:
            return

        # A label is reported once if any of its groups has all of its
        # patterns present; all()/any() stop at the first decisive result.
        matched_labels = [
            label
            for label, groups in self._compiled_keywords
            if any(
                all(pattern.search(doctextcon) is not None for pattern in group)
                for group in groups
            )
        ]

        if matched_labels:
            self.forward(",".join(matched_labels))
|