# luojiehua / BIDI_ML_INFO_EXTRACTION


			
							12345678910111213141516171819202122232425262728293031323334353637383940
							from odps.udf import annotate
from odps.udf import BaseUDTF


@annotate('string -> string')
class f_analysis_type(BaseUDTF):
    """UDTF that tags a document text with the category labels it mentions.

    Every label in ``_TYPE_KEYWORDS`` owns one or more keyword groups. A group
    is a list of regular expressions that must ALL be found in the text for
    the group to match; a label is selected when any of its groups matches.
    ``process`` forwards a single row containing the selected labels joined by
    commas, and forwards nothing when no label is selected.
    """

    # (label, keyword groups) pairs. Kept as a tuple of pairs, rather than a
    # dict rebuilt on every row, so the table is created once and the labels
    # are always emitted in the order written here.
    _TYPE_KEYWORDS = (
        ("风电", [['风电|风力发电']]),
        ("火电", [['煤电|煤发电|燃煤机组|燃气热电|焚烧发电|火电|火力发电|锅炉|燃机']]),
        ("水电", [['水电|水力发电']]),
        ("送变电", [['变电|送出|输送|架线|配电|电压穿越']]),
        ("核电", [['核电|核能发电']]),
        ("光伏发电", [['光伏|太阳能发电']]),
        ("调试", [["整套启动|性能试验|调整试验|调试|试验|测试|检测|预试"]]),
        ("监理", [["监理"]]),
        ("施工", [["施工|工程|建设"]]),
    )

    def __init__(self):
        """Publish the helper modules as module globals and configure logging."""
        # The ``global`` declaration has to come before the imports: binding a
        # name and declaring it global afterwards in the same function is a
        # SyntaxError on Python 3.
        global json, logging, time, re
        import json
        import logging
        import re
        import time

        # Raw string: same pattern text, without invalid-escape warnings.
        self.time_pattern = r"\d{4}\-\d{2}\-\d{2}.*"
        logging.basicConfig(level=logging.INFO,
                            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    def process(self, doctextcon):
        """Forward the comma-joined labels whose keywords occur in the text.

        Args:
            doctextcon: document text to scan; ``None`` is ignored.

        Nothing is forwarded when ``doctextcon`` is ``None`` or when no label
        matches.
        """
        if doctextcon is None:
            return

        # ``all`` stops at the first missing pattern of a group and ``any``
        # stops at the first fully matching group, so no needless searches run.
        matched_labels = [
            label
            for label, groups in self._TYPE_KEYWORDS
            if any(
                all(re.search(pattern, doctextcon) is not None for pattern in group)
                for group in groups
            )
        ]

        if matched_labels:
            self.forward(",".join(matched_labels))