|
@@ -1,6 +1,6 @@
|
|
|
|
|
|
|
|
|
-from BiddingKG.dl.common.Utils import findAllIndex,debug,timeFormat,getCurrent_date,API_URL,uniform_package_name,money_process
|
|
|
+from BiddingKG.dl.common.Utils import findAllIndex,debug,timeFormat,getCurrent_date,API_URL,uniform_package_name,money_process,getDigitsDic,isValidDate
|
|
|
from BiddingKG.dl.interface.Entitys import PREM,Role,Entity
|
|
|
from decimal import Decimal
|
|
|
import re
|
|
@@ -3040,7 +3040,43 @@ def turnBidWay(bidway):
|
|
|
else:
|
|
|
return "其他"
|
|
|
|
|
|
-my_time_format_pattern = re.compile("((?P<year>\d{4}|\d{2})\s*[-\/年\.]\s*(?P<month>\d{1,2})\s*[-\/月\.]\s*(?P<day>\d{1,2}))")
|
|
|
# Ordered classification rules for turnMoneySource:
# (include regex, exclude regex or None, normalized label).
# A label is emitted when the include regex is found anywhere in the text and
# the exclude regex (if any) is found nowhere in it.
_MONEY_SOURCE_RULES = (
    ("自筹|业主筹集|筹资|自有", None, "自筹"),
    # "非财政" anywhere in the text suppresses the fiscal-funds label.
    ("财政", "非财政", "财政资金"),
    ("拨款|补助|划拨|拨付|国拨|上级资金", None, "上级拨款"),
    ("社会资本|社会资金", None, "社会资本"),
    ("贷款|借款|借贷", None, "贷款资金"),
    # A bare "债" already covers "债券" and "国债".
    ("债", None, "债券资金"),
    ("专项|项目资金", None, "项目专项资金"),
    ("配套", None, "配套资金"),
    # The lookbehind keeps "预算外资金" (extra-budgetary funds) from being
    # mistaken for "外资" (foreign capital).
    ("(?<!预算)外资", None, "外资"),
    ("国有资金|国企资金|国资|国家投资", None, "国有资金"),
    ("投资|融资", None, "投资资金"),
    # Negative lookahead: "预算" counts as in-budget unless followed by "外".
    ("预算(?!外)", None, "预算内资金"),
    ("预算外", None, "预算外资金"),
)


def turnMoneySource(moneysource):
    """Normalize a free-text funding-source description into category labels.

    Every rule in ``_MONEY_SOURCE_RULES`` is tested against ``moneysource``;
    the labels of all matching rules are sorted and joined with ",".

    :param moneysource: raw funding-source text; ``None`` or "" is accepted.
    :return: the comma-joined sorted labels when between one and four
        categories match, otherwise the fallback label "其他资金" (nothing
        matched, or five or more categories matched).
    """
    # Empty or missing text cannot match any rule.
    if not moneysource:
        return "其他资金"

    labels = sorted(
        label
        for include, exclude, label in _MONEY_SOURCE_RULES
        if re.search(include, moneysource)
        and not (exclude and re.search(exclude, moneysource))
    )

    # Five or more simultaneous categories fall back to the generic label.
    if 0 < len(labels) < 5:
        return ",".join(labels)
    return "其他资金"
|
|
|
+
|
|
|
# Date matcher: year (20xx, two digits, or Chinese numerals such as 二〇二二),
# then month and day (Arabic digits or Chinese numerals), separated by
# "-", "/", ".", or the 年/月 characters. Written as a raw string so that
# \d and \s reach the regex engine instead of being invalid string escapes.
my_time_format_pattern = re.compile(r"((?P<year>20\d{2}|\d{2}|二[零〇0][零〇一二三四五六七八九0]{2})\s*[-/年.]\s*(?P<month>\d{1,2}|[一二三四五六七八九十]{1,3})\s*[-/月.]\s*(?P<day>\d{1,2}|[一二三四五六七八九十]{1,3}))")
|
|
|
+from BiddingKG.dl.ratio.re_ratio import getUnifyNum
|
|
|
import time
|
|
|
def my_timeFormat(_time):
|
|
|
current_year = time.strftime("%Y",time.localtime())
|
|
@@ -3060,24 +3096,52 @@ def my_timeFormat(_time):
|
|
|
if k=="day":
|
|
|
day = v
|
|
|
if year!="":
|
|
|
- if len(year)==2:
|
|
|
- year = "20"+year
|
|
|
- if int(year)>int(current_year):
|
|
|
- legal = False
|
|
|
+ if re.search("^\d+$", year):
|
|
|
+ if len(year) == 2:
|
|
|
+ year = "20" + year
|
|
|
+ if int(year) > int(current_year):
|
|
|
+ legal = False
|
|
|
+ else:
|
|
|
+ _year = ""
|
|
|
+ for word in year:
|
|
|
+ if word == '0':
|
|
|
+ _year += word
|
|
|
+ else:
|
|
|
+ _year += str(getDigitsDic(word))
|
|
|
+ year = _year
|
|
|
else:
|
|
|
legal = False
|
|
|
if month!="":
|
|
|
- if int(month)>12:
|
|
|
- legal = False
|
|
|
+ if re.search("^\d+$", month):
|
|
|
+ if int(month) > 12:
|
|
|
+ legal = False
|
|
|
+ else:
|
|
|
+ month = int(getUnifyNum(month))
|
|
|
+ if month >= 1 and month <= 12:
|
|
|
+ month = str(month)
|
|
|
+ else:
|
|
|
+ legal = False
|
|
|
else:
|
|
|
legal = False
|
|
|
if day!="":
|
|
|
- if int(day)>31:
|
|
|
- legal = False
|
|
|
+ if re.search("^\d+$", day):
|
|
|
+ if int(day) > 31:
|
|
|
+ legal = False
|
|
|
+ else:
|
|
|
+ day = int(getUnifyNum(day))
|
|
|
+ if day >= 1 and day <= 31:
|
|
|
+ day = str(day)
|
|
|
+ else:
|
|
|
+ legal = False
|
|
|
else:
|
|
|
legal = False
|
|
|
+ if not isValidDate(int(year),int(month),int(day)):
|
|
|
+ legal = False
|
|
|
if legal:
|
|
|
- # return "%s-%s-%s"%(year,month.rjust(2,"0"),day.rjust(2,"0"))
|
|
|
+ # 数字字符格式化
|
|
|
+ year = str(int(year))
|
|
|
+ month = str(int(month))
|
|
|
+ day = str(int(day))
|
|
|
time_list.append("%s-%s-%s"%(year,month.rjust(2,"0"),day.rjust(2,"0")))
|
|
|
return time_list
|
|
|
|
|
@@ -3119,12 +3183,67 @@ def getTimeAttributes(list_entity,list_sentence):
|
|
|
sentence_text = list_sentence[entity.sentence_index].sentence_text
|
|
|
entity_left = sentence_text[max(0, entity.wordOffset_begin - 2):entity.wordOffset_begin]
|
|
|
entity_left2 = sentence_text[max(0, entity.wordOffset_begin - 10):entity.wordOffset_begin]
|
|
|
+ entity_left3 = sentence_text[max(0, entity.wordOffset_begin - 20):entity.wordOffset_begin]
|
|
|
entity_right = sentence_text[entity.wordOffset_end:entity.wordOffset_end + 3]
|
|
|
label_prob = entity.values[entity.label]
|
|
|
entity_text = entity.entity_text
|
|
|
in_attachment = entity.in_attachment
|
|
|
extract_time = my_timeFormat(entity_text)
|
|
|
+ # definite_time = "00:00:00"
|
|
|
+ # if extract_time:
|
|
|
+ # t = re.compile("(?P<day>下午|上午|早上)?(?P<hour>\d{1,2})[::时点](?P<half_hour>半)?(?P<minute>\d{2})?[::分]?(?P<second>\d{2})?秒?")
|
|
|
+ # t_in_word = re.search(t,entity_text)
|
|
|
+ # t_out_of_word = re.search("^[^\d]{,2}"+t.pattern,sentence_text[entity.wordOffset_end:])
|
|
|
+ # if t_in_word:
|
|
|
+ # print('t_in_word',entity_text,t_in_word.groupdict())
|
|
|
+ # day = t_in_word.groupdict().get('day',"")
|
|
|
+ # hour = t_in_word.groupdict().get('hour',"")
|
|
|
+ # half_hour = t_in_word.groupdict().get('half_hour',"")
|
|
|
+ # minute = t_in_word.groupdict().get('minute',"")
|
|
|
+ # second = t_in_word.groupdict().get('second',"")
|
|
|
+ # if hour:
|
|
|
+ # if day=='下午' and int(hour)<12:
|
|
|
+ # hour = str(int(hour)+12)
|
|
|
+ # if int(hour)>24:
|
|
|
+ # continue
|
|
|
+ # else:
|
|
|
+ # hour = "00"
|
|
|
+ # if not minute:
|
|
|
+ # if half_hour:
|
|
|
+ # minute = "30"
|
|
|
+ # else:
|
|
|
+ # minute = "00"
|
|
|
+ # if int(minute)>60:
|
|
|
+ # continue
|
|
|
+ # if not second:
|
|
|
+ # second = "00"
|
|
|
+ # if int(second)>60:
|
|
|
+ # continue
|
|
|
+ # # 数字字符格式化
|
|
|
+ # # hour = str(int(hour))
|
|
|
+ # # minute = str(int(minute))
|
|
|
+ # # second = str(int(second))
|
|
|
+ # definite_time = "%s:%s:%s"%(hour.rjust(2,"0"),minute.rjust(2,"0"),second.rjust(2,"0"))
|
|
|
+ # print(definite_time)
|
|
|
+ #
|
|
|
+ # elif t_out_of_word:
|
|
|
+ # print('t_out_of_word', entity_text+sentence_text[entity.wordOffset_end:], t_out_of_word.groupdict())
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
if extract_time:
|
|
|
+ # 优化多个并列的时间,如:开标时间和截标时间,截标时间和报名结束时间
|
|
|
+ if entity.label in [2,3,9]:
|
|
|
+ if entity.label==2 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
|
|
|
+ dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ if entity.label==3 and re.search("开标|评审.{,2}(?:开始)?时间|选取.{,2}时间",entity_left3):
|
|
|
+ dict_time['time_bidopen'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ if entity.label==3 and re.search("报名",entity_left3):
|
|
|
+ dict_time['time_registrationEnd'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+ if entity.label==9 and re.search("截标|投标.{,2}截止|递交(?:文件)?.{,2}截止|报价.{,2}截止|响应.{,2}截止",entity_left3):
|
|
|
+ dict_time['time_bidclose'].append((extract_time[0], 0.5, in_attachment))
|
|
|
+
|
|
|
+
|
|
|
# 2022/12/12 新增挂牌时间正则
|
|
|
if re.search("挂牌.{,4}(?:时间|日期)",entity_left2):
|
|
|
if re.search("挂牌.{,4}(?:时间|日期)",entity_left2).end()>len(entity_left2)/2:
|
|
@@ -3310,12 +3429,22 @@ def getOtherAttributes(list_entity):
|
|
|
"total_tendereeMoney":0,
|
|
|
"total_tendereeMoneyUnit":''}
|
|
|
list_serviceTime = []
|
|
|
+ last_moneysource_prob = 0
|
|
|
for entity in list_entity:
|
|
|
if entity.entity_type == 'bidway':
|
|
|
dict_other["bidway"] = turnBidWay(entity.entity_text)
|
|
|
elif entity.entity_type=='moneysource':
|
|
|
- dict_other["moneysource"] = entity.entity_text
|
|
|
+ if dict_other["moneysource"] and entity.in_attachment:
|
|
|
+ continue
|
|
|
+ if not dict_other["moneysource"]:
|
|
|
+ dict_other["moneysource"] = entity.entity_text
|
|
|
+ last_moneysource_prob = entity.prob
|
|
|
+ elif entity.prob>last_moneysource_prob:
|
|
|
+ dict_other["moneysource"] = entity.entity_text
|
|
|
+ last_moneysource_prob = entity.prob
|
|
|
elif entity.entity_type=='serviceTime':
|
|
|
+ if list_serviceTime and entity.in_attachment:
|
|
|
+ continue
|
|
|
if re.search("[^之]日|天|年|月|周|星期", entity.entity_text) or re.search("\d{4}[\-\./]\d{1,2}", entity.entity_text):
|
|
|
list_serviceTime.append(entity)
|
|
|
elif entity.entity_type=="person" and entity.label ==4:
|
|
@@ -3331,7 +3460,8 @@ def getOtherAttributes(list_entity):
|
|
|
max_prob_serviceTime = [ent for ent in list_serviceTime if ent.prob==max_prob]
|
|
|
max_prob_serviceTime.sort(key=lambda x:(x.sentence_index,x.begin_index))
|
|
|
dict_other["serviceTime"] = max_prob_serviceTime[0].entity_text
|
|
|
-
|
|
|
+ if dict_other['moneysource']:
|
|
|
+ dict_other['moneysource'] = turnMoneySource(dict_other['moneysource'])
|
|
|
# dict_other["product"] = list(set(dict_other["product"])) # 已在添加时 顺序去重保留
|
|
|
return dict_other
|
|
|
|