|
@@ -139,6 +139,8 @@ class ExtractMetric():
|
|
_split = v.get("type").split("_")
|
|
_split = v.get("type").split("_")
|
|
if v.get("type") in ["money_tendereeMoney"]:
|
|
if v.get("type") in ["money_tendereeMoney"]:
|
|
_before_text = Htext[max(v["begin"]-10,0):v["begin"]]
|
|
_before_text = Htext[max(v["begin"]-10,0):v["begin"]]
|
|
|
|
+ if re.search('总投资|投资总额|总预算|总概算|投资规模|投资|工程造价', _before_text):
|
|
|
|
+ continue
|
|
if re.search("万",_before_text) is not None and re.search("整",_before_text) is None:
|
|
if re.search("万",_before_text) is not None and re.search("整",_before_text) is None:
|
|
_unit = 10000
|
|
_unit = 10000
|
|
else:
|
|
else:
|
|
@@ -150,6 +152,8 @@ class ExtractMetric():
|
|
for _k,_v in dict_role.items():
|
|
for _k,_v in dict_role.items():
|
|
if _v["subject"]==dict_anno[arg1]["text"]:
|
|
if _v["subject"]==dict_anno[arg1]["text"]:
|
|
_before_text = Htext[max(dict_anno[arg2]["begin"]-10,0):dict_anno[arg2]["begin"]]
|
|
_before_text = Htext[max(dict_anno[arg2]["begin"]-10,0):dict_anno[arg2]["begin"]]
|
|
|
|
+ if re.search('总投资|投资总额|总预算|总概算|投资规模|投资|工程造价', _before_text):
|
|
|
|
+ continue
|
|
if re.search("万",_before_text) is not None and re.search("整",_before_text) is None:
|
|
if re.search("万",_before_text) is not None and re.search("整",_before_text) is None:
|
|
_unit = 10000
|
|
_unit = 10000
|
|
else:
|
|
else:
|
|
@@ -222,7 +226,7 @@ class ExtractMetric():
|
|
_user = _payroll[2]
|
|
_user = _payroll[2]
|
|
doc_count = _payroll[3]
|
|
doc_count = _payroll[3]
|
|
print(_user,_begin_time,_end_time,doc_count)
|
|
print(_user,_begin_time,_end_time,doc_count)
|
|
- _sql = "select document_id,value from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')>='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' limit 10) order by document_id"%(_user,_begin_time,_end_time)
|
|
|
|
|
|
+ _sql = "select document_id,value from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')>='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' limit 100) order by document_id"%(_user,_begin_time,_end_time)
|
|
cursor.execute(_sql)
|
|
cursor.execute(_sql)
|
|
rows = cursor.fetchall()
|
|
rows = cursor.fetchall()
|
|
if len(rows)>0:
|
|
if len(rows)>0:
|
|
@@ -341,6 +345,13 @@ class ExtractMetric():
|
|
_score = jaccard_score(dict_project.get(k),dict_project.get(k_other))
|
|
_score = jaccard_score(dict_project.get(k),dict_project.get(k_other))
|
|
if _score>0.9:
|
|
if _score>0.9:
|
|
_dict["%s_union"%base_key] = 1
|
|
_dict["%s_union"%base_key] = 1
|
|
|
|
+ else:
|
|
|
|
+ #通过规则召回的也算
|
|
|
|
+ if dict_project.get("%s_%s"%(base_key,"inter")) is None and dict_project.get("%s_%s"%(base_key,"inter2")) is not None:
|
|
|
|
+ _dict[k] = 1
|
|
|
|
+ _dict[k_other] = 1
|
|
|
|
+ _dict["%s_union"%base_key] = 1
|
|
|
|
+
|
|
else:
|
|
else:
|
|
_dict["%s_union"%base_key] = len(set(v)&set(dict_project.get(k_other,[])))
|
|
_dict["%s_union"%base_key] = len(set(v)&set(dict_project.get(k_other,[])))
|
|
set_k.add(base_key)
|
|
set_k.add(base_key)
|