|
@@ -81,8 +81,8 @@ def export_extract_ai1():
|
|
|
for docid in list_docid:
|
|
|
task_queue.put(docid)
|
|
|
_count += 1
|
|
|
- if _count>=1000:
|
|
|
- break
|
|
|
+ # if _count>=1000:
|
|
|
+ # break
|
|
|
|
|
|
def get_ai_money(_text):
|
|
|
b = re.search(r'[\d,,\.]+[亿万元人民币]+',str(_text))
|
|
@@ -183,10 +183,10 @@ def export_extract_ai1():
|
|
|
_d = {document_tmp_partitionkey:partitionkey,
|
|
|
document_tmp_docid:docid}
|
|
|
dtmp = Document(_d)
|
|
|
- if dtmp.fix_columns(ots_capacity,["dochtmlcon"],True) and dtmp.fix_columns(ots_client,["extract_json","docchannel"],True):
|
|
|
+ if dtmp.fix_columns(ots_client,["extract_json","docchannel","extract_json_ai"],True):
|
|
|
|
|
|
- if not dtmp.getProperties().get("docchannel",0) in (52,101,118,119,120,121,122):
|
|
|
- return
|
|
|
+ # if not dtmp.getProperties().get("docchannel",0) in (52,101,118,119,120,121,122):
|
|
|
+ # return
|
|
|
|
|
|
|
|
|
_dochtmlcon = dtmp.getProperties().get("dochtmlcon","")
|
|
@@ -199,20 +199,22 @@ def export_extract_ai1():
|
|
|
|
|
|
#model_name = "ep-20250212111145-fflr7" #1.5pro 256k
|
|
|
#model_name = "ep-20250314164242-jd62g" #1.5pro 32k
|
|
|
- result = chat_doubao(msg,model_name='ep-20250212111145-fflr7')
|
|
|
-
|
|
|
- _json_256k = get_json_from_text(result)
|
|
|
- _extract_ai_256k = {}
|
|
|
-
|
|
|
- if _json_256k is not None:
|
|
|
- try:
|
|
|
- _extract_ai_256k = json.loads(_json_256k)
|
|
|
- except Exception as e:
|
|
|
- pass
|
|
|
-
|
|
|
- result = chat_doubao(msg,model_name='ep-20250314164242-jd62g')
|
|
|
-
|
|
|
- _json_32k = get_json_from_text(result)
|
|
|
+ # result = chat_doubao(msg,model_name='ep-20250212111145-fflr7')
|
|
|
+
|
|
|
+ # _json_256k = get_json_from_text(result)
|
|
|
+ # _extract_ai_256k = {}
|
|
|
+ #
|
|
|
+ #
|
|
|
+ # if _json_256k is not None:
|
|
|
+ # try:
|
|
|
+ # _extract_ai_256k = json.loads(_json_256k)
|
|
|
+ # except Exception as e:
|
|
|
+ # pass
|
|
|
+ #
|
|
|
+ # result = chat_doubao(msg,model_name='ep-20250314164242-jd62g')
|
|
|
+ #
|
|
|
+ # _json_32k = get_json_from_text(result)
|
|
|
+ _json_32k = dtmp.getProperties().get("extract_json_ai","{}")
|
|
|
_extract_ai_32k = {}
|
|
|
|
|
|
if _json_32k is not None:
|
|
@@ -220,26 +222,26 @@ def export_extract_ai1():
|
|
|
_extract_ai_32k = json.loads(_json_32k)
|
|
|
except Exception as e:
|
|
|
pass
|
|
|
- clean_ai_extract(_extract,_extract_ai_256k)
|
|
|
+ # clean_ai_extract(_extract,_extract_ai_256k)
|
|
|
clean_ai_extract(_extract,_extract_ai_32k)
|
|
|
- tenderee1,win_tenderer1,_budget1,_win_price1 = get_columns_from_extract(_extract_ai_256k)
|
|
|
+ # tenderee1,win_tenderer1,_budget1,_win_price1 = get_columns_from_extract(_extract_ai_256k)
|
|
|
tenderee2,win_tenderer2,_budget2,_win_price2 = get_columns_from_extract(_extract_ai_32k)
|
|
|
_d = {
|
|
|
"docid":docid,
|
|
|
- "extract_ai_256k":_json_256k,
|
|
|
- "招标人_256k":tenderee1,
|
|
|
- "项目预算_256k":_budget1,
|
|
|
- "中标人_256k":win_tenderer1,
|
|
|
- "中标金额_256k":_win_price1,
|
|
|
+ # "extract_ai_256k":_json_256k,
|
|
|
+ # "招标人_256k":tenderee1,
|
|
|
+ # "项目预算_256k":_budget1,
|
|
|
+ # "中标人_256k":win_tenderer1,
|
|
|
+ # "中标金额_256k":_win_price1,
|
|
|
"extract_ai_32k":_json_32k,
|
|
|
"招标人_32k":tenderee2,
|
|
|
"项目预算_32k":_budget2,
|
|
|
"中标人_32k":win_tenderer2,
|
|
|
"中标金额_32k":_win_price2,
|
|
|
- "招标人对比":tenderee1==tenderee2,
|
|
|
- "中标人对比":win_tenderer1==win_tenderer2,
|
|
|
- "项目预算对比":_budget1==_budget2,
|
|
|
- "中标金额对比":_win_price1==_win_price2
|
|
|
+ # "招标人对比":tenderee1==tenderee2,
|
|
|
+ # "中标人对比":win_tenderer1==win_tenderer2,
|
|
|
+ # "项目预算对比":_budget1==_budget2,
|
|
|
+ # "中标金额对比":_win_price1==_win_price2
|
|
|
}
|
|
|
result_queue.put(_d)
|
|
|
|