浏览代码

AI提取上线

luojiehua 4 个月前
父节点
当前提交
c9b055442b
共有 2 个文件被更改,包括 39 次插入和 34 次删除
  1. 32 30
      BaseDataMaintenance/AIUtils/export.py
  2. 7 4
      BaseDataMaintenance/maintenance/dataflow_mq.py

+ 32 - 30
BaseDataMaintenance/AIUtils/export.py

@@ -81,8 +81,8 @@ def export_extract_ai1():
     for docid in list_docid:
         task_queue.put(docid)
         _count += 1
-        if _count>=1000:
-            break
+        # if _count>=1000:
+        #     break
 
     def get_ai_money(_text):
         b = re.search(r'[\d,,\.]+[亿万元人民币]+',str(_text))
@@ -183,10 +183,10 @@ def export_extract_ai1():
         _d = {document_tmp_partitionkey:partitionkey,
               document_tmp_docid:docid}
         dtmp = Document(_d)
-        if dtmp.fix_columns(ots_capacity,["dochtmlcon"],True) and dtmp.fix_columns(ots_client,["extract_json","docchannel"],True):
+        if dtmp.fix_columns(ots_client,["extract_json","docchannel","extract_json_ai"],True):
 
-            if not dtmp.getProperties().get("docchannel",0) in (52,101,118,119,120,121,122):
-                return
+            # if not dtmp.getProperties().get("docchannel",0) in (52,101,118,119,120,121,122):
+            #     return
 
 
             _dochtmlcon = dtmp.getProperties().get("dochtmlcon","")
@@ -199,20 +199,22 @@ def export_extract_ai1():
 
             #model_name = "ep-20250212111145-fflr7" #1.5pro 256k
             #model_name = "ep-20250314164242-jd62g" #1.5pro 32k
-            result = chat_doubao(msg,model_name='ep-20250212111145-fflr7')
-
-            _json_256k = get_json_from_text(result)
-            _extract_ai_256k = {}
-
-            if _json_256k is not None:
-                try:
-                    _extract_ai_256k = json.loads(_json_256k)
-                except Exception as e:
-                    pass
-
-            result = chat_doubao(msg,model_name='ep-20250314164242-jd62g')
-
-            _json_32k = get_json_from_text(result)
+            # result = chat_doubao(msg,model_name='ep-20250212111145-fflr7')
+
+            # _json_256k = get_json_from_text(result)
+            # _extract_ai_256k = {}
+            #
+            #
+            # if _json_256k is not None:
+            #     try:
+            #         _extract_ai_256k = json.loads(_json_256k)
+            #     except Exception as e:
+            #         pass
+            #
+            # result = chat_doubao(msg,model_name='ep-20250314164242-jd62g')
+            #
+            # _json_32k = get_json_from_text(result)
+            _json_32k = dtmp.getProperties().get("extract_json_ai","{}")
             _extract_ai_32k = {}
 
             if _json_32k is not None:
@@ -220,26 +222,26 @@ def export_extract_ai1():
                     _extract_ai_32k = json.loads(_json_32k)
                 except Exception as e:
                     pass
-            clean_ai_extract(_extract,_extract_ai_256k)
+            # clean_ai_extract(_extract,_extract_ai_256k)
             clean_ai_extract(_extract,_extract_ai_32k)
-            tenderee1,win_tenderer1,_budget1,_win_price1 = get_columns_from_extract(_extract_ai_256k)
+            # tenderee1,win_tenderer1,_budget1,_win_price1 = get_columns_from_extract(_extract_ai_256k)
             tenderee2,win_tenderer2,_budget2,_win_price2 = get_columns_from_extract(_extract_ai_32k)
             _d = {
                 "docid":docid,
-                "extract_ai_256k":_json_256k,
-                "招标人_256k":tenderee1,
-                "项目预算_256k":_budget1,
-                "中标人_256k":win_tenderer1,
-                "中标金额_256k":_win_price1,
+                # "extract_ai_256k":_json_256k,
+                # "招标人_256k":tenderee1,
+                # "项目预算_256k":_budget1,
+                # "中标人_256k":win_tenderer1,
+                # "中标金额_256k":_win_price1,
                 "extract_ai_32k":_json_32k,
                 "招标人_32k":tenderee2,
                 "项目预算_32k":_budget2,
                 "中标人_32k":win_tenderer2,
                 "中标金额_32k":_win_price2,
-                "招标人对比":tenderee1==tenderee2,
-                "中标人对比":win_tenderer1==win_tenderer2,
-                "项目预算对比":_budget1==_budget2,
-                "中标金额对比":_win_price1==_win_price2
+                # "招标人对比":tenderee1==tenderee2,
+                # "中标人对比":win_tenderer1==win_tenderer2,
+                # "项目预算对比":_budget1==_budget2,
+                # "中标金额对比":_win_price1==_win_price2
             }
             result_queue.put(_d)
 

+ 7 - 4
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -1321,7 +1321,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                 except Exception as e:
                     win_price = 0
                 if win_price>0:
-                    if win_price>100000000 or win_price<1000:
+                    if win_price>100000000 or win_price<100:
                         winprice_unexpected = True
 
 
@@ -1332,7 +1332,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                 tendereeMoney = 0
             if tendereeMoney>0:
                 has_budget = True
-                if tendereeMoney>100000000 or tendereeMoney<1000:
+                if tendereeMoney>100000000 or tendereeMoney<100:
                     budget_unexpected = True
 
         if has_entity and not one_entity_used:
@@ -1594,7 +1594,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                 except Exception as e:
                     win_price = 0
                 if win_price>0:
-                    if win_price>100000000 or win_price<1000:
+                    if win_price>100000000 or win_price<100:
                         winprice_unexpected = True
 
 
@@ -1605,7 +1605,7 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                 tendereeMoney = 0
             if tendereeMoney>0:
                 has_budget = True
-                if tendereeMoney>100000000 or tendereeMoney<1000:
+                if tendereeMoney>100000000 or tendereeMoney<100:
                     budget_unexpected = True
 
 
@@ -1639,6 +1639,9 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
                     _changed = True
                     _extract["extract_count"] += 1
                     Project["tendereeMoney_by_ai"] = True
+            else:
+                if budget_unexpected:
+                    Project["tendereeMoney"] = "0"
         if not has_win_tenderer or winprice_unexpected:
             list_win = _extract_ai.get("中标信息",[])
             if len(list_win)>0: