Parcourir la source

1. 新增两个拟在建字段,用于建索引
2. extract_json中pb删掉空值

fangjiasheng il y a 1 an
Parent
commit
61c0b0ab27

+ 4 - 0
BiddingKG/dl/interface/extract.py

@@ -449,6 +449,10 @@ def predict(doc_id,text,title="",page_time="",web_source_no='',web_source_name="
     data_res["cost_time"] = cost_time
     data_res["success"] = True
 
+    # 拟在建需建索引字段
+    data_res["proportion"] = pb_json.get('pb').get('proportion', '')
+    data_res["pb_project_name"] = pb_json.get('pb').get('project_name_refind', '')
+
     # for _article in list_articles:
     #         log(_article.content)
     #

+ 10 - 6
BiddingKG/dl/proposed_building/pb_extract.py

@@ -133,11 +133,6 @@ class PBPredictor:
                     has_stage = 0
 
                 pb_json = {
-                    'tenderee': tenderee,
-                    'agency': agency,
-                    'project_code': project_code,
-                    'project_name': project_name,
-                    'doctitle': doctitle,
                     'stage': stage,
                     'industry': industry,
                     'proportion': proportion,
@@ -160,12 +155,21 @@ class PBPredictor:
                     'has_stage': has_stage,
                 }
 
+                # 值为空(None、""、0.0、0)的key删掉
+                delete_keys = []
+                for key in pb_json.keys():
+                    if pb_json.get(key) in [None, "", 0.0, 0]:
+                        delete_keys.append(key)
+                for key in delete_keys:
+                    if key in pb_json.keys():
+                        pb_json.pop(key)
+
                 pb_json = {'pb': pb_json}
                 return pb_json
 
         except:
             traceback.print_exc()
-            return {'pb': 'error'}
+            return {'pb': {}}
 
 
 def extract_legal_stage(content, _pattern, priority_dict, product='', tenderee='', agency=''):