luojiehua 2 жил өмнө
parent
commit
9c0e76d63d

+ 3 - 0
BaseDataMaintenance/common/otsUtils.py

@@ -22,6 +22,9 @@ def getRow_ots(rows):
         list_dict.append(_dict)
     return list_dict
 
+def getPartitionKey(docid):  # map a document id to a bucket number (a partition key, going by the function name)
+    return int(docid)%500+1  # int(docid) modulo 500, then +1, so the result is always in 1..500
+
 def getDocument(_query,ots_client,columns,sort="page_time",table_name="document",table_index="document_index"):
     try:
         item = _query.get("query")

+ 1 - 1
BaseDataMaintenance/dataSource/pool.py

@@ -47,4 +47,4 @@ class ConnectorPool():
 
 
 if __name__ == '__main__':
-    a = A()
+    print(1)

+ 6 - 0
BaseDataMaintenance/dataSource/setttings.py

@@ -64,6 +64,12 @@ activateMQ_port = 61613
 activateMQ_user = "admin"
 activateMQ_pswd = "admin"
 
+# activateMQ_ali_host = "172.16.147.13"
+activateMQ_ali_host = "116.62.167.43"  # host of the second ("ali") broker; the line above keeps an alternative address disabled
+activateMQ_ali_port = 61613  # same port value as activateMQ_port
+activateMQ_ali_user = "admin"  # NOTE(review): login and password are hard-coded in the repo - consider env vars or a secrets store
+activateMQ_ali_pswd = "admin"
+
 
 # attach_postgres_host = "121.46.18.113"
 # attach_postgres_host = "127.0.0.1"

+ 6 - 0
BaseDataMaintenance/dataSource/source.py

@@ -134,6 +134,12 @@ def getConnect_activateMQ():
     conn.connect(login=activateMQ_user, passcode=activateMQ_pswd)
     return conn
 
+def getConnect_activateMQ_ali():  # open and return a stomp connection to the broker named by the activateMQ_ali_* values
+    import stomp  # imported inside the function rather than at module top
+    conn = stomp.Connection(host_and_ports=[(activateMQ_ali_host, activateMQ_ali_port)])  # single (host, port) pair, no fallback brokers
+    conn.connect(login=activateMQ_ali_user, passcode=activateMQ_ali_pswd)  # NOTE(review): no wait= argument is passed - confirm callers do not need a confirmed connection
+    return conn  # nothing in this function disconnects; the caller keeps the connection
+
 
 if __name__=="__main__":
     # solrQuery("document",{"q":"*:*"})

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 949 - 57
BaseDataMaintenance/maintenance/dataflow.py


+ 3 - 3
BaseDataMaintenance/maintenance/dataflow_mq.py

@@ -519,9 +519,6 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
             self.conn = conn
             self._func = _func
 
-        def on_error(self, headers):
-            log('received an error %s' % str(headers.body))
-
         def on_message(self, headers):
             try:
                 message_id = headers.headers["message-id"]
@@ -531,6 +528,9 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
             except Exception as e:
                 pass
 
+        def on_error(self, headers):  # presumably a stomp listener callback (class header not visible here) - logs the error body
+            log('received an error %s' % str(headers.body))  # despite its name, `headers` is used as an object with a .body attribute
+
         def __del__(self):
             self.conn.disconnect()
 

+ 37 - 0
BaseDataMaintenance/model/ots/document.py

@@ -22,10 +22,47 @@ document_fingerprint = "fingerprint"
 document_opertime = "opertime"
 document_docchannel = "docchannel"
 document_original_docchannel = "original_docchannel"
+document_life_docchannel = "life_docchannel"
 document_area = "area"
 document_province = "province"
 document_city = "city"
 document_district = "district"
+document_extract_json = "extract_json"  # field-name constants: in this run each value equals the constant name minus the "document_" prefix
+document_bidway = "bidway"
+document_industry = "industry"
+document_info_type = "info_type"
+document_qcodes = "qcodes"
+document_project_name = "project_name"
+document_project_code = "project_code"
+document_project_codes = "project_codes"
+document_tenderee = "tenderee"
+document_tenderee_addr = "tenderee_addr"
+document_tenderee_phone = "tenderee_phone"
+document_tenderee_contact = "tenderee_contact"
+document_agency = "agency"
+document_agency_phone = "agency_phone"
+document_agency_contact = "agency_contact"
+document_product = "product"
+
+document_moneysource = "moneysource"  # this group has the same string values, in the same order, as the project_* group in model/ots/project.py
+document_service_time = "service_time"
+document_time_bidclose = "time_bidclose"
+document_time_bidopen = "time_bidopen"
+document_time_bidstart = "time_bidstart"
+document_time_commencement = "time_commencement"
+document_time_completion = "time_completion"
+document_time_earnest_money_start = "time_earnest_money_start"
+document_time_earnest_money_end = "time_earnest_money_end"
+document_time_get_file_end = "time_get_file_end"
+document_time_get_file_start = "time_get_file_start"
+document_time_publicity_end = "time_publicity_end"
+document_time_publicity_start = "time_publicity_start"
+document_time_registration_end = "time_registration_end"
+document_time_registration_start = "time_registration_start"
+document_time_release = "time_release"
+
+
+
 
 class Document(BaseModel):
 

+ 27 - 20
BaseDataMaintenance/model/ots/document_tmp.py

@@ -255,22 +255,24 @@ def turn_document_tmp_status():
 
 
         bool_query = BoolQuery(
-            # must_queries=[
-            #     BoolQuery(should_queries=[
-            #                               # TermQuery("tenderee","山西利民工业有限责任公司"),
-            #                               # MatchPhraseQuery("doctitle","中国电信"),
-            #                               # MatchPhraseQuery("doctextcon","中国电信"),
-            #                               # MatchPhraseQuery("attachmenttextcon","中国电信")]),
-            #                               # RangeQuery(document_tmp_status,88,120,True,True),
-            #                               RangeQuery("page_time","2022-03-24","2022-03-25",True,False),
-            #                               ExistsQuery
-            #                                      #,TermQuery(document_tmp_docid,171146519)
-            #                                      ]
-            #     )
-            # ],
-            must_not_queries=[ExistsQuery("status"),
-                              ExistsQuery("page_time"),
-                              ]
+            must_queries=[
+                RangeQuery("status",66,71),
+                # BoolQuery(should_queries=[
+                #                           # TermQuery("tenderee","山西利民工业有限责任公司"),
+                #                           # MatchPhraseQuery("doctitle","中国电信"),
+                #                           # MatchPhraseQuery("doctextcon","中国电信"),
+                #                           # MatchPhraseQuery("attachmenttextcon","中国电信")]),
+                #                           # RangeQuery(document_tmp_status,88,120,True,True),
+                #                           RangeQuery("page_time","2022-03-24","2022-03-25",True,False),
+                #                           ExistsQuery
+                #                                  #,TermQuery(document_tmp_docid,171146519)
+                #                                  ]
+                # )
+            ],
+            # must_not_queries=[ExistsQuery("status"),
+            #                   ExistsQuery("page_time"),
+            #
+            #                   ]
         )
 
         rows,next_token,total_count,is_all_succeed = ots_client.search("document_tmp","document_tmp_index",
@@ -278,7 +280,7 @@ def turn_document_tmp_status():
                                                                        columns_to_get=ColumnsToGet(["extract_json"],return_type=ColumnReturnType.SPECIFIED))
         list_data = getRow_ots(rows)
         print(total_count)
-        print(list_data)
+        # print(list_data)
         _count = len(list_data)
         for _data in list_data:
             _document = Document_tmp(_data)
@@ -330,10 +332,15 @@ def turn_document_tmp_status():
 
         #change status
         # item.setValue(document_tmp_docchannel,item.getProperties().get(document_tmp_original_docchannel),True)
-        # item.setValue(document_tmp_status,random.randint(151,171),True)
-        # item.update_row(ots_client)
+        _extract_json = item.getProperties().get(document_tmp_extract_json,"")
+        _extract_json = _extract_json.replace("\x06", "").replace("\x05", "").replace("\x07", "").replace('\\', '')
+        item.setValue(document_tmp_extract_json,_extract_json,True)
+        json.loads(_extract_json)
+        # item.setValue(document_tmp_status,71,True)
+        # item.setValue(document_tmp_save,1,True)
+        item.update_row(ots_client)
         # log("update %d status done"%(item.getProperties().get(document_tmp_docid)))
-        item.delete_row(ots_client)
+        # item.delete_row(ots_client)
         pass
 
 

+ 19 - 0
BaseDataMaintenance/model/ots/project.py

@@ -44,7 +44,26 @@ project_bidway = "bidway"
 project_dup_data = "dup_data"
 project_docid_number = "docid_number"
 project_dynamics = "project_dynamic"
+project_product = "product"
 
+project_moneysource = "moneysource"  # field-name constants: same string values, in the same order, as the document_* group in model/ots/document.py
+project_service_time = "service_time"
+project_time_bidclose = "time_bidclose"
+project_time_bidopen = "time_bidopen"
+project_time_bidstart = "time_bidstart"
+project_time_commencement = "time_commencement"
+project_time_completion = "time_completion"
+project_time_earnest_money_start = "time_earnest_money_start"
+project_time_earnest_money_end = "time_earnest_money_end"
+project_time_get_file_end = "time_get_file_end"
+project_time_get_file_start = "time_get_file_start"
+project_time_publicity_end = "time_publicity_end"
+project_time_publicity_start = "time_publicity_start"
+project_time_registration_end = "time_registration_end"
+project_time_registration_start = "time_registration_start"
+project_time_release = "time_release"  # last of the time_* names in this run
+
+project_dup_docid = "dup_docid"
 
 
 class Project(BaseModel):

Файлын зөрүү хэтэрхий том тул дарагдсан байна
+ 2 - 0
BaseDataMaintenance/test/1.py


Энэ ялгаанд хэт олон файл өөрчлөгдсөн тул зарим файлыг харуулаагүй болно