luojiehua před 2 roky
rodič
revize
e1840314a5

+ 9 - 9
examples/coreline/bin/settlement.py

@@ -41,33 +41,33 @@ def getWage(user,start_time,end_time,percentPass):
 
     print(user,start_time,current_begin,end_time)
 
-    sql = " select count(1) from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s'"%(user,end_time,start_time)
+    sql = " select count(1) from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s'"%(user,end_time,start_time)
     cursor.execute(sql)
     doc_count = cursor.fetchall()[0][0]
     print("doc_count",doc_count)
-    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s' order by edittime desc limit 1200) and value like '%s' "%(user,end_time,current_begin,"T%")
+    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s' order by edittime desc limit 1200) and value like '%s' "%(user,end_time,current_begin,"T%")
     cursor.execute(sql)
     t_count = cursor.fetchall()[0][0]
     print("t_count",t_count)
-    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s' order by edittime desc limit 1200) and value like '%s' "%(user,end_time,current_begin,"R%")
+    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s' order by edittime desc limit 1200) and value like '%s' "%(user,end_time,current_begin,"R%")
     cursor.execute(sql)
     r_count = cursor.fetchall()[0][0]
     print("r_count",r_count)
-    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(user,end_time,current_begin,"T%")
+    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,end_time,current_begin,"T%")
     cursor.execute(sql)
     all_t_count = cursor.fetchall()[0][0]
     print("all_t_count",all_t_count)
-    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(user,end_time,current_begin,"R%")
+    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,end_time,current_begin,"R%")
     cursor.execute(sql)
     all_r_count = cursor.fetchall()[0][0]
     print("all_r_count",all_r_count)
 
-    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(user,current_begin,start_time,"T%")
+    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,current_begin,start_time,"T%")
     cursor.execute(sql)
     other_t_count = cursor.fetchall()[0][0]
     print("other_t_count",other_t_count)
 
-    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(user,current_begin,start_time,"R%")
+    sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,current_begin,start_time,"R%")
     cursor.execute(sql)
     other_r_count = cursor.fetchall()[0][0]
     print("other_r_count",other_r_count)
@@ -414,8 +414,8 @@ def makeCheck():
 if __name__=="__main__":
     settle = Settlement()
     # settle.makeMigrate("test","2020-08-01","2020-08-31")
-    settle_end = "2021-07-25"
-    settle.makePayroll(["test2","test9","test10","test11","test12","test16","test20","test25","test19","test22","test1","test7","test8","test17","test21","test26"],"2020-08-01",settle_end)
+    settle_end = "2021-09-25"
+    settle.makePayroll(["test1","test7","test8","test21","test27","test2","test9","test10","test11","test12","test25","test22","test29"],"2020-08-01",settle_end)
     settle.exportPayroll(begin_time=None,end_time=settle_end)
 
     # settle.createUser_batch(batch_size=102)

+ 7 - 5
examples/coreline/bin/statistic.py

@@ -8,11 +8,13 @@ def statis():
     from django.db import connection
 
     cursor = connection.cursor()
+    start_time = "2021-08-25"
+    end_time = "2021-09-25"
 
     df_data = {"user":[],"count":[],"online":[],"secondperdoc":[]}
-    list_user = ["test2","test9","test10","test11","test12","test20","test1","test7","test17","test21","test3","test19","test22"]
+    list_user = ["test1","test7","test8","test21","test27","test2","test9","test10","test11","test12","test25","test22","test29"]
     for _user in list_user:
-        sql = " select to_char(edittime,'yyyy-mm-dd hh24:mi:ss') as et  from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='2021-05-25' and to_char(edittime,'yyyy-mm-dd')>'2021-04-25' order by edittime asc"%_user
+        sql = " select edittime as et  from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s' order by edittime asc"%(_user,end_time,start_time)
 
         cursor.execute(sql)
 
@@ -21,7 +23,7 @@ def statis():
         list_et = []
         list_distance = []
         for row in rows:
-            _t = time.mktime(time.strptime(row[0],'%Y-%m-%d %H:%M:%S'))
+            _t = time.mktime(time.strptime(str(row[0])[:19],'%Y-%m-%d %H:%M:%S'))
             list_et.append(_t)
         for i in range(1,len(list_et)):
             _d = list_et[i]-list_et[i-1]
@@ -30,9 +32,9 @@ def statis():
         df_data["user"].append(_user)
         df_data["count"].append(len(list_et))
         df_data["online"].append(sum(list_distance)/60/60/30)
-        df_data["secondperdoc"].append(sum(list_distance)/len(list_et))
+        df_data["secondperdoc"].append(sum(list_distance)/len(list_et) if len(list_et)>0 else 0)
     df = pd.DataFrame(df_data)
-    df.to_excel("check.xlsx")
+    df.to_excel("check_%s_%s.xlsx"%(start_time,end_time))
 
 def getWage(user,start_time,end_time,percentPass):
     import time

+ 1 - 1
iepy/selfpreprocess/BiddingKG/dl/entityLink/entityLink.py

@@ -58,7 +58,7 @@ def link_entitys(list_entitys,on_value=0.8):
                     if re.search("公司$",_ent.entity_text) is not None:
                         if len(_ent.entity_text)>len(_entity.entity_text):
                             _entity.entity_text = _ent.entity_text
- 
+
 DICT_ENTERPRISE = {}
 DICT_ENTERPRISE_DONE = False
 def getDict_enterprise():

+ 1 - 1
setup.py

@@ -17,7 +17,7 @@ with open(path.join(HERE, 'iepy', 'version.txt'), encoding='utf-8') as f:
 base_reqs = """nltk>=3.2.1
 numpy>=1.8.0
 scipy>=0.13.3
-scikit-learn==0.15.2
+scikit-learn==1.0.2
 REfO==0.13
 docopt==0.6.1
 future==0.11.4