123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- import iepy
- iepy.setup(__file__)
- import time
- import pandas as pd
def statis(start_time="2021-08-25", end_time="2021-09-25", list_user=None):
    """Export per-user editing statistics to an Excel workbook.

    For each user, the ``edittime`` values of the rows in
    ``corpus_iedocument`` edited by that user within
    ``start_time < edittime <= end_time`` are loaded in ascending order.
    Gaps between consecutive edits that are shorter than ten minutes are
    summed up as working time; longer gaps are ignored.

    Args:
        start_time: Exclusive lower bound of the window. Defaults to the
            value that used to be hard-coded in this function.
        end_time: Inclusive upper bound of the window. Defaults to the
            value that used to be hard-coded in this function.
        list_user: Iterable of ``edituser`` names to report on. When
            ``None`` the original built-in list of accounts is used.

    Side effects:
        Writes ``check_<start_time>_<end_time>.xlsx`` to the current working
        directory with the columns ``user``, ``count`` (number of edited
        documents), ``online`` (summed gap seconds / 60 / 60 / 30) and
        ``secondperdoc`` (summed gap seconds / ``count``, 0 when the user
        edited nothing).
    """
    from django.db import connection

    if list_user is None:
        list_user = ["test1", "test7", "test8", "test21", "test27", "test2",
                     "test9", "test10", "test11", "test12", "test25",
                     "test22", "test29"]

    # Gaps of ten minutes or more are treated as a break, not working time.
    max_gap_seconds = 10 * 60

    # Placeholders are filled in by the database driver, so the values are
    # quoted correctly instead of being spliced into the statement text.
    sql = (" select edittime as et from corpus_iedocument"
           " where edituser=%s and edittime<=%s and edittime>%s"
           " order by edittime asc")

    df_data = {"user": [], "count": [], "online": [], "secondperdoc": []}

    # The context manager closes the cursor even if a query fails.
    with connection.cursor() as cursor:
        for _user in list_user:
            cursor.execute(sql, [_user, end_time, start_time])

            # Keep only "YYYY-mm-dd HH:MM:SS" (drops fractional seconds /
            # timezone suffix) and convert to epoch seconds in local time.
            list_et = [
                time.mktime(time.strptime(str(row[0])[:19], "%Y-%m-%d %H:%M:%S"))
                for row in cursor.fetchall()
            ]

            list_distance = [
                current - previous
                for previous, current in zip(list_et, list_et[1:])
                if current - previous < max_gap_seconds
            ]
            worked_seconds = sum(list_distance)

            df_data["user"].append(_user)
            df_data["count"].append(len(list_et))
            # NOTE(review): the divisor 30 presumably averages the hours over
            # a 30-day period - confirm against how the report is read.
            df_data["online"].append(worked_seconds / 60 / 60 / 30)
            df_data["secondperdoc"].append(
                worked_seconds / len(list_et) if len(list_et) > 0 else 0
            )

    df = pd.DataFrame(df_data)
    df.to_excel("check_%s_%s.xlsx" % (start_time, end_time))
def getWage(user,start_time,end_time,percentPass):
    """Compute annotation counts and the resulting wage for one user.

    The documents edited by ``user`` are read from ``corpus_iedocument``
    (ordered by ``edittime``), then every row of ``brat_bratannotation``
    belonging to those documents is priced. An annotation whose value starts
    with ``"T"`` is counted as an element, any other non-blank value as a
    relation. The rate per annotation depends on two properties of its
    document:

    * whether it is among the user's first 1200 documents of the selected
      window, and
    * whether it was edited before ``2021-05-07 14:00:00`` (local time).

    Args:
        user: Value matched against ``corpus_iedocument.edituser``.
        start_time: Exclusive lower bound compared against the edit date
            formatted as ``yyyy-mm-dd``; ``None`` disables the bound.
        end_time: Inclusive upper bound compared against the edit date
            formatted as ``yyyy-mm-dd``; ``None`` disables the bound.
        percentPass: Factor the accumulated wage is multiplied with before
            rounding.

    Returns:
        dict with ``docCount`` (number of documents), ``eleCount``,
        ``relCount`` and ``wage`` (rounded to two decimals).
    """
    import time
    from django.db import connection

    # (element rate, relation rate) keyed by
    # (document is within the first 1200, document edited before the split).
    rates = {
        (True, True): (0.03, 0.05),
        (True, False): (0.015, 0.025),
        (False, True): (0.04, 0.06),
        (False, False): (0.02, 0.03),
    }

    # Build the shared filter with placeholders; the values travel separately
    # so caller-supplied text can never alter the statement. ``str()`` keeps
    # the literal identical to what the old ``'%s'`` formatting produced.
    and_sql = " and 1=1 "
    params = [str(user)]
    if start_time is not None:
        and_sql += " and to_char(edittime,'yyyy-mm-dd')>%s "
        params.append(str(start_time))
    if end_time is not None:
        and_sql += " and to_char(edittime,'yyyy-mm-dd')<=%s"
        params.append(str(end_time))

    doc_sql = (" select human_identifier,to_char(edittime,'yyyy-mm-dd hh24:mi:ss')"
               " from corpus_iedocument where edituser=%s " + and_sql +
               " order by edittime asc")
    annotation_sql = (" select document_id,value from brat_bratannotation"
                      " where document_id in(select human_identifier"
                      " from corpus_iedocument where edituser=%s " + and_sql + ") ")

    # The context manager closes the cursor even if a query fails.
    with connection.cursor() as cursor:
        cursor.execute(doc_sql, params)
        doc_rows = cursor.fetchall()
        cursor.execute(annotation_sql, params)
        annotation_rows = cursor.fetchall()

    _time_split = time.mktime(time.strptime("2021-05-07 14:00:00","%Y-%m-%d %H:%M:%S"))
    list_docid = []
    set_docid_before = set()
    for human_identifier, edit_time in doc_rows:
        list_docid.append(human_identifier)
        _et = time.mktime(time.strptime(edit_time,"%Y-%m-%d %H:%M:%S"))
        if _et<_time_split:
            set_docid_before.add(human_identifier)

    # Rows are ordered by edit time, so this is the earliest 1200 documents.
    set_first = set(list_docid[:1200])

    eleCount = 0
    relCount = 0
    wage = 0
    for document_id,value in annotation_rows:
        # Blank annotation values are neither counted nor paid.
        if len(value.strip())==0:
            continue
        ele_rate, rel_rate = rates[(document_id in set_first,
                                    document_id in set_docid_before)]
        if value[0]=="T":
            eleCount += 1
            wage += ele_rate
        else:
            relCount += 1
            wage += rel_rate

    return {"docCount":len(list_docid),"eleCount":eleCount,"relCount":relCount,"wage":round(wage*percentPass,2)}
if __name__ == "__main__":
    # Running this module as a script generates the statistics workbook.
    statis()

    # Kept for reference: look up the end of a user's last payroll period and
    # compute the wage for a fixed window.
    #
    # from django.db import connection
    #
    # user = "test7"
    # cursor = connection.cursor()
    # sql = 'select max(end_time) from corpus_payroll where "user"=\'%s\''%(user)
    # cursor.execute(sql)
    # rows = cursor.fetchall()
    # start_time = rows[0][0]
    # print(getWage("test7","2021-04-25","2021-05-25",0.9))
|