statistic.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. import iepy
  2. iepy.setup(__file__)
  3. import time
  4. import pandas as pd
  5. def statis():
  6. from django.db import connection
  7. cursor = connection.cursor()
  8. df_data = {"user":[],"count":[],"online":[],"secondperdoc":[]}
  9. list_user = ["test2","test9","test10","test11","test12","test20","test1","test7","test17","test21","test3","test19","test22"]
  10. for _user in list_user:
  11. sql = " select to_char(edittime,'yyyy-mm-dd hh24:mi:ss') as et from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='2021-05-25' and to_char(edittime,'yyyy-mm-dd')>'2021-04-25' order by edittime asc"%_user
  12. cursor.execute(sql)
  13. rows = cursor.fetchall()
  14. list_et = []
  15. list_distance = []
  16. for row in rows:
  17. _t = time.mktime(time.strptime(row[0],'%Y-%m-%d %H:%M:%S'))
  18. list_et.append(_t)
  19. for i in range(1,len(list_et)):
  20. _d = list_et[i]-list_et[i-1]
  21. if _d<10*60:
  22. list_distance.append(_d)
  23. df_data["user"].append(_user)
  24. df_data["count"].append(len(list_et))
  25. df_data["online"].append(sum(list_distance)/60/60/30)
  26. df_data["secondperdoc"].append(sum(list_distance)/len(list_et))
  27. df = pd.DataFrame(df_data)
  28. df.to_excel("check.xlsx")
  29. def getWage(user,start_time,end_time,percentPass):
  30. import time
  31. from django.db import connection
  32. cursor = connection.cursor()
  33. and_sql = " and 1=1 "
  34. if start_time is not None:
  35. and_sql += " and to_char(edittime,'yyyy-mm-dd')>'%s' "%start_time
  36. if end_time is not None:
  37. and_sql += " and to_char(edittime,'yyyy-mm-dd')<='%s'"%end_time
  38. sql = " select human_identifier,to_char(edittime,'yyyy-mm-dd hh24:mi:ss') from corpus_iedocument where edituser='%s' %s order by edittime asc" %(user,and_sql)
  39. cursor.execute(sql)
  40. list_docid = []
  41. list_t = []
  42. set_docid_before = set()
  43. _time_split = time.mktime(time.strptime("2021-05-07 14:00:00","%Y-%m-%d %H:%M:%S"))
  44. for row in cursor.fetchall():
  45. list_docid.append(row[0])
  46. _et = time.mktime(time.strptime(row[1],"%Y-%m-%d %H:%M:%S"))
  47. if _et<_time_split:
  48. set_docid_before.add(row[0])
  49. set_first = set(list_docid[:1200])
  50. sql = " select document_id,value from brat_bratannotation where document_id in(select human_identifier from corpus_iedocument where edituser='%s' %s) "%(user,and_sql)
  51. cursor.execute(sql)
  52. eleCount = 0
  53. relCount = 0
  54. wage = 0
  55. for document_id,value in cursor.fetchall():
  56. if document_id in set_first:
  57. if document_id in set_docid_before:
  58. if len(value.strip())>0:
  59. if value[0]=="T":
  60. eleCount += 1
  61. wage += 0.03
  62. else:
  63. relCount += 1
  64. wage += 0.05
  65. else:
  66. if len(value.strip())>0:
  67. if value[0]=="T":
  68. eleCount += 1
  69. wage += 0.015
  70. else:
  71. relCount += 1
  72. wage += 0.025
  73. else:
  74. if document_id in set_docid_before:
  75. if len(value.strip())>0:
  76. if value[0]=="T":
  77. eleCount += 1
  78. wage += 0.04
  79. else:
  80. relCount += 1
  81. wage += 0.06
  82. else:
  83. if len(value.strip())>0:
  84. if value[0]=="T":
  85. eleCount += 1
  86. wage += 0.02
  87. else:
  88. relCount += 1
  89. wage += 0.03
  90. return {"docCount":len(list_docid),"eleCount":eleCount,"relCount":relCount,"wage":round(wage*percentPass,2)}
  91. if __name__=="__main__":
  92. statis()
  93. # from django.db import connection
  94. #
  95. # user = "test7"
  96. # cursor = connection.cursor()
  97. # sql = 'select max(end_time) from corpus_payroll where "user"=\'%s\''%(user)
  98. # cursor.execute(sql)
  99. # rows = cursor.fetchall()
  100. # start_time = rows[0][0]
  101. # print(getWage("test7","2021-04-25","2021-05-25",0.9))