settlement.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. from django.db.models import Q
  2. import datetime,time
  3. import iepy
  4. iepy.setup(__file__)
  5. from iepy.data.db import DocumentManager
  6. from iepy.data.models import IEDocument,LabeledIEDocument,IEDocumentMetadata,LabeledIEDocumentMetadata,Payroll
  7. from brat.models import BratAnnotation,LabeledBratAnnotation
  8. from django.db import transaction
  9. import pandas as pd
  10. from django.contrib.auth.models import User
  11. def object_to_dict(obj,class_model):
  12. '''
  13. :param obj:对象
  14. :param class_model:django model
  15. :return: 由对象生成的键值对
  16. '''
  17. _dict = {}
  18. concrete_model = class_model._meta.concrete_model
  19. for field in concrete_model._meta.local_fields:
  20. value = field.value_from_object(obj)
  21. _dict[field.name] = value
  22. return _dict
  23. class Settlement():
  24. '''
  25. @summary: 结算类,定义了结算者所需要执行的各种方法
  26. '''
  27. def makePayroll(self,_user,time_begin,time_end):
  28. '''
  29. :param _user: 用户名
  30. :param time_begin: 起始时间
  31. :param time_end: 截至时间
  32. :return:根据用户,时间段生成用户的标注情况
  33. '''
  34. from django.db import connection
  35. with transaction.atomic():
  36. cursor = connection.cursor()
  37. sql = " select count(1) from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>='%s'"%(_user,time_end,time_begin)
  38. cursor.execute(sql)
  39. doc_count = cursor.fetchall()[0][0]
  40. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>='%s' order by edittime desc limit 1200) and value like '%s' "%(_user,time_end,time_begin,"T%")
  41. cursor.execute(sql)
  42. t_count = cursor.fetchall()[0][0]
  43. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>='%s' order by edittime desc limit 1200) and value like '%s' "%(_user,time_end,time_begin,"R%")
  44. cursor.execute(sql)
  45. r_count = cursor.fetchall()[0][0]
  46. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>='%s') and value like '%s' "%(_user,time_end,time_begin,"T%")
  47. cursor.execute(sql)
  48. all_t_count = cursor.fetchall()[0][0]
  49. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>='%s') and value like '%s' "%(_user,time_end,time_begin,"R%")
  50. cursor.execute(sql)
  51. all_r_count = cursor.fetchall()[0][0]
  52. wage = round(0.03*t_count+0.05*r_count+(all_t_count-t_count)*0.04+(all_r_count-r_count)*0.06,2)
  53. print(doc_count,t_count,r_count,wage)
  54. payrolls = Payroll.objects.filter(Q(user=_user)& Q(begin_time=time_begin) & Q(end_time=time_end))
  55. if len(payrolls)==0:
  56. _payroll = Payroll.objects.create(**{"user":_user,"doc_count":doc_count,"begin_time":time_begin,"end_time":time_end,"t_count":all_t_count,"r_count":all_r_count,"wage":wage})
  57. _payroll.save()
  58. else:
  59. _payroll = payrolls[0]
  60. _payroll.doc_count = doc_count
  61. _payroll.t_count = all_t_count
  62. _payroll.r_count = all_r_count
  63. _payroll.wage = wage
  64. _payroll.save()
  65. def exportPayroll(self,begin_time,end_time):
  66. '''
  67. :param begin_time: 导出开始时间
  68. :param end_time: 导出结束时间
  69. :return:
  70. '''
  71. list_user = []
  72. list_doc_count = []
  73. list_t_count = []
  74. list_r_count = []
  75. list_wage = []
  76. list_yield = []
  77. list_account = []
  78. list_begin = []
  79. list_end = []
  80. if begin_time is not None:
  81. payrolls = Payroll.objects.filter(Q(begin_time=begin_time) & Q(end_time=end_time))
  82. else:
  83. payrolls = Payroll.objects.filter(Q(end_time=end_time))
  84. for _payroll in payrolls:
  85. list_user.append(_payroll.user)
  86. list_doc_count.append(_payroll.doc_count)
  87. list_t_count.append(_payroll.t_count)
  88. list_r_count.append(_payroll.r_count)
  89. list_wage.append(_payroll.wage)
  90. list_yield.append(_payroll._yield)
  91. list_account.append(_payroll.account)
  92. list_begin.append(_payroll.begin_time)
  93. list_end.append(_payroll.end_time)
  94. df = pd.DataFrame({"用户":list_user,"开始时间":list_begin,"结束时间":list_end,"文章数":list_doc_count,"要素数":list_t_count,"关系数":list_r_count,"总价":list_wage,"合格率":list_yield,"结算价":list_account})
  95. df.to_excel("%s-%s要素标注统计.xls"%(begin_time,end_time),columns=["用户","开始时间","结束时间","文章数","要素数","关系数","总价","合格率","结算价"])
  96. def getAllUser(self):
  97. from django.db import connection
  98. with transaction.atomic():
  99. list_user = []
  100. cursor = connection.cursor()
  101. sql = "select username from auth_user where is_staff='t'"
  102. cursor.execute(sql)
  103. for row in cursor.fetchall():
  104. list_user.append(row[0])
  105. return list_user
  106. def makeMigrate(self,_user,time_begin,time_end):
  107. '''
  108. :param _user: 用户名
  109. :param time_begin: 起始时间
  110. :param time_end: 截至时间
  111. :return: 将用户在时间段内的数据迁移到标准表中
  112. '''
  113. pass
  114. # from django.db import connection
  115. # with transaction.atomic():
  116. # cursor = connection.cursor()
  117. # sql = " select human_identifier,offsets_to_text,sentences from corpus_iedocument where edituser is null"
  118. # cursor.execute(sql)
  119. # cursor1 = connection.cursor()
  120. # _index = 0
  121. # rows = True
  122. # while(rows):
  123. # rows=cursor.fetchmany(1000)
  124. # for row in rows:
  125. # _index += 1
  126. # print(_index)
  127. # human_identifier,offsets_to_text,sentences = row
  128. # if sentences!="[]":
  129. # _off = offsets_to_text.split(", ")[-1][:-1]
  130. # _sen = sentences.split(", ")[-1][:-1]
  131. # print(_off,_sen)
  132. # if int(_off)!=int(_sen):
  133. # offsets_to_text = offsets_to_text[:-1]+", "+str(int(_sen))+"]"
  134. # print(offsets_to_text)
  135. # cursor1.execute("update corpus_iedocument set offsets_to_text='%s' where human_identifier='%s'"%(offsets_to_text,human_identifier))
  136. # ieDocuments = IEDocument.objects.filter(Q(edituser=_user) & Q(edittime__range=(time_begin,time_end)))
  137. # for obj in ieDocuments:
  138. # _dict = object_to_dict(obj,IEDocument)
  139. # _dict_meta = object_to_dict(obj.metadata,IEDocumentMetadata)
  140. # labeledMeta = LabeledIEDocumentMetadata.objects.create(**_dict_meta)
  141. # labeledMeta.save()
  142. # _dict["metadata"] = labeledMeta
  143. # tmp = LabeledIEDocument.objects.create(**_dict)
  144. # tmp.save()
  145. #
  146. # bratAnnotations = BratAnnotation.objects.filter(Q(document_id=obj.human_identifier))
  147. # for ann in bratAnnotations:
  148. # _dict_ann = object_to_dict(ann,BratAnnotation)
  149. # labeledAnn = LabeledBratAnnotation.objects.create(**_dict_ann)
  150. # labeledAnn.save()
  151. def getPercentOfPass(self,_user,time_begin,time_end):
  152. '''
  153. :param _user:用户名
  154. :param time_begin: 起始时间
  155. :param time_end: 截至时间
  156. :return: 获得用户在时间段内标注数据的合格率
  157. '''
  158. def makePayrolls(self,time_begin,time_end):
  159. '''
  160. :param time_begin:起始时间
  161. :param time_end: 截至时间
  162. :return: 获得所有用户的工资表
  163. '''
  164. for _user in self.getAllUser():
  165. self.makePayroll(_user,time_begin,time_end)
  166. self.exportPayroll(time_begin,time_end)
  167. def createUser_batch(self,batch_size=90):
  168. '''
  169. :param batch_size: 用户个数
  170. :return:
  171. '''
  172. list_user = [User.objects.create_user(username="bidi%d"%(i+1),password="bidi%d"%(i+1)) for i in range(batch_size)]
  173. def exportLabels(self):
  174. groups = [[1,7],[8,14],[15,22],[23,29],[30,36],[37,43],[44,50],[51,56],[57,62],[63,71]]
  175. from django.db import connection
  176. cursor = connection.cursor()
  177. for _i in range(len(groups)):
  178. _begin,_end = groups[_i]
  179. list_username = []
  180. list_user = []
  181. list_label = []
  182. list_time = []
  183. for _j in range(_begin,_end+1):
  184. username = "bidi%d"%_j
  185. list_username.append("'%s'"%username)
  186. sql = " select edituser,human_identifier,to_char(edittime,'yyyy-mm-dd') from corpus_iedocument where edituser in(%s) order by edittime asc"%(",".join(list_username))
  187. print(sql)
  188. cursor.execute(sql)
  189. rows = cursor.fetchall()
  190. for row in rows:
  191. list_user.append(row[0])
  192. list_label.append(row[1])
  193. list_time.append(row[2])
  194. df = pd.DataFrame({"时间":list_time,"用户":list_user,"文章编号":list_label})
  195. df.to_excel("分组_%d.xls"%(_i+1),columns=["时间","用户","文章编号"])
  196. def filter(self):
  197. '''
  198. 过滤拍卖公告
  199. :return:
  200. '''
  201. import re
  202. ieDocuments = IEDocument.objects.all()
  203. for obj in ieDocuments:
  204. if re.search("拍卖",obj.text) is not None:
  205. obj.jump_signal = 1
  206. obj.save()
  207. print(obj.human_identifier)
  208. if __name__=="__main__":
  209. settle = Settlement()
  210. # settle.makeMigrate("test","2020-08-01","2020-08-31")
  211. # settle.makePayroll("test17","2020-08-01","2020-10-31")
  212. # settle.makePayrolls("2020-08-01","2020-08-31")
  213. settle.exportPayroll(begin_time=None,end_time='2020-10-31')
  214. # settle.createUser_batch(batch_size=102)
  215. # settle.exportLabels()
  216. # settle.filter()