settlement.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. from django.db.models import Q
  2. import datetime,time
  3. import iepy
  4. iepy.setup(__file__)
  5. from iepy.data.db import DocumentManager
  6. from iepy.data.models import IEDocument,LabeledIEDocument,IEDocumentMetadata,LabeledIEDocumentMetadata,Payroll
  7. from brat.models import BratAnnotation,LabeledBratAnnotation
  8. from django.db import transaction
  9. import pandas as pd
  10. from django.contrib.auth.models import User
  11. def object_to_dict(obj,class_model):
  12. '''
  13. :param obj:对象
  14. :param class_model:django model
  15. :return: 由对象生成的键值对
  16. '''
  17. _dict = {}
  18. concrete_model = class_model._meta.concrete_model
  19. for field in concrete_model._meta.local_fields:
  20. value = field.value_from_object(obj)
  21. _dict[field.name] = value
  22. return _dict
  23. def timeAdd(_time,days):
  24. a = time.mktime(time.strptime(_time,'%Y-%m-%d'))+86400*days
  25. _time1 = time.strftime("%Y-%m-%d",time.localtime(a))
  26. return _time1
  27. def getWage(user,start_time,end_time,percentPass):
  28. import time
  29. from django.db import connection
  30. cursor = connection.cursor()
  31. current_begin = max(timeAdd(end_time,-30),start_time)
  32. print(user,start_time,current_begin,end_time)
  33. sql = " select count(1) from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s'"%(user,end_time,start_time)
  34. cursor.execute(sql)
  35. doc_count = cursor.fetchall()[0][0]
  36. print("doc_count",doc_count)
  37. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s' order by edittime desc limit 1200) and value like '%s' "%(user,end_time,current_begin,"T%")
  38. cursor.execute(sql)
  39. t_count = cursor.fetchall()[0][0]
  40. print("t_count",t_count)
  41. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s' order by edittime desc limit 1200) and value like '%s' "%(user,end_time,current_begin,"R%")
  42. cursor.execute(sql)
  43. r_count = cursor.fetchall()[0][0]
  44. print("r_count",r_count)
  45. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,end_time,current_begin,"T%")
  46. cursor.execute(sql)
  47. all_t_count = cursor.fetchall()[0][0]
  48. print("all_t_count",all_t_count)
  49. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,end_time,current_begin,"R%")
  50. cursor.execute(sql)
  51. all_r_count = cursor.fetchall()[0][0]
  52. print("all_r_count",all_r_count)
  53. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,current_begin,start_time,"T%")
  54. cursor.execute(sql)
  55. other_t_count = cursor.fetchall()[0][0]
  56. print("other_t_count",other_t_count)
  57. sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and edittime<='%s' and edittime>'%s') and value like '%s' "%(user,current_begin,start_time,"R%")
  58. cursor.execute(sql)
  59. other_r_count = cursor.fetchall()[0][0]
  60. print("other_r_count",other_r_count)
  61. wage = round(0.015*(t_count+other_t_count)+0.025*(r_count+other_r_count)+(all_t_count-t_count)*0.02+(all_r_count-r_count)*0.03,2)
  62. # and_sql = " and 1=1 "
  63. # if start_time is not None:
  64. # and_sql += " and to_char(edittime,'yyyy-mm-dd')>'%s' "%start_time
  65. #
  66. # if end_time is not None:
  67. # and_sql += " and to_char(edittime,'yyyy-mm-dd')<='%s'"%end_time
  68. #
  69. #
  70. # sql = " select human_identifier,to_char(edittime,'yyyy-mm-dd hh24:mi:ss') from corpus_iedocument where edituser='%s' %s order by edittime asc" %(user,and_sql)
  71. # cursor.execute(sql)
  72. # list_docid = []
  73. # list_t = []
  74. # set_docid_before = set()
  75. # _time_split = time.mktime(time.strptime("2021-05-07 14:00:00","%Y-%m-%d %H:%M:%S"))
  76. # for row in cursor.fetchall():
  77. # list_docid.append(row[0])
  78. # _et = time.mktime(time.strptime(row[1],"%Y-%m-%d %H:%M:%S"))
  79. # if _et<_time_split:
  80. # set_docid_before.add(row[0])
  81. # set_first = set(list_docid[:1200])
  82. # sql = " select document_id,value from brat_bratannotation where document_id in(select human_identifier from corpus_iedocument where edituser='%s' %s) "%(user,and_sql)
  83. # cursor.execute(sql)
  84. # eleCount = 0
  85. # relCount = 0
  86. # wage = 0
  87. #
  88. # for document_id,value in cursor.fetchall():
  89. # if document_id in set_first:
  90. # if document_id in set_docid_before:
  91. # if len(value.strip())>0:
  92. # if value[0]=="T":
  93. # eleCount += 1
  94. # wage += 0.03
  95. # else:
  96. # relCount += 1
  97. # wage += 0.05
  98. #
  99. # else:
  100. # if len(value.strip())>0:
  101. # if value[0]=="T":
  102. # eleCount += 1
  103. # wage += 0.015
  104. # else:
  105. # relCount += 1
  106. # wage += 0.025
  107. #
  108. # else:
  109. # if document_id in set_docid_before:
  110. # if len(value.strip())>0:
  111. # if value[0]=="T":
  112. # eleCount += 1
  113. # wage += 0.04
  114. # else:
  115. # relCount += 1
  116. # wage += 0.06
  117. # else:
  118. # if len(value.strip())>0:
  119. # if value[0]=="T":
  120. # eleCount += 1
  121. # wage += 0.02
  122. # else:
  123. # relCount += 1
  124. # wage += 0.03
  125. return {"docCount":doc_count,"eleCount":other_t_count+all_t_count,"relCount":other_r_count+all_r_count,"wage":round(wage*percentPass,2)}
  126. class Settlement():
  127. '''
  128. @summary: 结算类,定义了结算者所需要执行的各种方法
  129. '''
  130. def makePayroll(self,list_user,begin_time,time_end):
  131. '''
  132. :param _user: 用户名
  133. :param time_begin: 起始时间
  134. :param time_end: 截至时间
  135. :return:根据用户,时间段生成用户的标注情况
  136. '''
  137. from django.db import connection
  138. with transaction.atomic():
  139. cursor = connection.cursor()
  140. time_begin = "2020-08-01"
  141. for _user in list(set(list_user)):
  142. sql = 'select max(end_time) from corpus_payroll where "user"=\'%s\''%(_user)
  143. cursor.execute(sql)
  144. rows = cursor.fetchall()
  145. if rows[0][0] is not None:
  146. time_begin = rows[0][0]
  147. else:
  148. time_begin = "2020-08-01"
  149. print(_user,time_begin,time_end)
  150. dict_wage = getWage(_user,time_begin,time_end,0.99)
  151. # sql = " select count(1) from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s'"%(_user,time_end,time_begin)
  152. # cursor.execute(sql)
  153. # doc_count = cursor.fetchall()[0][0]
  154. # sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s' order by edittime desc limit 1200) and value like '%s' "%(_user,time_end,time_begin,"T%")
  155. # cursor.execute(sql)
  156. # t_count = cursor.fetchall()[0][0]
  157. # sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s' order by edittime desc limit 1200) and value like '%s' "%(_user,time_end,time_begin,"R%")
  158. # cursor.execute(sql)
  159. # r_count = cursor.fetchall()[0][0]
  160. # sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(_user,time_end,time_begin,"T%")
  161. # cursor.execute(sql)
  162. # all_t_count = cursor.fetchall()[0][0]
  163. # sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(_user,time_end,time_begin,"R%")
  164. # cursor.execute(sql)
  165. # all_r_count = cursor.fetchall()[0][0]
  166. # wage = round(0.015*t_count+0.025*r_count+(all_t_count-t_count)*0.02+(all_r_count-r_count)*0.03,2)
  167. doc_count = dict_wage["docCount"]
  168. t_count = dict_wage['eleCount']
  169. r_count = dict_wage["relCount"]
  170. wage = dict_wage["wage"]
  171. all_t_count = t_count
  172. all_r_count = r_count
  173. print(_user,doc_count,t_count,r_count,wage)
  174. payrolls = Payroll.objects.filter(Q(user=_user)& Q(begin_time=time_begin) & Q(end_time=time_end))
  175. if len(payrolls)==0:
  176. _payroll = Payroll.objects.create(**{"user":_user,"doc_count":doc_count,"begin_time":time_begin,"end_time":time_end,"t_count":all_t_count,"r_count":all_r_count,"wage":wage})
  177. _payroll.save()
  178. else:
  179. _payroll = payrolls[0]
  180. _payroll.doc_count = doc_count
  181. _payroll.t_count = all_t_count
  182. _payroll.r_count = all_r_count
  183. _payroll.wage = wage
  184. _payroll.save()
  185. def exportPayroll(self,begin_time,end_time):
  186. '''
  187. :param begin_time: 导出开始时间
  188. :param end_time: 导出结束时间
  189. :return:
  190. '''
  191. list_user = []
  192. list_username = []
  193. list_doc_count = []
  194. list_t_count = []
  195. list_r_count = []
  196. list_wage = []
  197. list_yield = []
  198. list_account = []
  199. list_begin = []
  200. list_end = []
  201. if begin_time is not None:
  202. payrolls = Payroll.objects.filter(Q(begin_time=begin_time) & Q(end_time=end_time))
  203. else:
  204. payrolls = Payroll.objects.filter(Q(end_time=end_time))
  205. for _payroll in payrolls:
  206. list_user.append(User.objects.filter(username=_payroll.user).first().first_name)
  207. list_username.append(_payroll.user)
  208. list_doc_count.append(_payroll.doc_count)
  209. list_t_count.append(_payroll.t_count)
  210. list_r_count.append(_payroll.r_count)
  211. list_wage.append(_payroll.wage)
  212. list_yield.append(_payroll._yield)
  213. list_account.append(_payroll.account)
  214. list_begin.append(_payroll.begin_time)
  215. list_end.append(_payroll.end_time)
  216. df = pd.DataFrame({"用户":list_user,"登录名":list_username,"开始时间":list_begin,"结束时间":list_end,"文章数":list_doc_count,"要素数":list_t_count,"关系数":list_r_count,"总价":list_wage,"合格率":list_yield,"结算价":list_account})
  217. df.to_excel("%s-%s要素标注统计.xls"%(begin_time,end_time),columns=["用户","登录名","开始时间","结束时间","文章数","要素数","关系数","总价","合格率","结算价"])
  218. def getAllUser(self):
  219. from django.db import connection
  220. with transaction.atomic():
  221. list_user = []
  222. cursor = connection.cursor()
  223. sql = "select username from auth_user where is_staff='t'"
  224. cursor.execute(sql)
  225. for row in cursor.fetchall():
  226. list_user.append(row[0])
  227. return list_user
  228. def makeMigrate(self,_user,time_begin,time_end):
  229. '''
  230. :param _user: 用户名
  231. :param time_begin: 起始时间
  232. :param time_end: 截至时间
  233. :return: 将用户在时间段内的数据迁移到标准表中
  234. '''
  235. pass
  236. # from django.db import connection
  237. # with transaction.atomic():
  238. # cursor = connection.cursor()
  239. # sql = " select human_identifier,offsets_to_text,sentences from corpus_iedocument where edituser is null"
  240. # cursor.execute(sql)
  241. # cursor1 = connection.cursor()
  242. # _index = 0
  243. # rows = True
  244. # while(rows):
  245. # rows=cursor.fetchmany(1000)
  246. # for row in rows:
  247. # _index += 1
  248. # print(_index)
  249. # human_identifier,offsets_to_text,sentences = row
  250. # if sentences!="[]":
  251. # _off = offsets_to_text.split(", ")[-1][:-1]
  252. # _sen = sentences.split(", ")[-1][:-1]
  253. # print(_off,_sen)
  254. # if int(_off)!=int(_sen):
  255. # offsets_to_text = offsets_to_text[:-1]+", "+str(int(_sen))+"]"
  256. # print(offsets_to_text)
  257. # cursor1.execute("update corpus_iedocument set offsets_to_text='%s' where human_identifier='%s'"%(offsets_to_text,human_identifier))
  258. # ieDocuments = IEDocument.objects.filter(Q(edituser=_user) & Q(edittime__range=(time_begin,time_end)))
  259. # for obj in ieDocuments:
  260. # _dict = object_to_dict(obj,IEDocument)
  261. # _dict_meta = object_to_dict(obj.metadata,IEDocumentMetadata)
  262. # labeledMeta = LabeledIEDocumentMetadata.objects.create(**_dict_meta)
  263. # labeledMeta.save()
  264. # _dict["metadata"] = labeledMeta
  265. # tmp = LabeledIEDocument.objects.create(**_dict)
  266. # tmp.save()
  267. #
  268. # bratAnnotations = BratAnnotation.objects.filter(Q(document_id=obj.human_identifier))
  269. # for ann in bratAnnotations:
  270. # _dict_ann = object_to_dict(ann,BratAnnotation)
  271. # labeledAnn = LabeledBratAnnotation.objects.create(**_dict_ann)
  272. # labeledAnn.save()
  273. def getPercentOfPass(self,_user,time_begin,time_end):
  274. '''
  275. :param _user:用户名
  276. :param time_begin: 起始时间
  277. :param time_end: 截至时间
  278. :return: 获得用户在时间段内标注数据的合格率
  279. '''
  280. def makePayrolls(self,time_begin,time_end):
  281. '''
  282. :param time_begin:起始时间
  283. :param time_end: 截至时间
  284. :return: 获得所有用户的工资表
  285. '''
  286. for _user in self.getAllUser():
  287. self.makePayroll(_user,time_begin,time_end)
  288. self.exportPayroll(time_begin,time_end)
  289. def createUser_batch(self,batch_size=90):
  290. '''
  291. :param batch_size: 用户个数
  292. :return:
  293. '''
  294. list_user = [User.objects.create_user(username="bidi%d"%(i+1),password="bidi%d"%(i+1)) for i in range(batch_size)]
  295. def exportLabels(self):
  296. groups = [[1,7],[8,14],[15,22],[23,29],[30,36],[37,43],[44,50],[51,56],[57,62],[63,71]]
  297. from django.db import connection
  298. cursor = connection.cursor()
  299. for _i in range(len(groups)):
  300. _begin,_end = groups[_i]
  301. list_username = []
  302. list_user = []
  303. list_label = []
  304. list_time = []
  305. for _j in range(_begin,_end+1):
  306. username = "bidi%d"%_j
  307. list_username.append("'%s'"%username)
  308. sql = " select edituser,human_identifier,to_char(edittime,'yyyy-mm-dd') from corpus_iedocument where edituser in(%s) order by edittime asc"%(",".join(list_username))
  309. print(sql)
  310. cursor.execute(sql)
  311. rows = cursor.fetchall()
  312. for row in rows:
  313. list_user.append(row[0])
  314. list_label.append(row[1])
  315. list_time.append(row[2])
  316. df = pd.DataFrame({"时间":list_time,"用户":list_user,"文章编号":list_label})
  317. df.to_excel("分组_%d.xls"%(_i+1),columns=["时间","用户","文章编号"])
  318. def filter(self):
  319. '''
  320. 过滤拍卖公告
  321. :return:
  322. '''
  323. import re
  324. ieDocuments = IEDocument.objects.all()
  325. for obj in ieDocuments:
  326. if re.search("拍卖",obj.text) is not None:
  327. obj.jump_signal = 1
  328. obj.save()
  329. print(obj.human_identifier)
  330. def makeCheck():
  331. import re
  332. from datetime import datetime
  333. T_pattern = re.compile('(?P<T>T\d+)\t(?P<type>[^\s]+) (?P<begin>\d+) (?P<end>\d+)')
  334. R_pattern = re.compile("(?P<R>R\d+)\t(?P<type>[^\s]+)[^:]*:(?P<arg1>T\d+)[^:]*:(?P<arg2>T\d+)")
  335. import traceback
  336. from brat.models import BratAnnotation as brat_annotations
  337. from iepy.webui.corpus.models import IEDocument
  338. filename = '32762908'
  339. pre_label = IEDocument.objects.filter(human_identifier=filename).values("pre_label")
  340. _label = ""
  341. if len(pre_label)>0:
  342. if pre_label[0]["pre_label"] is not None:
  343. _label = pre_label[0]["pre_label"]
  344. dict_T = dict()
  345. dict_R = dict()
  346. set_pre_label = set()
  347. for _i in _label.split(";"):
  348. if _i!="":
  349. set_pre_label.add(_i)
  350. set_label = set()
  351. anns = brat_annotations.objects.filter(document_id=filename).values("value")
  352. for _str in anns:
  353. _str = _str["value"].strip()
  354. if _str != "":
  355. if _str[0]=="T":
  356. match = re.search(T_pattern,_str)
  357. if match is not None:
  358. match = match.groupdict()
  359. dict_T[match["T"]] = {"type":match["type"],"begin":match["begin"],"end":match["end"]}
  360. if _str[0]=="R":
  361. match = re.search(R_pattern,_str)
  362. if match is not None:
  363. match = match.groupdict()
  364. dict_R[match["R"]] = {"type":match["type"],"arg1":match["arg1"],"arg2":match["arg2"]}
  365. for _T,_v in dict_T.items():
  366. set_label.add("T|%s|%d|%d"%(_v["type"],int(_v["begin"]),int(_v["end"])))
  367. print(dict_R)
  368. for _R,_v in dict_R.items():
  369. set_label.add("R|%s|%d|%d|%d|%d"%(_v["type"],int(dict_T[_v["arg1"]]["begin"]),int(dict_T[_v["arg1"]]["end"]),int(dict_T[_v["arg2"]]["begin"]),int(dict_T[_v["arg2"]]["end"])))
  370. union_set = set_pre_label&set_label
  371. deleted = len(set_pre_label)-len(union_set)
  372. added = len(set_label)-len(union_set)
  373. print(deleted,added)
  374. print(set_pre_label-set_label)
  375. print(set_label-set_pre_label)
  376. if __name__=="__main__":
  377. settle = Settlement()
  378. # settle.makeMigrate("test","2020-08-01","2020-08-31")
  379. settle_end = "2021-09-25"
  380. settle.makePayroll(["test1","test7","test8","test21","test27","test2","test9","test10","test11","test12","test25","test22","test29"],"2020-08-01",settle_end)
  381. settle.exportPayroll(begin_time=None,end_time=settle_end)
  382. # settle.createUser_batch(batch_size=102)
  383. # settle.exportLabels()
  384. # settle.filter()