|
|
@@ -0,0 +1,251 @@
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+from django.db.models import Q
|
|
|
+import datetime,time
|
|
|
+import iepy
|
|
|
+iepy.setup(__file__)
|
|
|
+from iepy.data.db import DocumentManager
|
|
|
+from iepy.data.models import IEDocument,LabeledIEDocument,IEDocumentMetadata,LabeledIEDocumentMetadata,Payroll
|
|
|
+from brat.models import BratAnnotation,LabeledBratAnnotation
|
|
|
+from django.db import transaction
|
|
|
+import pandas as pd
|
|
|
+from django.contrib.auth.models import User
|
|
|
+
|
|
|
+def object_to_dict(obj,class_model):
|
|
|
+ '''
|
|
|
+ :param obj:对象
|
|
|
+ :param class_model:django model
|
|
|
+ :return: 由对象生成的键值对
|
|
|
+ '''
|
|
|
+ _dict = {}
|
|
|
+ concrete_model = class_model._meta.concrete_model
|
|
|
+ for field in concrete_model._meta.local_fields:
|
|
|
+ value = field.value_from_object(obj)
|
|
|
+ _dict[field.name] = value
|
|
|
+ return _dict
|
|
|
+
|
|
|
+
|
|
|
+class Settlement():
|
|
|
+
|
|
|
+ '''
|
|
|
+ @summary: 结算类,定义了结算者所需要执行的各种方法
|
|
|
+ '''
|
|
|
+
|
|
|
+ def makePayroll(self,list_user,begin_time,time_end):
|
|
|
+ '''
|
|
|
+ :param _user: 用户名
|
|
|
+ :param time_begin: 起始时间
|
|
|
+ :param time_end: 截至时间
|
|
|
+ :return:根据用户,时间段生成用户的标注情况
|
|
|
+ '''
|
|
|
+ from django.db import connection
|
|
|
+ with transaction.atomic():
|
|
|
+ cursor = connection.cursor()
|
|
|
+ time_begin = "2020-08-01"
|
|
|
+ for _user in list_user:
|
|
|
+ sql = 'select max(end_time) from corpus_payroll where "user"=\'%s\''%(_user)
|
|
|
+ cursor.execute(sql)
|
|
|
+ rows = cursor.fetchall()
|
|
|
+ if rows[0][0] is not None:
|
|
|
+ time_begin = rows[0][0]
|
|
|
+ else:
|
|
|
+ time_begin = "2020-08-01"
|
|
|
+
|
|
|
+ sql = " select count(1) from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s'"%(_user,time_end,time_begin)
|
|
|
+ cursor.execute(sql)
|
|
|
+ doc_count = cursor.fetchall()[0][0]
|
|
|
+ sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s' order by edittime desc limit 1200) and value like '%s' "%(_user,time_end,time_begin,"T%")
|
|
|
+ cursor.execute(sql)
|
|
|
+ t_count = cursor.fetchall()[0][0]
|
|
|
+ sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s' order by edittime desc limit 1200) and value like '%s' "%(_user,time_end,time_begin,"R%")
|
|
|
+ cursor.execute(sql)
|
|
|
+ r_count = cursor.fetchall()[0][0]
|
|
|
+ sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(_user,time_end,time_begin,"T%")
|
|
|
+ cursor.execute(sql)
|
|
|
+ all_t_count = cursor.fetchall()[0][0]
|
|
|
+ sql = " select count(1) from brat_bratannotation where document_id in (select human_identifier from corpus_iedocument where edituser='%s' and to_char(edittime,'yyyy-mm-dd')<='%s' and to_char(edittime,'yyyy-mm-dd')>'%s') and value like '%s' "%(_user,time_end,time_begin,"R%")
|
|
|
+ cursor.execute(sql)
|
|
|
+ all_r_count = cursor.fetchall()[0][0]
|
|
|
+ wage = round(0.03*t_count+0.05*r_count+(all_t_count-t_count)*0.04+(all_r_count-r_count)*0.06,2)
|
|
|
+ print(doc_count,t_count,r_count,wage)
|
|
|
+ payrolls = Payroll.objects.filter(Q(user=_user)& Q(begin_time=time_begin) & Q(end_time=time_end))
|
|
|
+ if len(payrolls)==0:
|
|
|
+ _payroll = Payroll.objects.create(**{"user":_user,"doc_count":doc_count,"begin_time":time_begin,"end_time":time_end,"t_count":all_t_count,"r_count":all_r_count,"wage":wage})
|
|
|
+ _payroll.save()
|
|
|
+ else:
|
|
|
+ _payroll = payrolls[0]
|
|
|
+ _payroll.doc_count = doc_count
|
|
|
+ _payroll.t_count = all_t_count
|
|
|
+ _payroll.r_count = all_r_count
|
|
|
+ _payroll.wage = wage
|
|
|
+ _payroll.save()
|
|
|
+
|
|
|
+ def exportPayroll(self,begin_time,end_time):
|
|
|
+ '''
|
|
|
+ :param begin_time: 导出开始时间
|
|
|
+ :param end_time: 导出结束时间
|
|
|
+ :return:
|
|
|
+ '''
|
|
|
+ list_user = []
|
|
|
+ list_doc_count = []
|
|
|
+ list_t_count = []
|
|
|
+ list_r_count = []
|
|
|
+ list_wage = []
|
|
|
+ list_yield = []
|
|
|
+ list_account = []
|
|
|
+ list_begin = []
|
|
|
+ list_end = []
|
|
|
+ if begin_time is not None:
|
|
|
+ payrolls = Payroll.objects.filter(Q(begin_time=begin_time) & Q(end_time=end_time))
|
|
|
+ else:
|
|
|
+ payrolls = Payroll.objects.filter(Q(end_time=end_time))
|
|
|
+ for _payroll in payrolls:
|
|
|
+ list_user.append(_payroll.user)
|
|
|
+ list_doc_count.append(_payroll.doc_count)
|
|
|
+ list_t_count.append(_payroll.t_count)
|
|
|
+ list_r_count.append(_payroll.r_count)
|
|
|
+ list_wage.append(_payroll.wage)
|
|
|
+ list_yield.append(_payroll._yield)
|
|
|
+ list_account.append(_payroll.account)
|
|
|
+ list_begin.append(_payroll.begin_time)
|
|
|
+ list_end.append(_payroll.end_time)
|
|
|
+ df = pd.DataFrame({"用户":list_user,"开始时间":list_begin,"结束时间":list_end,"文章数":list_doc_count,"要素数":list_t_count,"关系数":list_r_count,"总价":list_wage,"合格率":list_yield,"结算价":list_account})
|
|
|
+ df.to_excel("%s-%s要素标注统计.xls"%(begin_time,end_time),columns=["用户","开始时间","结束时间","文章数","要素数","关系数","总价","合格率","结算价"])
|
|
|
+
|
|
|
+ def getAllUser(self):
|
|
|
+ from django.db import connection
|
|
|
+ with transaction.atomic():
|
|
|
+ list_user = []
|
|
|
+ cursor = connection.cursor()
|
|
|
+ sql = "select username from auth_user where is_staff='t'"
|
|
|
+ cursor.execute(sql)
|
|
|
+ for row in cursor.fetchall():
|
|
|
+ list_user.append(row[0])
|
|
|
+ return list_user
|
|
|
+
|
|
|
+
|
|
|
+ def makeMigrate(self,_user,time_begin,time_end):
|
|
|
+ '''
|
|
|
+ :param _user: 用户名
|
|
|
+ :param time_begin: 起始时间
|
|
|
+ :param time_end: 截至时间
|
|
|
+ :return: 将用户在时间段内的数据迁移到标准表中
|
|
|
+ '''
|
|
|
+ pass
|
|
|
+ # from django.db import connection
|
|
|
+ # with transaction.atomic():
|
|
|
+ # cursor = connection.cursor()
|
|
|
+ # sql = " select human_identifier,offsets_to_text,sentences from corpus_iedocument where edituser is null"
|
|
|
+ # cursor.execute(sql)
|
|
|
+ # cursor1 = connection.cursor()
|
|
|
+ # _index = 0
|
|
|
+ # rows = True
|
|
|
+ # while(rows):
|
|
|
+ # rows=cursor.fetchmany(1000)
|
|
|
+ # for row in rows:
|
|
|
+ # _index += 1
|
|
|
+ # print(_index)
|
|
|
+ # human_identifier,offsets_to_text,sentences = row
|
|
|
+ # if sentences!="[]":
|
|
|
+ # _off = offsets_to_text.split(", ")[-1][:-1]
|
|
|
+ # _sen = sentences.split(", ")[-1][:-1]
|
|
|
+ # print(_off,_sen)
|
|
|
+ # if int(_off)!=int(_sen):
|
|
|
+ # offsets_to_text = offsets_to_text[:-1]+", "+str(int(_sen))+"]"
|
|
|
+ # print(offsets_to_text)
|
|
|
+ # cursor1.execute("update corpus_iedocument set offsets_to_text='%s' where human_identifier='%s'"%(offsets_to_text,human_identifier))
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ # ieDocuments = IEDocument.objects.filter(Q(edituser=_user) & Q(edittime__range=(time_begin,time_end)))
|
|
|
+ # for obj in ieDocuments:
|
|
|
+ # _dict = object_to_dict(obj,IEDocument)
|
|
|
+ # _dict_meta = object_to_dict(obj.metadata,IEDocumentMetadata)
|
|
|
+ # labeledMeta = LabeledIEDocumentMetadata.objects.create(**_dict_meta)
|
|
|
+ # labeledMeta.save()
|
|
|
+ # _dict["metadata"] = labeledMeta
|
|
|
+ # tmp = LabeledIEDocument.objects.create(**_dict)
|
|
|
+ # tmp.save()
|
|
|
+ #
|
|
|
+ # bratAnnotations = BratAnnotation.objects.filter(Q(document_id=obj.human_identifier))
|
|
|
+ # for ann in bratAnnotations:
|
|
|
+ # _dict_ann = object_to_dict(ann,BratAnnotation)
|
|
|
+ # labeledAnn = LabeledBratAnnotation.objects.create(**_dict_ann)
|
|
|
+ # labeledAnn.save()
|
|
|
+
|
|
|
+
|
|
|
+ def getPercentOfPass(self,_user,time_begin,time_end):
|
|
|
+ '''
|
|
|
+ :param _user:用户名
|
|
|
+ :param time_begin: 起始时间
|
|
|
+ :param time_end: 截至时间
|
|
|
+ :return: 获得用户在时间段内标注数据的合格率
|
|
|
+ '''
|
|
|
+
|
|
|
+ def makePayrolls(self,time_begin,time_end):
|
|
|
+ '''
|
|
|
+ :param time_begin:起始时间
|
|
|
+ :param time_end: 截至时间
|
|
|
+ :return: 获得所有用户的工资表
|
|
|
+ '''
|
|
|
+ for _user in self.getAllUser():
|
|
|
+ self.makePayroll(_user,time_begin,time_end)
|
|
|
+ self.exportPayroll(time_begin,time_end)
|
|
|
+
|
|
|
+ def createUser_batch(self,batch_size=90):
|
|
|
+ '''
|
|
|
+ :param batch_size: 用户个数
|
|
|
+ :return:
|
|
|
+ '''
|
|
|
+ list_user = [User.objects.create_user(username="bidi%d"%(i+1),password="bidi%d"%(i+1)) for i in range(batch_size)]
|
|
|
+
|
|
|
+ def exportLabels(self):
|
|
|
+ groups = [[1,7],[8,14],[15,22],[23,29],[30,36],[37,43],[44,50],[51,56],[57,62],[63,71]]
|
|
|
+ from django.db import connection
|
|
|
+ cursor = connection.cursor()
|
|
|
+ for _i in range(len(groups)):
|
|
|
+ _begin,_end = groups[_i]
|
|
|
+ list_username = []
|
|
|
+ list_user = []
|
|
|
+ list_label = []
|
|
|
+ list_time = []
|
|
|
+ for _j in range(_begin,_end+1):
|
|
|
+ username = "bidi%d"%_j
|
|
|
+ list_username.append("'%s'"%username)
|
|
|
+ sql = " select edituser,human_identifier,to_char(edittime,'yyyy-mm-dd') from corpus_iedocument where edituser in(%s) order by edittime asc"%(",".join(list_username))
|
|
|
+ print(sql)
|
|
|
+ cursor.execute(sql)
|
|
|
+ rows = cursor.fetchall()
|
|
|
+ for row in rows:
|
|
|
+ list_user.append(row[0])
|
|
|
+ list_label.append(row[1])
|
|
|
+ list_time.append(row[2])
|
|
|
+ df = pd.DataFrame({"时间":list_time,"用户":list_user,"文章编号":list_label})
|
|
|
+ df.to_excel("分组_%d.xls"%(_i+1),columns=["时间","用户","文章编号"])
|
|
|
+
|
|
|
+ def filter(self):
|
|
|
+ '''
|
|
|
+ 过滤拍卖公告
|
|
|
+ :return:
|
|
|
+ '''
|
|
|
+ import re
|
|
|
+ ieDocuments = IEDocument.objects.all()
|
|
|
+ for obj in ieDocuments:
|
|
|
+ if re.search("拍卖",obj.text) is not None:
|
|
|
+ obj.jump_signal = 1
|
|
|
+ obj.save()
|
|
|
+ print(obj.human_identifier)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+if __name__=="__main__":
|
|
|
+ settle = Settlement()
|
|
|
+ # settle.makeMigrate("test","2020-08-01","2020-08-31")
|
|
|
+ settle.makePayroll(["test2","test9","test11","test12","test1","test7","test8","test3","test19"],"2020-08-01","2020-11-25")
|
|
|
+ # settle.makePayrolls("2020-08-01","2020-08-31")
|
|
|
+ settle.exportPayroll(begin_time=None,end_time='2020-11-25')
|
|
|
+ # settle.createUser_batch(batch_size=102)
|
|
|
+ # settle.exportLabels()
|
|
|
+ # settle.filter()
|