Entitys.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. '''
  2. Created on 2018年12月29日
  3. @author: User
  4. '''
  5. from BiddingKG.dl.common.Utils import *
  6. import json
  7. class RelationsTree():
  8. '''
  9. @summary: make a attribute tree
  10. '''
  11. def __init__(self):
  12. self.tree = dict()
  13. self.nodes = dict()
  14. self.nodes["ROOT"] = self.tree
  15. def add_relation(self,relation,parent,child):
  16. if parent in self.nodes:
  17. _relation = relation+"_"+str(parent)+str(child)
  18. self.nodes[parent][_relation] = dict()
  19. self.nodes[child] = self.nodes[parent][_relation]
  20. class Article():
  21. '''
  22. @summary:文章类
  23. '''
  24. def __init__(self,id,content,sourceContent,doc_id,title,code="",name=""):
  25. '''
  26. @param:
  27. id:文章的uuid
  28. content:文章经过预处理之后的文本
  29. '''
  30. self.id = id
  31. self.content = content
  32. self.code = code
  33. self.name = name
  34. self.sourceContent = sourceContent
  35. self.doc_id = doc_id
  36. self.title = title
  37. def toJson(self):
  38. _dict = {"id":self.id,"content":self.content,"code":self.code,
  39. "name":self.name,"sourceContent":"self.sourceContent","doc_id":self.doc_id,"title":self.title}
  40. return json.dumps(_dict)
  41. @staticmethod
  42. def fromJson(_json):
  43. _dict = json.loads(_json)
  44. return Article(_dict.get("id"),_dict.get("content"),
  45. _dict.get("sourceContent"),_dict.get("doc_id"),_dict.get("title"),_dict.get("code"),_dict.get("name"))
  46. class Sentences():
  47. '''
  48. @summary:句子类
  49. '''
  50. def __init__(self,doc_id,sentence_index,sentence_text,tokens,pos_tags,ner_tags):
  51. '''
  52. @param:
  53. doc_id:文章的uuid
  54. sentence_index:文章的句子编号
  55. sentence_text:句子内容
  56. tokens:句子分词
  57. pos_tags:词性标注(算法目前没有用到,暂为空)
  58. ner_tags:实体识别
  59. '''
  60. self.doc_id = doc_id
  61. self.sentence_index = sentence_index
  62. self.sentence_text = sentence_text
  63. self.tokens = tokens
  64. self.pos_tags = pos_tags
  65. self.ner_tags = ner_tags
  66. def toJson(self):
  67. _dict = {"doc_id":self.doc_id,"sentence_index":self.sentence_index,"sentence_text":self.sentence_text,
  68. "tokens":self.tokens,"pos_tags":self.pos_tags,"ner_tags":self.ner_tags}
  69. return json.dumps(_dict)
  70. @staticmethod
  71. def fromJson(_json):
  72. _dict = json.loads(_json)
  73. return Sentences(_dict.get("doc_id"),_dict.get("sentence_index"),_dict.get("sentence_text"),_dict.get("tokens"),
  74. _dict.get("pos_tags"),_dict.get("ner_tags"))
  75. class Entity():
  76. '''
  77. @summary:实体类
  78. '''
  79. def __init__(self,doc_id,entity_id,entity_text,entity_type,sentence_index,begin_index,end_index,wordOffset_begin=None,wordOffset_end=None,label=None,values=None,person_phone=None):
  80. '''
  81. @param:
  82. doc_id:文章的uuid
  83. entity_id:相同实体类型的实体的唯一值,由于抽取算法不唯一,不同类型的实体可能拥有相同entity_id
  84. entity_text:实体的内容
  85. entity_type:实体类型
  86. sentence_index:句子下标
  87. begin_index:实体所在句子的开始位置
  88. end_index:实体所在句子的结束位置
  89. label:实体所属类别
  90. value:实体的各类别概率值
  91. '''
  92. self.doc_id = doc_id
  93. self.entity_id = entity_id
  94. self.entity_text = entity_text
  95. self.entity_type = entity_type
  96. self.sentence_index = sentence_index
  97. self.begin_index = begin_index
  98. self.end_index = end_index
  99. self.wordOffset_begin = wordOffset_begin
  100. self.wordOffset_end = wordOffset_end
  101. self.label = label
  102. self.values = values
  103. self.handlabel = True
  104. self.packageName = "Project"
  105. self.packageCode = ""
  106. self.roleName = ""
  107. self.linked_entitys = []
  108. self.pointer_pack = None
  109. self.pointer_money = None
  110. self.pointer_person = None
  111. self.pointer_address = None
  112. self.pointer_tendereeMoney = None
  113. self.person_phone = person_phone
  114. def set_Role(self,role_label,role_values):
  115. self.label = int(role_label)
  116. self.values = [float(i) for i in role_values]
  117. def set_Money(self,money_label,money_values):
  118. self.label = int(money_label)
  119. self.values = [float(i) for i in money_values]
  120. def set_Person(self,person_label,person_values,person_phone):
  121. self.label = int(person_label)
  122. self.values = [float(i) for i in person_values]
  123. self.person_phone = person_phone
  124. def toJson(self):
  125. _dict = {"doc_id":self.doc_id,"entity_id":self.entity_id,"entity_text":self.entity_text,
  126. "entity_type":self.entity_type,"sentence_index":self.sentence_index,"begin_index":self.begin_index,
  127. "end_index":self.end_index,"wordOffset_begin":self.wordOffset_begin,"wordOffset_end":self.wordOffset_end,
  128. "label":int(self.label) if self.label is not None else None,"values":self.values,"person_phone":self.person_phone}
  129. return json.dumps(_dict)
  130. @staticmethod
  131. def fromJson(_json):
  132. _dict = json.loads(_json)
  133. return Entity(_dict.get("doc_id"),_dict.get("entity_id"),_dict.get("entity_text"),_dict.get("entity_type"),
  134. _dict.get("sentence_index"),_dict.get("begin_index"),_dict.get("end_index"),_dict.get("wordOffset_begin"),
  135. _dict.get("wordOffset_end"),_dict.get("label"),_dict.get("values"),_dict.get("person_phone"))
  136. class PREM():
  137. '''
  138. @summary:包-标段号-角色-公司实体-金额-金额概率-联系人-联系人概率-联系电话
  139. '''
  140. def __init__(self,packageName,packageCode,role_name,entity_text,role_prob,money,money_prob,linklist):
  141. '''
  142. @param:
  143. packageName:包名
  144. packageCode:标段号
  145. role_name:角色名称
  146. entity_text:公司实体名称
  147. role_prob:角色概率
  148. money:金额
  149. money_prob:金额概率
  150. linklist:联系list[联系人,联系电话]
  151. '''
  152. self.packageName = packageName
  153. self.packageCode = packageCode
  154. self.role_name = role_name
  155. self.entity_text = entity_text
  156. self.role_prob = role_prob
  157. self.money = money
  158. self.money_prob = money_prob
  159. self.linklist = linklist
  160. def getString(self,roleList):
  161. '''
  162. #不再在这里解决冲突
  163. count = 0
  164. for item in roleList:
  165. if item.entity_text==self.entity_text:
  166. if item.packageName==self.packageName:
  167. count += 1
  168. else:
  169. if "Project" in [item.packageName,self.packageName]:
  170. count += 1
  171. if count==1:
  172. self.linklist = [item for item in set(self.linklist)]
  173. result = [self.packageName,self.packageCode,self.role_name,self.entity_text,self.money,self.linklist]
  174. else:
  175. result = None
  176. '''
  177. self.linklist = [item for item in set(self.linklist)]
  178. result = [self.packageName,self.packageCode,self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist]
  179. return result
  180. class Role():
  181. '''
  182. @summary: 定义一个角色拥有的所有属性
  183. '''
  184. def __init__(self,role_name,entity_text,role_prob,money,money_prob,linklist):
  185. self.role_name = role_name
  186. self.entity_text = entity_text
  187. self.role_prob = role_prob
  188. self.money = money
  189. self.money_prob = money_prob
  190. self.linklist = linklist
  191. def getString(self):
  192. self.linklist = [item for item in set(self.linklist)]
  193. result = [self.role_name,fitDataByRule(self.entity_text),self.money,self.linklist]
  194. return result
  195. if __name__=="__main__":
  196. a = Article(1,[0.0026275085, 9.795774e-05, 0.00066399743, 0.99661046],"2","4","5")
  197. b = Article.fromJson(a.toJson())
  198. print(b.toJson())