# app_role_secondtenderer.ddlog

  # Input relation: one row per article.
  # `id` is the key and the distribution column; `content` is searchable.
  # The remaining columns are per-article text fields; the labeling rules in
  # this file read the seventh column, `second_tenderer`.
  articles(
      @key
      @distributed_by
      id               text,
      @searchable
      content          text,
      tenderee         text,
      agency           text,
      win_tenderer     text,
      first_tenderer   text,
      second_tenderer  text,
      third_tenderer   text
  ).
  # Output of the `articles_processed` UDF: article id plus content text.
  # NOTE(review): presumably the content is a cleaned/normalized form of
  # articles.content; confirm against udf/articles_processed.py.
  articles_processed(
      @key
      id       text,
      content  text
  ).
  # UDF declaration: consumes (doc_id, content) rows as TSV lines and emits
  # rows shaped like the `articles_processed` relation.
  function articles_processed over (
      doc_id   text,
      content  text
  ) returns rows like articles_processed
    implementation "udf/articles_processed.py" handles tsv lines.
  # Run the `articles_processed` UDF over the id and content of every article;
  # the six remaining article columns are ignored.
  articles_processed += articles_processed(doc_id, content) :-
      articles(doc_id, content, _, _, _, _, _, _).
  # Per-sentence NLP markup produced by the `nlp_markup` UDF.
  # Rows are keyed by (doc_id, sentence_index) and distributed by doc_id.
  # The array columns hold per-sentence annotations.
  # NOTE(review): presumably the arrays are parallel, one element per token;
  # confirm against udf/nlp_markup_with_foolnltk.py.
  sentences(
      @key
      @distributed_by
      doc_id          text,
      @key
      sentence_index  int,
      @searchable
      sentence_text   text,
      tokens          text[],
      lemmas          text[],
      pos_tags        text[],
      ner_tags        text[],
      doc_offsets     int[],
      dep_types       text[],
      dep_tokens      int[]
  ).
  # UDF declaration: consumes (doc_id, content) rows as TSV lines and emits
  # rows shaped like the `sentences` relation.
  function nlp_markup over (
      doc_id   text,
      content  text
  ) returns rows like sentences
    implementation "udf/nlp_markup_with_foolnltk.py" handles tsv lines.
  # Populate `sentences` by running the `nlp_markup` UDF over every row of
  # `articles_processed`.
  sentences += nlp_markup(doc_id, content) :-
      articles_processed(doc_id, content).
  # Entity mentions emitted by the `map_entity_mention` UDF.
  # Each row carries the mention id, text and type, plus the sentence
  # (doc_id, sentence_index) it belongs to and a begin/end index pair.
  # NOTE(review): whether end_index is inclusive is not visible here; confirm
  # against udf/map_entity_mention.py.
  entity_mention(
      entity_id       text,
      entity_text     text,
      entity_type     text,
      doc_id          text,
      sentence_index  int,
      begin_index     int,
      end_index       int
  ).
  # UDF declaration: consumes one sentence's identifiers together with its
  # token, POS-tag and NER-tag arrays (as TSV lines) and emits rows shaped
  # like the `entity_mention` relation.
  function map_entity_mention over (
      doc_id          text,
      sentence_index  int,
      tokens          text[],
      pos_tags        text[],
      ner_tags        text[]
  ) returns rows like entity_mention
    implementation "udf/map_entity_mention.py" handles tsv lines.
  # Populate `entity_mention` by running the `map_entity_mention` UDF over
  # every sentence. Only doc_id, sentence_index, tokens, pos_tags and ner_tags
  # are taken from `sentences`; the other five columns are ignored.
  entity_mention += map_entity_mention(
      doc_id, sentence_index, tokens, pos_tags, ner_tags
  ) :-
      sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _).
  # Relation catalogue, keyed and distributed by `id`.
  # The negative-labeling rule in this file looks up the row whose id is
  # "Second_Tenderer" and reads its `type_guest` column.
  # NOTE(review): the meaning of step_host / step_guest is not visible in this
  # file; confirm against the data loader.
  relation(
      @key
      @distributed_by
      id          text,
      name        text,
      step_host   int,
      type_host   text,
      step_guest  int,
      type_guest  text
  ).
  # Set of window sizes available to feature extraction. The feature rules in
  # this file select the rows whose size is 10 or 5.
  feature_window(
      @key
      size  int
  ).
  # Features extracted for an entity mention, one row per
  # (entity_id, feature, window_size).
  # The @references annotation points at `is_secondtenderer`, the variable
  # relation declared in this file. It previously named `isBiddingAgency`,
  # which this file does not declare.
  # NOTE(review): confirm `is_secondtenderer` is the intended target.
  feature_entity(
      @key
      @references(relation="is_secondtenderer", column="entity_id", alias="is_secondtenderer")
      entity_id    text,
      feature      text,
      window_size  int
  ).
  # UDF declaration: consumes an entity mention (id and begin/end indexes),
  # the sentence it occurs in (identifiers plus token, POS-tag and NER-tag
  # arrays) and a window size, all as TSV lines.
  #
  # The output shape is `feature_entity`, the relation that the
  # `feature_entity += feature_entity(...)` rules insert into. The declaration
  # previously named `feature_guest_BiddingAgency`, which this file does not
  # declare.
  # The implementation path is relative ("udf/..."), matching the other UDF
  # declarations in this file; it previously had a leading slash.
  function feature_entity over (
      entity_id       text,
      entity_begin    int,
      entity_end      int,
      doc_id          text,
      sentence_index  int,
      tokens          text[],
      pos_tags        text[],
      ner_tags        text[],
      window_size     int
  ) returns rows like feature_entity
    implementation "udf/feature_entity.py" handles tsv lines.
  # Extract features with window size 10 for mentions whose entity_type is
  # "company" or "org". Each mention is joined to its sentence on
  # (doc_id, sentence_index); the window size must exist in `feature_window`.
  feature_entity += feature_entity(
      entity_id, entity_begin, entity_end,
      doc_id, sentence_index,
      tokens, pos_tags, ner_tags,
      window_size
  ) :-
      entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
      sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
      [entity_type = "company"; entity_type = "org"],
      feature_window(window_size),
      window_size = 10.
  # Extract features with window size 5 for mentions whose entity_type is one
  # of "person", "time", "location", "call" or "money". Each mention is joined
  # to its sentence on (doc_id, sentence_index); the window size must exist in
  # `feature_window`.
  feature_entity += feature_entity(
      entity_id, entity_begin, entity_end,
      doc_id, sentence_index,
      tokens, pos_tags, ner_tags,
      window_size
  ) :-
      entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
      sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
      [entity_type = "person"; entity_type = "time"; entity_type = "location"; entity_type = "call"; entity_type = "money"],
      feature_window(window_size),
      window_size = 5.
  # Supervision labels for entity mentions. The rules in this file emit
  # label 1 or -1 together with the id of the rule that produced the label.
  label_guest_SecondTenderer(
      entity_id  text,
      label      int,
      rule_id    text
  ).
  # Positive label (+1): the mention text equals the `second_tenderer` column
  # (seventh column) of the article the mention comes from.
  # The rule id string is Chinese for "labeled data".
  label_guest_SecondTenderer(entity_id, 1, "标注数据") :-
      entity_mention(entity_id, mention_text, _, doc_id, _, _, _),
      articles(doc_id, _, _, _, _, _, mention_text, _).
  # Negative label (-1): the mention's entity_type equals the `type_guest` of
  # the relation row with id "Second_Tenderer", the article has a non-NULL
  # `second_tenderer`, and the mention text differs from it.
  # The rule id string is Chinese for "non-labeled data".
  label_guest_SecondTenderer(entity_id, -1, "非标注数据") :-
      entity_mention(entity_id, mention_text, guest_type, doc_id, _, _, _),
      relation(id, _, _, _, _, guest_type),
      id = "Second_Tenderer",
      articles(doc_id, _, _, _, _, _, gold_text, _),
      gold_text IS NOT NULL,
      mention_text != gold_text.
  # Boolean random variable (declared with `?`), one per entity mention,
  # keyed by entity_id and referencing entity_mention.entity_id.
  @extraction
  is_secondtenderer?(
      @key
      @references(relation="entity_mention", column="entity_id", alias="entity_mention")
      entity_id  text
  ).
  # Supervision: map each label row onto the variable's value.
  # label > 0 gives TRUE, label < 0 gives FALSE, anything else gives NULL.
  is_secondtenderer(entity_id) =
      if label > 0 then TRUE
      else if label < 0 then FALSE
      else NULL end :-
      label_guest_SecondTenderer(entity_id, label, _).
  # Inference rule: each feature value `f` gets its own weight.
  # Only features extracted with window size 10 are used, and only for
  # entities that have at least one row in `label_guest_SecondTenderer`.
  @weight(f)
  is_secondtenderer(entity_id) :-
      feature_entity(entity_id, f, win_size),
      label_guest_SecondTenderer(entity_id, _, _),
      win_size = 10.