123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- # Input relation: one row per article, keyed and distributed by `id`.
- # `content` is the searchable article text. The last column, `third_tenderer`,
- # is the one the label_guest_ThirdTenderer rules compare entity text against.
- articles(
-     @key
-     @distributed_by
-     id              text,
-     @searchable
-     content         text,
-     tenderee        text,
-     agency          text,
-     win_tenderer    text,
-     first_tenderer  text,
-     second_tenderer text,
-     third_tenderer  text
- ).
- # Output of the articles_processed UDF: article id plus the text that is
- # later handed to nlp_markup.
- articles_processed(
-     @key
-     id      text,
-     content text
- ).
- # UDF declaration: consumes (doc_id, content) TSV lines and emits rows shaped
- # like the articles_processed relation.
- function articles_processed over(
-     doc_id  text,
-     content text
- ) returns rows like articles_processed
- implementation "udf/articles_processed.py" handles tsv lines.
-
- # Feed the id and content of every article to the UDF; the six remaining
- # columns of `articles` are ignored here.
- articles_processed += articles_processed(art_id, art_text) :-
-     articles(art_id, art_text, _, _, _, _, _, _).
-
- # One row per sentence produced by nlp_markup, keyed by
- # (doc_id, sentence_index) and distributed by doc_id. The array columns carry
- # the per-sentence NLP annotations returned by the UDF.
- sentences(
-     @key
-     @distributed_by
-     doc_id         text,
-     @key
-     sentence_index int,
-     @searchable
-     sentence_text  text,
-     tokens         text[],
-     lemmas         text[],
-     pos_tags       text[],
-     ner_tags       text[],
-     doc_offsets    int[],
-     dep_types      text[],
-     dep_tokens     int[]
- ).
- # UDF declaration: consumes (doc_id, content) TSV lines and emits rows shaped
- # like the sentences relation.
- function nlp_markup over (
-     doc_id  text,
-     content text
- ) returns rows like sentences
- implementation "udf/nlp_markup_with_foolnltk.py" handles tsv lines.
-
- # Run the NLP markup over every processed article.
- sentences += nlp_markup(art_id, art_text) :-
-     articles_processed(art_id, art_text).
- # Entity mentions emitted by map_entity_mention: an id, the mention text and
- # type, the sentence it belongs to (doc_id, sentence_index) and its
- # begin/end indexes.
- entity_mention(
-     entity_id      text,
-     entity_text    text,
-     entity_type    text,
-     doc_id         text,
-     sentence_index int,
-     begin_index    int,
-     end_index      int
- ).
- # UDF declaration: consumes one sentence's coordinates together with its
- # token, POS-tag and NER-tag arrays (TSV lines) and emits rows shaped like
- # the entity_mention relation.
- function map_entity_mention over(
-     doc_id         text,
-     sentence_index int,
-     tokens         text[],
-     pos_tags       text[],
-     ner_tags       text[]
- ) returns rows like entity_mention
- implementation "udf/map_entity_mention.py" handles tsv lines.
-
- # Call the UDF once per sentence; sentence_text, lemmas, doc_offsets and the
- # dependency columns are not passed along.
- entity_mention += map_entity_mention(doc, sent_idx, toks, pos, ner) :-
-     sentences(doc, sent_idx, _, toks, _, pos, ner, _, _, _).
- # Input relation with one row per relation definition, keyed and distributed
- # by `id`. The negative labeling rule looks up the row whose id is
- # "Third_Tenderer" and reads its `type_guest` column.
- relation(
-     @key
-     @distributed_by
-     id         text,
-     name       text,
-     step_host  int,
-     type_host  text,
-     step_guest int,
-     type_guest text
- ).
- # Input relation listing the available window sizes. The feature_entity rules
- # select the rows equal to 10 and 5, so both values must be present for the
- # rules to produce output.
- feature_window(
-     @key
-     size int
- ).
- # Features extracted for entity mentions: the mention id, one feature string
- # and the window size the feature was computed with.
- # NOTE(review): the @references annotation points at "isBiddingAgency", which
- # is not declared in the visible part of this program (the variable relation
- # declared here is is_thirdtenderer) -- confirm the intended target.
- # NOTE(review): @key covers entity_id only, while the schema allows several
- # feature rows per entity -- confirm this is intended.
- feature_entity(
-     @key
-     @references(relation="isBiddingAgency",column="entity_id",alias="isBiddingAgency")
-     entity_id   text,
-     feature     text,
-     window_size int
- ).
- # UDF declaration: consumes, as TSV lines and in this order, an entity
- # mention's id and begin/end indexes, the coordinates of its sentence, that
- # sentence's token / POS-tag / NER-tag arrays, and the window size chosen by
- # the calling rule.
- #
- # The output must be shaped like feature_entity, because the rules calling
- # this function append its rows to that relation
- # (`feature_entity += feature_entity(...)`). The earlier declaration returned
- # rows like feature_guest_BiddingAgency, a relation this program does not
- # declare.
- #
- # The implementation path is relative ("udf/..."), matching every other UDF
- # declared in this file.
- function feature_entity over(
-     entity_id      text,
-     entity_begin   int,
-     entity_end     int,
-     doc_id         text,
-     sentence_index int,
-     tokens         text[],
-     pos_tags       text[],
-     ner_tags       text[],
-     window_size    int
- ) returns rows like feature_entity
- implementation "udf/feature_entity.py" handles tsv lines.
- # Features for "company" / "org" mentions, computed with window size 10.
- # Each mention is joined to its sentence on (doc_id, sentence_index); the
- # window size is taken from feature_window, so a row with size 10 must exist.
- feature_entity += feature_entity(
-     eid, span_begin, span_end, doc, sent_idx, toks, pos, ner, win
- ) :-
-     entity_mention(eid, _, etype, doc, sent_idx, span_begin, span_end),
-     sentences(doc, sent_idx, _, toks, _, pos, ner, _, _, _),
-     feature_window(win),
-     win = 10,
-     [etype = "company"; etype = "org"].
-
- # Features for "person" / "time" / "location" / "call" / "money" mentions,
- # computed with window size 5 (same join as above; needs a feature_window
- # row with size 5).
- feature_entity += feature_entity(
-     eid, span_begin, span_end, doc, sent_idx, toks, pos, ner, win
- ) :-
-     entity_mention(eid, _, etype, doc, sent_idx, span_begin, span_end),
-     sentences(doc, sent_idx, _, toks, _, pos, ner, _, _, _),
-     feature_window(win),
-     win = 5,
-     [etype = "person"; etype = "time"; etype = "location"; etype = "call"; etype = "money"].
- # Supervision labels per entity mention: `label` is 1 or -1 depending on the
- # rule that produced the row, and `rule_id` holds that rule's tag string.
- label_guest_ThirdTenderer(
-     entity_id text,
-     label     int,
-     rule_id   text
- ).
- # Positive label (+1): the mention text equals the third_tenderer column of
- # the article the mention comes from. The rule tag "标注数据" means
- # "annotated data".
- label_guest_ThirdTenderer(eid, 1, "标注数据") :-
-     articles(doc, _, _, _, _, _, _, mention_text),
-     entity_mention(eid, mention_text, _, doc, _, _, _).
-
- # Negative label (-1): the mention's type equals the type_guest of the
- # "Third_Tenderer" row in `relation`, the article has a non-NULL
- # third_tenderer, and the mention text differs from it. The rule tag
- # "非标注数据" means "non-annotated data".
- label_guest_ThirdTenderer(eid, -1, "非标注数据") :-
-     relation(rel_id, _, _, _, _, etype),
-     rel_id = "Third_Tenderer",
-     entity_mention(eid, mention_text, etype, doc, _, _, _),
-     articles(doc, _, _, _, _, _, _, gold_text),
-     gold_text IS NOT NULL,
-     mention_text != gold_text.
- # Boolean random variable, one per entity mention, keyed by entity_id, which
- # references entity_mention.entity_id.
- @extraction
- is_thirdtenderer?(
-     @key
-     @references(relation="entity_mention",column="entity_id",alias="entity_mention")
-     entity_id text
- ).
- # Supervision: a positive label makes the variable TRUE, a negative label
- # makes it FALSE, and a label of 0 leaves it unsupervised (NULL).
- is_thirdtenderer(eid) =
-     if vote > 0 then TRUE
-     else if vote < 0 then FALSE
-     else NULL end :-
-     label_guest_ThirdTenderer(eid, vote, _).
- # Inference rule: one weight per distinct feature value `f`.
- # Only feature rows computed with window size 10 take part, and only for
- # entities that have at least one row in label_guest_ThirdTenderer.
- @weight(f)
- is_thirdtenderer(eid) :-
-     label_guest_ThirdTenderer(eid, _, _),
-     feature_entity(eid, f, win),
-     win = 10.
|