# ---------------------------------------------------------------------------
# Source documents
# ---------------------------------------------------------------------------

# One row per article: the raw text plus the party columns (tenderee, agency,
# winning / first / second / third tenderer) that the labeling rules compare
# entity mentions against.
articles(
    @key
    @distributed_by
    id              text,
    @searchable
    content         text,
    tenderee        text,
    agency          text,
    win_tenderer    text,
    first_tenderer  text,
    second_tenderer text,
    third_tenderer  text
).

# Article text after it has been passed through the articles_processed UDF.
articles_processed(
    @key
    id      text,
    content text
).

# UDF: consumes (doc_id, content) TSV lines and emits articles_processed rows.
function articles_processed over (
        doc_id  text,
        content text
    ) returns rows like articles_processed
    implementation "udf/articles_processed.py" handles tsv lines.

# Only the id and content columns of each article are fed to the UDF.
articles_processed += articles_processed(doc_id, content) :-
    articles(doc_id, content, _, _, _, _, _, _).

# ---------------------------------------------------------------------------
# NLP markup
# ---------------------------------------------------------------------------

# One row per sentence, carrying parallel per-token arrays.
sentences(
    @key
    @distributed_by
    doc_id         text,
    @key
    sentence_index int,
    @searchable
    sentence_text  text,
    tokens         text[],
    lemmas         text[],
    pos_tags       text[],
    ner_tags       text[],
    doc_offsets    int[],
    dep_types      text[],
    dep_tokens     int[]
).

# UDF: consumes (doc_id, content) TSV lines and emits sentences rows.
function nlp_markup over (
        doc_id  text,
        content text
    ) returns rows like sentences
    implementation "udf/nlp_markup_with_foolnltk.py" handles tsv lines.

sentences += nlp_markup(doc_id, content) :-
    articles_processed(doc_id, content).

# ---------------------------------------------------------------------------
# Entity mentions
# ---------------------------------------------------------------------------

# One row per entity mention located by (doc_id, sentence_index) and a
# begin/end index pair.
entity_mention(
    entity_id      text,
    entity_text    text,
    entity_type    text,
    doc_id         text,
    sentence_index int,
    begin_index    int,
    end_index      int
).

# UDF: consumes one sentence's tokens and tags, emits entity_mention rows.
function map_entity_mention over (
        doc_id         text,
        sentence_index int,
        tokens         text[],
        pos_tags       text[],
        ner_tags       text[]
    ) returns rows like entity_mention
    implementation "udf/map_entity_mention.py" handles tsv lines.

entity_mention += map_entity_mention(doc_id, sentence_index, tokens, pos_tags, ner_tags) :-
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _).

# ---------------------------------------------------------------------------
# Relation metadata and feature configuration
# ---------------------------------------------------------------------------

# Relation definitions. No rule in this file derives it, so it is presumably
# loaded as input data -- TODO confirm. The negative labeling rule reads the
# row whose id is "Second_Tenderer" to obtain the expected guest entity type.
relation(
    @key
    @distributed_by
    id         text,
    name       text,
    step_host  int,
    type_host  text,
    step_guest int,
    type_guest text
).

# Window sizes available to feature extraction. No rule in this file derives
# it, so it is presumably loaded as input data -- TODO confirm.
feature_window(
    @key
    size int
).

# Features extracted for an entity mention, tagged with the window size that
# was passed to the feature UDF.
# The @references annotation previously named "isBiddingAgency", a relation
# that is not declared in this file; it now points at is_secondtenderer, the
# variable relation whose inference rule consumes these features.
# NOTE(review): entity_id is the only @key column although the feature rules
# can emit several rows per entity -- confirm whether feature and window_size
# should also be marked @key.
feature_entity(
    @key
    @references(relation="is_secondtenderer", column="entity_id", alias="is_secondtenderer")
    entity_id   text,
    feature     text,
    window_size int
).
# ---------------------------------------------------------------------------
# Feature extraction
# ---------------------------------------------------------------------------

# UDF: consumes one entity mention together with its sentence's tokens/tags and
# a window size, and emits feature_entity rows.
# Fixes relative to the previous declaration:
#   * "returns rows like" named feature_guest_BiddingAgency, which is not
#     declared in this file; the output is appended to feature_entity, so that
#     relation's schema is used.
#   * the implementation path had a leading "/" (absolute path), unlike every
#     other UDF in this file; it is now relative like the rest.
function feature_entity over (
        entity_id      text,
        entity_begin   int,
        entity_end     int,
        doc_id         text,
        sentence_index int,
        tokens         text[],
        pos_tags       text[],
        ner_tags       text[],
        window_size    int
    ) returns rows like feature_entity
    implementation "udf/feature_entity.py" handles tsv lines.

# Company / org mentions are featurized with window size 10. The window size
# must also be present in feature_window for the rule to fire.
feature_entity += feature_entity(
    entity_id, entity_begin, entity_end, doc_id, sentence_index,
    tokens, pos_tags, ner_tags, window_size
) :-
    entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
    [entity_type = "company"; entity_type = "org"],
    feature_window(window_size),
    window_size = 10.

# Person / time / location / call / money mentions are featurized with window
# size 5, again only if that size is present in feature_window.
feature_entity += feature_entity(
    entity_id, entity_begin, entity_end, doc_id, sentence_index,
    tokens, pos_tags, ner_tags, window_size
) :-
    entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
    [entity_type = "person"; entity_type = "time"; entity_type = "location"; entity_type = "call"; entity_type = "money"],
    feature_window(window_size),
    window_size = 5.

# ---------------------------------------------------------------------------
# Supervision labels for the "second tenderer" role
# ---------------------------------------------------------------------------

# One row per (entity, label, rule) vote; label is 1 or -1 in the rules below.
label_guest_SecondTenderer(
    entity_id text,
    label     int,
    rule_id   text
).

# Positive label: the mention text equals the article's second_tenderer column
# (7th column of articles). The rule id string means "annotated data".
label_guest_SecondTenderer(entity_id, 1, "标注数据") :-
    entity_mention(entity_id, entity_text, _, doc_id, _, _, _),
    articles(doc_id, _, _, _, _, _, entity_text, _).

# Negative label: the mention has the guest type registered for the
# "Second_Tenderer" relation, the article has a non-NULL second_tenderer, and
# the mention text differs from it. The rule id string means
# "non-annotated data".
label_guest_SecondTenderer(entity_id, -1, "非标注数据") :-
    entity_mention(entity_id, entity_text, guest_type, doc_id, _, _, _),
    relation(id, _, _, _, _, guest_type),
    id = "Second_Tenderer",
    articles(doc_id, _, _, _, _, _, entity_text2, _),
    entity_text2 IS NOT NULL,
    entity_text != entity_text2.

# ---------------------------------------------------------------------------
# Random variable
# ---------------------------------------------------------------------------

# Boolean variable per entity mention: is this mention the second tenderer?
@extraction
is_secondtenderer?(
    @key
    @references(relation="entity_mention", column="entity_id", alias="entity_mention")
    entity_id text
).

# Map each label vote onto the variable: positive -> TRUE, negative -> FALSE,
# zero -> NULL (unsupervised).
is_secondtenderer(entity_id) =
    if label > 0 then TRUE
    else if label < 0 then FALSE
    else NULL end :-
    label_guest_SecondTenderer(entity_id, label, _).
# Inference rule: each feature value gets its own weight on is_secondtenderer.
# Only entities that have at least one row in label_guest_SecondTenderer are
# connected, and only features extracted with window size 10 are used.
@weight(feat)
is_secondtenderer(entity_id) :-
    feature_entity(entity_id, feat, win_size),
    label_guest_SecondTenderer(entity_id, _, _),
    win_size = 10.