# luojiehua / BIDI_ML_INFO_EXTRACTION

							articles(
    # Input documents. `id` and `content` are the only columns read by the
    # rules visible in this file (they feed the articles_processed UDF); the
    # remaining text columns are declared here but not used by any visible rule.
    @key
    @distributed_by
    id              text,

    @searchable
    content         text,

    tenderee        text,
    agency          text,
    win_tenderer    text,
    first_tenderer  text,
    second_tenderer text,
    third_tenderer  text
).


# Documents after the articles_processed UDF has been applied: one
# (id, content) pair per row, which is the input of the nlp_markup step.
articles_processed(
    @key
    id      text,
    content text
).

# UDF declaration: receives (doc_id, content) as TSV lines and emits rows
# shaped like the articles_processed relation.
function articles_processed over (
        doc_id  text,
        content text
    ) returns rows like articles_processed
    implementation "udf/articles_processed.py" handles tsv lines.

# Run the articles_processed UDF over the first two columns (id, content)
# of every article; the six remaining columns are ignored.
articles_processed += articles_processed(doc_id, content) :-
    articles(doc_id, content, _, _, _, _, _, _).
	

# Sentence-level NLP output produced by the nlp_markup UDF, keyed by
# (doc_id, sentence_index).
# NOTE(review): the array columns are presumably parallel per-token arrays --
# confirm against udf/nlp_markup_with_foolnltk.py.
sentences(
    @key
    @distributed_by
    doc_id         text,

    @key
    sentence_index int,

    @searchable
    sentence_text  text,

    tokens         text[],
    lemmas         text[],
    pos_tags       text[],
    ner_tags       text[],
    doc_offsets    int[],
    dep_types      text[],
    dep_tokens     int[]
).

# UDF declaration: receives (doc_id, content) as TSV lines and emits rows
# shaped like the sentences relation.
function nlp_markup over (
    doc_id  text,
    content text
) returns rows like sentences
  implementation "udf/nlp_markup_with_foolnltk.py" handles tsv lines.

# Populate sentences by running nlp_markup on every processed article.
sentences += nlp_markup(doc_id, content) :-
    articles_processed(doc_id, content).


# Entity mentions emitted by the map_entity_mention UDF. Each row locates a
# mention by (doc_id, sentence_index) plus a begin/end index pair.
# NOTE(review): whether end_index is inclusive is decided by the UDF -- confirm.
entity_mention(
    entity_id      text,
    entity_text    text,
    entity_type    text,
    doc_id         text,
    sentence_index int,
    begin_index    int,
    end_index      int
).

# UDF declaration: receives one sentence's tokens, POS tags and NER tags as
# TSV lines and emits rows shaped like the entity_mention relation.
function map_entity_mention over (
        doc_id         text,
        sentence_index int,
        tokens         text[],
        pos_tags       text[],
        ner_tags       text[]
    ) returns rows like entity_mention
    implementation "udf/map_entity_mention.py" handles tsv lines.


# Extract entity mentions from every sentence. Only the token, POS-tag and
# NER-tag arrays are handed to the UDF; the other sentence columns are skipped.
entity_mention += map_entity_mention(doc_id, sentence_index, tokens, pos_tags, ner_tags) :-
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _).

# Table named `relation` with host/guest step and type columns.
# NOTE(review): no rule visible in this file reads or writes this table;
# presumably it is loaded from input data -- confirm.
relation(
    @key
    @distributed_by
    id         text,
    name       text,
    step_host  int,
    type_host  text,
    step_guest int,
    type_guest text
).

# Available feature window sizes. The feature_entity rules join against this
# table and keep the rows whose size is 10 or 5.
# NOTE(review): no rule visible in this file populates it; presumably it is
# loaded from input data -- confirm.
feature_window(
    @key
    size int
).


# Features extracted around an entity mention by the feature_entity UDF,
# together with the window size that was used to extract them.
# The @references annotation points at entity_mention, which is where the
# entity_id values originate (the previous target, "isBiddingAgency", is not
# declared anywhere in this file).
feature_entity(
    @key
    @references(relation="entity_mention",column="entity_id",alias="entity_mention")
    entity_id   text,
    feature     text,
    window_size int
).
# UDF declaration: receives one entity mention (id and begin/end indexes),
# the token/POS/NER arrays of its sentence and a window size as TSV lines.
# It emits rows shaped like the feature_entity relation, which is the relation
# its output is inserted into by the rules below.
# The implementation path is relative, like every other UDF in this file.
function feature_entity over (
    entity_id      text,
    entity_begin   int,
    entity_end     int,
    doc_id         text,
    sentence_index int,
    tokens         text[],
    pos_tags       text[],
    ner_tags       text[],
    window_size    int
) returns rows like feature_entity
  implementation "udf/feature_entity.py" handles tsv lines.

# Features for "company" and "org" mentions, extracted with window size 10.
# The mention is joined to its sentence on (doc_id, sentence_index); the
# window size must be present in feature_window for any row to be produced.
feature_entity += feature_entity(
    entity_id, entity_begin, entity_end,
    doc_id, sentence_index,
    tokens, pos_tags, ner_tags,
    window_size
) :-
    entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
    [entity_type = "company"; entity_type = "org"],
    feature_window(window_size),
    window_size = 10.

# Features for "person", "time", "location", "call" and "money" mentions,
# extracted with window size 5. Same join shape as the window-10 rule: the
# mention is matched to its sentence on (doc_id, sentence_index).
feature_entity += feature_entity(
    entity_id, entity_begin, entity_end,
    doc_id, sentence_index,
    tokens, pos_tags, ner_tags,
    window_size
) :-
    entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
    [entity_type = "person"; entity_type = "time"; entity_type = "location"; entity_type = "call"; entity_type = "money"],
    feature_window(window_size),
    window_size = 5.

# Supervision labels for "money" mentions, as emitted by the
# supervise_guest_Money UDF. The supervision rule for is_moneytenderer treats
# label = 2 as the positive class.
label_guest_Money(
    entity_id text,
    label     int,
    rule_id   text
).

# UDF declaration: receives one entity mention together with the text, tokens,
# POS tags and NER tags of its sentence as TSV lines, and emits rows shaped
# like the label_guest_Money relation.
function supervise_guest_Money over (
    entity_id      text,
    entity_begin   int,
    entity_end     int,
    doc_id         text,
    sentence_index int,
    sentence_text  text,
    tokens         text[],
    pos_tags       text[],
    ner_tags       text[]
) returns rows like label_guest_Money
  implementation "udf/supervise_guest_Money.py" handles tsv lines.

# Label every mention whose entity type is "money": the mention is joined to
# its sentence on (doc_id, sentence_index) and handed to the supervision UDF.
label_guest_Money += supervise_guest_Money(
    entity_id, entity_begin, entity_end,
    doc_id, sentence_index, sentence_text,
    tokens, pos_tags, ner_tags
) :-
    sentences(doc_id, sentence_index, sentence_text, tokens, _, pos_tags, ner_tags, _, _, _),
    entity_mention(entity_id, _, mention_type, doc_id, sentence_index, entity_begin, entity_end),
    mention_type = "money".


# Boolean random variable, one per entity_id, whose value is set from
# label_guest_Money by the supervision rule that follows.
@extraction
is_moneytenderer?(
    @key
    @references(relation="entity_mention", column="entity_id", alias="entity_mention")
    entity_id text
).

# Supervision: an entity is a positive example when its label equals 2 and a
# negative example for every other label value.
# NOTE(review): there is no NULL (unlabelled) branch, so every labelled
# mention becomes evidence -- confirm this is intended.
is_moneytenderer(entity_id) = if l = 2 then TRUE else FALSE end :-
    label_guest_Money(entity_id, l, _).

# Inference rule: connect every is_moneytenderer variable to the features
# extracted for its entity with window size 5, learning one weight per
# distinct feature value `f`.
#
# The rule deliberately does NOT join on label_guest_Money. Restricting the
# factors to label = 2 would build features only for positively labelled
# variables, which leaks the supervision labels into the model structure and
# leaves every other variable without any factor.
@weight(f)
is_moneytenderer(entity_id) :-
    feature_entity(entity_id, f, window),
    window = 5.