# Input documents, one row per article.
# `id` is both the key and the distribution column; `content` is marked
# searchable. The remaining columns hold party names attached to the article;
# rules elsewhere in this file read the 7th column (`second_tenderer`) as the
# annotated value for supervision.
# NOTE(review): the other party columns are presumably annotations of the
# same kind - confirm against the data loader.
articles(
    @key
    @distributed_by
    id              text,
    @searchable
    content         text,
    tenderee        text,
    agency          text,
    win_tenderer    text,
    first_tenderer  text,
    second_tenderer text,
    third_tenderer  text
).


# Output of the articles_processed UDF: one (id, content) row per document,
# keyed by `id`.
articles_processed(
    @key
    id      text,
    content text
).

# UDF declaration: (doc_id, content) pairs are passed as TSV lines to
# udf/articles_processed.py, which must emit rows shaped like the
# articles_processed relation.
function articles_processed over (
    doc_id  text,
    content text
) returns rows like articles_processed
  implementation "udf/articles_processed.py" handles tsv lines.

# Run the articles_processed UDF over the id and content of every article;
# the six remaining article columns are ignored here.
articles_processed += articles_processed(doc_id, content) :-
    articles(doc_id, content, _, _, _, _, _, _).
	

# Per-sentence output of the nlp_markup UDF.
# Rows are keyed by (doc_id, sentence_index) and distributed by doc_id;
# sentence_text is marked searchable.
# NOTE(review): the array columns are presumably parallel per-token arrays
# (one entry per token) - confirm against the UDF implementation.
sentences(
    @key
    @distributed_by
    doc_id         text,
    @key
    sentence_index int,
    @searchable
    sentence_text  text,
    tokens         text[],
    lemmas         text[],
    pos_tags       text[],
    ner_tags       text[],
    doc_offsets    int[],
    dep_types      text[],
    dep_tokens     int[]
).

# UDF declaration: (doc_id, content) pairs are passed as TSV lines to
# udf/nlp_markup_with_foolnltk.py, which must emit rows shaped like the
# sentences relation.
function nlp_markup over (
    doc_id  text,
    content text
) returns rows like sentences
  implementation "udf/nlp_markup_with_foolnltk.py" handles tsv lines.

# Populate sentences by running nlp_markup over every processed article.
sentences += nlp_markup(doc_id, content) :-
    articles_processed(doc_id, content).


# Entity mentions produced by the map_entity_mention UDF.
# Each row identifies a mention (entity_id), its text and type, and where it
# sits: document, sentence index and a begin/end index pair.
# NOTE(review): begin_index/end_index are presumably token positions within
# the sentence, and inclusiveness of end_index is not visible here - confirm
# against udf/map_entity_mention.py.
entity_mention(
    entity_id      text,
    entity_text    text,
    entity_type    text,
    doc_id         text,
    sentence_index int,
    begin_index    int,
    end_index      int
).

# UDF declaration: one sentence (its ids plus token, POS-tag and NER-tag
# arrays) is passed per TSV line to udf/map_entity_mention.py, which must
# emit rows shaped like the entity_mention relation.
function map_entity_mention over (
    doc_id         text,
    sentence_index int,
    tokens         text[],
    pos_tags       text[],
    ner_tags       text[]
) returns rows like entity_mention
  implementation "udf/map_entity_mention.py" handles tsv lines.


# Extract entity mentions from every sentence. Only the ids, tokens,
# pos_tags and ner_tags columns of sentences are forwarded to the UDF.
entity_mention += map_entity_mention(
    doc_id, sentence_index, tokens, pos_tags, ner_tags
) :-
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _).

# Catalogue of relation definitions, keyed and distributed by `id`.
# The negative labelling rule in this file looks up the row whose id is
# "Second_Tenderer" and uses its `type_guest` to select candidate mentions.
# NOTE(review): step_host/step_guest semantics are not visible in this file -
# confirm with the data that populates this table.
relation(
    @key
    @distributed_by
    id         text,
    name       text,
    step_host  int,
    type_host  text,
    step_guest int,
    type_guest text
).

# Set of window sizes available for feature extraction. The feature_entity
# rules only fire for sizes that are present in this table.
feature_window(
    @key
    size int
).


# Features extracted for an entity mention by the feature_entity UDF.
# Each row pairs an entity_id with one feature string and the window size
# that was used to produce it.
#
# The @references annotation previously pointed at "isBiddingAgency", a
# relation that is not declared in this program; it now points at the
# variable relation declared here, is_secondtenderer, whose key column is
# also entity_id.
feature_entity(
    @key
    @references(relation="is_secondtenderer", column="entity_id", alias="is_secondtenderer")
    entity_id   text,
    feature     text,
    window_size int
).
# UDF declaration: one entity mention plus the token/POS/NER arrays of its
# sentence and a window size are passed per TSV line to
# udf/feature_entity.py.
#
# Fixes relative to the previous declaration:
#   * The output schema is `feature_entity`, the relation this function's
#     results are appended to; it previously named
#     `feature_guest_BiddingAgency`, which is not declared in this program.
#   * The implementation path is app-relative ("udf/..."), like every other
#     UDF in this file; the previous leading "/" pointed at the filesystem
#     root.
function feature_entity over (
    entity_id      text,
    entity_begin   int,
    entity_end     int,
    doc_id         text,
    sentence_index int,
    tokens         text[],
    pos_tags       text[],
    ner_tags       text[],
    window_size    int
) returns rows like feature_entity
  implementation "udf/feature_entity.py" handles tsv lines.

# Features for "company" and "org" mentions, extracted with window size 10.
# The mention is joined to its sentence on (doc_id, sentence_index); the rule
# only fires if feature_window contains the size 10.
feature_entity += feature_entity(
    entity_id, entity_begin, entity_end,
    doc_id, sentence_index,
    tokens, pos_tags, ner_tags,
    window_size
) :-
    entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
    [ entity_type = "company"
    ; entity_type = "org"
    ],
    feature_window(window_size),
    window_size = 10.

# Features for "person", "time", "location", "call" and "money" mentions,
# extracted with window size 5. The mention is joined to its sentence on
# (doc_id, sentence_index); the rule only fires if feature_window contains
# the size 5.
feature_entity += feature_entity(
    entity_id, entity_begin, entity_end,
    doc_id, sentence_index,
    tokens, pos_tags, ner_tags,
    window_size
) :-
    entity_mention(entity_id, _, entity_type, doc_id, sentence_index, entity_begin, entity_end),
    sentences(doc_id, sentence_index, _, tokens, _, pos_tags, ner_tags, _, _, _),
    [ entity_type = "person"
    ; entity_type = "time"
    ; entity_type = "location"
    ; entity_type = "call"
    ; entity_type = "money"
    ],
    feature_window(window_size),
    window_size = 5.

# Supervision labels for the second-tenderer variable.
# `label` is 1 for positive and -1 for negative examples (the only values
# written by the rules in this file); `rule_id` names the rule that produced
# the label.
label_guest_SecondTenderer(
    entity_id text,
    label     int,
    rule_id   text
).

# Positive label: the mention text equals the 7th column of the mention's
# article row, i.e. articles.second_tenderer.
# The rule_id literal "标注数据" means "annotated data".
label_guest_SecondTenderer(entity_id, 1, "标注数据") :-
    entity_mention(entity_id, entity_text, _, doc_id, _, _, _),
    articles(doc_id, _, _, _, _, _, entity_text, _).

# Negative label: a mention gets -1 when
#   * its entity_type equals the type_guest of the "Second_Tenderer" row in
#     `relation`, and
#   * its article has a non-NULL second_tenderer (7th column of articles)
#     that differs from the mention text.
# The rule_id literal "非标注数据" means "non-annotated data".
label_guest_SecondTenderer(entity_id, -1, "非标注数据") :-
    entity_mention(entity_id, entity_text, guest_type, doc_id, _, _, _),
    relation(relation_id, _, _, _, _, guest_type),
    relation_id = "Second_Tenderer",
    articles(doc_id, _, _, _, _, _, annotated_text, _),
    annotated_text IS NOT NULL,
    entity_text != annotated_text.


# Boolean random variable (declared with `?`): one per entity mention,
# keyed by entity_id, which references entity_mention.entity_id.
@extraction
is_secondtenderer?(
    @key
    @references(relation="entity_mention", column="entity_id", alias="entity_mention")
    entity_id text
).

# Supervision: map each label to the variable's observed value.
#   label > 0  -> TRUE
#   label < 0  -> FALSE
#   otherwise  -> NULL (unsupervised)
is_secondtenderer(entity_id) =
    if label > 0 then TRUE
    else if label < 0 then FALSE
    else NULL
    end :-
    label_guest_SecondTenderer(entity_id, label, _).

# Inference rule: one factor per (entity, feature) pair, with the weight
# tied to the feature value `f`. Only features recorded with window size 10
# are used, and only entities that have a row in label_guest_SecondTenderer
# take part.
@weight(f)
is_secondtenderer(entity_id) :-
    feature_entity(entity_id, f, win_size),
    label_guest_SecondTenderer(entity_id, _, _),
    win_size = 10.