#!/usr/bin/env python
from deepdive import *
import ddlib


@tsv_extractor
@returns(lambda
        entity_id="text",
        feature="text",
        window_size="int",
    :[])
def extract(
        entity_id="text",
        entity_begin_index="int",
        entity_end_index="int",
        doc_id="text",
        sent_index="int",
        tokens="text[]",
        pos_tags="text[]",
        ner_tags="text[]",
        window_size="int",
    ):
    """
    Yield generic ddlib mention features for a single entity mention.

    The keyword defaults above are column type declarations passed to the
    ``tsv_extractor`` decorator, not runtime default values.

    The mention covers the tokens from ``entity_begin_index`` through
    ``entity_end_index`` (inclusive).  ``doc_id`` and ``sent_index`` are
    accepted as input columns but are not read by this function.

    Each yielded row is ``[entity_id, feature, window_size]``, one row per
    feature returned by ``ddlib.get_generic_features_mention``.
    """
    # A ddlib "sentence" is simply a list of ddlib Word objects, one per token.
    sentence = [
        ddlib.Word(
            begin_char_offset=None,
            end_char_offset=None,
            word=token,
            # No lemma column is passed in, so the surface token is reused.
            lemma=token,
            pos=pos_tags[position],
            ner=ner_tags[position],
            # CoreNLP stores ROOT as 0, whereas ddlib expects -1 for ROOT.
            dep_par=-1,
            dep_label='',
        )
        for position, token in enumerate(tokens)
    ]

    # Both indexes are inclusive, hence the +1 when computing the length.
    mention_length = entity_end_index - entity_begin_index + 1
    mention_span = ddlib.Span(
        begin_word_id=entity_begin_index,
        length=mention_length,
    )

    mention_features = ddlib.get_generic_features_mention(
        sent, mention_span, window=window_size
    ) if False else ddlib.get_generic_features_mention(
        sentence, mention_span, window=window_size
    )
    for feature in mention_features:
        yield [entity_id, feature, window_size]