#!/usr/bin/env python
from deepdive import *
import ddlib


@tsv_extractor
@returns(lambda
        p1_id       = "text",
        p2_id       = "text",
        feature     = "text",
        window_size = "int",
    :[])
def extract(
        p1_id          = "text",
        p2_id          = "text",
        p1_begin_index = "int",
        p1_end_index   = "int",
        p2_begin_index = "int",
        p2_end_index   = "int",
        doc_id         = "text",
        sent_index     = "int",
        tokens         = "text[]",
        lemmas         = "text[]",
        pos_tags       = "text[]",
        ner_tags       = "text[]",
        dep_types      = "text[]",
        dep_parents    = "int[]",
        window_size    = "int",
    ):
    """
    Yield DDLIB generic relation features for a pair of person mentions.

    One DDLIB Word is built per entry of `tokens`, taking the lemma, POS tag
    and NER tag found at the same position in `lemmas`, `pos_tags` and
    `ner_tags`. Each mention becomes a DDLIB Span that starts at its begin
    index and has length `end - begin + 1`, i.e. the end index is treated as
    inclusive. The sentence and both spans are handed to
    `ddlib.get_generic_features_relation` with `window=window_size`.

    Yields one row `[p1_id, p2_id, feature, window_size]` per feature
    returned by DDLIB, matching the columns declared in `@returns`.

    `doc_id`, `sent_index`, `dep_types` and `dep_parents` are part of the
    signature but are not read by the body.

    The parameter defaults are type-name strings; presumably the deepdive
    decorators read them to declare the TSV column types -- confirm against
    the deepdive module.
    """
    # A DDLIB sentence is just a list of DDLIB Word objects, one per token.
    # The parallel arrays are indexed by token position on purpose: a shorter
    # array raises IndexError instead of silently truncating the sentence.
    sentence = [
        ddlib.Word(
            begin_char_offset=None,
            end_char_offset=None,
            word=token,
            lemma=lemmas[idx],
            pos=pos_tags[idx],
            ner=ner_tags[idx],
            # -1 is ROOT for DDLIB (CoreNLP stores ROOT as 0).
            # NOTE(review): every word is marked as ROOT with an empty label,
            # so dep_parents / dep_types never reach DDLIB -- confirm that
            # leaving the dependency information out is intentional.
            dep_par=-1,
            dep_label='')
        for idx, token in enumerate(tokens)
    ]

    # Spans covering the two person mentions (end index inclusive).
    p1_length = p1_end_index - p1_begin_index + 1
    p1_span = ddlib.Span(begin_word_id=p1_begin_index, length=p1_length)
    p2_length = p2_end_index - p2_begin_index + 1
    p2_span = ddlib.Span(begin_word_id=p2_begin_index, length=p2_length)

    # Emit one output row per generic feature produced by DDLIB.
    relation_features = ddlib.get_generic_features_relation(
        sentence, p1_span, p2_span, window=window_size)
    for relation_feature in relation_features:
        yield [p1_id, p2_id, relation_feature, window_size]