123456789101112131415161718192021222324252627282930313233343536373839404142 |
- #!/usr/bin/env python
- from deepdive import *
- import ddlib
@tsv_extractor
@returns(lambda
        entity_id   = "text",
        feature     = "text",
        window_size = "int",
    :[])
def extract(
        entity_id          = "text",
        entity_begin_index = "int",
        entity_end_index   = "int",
        doc_id             = "text",
        sent_index         = "int",
        tokens             = "text[]",
        pos_tags           = "text[]",
        ner_tags           = "text[]",
        window_size        = "int",
    ):
    """
    Yield generic DDLIB mention features for a single entity mention.

    The string defaults in the signature (and in the ``@returns`` lambda)
    are column type declarations -- presumably consumed by the deepdive
    decorators to parse input rows and format output rows; confirm against
    the deepdive package.

    The sentence is rebuilt as a list of ``ddlib.Word`` objects from
    ``tokens``, ``pos_tags`` and ``ner_tags`` (indexed in parallel, so the
    tag lists must be at least as long as ``tokens``).  The mention covers
    the words from ``entity_begin_index`` through ``entity_end_index``; the
    span length is ``end - begin + 1``, i.e. the end index is treated as
    inclusive.  ``doc_id`` and ``sent_index`` are accepted but not used here.

    Yields one ``[entity_id, feature, window_size]`` row for every feature
    returned by ``ddlib.get_generic_features_mention``.
    """
    # A DDLIB sentence is just a list of DDLIB Word objects.  No lemma column
    # is supplied, so each token doubles as its own lemma; character offsets
    # and dependency information are left as placeholders.
    sentence = [
        ddlib.Word(
            begin_char_offset=None,
            end_char_offset=None,
            word=token,
            lemma=tokens[position],
            pos=pos_tags[position],
            ner=ner_tags[position],
            # Original note: CoreNLP stores ROOT as 0, but DDLIB uses -1.
            dep_par=-1,
            dep_label='')
        for position, token in enumerate(tokens)
    ]

    # DDLIB Span describing where the entity mention sits in the sentence.
    mention_length = entity_end_index - entity_begin_index + 1
    mention_span = ddlib.Span(begin_word_id=entity_begin_index,
                              length=mention_length)

    mention_features = ddlib.get_generic_features_mention(
        sentence, mention_span, window=window_size)
    for feature in mention_features:
        yield [entity_id, feature, window_size]
|