|
1 | 1 | # cython: infer_types=True, boundscheck=False |
2 | 2 | # distutils: language=c++ |
3 | | -""" NeuralCoref resolution spaCy v2.0 pipeline component |
| 3 | +""" NeuralCoref resolution spaCy v2.0 pipeline component |
4 | 4 | Custom pipeline components: https://spacy.io/usage/processing-pipelines#custom-components |
5 | 5 | Compatible with: spaCy v2.0.0+ |
6 | 6 | """ |
@@ -126,7 +126,7 @@ NSUBJ_OR_DEP = ["nsubj", "dep"] |
126 | 126 | CONJ_OR_PREP = ["conj", "prep"] |
127 | 127 | LEAVE_DEP = ["det", "compound", "appos"] |
128 | 128 | KEEP_DEP = ["nsubj", "dobj", "iobj", "pobj"] |
129 | | -REMOVE_POS = ["CCONJ", "INTJ", "ADP"] |
| 129 | +REMOVE_POS = ["CCONJ", "SCONJ", "INTJ", "ADP"] |
130 | 130 | LOWER_NOT_END = ["'s", ',', '.', '!', '?', ':', ';'] |
131 | 131 | PUNCTS = [".", "!", "?"] |
132 | 132 | ACCEPTED_ENTS = ["PERSON", "NORP", "FACILITY", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "WORK_OF_ART", "LANGUAGE"] |
@@ -327,7 +327,7 @@ cdef (int, int) enlarge_span(TokenC* doc_c, int i, int sent_start, int sent_end, |
327 | 327 | maxchild_idx -= 1 # We don't want mentions finishing with 's or conjunctions/punctuation |
328 | 328 | # if debug: print("maxchild_idx", maxchild_idx) |
329 | 329 | while minchild_idx <= maxchild_idx and minchild_idx < sent_end - 1 \ |
330 | | - and (inside(doc_c[minchild_idx].pos, hashes.remove_pos) |
| 330 | + and (inside(doc_c[minchild_idx].pos, hashes.remove_pos) |
331 | 331 | or inside(doc_c[minchild_idx].lex.lower, hashes.lower_not_end)): |
332 | 332 | minchild_idx += 1 # We don't want mentions starting with 's or conjunctions/punctuation |
333 | 333 | # if debug: print("minchild_idx", minchild_idx) |
@@ -882,7 +882,7 @@ cdef class NeuralCoref(object): |
882 | 882 | if tuned and hash_w in self.tuned_vectors: |
883 | 883 | return self.tuned_vectors[hash_w] |
884 | 884 | return self.get_static(hash_w) |
885 | | - |
| 885 | + |
886 | 886 | def get_word_in_sentence(self, int i, Span sent): |
887 | 887 | if i < sent.start or i >= sent.end: |
888 | 888 | return self.tuned_vectors[self.hashes.missing_word] |
|
0 commit comments