Skip to content

Commit

Permalink
Use specific variables for start and end coordinates for clarity
Browse files Browse the repository at this point in the history
  • Loading branch information
kkaris committed Jul 24, 2024
1 parent 50cb274 commit 9caa293
Showing 1 changed file with 4 additions and 5 deletions.
9 changes: 4 additions & 5 deletions gilda/ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ def annotate(
word_tokenizer = TreebankWordTokenizer()
# FIXME: a custom sentence split function can be inconsistent
# with the coordinates being used here which come from NLTK
for sentence_coord in sentence_coords:
sentence = text[sentence_coord[0]:sentence_coord[1]]
for sent_start, sent_end in sentence_coords:
sentence = text[sent_start:sent_end]
# FIXME: one rare corner case is named entities with single quotes
# in them which get tokenized in a weird way
raw_word_coords = \
Expand Down Expand Up @@ -154,9 +154,8 @@ def annotate(
organisms=organisms,
namespaces=namespaces)
if matches:
start_coord = sentence_coord[0] + raw_word_coords[idx][0]
end_coord = sentence_coord[0] + \
raw_word_coords[idx+span-1][1]
# Word coordinates are offsets within the sentence slice
# text[sent_start:sent_end], so both ends of the span must be shifted
# by the sentence's start offset (not its end) to index into the text
start_coord = sent_start + raw_word_coords[idx][0]
end_coord = sent_start + raw_word_coords[idx+span-1][1]
annotations.append(Annotation(
raw_span, matches, start_coord, end_coord
))
Expand Down

0 comments on commit 9caa293

Please sign in to comment.