From 8c6dacc3d1d32b27bfec16f534bede19758b07ca Mon Sep 17 00:00:00 2001 From: Krishnasis Date: Fri, 23 Feb 2024 14:05:33 +0100 Subject: [PATCH] fix: Check distance between two chars --- hotpdf/memory_map.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hotpdf/memory_map.py b/hotpdf/memory_map.py index 64f48d6..d82a9c3 100644 --- a/hotpdf/memory_map.py +++ b/hotpdf/memory_map.py @@ -145,13 +145,15 @@ def load_memory_map( prev_char_inserted = char_c != " " # Insert into Trie and Span Maps last_inserted_x_y: tuple[int, int] = (-1, -1) + # TODO: Constant distance for now - needs research + ANNOTATION_DISTANCE_THRESHOLD = 5 for i in range(len(char_hot_characters)): _current_character: HotCharacter = char_hot_characters[i] # Determine if annotation spaces should be added if include_annotation_spaces and i > 0 and i < len(char_hot_characters) - 1: prev_char: HotCharacter = char_hot_characters[i - 1] next_char: HotCharacter = char_hot_characters[i + 1] - if _current_character.is_anno and (not (next_char.x - prev_char.x_end) >= 5): + if _current_character.is_anno and (not (next_char.x - prev_char.x) >= ANNOTATION_DISTANCE_THRESHOLD): continue # Prevent characters from overlapping