diff --git a/pypdf/_page.py b/pypdf/_page.py index e474fd305..3c656a978 100644 --- a/pypdf/_page.py +++ b/pypdf/_page.py @@ -2027,8 +2027,6 @@ def extract_text( layout_mode_strip_rotated (bool): layout mode does not support rotated text. Set to False to include rotated text anyway. If rotated text is discovered, layout will be degraded and a warning will result. Defaults to True. - layout_mode_strip_rotated: Removes text that is rotated w.r.t. to the page from - layout mode output. Defaults to True. layout_mode_debug_path (Path | None): if supplied, must target a directory. creates the following files with debug information for layout mode functions if supplied: diff --git a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py index 7820dc165..1be500959 100644 --- a/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py +++ b/pypdf/_text_extraction/_layout_mode/_fixed_width_page.py @@ -143,8 +143,9 @@ def recurs_to_target_op( # multiply by bool (_idx != bt_idx) to ensure spaces aren't double # applied to the first tj of a BTGroup in fixed_width_page(). excess_tx = round(_tj.tx - last_displaced_tx, 3) * (_idx != bt_idx) - - new_text = f'{" " * int(excess_tx // _tj.space_tx)}{_tj.txt}' + # space_tx could be 0 if either Tz or font_size was 0 for this _tj. + spaces = int(excess_tx // _tj.space_tx) if _tj.space_tx else 0 + new_text = f'{" " * spaces}{_tj.txt}' last_ty = _tj.ty _text = f"{_text}{new_text}"