Skip to content

Commit

Permalink
Add ability to force get_text_range()
Browse files (browse the repository at this point in the history)
  • Loading branch information
mara004 committed Mar 2, 2024
1 parent 8daf76b commit cd770e6
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/pypdfium2/_cli/extract_text.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,7 +30,7 @@ def main(args):

# TODO let caller pass in possible range/boundary parameters
if args.strategy == EXTRACT_RANGE:
- text = textpage.get_text_range()
+ text = textpage.get_text_range(force_this=True)
elif args.strategy == EXTRACT_BOUNDED:
text = textpage.get_text_bounded()
else:
Expand Down
15 changes: 8 additions & 7 deletions src/pypdfium2/_helpers/textpage.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -50,14 +50,15 @@ def _get_active_text_range(self, c_start, c_end, l_passive=0, r_passive=0):
return t_start, t_end, l_passive, r_passive


- def get_text_range(self, index=0, count=-1, errors="ignore"):
+ def get_text_range(self, index=0, count=-1, errors="ignore", force_this=False):
"""
Extract text from a given range.
Warning:
Unexpected upstream changes have caused allocation size concerns with this API.
Using it is now discouraged unless you specifically need to extract a character range. Prefer :meth:`.get_text_bounded` where possible.
Calling this method with default params now implicitly translates to :meth:`.get_text_bounded`.
.. versionchanged:: 4.28
Unexpected upstream changes have caused allocation size concerns with this API.
Using it is now discouraged unless you specifically need to extract a character range. Prefer :meth:`.get_text_bounded` where possible.
Calling this method with default params now implicitly translates to :meth:`.get_text_bounded` (pass ``force_this=True`` to circumvent).
Extract text from a given range.
Parameters:
index (int): Index of the first char to include.
Expand All @@ -76,7 +77,7 @@ def get_text_range(self, index=0, count=-1, errors="ignore"):

# https://github.com/pypdfium2-team/pypdfium2/issues/298
# https://crbug.com/pdfium/2133
- if (index, count) == (0, -1):
+ if (index, count) == (0, -1) and not force_this:
warnings.warn("get_text_range() call with default params will be implicitly redirected to get_text_bounded()")
return self.get_text_bounded(errors=errors)

Expand Down

0 comments on commit cd770e6

Please sign in to comment.