Skip to content

Commit

Permalink
textpage: add consecutive option
Browse files Browse the repository at this point in the history
  • Loading branch information
mara004 committed Feb 23, 2023
1 parent ab72fe5 commit 4cea0ce
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/pypdfium2/_helpers/textpage.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def get_charbox(self, index, loose=False):
index (int):
Index of the character to work with, in the page's character array.
loose (bool):
If True, the entire glyph bounds will be covered, without taking the actual glyph shape into account.
TODO
Returns:
Float values for left, bottom, right and top in PDF canvas units.
"""
Expand Down Expand Up @@ -176,7 +176,7 @@ def get_rect(self, index):
return (l.value, b.value, r.value, t.value)


def search(self, text, index=0, match_case=False, match_whole_word=False):
def search(self, text, index=0, match_case=False, match_whole_word=False, consecutive=False):
"""
Locate text on the page.
Expand All @@ -189,6 +189,8 @@ def search(self, text, index=0, match_case=False, match_whole_word=False):
If True, the search will be case-specific (upper and lower letters treated as different characters).
match_whole_word (bool):
If True, substring occurrences will be ignored (e. g. `cat` would not match `category`).
consecutive (bool):
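If True, the search will not skip past the current match when looking for the next one, so overlapping occurrences can be found (sets PDFium's `FPDF_CONSECUTIVE` flag).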
Returns:
PdfTextSearcher: A helper object to search text.
"""
Expand All @@ -201,6 +203,8 @@ def search(self, text, index=0, match_case=False, match_whole_word=False):
flags |= pdfium_c.FPDF_MATCHCASE
if match_whole_word:
flags |= pdfium_c.FPDF_MATCHWHOLEWORD
if consecutive:
flags |= pdfium_c.FPDF_CONSECUTIVE

enc_text = (text + "\x00").encode("utf-16-le")
enc_text_ptr = ctypes.cast(enc_text, ctypes.POINTER(ctypes.c_ushort))
Expand Down

0 comments on commit 4cea0ce

Please sign in to comment.