Skip to content

Commit

Permalink
ENH: Add Highlight text markup annotation (#1740)
Browse files Browse the repository at this point in the history
See #107
  • Loading branch information
MartinThoma authored Mar 26, 2023
1 parent 4fc0040 commit 3da3b25
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 0 deletions.
33 changes: 33 additions & 0 deletions docs/user/adding-pdf-annotations.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,36 @@ writer.add_annotation(page_number=0, annotation=annotation)
with open("annotated-pdf.pdf", "wb") as fp:
writer.write(fp)
```

## Text Markup Annotations

Text markup annotations refer to a specific piece of text within the document.

They are a bit more complicated than other annotations because you need to
know exactly where the text is located on the page. Those coordinates are the
"quad points".

### Highlighting

If you want to highlight text like this:

![](annotation-highlight.png)

you can use the {py:class}`AnnotationBuilder <pypdf.generic.AnnotationBuilder>`:

```python
# Names used below that this snippet did not previously need.
from pypdf.generic import ArrayObject, FloatObject

pdf_path = os.path.join(RESOURCE_ROOT, "crazyones.pdf")
reader = PdfReader(pdf_path)
page = reader.pages[0]
writer = PdfWriter()
writer.add_page(page)

# Add the highlight.
# The quad points must match a word (or a group of words) on the page,
# otherwise no highlight will be shown.
rect = (95.79332, 704.31777, 138.55779, 724.6855)
quad_points = ArrayObject(
    [
        FloatObject(value)
        for value in (
            100.060779, 723.55398,
            134.29033, 723.55398,
            100.060779, 705.4493,
            134.29033, 705.4493,
        )
    ]
)
annotation = AnnotationBuilder.highlight(
    rect=rect,
    quad_points=quad_points,
    highlight_color="ff0000",
)
writer.add_annotation(page_number=0, annotation=annotation)

# Write the annotated file to disk
with open("annotated-pdf.pdf", "wb") as fp:
    writer.write(fp)
```
Binary file added docs/user/annotation-highlight.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
33 changes: 33 additions & 0 deletions pypdf/generic/_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,39 @@ def rectangle(

return square_obj

@staticmethod
def highlight(
    *,
    rect: Union[RectangleObject, Tuple[float, float, float, float]],
    quad_points: ArrayObject,
    highlight_color: str = "ff0000",
) -> DictionaryObject:
    """
    Create the dictionary for a highlight (text markup) annotation.

    All arguments are keyword-only.

    Args:
        rect: The highlighted area as four numbers
            ``[xLL, yLL, xUR, yUR]``; it is wrapped in a
            ``RectangleObject`` and stored under ``/Rect``.
        quad_points: An ArrayObject of 8 FloatObjects, stored unchanged
            under ``/QuadPoints``. Must match a word or a group of words,
            otherwise no highlight will be shown.
        highlight_color: The color used for the highlight, as a hex string
            that is passed to ``hex_to_rgb``; stored under ``/C``.

    Returns:
        A dictionary object representing the annotation.
    """
    # The rectangle is converted before the color, in the same order the
    # values appear in the resulting dictionary.
    annotation_rect = RectangleObject(rect)
    color_components = ArrayObject(
        list(map(FloatObject, hex_to_rgb(highlight_color)))
    )

    # Everything is handed to the constructor in a single mapping; key
    # order is /Type, /Subtype, /Rect, /QuadPoints, /C.
    return DictionaryObject(
        {
            NameObject("/Type"): NameObject("/Annot"),
            NameObject("/Subtype"): NameObject("/Highlight"),
            NameObject("/Rect"): annotation_rect,
            NameObject("/QuadPoints"): quad_points,
            NameObject("/C"): color_components,
        }
    )

@staticmethod
def ellipse(
rect: Union[RectangleObject, Tuple[float, float, float, float]],
Expand Down
32 changes: 32 additions & 0 deletions tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,38 @@ def test_annotation_builder_square(pdf_file_path):
writer.write(fp)


def test_annotation_builder_highlight(pdf_file_path):
    """Smoke test: add a highlight annotation to a page and write the PDF.

    There are no automated assertions; the file written to ``pdf_file_path``
    has to be inspected manually.
    """
    # Arrange
    source_pdf = RESOURCE_ROOT / "crazyones.pdf"
    reader = PdfReader(source_pdf)
    first_page = reader.pages[0]
    writer = PdfWriter()
    writer.add_page(first_page)

    # Eight quad-point values for the text that should be highlighted.
    quad_values = (
        100.060779,
        723.55398,
        134.29033,
        723.55398,
        100.060779,
        705.4493,
        134.29033,
        705.4493,
    )

    # Act
    highlight_annotation = AnnotationBuilder.highlight(
        rect=(95.79332, 704.31777, 138.55779, 724.6855),
        highlight_color="ff0000",
        quad_points=ArrayObject([FloatObject(value) for value in quad_values]),
    )
    writer.add_annotation(0, highlight_annotation)

    # Assert: You need to inspect the file manually
    with open(pdf_file_path, "wb") as output_stream:
        writer.write(output_stream)


def test_annotation_builder_circle(pdf_file_path):
# Arrange
pdf_path = RESOURCE_ROOT / "crazyones.pdf"
Expand Down

0 comments on commit 3da3b25

Please sign in to comment.