Skip to content

Commit

Permalink
Split handling of HTML attributes & style CSS properties
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucas-C committed Jun 19, 2024
1 parent ddc72ff commit 9dcea81
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 33 deletions.
89 changes: 56 additions & 33 deletions fpdf/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,21 +243,20 @@ def color_as_decimal(color="#000000"):


def parse_style(elem_attrs):
    """Parse the `style="..."` HTML attribute into a dict of CSS declarations.

    Args:
        elem_attrs (dict): attributes of an HTML element, as name -> value.
            It is only read, never modified.

    Returns:
        dict: CSS property name -> value, both stripped of surrounding
        whitespace. Empty when there is no (or an empty) `style` attribute.
    """
    style = {}
    # `or ""` also covers a valueless attribute (`<p style>`),
    # which HTMLParser reports with a None value
    for declaration in (elem_attrs.get("style") or "").split(";"):
        # Split on the first colon only, so that a value which itself contains
        # a colon (e.g. `url(http://...)`) is kept instead of being dropped
        name, _, value = declaration.partition(":")
        name, value = name.strip(), value.strip()
        # Ignore empty or malformed declarations: "", "color", "color:", ": red"
        if name and value:
            style[name] = value
    return style


class HTML2FPDF(HTMLParser):
Expand All @@ -281,7 +280,8 @@ def __init__(
tag_indents=None,
tag_styles=None,
list_vertical_margin=None,
**_,
heading_above=0.2,
heading_below=0.4,
):
"""
Args:
Expand All @@ -302,6 +302,8 @@ def __init__(
tag_styles (dict): mapping of HTML tag names to colors
list_vertical_margin (float): size of margins that precede lists.
The margin value is in the chosen pdf document units.
heading_above (float): extra space above heading, relative to font size
heading_below (float): extra space below heading, relative to font size
"""
super().__init__()
self.pdf = pdf
Expand Down Expand Up @@ -347,8 +349,8 @@ def __init__(
self.list_vertical_margin = list_vertical_margin
self.font_color = pdf.text_color.colors255
self.heading_level = None
self.heading_above = 0.2 # extra space above heading, relative to font size
self.heading_below = 0.4 # extra space below heading, relative to font size
self.heading_above = heading_above
self.heading_below = heading_below
self._tags_stack = []
self._column = self.pdf.text_columns(skip_leading_spaces=True)
self._paragraph = self._column.paragraph()
Expand Down Expand Up @@ -511,13 +513,17 @@ def handle_data(self, data):
emphasis |= TextEmphasis.I
if self.td_th.get("U"):
emphasis |= TextEmphasis.U
style = None
font_style = None
if bgcolor or emphasis:
style = FontFace(
font_style = FontFace(
emphasis=emphasis, fill_color=bgcolor, color=self.pdf.text_color
)
self.table_row.cell(
text=data, align=align, style=style, colspan=colspan, rowspan=rowspan
text=data,
align=align,
style=font_style,
colspan=colspan,
rowspan=rowspan,
)
self.td_th["inserted"] = True
elif self.table is not None:
Expand Down Expand Up @@ -561,9 +567,9 @@ def handle_starttag(self, tag, attrs):
self._pre_started = False
attrs = dict(attrs)
LOGGER.debug("STARTTAG %s %s", tag, attrs)
parse_style(attrs)
style = parse_style(attrs)
self._tags_stack.append(tag)
if attrs.get("break-before") == "page":
if style.get("break-before") == "page":
self._end_paragraph()
# pylint: disable=protected-access
self.pdf._perform_page_break()
Expand Down Expand Up @@ -606,11 +612,14 @@ def handle_starttag(self, tag, attrs):
align = attrs.get("align")[0].upper()
if not align in ["L", "R", "J", "C"]:
align = None
line_height = None
if "line-height" in attrs:
line_height = style.get("line-height", attrs.get("line-height"))
# "line-height" attributes are not valid in HTML,
# but we support it for backward compatibility,
# because fpdf2 honors it since 2.6.1 and PR #629
if line_height:
try:
# YYY parse and convert non-float line_height values
line_height = float(attrs.get("line-height"))
line_height = float(line_height)
except ValueError:
pass
self._new_paragraph(align=align, line_height=line_height)
Expand Down Expand Up @@ -638,7 +647,11 @@ def handle_starttag(self, tag, attrs):
bottom_margin=self.heading_below * hsize,
)
color = None
if "color" in attrs:
if "color" in style:
color = color_as_decimal(style["color"])
elif "color" in attrs:
# "color" attributes are not valid in HTML,
# but we support it for backward compatibility:
color = color_as_decimal(attrs["color"])
elif tag_style.color:
color = tag_style.color.colors255
Expand All @@ -650,7 +663,7 @@ def handle_starttag(self, tag, attrs):
)
if tag == "hr":
self._end_paragraph()
width = attrs.get("width")
width = style.get("width", attrs.get("width"))
if width:
if width[-1] == "%":
width = self.pdf.epw * int(width[:-1]) / 100
Expand Down Expand Up @@ -723,10 +736,14 @@ def handle_starttag(self, tag, attrs):
ul_prefix(attrs["type"]) if "type" in attrs else self.ul_bullet_char
)
self.bullet.append(bullet_char)
if "line-height" in attrs:
line_height = style.get("line-height", attrs.get("line-height"))
# "line-height" attributes are not valid in HTML,
# but we support it for backward compatibility,
# because fpdf2 honors it since 2.6.1 and PR #629
if line_height:
try:
# YYY parse and convert non-float line_height values
self.line_height_stack.append(float(attrs.get("line-height")))
self.line_height_stack.append(float(line_height))
except ValueError:
pass
else:
Expand All @@ -740,10 +757,14 @@ def handle_starttag(self, tag, attrs):
start = int(attrs["start"]) if "start" in attrs else 1
self.bullet.append(start - 1)
self.ol_type.append(attrs.get("type", "1"))
if "line-height" in attrs:
line_height = style.get("line-height", attrs.get("line-height"))
# "line-height" attributes are not valid in HTML,
# but we support it for backward compatibility,
# because fpdf2 honors it since 2.6.1 and PR #629
if line_height:
try:
# YYY parse and convert non-float line_height values
self.line_height_stack.append(float(attrs.get("line-height")))
self.line_height_stack.append(float(line_height))
except ValueError:
pass
else:
Expand Down Expand Up @@ -792,12 +813,14 @@ def handle_starttag(self, tag, attrs):
# This may result in a FPDFException "font not found".
self.set_font(face)
self.font_family = face
if "size" in attrs:
if "font-size" in style:
self.font_size = int(style.get("font-size"))
elif "size" in attrs:
self.font_size = int(attrs.get("size"))
self.set_font()
self.set_text_color(*self.font_color)
if tag == "table":
width = attrs.get("width")
width = style.get("width", attrs.get("width"))
if width:
if width[-1] == "%":
width = self.pdf.epw * int(width[:-1]) / 100
Expand Down Expand Up @@ -908,7 +931,7 @@ def handle_starttag(self, tag, attrs):
self.pdf.char_vpos = "SUP"
if tag == "sub":
self.pdf.char_vpos = "SUB"
if attrs.get("break-after") == "page":
if style.get("break-after") == "page":
if tag in ("br", "hr", "img"):
self._end_paragraph()
# pylint: disable=protected-access
Expand Down
Binary file added test/html/html_heading_above_below.pdf
Binary file not shown.
18 changes: 18 additions & 0 deletions test/html/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,3 +871,21 @@ def test_html_page_break_after(tmp_path):
Content on third page."""
)
assert_pdf_equal(pdf, HERE / "html_page_break_after.pdf", tmp_path)


def test_html_heading_above_below(tmp_path):
    """Render headings with custom `heading_above` / `heading_below` values
    and compare the result with the reference PDF."""
    markup = """
<h1>Top heading</h1>
<p>Lorem ipsum</p>
<h2>First heading</h2>
<p>Lorem ipsum</p>
<h2>Second heading</h2>
<p>Lorem ipsum</p>
"""
    doc = FPDF()
    doc.add_page()
    doc.write_html(markup, heading_above=1, heading_below=0.5)
    assert_pdf_equal(doc, HERE / "html_heading_above_below.pdf", tmp_path)

0 comments on commit 9dcea81

Please sign in to comment.