From ac4335ca86f1b84dc9bd601a782e6bb5ea7b82d4 Mon Sep 17 00:00:00 2001 From: Georg Mischler Date: Sun, 17 Sep 2023 22:39:33 +0200 Subject: [PATCH] column bottom balancing --- docs/TextRegion.md | 27 +++- fpdf/__init__.py | 1 - fpdf/fpdf.py | 28 ++-- fpdf/text_region.py | 202 ++++++++++++++++---------- test/text_region/tcols_balance.pdf | Bin 0 -> 2025 bytes test/text_region/test_text_columns.py | 56 ++++++- 6 files changed, 215 insertions(+), 99 deletions(-) create mode 100644 test/text_region/tcols_balance.pdf diff --git a/docs/TextRegion.md b/docs/TextRegion.md index d3b040b91..27237f278 100644 --- a/docs/TextRegion.md +++ b/docs/TextRegion.md @@ -34,7 +34,7 @@ But it is possible to use them intermittingly. This will probably most often mak The `FPDF.text_column() and ``FPDF.text_columns()` methods allow to create columnar layouts, with one or several columns respectively. Columns will always be of equal width. -#### Single-column example +#### Single-Column Example #### In this example an inserted paragraph is used in order to format its content with justified alignment, while the rest of the text uses the default left alignment. @@ -47,12 +47,12 @@ In this example an inserted paragraph is used in order to format its content wit cols.write(txt=LOREM_IPSUM) ``` -#### Multi-column example +#### Multi-Column Example Here we have a layout with three columns. Note that font type and text size can be varied within a text region, while still maintaining the justified (in this case) horizontal alignment. ```python - cols = pdf.text_columns(align="J", ncols=3, gap_width=5) + cols = pdf.text_columns(align="J", ncols=3, gutter=5) with cols: cols.write(txt=LOREM_IPSUM) pdf.set_font("Times", "", 8) @@ -62,19 +62,34 @@ Here we have a layout with three columns. 
Note that font type and text size can pdf.set_font("Helvetica", "", 12) ``` +#### Balanced Columns + +Normally the columns will be filled left to right, and if the text ends before the page is full, the rightmost column will end up shorter than the others. +If you prefer that all columns on a page end on the same height, you can use the `balance=True` argument. In that case a simple algorithm will be applied that attempts to approximately balance their bottoms. + +```python + with pdf.text_columns(align="J", ncols=3, gutter=5, balance=True) as cols: + pdf.set_font("Times", "", 14) + cols.write(text=LOREM_IPSUM[:300]) +``` +Note that this only works reliably when the font size (specifically the line height) doesn't change. If parts of the text use a larger or smaller font than the rest, then the balancing will usually be out of whack. Contributions for a more refined balancing algorithm are welcome. + ### Possible future extensions -* Balanced columns, which all end on the same hight. Currently columns are filled to the maximum height from left to right. +Those features are currently not supported, but Pull Requests are welcome to implement them: + * Columns with differing widths (no balancing possible in this case). ## Paragraphs ## -The primary purpose of paragraphs is simply to enable variations in horizontal text alignment, while the horizontal extents of the text are managed by the text region. +The primary purpose of paragraphs is to enable variations in horizontal text alignment, while the horizontal extents of the text are managed by the text region. Other than text regions, paragraphs should alway be used as context managers and never be reused. Violating those rules may result in the entered text turning up on the page out of sequence. 
### Possible future extensions -* Setting the spacing at the top/bottom of paragraphs +Those features are currently not supported, but Pull Requests are welcome to implement them: + +* Setting the spacing between paragraphs * first-line indent diff --git a/fpdf/__init__.py b/fpdf/__init__.py index 929d402f8..15fc47115 100644 --- a/fpdf/__init__.py +++ b/fpdf/__init__.py @@ -66,7 +66,6 @@ # FPDF constants: "FPDF_VERSION", "FPDF_FONT_DIR", - "TextColumns", ] __pdoc__ = {name: name.startswith("FPDF_") for name in __all__} diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index 1d323969b..95046eaf4 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -75,18 +75,18 @@ class Image: from .fonts import CoreFont, CORE_FONTS, FontFace, TTFFont from .graphics_state import GraphicsStateMixin from .html import HTML2FPDF -from .text_region import TextRegionMixin, TextColumns from .image_parsing import SUPPORTED_IMAGE_FILTERS, get_img_info, load_image from .linearization import LinearizedOutputProducer -from .output import OutputProducer, PDFPage, ZOOM_CONFIGS from .line_break import Fragment, MultiLineBreak, TextLine from .outline import OutlineSection # , serialize_outline +from .output import OutputProducer, PDFPage, ZOOM_CONFIGS from .recorder import FPDFRecorder -from .structure_tree import StructureTreeBuilder from .sign import Signature +from .structure_tree import StructureTreeBuilder from .svg import Percent, SVGObject from .syntax import DestinationXYZ, PDFDate from .table import Table +from .text_region import TextRegionMixin, TextColumns from .util import get_scale_factor # Public global variables: @@ -3657,14 +3657,10 @@ def write( normalized_string = self.normalize_text(txt).replace("\r", "") styled_text_fragments = self._preload_font_styles(normalized_string, False) - def _get_width(height): # pylint: disable=unused-argument - # Set the width dynamically, since the first line can have a different width. 
- return max_width - text_lines = [] multi_line_break = MultiLineBreak( styled_text_fragments, - _get_width, + lambda h: max_width, print_sh=print_sh, wrapmode=wrapmode, ) @@ -3710,6 +3706,7 @@ def text_column( align: Union[Align, str] = "LEFT", l_margin: float = None, r_margin: float = None, + print_sh: bool = False, ): """Establish a layout with a single column to fill with text. Args: @@ -3717,6 +3714,8 @@ def text_column( align (Align or str, optional): The alignment of the region, default "LEFT". l_margin (float, optional): Override the current left page margin. r_margin (float, optional): Override the current right page margin. + print_sh (bool, optional): Treat a soft-hyphen (\\u00ad) as a printable + character, instead of a line breaking opportunity. Default value: False """ return TextColumns( self, @@ -3725,6 +3724,7 @@ def text_column( align=align, l_margin=l_margin, r_margin=r_margin, + print_sh=print_sh, ) @check_page @@ -3732,28 +3732,34 @@ def text_columns( self, text: Optional[str] = None, ncols: int = 2, - gap_width: float = 10, + gutter: float = 10, + balance: bool = False, align: Union[Align, str] = "LEFT", l_margin: float = None, r_margin: float = None, + print_sh: bool = False, ): """Establish a layout with multiple columns to fill with text. Args: text (str, optional): A first piece of text to insert. ncols (int, optional): the number of columns to create, default 2. - gap_width (float, optional): The distance between the columns, default 10. + gutter (float, optional): The distance between the columns, default 10. align (Align or str, optional): The alignment of the region, default "LEFT". l_margin (float, optional): Override the current left page margin. r_margin (float, optional): Override the current right page margin. + print_sh (bool, optional): Treat a soft-hyphen (\\u00ad) as a printable + character, instead of a line breaking opportunity. 
Default value: False """ return TextColumns( self, text=text, ncols=ncols, - gap_width=gap_width, + gutter=gutter, + balance=balance, align=align, l_margin=l_margin, r_margin=r_margin, + print_sh=print_sh, ) @check_page diff --git a/fpdf/text_region.py b/fpdf/text_region.py index f60ba1cb8..82dac2943 100644 --- a/fpdf/text_region.py +++ b/fpdf/text_region.py @@ -1,3 +1,5 @@ +import math + from .errors import FPDFException from .enums import Align, XPos, YPos from .line_break import MultiLineBreak @@ -24,16 +26,13 @@ def clear_text_region(self): class Paragraph: - def __init__(self, region, *args, align=None, **kwargs): - super().__init__(*args, **kwargs) + def __init__(self, region, align=None): self.region = region self.pdf = region.pdf if align: align = Align.coerce(align) self.align = align self._text_fragments = [] - self.current_y = 0 - super().__init__(*args, **kwargs) def __enter__(self): return self @@ -49,8 +48,7 @@ def write(self, text: str): # , link: str = ""): styled_text_fragments = self.pdf._preload_font_styles(normalized_string, False) self._text_fragments.extend(styled_text_fragments) - def _build_lines(self, current_y, print_sh): - self.current_y = current_y + def build_lines(self, print_sh): text_lines = [] multi_line_break = MultiLineBreak( self._text_fragments, @@ -61,16 +59,18 @@ def _build_lines(self, current_y, print_sh): self._text_fragments = [] text_line = multi_line_break.get_line() while (text_line) is not None: - self.current_y += text_line.height text_lines.append(text_line) text_line = multi_line_break.get_line() return text_lines class ParagraphCollectorMixin: - def __init__(self, pdf, *args, text=None, align="LEFT", **kwargs): + def __init__( + self, pdf, *args, text=None, align="LEFT", print_sh: bool = False, **kwargs + ): self.pdf = pdf self.align = Align.coerce(align) # default for auto paragraphs + self.print_sh = print_sh self._paragraphs = [] self._has_paragraph = None super().__init__(pdf, *args, **kwargs) @@ -97,7 +97,7 @@ 
def __exit__(self, exc_type, exc_value, traceback): def write(self, text: str): # , link: str = ""): if self._has_paragraph == "EXPLICIT": raise FPDFException( - "Conflicts with active paragraph. Consider adding your text there." + "Conflicts with active paragraph. Either close the current paragraph or write your text inside it." ) if self._has_paragraph is None: p = Paragraph(region=self, align=self.align) @@ -122,7 +122,7 @@ def end_paragraph(self): class TextRegion(ParagraphCollectorMixin): """Abstract base class for all text region subclasses.""" - def _ln(self, h=None): + def ln(self, h=None): self.pdf.ln(h) def current_x_extents( @@ -131,18 +131,24 @@ def current_x_extents( """Return the horizontal extents of the current line.""" raise NotImplementedError() - def _render_lines(self, text_lines): - page_break_triggered = False - self.pdf.y = max(self.pdf.y, self.pdf.t_margin) - text_line = None + def _render_column_lines(self, text_lines, top, bottom): + """Return : + float or None: height of the last rendered line, or of the line before it if the column filled up (0 or None if there is none) + """ + self.pdf.y = top + prev_line_height = 0 + last_line_height = None + rendered_lines = 0 for text_line_index, text_line in enumerate(text_lines): - if text_line_index != 0: - self._ln() - # print(self.pdf.y + text_line.height, self.pdf.page_break_trigger) - if hasattr(self, "accept_page_break"): - if self.pdf.y + text_line.height > self.pdf.page_break_trigger: - page_break_triggered = self.accept_page_break() - new_page = self.pdf._render_styled_text_line( + if text_line_index > 0: + self.ln(last_line_height) + if self.pdf.y + text_line.height > bottom: + last_line_height = prev_line_height + break + prev_line_height = last_line_height + last_line_height = text_line.height + # Don't check the return, we never render past the bottom here. 
+ self.pdf._render_styled_text_line( text_line, text_line.max_width, h=text_line.height, @@ -150,34 +156,35 @@ def _render_lines(self, text_lines): new_x=XPos.WCONT, new_y=YPos.TOP, fill=False, - # link=link, + # link=link, # Must be part of Fragment ) - page_break_triggered = page_break_triggered or new_page - if text_line and text_line.trailing_nl: - # The line renderer can't handle trailing newlines in the text. - self.pdf._ln() - return page_break_triggered + rendered_lines += 1 + if rendered_lines: + del text_lines[:rendered_lines] + return last_line_height - def collect_lines(self, print_sh: bool = False): + def _render_lines(self, text_lines, top, bottom): + """Default page rendering a set of lines in one column""" + if text_lines: + self._render_column_lines(text_lines, top, bottom) + + def collect_lines(self): text_lines = [] - current_y = self.pdf.y for paragraph in self._paragraphs: - cur_lines = paragraph._build_lines(current_y, print_sh) + cur_lines = paragraph.build_lines(self.print_sh) if not cur_lines: continue - current_y = paragraph.current_y text_lines.extend(cur_lines) return text_lines - def render(self, print_sh: bool = False): - if not self._paragraphs: - return False - text_lines = self.collect_lines(print_sh) - return self._render_lines(text_lines) + def render(self): + raise NotImplementedError() def get_width(self, height): - limits = self.current_x_extents(self.pdf.y, height) - res = limits[1] - max(self.pdf.x, limits[0]) - 2 * self.pdf.c_margin + start, end = self.current_x_extents(self.pdf.y, height) + if self.pdf.x > start and self.pdf.x < end: + start = self.pdf.x + res = end - start - 2 * self.pdf.c_margin return res @@ -212,60 +219,101 @@ def current_x_extents(self, y, height): # pylint: disable=unused-argument class TextColumns(TextRegion, TextColumnarMixin): - def __init__(self, pdf, *args, ncols: int = 1, gap_width: float = 10, **kwargs): + def __init__( + self, + pdf, + *args, + ncols: int = 1, + gutter: float = 10, + balance: 
bool = False, + **kwargs, + ): super().__init__(pdf, *args, **kwargs) self.cur_column = 0 - self.cur_top = self.pdf.t_margin self.ncols = ncols - self.gap_width = gap_width + self.gutter = gutter + self.balance = balance total_w = self.right - self.left - self.col_width = (total_w - (self.ncols - 1) * self.gap_width) / self.ncols + self.col_width = (total_w - (self.ncols - 1) * self.gutter) / self.ncols # We calculate the column extents once in advance, and store them for lookup. - # This way we can later also enable the possibility to request columns of - # differing width. c_left = self.left self.cols = [(c_left, c_left + self.col_width)] for i in range(1, ncols): # pylint: disable=unused-variable - c_left += self.col_width + self.gap_width + c_left += self.col_width + self.gutter self.cols.append((c_left, c_left + self.col_width)) + self._first_page_top = max(self.pdf.t_margin, self.pdf.y) - def render( - self, - print_sh: bool = False, - stay_below: bool = False, - balance: bool = False, - ): + def __enter__(self): + super().__enter__() + self._first_page_top = max(self.pdf.t_margin, self.pdf.y) + if self.balance: + self.cur_column = 0 + self.pdf.x = self.cols[self.cur_column][0] + + def _render_page_lines(self, text_lines, top, bottom): + """Rendering a set of lines in one or several columns on one page.""" + balancing = False + next_y = self.pdf.y + if self.balance: + # Column balancing is currently very simplistic, and only works reliably when + # line height doesn't change much within the text block. + # The "correct" solution would require an exact precalculation of the height of + # each column with the specific line heights and iterative regrouping of lines, + # which seems excessive at this point. + # Contribution of a more reliable but still reasonably simple algorithm welcome. 
+ page_bottom = bottom + if not text_lines: + return + tot_height = sum(l.height for l in text_lines) + col_height = tot_height / self.ncols + avail_height = bottom - top + if col_height < avail_height: + balancing = True # We actually have room to balance on this page. + # total height divided by n + bottom = top + col_height + # A bit more generous: Try to keep the rightmost column the shortest. + lines_per_column = math.ceil(len(text_lines) / self.ncols) + 0.5 + mult_height = text_lines[0].height * lines_per_column + if mult_height > col_height: + bottom = top + mult_height + if bottom > page_bottom: + # Turns out we don't actually have enough room. + bottom = page_bottom + balancing = False + for c in range(self.cur_column, self.ncols): + if not text_lines: + return + if c != self.cur_column: + self.cur_column = c + col_left, col_right = self.current_x_extents(0, 0) + if self.pdf.x < col_left or self.pdf.x >= col_right: + self.pdf.x = col_left + if balancing and c == (self.ncols - 1): + # Give the last column more space in case the balancing is out of whack. + bottom = self.pdf.h - self.pdf.b_margin + last_line_height = self._render_column_lines(text_lines, top, bottom) + if balancing: + new_y = self.pdf.y + last_line_height + if new_y > next_y: + next_y = new_y + if balancing: + self.pdf.y = next_y + + def render(self): if not self._paragraphs: - return False + return text_lines = self.collect_lines() - if stay_below or (self.cur_column == 0 and balance): - self.cur_top = self.pdf.y - else: - self.cur_top = self.pdf.t_margin if not text_lines: - return False - # if not balance: - return self._render_lines(text_lines) - # balance the columns. 
- - # hgt_lines = sum(l.height for l in text_lines) - # bottom = self.pdf.h - self.pdf.b_margin - # hgt_avail = bottom - self.pdf.y - # hgt_avail += (self.ncols - self.cur_column - 1) * (bottom - self.cur_top) - # YYY Finish balancing - - def accept_page_break(self): - if self.cur_column == self.ncols - 1: - self.cur_top = self.pdf.t_margin + return + page_bottom = self.pdf.h - self.pdf.b_margin + self._render_page_lines(text_lines, self._first_page_top, page_bottom) + page_top = self._first_page_top if self.balance else self.pdf.t_margin + while text_lines: + self.pdf.add_page(same=True) self.cur_column = 0 - self.pdf.x = self.cols[self.cur_column][0] - return True - self.cur_column += 1 - self.pdf.x = self.cols[self.cur_column][0] - self.pdf.y = self.cur_top - return False + self._render_page_lines(text_lines, page_top, page_bottom) - def _ln(self, h=None): + def ln(self, h=None): self.pdf.ln(h=h) self.pdf.x = self.cols[self.cur_column][0] diff --git a/test/text_region/tcols_balance.pdf b/test/text_region/tcols_balance.pdf new file mode 100644 index 0000000000000000000000000000000000000000..de61a7fc60948f954027fc636904299ee28eebc0 GIT binary patch literal 2025 zcmbtVYfuwc6fQnGxG2gaN)hE&ML-^#O-NuBaY7ytl!pNor64OTaV6}=WTU`Td=!gH zE0hW^~0mV|KAY#XM>@Gr0)tSz;_s`w^?m6E%_uM(( zu?djKoDgR|ga9s}Hf)02-62>^=|~zN5WJj-)PfK`bEF7`U|&3vz&xo~5W-~;0V;6j zxdAs9#MupHPJ`mqIDi8%C9Z`$JRqEmWa)T%wEk+G6h+VkMUHU-SVqv^fYhl}3<2x9 z0*j+`G=Tj{TuiBSG2|;)3)u7&<3Yq2jj1T5UKlpT*V&UaPSRT7^2#Rw(=fl|3<_LJ z=`<0z7P#qCvcdKESl4kr8KI(a4U?-1qj3oyK}F&ayaFec^hV$&9fQ-ui_^YWn5J3D=S$TMe3%>-Xk_h8sAh zR;PTO$l+9WZd+2*^2Ev~V}UwhSJF_Y(cPE4Td~u03glQ#MU7ICSHnNb5hM-GKAP}o zt?}{GbyEi}Rp%Yl3FqXsHkwAEHwRO!mY@Zk+PqKy-03&+e4%6M#PbO|ht_xRUhma= zhA`|+%!#{y$oi)cw5X|ZcE(x^xo>$XWT|s$XwSasg_=W8r#7j&&Ys$HTg%%VX1hOO zrMFF2Psf>6g+3`Det5$Vlg^dat9-kXPuu|qrKE}R0AB<~p< znbKfsEhAJx3Da6-A~N=|=O@uHMfbUe%9)dS<<{*4*g zCxX+maBZVWXEQ&ZBfDs4Q9ER}U+AYf;49U3=bWo-Uwo^u+-l}$OZyE*=I`tE+3U1$ 
zvae#6Nkv+C-RZ9qJK==ttSW1#TC?AOmUU#D^}ZgPZf);qe5;Q<+2U}6nEB!H$QIpYtDW$t>Xbn&sEa%ZpN-_TBrX|1MFqyhHt>O%yOap!Bl+ zvI@72g_YUGZf&c3mvXn{-d3;msm2Q%GaaV9+}?Sh@^rDfC+W)PS*{ndWI^`}eUil| zyH}cvvM-8X${mutk0}nwsU0T6>KdE~?k$O4w97j>cDwzPTrZ2562vAjqveXhInvc| zP4FbgrX6lHHpOdyxuKQQhjWADHaELi`&}`d=X9^{kw@Z@BTuYz@|CHqq>MG%n79b^ zy3`ZX;xdZqbJ!bKZNX_G0)t>F$#g40DgnHPAVs8>7zZFYP`7~|RV-Gw7^!WNdld-C-F5*vc_-D&qn6 zitHC3u$d?rF@pm%R74<7gAkY*4ge0qW0|VTV;F_>9NWcXyF65{Sg2koF{6j9&`TJ} z1a^|e2r5Jo9v?waF0@VtHGlnE