diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb58ea61..64e69e98a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,9 @@ in order to get warned about deprecated features used in your code. This can also be enabled programmatically with `warnings.simplefilter('default', DeprecationWarning)`. ## [2.5.7] - not released yet +### Changed +- `fpdf2` now uses [fontTools](https://fonttools.readthedocs.io/en/latest/) to read and embed fonts in the PDF, thanks to @gmischler and @RedShy + ### Fixed - `arc()` not longer renders artefacts at intersection point, thanks to @Jmillan-Dev; [#488](https://github.com/PyFPDF/fpdf2/issues/488) - `write_html`: `` & `` HTML tags are now properly supported - they were ignored previously; [#498](https://github.com/PyFPDF/fpdf2/issues/498) diff --git a/README.md b/README.md index 0682caae8..5e70660a6 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,8 @@ pdf.output("hello_world.pdf") It is a fork and the successor of `PyFPDF` (_cf._ [history](https://pyfpdf.github.io/fpdf2/Development.html#history)). Compared with other PDF libraries, `fpdf2` is **fast, versatile, easy to learn and to extend** ([example](https://github.com/digidigital/Extensions-and-Scripts-for-pyFPDF-fpdf2)). -It is also entirely writen in Python and has very few dependencies: -[Pillow](https://pillow.readthedocs.io/en/stable/), [defusedxml](https://pypi.org/project/defusedxml/) & [svg.path](https://pypi.org/project/svg.path/). +It is also entirely written in Python and has very few dependencies: +[Pillow](https://pillow.readthedocs.io/en/stable/), [defusedxml](https://pypi.org/project/defusedxml/), [svg.path](https://pypi.org/project/svg.path/) & [fontTools](https://fonttools.readthedocs.io/en/latest/index.html). **Development status**: this project is **mature** and **actively maintained**. diff --git a/fpdf/enums.py b/fpdf/enums.py index 2da5c41d5..5b89ebb48 100644 --- a/fpdf/enums.py +++ b/fpdf/enums.py @@ -1,4 +1,4 @@ -from enum import Enum, IntEnum +from enum import Enum, IntEnum, Flag from sys import intern from .syntax import Name @@ -600,5 +600,34 @@ class Corner(CoerciveEnum): BOTTOM_LEFT = "BOTTOM_LEFT" +class FontDescriptorFlags(Flag): + """An enumeration of the flags for the unsigned 32-bit integer entry in the font descriptor specifying various + characteristics of the font. Bit positions are numbered from 1 (low-order) to 32 (high-order).""" + + FIXED_PITCH = 0x0000001 + """ + "All glyphs have the same width (as opposed to proportional or + variable-pitch fonts, which have different widths." + """ + + SYMBOLIC = 0x0000004 + """ + "Font contains glyphs outside the Adobe standard Latin character set. + This flag and the Nonsymbolic flag shall not both be set or both be clear." + """ + + ITALIC = 0x0000040 + """ + "Glyphs have dominant vertical strokes that are slanted." + """ + + FORCE_BOLD = 0x0040000 + """ + "The flag shall determine whether bold glyphs shall be painted with extra pixels even at very + small text sizes by a conforming reader. If set, features of bold glyphs may be thickened at + small text sizes." 
+ """ + + # This enum is only used internally: __pdoc__ = {"DocumentState": False} diff --git a/fpdf/fpdf.py b/fpdf/fpdf.py index d3d69b561..1075b7d2a 100644 --- a/fpdf/fpdf.py +++ b/fpdf/fpdf.py @@ -32,6 +32,9 @@ from os.path import splitext from pathlib import Path from typing import Callable, List, NamedTuple, Optional, Tuple, Union +from fontTools import ttLib +from fontTools import subset as ftsubset +from io import BytesIO try: from PIL.Image import Image @@ -69,6 +72,7 @@ class Image: XPos, YPos, Corner, + FontDescriptorFlags, ) from .errors import FPDFException, FPDFPageFormatException, FPDFUnicodeEncodingException from .fonts import fpdf_charwidths @@ -85,14 +89,12 @@ class Image: from .syntax import create_list_string as pdf_list from .syntax import create_stream as pdf_stream from .syntax import iobj_ref as pdf_ref -from .ttfonts import TTFontFile from .util import ( enclose_in_parens, escape_parens, format_date, get_scale_factor, object_id_for_page, - substr, ) # Public global variables: @@ -520,7 +522,7 @@ def _set_min_pdf_version(self, version): self.pdf_version = max(self.pdf_version, version) @property - def unifontsubset(self): + def is_ttf_font(self): return self.current_font.get("type") == "TTF" @property @@ -1809,6 +1811,7 @@ def add_font(self, family, style="", fname=None, uni="DEPRECATED"): """ if not fname: raise ValueError('"fname" parameter is required') + ext = splitext(str(fname))[1] if ext not in (".otf", ".otc", ".ttf", ".ttc"): raise ValueError( @@ -1816,12 +1819,14 @@ def add_font(self, family, style="", fname=None, uni="DEPRECATED"): " add_font() used to accept .pkl file as input, but for security reasons" " this feature is deprecated since v2.5.1 and has been removed in v2.5.3." ) + if uni != "DEPRECATED": warnings.warn( '"uni" parameter is deprecated, unused and will soon be removed', DeprecationWarning, stacklevel=2, ) + style = "".join(sorted(style.upper())) if any(letter not in "BI" for letter in style): raise ValueError( @@ -1833,15 +1838,70 @@ def add_font(self, family, style="", fname=None, uni="DEPRECATED"): if fontkey in self.fonts or fontkey in self.core_fonts: warnings.warn(f"Core font or font already added '{fontkey}': doing nothing") return + for parent in (".", FPDF_FONT_DIR): if not parent: continue + if (Path(parent) / fname).exists(): ttffilename = Path(parent) / fname break else: raise FileNotFoundError(f"TTF Font file not found: {fname}") + font = ttLib.TTFont(ttffilename) + self.font_files[fontkey] = { + "length1": os.stat(ttffilename).st_size, + "type": "TTF", + "ttffile": ttffilename, + } + + scale = 1000 / font["head"].unitsPerEm + default_width = round(scale * font["hmtx"].metrics[".notdef"][0]) + + try: + cap_height = font["OS/2"].sCapHeight + except AttributeError: + cap_height = font["hhea"].ascent + + # entry for the PDF font descriptor specifying various characteristics of the font + flags = FontDescriptorFlags.SYMBOLIC + if font["post"].isFixedPitch: + flags |= FontDescriptorFlags.FIXED_PITCH + if font["post"].italicAngle != 0: + flags |= FontDescriptorFlags.ITALIC + if font["OS/2"].usWeightClass >= 600: + flags |= FontDescriptorFlags.FORCE_BOLD + + desc = { + "Ascent": round(font["hhea"].ascent * scale), + "Descent": round(font["hhea"].descent * scale), + "CapHeight": round(cap_height * scale), + "Flags": flags.value, + "FontBBox": ( + f"[{font['head'].xMin * scale:.0f} {font['head'].yMin * scale:.0f}" + f" {font['head'].xMax * scale:.0f} {font['head'].yMax * scale:.0f}]" + ), + "ItalicAngle": int(font["post"].italicAngle), + 
"StemV": round(50 + int(pow((font["OS/2"].usWeightClass / 65), 2))), + "MissingWidth": default_width, + } + + # a map unicode_char -> char_width + char_widths = defaultdict(lambda: default_width) + for char in font.getBestCmap().keys(): + # take glyph associated to char + glyph = font.getBestCmap()[char] + + # take width associated to glyph + w = font["hmtx"].metrics[glyph][0] + + # probably this check could be deleted + if w == 65535: + w = 0 + + char_widths[char] = round(scale * w + 0.001) # ROUND_HALF_UP + # include numbers in the subset! (if alias present) # ensure that alias is mapped 1-by-1 additionally (must be replaceable) sbarr = "\x00 " @@ -1849,50 +1909,18 @@ def add_font(self, family, style="", fname=None, uni="DEPRECATED"): sbarr += "0123456789" sbarr += self.str_alias_nb_pages - ttf = TTFontFile() - ttf.getMetrics(ttffilename) - desc = { - "Ascent": round(ttf.ascent), - "Descent": round(ttf.descent), - "CapHeight": round(ttf.capHeight), - "Flags": ttf.flags, - "FontBBox": ( - f"[{ttf.bbox[0]:.0f} {ttf.bbox[1]:.0f}" - f" {ttf.bbox[2]:.0f} {ttf.bbox[3]:.0f}]" - ), - "ItalicAngle": int(ttf.italicAngle), - "StemV": round(ttf.stemV), - "MissingWidth": round(ttf.defaultWidth), - } - - font_dict = { + self.fonts[fontkey] = { + "i": len(self.fonts) + 1, "type": "TTF", - "name": re.sub("[ ()]", "", ttf.fullName), + "name": re.sub("[ ()]", "", font["name"].getBestFullName()), "desc": desc, - "up": round(ttf.underlinePosition), - "ut": round(ttf.underlineThickness), + "up": round(font["post"].underlinePosition * scale), + "ut": round(font["post"].underlineThickness * scale), + "cw": char_widths, "ttffile": ttffilename, "fontkey": fontkey, - "originalsize": os.stat(ttffilename).st_size, - "cw": ttf.charWidths, - } - self.fonts[fontkey] = { - "i": len(self.fonts) + 1, - "type": font_dict["type"], - "name": font_dict["name"], - "desc": font_dict["desc"], - "up": font_dict["up"], - "ut": font_dict["ut"], - "cw": font_dict["cw"], - "ttffile": font_dict["ttffile"], - "fontkey": fontkey, "subset": SubsetMap(map(ord, sbarr)), } - self.font_files[fontkey] = { - "length1": font_dict["originalsize"], - "type": "TTF", - "ttffile": ttffilename, - } def set_font(self, family=None, style="", size=0): """ @@ -2381,7 +2409,7 @@ def text(self, x, y, txt=""): if not self.font_family: raise FPDFException("No font set, you need to call set_font() beforehand") txt = self.normalize_text(txt) - if self.unifontsubset: + if self.is_ttf_font: txt_mapped = "" for char in txt: uni = ord(char) @@ -2914,7 +2942,7 @@ def _render_styled_text_line( current_text_mode = frag.text_mode sl.append(f"{frag.text_mode} Tr {frag.line_width:.2f} w") - if frag.unicode_font: + if frag.is_ttf_font: mapped_text = "" for char in frag.string: uni = ord(char) @@ -3800,7 +3828,7 @@ def normalize_text(self, txt): """Check that text input is in the correct format/encoding""" # - for TTF unicode fonts: unicode object (utf8 encoding) # - for built-in fonts: string instances (encoding: latin-1, cp1252) - if not self.unifontsubset and self.core_fonts_encoding: + if not self.is_ttf_font and self.core_fonts_encoding: try: return txt.encode(self.core_fonts_encoding).decode("latin-1") except UnicodeEncodeError as error: @@ -4042,7 +4070,6 @@ def _insert_table_of_contents(self): self.state = prev_state def _putfonts(self): - nf = self.n for diff in self.diffs.values(): # Encodings self._newobj() @@ -4054,112 +4081,81 @@ def _putfonts(self): ) self._out("endobj") - for name, info in self.font_files.items(): - if "type" in info and info["type"] != 
"TTF": - # Font file embedding - self._newobj() - info["n"] = self.n - font = (FPDF_FONT_DIR / name).read_bytes() - compressed = substr(name, -2) == ".z" - if not compressed and "length2" in info: - header = ord(font[0]) == 128 - if header: - # Strip first binary header - font = substr(font, 6) - if header and ord(font[info["length1"]]) == 128: - # Strip second binary header - font = substr(font, 0, info["length1"]) + substr( - font, info["length1"] + 6 - ) - - self._out(f"<>") - self._out(pdf_stream(font)) - self._out("endobj") - # Font objects flist = [(x[1]["i"], x[0], x[1]) for x in self.fonts.items()] flist.sort() for _, font_name, font in flist: self.fonts[font_name]["n"] = self.n + 1 - my_type = font["type"] - name = font["name"] # Standard font - if my_type == "core": + if font["type"] == "core": self._newobj() self._out("<>") self._out("endobj") + elif font["type"] == "TTF": + fontname = f"MPDFAA+{font['name']}" - # Additional Type1 or TrueType font - elif my_type in ("Type1", "TrueType"): - self._newobj() - self._out("<>") - self._out("endobj") + # unicode_char -> new_code_char map for chars embedded in the PDF + uni_to_new_code_char = font["subset"].dict() - # Widths - self._newobj() - self._out( - "[" - + " ".join(_char_width(font, chr(i)) for i in range(32, 256)) - + "]" + # why we delete 0-element? + del uni_to_new_code_char[0] + + # ---- FONTTOOLS SUBSETTER ---- + # recalcTimestamp=False means that it doesn't modify the "modified" timestamp in head table + # if we leave recalcTimestamp=True the tests will break every time + fonttools_font = ttLib.TTFont( + file=font["ttffile"], recalcTimestamp=False ) - self._out("endobj") - # Descriptor - self._newobj() - s = f"<>") - self._out("endobj") - elif my_type == "TTF": - self.fonts[font_name]["n"] = self.n + 1 - ttf = TTFontFile() - fontname = f"MPDFAA+{font['name']}" - subset = font["subset"].dict() - del subset[0] - ttfontstream = ttf.makeSubset(font["ttffile"], subset) + # 1. get all glyphs in PDF + cmap = fonttools_font["cmap"].getBestCmap() + glyph_names = [ + cmap[unicode] for unicode in uni_to_new_code_char if unicode in cmap + ] + + # 2. make a subset + # notdef_outline=True means that keeps the white box for the .notdef glyph + # recommended_glyphs=True means that adds the .notdef, .null, CR, and space glyphs + options = ftsubset.Options(notdef_outline=True, recommended_glyphs=True) + # dropping the tables previous dropped in the old ttfonts.py file #issue 418 + options.drop_tables += ["GDEF", "GSUB", "GPOS", "MATH", "hdmx"] + subsetter = ftsubset.Subsetter(options) + subsetter.populate(glyphs=glyph_names) + subsetter.subset(fonttools_font) + + # 3. make codeToGlyph + # is a map Character_ID -> Glyph_ID + # it's used for associating glyphs to new codes + # this basically takes the old code of the character + # take the glyph associated with it + # and then associate to the new code the glyph associated with the old code + code_to_glyph = {} + for code, new_code_mapped in uni_to_new_code_char.items(): + if code in cmap: + glyph_name = cmap[code] + code_to_glyph[new_code_mapped] = fonttools_font.getGlyphID( + glyph_name + ) + else: + # notdef is associated if no glyph was associated to the old code + # it's not necessary to do this, it seems to be done by default + code_to_glyph[new_code_mapped] = fonttools_font.getGlyphID( + ".notdef" + ) + + # 4. 
return the ttfile + output = BytesIO() + fonttools_font.save(output) + + output.seek(0) + ttfontstream = output.read() ttfontsize = len(ttfontstream) fontstream = zlib.compress(ttfontstream) - codeToGlyph = ttf.codeToGlyph - # del codeToGlyph[0] # Type0 Font # A composite font - a font composed of other fonts, @@ -4185,7 +4181,7 @@ def _putfonts(self): self._out(f"/FontDescriptor {pdf_ref(self.n + 3)}") if font["desc"].get("MissingWidth"): self._out(f"/DW {font['desc']['MissingWidth']}") - self._putTTfontwidths(font, ttf.maxUni) + self._putTTfontwidths(font, max(uni_to_new_code_char)) self._out(f"/CIDToGIDMap {pdf_ref(self.n + 4)}") self._out(">>") self._out("endobj") @@ -4195,9 +4191,9 @@ def _putfonts(self): # character that each used 16-bit code belongs to. It # allows searching the file and copying text from it. bfChar = [] - subset = font["subset"].dict() - for code in subset: - code_mapped = subset.get(code) + uni_to_new_code_char = font["subset"].dict() + for code in uni_to_new_code_char: + code_mapped = uni_to_new_code_char.get(code) if code > 0xFFFF: # Calculate surrogate pair code_high = 0xD800 | (code - 0x10000) >> 10 @@ -4248,39 +4244,28 @@ def _putfonts(self): self._newobj() self._out("<>") self._out("endobj") # Embed CIDToGIDMap # A specification of the mapping from CIDs to glyph indices - cidtogidmap = ["\x00"] * 256 * 256 * 2 - for cc, glyph in codeToGlyph.items(): - cidtogidmap[cc * 2] = chr(glyph >> 8) - cidtogidmap[cc * 2 + 1] = chr(glyph & 0xFF) - cidtogidmap = "".join(cidtogidmap) + cid_to_gid_map = ["\x00"] * 256 * 256 * 2 + for cc, glyph in code_to_glyph.items(): + cid_to_gid_map[cc * 2] = chr(glyph >> 8) + cid_to_gid_map[cc * 2 + 1] = chr(glyph & 0xFF) + cid_to_gid_map = "".join(cid_to_gid_map) + # manage binary data as latin1 until PEP461-like function is implemented - cidtogidmap = zlib.compress(cidtogidmap.encode("latin1")) + cid_to_gid_map = zlib.compress(cid_to_gid_map.encode("latin1")) + self._newobj() - self._out(f"<>") - self._out(pdf_stream(cidtogidmap)) + self._out(pdf_stream(cid_to_gid_map)) self._out("endobj") # Font file @@ -4291,14 +4276,6 @@ def _putfonts(self): self._out(">>") self._out(pdf_stream(fontstream)) self._out("endobj") - del ttf - else: - # Allow for additional types - mtd = f"_put{my_type.lower()}" - # check if self has a attr mtd which is callable (method) - if not callable(getattr(self, mtd, None)): - raise FPDFException(f"Unsupported font type: {my_type}") - self.mtd(font) # pylint: disable=no-member def _putTTfontwidths(self, font, maxUni): rangeid = 0 @@ -5206,14 +5183,7 @@ def _apply_style(self, title_style): def _char_width(font, char): - cw = font["cw"] - try: - width = cw[char] - except (IndexError, KeyError): - width = font.get("desc", {}).get("MissingWidth") or 500 - if width == 65535: - width = 0 - return width + return font["cw"][char] def _sizeof_fmt(num, suffix="B"): diff --git a/fpdf/line_break.py b/fpdf/line_break.py index 78e61053f..ae050a19b 100644 --- a/fpdf/line_break.py +++ b/fpdf/line_break.py @@ -47,7 +47,7 @@ def font(self, v): self.graphics_state["current_font"] = v @property - def unicode_font(self): + def is_ttf_font(self): return self.font.get("type") == "TTF" @property @@ -130,21 +130,12 @@ def get_width( precedence over the start/end arguments. 
""" - def char_width(char): - try: - width = self.font["cw"][char] - except (IndexError, KeyError): - width = self.font.get("desc", {}).get("MissingWidth") or 500 - if width == 65535: - return 0 - return width - if chars is None: chars = self.characters[start:end] - if self.unicode_font: - w = sum(char_width(ord(c)) for c in chars) + if self.is_ttf_font: + w = sum(self.font["cw"][ord(c)] for c in chars) else: - w = sum(char_width(c) for c in chars) + w = sum(self.font["cw"][c] for c in chars) char_spacing = self.char_spacing if self.font_stretching != 100: w *= self.font_stretching * 0.01 diff --git a/fpdf/ttfonts.py b/fpdf/ttfonts.py deleted file mode 100644 index de2660119..000000000 --- a/fpdf/ttfonts.py +++ /dev/null @@ -1,1047 +0,0 @@ -# ****************************************************************************** -# TTFontFile class -# -# This class is based on The ReportLab Open Source PDF library -# written in Python - http://www.reportlab.com/software/opensource/ -# together with ideas from the OpenOffice source code and others. -# -# Version: 1.04 -# Date: 2011-09-18 -# Author: Ian Back -# License: LGPL -# Copyright (c) Ian Back, 2010 -# Ported to Python 2.7 by Mariano Reingart (reingart@gmail.com) on 2012 -# This header must be retained in any redistribution or -# modification of the file. -# -# ****************************************************************************** - -import re -import warnings -from struct import error as StructError, pack, unpack - -from .util import b, substr - -# Define the value used in the "head" table of a created TTF file -# 0x74727565 "true" for Mac -# 0x00010000 for Windows -# Either seems to work for a font embedded in a PDF file -# when read by Adobe Reader on a Windows PC(!) -_TTF_MAC_HEADER = False - -# TrueType Font Glyph operators -GF_WORDS = 1 << 0 -GF_SCALE = 1 << 3 -GF_MORE = 1 << 5 -GF_XYSCALE = 1 << 6 -GF_TWOBYTWO = 1 << 7 - - -def sub32(x, y): - xlo = x[1] - xhi = x[0] - ylo = y[1] - yhi = y[0] - if ylo > xlo: - xlo += 1 << 16 - yhi += 1 - reslo = xlo - ylo - if yhi > xhi: - xhi += 1 << 16 - reshi = xhi - yhi - reshi = reshi & 0xFFFF - return reshi, reslo - - -def calcChecksum(data): - if len(data) % 4: - data += b("\0") * (4 - (len(data) % 4)) - hi = 0x0000 - lo = 0x0000 - for i in range(0, len(data), 4): - hi += (data[i] << 8) + data[i + 1] - lo += (data[i + 2] << 8) + data[i + 3] - hi += lo >> 16 - lo &= 0xFFFF - hi &= 0xFFFF - return hi, lo - - -class TTFontFile: - def __init__(self): - # Maximum size of glyph table to read in as string - # (otherwise reads each glyph from file) - self.maxStrLenRead = 200000 - - def getMetrics(self, file): - self.filename = file - with open(file, "rb") as self.fh: - self._pos = 0 - self.charWidths = [] - self.glyphPos = {} - self.charToGlyph = {} - self.tables = {} - self.otables = {} - self.ascent = 0 - self.descent = 0 - self.version = version = self.read_ulong() - if version == 0x4F54544F: - raise RuntimeError("Postscript outlines are not supported") - if version == 0x74746366: - raise RuntimeError("ERROR - TrueType Fonts Collections not supported") - if version not in (0x00010000, 0x74727565): - raise RuntimeError(f"Not a TrueType font: version=0x{version:x}") - self.readTableDirectory() - self.extractInfo() - - def readTableDirectory( - self, - ): - self.numTables = self.read_ushort() - self.searchRange = self.read_ushort() - self.entrySelector = self.read_ushort() - self.rangeShift = self.read_ushort() - self.tables = {} - for _ in range(self.numTables): - record = { - "tag": 
self.read_tag(), - "checksum": (self.read_ushort(), self.read_ushort()), - "offset": self.read_ulong(), - "length": self.read_ulong(), - } - self.tables[record["tag"]] = record - - def get_table_pos(self, tag): - offset = self.tables[tag]["offset"] - length = self.tables[tag]["length"] - return offset, length - - def seek(self, pos): - self._pos = pos - self.fh.seek(self._pos) - - def skip(self, delta): - self._pos = self._pos + delta - self.fh.seek(self._pos) - - def seek_table(self, tag, offset_in_table=0): - tpos = self.get_table_pos(tag) - self._pos = tpos[0] + offset_in_table - self.fh.seek(self._pos) - return self._pos - - def read_tag(self): - self._pos += 4 - return self.fh.read(4).decode("latin1") - - def read_short(self): - self._pos += 2 - s = self.fh.read(2) - a = (s[0] << 8) + s[1] - if a & (1 << 15): - a = a - (1 << 16) - return a - - def read_ushort(self): - self._pos += 2 - s = self.fh.read(2) - return (s[0] << 8) + s[1] - - def read_ulong(self): - self._pos += 4 - s = self.fh.read(4) - # if large uInt32 as an integer, PHP converts it to -ve - return s[0] * 16777216 + (s[1] << 16) + (s[2] << 8) + s[3] # 16777216 = 1<<24 - - def get_ushort(self, pos): - self.fh.seek(pos) - s = self.fh.read(2) - return (s[0] << 8) + s[1] - - @staticmethod - def splice(stream, offset, value): - return substr(stream, 0, offset) + value + substr(stream, offset + len(value)) - - def _set_ushort(self, stream, offset, value): - up = pack(">H", value) - return self.splice(stream, offset, up) - - def get_chunk(self, pos, length): - self.fh.seek(pos) - if length < 1: - return "" - return self.fh.read(length) - - def get_table(self, tag): - (pos, length) = self.get_table_pos(tag) - if length == 0: - raise RuntimeError( - f"Truetype font ({self.filename}): error reading table: {tag}" - ) - self.fh.seek(pos) - return self.fh.read(length) - - def add(self, tag, data): - if tag == "head": - data = self.splice(data, 8, b("\0\0\0\0")) - self.otables[tag] = data - - def extractInfo(self): - # name - Naming table - self.sFamilyClass = 0 - self.sFamilySubClass = 0 - - name_offset = self.seek_table("name") - fmt = self.read_ushort() - if fmt != 0: - raise RuntimeError(f"Unknown name table format {fmt}") - numRecords = self.read_ushort() - string_data_offset = name_offset + self.read_ushort() - names = {1: "", 2: "", 3: "", 4: "", 6: ""} - K = list(names) - nameCount = len(names) - for _ in range(numRecords): - platformId = self.read_ushort() - encodingId = self.read_ushort() - languageId = self.read_ushort() - nameId = self.read_ushort() - length = self.read_ushort() - offset = self.read_ushort() - if nameId not in K: - continue - N = "" - if ( - platformId == 3 and encodingId == 1 and languageId == 0x409 - ): # Microsoft, Unicode, US English, PS Name - opos = self._pos - self.seek(string_data_offset + offset) - if length % 2 != 0: - raise RuntimeError( - "PostScript name is UTF-16BE string of odd length" - ) - length //= 2 - N = "" - while length > 0: - char = self.read_ushort() - N += chr(char) - length -= 1 - self._pos = opos - self.seek(opos) - - elif ( - platformId == 1 and encodingId == 0 and languageId == 0 - ): # Macintosh, Roman, English, PS Name - opos = self._pos - N = self.get_chunk(string_data_offset + offset, length).decode("latin1") - self._pos = opos - self.seek(opos) - - if N and names[nameId] == "": - names[nameId] = N - nameCount -= 1 - if nameCount == 0: - break - - if names[6]: - psName = names[6] - elif names[4]: - psName = re.sub(" ", "-", names[4]) - elif names[1]: - psName = re.sub(" ", 
"-", names[1]) - else: - psName = "" - if not psName: - raise RuntimeError("Could not find PostScript font name") - self.name = psName - self.familyName = names[1] or psName - self.styleName = names[2] or "Regular" - self.fullName = names[4] or psName - self.uniqueFontID = names[3] or psName - if names[6]: - self.fullName = names[6] - - # head - Font header table - self.seek_table("head") - self.skip(18) - self.unitsPerEm = unitsPerEm = self.read_ushort() - scale = 1000 / unitsPerEm - self.skip(16) - xMin = self.read_short() - yMin = self.read_short() - xMax = self.read_short() - yMax = self.read_short() - self.bbox = [(xMin * scale), (yMin * scale), (xMax * scale), (yMax * scale)] - self.skip(3 * 2) - # pylint: disable=unused-variable - indexToLocFormat = self.read_ushort() - glyphDataFormat = self.read_ushort() - if glyphDataFormat != 0: - raise RuntimeError(f"Unknown glyph data format {glyphDataFormat}") - - # hhea metrics table - # ttf2t1 seems to use this value rather than the one in OS/2 - so put in for - # compatibility - if "hhea" in self.tables: - self.seek_table("hhea") - self.skip(4) - hheaAscender = self.read_short() - hheaDescender = self.read_short() - self.ascent = hheaAscender * scale - self.descent = hheaDescender * scale - - # OS/2 - OS/2 and Windows metrics table - if "OS/2" in self.tables: - self.seek_table("OS/2") - version = self.read_ushort() - self.skip(2) - usWeightClass = self.read_ushort() - self.skip(2) - fsType = self.read_ushort() - if fsType == 0x0002 or (fsType & 0x0300) != 0: - raise RuntimeError( - f"ERROR - Font file {self.filename} cannot be embedded due to copyright restrictions." - ) - - self.skip(20) - sF = self.read_short() - self.sFamilyClass = sF >> 8 - self.sFamilySubClass = sF & 0xFF - self._pos += 10 # PANOSE = 10 byte length - panose = self.fh.read(10) - self.skip(26) - sTypoAscender = self.read_short() - sTypoDescender = self.read_short() - if not self.ascent: - self.ascent = sTypoAscender * scale - if not self.descent: - self.descent = sTypoDescender * scale - if version > 1: - self.skip(16) - sCapHeight = self.read_short() - self.capHeight = sCapHeight * scale - else: - self.capHeight = self.ascent - - else: - usWeightClass = 500 - if not self.ascent: - self.ascent = yMax * scale - if not self.descent: - self.descent = yMin * scale - self.capHeight = self.ascent - - self.stemV = 50 + int(pow((usWeightClass / 65), 2)) - - # post - PostScript table - self.seek_table("post") - self.skip(4) - self.italicAngle = self.read_short() + self.read_ushort() / 65536 - self.underlinePosition = self.read_short() * scale - self.underlineThickness = self.read_short() * scale - isFixedPitch = self.read_ulong() - - self.flags = 4 - - if self.italicAngle != 0: - self.flags |= 64 - if usWeightClass >= 600: - self.flags |= 262144 - if isFixedPitch: - self.flags |= 1 - - # hhea - Horizontal header table - self.seek_table("hhea") - self.skip(32) - metricDataFormat = self.read_ushort() - if metricDataFormat != 0: - raise RuntimeError( - f"Unknown horizontal metric data format: {metricDataFormat}" - ) - numberOfHMetrics = self.read_ushort() - if numberOfHMetrics == 0: - raise RuntimeError("Number of horizontal metrics is 0") - - # maxp - Maximum profile table - self.seek_table("maxp") - self.skip(4) - numGlyphs = self.read_ushort() - - # cmap - Character to glyph index mapping table - cmap_offset = self.seek_table("cmap") - self.skip(2) - cmapTableCount = self.read_ushort() - unicode_cmap_offset = 0 - unicode_cmap_offset12 = 0 - - for _ in range(cmapTableCount): - 
platformID = self.read_ushort() - encodingID = self.read_ushort() - offset = self.read_ulong() - save_pos = self._pos - if platformID == 3 and encodingID == 10: # Microsoft, UCS-4 - fmt = self.get_ushort(cmap_offset + offset) - if fmt == 12: - if not unicode_cmap_offset12: - unicode_cmap_offset12 = cmap_offset + offset - break - if ( - platformID == 3 and encodingID == 1 - ) or platformID == 0: # Microsoft, Unicode - fmt = self.get_ushort(cmap_offset + offset) - if fmt == 4: - if not unicode_cmap_offset: - unicode_cmap_offset = cmap_offset + offset - # Don't break here since we might later get - # unicode_cmap_offset12 which is needed for - # characters => 0x10000 (CMAP12) - # - # break - - self.seek(save_pos) - - if not unicode_cmap_offset and not unicode_cmap_offset12: - raise RuntimeError( - f"Font ({self.filename}) does not have cmap for Unicode (platform 3, " - f"encoding 1, format 4, or platform 3, encoding 10, format 12, or " - f"platform 0, any encoding, format 4)" - ) - - glyphToChar = {} - charToGlyph = {} - if unicode_cmap_offset12: - self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph) - else: - self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph) - - # hmtx - Horizontal metrics table - self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale) - - def makeSubset(self, file, subset): - self.filename = file - with open(file, "rb") as self.fh: - self._pos = 0 - self.charWidths = [] - self.glyphPos = {} - self.charToGlyph = {} - self.tables = {} - self.otables = {} - self.ascent = 0 - self.descent = 0 - self.skip(4) - self.maxUni = 0 - self.readTableDirectory() - - # head - Font header table - self.seek_table("head") - self.skip(50) - indexToLocFormat = self.read_ushort() - # pylint: disable=unused-variable - glyphDataFormat = self.read_ushort() - - # hhea - Horizontal header table - self.seek_table("hhea") - self.skip(32) - metricDataFormat = self.read_ushort() - orignHmetrics = numberOfHMetrics = self.read_ushort() - - # maxp - Maximum profile table - self.seek_table("maxp") - self.skip(4) - numGlyphs = self.read_ushort() - - # cmap - Character to glyph index mapping table - cmap_offset = self.seek_table("cmap") - self.skip(2) - cmapTableCount = self.read_ushort() - unicode_cmap_offset = 0 - unicode_cmap_offset12 = 0 - for _ in range(cmapTableCount): - platformID = self.read_ushort() - encodingID = self.read_ushort() - offset = self.read_ulong() - save_pos = self._pos - if platformID == 3 and encodingID == 10: # Microsoft, UCS-4 - fmt = self.get_ushort(cmap_offset + offset) - if fmt == 12: - if not unicode_cmap_offset12: - unicode_cmap_offset12 = cmap_offset + offset - break - if ( - platformID == 3 and encodingID == 1 - ) or platformID == 0: # Microsoft, Unicode - fmt = self.get_ushort(cmap_offset + offset) - if fmt == 4: - unicode_cmap_offset = cmap_offset + offset - # Don't break here since we might later get - # unicode_cmap_offset12 which is needed for - # characters => 0x10000 (CMAP12) - # - # break - - self.seek(save_pos) - - if not unicode_cmap_offset and not unicode_cmap_offset12: - raise RuntimeError( - f"Font ({self.filename}) does not have cmap for Unicode " - f"(platform 3, encoding 1, format 4, or platform 3, encoding 10, " - f"format 12, or platform 0, any encoding, format 4)" - ) - - glyphToChar = {} - charToGlyph = {} - if unicode_cmap_offset12: - self.getCMAP12(unicode_cmap_offset12, glyphToChar, charToGlyph) - else: - self.getCMAP4(unicode_cmap_offset, glyphToChar, charToGlyph) - - self.charToGlyph = charToGlyph - - # hmtx - Horizontal 
metrics table - scale = 1 # not used - self.getHMTX(numberOfHMetrics, numGlyphs, glyphToChar, scale) - - # loca - Index to location - self.getLOCA(indexToLocFormat, numGlyphs) - - subsetglyphs = [(0, 0)] # special "sorted dict"! - subsetCharToGlyph = {} - for code in subset: - target = subset[code] if isinstance(subset, dict) else code - if target > 65535: - raise Exception( - f"Character U+{target:X} must be remapped since it cannot be indexed in CMAP4 table" - ) - if code in self.charToGlyph: - if (self.charToGlyph[code], target) not in subsetglyphs: - subsetglyphs.append( - (self.charToGlyph[code], target) - ) # Old Glyph ID => Unicode - subsetCharToGlyph[target] = self.charToGlyph[ - code - ] # Unicode to old GlyphID - self.maxUni = max(self.maxUni, code) - (start, _) = self.get_table_pos("glyf") - - subsetglyphs.sort() - glyphSet = {} - n = 0 - # maximum Unicode index (character code) in this font, according to the cmap - # subtable for platform ID 3 and platform- specific encoding ID 0 or 1. - fsLastCharIndex = 0 - for originalGlyphIdx, uni in subsetglyphs: - fsLastCharIndex = max(fsLastCharIndex, uni) - glyphSet[originalGlyphIdx] = n # old glyphID to new glyphID - n += 1 - - codeToGlyph = {} - for uni, originalGlyphIdx in sorted(subsetCharToGlyph.items()): - codeToGlyph[uni] = glyphSet[originalGlyphIdx] - - self.codeToGlyph = codeToGlyph - - for originalGlyphIdx, uni in subsetglyphs: - nonlocals = { - "start": start, - "glyphSet": glyphSet, - "subsetglyphs": subsetglyphs, - } - self.getGlyphs(originalGlyphIdx, nonlocals) - - numGlyphs = numberOfHMetrics = len(subsetglyphs) - - # tables copied from the original - tags = ["name"] - for tag in tags: - self.add(tag, self.get_table(tag)) - tags = ["cvt ", "fpgm", "prep", "gasp"] - for tag in tags: - if tag in self.tables: - self.add(tag, self.get_table(tag)) - - # post - PostScript - opost = self.get_table("post") - post = ( - b("\x00\x03\x00\x00") - + substr(opost, 4, 12) - + b("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00") - ) - self.add("post", post) - - # Sort CID2GID map into segments of contiguous codes - if 0 in codeToGlyph: - del codeToGlyph[0] - # unset(codeToGlyph[65535]) - rangeid = 0 - range_ = {} - prevcid = -2 - prevglidx = -1 - # for each character - for cid, glidx in sorted(codeToGlyph.items()): - if cid == (prevcid + 1) and glidx == (prevglidx + 1): - range_[rangeid].append(glidx) - else: - # new range - rangeid = cid - range_[rangeid] = [] - range_[rangeid].append(glidx) - prevcid = cid - prevglidx = glidx - - # cmap - Character to glyph mapping - Format 4 (MS / ) - segCount = len(range_) + 1 # + 1 Last segment has missing character 0xFFFF - searchRange = 1 - entrySelector = 0 - while searchRange * 2 <= segCount: - searchRange = searchRange * 2 - entrySelector = entrySelector + 1 - - searchRange = searchRange * 2 - rangeShift = segCount * 2 - searchRange - length = 16 + (8 * segCount) + (numGlyphs + 1) - cmap = [ - 0, - 1, # Index : version, number of encoding subtables - 3, - 1, # Encoding Subtable : platform (MS=3), encoding (Unicode) - 0, - 12, # Encoding Subtable : offset (hi,lo) - 4, - length, - 0, # Format 4 Mapping subtable: format, length, language - segCount * 2, - searchRange, - entrySelector, - rangeShift, - ] - - range_ = sorted(range_.items()) - - # endCode(s) - for start, subrange in range_: - endCode = start + (len(subrange) - 1) - cmap.append(endCode) # endCode(s) - - cmap.append(0xFFFF) # endCode of last Segment - cmap.append(0) # reservedPad - - # startCode(s) - for start, subrange 
in range_: - cmap.append(start) # startCode(s) - - cmap.append(0xFFFF) # startCode of last Segment - # idDelta(s) - for start, subrange in range_: - idDelta = -(start - subrange[0]) - n += len(subrange) - cmap.append(idDelta) # idDelta(s) - - cmap.append(1) # idDelta of last Segment - # idRangeOffset(s) - for subrange in range_: - # idRangeOffset[segCount] Offset in bytes to glyph indexArray, or 0 - cmap.append(0) - - cmap.append(0) # idRangeOffset of last Segment - for subrange, glidx in range_: - cmap.extend(glidx) - - cmap.append(0) # Mapping for last character - cmapstr = b("") - for cm in cmap: - if cm >= 0: - cmapstr += pack(">H", cm) - else: - try: - cmapstr += pack(">h", cm) - except StructError: - # cmap value too big to fit in a short (h), - # putting it in an unsigned short (H): - cmapstr += pack(">H", -cm) - self.add("cmap", cmapstr) - - # glyf - Glyph data - (glyfOffset, glyfLength) = self.get_table_pos("glyf") - if glyfLength < self.maxStrLenRead: - glyphData = self.get_table("glyf") - - offsets = [] - glyf = b("") - pos = 0 - - hmtxstr = b("") - maxComponentElements = 0 # number of glyphs referenced at top level - self.glyphdata = {} - - for originalGlyphIdx, uni in subsetglyphs: - # hmtx - Horizontal Metrics - hm = self.getHMetric(orignHmetrics, originalGlyphIdx) - hmtxstr += hm - - offsets.append(pos) - try: - glyphPos = self.glyphPos[originalGlyphIdx] - glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos - except IndexError: - warnings.warn(f"Missing glyph {originalGlyphIdx} in {file}") - glyphLen = 0 - - if glyfLength < self.maxStrLenRead: - data = substr(glyphData, glyphPos, glyphLen) - else: - if glyphLen > 0: - data = self.get_chunk(glyfOffset + glyphPos, glyphLen) - else: - data = b("") - - if glyphLen > 0: - up = unpack(">H", substr(data, 0, 2))[0] - if glyphLen > 2 and ( - up & (1 << 15) - ): # If number of contours <= -1 i.e. 
composite glyph - pos_in_glyph = 10 - flags = GF_MORE - nComponentElements = 0 - while flags & GF_MORE: - nComponentElements += ( - 1 # number of glyphs referenced at top level - ) - up = unpack(">H", substr(data, pos_in_glyph, 2)) - flags = up[0] - up = unpack(">H", substr(data, pos_in_glyph + 2, 2)) - glyphIdx = up[0] - self.glyphdata.setdefault(originalGlyphIdx, {}).setdefault( - "compGlyphs", [] - ).append(glyphIdx) - try: - data = self._set_ushort( - data, pos_in_glyph + 2, glyphSet[glyphIdx] - ) - except KeyError: - data = 0 - warnings.warn(f"Missing glyph data {glyphIdx} in {file}") - pos_in_glyph += 4 - if flags & GF_WORDS: - pos_in_glyph += 4 - else: - pos_in_glyph += 2 - if flags & GF_SCALE: - pos_in_glyph += 2 - elif flags & GF_XYSCALE: - pos_in_glyph += 4 - elif flags & GF_TWOBYTWO: - pos_in_glyph += 8 - - maxComponentElements = max(maxComponentElements, nComponentElements) - - glyf += data - pos += glyphLen - if pos % 4 != 0: - padding = 4 - (pos % 4) - glyf += b("\0") * padding - pos += padding - - offsets.append(pos) - self.add("glyf", glyf) - - # hmtx - Horizontal Metrics - self.add("hmtx", hmtxstr) - - # loca - Index to location - locastr = b("") - if ((pos + 1) >> 1) > 0xFFFF: - indexToLocFormat = 1 # long format - for offset in offsets: - locastr += pack(">L", offset) - else: - indexToLocFormat = 0 # short format - for offset in offsets: - locastr += pack(">H", offset // 2) - - self.add("loca", locastr) - - # head - Font header - head = self.get_table("head") - head = self._set_ushort(head, 50, indexToLocFormat) - self.add("head", head) - - # hhea - Horizontal Header - hhea = self.get_table("hhea") - hhea = self._set_ushort(hhea, 34, numberOfHMetrics) - self.add("hhea", hhea) - - # maxp - Maximum Profile - maxp = self.get_table("maxp") - maxp = self._set_ushort(maxp, 4, numGlyphs) - self.add("maxp", maxp) - - # OS/2 - OS/2 - os2 = self.get_table("OS/2") - self.add("OS/2", os2) - - # Put the TTF file together - stm = self.endTTFile("") - return stm - - # Recursively get composite glyphs - def getGlyphs(self, originalGlyphIdx, nonlocals): - # &start, &glyphSet, &subsetglyphs) - - try: - glyphPos = self.glyphPos[originalGlyphIdx] - glyphLen = self.glyphPos[originalGlyphIdx + 1] - glyphPos - except IndexError: - return - - if not glyphLen: - return - - self.seek(nonlocals["start"] + glyphPos) - numberOfContours = self.read_short() - if numberOfContours < 0: - self.skip(8) - flags = GF_MORE - while flags & GF_MORE: - flags = self.read_ushort() - glyphIdx = self.read_ushort() - if glyphIdx not in nonlocals["glyphSet"]: - nonlocals["glyphSet"][glyphIdx] = len( - nonlocals["subsetglyphs"] - ) # old glyphID to new glyphID - nonlocals["subsetglyphs"].append((glyphIdx, 1)) - - savepos = self.fh.tell() - self.getGlyphs(glyphIdx, nonlocals) - self.seek(savepos) - if flags & GF_WORDS: - self.skip(4) - else: - self.skip(2) - if flags & GF_SCALE: - self.skip(2) - elif flags & GF_XYSCALE: - self.skip(4) - elif flags & GF_TWOBYTWO: - self.skip(8) - - def getHMTX(self, numberOfHMetrics, numGlyphs, glyphToChar, scale): - start = self.seek_table("hmtx") - aw = 0 - self.charWidths = [] - - def resize_cw(size, default): - size = (((size + 1) // 1024) + 1) * 1024 - delta = size - len(self.charWidths) - if delta > 0: - self.charWidths += [default] * delta - - nCharWidths = 0 - if (numberOfHMetrics * 4) < self.maxStrLenRead: - data = self.get_chunk(start, (numberOfHMetrics * 4)) - arr = unpack(f">{len(data) // 2}H", data) - else: - self.seek(start) - for glyph in range(numberOfHMetrics): - if 
(numberOfHMetrics * 4) < self.maxStrLenRead: - aw = arr[(glyph * 2)] # PHP starts arrays from index 0!? +1 - else: - aw = self.read_ushort() - # pylint: disable=unused-variable - lsb = self.read_ushort() - - if glyph in glyphToChar or glyph == 0: - if aw >= (1 << 15): - aw = 0 # 1.03 Some (arabic) fonts have -ve values for width - # although should be unsigned value - # - comes out as e.g. 65108 (intended -50) - if glyph == 0: - self.defaultWidth = scale * aw - continue - - for char in glyphToChar[glyph]: - if char not in (0, 65535): - w = round(scale * aw + 0.001) # ROUND_HALF_UP - if w == 0: - w = 65535 - if char < 196608: - if char >= len(self.charWidths): - resize_cw(char, self.defaultWidth) - self.charWidths[char] = w - nCharWidths += 1 - - data = self.get_chunk((start + numberOfHMetrics * 4), (numGlyphs * 2)) - arr = unpack(f">{len(data) // 2}H", data) - diff = numGlyphs - numberOfHMetrics - for pos in range(diff): - glyph = pos + numberOfHMetrics - if glyph in glyphToChar: - for char in glyphToChar[glyph]: - if char not in (0, 65535): - w = round(scale * aw + 0.001) # ROUND_HALF_UP - if w == 0: - w = 65535 - if char < 196608: - if char >= len(self.charWidths): - resize_cw(char, self.defaultWidth) - self.charWidths[char] = w - nCharWidths += 1 - - # NB 65535 is a set width of 0 - # First bytes define number of chars in font - self.charWidths[0] = nCharWidths - - def getHMetric(self, numberOfHMetrics, gid): - start = self.seek_table("hmtx") - if gid < numberOfHMetrics: - self.seek(start + (gid * 4)) - hm = self.fh.read(4) - else: - self.seek(start + ((numberOfHMetrics - 1) * 4)) - hm = self.fh.read(2) - self.seek(start + (numberOfHMetrics * 2) + (gid * 2)) - hm += self.fh.read(2) - return hm - - def getLOCA(self, indexToLocFormat, numGlyphs): - try: - start = self.seek_table("loca") - except KeyError: - # pylint: disable=raise-missing-from - raise RuntimeError( - f"Unknown location table format, index={indexToLocFormat}" - ) - self.glyphPos = [] - if indexToLocFormat == 0: - data = self.get_chunk(start, (numGlyphs * 2) + 2) - arr = unpack(f">{len(data) // 2}H", data) - for n in range(numGlyphs): - self.glyphPos.append(arr[n] * 2) # n+1 !? - elif indexToLocFormat == 1: - data = self.get_chunk(start, (numGlyphs * 4) + 4) - arr = unpack(f">{len(data) // 4}L", data) - for n in range(numGlyphs): - self.glyphPos.append(arr[n]) # n+1 !? - else: - raise RuntimeError( - f"Unknown location table format, index={indexToLocFormat}" - ) - - # CMAP Format 4 - def getCMAP4(self, unicode_cmap_offset, glyphToChar, charToGlyph): - self.maxUniChar = 0 - self.seek(unicode_cmap_offset + 2) - length = self.read_ushort() - limit = unicode_cmap_offset + length - self.skip(2) - - segCount = self.read_ushort() // 2 - self.skip(6) - endCount = [] - for _ in range(segCount): - endCount.append(self.read_ushort()) - self.skip(2) - startCount = [] - for _ in range(segCount): - startCount.append(self.read_ushort()) - idDelta = [] - for _ in range(segCount): - idDelta.append(self.read_short()) # ???? 
was unsigned short - idRangeOffset_start = self._pos - idRangeOffset = [] - for _ in range(segCount): - idRangeOffset.append(self.read_ushort()) - - for n in range(segCount): - endpoint = endCount[n] + 1 - for unichar in range(startCount[n], endpoint, 1): - if idRangeOffset[n] == 0: - glyph = (unichar + idDelta[n]) & 0xFFFF - else: - offset = (unichar - startCount[n]) * 2 + idRangeOffset[n] - offset = idRangeOffset_start + 2 * n + offset - if offset >= limit: - glyph = 0 - else: - glyph = self.get_ushort(offset) - if glyph != 0: - glyph = (glyph + idDelta[n]) & 0xFFFF - - charToGlyph[unichar] = glyph - if unichar < 196608: - self.maxUniChar = max(unichar, self.maxUniChar) - glyphToChar.setdefault(glyph, []).append(unichar) - - # CMAP Format 12 - def getCMAP12(self, unicode_cmap_offset, glyphToChar, charToGlyph): - self.maxUniChar = 0 - # table (skip format version, should be 12) - self.seek(unicode_cmap_offset + 2) - # reserved - self.skip(2) - # table length - length = self.read_ulong() - # language (should be 0) - self.skip(4) - # groups count - grpCount = self.read_ulong() - - if 2 + 2 + 4 + 4 + 4 + grpCount * 3 * 4 > length: - raise RuntimeError("TTF format 12 cmap table too small") - for _ in range(grpCount): - startCharCode = self.read_ulong() - endCharCode = self.read_ulong() - glyph = self.read_ulong() - for unichar in range(startCharCode, endCharCode + 1): - charToGlyph[unichar] = glyph - if unichar < 196608: - self.maxUniChar = max(unichar, self.maxUniChar) - glyphToChar.setdefault(glyph, []).append(unichar) - glyph += 1 - - # Put the TTF file together - def endTTFile(self, stm): - stm = b("") - numTables = len(self.otables) - searchRange = 1 - entrySelector = 0 - while searchRange * 2 <= numTables: - searchRange *= 2 - entrySelector += 1 - - searchRange *= 16 - rangeShift = numTables * 16 - searchRange - - # Header - if _TTF_MAC_HEADER: - stm += pack( - ">LHHHH", 0x74727565, numTables, searchRange, entrySelector, rangeShift - ) # Mac - else: - stm += pack( - ">LHHHH", 0x00010000, numTables, searchRange, entrySelector, rangeShift - ) # Windows - - # Table directory - tables = self.otables - - offset = 12 + numTables * 16 - sorted_tables = sorted(tables.items()) - for tag, data in sorted_tables: - if tag == "head": - head_start = offset - stm += tag.encode("latin1") - checksum = calcChecksum(data) - stm += pack(">HH", checksum[0], checksum[1]) - stm += pack(">LL", offset, len(data)) - paddedLength = (len(data) + 3) & ~3 - offset = offset + paddedLength - - # Table data - for tag, data in sorted_tables: - data += b("\0\0\0") - stm += substr(data, 0, (len(data) & ~3)) - - checksum = calcChecksum(stm) - checksum = sub32((0xB1B0, 0xAFBA), checksum) - chk = pack(">HH", checksum[0], checksum[1]) - stm = self.splice(stm, (head_start + 8), chk) - return stm diff --git a/fpdf/util.py b/fpdf/util.py index 43e0b56e1..20f2fe066 100644 --- a/fpdf/util.py +++ b/fpdf/util.py @@ -8,12 +8,6 @@ def object_id_for_page(page): return 2 * page + 1 -def substr(s, start, length=-1): - if length < 0: - length = len(s) - start - return s[start : start + length] - - def format_date(date: datetime, with_tz=False) -> str: if with_tz: assert date.tzinfo diff --git a/setup.py b/setup.py index 7a596adac..d9618fc61 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "defusedxml", "svg.path", "Pillow>=6.2.2", # minimum version tested there: https://github.com/PyFPDF/fpdf2/actions/runs/2295868575 + "fonttools", ], classifiers=[ "Development Status :: 5 - Production/Stable", diff --git 
a/test/end_to_end_legacy/charmap/charmap_first_999_chars-DejaVuSans.pdf b/test/end_to_end_legacy/charmap/charmap_first_999_chars-DejaVuSans.pdf index 51f579fe5..11bd2b0b5 100644 Binary files a/test/end_to_end_legacy/charmap/charmap_first_999_chars-DejaVuSans.pdf and b/test/end_to_end_legacy/charmap/charmap_first_999_chars-DejaVuSans.pdf differ diff --git a/test/end_to_end_legacy/charmap/charmap_first_999_chars-DroidSansFallback.pdf b/test/end_to_end_legacy/charmap/charmap_first_999_chars-DroidSansFallback.pdf index 0a5eeaa86..e96f7766b 100644 Binary files a/test/end_to_end_legacy/charmap/charmap_first_999_chars-DroidSansFallback.pdf and b/test/end_to_end_legacy/charmap/charmap_first_999_chars-DroidSansFallback.pdf differ diff --git a/test/end_to_end_legacy/charmap/charmap_first_999_chars-Roboto-Regular.pdf b/test/end_to_end_legacy/charmap/charmap_first_999_chars-Roboto-Regular.pdf index 7611268a3..2c34e354d 100644 Binary files a/test/end_to_end_legacy/charmap/charmap_first_999_chars-Roboto-Regular.pdf and b/test/end_to_end_legacy/charmap/charmap_first_999_chars-Roboto-Regular.pdf differ diff --git a/test/end_to_end_legacy/charmap/charmap_first_999_chars-cmss12.pdf b/test/end_to_end_legacy/charmap/charmap_first_999_chars-cmss12.pdf index f53106119..546541c1a 100644 Binary files a/test/end_to_end_legacy/charmap/charmap_first_999_chars-cmss12.pdf and b/test/end_to_end_legacy/charmap/charmap_first_999_chars-cmss12.pdf differ diff --git a/test/end_to_end_legacy/charmap/test_charmap.py b/test/end_to_end_legacy/charmap/test_charmap.py index 80411ffa1..f7d559855 100644 --- a/test/end_to_end_legacy/charmap/test_charmap.py +++ b/test/end_to_end_legacy/charmap/test_charmap.py @@ -14,30 +14,12 @@ import pytest import fpdf -from fpdf.ttfonts import TTFontFile +from fontTools import ttLib from test.conftest import assert_pdf_equal HERE = Path(__file__).resolve().parent -class MyTTFontFile(TTFontFile): - """MyTTFontFile docstring - - I clearly have no idea what this does. It'd be great if this class were - even a little bit better documented, so that it would be clearer what this - test is testing, otherwise this test isn't clearly testing one class or the - other. 
- """ - - def getCMAP4(self, unicode_cmap_offset, glyphToChar, charToGlyph): - TTFontFile.getCMAP4(self, unicode_cmap_offset, glyphToChar, charToGlyph) - self.saveChar = charToGlyph - - def getCMAP12(self, unicode_cmap_offset, glyphToChar, charToGlyph): - TTFontFile.getCMAP12(self, unicode_cmap_offset, glyphToChar, charToGlyph) - self.saveChar = charToGlyph - - @pytest.mark.parametrize( "font_filename", ["DejaVuSans.ttf", "DroidSansFallback.ttf", "Roboto-Regular.ttf", "cmss12.ttf"], @@ -51,11 +33,11 @@ def test_first_999_chars(font_filename, tmp_path): pdf.add_font(font_name, fname=font_path) pdf.set_font(font_name, size=10) - ttf = MyTTFontFile() - ttf.getMetrics(font_path) + font = ttLib.TTFont(font_path) + cmap = font.getBestCmap() # Create a PDF with the first 999 charters defined in the font: - for counter, character in enumerate(ttf.saveChar, 0): + for counter, character in enumerate(cmap, 0): pdf.write(8, f"{counter:03}) {character:03x} - {character:c}", print_sh=True) pdf.ln() if counter >= 999: diff --git a/test/fonts/Quicksand-Bold.otf b/test/fonts/Quicksand-Bold.otf new file mode 100644 index 000000000..fc7384b5d Binary files /dev/null and b/test/fonts/Quicksand-Bold.otf differ diff --git a/test/fonts/Quicksand-Italic.otf b/test/fonts/Quicksand-Italic.otf new file mode 100644 index 000000000..ad33f699f Binary files /dev/null and b/test/fonts/Quicksand-Italic.otf differ diff --git a/test/fonts/add_font_unicode.pdf b/test/fonts/add_font_unicode.pdf index e20ac11d1..6aeb23a74 100644 Binary files a/test/fonts/add_font_unicode.pdf and b/test/fonts/add_font_unicode.pdf differ diff --git a/test/fonts/fonts_emoji_glyph.pdf b/test/fonts/fonts_emoji_glyph.pdf index 645f43f84..2ae6c7855 100644 Binary files a/test/fonts/fonts_emoji_glyph.pdf and b/test/fonts/fonts_emoji_glyph.pdf differ diff --git a/test/fonts/fonts_otf.pdf b/test/fonts/fonts_otf.pdf new file mode 100644 index 000000000..4da5e8107 Binary files /dev/null and b/test/fonts/fonts_otf.pdf differ diff --git a/test/fonts/fonts_remap_nb.pdf b/test/fonts/fonts_remap_nb.pdf index 7c9fdacee..337bd7270 100644 Binary files a/test/fonts/fonts_remap_nb.pdf and b/test/fonts/fonts_remap_nb.pdf differ diff --git a/test/fonts/fonts_two_mappings.pdf b/test/fonts/fonts_two_mappings.pdf index 3997e0e27..5f468f3c2 100644 Binary files a/test/fonts/fonts_two_mappings.pdf and b/test/fonts/fonts_two_mappings.pdf differ diff --git a/test/fonts/render_en_dash.pdf b/test/fonts/render_en_dash.pdf index fe8d36aff..171f82ec2 100644 Binary files a/test/fonts/render_en_dash.pdf and b/test/fonts/render_en_dash.pdf differ diff --git a/test/fonts/test_add_font.py b/test/fonts/test_add_font.py index 87d4fc2d4..a5da17d17 100644 --- a/test/fonts/test_add_font.py +++ b/test/fonts/test_add_font.py @@ -90,9 +90,18 @@ def test_render_en_dash(tmp_path): # issue-166 assert_pdf_equal(pdf, HERE / "render_en_dash.pdf", tmp_path) -def test_add_font_otf(): +def test_add_font_otf(tmp_path): pdf = FPDF() - font_file_path = HERE / "Quicksand-Regular.otf" - with pytest.raises(RuntimeError) as error: - pdf.add_font("Quicksand", fname=font_file_path) - assert str(error.value) == "Postscript outlines are not supported" + pdf.add_page() + pdf.add_font("Quicksand", style="", fname=HERE / "Quicksand-Regular.otf") + pdf.add_font("Quicksand", style="B", fname=HERE / "Quicksand-Bold.otf") + pdf.add_font("Quicksand", style="I", fname=HERE / "Quicksand-Italic.otf") + pdf.set_font("Quicksand", size=32) + text = ( + "Lorem ipsum dolor, **consectetur adipiscing** elit," + " eiusmod __tempor 
incididunt__ ut labore et dolore --magna aliqua--." ) + pdf.multi_cell(w=pdf.epw, txt=text, markdown=True) + pdf.ln() + pdf.multi_cell(w=pdf.epw, txt=text, markdown=True, align="L") + assert_pdf_equal(pdf, HERE / "fonts_otf.pdf", tmp_path) diff --git a/test/fonts/thai_text.pdf b/test/fonts/thai_text.pdf index 29125ce07..d6cfb9016 100644 Binary files a/test/fonts/thai_text.pdf and b/test/fonts/thai_text.pdf differ diff --git a/test/html/html_heading_hebrew.pdf b/test/html/html_heading_hebrew.pdf index d41112519..74959777e 100644 Binary files a/test/html/html_heading_hebrew.pdf and b/test/html/html_heading_hebrew.pdf differ diff --git a/test/html/issue_156.pdf b/test/html/issue_156.pdf index 950f3ddc8..0bbed9b7a 100644 Binary files a/test/html/issue_156.pdf and b/test/html/issue_156.pdf differ diff --git a/test/outline/russian_heading.pdf b/test/outline/russian_heading.pdf index 0d6cf2d66..44913b8c9 100644 Binary files a/test/outline/russian_heading.pdf and b/test/outline/russian_heading.pdf differ diff --git a/test/text/cell_curfont_leak.pdf b/test/text/cell_curfont_leak.pdf index 6e7069e94..9e59596d8 100644 Binary files a/test/text/cell_curfont_leak.pdf and b/test/text/cell_curfont_leak.pdf differ diff --git a/test/text/cell_markdown_right_aligned.pdf b/test/text/cell_markdown_right_aligned.pdf index dd239d30d..f4085442d 100644 Binary files a/test/text/cell_markdown_right_aligned.pdf and b/test/text/cell_markdown_right_aligned.pdf differ diff --git a/test/text/cell_markdown_with_ttf_fonts.pdf b/test/text/cell_markdown_with_ttf_fonts.pdf index 496bd27f6..d14051d4f 100644 Binary files a/test/text/cell_markdown_with_ttf_fonts.pdf and b/test/text/cell_markdown_with_ttf_fonts.pdf differ diff --git a/test/text/multi_cell_char_spacing.pdf b/test/text/multi_cell_char_spacing.pdf index 2118000d9..ff65b8b11 100644 Binary files a/test/text/multi_cell_char_spacing.pdf and b/test/text/multi_cell_char_spacing.pdf differ diff --git a/test/text/multi_cell_font_leakage.pdf b/test/text/multi_cell_font_leakage.pdf index a785c9497..b5944401c 100644 Binary files a/test/text/multi_cell_font_leakage.pdf and b/test/text/multi_cell_font_leakage.pdf differ diff --git a/test/text/multi_cell_font_stretching.pdf b/test/text/multi_cell_font_stretching.pdf index 51290a0e4..aaafaaa6b 100644 Binary files a/test/text/multi_cell_font_stretching.pdf and b/test/text/multi_cell_font_stretching.pdf differ diff --git a/test/text/multi_cell_j_paragraphs.pdf b/test/text/multi_cell_j_paragraphs.pdf index 1c276006c..caa70f4d1 100644 Binary files a/test/text/multi_cell_j_paragraphs.pdf and b/test/text/multi_cell_j_paragraphs.pdf differ diff --git a/test/text/multi_cell_markdown_with_ttf_fonts.pdf b/test/text/multi_cell_markdown_with_ttf_fonts.pdf index 3178c2bd7..f9995ece9 100644 Binary files a/test/text/multi_cell_markdown_with_ttf_fonts.pdf and b/test/text/multi_cell_markdown_with_ttf_fonts.pdf differ diff --git a/test/text/test_line_break.py b/test/text/test_line_break.py index 2354e3cf6..bbc1b0773 100644 --- a/test/text/test_line_break.py +++ b/test/text/test_line_break.py @@ -36,9 +36,9 @@ def test_fragment_properties(): f"frag.font ({frag.font['name']}/{frag.font['fontkey']})" f" != pdf.current_font ({pdf.current_font['name']}/{pdf.current_font['fontkey']})" ) - assert frag.unicode_font == pdf.unifontsubset, ( - f"frag.unicode_font ({frag.unicode_font})" - f" != pdf.unifontsubset ({pdf.unifontsubset})" + assert frag.is_ttf_font == pdf.is_ttf_font, ( + f"frag.is_ttf_font ({frag.is_ttf_font})" + f" != pdf.is_ttf_font 
({pdf.is_ttf_font})" ) assert frag.font_family == pdf.font_family, ( f"frag.font_family ({frag.font_family})" diff --git a/test/text/test_multi_cell_justified_with_unicode_font.pdf b/test/text/test_multi_cell_justified_with_unicode_font.pdf index 69148568f..6a3eb7a2b 100644 Binary files a/test/text/test_multi_cell_justified_with_unicode_font.pdf and b/test/text/test_multi_cell_justified_with_unicode_font.pdf differ diff --git a/test/text/text_positioning.pdf b/test/text/text_positioning.pdf index 673858ed7..afaa7526d 100644 Binary files a/test/text/text_positioning.pdf and b/test/text/text_positioning.pdf differ diff --git a/test/text/varfrags_fonts.pdf b/test/text/varfrags_fonts.pdf index 5f00114bb..a45c3ac5f 100644 Binary files a/test/text/varfrags_fonts.pdf and b/test/text/varfrags_fonts.pdf differ diff --git a/test/text/write_font_stretching.pdf b/test/text/write_font_stretching.pdf index e7f42373b..4d15c29ed 100644 Binary files a/test/text/write_font_stretching.pdf and b/test/text/write_font_stretching.pdf differ
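For readers unfamiliar with fontTools, the following standalone sketches illustrate the main steps that this changeset delegates to it. They mirror the new code in the diff but are not part of it; the font paths and character sets they use are purely illustrative.

First, the metrics gathering now done in add_font(): all values are rescaled to the PDF convention of 1000 units per em, and a per-character advance-width map is derived from the font's best cmap and its hmtx table, with the .notdef width as fallback.

from collections import defaultdict

from fontTools import ttLib

font = ttLib.TTFont("DejaVuSans.ttf")  # illustrative font path
scale = 1000 / font["head"].unitsPerEm  # rescale to 1000 units per em
default_width = round(scale * font["hmtx"].metrics[".notdef"][0])

# unicode code point -> scaled advance width, defaulting to the .notdef width
char_widths = defaultdict(lambda: default_width)
for code, glyph_name in font.getBestCmap().items():
    char_widths[code] = round(scale * font["hmtx"].metrics[glyph_name][0] + 0.001)

print(char_widths[ord("A")], round(font["hhea"].ascent * scale))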
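Next, the subsetting and serialization performed at output time in _putfonts(): only the glyphs actually referenced by the document are kept, the layout tables that the old ttfonts.py also dropped are discarded, and the result is serialized to bytes, which _putfonts() then zlib-compresses and embeds as the font file stream. The character set below is an assumption made for the example.

from io import BytesIO

from fontTools import subset as ftsubset
from fontTools import ttLib

font = ttLib.TTFont("DejaVuSans.ttf", recalcTimestamp=False)  # illustrative path

# keep only the glyphs of the characters used in the document (illustrative set)
used_codes = {ord(c) for c in "Hello world! 0123456789"}
cmap = font["cmap"].getBestCmap()
glyph_names = [cmap[code] for code in used_codes if code in cmap]

options = ftsubset.Options(notdef_outline=True, recommended_glyphs=True)
options.drop_tables += ["GDEF", "GSUB", "GPOS", "MATH", "hdmx"]
subsetter = ftsubset.Subsetter(options)
subsetter.populate(glyphs=glyph_names)
subsetter.subset(font)

# serialize the subset font; _putfonts() zlib-compresses this stream before embedding
buffer = BytesIO()
font.save(buffer)
print(f"subset font: {len(buffer.getvalue())} bytes")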
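Finally, the CIDToGIDMap written by _putfonts() is simply two big-endian bytes per character code over the whole 0x0000-0xFFFF range, pointing at the glyph IDs of the subset font. A minimal sketch with a made-up code_to_glyph mapping (the real one comes from the subsetter, as shown in the diff):

import zlib

# hypothetical mapping of new character codes to glyph IDs in the subset font
code_to_glyph = {32: 1, 65: 2, 66: 3}

# two big-endian bytes per CID, glyph 0 (.notdef) everywhere else
cid_to_gid_map = bytearray(256 * 256 * 2)
for code, glyph_id in code_to_glyph.items():
    cid_to_gid_map[code * 2] = glyph_id >> 8
    cid_to_gid_map[code * 2 + 1] = glyph_id & 0xFF

# the diff builds the same data as a latin-1 string; raw bytes are equivalent here
stream = zlib.compress(bytes(cid_to_gid_map))
print(len(stream), "bytes compressed")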