From 4455b0d6bcc4b1feb2a3f4b18166514be1c97419 Mon Sep 17 00:00:00 2001 From: Connum Date: Fri, 25 Sep 2020 22:12:23 +0200 Subject: [PATCH 1/3] process hexadecimal strings containing line breaks, but strip line breaks first (fix #273) --- src/Smalot/PdfParser/Font.php | 4 +++- tests/Integration/FontTest.php | Bin 10604 -> 11446 bytes 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Smalot/PdfParser/Font.php b/src/Smalot/PdfParser/Font.php index f5bfdf89..73337418 100644 --- a/src/Smalot/PdfParser/Font.php +++ b/src/Smalot/PdfParser/Font.php @@ -248,7 +248,9 @@ public static function decodeHexadecimal($hexa, $add_braces = false) $parts = preg_split('/(<[a-f0-9]+>)/si', $hexa, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); foreach ($parts as $part) { - if (preg_match('/^<.*>$/', $part) && false === stripos($part, '$/s', $part) && false === stripos($part, ''); if ($add_braces) { $text .= '('; diff --git a/tests/Integration/FontTest.php b/tests/Integration/FontTest.php index 1873f39ee54760b43ad1c97dfe8e1d9ea87a8fd4..6e9c3a21a41f196aefaec021c1a534c6133d88fc 100644 GIT binary patch delta 801 zcmZvX!AiqG5QY&v6}(pHr9<>m5VD)>CT*%7J$UpW2)U$9+5~G_k`4BjhsZN{7X+Wm zR}h@t*-Bely8Jsk|NqV8=jr?9GaR-(us_#@JliH|HZR_i71XLK$|bxPDu)DCMVY~@ z%92;8^L4XIp!g^9v!B~KGfOqZXs7&WDFUF`d zJer9<#I#IIs9}a^AT}5Rbckycn)O}i5CY8IX5>zF4tqO>9(pF From 20c9f92ab4c296bf663bd4a4612c20ef56c1ffd2 Mon Sep 17 00:00:00 2001 From: Connum Date: Mon, 28 Sep 2020 10:13:09 +0200 Subject: [PATCH 2/3] remove binary symbold from test data string --- tests/Integration/FontTest.php | Bin 11446 -> 11445 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/Integration/FontTest.php b/tests/Integration/FontTest.php index 6e9c3a21a41f196aefaec021c1a534c6133d88fc..564bfbe7cb3aebde313b84d07227dcd4abd0628e 100644 GIT binary patch delta 12 TcmdlMxixZwuiR!oxePu4B-jMb delta 14 VcmdlQxh-;ouN))8W?#7sJ^(9A1kL~e From 8d42c4e031b858161c323235198fa4af987975d8 Mon Sep 17 00:00:00 2001 From: Connum Date: Wed, 30 Sep 2020 10:28:21 +0200 Subject: [PATCH 3/3] code linting --- src/Smalot/PdfParser/Font.php | 2 +- tests/Integration/FontTest.php | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Smalot/PdfParser/Font.php b/src/Smalot/PdfParser/Font.php index 40c530f9..415fbcc1 100644 --- a/src/Smalot/PdfParser/Font.php +++ b/src/Smalot/PdfParser/Font.php @@ -250,7 +250,7 @@ public static function decodeHexadecimal($hexa, $add_braces = false) foreach ($parts as $part) { if (preg_match('/^<.*>$/s', $part) && false === stripos($part, ''); if ($add_braces) { $text .= '('; diff --git a/tests/Integration/FontTest.php b/tests/Integration/FontTest.php index 564bfbe7..3d4c79e7 100644 --- a/tests/Integration/FontTest.php +++ b/tests/Integration/FontTest.php @@ -240,7 +240,6 @@ public function testDecodeHexadecimal() // addressing issue #273: https://github.com/smalot/pdfparser/issues/273 $hexa = "<0027004c0056005300520051004c0045004c004f004c005d0044006f006d0052001d000300560048005b00570044001000490048004c00550044000f0003001400170003004700480003004900480059004800550048004c00550052000300470048000300\n15001300150013>"; $this->assertEquals("\x0\x27\x0\x4c\x0\x56\x0\x53\x0\x52\x0\x51\x0\x4c\x0\x45\x0\x4c\x0\x4f\x0\x4c\x0\x5d\x0\x44\x0\x6f\x0\x6d\x0\x52\x0\x1d\x0\x3\x0\x56\x0\x48\x0\x5b\x0\x57\x0\x44\x0\x10\x0\x49\x0\x48\x0\x4c\x0\x55\x0\x44\x0\xf\x0\x3\x0\x14\x0\x17\x0\x3\x0\x47\x0\x48\x0\x3\x0\x49\x0\x48\x0\x59\x0\x48\x0\x55\x0\x48\x0\x4c\x0\x55\x0\x52\x0\x3\x0\x47\x0\x48\x0\x3\x0\x15\x0\x13\x0\x15\x0\x13", Font::decodeHexadecimal($hexa)); - } public function testDecodeOctal()