From fafe2f29158051eeff17c55374df4488b2344f7d Mon Sep 17 00:00:00 2001 From: Serge Titov Date: Wed, 10 May 2023 13:37:03 +0600 Subject: [PATCH] Reduce excessive substring allocation, using preg_match's offset parameter (#595) * use preg_match offset * clean code * fix codestyle * preg_match returns false just on error --- src/Smalot/PdfParser/Element.php | 2 +- src/Smalot/PdfParser/PDFObject.php | 69 ++++++++++++++++++++++++------ 2 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/Smalot/PdfParser/Element.php b/src/Smalot/PdfParser/Element.php index 4acfcf8b..bdd4afc3 100644 --- a/src/Smalot/PdfParser/Element.php +++ b/src/Smalot/PdfParser/Element.php @@ -107,7 +107,7 @@ public static function parse(string $content, ?Document $document = null, int &$ $old_position = $position; if (!$only_values) { - if (!preg_match('/^\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', substr($content, $position), $match)) { + if (!preg_match('/\G\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', $content, $match, 0, $position)) { break; } else { $name = ltrim($match['name'], '/'); diff --git a/src/Smalot/PdfParser/PDFObject.php b/src/Smalot/PdfParser/PDFObject.php index 57c0f898..d038702d 100644 --- a/src/Smalot/PdfParser/PDFObject.php +++ b/src/Smalot/PdfParser/PDFObject.php @@ -592,18 +592,22 @@ public function getCommandsText(string $text_part, int &$offset = 0): array case '/': $type = $char; if (preg_match( - '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', - substr($text_part, $offset), - $matches + '/\G\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si', + $text_part, + $matches, + 0, + $offset ) ) { $operator = $matches[2]; $command = $matches[1]; $offset += \strlen($matches[0]); } elseif (preg_match( - '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', - substr($text_part, $offset), - $matches + '/\G\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si', + $text_part, + $matches, + 0, + $offset ) ) { $operator = $matches[2]; @@ -621,7 +625,14 @@ public function getCommandsText(string $text_part, int &$offset = 0): 
array // get elements $command = $this->getCommandsText($text_part, $offset); - if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) { + if (preg_match( + '/\G\s*[A-Z]{1,2}\s*/si', + $text_part, + $matches, + 0, + $offset + ) + ) { $operator = trim($matches[0]); $offset += \strlen($matches[0]); } @@ -642,7 +653,14 @@ public function getCommandsText(string $text_part, int &$offset = 0): array $offset = $strpos + 1; } - if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) { + if (preg_match( + '/\G\s*[A-Z]{1,2}\s*/si', + $text_part, + $matches, + 0, + $offset + ) + ) { $operator = trim($matches[0]); $offset += \strlen($matches[0]); } @@ -682,7 +700,14 @@ public function getCommandsText(string $text_part, int &$offset = 0): array $command = substr($text_part, $offset, $strpos - $offset - 1); $offset = $strpos; - if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) { + if (preg_match( + '/\G\s*([A-Z\']{1,2})\s*/si', + $text_part, + $matches, + 0, + $offset + ) + ) { $operator = $matches[1]; $offset += \strlen($matches[0]); } @@ -693,19 +718,35 @@ public function getCommandsText(string $text_part, int &$offset = 0): array if ('ET' == substr($text_part, $offset, 2)) { break; } elseif (preg_match( - '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si', - substr($text_part, $offset), - $matches + '/\G\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si', + $text_part, + $matches, + 0, + $offset ) ) { $operator = trim($matches['id']); $command = trim($matches['data']); $offset += \strlen($matches[0]); - } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) { + } elseif (preg_match( + '/\G\s*([0-9\.\-]+\s*?)+\s*/si', + $text_part, + $matches, + 0, + $offset + ) + ) { $type = 'n'; $command = trim($matches[0]); $offset += \strlen($matches[0]); - } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) { + } elseif (preg_match( + 
'/\G\s*([A-Z\*]+)\s*/si', + $text_part, + $matches, + 0, + $offset + ) + ) { $type = ''; $operator = $matches[1]; $command = '';