Skip to content

Commit

Permalink
Reduce excessive substring allocation, using preg_match's offset parameter (#595)
Browse files Browse the repository at this point in the history

* use preg_match offset

* clean code

* fix codestyle

* preg_match returns false only on error
  • Loading branch information
se-ti committed May 10, 2023
1 parent 9094d77 commit fafe2f2
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/Smalot/PdfParser/Element.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ public static function parse(string $content, ?Document $document = null, int &$
$old_position = $position;

if (!$only_values) {
if (!preg_match('/^\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', substr($content, $position), $match)) {
if (!preg_match('/\G\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', $content, $match, 0, $position)) {
break;
} else {
$name = ltrim($match['name'], '/');
Expand Down
69 changes: 55 additions & 14 deletions src/Smalot/PdfParser/PDFObject.php
Original file line number Diff line number Diff line change
Expand Up @@ -592,18 +592,22 @@ public function getCommandsText(string $text_part, int &$offset = 0): array
case '/':
$type = $char;
if (preg_match(
'/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
substr($text_part, $offset),
$matches
'/\G\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$operator = $matches[2];
$command = $matches[1];
$offset += \strlen($matches[0]);
} elseif (preg_match(
'/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
substr($text_part, $offset),
$matches
'/\G\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$operator = $matches[2];
Expand All @@ -621,7 +625,14 @@ public function getCommandsText(string $text_part, int &$offset = 0): array
// get elements
$command = $this->getCommandsText($text_part, $offset);

if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
if (preg_match(
'/\G\s*[A-Z]{1,2}\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$operator = trim($matches[0]);
$offset += \strlen($matches[0]);
}
Expand All @@ -642,7 +653,14 @@ public function getCommandsText(string $text_part, int &$offset = 0): array
$offset = $strpos + 1;
}

if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
if (preg_match(
'/\G\s*[A-Z]{1,2}\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$operator = trim($matches[0]);
$offset += \strlen($matches[0]);
}
Expand Down Expand Up @@ -682,7 +700,14 @@ public function getCommandsText(string $text_part, int &$offset = 0): array
$command = substr($text_part, $offset, $strpos - $offset - 1);
$offset = $strpos;

if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
if (preg_match(
'/\G\s*([A-Z\']{1,2})\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$operator = $matches[1];
$offset += \strlen($matches[0]);
}
Expand All @@ -693,19 +718,35 @@ public function getCommandsText(string $text_part, int &$offset = 0): array
if ('ET' == substr($text_part, $offset, 2)) {
break;
} elseif (preg_match(
'/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
substr($text_part, $offset),
$matches
'/\G\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$operator = trim($matches['id']);
$command = trim($matches['data']);
$offset += \strlen($matches[0]);
} elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
} elseif (preg_match(
'/\G\s*([0-9\.\-]+\s*?)+\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$type = 'n';
$command = trim($matches[0]);
$offset += \strlen($matches[0]);
} elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
} elseif (preg_match(
'/\G\s*([A-Z\*]+)\s*/si',
$text_part,
$matches,
0,
$offset
)
) {
$type = '';
$operator = $matches[1];
$command = '';
Expand Down

0 comments on commit fafe2f2

Please sign in to comment.