Skip to content

Commit

Permalink
Merge pull request #119 from tedious/issue_68
Browse files Browse the repository at this point in the history
Regex Refactoring, Removal of Buffer
  • Loading branch information
tedivm authored Mar 5, 2023
2 parents dfe6fc8 + 552a46c commit 9db60b1
Show file tree
Hide file tree
Showing 10 changed files with 125 additions and 34 deletions.
14 changes: 11 additions & 3 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" bootstrap="./tests/bootstrap.php" colors="true" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd" cacheDirectory=".phpunit.cache">
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd"
cacheDirectory=".phpunit.cache"
failOnWarning="true"
colors="true"
bootstrap="./tests/bootstrap.php"
displayDetailsOnTestsThatTriggerDeprecations="true"
displayDetailsOnTestsThatTriggerWarnings="true"
displayDetailsOnTestsThatTriggerErrors="true">
<coverage>
<include>
<directory suffix=".php">./src/JShrink/</directory>
</include>
<report>
<text outputFile="php://stdout" showUncoveredFiles="false"/>
<text outputFile="php://stdout" showUncoveredFiles="false" />
</report>
</coverage>
<testsuites>
Expand All @@ -19,5 +27,5 @@
<group>development</group>
</exclude>
</groups>
<logging/>
<logging />
</phpunit>
125 changes: 94 additions & 31 deletions src/JShrink/Minifier.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ class Minifier
*/
protected $c;

/**
 * The final character of the most recent chunk appended to the output.
 *
 * Consulted when deciding whether a forward slash begins a regex literal.
 *
 * @var string
 */
protected $last_char;

/**
 * Accumulates the minified javascript as it is produced; this is the
 * string handed back once minification finishes.
 *
 * @var string
 */
protected $output;

/**
* Contains the options for the current minification process.
*
Expand All @@ -95,6 +109,9 @@ class Minifier
*/
protected static $defaultOptions = ['flaggedComments' => true];


/**
 * Keywords checked against the tail of the output when a forward slash is
 * encountered: a slash that follows one of these is treated as the start of
 * a regex literal.
 *
 * @var string[]
 */
protected static $keywords = ["delete", "do", "for", "in", "instanceof", "return", "typeof", "yield"];

/**
* Contains lock ids which are used to replace certain code patterns and
* prevent them from being minified
Expand All @@ -115,17 +132,11 @@ class Minifier
public static function minify($js, $options = [])
{
try {
ob_start();

$jshrink = new Minifier();
$js = $jshrink->lock($js);
$jshrink->minifyDirectToOutput($js, $options);

// Sometimes there's a leading new line, so we trim that out here.
$js = ltrim(ob_get_clean());
$js = ltrim($jshrink->minifyToString($js, $options));
$js = $jshrink->unlock($js);
unset($jshrink);

return $js;
} catch (\Exception $e) {
if (isset($jshrink)) {
Expand All @@ -134,9 +145,6 @@ public static function minify($js, $options = [])
$jshrink->clean();
unset($jshrink);
}

// without this call things get weird, with partially outputted js.
ob_end_clean();
throw $e;
}
}
Expand All @@ -148,11 +156,12 @@ public static function minify($js, $options = [])
* @param string $js The raw javascript to be minified
* @param array $options Various runtime options in an associative array
*/
protected function minifyDirectToOutput($js, $options)
protected function minifyToString($js, $options)
{
$this->initialize($js, $options);
$this->loop();
$this->clean();
return $this->output;
}

/**
Expand All @@ -177,7 +186,9 @@ protected function initialize($js, $options)
// Populate "a" with a new line, "b" with the first character, before
// entering the loop
$this->a = "\n";
$this->b = $this->getReal();
$this->b = "\n";
$this->last_char = "\n";
$this->output = "";
}

/**
Expand All @@ -192,6 +203,14 @@ protected function initialize($js, $options)
'[' => true,
'@' => true];


/**
 * Appends a chunk of minified output to the internal buffer.
 *
 * The chunk's final character is recorded in $last_char so that later
 * look-behind checks (such as regex detection) can see what was written
 * last. Nothing is written to stdout: the buffer is the only destination,
 * and callers receive it as the return value of the minification.
 *
 * @param  string $char One or more characters to append
 * @return void
 */
protected function echo($char)
{
    $char = (string) $char;

    // Nothing to append; indexing an empty string with [-1] raises a warning.
    if ($char === '') {
        return;
    }

    $this->output .= $char;
    $this->last_char = $char[-1];
}


/**
* The primary action occurs here. This function loops through the input string,
* outputting anything that's relevant and discarding anything that is not.
Expand All @@ -201,10 +220,11 @@ protected function loop()
while ($this->a !== false && !is_null($this->a) && $this->a !== '') {
switch ($this->a) {
// new lines
case "\r":
case "\n":
// if the next line is something that can't stand alone preserve the newline
if ($this->b !== false && isset($this->noNewLineCharacters[$this->b])) {
echo $this->a;
$this->echo($this->a);
$this->saveString();
break;
}
Expand All @@ -220,22 +240,23 @@ protected function loop()
// no break
case ' ':
if (static::isAlphaNumeric($this->b)) {
echo $this->a;
$this->echo($this->a);
}

$this->saveString();
break;

default:
switch ($this->b) {
case "\r":
case "\n":
if (strpos('}])+-"\'', $this->a) !== false) {
echo $this->a;
$this->echo($this->a);
$this->saveString();
break;
} else {
if (static::isAlphaNumeric($this->a)) {
echo $this->a;
$this->echo($this->a);
$this->saveString();
}
}
Expand All @@ -254,7 +275,7 @@ protected function loop()
continue 3;
}

echo $this->a;
$this->echo($this->a);
$this->saveString();
break;
}
Expand All @@ -263,9 +284,20 @@ protected function loop()
// do reg check of doom
$this->b = $this->getReal();

if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) {
$this->saveRegex();
if ($this->b == '/') {
$valid_tokens = "(,=:[!&|?\n";
if (strpos($valid_tokens, $this->last_char) !== false || strpos($valid_tokens, $this->a) !== false) {
// Regex can appear unquoted after these symbols
$this->saveRegex();
} else if ($this->endsInKeyword()) {
// A slash that follows a keyword such as "return" or "typeof" also starts a regex.
$this->saveRegex();
}
}

// if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) {
// $this->saveRegex();
// }
}
}

Expand Down Expand Up @@ -332,8 +364,25 @@ protected function getChar()
*/
protected function peek()
{
# Pull the next character but don't push the index.
return $this->index < $this->len ? $this->input[$this->index] : false;
if ($this->index >= $this->len) {
return false;
}

$char = $this->input[$this->index];
# Convert all line endings to unix standard.
# `\r\n` converts to `\n\n` and is minified.
if ($char == "\r") {
$char = "\n";
}

// Normalize all whitespace except for the newline character into a
// standard space.
if ($char !== "\n" && $char < "\x20") {
return ' ';
}

# Return the next character but don't push the index.
return $char;
}

/**
Expand Down Expand Up @@ -428,17 +477,17 @@ protected function processMultiLineComments($startIndex)
// If conditional comments or flagged comments are not the first thing in the script
// we need to echo a and fill it with a space before moving on.
if ($startIndex > 0) {
echo $this->a;
$this->echo($this->a);
$this->a = " ";

// If the comment started on a new line we let it stay on the new line
if ($this->input[($startIndex - 1)] === "\n") {
echo "\n";
$this->echo("\n");
}
}

$endPoint = ($this->index - 1) - $startIndex;
echo substr($this->input, $startIndex, $endPoint);
$this->echo(substr($this->input, $startIndex, $endPoint));

$this->c = $char;

Expand Down Expand Up @@ -504,7 +553,7 @@ protected function saveString()
$stringType = $this->a;

// Echo out that starting quote
echo $this->a;
$this->echo($this->a);

// Loop until the string is done
// Grab the very next character and load it into a
Expand All @@ -523,7 +572,7 @@ protected function saveString()
// block below.
case "\n":
if ($stringType === '`') {
echo $this->a;
$this->echo($this->a);
} else {
throw new \RuntimeException('Unclosed string at position: ' . $startpos);
}
Expand All @@ -543,14 +592,14 @@ protected function saveString()
}

// echo out the escaped character and restart the loop.
echo $this->a . $this->b;
$this->echo($this->a . $this->b);
break;


// Since we're not dealing with any special cases we simply
// output the character and continue our loop.
default:
echo $this->a;
$this->echo($this->a);
}
}
}
Expand All @@ -563,23 +612,23 @@ protected function saveString()
*/
protected function saveRegex()
{
echo $this->a . $this->b;
$this->echo($this->a . $this->b);

while (($this->a = $this->getChar()) !== false) {
if ($this->a === '/') {
break;
}

if ($this->a === '\\') {
echo $this->a;
$this->echo($this->a);
$this->a = $this->getChar();
}

if ($this->a === "\n") {
throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
}

echo $this->a;
$this->echo($this->a);
}
$this->b = $this->getReal();
}
Expand All @@ -595,6 +644,20 @@ protected static function isAlphaNumeric($char)
return preg_match('/^[\w\$\pL]$/', $char) === 1 || $char == '/';
}

/**
 * Reports whether the output written so far ends with one of the keywords
 * that can be followed by a regex literal.
 *
 * A single trailing space after the keyword is tolerated. The keyword has
 * to be a whole token: a match that is only the tail of a longer identifier
 * (for example "in" at the end of "margin") does not count, because a slash
 * after an identifier is a division operator rather than a regex delimiter.
 *
 * @return bool
 */
protected function endsInKeyword()
{
    // Ignore at most one trailing space, mirroring the "keyword " form.
    $tail = str_ends_with($this->output, ' ')
        ? substr($this->output, 0, -1)
        : $this->output;

    foreach (static::$keywords as $keyword) {
        if (!str_ends_with($tail, $keyword)) {
            continue;
        }

        $before = strlen($tail) - strlen($keyword) - 1;
        if ($before < 0) {
            // The keyword sits at the very start of the output.
            return true;
        }

        // Bytes >= 0x80 belong to multi-byte characters, which are treated
        // here as part of an identifier.
        $prev = $tail[$before];
        $isIdentifierByte = ctype_alnum($prev)
            || $prev === '_'
            || $prev === '$'
            || ord($prev) >= 0x80;

        if (!$isIdentifierByte) {
            return true;
        }
    }

    return false;
}



/**
* Replace patterns in the given string and store the replacement
*
Expand Down
3 changes: 3 additions & 0 deletions tests/Resources/jshrink/input/regex_keywords.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
return /'/

typeof /'/
7 changes: 7 additions & 0 deletions tests/Resources/jshrink/input/regex_spaces.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
function airplaneIsCarrierBased (model) {
return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test(
model
)
}

console.log(airplaneIsCarrierBased('F6F-5 Hellcat'))
1 change: 1 addition & 0 deletions tests/Resources/jshrink/input/regex_with_quote.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/^(")$/
3 changes: 3 additions & 0 deletions tests/Resources/jshrink/input/regex_with_quote_real.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
function test (input) {
return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input)
}
2 changes: 2 additions & 0 deletions tests/Resources/jshrink/output/regex_keywords.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
return /'/
typeof /'/
2 changes: 2 additions & 0 deletions tests/Resources/jshrink/output/regex_spaces.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
function airplaneIsCarrierBased(model){return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test(model)}
console.log(airplaneIsCarrierBased('F6F-5 Hellcat'))
1 change: 1 addition & 0 deletions tests/Resources/jshrink/output/regex_with_quote.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/^(")$/
1 change: 1 addition & 0 deletions tests/Resources/jshrink/output/regex_with_quote_real.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
function test(input){return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input)}

0 comments on commit 9db60b1

Please sign in to comment.