Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regex Refactoring, Removal of Buffer #119

Merged
merged 2 commits into from
Mar 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions phpunit.xml.dist
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" bootstrap="./tests/bootstrap.php" colors="true" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd" cacheDirectory=".phpunit.cache">
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/10.0/phpunit.xsd"
cacheDirectory=".phpunit.cache"
failOnWarning="true"
colors="true"
bootstrap="./tests/bootstrap.php"
displayDetailsOnTestsThatTriggerDeprecations="true"
displayDetailsOnTestsThatTriggerWarnings="true"
displayDetailsOnTestsThatTriggerErrors="true">
<coverage>
<include>
<directory suffix=".php">./src/JShrink/</directory>
</include>
<report>
<text outputFile="php://stdout" showUncoveredFiles="false"/>
<text outputFile="php://stdout" showUncoveredFiles="false" />
</report>
</coverage>
<testsuites>
Expand All @@ -19,5 +27,5 @@
<group>development</group>
</exclude>
</groups>
<logging/>
<logging />
</phpunit>
125 changes: 94 additions & 31 deletions src/JShrink/Minifier.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,20 @@ class Minifier
*/
protected $c;

/**
 * The final character of the text most recently passed to echo(). It is
 * set to a newline by initialize() and is consulted by loop() when deciding
 * whether a slash starts a regular expression literal.
 *
 * @var string
 */
protected $last_char;

/**
 * Accumulates the minified JavaScript as it is emitted. It is reset to an
 * empty string by initialize(), appended to by echo(), and returned by
 * minifyToString().
 *
 * @var string
 */
protected $output;

/**
* Contains the options for the current minification process.
*
Expand All @@ -95,6 +109,9 @@ class Minifier
*/
protected static $defaultOptions = ['flaggedComments' => true];


/**
 * JavaScript keywords that endsInKeyword() looks for at the end of the
 * emitted output; when one is found before a slash, loop() treats the slash
 * as the start of a regular expression literal.
 *
 * @var string[]
 */
protected static $keywords = ["delete", "do", "for", "in", "instanceof", "return", "typeof", "yield"];

/**
* Contains lock ids which are used to replace certain code patterns and
* prevent them from being minified
Expand All @@ -115,17 +132,11 @@ class Minifier
public static function minify($js, $options = [])
{
try {
ob_start();

$jshrink = new Minifier();
$js = $jshrink->lock($js);
$jshrink->minifyDirectToOutput($js, $options);

// Sometimes there's a leading new line, so we trim that out here.
$js = ltrim(ob_get_clean());
$js = ltrim($jshrink->minifyToString($js, $options));
$js = $jshrink->unlock($js);
unset($jshrink);

return $js;
} catch (\Exception $e) {
if (isset($jshrink)) {
Expand All @@ -134,9 +145,6 @@ public static function minify($js, $options = [])
$jshrink->clean();
unset($jshrink);
}

// without this call things get weird, with partially outputted js.
ob_end_clean();
throw $e;
}
}
Expand All @@ -148,11 +156,12 @@ public static function minify($js, $options = [])
* @param string $js The raw javascript to be minified
* @param array $options Various runtime options in an associative array
*/
protected function minifyDirectToOutput($js, $options)
protected function minifyToString($js, $options)
{
$this->initialize($js, $options);
$this->loop();
$this->clean();
return $this->output;
}

/**
Expand All @@ -177,7 +186,9 @@ protected function initialize($js, $options)
// Populate "a" with a new line, "b" with the first character, before
// entering the loop
$this->a = "\n";
$this->b = $this->getReal();
$this->b = "\n";
$this->last_char = "\n";
$this->output = "";
}

/**
Expand All @@ -192,6 +203,14 @@ protected function initialize($js, $options)
'[' => true,
'@' => true];


/**
 * Appends text to the minified output and remembers its final character.
 *
 * The text is only collected in $this->output; nothing is written to PHP's
 * standard output, so the result can be returned to the caller as a string
 * without relying on output buffering.
 *
 * @param string|false|null $char One or more characters to emit. Empty
 *                                strings, false and null are ignored.
 * @return void
 */
protected function echo($char)
{
    $text = (string) $char;

    // An empty chunk has no last character; indexing it with [-1] would
    // raise a warning, so there is nothing to record.
    if ($text === '') {
        return;
    }

    $this->output .= $text;
    $this->last_char = $text[-1];
}


/**
* The primary action occurs here. This function loops through the input string,
* outputting anything that's relevant and discarding anything that is not.
Expand All @@ -201,10 +220,11 @@ protected function loop()
while ($this->a !== false && !is_null($this->a) && $this->a !== '') {
switch ($this->a) {
// new lines
case "\r":
case "\n":
// if the next line is something that can't stand alone preserve the newline
if ($this->b !== false && isset($this->noNewLineCharacters[$this->b])) {
echo $this->a;
$this->echo($this->a);
$this->saveString();
break;
}
Expand All @@ -220,22 +240,23 @@ protected function loop()
// no break
case ' ':
if (static::isAlphaNumeric($this->b)) {
echo $this->a;
$this->echo($this->a);
}

$this->saveString();
break;

default:
switch ($this->b) {
case "\r":
case "\n":
if (strpos('}])+-"\'', $this->a) !== false) {
echo $this->a;
$this->echo($this->a);
$this->saveString();
break;
} else {
if (static::isAlphaNumeric($this->a)) {
echo $this->a;
$this->echo($this->a);
$this->saveString();
}
}
Expand All @@ -254,7 +275,7 @@ protected function loop()
continue 3;
}

echo $this->a;
$this->echo($this->a);
$this->saveString();
break;
}
Expand All @@ -263,9 +284,20 @@ protected function loop()
// do reg check of doom
$this->b = $this->getReal();

if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) {
$this->saveRegex();
if ($this->b == '/') {
$valid_tokens = "(,=:[!&|?\n";
if (strpos($valid_tokens, $this->last_char) !== false || strpos($valid_tokens, $this->a) !== false) {
// Regex can appear unquoted after these symbols
$this->saveRegex();
} else if ($this->endsInKeyword()) {
// A regex literal can also follow a keyword such as "return" or "typeof".
$this->saveRegex();
}
}

// if (($this->b == '/' && strpos('(,=:[!&|?', $this->a) !== false)) {
// $this->saveRegex();
// }
}
}

Expand Down Expand Up @@ -332,8 +364,25 @@ protected function getChar()
*/
protected function peek()
{
    // Past the end of the input there is nothing left to look at.
    if ($this->index >= $this->len) {
        return false;
    }

    // Read the next character without advancing the index.
    $char = $this->input[$this->index];

    // Convert all line endings to the unix standard. A `\r\n` pair is
    // therefore seen as two consecutive newlines.
    if ($char === "\r") {
        return "\n";
    }

    // Normalize every other control character (anything below 0x20 except
    // the newline) into a standard space.
    if ($char !== "\n" && ord($char) < 0x20) {
        return ' ';
    }

    return $char;
}

/**
Expand Down Expand Up @@ -428,17 +477,17 @@ protected function processMultiLineComments($startIndex)
// If conditional comments or flagged comments are not the first thing in the script
// we need to echo a and fill it with a space before moving on.
if ($startIndex > 0) {
echo $this->a;
$this->echo($this->a);
$this->a = " ";

// If the comment started on a new line we let it stay on the new line
if ($this->input[($startIndex - 1)] === "\n") {
echo "\n";
$this->echo("\n");
}
}

$endPoint = ($this->index - 1) - $startIndex;
echo substr($this->input, $startIndex, $endPoint);
$this->echo(substr($this->input, $startIndex, $endPoint));

$this->c = $char;

Expand Down Expand Up @@ -504,7 +553,7 @@ protected function saveString()
$stringType = $this->a;

// Echo out that starting quote
echo $this->a;
$this->echo($this->a);

// Loop until the string is done
// Grab the very next character and load it into a
Expand All @@ -523,7 +572,7 @@ protected function saveString()
// block below.
case "\n":
if ($stringType === '`') {
echo $this->a;
$this->echo($this->a);
} else {
throw new \RuntimeException('Unclosed string at position: ' . $startpos);
}
Expand All @@ -543,14 +592,14 @@ protected function saveString()
}

// echo out the escaped character and restart the loop.
echo $this->a . $this->b;
$this->echo($this->a . $this->b);
break;


// Since we're not dealing with any special cases we simply
// output the character and continue our loop.
default:
echo $this->a;
$this->echo($this->a);
}
}
}
Expand All @@ -563,23 +612,23 @@ protected function saveString()
*/
protected function saveRegex()
{
    // Emit the pending character and the opening slash of the pattern.
    $this->echo($this->a . $this->b);

    // Inside a character class (`[...]`) an unescaped slash does not end the
    // regular expression, e.g. /[/"]/ is a single valid pattern.
    $inCharacterClass = false;

    while (($this->a = $this->getChar()) !== false) {
        if ($this->a === '/' && !$inCharacterClass) {
            break;
        }

        if ($this->a === '\\') {
            // Emit the backslash and take the escaped character verbatim so
            // it can neither close the pattern nor toggle a character class.
            $this->echo($this->a);
            $this->a = $this->getChar();

            // Input ended right after the backslash: nothing more to emit.
            if ($this->a === false) {
                break;
            }
        } elseif ($this->a === '[') {
            $inCharacterClass = true;
        } elseif ($this->a === ']') {
            $inCharacterClass = false;
        }

        if ($this->a === "\n") {
            throw new \RuntimeException('Unclosed regex pattern at position: ' . $this->index);
        }

        $this->echo($this->a);
    }

    // The closing slash stays in "a"; load the character after it into "b".
    $this->b = $this->getReal();
}
Expand All @@ -595,6 +644,20 @@ protected static function isAlphaNumeric($char)
return preg_match('/^[\w\$\pL]$/', $char) === 1 || $char == '/';
}

/**
 * Reports whether the code seen so far ends with one of static::$keywords,
 * optionally followed by spaces.
 *
 * The pending character in $this->a is included in the check because
 * saveRegex() emits it itself, so it is not yet part of $this->output.
 * A keyword only counts when it is a whole word: it must not be preceded by
 * an identifier character, a non-ASCII byte, or a dot (property access),
 * so names such as "margin" or "undo" are not mistaken for "in" or "do".
 *
 * @return bool True when a keyword ends the code seen so far.
 */
protected function endsInKeyword()
{
    if (empty(static::$keywords)) {
        return false;
    }

    $quoted = array_map(
        static function ($keyword) {
            return preg_quote($keyword, '/');
        },
        static::$keywords
    );

    // Only the tail can matter: the longest keyword, up to two trailing
    // spaces and the one character in front of the keyword. Limiting the
    // subject keeps the check cheap on large outputs.
    $window = max(array_map('strlen', static::$keywords)) + 3;
    $tail = substr($this->output . $this->a, -$window);

    $pattern = '/(?<![\w$.\x80-\xff])(?:' . implode('|', $quoted) . ') {0,2}$/';

    return preg_match($pattern, $tail) === 1;
}



/**
* Replace patterns in the given string and store the replacement
*
Expand Down
3 changes: 3 additions & 0 deletions tests/Resources/jshrink/input/regex_keywords.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
return /'/

typeof /'/
7 changes: 7 additions & 0 deletions tests/Resources/jshrink/input/regex_spaces.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
function airplaneIsCarrierBased (model) {
return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test(
model
)
}

console.log(airplaneIsCarrierBased('F6F-5 Hellcat'))
1 change: 1 addition & 0 deletions tests/Resources/jshrink/input/regex_with_quote.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/^(")$/
3 changes: 3 additions & 0 deletions tests/Resources/jshrink/input/regex_with_quote_real.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
function test (input) {
return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input)
}
2 changes: 2 additions & 0 deletions tests/Resources/jshrink/output/regex_keywords.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
return /'/
typeof /'/
2 changes: 2 additions & 0 deletions tests/Resources/jshrink/output/regex_spaces.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
function airplaneIsCarrierBased(model){return /^(FI-167|Swordfish|Fulmar|Firefly|F4F Wildcat|F6F-[35] Hellcat|Latécoère 298|A[567]M)$/.test(model)}
console.log(airplaneIsCarrierBased('F6F-5 Hellcat'))
1 change: 1 addition & 0 deletions tests/Resources/jshrink/output/regex_with_quote.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/^(")$/
1 change: 1 addition & 0 deletions tests/Resources/jshrink/output/regex_with_quote_real.js
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
function test(input){return /^(אחה"צ|אחרי הצהריים|בערב)$/.test(input)}