Skip to content

Commit

Permalink
Merge pull request minkphp#722 from stof/optimize_splitting
Browse files Browse the repository at this point in the history
Optimize the splitting of XPath unions
  • Loading branch information
stof authored Dec 30, 2016
2 parents 12e09bf + 6ca8f1d commit bb28774
Showing 1 changed file with 36 additions and 38 deletions.
74 changes: 36 additions & 38 deletions src/Selector/Xpath/Manipulator.php
Original file line number Diff line number Diff line change
Expand Up @@ -76,56 +76,54 @@ public function prepend($xpath, $prefix)
*/
private function splitUnionParts($xpath)
{
    // Splits an XPath expression on its top-level union operators ("|").
    //
    // A pipe is only treated as a union separator when it sits outside of
    // string literals and outside of square-bracket predicates. Whitespace
    // at the very start of the expression and directly after a separating
    // pipe is skipped; whitespace before a pipe is left in the returned part.
    //
    // $xpath is the expression to split. The return value is the list of
    // union operands, or array($xpath) untouched when the expression holds
    // no union or is malformed (unterminated string literal, unbalanced
    // brackets).

    if (false === strpos($xpath, '|')) {
        return array($xpath); // Without any pipe in the string there cannot be a union
    }

    // Same character list that trim() strips by default
    $whitespace = " \t\n\r\0\x0B";

    $xpathLen = strlen($xpath);
    $openedBrackets = 0;
    // Skip whitespace at the beginning of the expression
    $startPosition = strspn($xpath, $whitespace);

    $unionParts = array();

    // "<=" on purpose: the iteration where $i equals the length flushes the last part
    for ($i = $startPosition; $i <= $xpathLen; ++$i) {
        // Jump straight to the next quote, bracket or pipe (or to the end of the string)
        $i += strcspn($xpath, '"\'[]|', $i);

        if ($i < $xpathLen) {
            switch ($xpath[$i]) {
                case '"':
                case "'":
                    // Jump to the matching closing quote of the string literal
                    $closingQuote = strpos($xpath, $xpath[$i], $i + 1);

                    if (false === $closingQuote) {
                        return array($xpath); // Unterminated literal: invalid XPath, don't split it
                    }

                    $i = $closingQuote;

                    continue 2;

                case '[':
                    ++$openedBrackets;

                    continue 2;

                case ']':
                    --$openedBrackets;

                    continue 2;
            }
        }

        // From here on we are either on a pipe or at the end of the string.
        // Inside a predicate (or after a stray closing bracket) neither ends a part.
        if ($openedBrackets) {
            continue;
        }

        $unionParts[] = substr($xpath, $startPosition, $i - $startPosition);

        if ($i === $xpathLen) {
            return $unionParts;
        }

        // Skip whitespace following the pipe; the next part starts right after it
        $i += strspn($xpath, $whitespace, $i + 1);
        $startPosition = $i + 1;
    }

    // The end of the string was reached with unbalanced brackets
    return array($xpath);
}

}

0 comments on commit bb28774

Please sign in to comment.