Skip to content
This repository has been archived by the owner on Jan 29, 2020. It is now read-only.

Commit

Permalink
Ensure that quoted strings are matched correctly
Browse files Browse the repository at this point in the history
We need to match the initial quote, and then all characters up to but
not including a matching final quote, taking into account escaped quotes
as well.

This patch uses a technique learned from http://blog.stevenlevithan.com/archives/match-quoted-string
to do so. It updates the existing regular expressions, including the one
modified for #21, so that they properly match quoted strings, and it
introduces several new test cases to verify the changes.
  • Loading branch information
weierophinney committed Apr 9, 2018
1 parent dd45d1e commit bb5f960
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 10 deletions.
14 changes: 7 additions & 7 deletions src/Document/Query.php
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ public static function cssToXpath($path)

// Arbitrary attribute value contains whitespace
$path = preg_replace_callback(
'/\[\S+?(["\'])(.+?)\1\]/',
'/\[\S+?([\'"])((?:(?!\1)[^\\\]|\\.)*)\1\]/',
function ($matches) {
return str_replace($matches[2], preg_replace('/\s+/', '\s', $matches[2]), $matches[0]);
},
Expand Down Expand Up @@ -147,29 +147,29 @@ protected static function _tokenize($expression)

// arbitrary attribute strict equality
$expression = preg_replace_callback(
'|\[@?([a-z0-9_-]+)=[\'"]([^\'"]+)[\'"]\]|i',
'/\[@?([a-z0-9_-]+)=([\'"])((?:(?!\2)[^\\\]|\\.)*)\2\]/i',
function ($matches) {
return '[@' . strtolower($matches[1]) . "='" . $matches[2] . "']";
return sprintf("[@%s='%s']", strtolower($matches[1]), str_replace("'", "\\'", $matches[3]));
},
$expression
);

// arbitrary attribute contains full word
$expression = preg_replace_callback(
'|\[([a-z0-9_-]+)~=[\'"]([^\'"]+)[\'"]\]|i',
'/\[([a-z0-9_-]+)~=([\'"])((?:(?!\2)[^\\\]|\\.)*)\2\]/i',
function ($matches) {
return "[contains(concat(' ', normalize-space(@" . strtolower($matches[1]) . "), ' '), ' "
. $matches[2] . " ')]";
. $matches[3] . " ')]";
},
$expression
);

// arbitrary attribute contains specified content
$expression = preg_replace_callback(
'|\[([a-z0-9_-]+)\*=[\'"]([^\'"]+)[\'"]\]|i',
'/\[([a-z0-9_-]+)\*=([\'"])((?:(?!\2)[^\\\]|\\.)*)\2\]/i',
function ($matches) {
return "[contains(@" . strtolower($matches[1]) . ", '"
. $matches[2] . "')]";
. $matches[3] . "')]";
},
$expression
);
Expand Down
30 changes: 27 additions & 3 deletions test/Document/QueryTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -173,9 +173,33 @@ public function testCanTransformWithAttributeAndDot()
$this->assertEquals("//a[@href='http://example.com']", $test);
}

public function testTransformNestedAttributeSelectors()
/**
 * Data provider for testTransformNestedAttributeSelectors().
 *
 * Every named data set is a two-element list: the CSS selector to
 * transform, followed by the XPath expression expected for it.
 *
 * @return array
 */
public function nestedAttributeSelectors()
{
    // Both plain-quoted variants are expected to produce this same XPath.
    $plainQuotedXpath = "//select[@name='foo']//option[@selected='selected']";

    $dataSets = [];

    $dataSets['with-double-quotes'] = [
        'select[name="foo"] option[selected="selected"]',
        $plainQuotedXpath,
    ];

    $dataSets['with-single-quotes'] = [
        "select[name='foo'] option[selected='selected']",
        $plainQuotedXpath,
    ];

    // Single quotes embedded in a double-quoted attribute value; the
    // expected XPath carries them with a preceding backslash.
    $dataSets['double-quotes-containing-single-quotes'] = [
        "select[name=\"f'oo\"] option[selected=\"sel'ected\"]",
        "//select[@name='f\'oo']//option[@selected='sel\'ected']",
    ];

    // Double quotes embedded in a single-quoted attribute value; the
    // expected XPath carries them through unchanged.
    $dataSets['single-quotes-containing-double-quotes'] = [
        "select[name='f\"oo'] option[selected='sel\"ected']",
        "//select[@name='f\"oo']//option[@selected='sel\"ected']",
    ];

    return $dataSets;
}

/**
 * Verifies that Query::cssToXpath() produces the expected XPath for each
 * data set supplied by nestedAttributeSelectors().
 *
 * @dataProvider nestedAttributeSelectors
 *
 * @param string $selector      CSS selector handed to Query::cssToXpath()
 * @param string $expectedXpath XPath expression the result is compared against
 */
public function testTransformNestedAttributeSelectors($selector, $expectedXpath)
{
// NOTE(review): the hard-coded check on the next two lines repeats the
// 'with-double-quotes' data set; it looks like the pre-change body left in
// by the diff rendering rather than part of the final method - confirm.
$test = Query::cssToXpath('select[name="foo"] option[selected="selected"]');
$this->assertEquals("//select[@name='foo']//option[@selected='selected']", $test);
// Data-driven check: transform the provided selector and compare it with
// the provided expected XPath.
$this->assertEquals($expectedXpath, Query::cssToXpath($selector));
}
}

0 comments on commit bb5f960

Please sign in to comment.