Skip to content

Commit

Permalink
Fix operator handling in subquery segment values (#21506)
Browse files Browse the repository at this point in the history
* Add tests for subquery segments

* Fix operator handling in subquery segment values

* Escape segment values using separate method

* Quote delimiter pattern for regex usage

Co-authored-by: Michal Kleiner <[email protected]>

---------

Co-authored-by: Michal Kleiner <[email protected]>
  • Loading branch information
mneudert and michalkleiner authored Nov 13, 2023
1 parent 3956094 commit 7055925
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 1 deletion.
13 changes: 12 additions & 1 deletion core/Segment.php
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ protected function getCleanedExpression($expression)
// This is required to ensure segments like actionUrl!@value really do not include any visit having an action containing `value`
if ($this->doesSegmentNeedSubquery($matchType, $name)) {
$operator = $this->getInvertedOperatorForSubQuery($matchType);
$stringSegment = $name . $operator . $value;
$stringSegment = $name . $operator . $this->escapeSegmentValue($value);
$segmentObj = new Segment($stringSegment, $this->idSites, $this->startDate, $this->endDate);

$select = 'log_visit.idvisit';
Expand Down Expand Up @@ -705,4 +705,15 @@ public function getOriginalString()
{
return $this->originalString;
}

/**
* Escapes segment expression delimiters in a segment value with a backslash if not already done.
*/
private function escapeSegmentValue(string $value): string
{
$delimiterPattern = SegmentExpression::AND_DELIMITER . SegmentExpression::OR_DELIMITER;
$pattern = '/((?<!\\\)[' . preg_quote($delimiterPattern) . '])/';

return preg_replace($pattern, '\\\$1', $value);
}
}
137 changes: 137 additions & 0 deletions tests/PHPUnit/Integration/SegmentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,143 @@ public function testCommon($segment, $expected)
$this->assertEquals(32, strlen($segment->getHash()));
}

/**
* @return iterable<string, array{string, string, array{where: string, bind: string}}>
*/
public function getCommonSubqueryTestData(): iterable
{
$encodedValueOr = urlencode(urlencode('a,b'));
$encodedValueAnd = urlencode(urlencode('a;b'));
$escapedValueOr = urlencode(urlencode('a\,b'));
$escapedValueAnd = urlencode(urlencode('a\;b'));

$segmentFrom = '2020-02-02 02:00:00';

$whereSingle = '(log_visit.idvisit NOT IN(SELECT log_visit.idvisit FROM log_visit AS log_visit LEFT JOIN log_link_visit_action AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit WHERE(log_visit.visit_last_action_time >= ?)AND(log_link_visit_action.idaction_name = ?)))';
$whereMultiAnd = '(log_visit.idvisit NOT IN(SELECT log_visit.idvisit FROM log_visit AS log_visit LEFT JOIN log_link_visit_action AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit WHERE(log_visit.visit_last_action_time >= ?)AND(log_link_visit_action.idaction_name = ?)))AND(log_visit.idvisit NOT IN(SELECT log_visit.idvisit FROM log_visit AS log_visit LEFT JOIN log_link_visit_action AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit WHERE(log_visit.visit_last_action_time >= ?)AND(log_link_visit_action.idaction_name = ?)))';
$whereMultiOr = '((log_visit.idvisit NOT IN(SELECT log_visit.idvisit FROM log_visit AS log_visit LEFT JOIN log_link_visit_action AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit WHERE(log_visit.visit_last_action_time >= ?)AND(log_link_visit_action.idaction_name = ?)))OR(log_visit.idvisit NOT IN(SELECT log_visit.idvisit FROM log_visit AS log_visit LEFT JOIN log_link_visit_action AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit WHERE(log_visit.visit_last_action_time >= ?)AND(log_link_visit_action.idaction_name = ?))))';

yield 'normal segment' => [
'pageTitle!=a',
$segmentFrom,
[
'where' => $whereSingle,
'bind' => [$segmentFrom, '1'],
],
];

yield 'segment with AND in value' => [
'pageTitle!=' . $encodedValueAnd,
$segmentFrom,
[
'where' => $whereSingle,
'bind' => [$segmentFrom, '3'],
],
];

yield 'segment with AND in value, already escaped' => [
'pageTitle!=' . $escapedValueAnd,
$segmentFrom,
[
'where' => $whereSingle,
'bind' => [$segmentFrom, '3'],
],
];

yield 'segment with OR in value' => [
'pageTitle!=' . $encodedValueOr,
$segmentFrom,
[
'where' => $whereSingle,
'bind' => [$segmentFrom, '4'],
],
];

yield 'segment with OR in value, already escaped' => [
'pageTitle!=' . $escapedValueOr,
$segmentFrom,
[
'where' => $whereSingle,
'bind' => [$segmentFrom, '4'],
],
];

yield 'segment with two values, AND operator' => [
'pageTitle!=a;pageTitle!=b',
$segmentFrom,
[
'where' => $whereMultiAnd,
'bind' => [$segmentFrom, '1', $segmentFrom, '2'],
],
];

yield 'segment with two values, OR operator' => [
'pageTitle!=a,pageTitle!=b',
$segmentFrom,
[
'where' => $whereMultiOr,
'bind' => [$segmentFrom, '1', $segmentFrom, '2'],
],
];

yield 'mixed operator in value and two segments' => [
'pageTitle!=' . $encodedValueAnd . ';pageTitle!=' . $encodedValueOr,
$segmentFrom,
[
'where' => $whereMultiAnd,
'bind' => [$segmentFrom, '3', $segmentFrom, '4'],
],
];

yield 'mixed operator in value and two segments, already escaped' => [
'pageTitle!=' . $escapedValueAnd . ',pageTitle!=' . $escapedValueOr,
$segmentFrom,
[
'where' => $whereMultiOr,
'bind' => [$segmentFrom, '3', $segmentFrom, '4'],
],
];
}

/**
* @dataProvider getCommonSubqueryTestData
*
* @param array{where: string, bind: string} $expected
*/
public function testCommonSubquery(string $segment, string $segmentFrom, array $expected): void
{
$this->insertPageUrlAsAction('a', 'idaction_name', Action::TYPE_PAGE_TITLE);
$this->insertPageUrlAsAction('b', 'idaction_name', Action::TYPE_PAGE_TITLE);
$this->insertPageUrlAsAction('a;b', 'idaction_name', Action::TYPE_PAGE_TITLE);
$this->insertPageUrlAsAction('a,b', 'idaction_name', Action::TYPE_PAGE_TITLE);

$select = 'log_visit.idvisit';
$from = 'log_visit';

$expected = array(
'sql' => '
SELECT
log_visit.idvisit
FROM
' . Common::prefixTable('log_visit') . ' AS log_visit
WHERE
' . $expected['where'],
'bind' => $expected['bind']
);

$segment = new Segment($segment, $idSites = array(), Date::factory($segmentFrom));
$sql = $segment->getSelectQuery($select, $from, false);
$this->assertQueryDoesNotFail($sql);

$this->assertEquals($this->removeExtraWhiteSpaces($expected), $this->removeExtraWhiteSpaces($sql));

// calling twice should give same results
$sql = $segment->getSelectQuery($select, array($from));
$this->assertEquals($this->removeExtraWhiteSpaces($expected), $this->removeExtraWhiteSpaces($sql));

$this->assertEquals(32, strlen($segment->getHash()));
}

public function test_getSelectQuery_whenNoJoin()
{
$select = '*';
Expand Down

0 comments on commit 7055925

Please sign in to comment.