Skip to content

Commit

Permalink
Fix problem with unprefixed group by columns in select queries (#21370)
Browse files Browse the repository at this point in the history
* Fix problem with unprefixed order by columns in select queries

* add some tests for changed methods
  • Loading branch information
sgiehl authored Oct 10, 2023
1 parent dbeffa3 commit 66bd55a
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 35 deletions.
81 changes: 55 additions & 26 deletions core/DataAccess/LogAggregator.php
Original file line number Diff line number Diff line change
Expand Up @@ -659,45 +659,74 @@ protected function getDimensionsToSelect($dimensions, $additionalSelects)
}

/**
* Returns the dimensions array, where
* (1) the table name is prepended to the field
* (2) the "AS `label` " is appended to the field
* Returns an array of select expressions based on the provided dimensions array
* Each dimension will be prefixed with the table name, if it's not an expression and will be alias
* with the dimension name or an custom alias if one was provided as array key.
*
* @param $dimensions
* @param $tableName
* @param bool $appendSelectAs
* @return mixed
* @param array $dimensions An array of dimensions, where an alias can be provided as key
* @param string $tableName
* @return array
*/
protected function getSelectDimensions($dimensions, $tableName, $appendSelectAs = true)
protected function getSelectDimensions(array $dimensions, string $tableName): array
{
foreach ($dimensions as $selectAs => &$field) {
$selectAsString = $field;
$selectDimensions = [];

if (!is_numeric($selectAs)) {
$selectAsString = $selectAs;
} else if ($this->isFieldFunctionOrComplexExpression($field)) {
// if complex expression has a select as, use it
if (!$appendSelectAs && preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) {
$field = $matches[1];
continue;
}
foreach ($dimensions as $selectAs => $field) {

// if function w/o select as, do not alias or prefix
$selectAsString = $appendSelectAs = false;
if ($this->isFieldFunctionOrComplexExpression($field) && is_numeric($selectAs)) {
// an expression or field function without an alias should be used as is
$selectDimensions[] = $field;
continue;
}

$isKnownField = !in_array($field, array('referrer_data'));
$selectAlias = !is_numeric($selectAs) ? $selectAs : $field;

if ($selectAsString == $field && $isKnownField) {
if (!$this->isFieldFunctionOrComplexExpression($field)) {
// prefix field name with table if it's not an expression
$field = $this->prefixColumn($field, $tableName);
}

if ($appendSelectAs && $selectAsString) {
$field = $this->prefixColumn($field, $tableName) . $this->getSelectAliasAs($selectAsString);
// append " AS alias"
$field .= $this->getSelectAliasAs($selectAlias);
$selectDimensions[] = $field;
}

return $selectDimensions;
}

/**
* Returns an array of fields to be used in an grouped by statement.
* For that either the alias, the field expression or prefixed column name of the provided dimensions will be used.
*
* @param array $dimensions An array of dimensions, where an alias can be provided as key
* @param string $tableName
* @return array
*/
protected function getGroupByDimensions(array $dimensions, string $tableName): array
{
$orderByDimensions = [];

foreach ($dimensions as $selectAs => $field) {
if (!is_numeric($selectAs)) {
$orderByDimensions[] = $selectAs;
continue;
}

if ($this->isFieldFunctionOrComplexExpression($field)) {
// if complex expression has a select as, use it
if (preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) {
$orderByDimensions[] = $matches[1];
continue;
}

$orderByDimensions[] = $field;
continue;
}

$orderByDimensions[] = $this->prefixColumn($field, $tableName);
}

return $dimensions;
return $orderByDimensions;
}

/**
Expand Down Expand Up @@ -755,7 +784,7 @@ public function getWhereStatement($tableName, $datetimeField, $extraWhere = fals

protected function getGroupByStatement($dimensions, $tableName)
{
$dimensions = $this->getSelectDimensions($dimensions, $tableName, $appendSelectAs = false);
$dimensions = $this->getGroupByDimensions($dimensions, $tableName);
$groupBy = implode(", ", $dimensions);

return $groupBy;
Expand Down
12 changes: 5 additions & 7 deletions plugins/Transitions/API.php
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ protected function queryFollowingActions($idaction, $actionType, LogAggregator $
protected function queryExternalReferrers($idaction, $actionType, $logAggregator, $limitBeforeGrouping = 0)
{

$rankingQuery = new RankingQuery($limitBeforeGrouping ? $limitBeforeGrouping: $this->limitBeforeGrouping);
$rankingQuery = new RankingQuery($limitBeforeGrouping ?: $this->limitBeforeGrouping);
$rankingQuery->setOthersLabel('Others');

// we generate a single column that contains the interesting data for each referrer.
Expand All @@ -360,16 +360,14 @@ protected function queryExternalReferrers($idaction, $actionType, $logAggregator
// group by. when we group by both, we don't get a single column for the keyword but instead
// one column per keyword + search engine url. this way, we could not get the top keywords using
// the ranking query.
$dimensions = array('referrer_data', 'referer_type');
$rankingQuery->addLabelColumn('referrer_data');
$selects = array(
'CASE log_visit.referer_type
$dimensions = array('referrer_data' => 'CASE log_visit.referer_type
WHEN ' . Common::REFERRER_TYPE_DIRECT_ENTRY . ' THEN \'\'
WHEN ' . Common::REFERRER_TYPE_SEARCH_ENGINE . ' THEN log_visit.referer_keyword
WHEN ' . Common::REFERRER_TYPE_SOCIAL_NETWORK . ' THEN log_visit.referer_name
WHEN ' . Common::REFERRER_TYPE_WEBSITE . ' THEN log_visit.referer_url
WHEN ' . Common::REFERRER_TYPE_CAMPAIGN . ' THEN CONCAT(log_visit.referer_name, \' \', log_visit.referer_keyword)
END AS `referrer_data`');
END', 'referer_type');
$rankingQuery->addLabelColumn('referrer_data');

// get one limited group per referrer type
$rankingQuery->partitionResultIntoMultipleGroups('referer_type', array(
Expand All @@ -384,7 +382,7 @@ protected function queryExternalReferrers($idaction, $actionType, $logAggregator
$where = 'visit_entry_idaction_' . $type . ' = ' . intval($idaction);

$metrics = array(Metrics::INDEX_NB_VISITS);
$data = $logAggregator->queryVisitsByDimension($dimensions, $where, $selects, $metrics, $rankingQuery, false, Config::getInstance()->General['live_query_max_execution_time']);
$data = $logAggregator->queryVisitsByDimension($dimensions, $where, [], $metrics, $rankingQuery, false, Config::getInstance()->General['live_query_max_execution_time']);

$referrerData = array();
$referrerSubData = array();
Expand Down
106 changes: 104 additions & 2 deletions tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,115 @@ public function setUp(): void
$this->site = new Site($idSite);
$date = Date::factory('2010-03-06');
$this->period = Period\Factory::build('month', $date);
$segment = new Segment('', array($this->site->getId()));

$segment = new Segment('', [$this->site->getId()]);

$params = new Parameters($this->site, $this->period, $segment);
$this->logAggregator = new LogAggregator($params);
}

/**
* @dataProvider getSelectDimensionTestData
*/
public function testGetSelectDimensions($dimensions, $tableName, $expectedResult)
{
$class = new \ReflectionClass(LogAggregator::class);
$method = $class->getMethod('getSelectDimensions');
$method->setAccessible(true);
$output = $method->invoke($this->logAggregator, $dimensions, $tableName);
$this->assertEquals($expectedResult, $output);
}

public function getSelectDimensionTestData(): iterable
{
yield 'normal column names' => [
['column', 'column2'],
'log_visit',
['log_visit.column AS `column`', 'log_visit.column2 AS `column2`']
];

yield 'normal column names with alias' => [
['alias' => 'column', 'alias2' => 'column2'],
'log_conversion',
['log_conversion.column AS `alias`', 'log_conversion.column2 AS `alias2`']
];

yield 'normal column names with and without alias' => [
['alias' => 'column', 'column2'],
'log_conversion',
['log_conversion.column AS `alias`', 'log_conversion.column2 AS `column2`']
];

yield 'column expression' => [
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"]
];

yield 'column expression with alias' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, '')) AS `alias`"]
];

yield 'mixed dimension content' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))", 'mycolumn', 'newalias' => 'column2'],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, '')) AS `alias`", 'log_conversion.mycolumn AS `mycolumn`', 'log_conversion.column2 AS `newalias`']
];
}


/**
* @dataProvider getGroupByDimensionTestData
*/
public function testGetGroupByDimensions($dimensions, $tableName, $expectedResult)
{
$class = new \ReflectionClass(LogAggregator::class);
$method = $class->getMethod('getGroupByDimensions');
$method->setAccessible(true);
$output = $method->invoke($this->logAggregator, $dimensions, $tableName);
$this->assertEquals($expectedResult, $output);
}

public function getGroupByDimensionTestData(): iterable
{
yield 'normal column names' => [
['column', 'column2'],
'log_visit',
['log_visit.column', 'log_visit.column2']
];

yield 'normal column names with alias' => [
['alias' => 'column', 'alias2' => 'column2'],
'log_conversion',
['alias', 'alias2']
];

yield 'normal column names with and without alias' => [
['alias' => 'column', 'column2'],
'log_conversion',
['alias', 'log_conversion.column2']
];

yield 'column expression' => [
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"]
];

yield 'column expression with alias' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
['alias']
];

yield 'mixed dimension content' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))", 'mycolumn', 'newalias' => 'column2'],
'log_conversion',
['alias', 'log_conversion.mycolumn', 'newalias']
];
}

public function test_generateQuery()
{
$query = $this->logAggregator->generateQuery('test, test2', 'log_visit', '1=1', false, '5');
Expand Down

0 comments on commit 66bd55a

Please sign in to comment.