Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix problem with unprefixed group by columns in select queries #21370

Merged
merged 2 commits into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 55 additions & 26 deletions core/DataAccess/LogAggregator.php
Original file line number Diff line number Diff line change
Expand Up @@ -659,45 +659,74 @@ protected function getDimensionsToSelect($dimensions, $additionalSelects)
}

/**
* Returns the dimensions array, where
* (1) the table name is prepended to the field
* (2) the "AS `label` " is appended to the field
* Returns an array of select expressions based on the provided dimensions array
* Each dimension will be prefixed with the table name, if it's not an expression and will be alias
* with the dimension name or an custom alias if one was provided as array key.
*
* @param $dimensions
* @param $tableName
* @param bool $appendSelectAs
* @return mixed
* @param array $dimensions An array of dimensions, where an alias can be provided as key
* @param string $tableName
* @return array
*/
protected function getSelectDimensions($dimensions, $tableName, $appendSelectAs = true)
protected function getSelectDimensions(array $dimensions, string $tableName): array
{
foreach ($dimensions as $selectAs => &$field) {
$selectAsString = $field;
$selectDimensions = [];

if (!is_numeric($selectAs)) {
$selectAsString = $selectAs;
} else if ($this->isFieldFunctionOrComplexExpression($field)) {
// if complex expression has a select as, use it
if (!$appendSelectAs && preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) {
$field = $matches[1];
continue;
}
foreach ($dimensions as $selectAs => $field) {

// if function w/o select as, do not alias or prefix
$selectAsString = $appendSelectAs = false;
if ($this->isFieldFunctionOrComplexExpression($field) && is_numeric($selectAs)) {
// an expression or field function without an alias should be used as is
$selectDimensions[] = $field;
continue;
}

$isKnownField = !in_array($field, array('referrer_data'));
$selectAlias = !is_numeric($selectAs) ? $selectAs : $field;

if ($selectAsString == $field && $isKnownField) {
if (!$this->isFieldFunctionOrComplexExpression($field)) {
// prefix field name with table if it's not an expression
$field = $this->prefixColumn($field, $tableName);
}

if ($appendSelectAs && $selectAsString) {
$field = $this->prefixColumn($field, $tableName) . $this->getSelectAliasAs($selectAsString);
// append " AS alias"
$field .= $this->getSelectAliasAs($selectAlias);
$selectDimensions[] = $field;
}

return $selectDimensions;
}

/**
* Returns an array of fields to be used in an grouped by statement.
* For that either the alias, the field expression or prefixed column name of the provided dimensions will be used.
*
* @param array $dimensions An array of dimensions, where an alias can be provided as key
* @param string $tableName
* @return array
*/
protected function getGroupByDimensions(array $dimensions, string $tableName): array
{
$orderByDimensions = [];

foreach ($dimensions as $selectAs => $field) {
if (!is_numeric($selectAs)) {
$orderByDimensions[] = $selectAs;
continue;
}

if ($this->isFieldFunctionOrComplexExpression($field)) {
// if complex expression has a select as, use it
if (preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) {
$orderByDimensions[] = $matches[1];
continue;
}

$orderByDimensions[] = $field;
continue;
}

$orderByDimensions[] = $this->prefixColumn($field, $tableName);
}

return $dimensions;
return $orderByDimensions;
}

/**
Expand Down Expand Up @@ -755,7 +784,7 @@ public function getWhereStatement($tableName, $datetimeField, $extraWhere = fals

protected function getGroupByStatement($dimensions, $tableName)
{
$dimensions = $this->getSelectDimensions($dimensions, $tableName, $appendSelectAs = false);
$dimensions = $this->getGroupByDimensions($dimensions, $tableName);
$groupBy = implode(", ", $dimensions);

return $groupBy;
Expand Down
12 changes: 5 additions & 7 deletions plugins/Transitions/API.php
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ protected function queryFollowingActions($idaction, $actionType, LogAggregator $
protected function queryExternalReferrers($idaction, $actionType, $logAggregator, $limitBeforeGrouping = 0)
{

$rankingQuery = new RankingQuery($limitBeforeGrouping ? $limitBeforeGrouping: $this->limitBeforeGrouping);
$rankingQuery = new RankingQuery($limitBeforeGrouping ?: $this->limitBeforeGrouping);
$rankingQuery->setOthersLabel('Others');

// we generate a single column that contains the interesting data for each referrer.
Expand All @@ -360,16 +360,14 @@ protected function queryExternalReferrers($idaction, $actionType, $logAggregator
// group by. when we group by both, we don't get a single column for the keyword but instead
// one column per keyword + search engine url. this way, we could not get the top keywords using
// the ranking query.
$dimensions = array('referrer_data', 'referer_type');
$rankingQuery->addLabelColumn('referrer_data');
$selects = array(
'CASE log_visit.referer_type
$dimensions = array('referrer_data' => 'CASE log_visit.referer_type
WHEN ' . Common::REFERRER_TYPE_DIRECT_ENTRY . ' THEN \'\'
WHEN ' . Common::REFERRER_TYPE_SEARCH_ENGINE . ' THEN log_visit.referer_keyword
WHEN ' . Common::REFERRER_TYPE_SOCIAL_NETWORK . ' THEN log_visit.referer_name
WHEN ' . Common::REFERRER_TYPE_WEBSITE . ' THEN log_visit.referer_url
WHEN ' . Common::REFERRER_TYPE_CAMPAIGN . ' THEN CONCAT(log_visit.referer_name, \' \', log_visit.referer_keyword)
END AS `referrer_data`');
END', 'referer_type');
$rankingQuery->addLabelColumn('referrer_data');

// get one limited group per referrer type
$rankingQuery->partitionResultIntoMultipleGroups('referer_type', array(
Expand All @@ -384,7 +382,7 @@ protected function queryExternalReferrers($idaction, $actionType, $logAggregator
$where = 'visit_entry_idaction_' . $type . ' = ' . intval($idaction);

$metrics = array(Metrics::INDEX_NB_VISITS);
$data = $logAggregator->queryVisitsByDimension($dimensions, $where, $selects, $metrics, $rankingQuery, false, Config::getInstance()->General['live_query_max_execution_time']);
$data = $logAggregator->queryVisitsByDimension($dimensions, $where, [], $metrics, $rankingQuery, false, Config::getInstance()->General['live_query_max_execution_time']);

$referrerData = array();
$referrerSubData = array();
Expand Down
106 changes: 104 additions & 2 deletions tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,115 @@ public function setUp(): void
$this->site = new Site($idSite);
$date = Date::factory('2010-03-06');
$this->period = Period\Factory::build('month', $date);
$segment = new Segment('', array($this->site->getId()));

$segment = new Segment('', [$this->site->getId()]);

$params = new Parameters($this->site, $this->period, $segment);
$this->logAggregator = new LogAggregator($params);
}

/**
* @dataProvider getSelectDimensionTestData
*/
public function testGetSelectDimensions($dimensions, $tableName, $expectedResult)
{
$class = new \ReflectionClass(LogAggregator::class);
$method = $class->getMethod('getSelectDimensions');
$method->setAccessible(true);
$output = $method->invoke($this->logAggregator, $dimensions, $tableName);
$this->assertEquals($expectedResult, $output);
}

public function getSelectDimensionTestData(): iterable
{
yield 'normal column names' => [
['column', 'column2'],
'log_visit',
['log_visit.column AS `column`', 'log_visit.column2 AS `column2`']
];

yield 'normal column names with alias' => [
['alias' => 'column', 'alias2' => 'column2'],
'log_conversion',
['log_conversion.column AS `alias`', 'log_conversion.column2 AS `alias2`']
];

yield 'normal column names with and without alias' => [
['alias' => 'column', 'column2'],
'log_conversion',
['log_conversion.column AS `alias`', 'log_conversion.column2 AS `column2`']
];

yield 'column expression' => [
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"]
];

yield 'column expression with alias' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, '')) AS `alias`"]
];

yield 'mixed dimension content' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))", 'mycolumn', 'newalias' => 'column2'],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, '')) AS `alias`", 'log_conversion.mycolumn AS `mycolumn`', 'log_conversion.column2 AS `newalias`']
];
}


/**
* @dataProvider getGroupByDimensionTestData
*/
public function testGetGroupByDimensions($dimensions, $tableName, $expectedResult)
{
$class = new \ReflectionClass(LogAggregator::class);
$method = $class->getMethod('getGroupByDimensions');
$method->setAccessible(true);
$output = $method->invoke($this->logAggregator, $dimensions, $tableName);
$this->assertEquals($expectedResult, $output);
}

public function getGroupByDimensionTestData(): iterable
{
yield 'normal column names' => [
['column', 'column2'],
'log_visit',
['log_visit.column', 'log_visit.column2']
];

yield 'normal column names with alias' => [
['alias' => 'column', 'alias2' => 'column2'],
'log_conversion',
['alias', 'alias2']
];

yield 'normal column names with and without alias' => [
['alias' => 'column', 'column2'],
'log_conversion',
['alias', 'log_conversion.column2']
];

yield 'column expression' => [
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"]
];

yield 'column expression with alias' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"],
'log_conversion',
['alias']
];

yield 'mixed dimension content' => [
['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))", 'mycolumn', 'newalias' => 'column2'],
'log_conversion',
['alias', 'log_conversion.mycolumn', 'newalias']
];
}

public function test_generateQuery()
{
$query = $this->logAggregator->generateQuery('test, test2', 'log_visit', '1=1', false, '5');
Expand Down