From 66a2e2ddf66c72a87a83a23bd52b4272ea5d8d8e Mon Sep 17 00:00:00 2001 From: sgiehl Date: Thu, 5 Oct 2023 16:39:53 +0200 Subject: [PATCH 1/2] Fix problem with unprefixed order by columns in select queries --- core/DataAccess/LogAggregator.php | 81 +++++++++++++++++++++---------- plugins/Transitions/API.php | 12 ++--- 2 files changed, 60 insertions(+), 33 deletions(-) diff --git a/core/DataAccess/LogAggregator.php b/core/DataAccess/LogAggregator.php index 6bfead967c9..a09988180df 100644 --- a/core/DataAccess/LogAggregator.php +++ b/core/DataAccess/LogAggregator.php @@ -659,45 +659,74 @@ protected function getDimensionsToSelect($dimensions, $additionalSelects) } /** - * Returns the dimensions array, where - * (1) the table name is prepended to the field - * (2) the "AS `label` " is appended to the field + * Returns an array of select expressions based on the provided dimensions array + * Each dimension will be prefixed with the table name, if it's not an expression and will be alias + * with the dimension name or an custom alias if one was provided as array key. * - * @param $dimensions - * @param $tableName - * @param bool $appendSelectAs - * @return mixed + * @param array $dimensions An array of dimensions, where an alias can be provided as key + * @param string $tableName + * @return array */ - protected function getSelectDimensions($dimensions, $tableName, $appendSelectAs = true) + protected function getSelectDimensions(array $dimensions, string $tableName): array { - foreach ($dimensions as $selectAs => &$field) { - $selectAsString = $field; + $selectDimensions = []; - if (!is_numeric($selectAs)) { - $selectAsString = $selectAs; - } else if ($this->isFieldFunctionOrComplexExpression($field)) { - // if complex expression has a select as, use it - if (!$appendSelectAs && preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) { - $field = $matches[1]; - continue; - } + foreach ($dimensions as $selectAs => $field) { - // if function w/o select as, do not alias or prefix - $selectAsString = $appendSelectAs = false; + if ($this->isFieldFunctionOrComplexExpression($field) && is_numeric($selectAs)) { + // an expression or field function without an alias should be used as is + $selectDimensions[] = $field; + continue; } - $isKnownField = !in_array($field, array('referrer_data')); + $selectAlias = !is_numeric($selectAs) ? $selectAs : $field; - if ($selectAsString == $field && $isKnownField) { + if (!$this->isFieldFunctionOrComplexExpression($field)) { + // prefix field name with table if it's not an expression $field = $this->prefixColumn($field, $tableName); } - if ($appendSelectAs && $selectAsString) { - $field = $this->prefixColumn($field, $tableName) . $this->getSelectAliasAs($selectAsString); + // append " AS alias" + $field .= $this->getSelectAliasAs($selectAlias); + $selectDimensions[] = $field; + } + + return $selectDimensions; + } + + /** + * Returns an array of fields to be used in an grouped by statement. + * For that either the alias, the field expression or prefixed column name of the provided dimensions will be used. + * + * @param array $dimensions An array of dimensions, where an alias can be provided as key + * @param string $tableName + * @return array + */ + protected function getGroupByDimensions(array $dimensions, string $tableName): array + { + $orderByDimensions = []; + + foreach ($dimensions as $selectAs => $field) { + if (!is_numeric($selectAs)) { + $orderByDimensions[] = $selectAs; + continue; + } + + if ($this->isFieldFunctionOrComplexExpression($field)) { + // if complex expression has a select as, use it + if (preg_match('/\s+AS\s+(.*?)\s*$/', $field, $matches)) { + $orderByDimensions[] = $matches[1]; + continue; + } + + $orderByDimensions[] = $field; + continue; } + + $orderByDimensions[] = $this->prefixColumn($field, $tableName); } - return $dimensions; + return $orderByDimensions; } /** @@ -755,7 +784,7 @@ public function getWhereStatement($tableName, $datetimeField, $extraWhere = fals protected function getGroupByStatement($dimensions, $tableName) { - $dimensions = $this->getSelectDimensions($dimensions, $tableName, $appendSelectAs = false); + $dimensions = $this->getGroupByDimensions($dimensions, $tableName); $groupBy = implode(", ", $dimensions); return $groupBy; diff --git a/plugins/Transitions/API.php b/plugins/Transitions/API.php index 35e996451cf..eafe6fcbf6a 100644 --- a/plugins/Transitions/API.php +++ b/plugins/Transitions/API.php @@ -351,7 +351,7 @@ protected function queryFollowingActions($idaction, $actionType, LogAggregator $ protected function queryExternalReferrers($idaction, $actionType, $logAggregator, $limitBeforeGrouping = 0) { - $rankingQuery = new RankingQuery($limitBeforeGrouping ? $limitBeforeGrouping: $this->limitBeforeGrouping); + $rankingQuery = new RankingQuery($limitBeforeGrouping ?: $this->limitBeforeGrouping); $rankingQuery->setOthersLabel('Others'); // we generate a single column that contains the interesting data for each referrer. @@ -360,16 +360,14 @@ protected function queryExternalReferrers($idaction, $actionType, $logAggregator // group by. when we group by both, we don't get a single column for the keyword but instead // one column per keyword + search engine url. this way, we could not get the top keywords using // the ranking query. - $dimensions = array('referrer_data', 'referer_type'); - $rankingQuery->addLabelColumn('referrer_data'); - $selects = array( - 'CASE log_visit.referer_type + $dimensions = array('referrer_data' => 'CASE log_visit.referer_type WHEN ' . Common::REFERRER_TYPE_DIRECT_ENTRY . ' THEN \'\' WHEN ' . Common::REFERRER_TYPE_SEARCH_ENGINE . ' THEN log_visit.referer_keyword WHEN ' . Common::REFERRER_TYPE_SOCIAL_NETWORK . ' THEN log_visit.referer_name WHEN ' . Common::REFERRER_TYPE_WEBSITE . ' THEN log_visit.referer_url WHEN ' . Common::REFERRER_TYPE_CAMPAIGN . ' THEN CONCAT(log_visit.referer_name, \' \', log_visit.referer_keyword) - END AS `referrer_data`'); + END', 'referer_type'); + $rankingQuery->addLabelColumn('referrer_data'); // get one limited group per referrer type $rankingQuery->partitionResultIntoMultipleGroups('referer_type', array( @@ -384,7 +382,7 @@ protected function queryExternalReferrers($idaction, $actionType, $logAggregator $where = 'visit_entry_idaction_' . $type . ' = ' . intval($idaction); $metrics = array(Metrics::INDEX_NB_VISITS); - $data = $logAggregator->queryVisitsByDimension($dimensions, $where, $selects, $metrics, $rankingQuery, false, Config::getInstance()->General['live_query_max_execution_time']); + $data = $logAggregator->queryVisitsByDimension($dimensions, $where, [], $metrics, $rankingQuery, false, Config::getInstance()->General['live_query_max_execution_time']); $referrerData = array(); $referrerSubData = array(); From 39a3cbeebdef265d93bdd75018a9c1154655b4d8 Mon Sep 17 00:00:00 2001 From: sgiehl Date: Thu, 5 Oct 2023 17:39:49 +0200 Subject: [PATCH 2/2] add some tests for changed methods --- .../DataAccess/LogAggregatorTest.php | 106 +++++++++++++++++- 1 file changed, 104 insertions(+), 2 deletions(-) diff --git a/tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php b/tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php index ee4372e95d5..92ffd994ab6 100644 --- a/tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php +++ b/tests/PHPUnit/Integration/DataAccess/LogAggregatorTest.php @@ -58,13 +58,115 @@ public function setUp(): void $this->site = new Site($idSite); $date = Date::factory('2010-03-06'); $this->period = Period\Factory::build('month', $date); - $segment = new Segment('', array($this->site->getId())); - + $segment = new Segment('', [$this->site->getId()]); $params = new Parameters($this->site, $this->period, $segment); $this->logAggregator = new LogAggregator($params); } + /** + * @dataProvider getSelectDimensionTestData + */ + public function testGetSelectDimensions($dimensions, $tableName, $expectedResult) + { + $class = new \ReflectionClass(LogAggregator::class); + $method = $class->getMethod('getSelectDimensions'); + $method->setAccessible(true); + $output = $method->invoke($this->logAggregator, $dimensions, $tableName); + $this->assertEquals($expectedResult, $output); + } + + public function getSelectDimensionTestData(): iterable + { + yield 'normal column names' => [ + ['column', 'column2'], + 'log_visit', + ['log_visit.column AS `column`', 'log_visit.column2 AS `column2`'] + ]; + + yield 'normal column names with alias' => [ + ['alias' => 'column', 'alias2' => 'column2'], + 'log_conversion', + ['log_conversion.column AS `alias`', 'log_conversion.column2 AS `alias2`'] + ]; + + yield 'normal column names with and without alias' => [ + ['alias' => 'column', 'column2'], + 'log_conversion', + ['log_conversion.column AS `alias`', 'log_conversion.column2 AS `column2`'] + ]; + + yield 'column expression' => [ + ["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"], + 'log_conversion', + ["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"] + ]; + + yield 'column expression with alias' => [ + ['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"], + 'log_conversion', + ["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, '')) AS `alias`"] + ]; + + yield 'mixed dimension content' => [ + ['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))", 'mycolumn', 'newalias' => 'column2'], + 'log_conversion', + ["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, '')) AS `alias`", 'log_conversion.mycolumn AS `mycolumn`', 'log_conversion.column2 AS `newalias`'] + ]; + } + + + /** + * @dataProvider getGroupByDimensionTestData + */ + public function testGetGroupByDimensions($dimensions, $tableName, $expectedResult) + { + $class = new \ReflectionClass(LogAggregator::class); + $method = $class->getMethod('getGroupByDimensions'); + $method->setAccessible(true); + $output = $method->invoke($this->logAggregator, $dimensions, $tableName); + $this->assertEquals($expectedResult, $output); + } + + public function getGroupByDimensionTestData(): iterable + { + yield 'normal column names' => [ + ['column', 'column2'], + 'log_visit', + ['log_visit.column', 'log_visit.column2'] + ]; + + yield 'normal column names with alias' => [ + ['alias' => 'column', 'alias2' => 'column2'], + 'log_conversion', + ['alias', 'alias2'] + ]; + + yield 'normal column names with and without alias' => [ + ['alias' => 'column', 'column2'], + 'log_conversion', + ['alias', 'log_conversion.column2'] + ]; + + yield 'column expression' => [ + ["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"], + 'log_conversion', + ["CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"] + ]; + + yield 'column expression with alias' => [ + ['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))"], + 'log_conversion', + ['alias'] + ]; + + yield 'mixed dimension content' => [ + ['alias' => "CONCAT(log_visit.config_os, ';', COALESCE(log_visit.config_os_version, ''))", 'mycolumn', 'newalias' => 'column2'], + 'log_conversion', + ['alias', 'log_conversion.mycolumn', 'newalias'] + ]; + } + public function test_generateQuery() { $query = $this->logAggregator->generateQuery('test, test2', 'log_visit', '1=1', false, '5');