Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix behaviour for segments using "not equals" or "not contains" on an action dimension #16172

Merged
merged 19 commits into from
Jul 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions core/Archive/ArchiveQueryFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public function build($idSites, $strPeriod, $strDate, $strSegment = false, $_res
{
list($websiteIds, $timezone, $idSiteIsAll) = $this->getSiteInfoFromQueryParam($idSites, $_restrictSitesToLogin);
list($allPeriods, $isMultipleDate) = $this->getPeriodInfoFromQueryParam($strDate, $strPeriod, $timezone);
$segment = $this->getSegmentFromQueryParam($strSegment, $websiteIds);
$segment = $this->getSegmentFromQueryParam($strSegment, $websiteIds, $allPeriods);

return $this->factory($segment, $allPeriods, $websiteIds, $idSiteIsAll, $isMultipleDate);
}
Expand Down Expand Up @@ -118,10 +118,13 @@ protected function getPeriodInfoFromQueryParam($strDate, $strPeriod, $timezone)
*
* @param string $strSegment the value of the 'segment' query parameter.
* @param int[] $websiteIds the list of sites being queried.
* @param Period[] $allPeriods list of all periods
* @return Segment
*/
protected function getSegmentFromQueryParam($strSegment, $websiteIds)
protected function getSegmentFromQueryParam($strSegment, $websiteIds, $allPeriods)
{
return new Segment($strSegment, $websiteIds);
// we might have multiple periods, so use the start date of the first one and
// the end date of the last one to limit the possible segment subquery
return new Segment($strSegment, $websiteIds, reset($allPeriods)->getDateTimeStart(), end($allPeriods)->getDateTimeEnd());
}
}
2 changes: 1 addition & 1 deletion core/ArchiveProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -650,7 +650,7 @@ public function processDependentArchive($plugin, $segment)
return;
}

$newSegment = new Segment($newSegment, $idSites);
$newSegment = new Segment($newSegment, $idSites, $params->getDateStart(), $params->getDateEnd());
if (ArchiveProcessor\Rules::isSegmentPreProcessed($idSites, $newSegment)) {
// will be processed anyway
return;
Expand Down
8 changes: 4 additions & 4 deletions core/CronArchive.php
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ private function launchArchivingFor($archives)
$idSite = $archive['idsite'];
$dateStr = $archive['period'] == Range::PERIOD_ID ? ($archive['date1'] . ',' . $archive['date2']) : $archive['date1'];
$period = PeriodFactory::build($this->periodIdsToLabels[$archive['period']], $dateStr);
$params = new Parameters(new Site($idSite), $period, new Segment($segment, [$idSite]));
$params = new Parameters(new Site($idSite), $period, new Segment($segment, [$idSite], $period->getDateStart(), $period->getDateEnd()));

$loader = new Loader($params);
if ($loader->canSkipThisArchive()) {
Expand Down Expand Up @@ -941,7 +941,7 @@ public function invalidateArchivedReportsForSitesThatNeedToBeArchivedAgain()
continue;
}

$params = new Parameters(new Site($idSite), $period, new Segment('', [$idSite]));
$params = new Parameters(new Site($idSite), $period, new Segment('', [$idSite], $period->getDateStart(), $period->getDateEnd()));
if ($this->isThereExistingValidPeriod($params)) {
$this->logger->info(' Found usable archive for custom date range {date} for site {idSite}, skipping archiving.', ['date' => $date, 'idSite' => $idSite]);
continue;
Expand Down Expand Up @@ -989,7 +989,7 @@ private function invalidateRecentDate($dateStr)
$date = Date::factory($dateStr);
$period = PeriodFactory::build('day', $date);

$params = new Parameters(new Site($idSite), $period, new Segment('', [$idSite]));
$params = new Parameters(new Site($idSite), $period, new Segment('', [$idSite], $period->getDateStart(), $period->getDateEnd()));
if ($this->isThereExistingValidPeriod($params, $isYesterday)) {
$this->logger->debug(" Found existing valid archive for $dateStr, skipping invalidation...");
continue;
Expand Down Expand Up @@ -1420,7 +1420,7 @@ private function canSkipArchiveBecauseNoPoint(array $invalidatedArchive)
$dateStr = $periodLabel == 'range' ? ($invalidatedArchive['date1'] . ',' . $invalidatedArchive['date2']) : $invalidatedArchive['date1'];
$period = PeriodFactory::build($periodLabel, $dateStr);

$segment = new Segment($invalidatedArchive['segment'], [$invalidatedArchive['idsite']]);
$segment = new Segment($invalidatedArchive['segment'], [$invalidatedArchive['idsite']], $period->getDateStart(), $period->getDateEnd());

$params = new Parameters($site, $period, $segment);

Expand Down
122 changes: 115 additions & 7 deletions core/Segment.php
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ class Segment
*/
protected $idSites = null;

/**
* @var Date
*/
protected $startDate = null;

/**
* @var Date
*/
protected $endDate = null;

/**
* @var LogQueryBuilder
*/
Expand All @@ -93,12 +103,20 @@ class Segment
/**
* Constructor.
*
* When using segments that contain a != or !@ condition on a non visit dimension (e.g. action, conversion, ...) it
* is needed to use a subquery to get correct results. To avoid subqueries that fetch too many data it's required to
* set a startDate in this case. That date will be used to limit the subquery (along with possibly given idSites or
* endDate). If no startDate is given for such a segment it will generate a query that directly joins the according
* tables, but trigger a php warning as results might be incorrect.
*
* @param string $segmentCondition The segment condition, eg, `'browserCode=ff;countryCode=CA'`.
* @param array $idSites The list of sites the segment will be used with. Some segments are
* dependent on the site, such as goal segments.
* @param Date|null $startDate start date used to limit subqueries
* @param Date|null $endDate end date used to limit subqueries
* @throws
*/
public function __construct($segmentCondition, $idSites)
public function __construct($segmentCondition, $idSites, Date $startDate = null, Date $endDate = null)
{
$this->segmentQueryBuilder = StaticContainer::get('Piwik\DataAccess\LogQueryBuilder');

Expand All @@ -111,6 +129,14 @@ public function __construct($segmentCondition, $idSites)

$this->originalString = $segmentCondition;

if ($startDate instanceof Date) {
$this->startDate = $startDate;
}

if ($endDate instanceof Date) {
$this->endDate = $endDate;
}

// The segment expression can be urlencoded. Unfortunately, both the encoded and decoded versions
// can usually be parsed successfully. To pick the right one, we try both and pick the one w/ more
// successfully parsed subexpressions.
Expand Down Expand Up @@ -195,6 +221,12 @@ protected function initializeSegment($string, $idSites)
$string = substr($string, 0, self::SEGMENT_TRUNCATE_LIMIT);

$this->string = $string;

if (empty($idSites)) {
$idSites = [];
} else if (!is_array($idSites)) {
$idSites = [$idSites];
}
$this->idSites = $idSites;
$segment = new SegmentExpression($string);
$this->segmentExpression = $segment;
Expand All @@ -209,8 +241,7 @@ protected function initializeSegment($string, $idSites)
$cleanedExpressions = array();
foreach ($expressions as $expression) {
$operand = $expression[SegmentExpression::INDEX_OPERAND];
$cleanedExpression = $this->getCleanedExpression($operand);
$expression[SegmentExpression::INDEX_OPERAND] = $cleanedExpression;
$expression[SegmentExpression::INDEX_OPERAND] = $this->getCleanedExpression($operand);
$cleanedExpressions[] = $expression;
}

Expand All @@ -226,7 +257,8 @@ private function getExpressionsWithUnionsResolved($expressions)

$availableSegment = $this->getSegmentByName($name);

if (!empty($availableSegment['unionOfSegments'])) {
// We leave segments using !@ and != operands untouched for segments not on log_visit table as they will be build using a subquery
if (!$this->doesSegmentNeedSubquery($operand[SegmentExpression::INDEX_OPERAND_OPERATOR], $name) && !empty($availableSegment['unionOfSegments'])) {
$count = 0;
foreach ($availableSegment['unionOfSegments'] as $segmentNameOfUnion) {
$count++;
Expand All @@ -252,6 +284,51 @@ private function getExpressionsWithUnionsResolved($expressions)
return $expressionsWithUnions;
}

private function isVisitSegment($name)
{
$availableSegment = $this->getSegmentByName($name);

if (!empty($availableSegment['unionOfSegments'])) {
foreach ($availableSegment['unionOfSegments'] as $segmentNameOfUnion) {
$unionSegment = $this->getSegmentByName($segmentNameOfUnion);
if (strpos($unionSegment['sqlSegment'], 'log_visit.') === 0) {
return true;
}
}
} else if (strpos($availableSegment['sqlSegment'], 'log_visit.') === 0) {
return true;
}

return false;
}

private function doesSegmentNeedSubquery($operator, $segmentName)
{
$requiresSubQuery = in_array($operator, [
SegmentExpression::MATCH_DOES_NOT_CONTAIN,
SegmentExpression::MATCH_NOT_EQUAL
]) && !$this->isVisitSegment($segmentName);

if ($requiresSubQuery && empty($this->startDate)) {
$e = new Exception();
Log::warning("Avoiding segment subquery due to missing start date. Please ensure a start date is set when initializing a segment if it's used to build a query. Stacktrace:\n" . $e->getTraceAsString());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess you mean when start and end date is empty?

created follow up issue so we don't forget to act on it #16200

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry just realise shouldn't have merged this yet as it needs to also check if endDate is empty @sgiehl ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

created #16201

return false;
}

return $requiresSubQuery;
}

private function getInvertedOperatorForSubQuery($operator)
{
if ($operator === SegmentExpression::MATCH_DOES_NOT_CONTAIN) {
return SegmentExpression::MATCH_CONTAINS;
} else if ($operator === SegmentExpression::MATCH_NOT_EQUAL) {
return SegmentExpression::MATCH_EQUAL;
}

throw new Exception("Operator not support for subqueries");
}

/**
* Returns `true` if the segment is empty, `false` if otherwise.
*/
Expand All @@ -277,9 +354,6 @@ public function willBeArchived()
}

$idSites = $this->idSites;
if (!is_array($idSites)) {
$idSites = array($this->idSites);
}

return Rules::isRequestAuthorizedToArchive()
|| Rules::isBrowserArchivingAvailableForSegments()
Expand All @@ -297,6 +371,40 @@ protected function getCleanedExpression($expression)
$segment = $this->getSegmentByName($name);
$sqlName = $segment['sqlSegment'];

// Build subqueries for segments that are not on log_visit table but use !@ or != as operator
// This is required to ensure segments like actionUrl!@value really do not include any visit having an action containing `value`
if ($this->doesSegmentNeedSubquery($matchType, $name)) {
sgiehl marked this conversation as resolved.
Show resolved Hide resolved
$operator = $this->getInvertedOperatorForSubQuery($matchType);
$stringSegment = $name . $operator . $value;
$segmentObj = new Segment($stringSegment, $this->idSites, $this->startDate, $this->endDate);

$select = 'log_visit.idvisit';
$from = 'log_visit';
$datetimeField = 'visit_last_action_time';
$where = [];
$bind = [];
if (!empty($this->idSites)) {
$where[] = "$from.idsite IN (" . Common::getSqlStringFieldsArray($this->idSites) . ")";
$bind = $this->idSites;
}
if ($this->startDate instanceof Date) {
$where[] = "$from.$datetimeField >= ?";
$bind[] = $this->startDate->toString(Date::DATE_TIME_FORMAT);
}
if ($this->endDate instanceof Date) {
$where[] = "$from.$datetimeField <= ?";
$bind[] = $this->endDate->toString(Date::DATE_TIME_FORMAT);
}

$logQueryBuilder = StaticContainer::get('Piwik\DataAccess\LogQueryBuilder');
$forceGroupByBackup = $logQueryBuilder->getForcedInnerGroupBySubselect();
$logQueryBuilder->forceInnerGroupBySubselect(LogQueryBuilder::FORCE_INNER_GROUP_BY_NO_SUBSELECT);
$query = $segmentObj->getSelectQuery($select, $from, implode(' AND ', $where), $bind);
$logQueryBuilder->forceInnerGroupBySubselect($forceGroupByBackup);

return ['log_visit.idvisit', SegmentExpression::MATCH_ACTIONS_NOT_CONTAINS, $query];
}

if ($matchType != SegmentExpression::MATCH_IS_NOT_NULL_NOR_EMPTY
&& $matchType != SegmentExpression::MATCH_IS_NULL_OR_EMPTY) {

Expand Down
11 changes: 10 additions & 1 deletion core/Segment/SegmentExpression.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class SegmentExpression

// Special case, since we look up Page URLs/Page titles in a sub SQL query
const MATCH_ACTIONS_CONTAINS = 'IN';
const MATCH_ACTIONS_NOT_CONTAINS = 'NOTIN';

const INDEX_BOOL_OPERATOR = 0;
const INDEX_OPERAND = 1;
Expand Down Expand Up @@ -289,6 +290,14 @@ protected function getSqlMatchFromDefinition($def, &$availableTables)
$sqlMatch = '%s IN (' . $value['SQL'] . ')';
$value = $value['bind'];
break;
case self::MATCH_ACTIONS_NOT_CONTAINS:
// this match type is not accessible from the outside
// (it won't be matched in self::parseSubExpressions())
// it can be used internally to inject sub-expressions into the query.
// see Segment::getCleanedExpression()
$sqlMatch = '%s NOT IN (' . $value['sql'] . ')';
$value = $value['bind'];
break;
default:
throw new Exception("Filter contains the match type '" . $matchType . "' which is not supported");
break;
Expand All @@ -298,7 +307,7 @@ protected function getSqlMatchFromDefinition($def, &$availableTables)
$alsoMatchNULLValues = $alsoMatchNULLValues && !empty($value);
$sqlMatch = str_replace('%s', $field, $sqlMatch);

if ($matchType === self::MATCH_ACTIONS_CONTAINS
if ($matchType === self::MATCH_ACTIONS_CONTAINS || $matchType === self::MATCH_ACTIONS_NOT_CONTAINS
|| is_null($value)
) {
$sqlExpression = "( $sqlMatch )";
Expand Down
9 changes: 5 additions & 4 deletions plugins/Live/Model.php
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,12 @@ private function getLastMinutesCounterForQuery($idSite, $lastMinutes, $segment,
$now = $now ?: time();

$bind = $idSites;
$bind[] = Date::factory($now - $lastMinutes * 60)->toString('Y-m-d H:i:s');
$startDate = Date::factory($now - $lastMinutes * 60);
$bind[] = $startDate->toString('Y-m-d H:i:s');

$where = $whereIdSites . "AND " . $where;

$segment = new Segment($segment, $idSite);
$segment = new Segment($segment, $idSite, $startDate, $endDate = null);
$query = $segment->getSelectQuery($select, $from, $where, $bind);

$numVisitors = Db::getReader()->fetchOne($query['sql'], $query['bind']);
Expand Down Expand Up @@ -428,7 +429,7 @@ public function queryAdjacentVisitorId($idSite, $visitorId, $visitLastActionTime
$orderBy = "MAX(log_visit.visit_last_action_time) $orderByDir";
$groupBy = "log_visit.idvisitor";

$segment = new Segment($segment, $idSite);
$segment = new Segment($segment, $idSite, $dateOneDayAgo, $dateOneDayInFuture);
$queryInfo = $segment->getSelectQuery($select, $from, $where, $whereBind, $orderBy, $groupBy);

$sql = "SELECT sub.idvisitor, sub.visit_last_action_time FROM ({$queryInfo['sql']}) as sub
Expand Down Expand Up @@ -466,7 +467,7 @@ public function makeLogVisitsQueryString($idSite, $startDate, $endDate, $segment
$filterSortOrder = 'DESC';
}

$segment = new Segment($segment, $idSite);
$segment = new Segment($segment, $idSite, $startDate, $endDate);

// Subquery to use the indexes for ORDER BY
$select = "log_visit.*";
Expand Down
2 changes: 1 addition & 1 deletion plugins/SegmentEditor/SegmentEditor.php
Original file line number Diff line number Diff line change
Expand Up @@ -201,12 +201,12 @@ private function getSegmentIfIsUnprocessed()
if (empty($segment)) {
return null;
}
$segment = new Segment($segment, [$idSite]);

// get period
$date = Common::getRequestVar('date', false);
$periodStr = Common::getRequestVar('period', false);
$period = Period\Factory::build($periodStr, $date);
$segment = new Segment($segment, [$idSite], $period->getDateStart(), $period->getDateEnd());

// check if archiving is enabled. if so, the segment should have been processed.
$isArchivingDisabled = Rules::isArchivingDisabledFor([$idSite], $segment, $period);
Expand Down
2 changes: 1 addition & 1 deletion plugins/Transitions/API.php
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,9 @@ public function getTransitionsForAction($actionName, $actionType, $idSite, $peri
}

// prepare log aggregator
$segment = new Segment($segment, $idSite);
$site = new Site($idSite);
$period = Period\Factory::build($period, $date);
$segment = new Segment($segment, $idSite, $period->getDateStart(), $period->getDateEnd());
$params = new ArchiveProcessor\Parameters($site, $period, $segment);
$logAggregator = new LogAggregator($params);

Expand Down
2 changes: 1 addition & 1 deletion tests/PHPUnit/Integration/Archive/ChunksTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ private function createArchiveProcessorParameters()
{
$oPeriod = PeriodFactory::makePeriodFromQueryParams('UTC', 'day', $this->date);

$segment = new Segment(false, array(1));
$segment = new Segment(false, array(1), $oPeriod->getDateStart(), $oPeriod->getDateEnd());
$params = new Parameters(new Site(1), $oPeriod, $segment);

return $params;
Expand Down
2 changes: 1 addition & 1 deletion tests/PHPUnit/Integration/ArchiveProcessingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ private function _createArchiveProcessor($periodLabel, $dateLabel, $siteTimezone
$site = $this->_createWebsite($siteTimezone);
$date = Date::factory($dateLabel);
$period = Period\Factory::build($periodLabel, $date);
$segment = new Segment('', $site->getId());
$segment = new Segment('', $site->getId(), $period->getDateStart(), $period->getDateEnd());

$params = new ArchiveProcessor\Parameters($site, $period, $segment);
return new ArchiveProcessorTest($params);
Expand Down
Loading