diff --git a/core/Archive.php b/core/Archive.php
index 8f3a5fdbfa5..10aca77b8c0 100644
--- a/core/Archive.php
+++ b/core/Archive.php
@@ -234,6 +234,24 @@ public static function factory(Segment $segment, array $periods, array $idSites,
             $isMultipleDate);
     }
 
+    /**
+     * Returns whether an archive is a segment archive for "today", i.e. one that is left out when
+     * segment archiving for today is being skipped.
+     *
+     * @param Site $site the site whose timezone determines what "today" is
+     * @param Period $period the period of the archive
+     * @param Segment $segment the segment of the archive
+     * @return bool true only if the period is a day period, the segment is not empty and the period's
+     *              start date equals the current date in the site's timezone
+     */
+    public static function shouldSkipArchiveIfSkippingSegmentArchiveForToday(Site $site, Period $period, Segment $segment)
+    {
+        $now = Date::factory('now', $site->getTimezone());
+        return $period->getLabel() === 'day'
+            && !$segment->isEmpty()
+            && $period->getDateStart()->toString() === $now->toString();
+    }
+
     /**
      * Queries and returns metric data in an array.
      *
@@ -580,10 +598,8 @@ private function cacheArchiveIdsAfterLaunching($archiveGroups, $plugins)
             foreach ($this->params->getIdSites() as $idSite) {
                 $site = new Site($idSite);
 
-                if ($period->getLabel() === 'day'
-                    && !$this->params->getSegment()->isEmpty()
-                    && Common::getRequestVar('skipArchiveSegmentToday', 0, 'int')
-                    && $period->getDateStart()->toString() === Date::factory('now', $site->getTimezone())->toString()
+                if (Common::getRequestVar('skipArchiveSegmentToday', 0, 'int')
+                    && self::shouldSkipArchiveIfSkippingSegmentArchiveForToday($site, $period, $this->params->getSegment())
                 ) {
                     Log::debug("Skipping archive %s for %s as segment today is disabled", $period->getLabel(), $period->getPrettyString());
                     continue;
diff --git a/core/CronArchive.php b/core/CronArchive.php
index fd16a424c75..a2335520fb4 100644
--- a/core/CronArchive.php
+++ b/core/CronArchive.php
@@ -161,13 +161,6 @@ class CronArchive
      */
     public $maxConcurrentArchivers = false;
 
-    /**
-     * If enabled, segments will be only archived for yesterday, but not today. If the segment was created recently,
-     * then it will still be archived for today and the setting will be ignored for this segment.
-     * @var bool
-     */
-    public $skipSegmentsToday = false;
-
     private $archivingStartingTime;
 
     private $formatter;
@@ -312,10 +305,6 @@ public function init()
             $this->archiveFilter->logFilterInfo($this->logger);
         }
 
-        if ($this->skipSegmentsToday) {
-            $this->logger->info('Will skip segments archiving for today unless they were created recently');
-        }
-
         /**
          * This event is triggered after a CronArchive instance is initialized.
          *
@@ -526,7 +515,7 @@ private function generateUrlToArchiveFromArchiveInfo($archive)
         $url = $this->makeRequestUrl($url);
 
         if (!empty($segment)) {
-            $shouldSkipToday = !$this->wasSegmentChangedRecently($segment,
+            $shouldSkipToday = $this->archiveFilter->isSkipSegmentsForToday() && !$this->wasSegmentChangedRecently($segment,
                 $this->segmentArchiving->getAllSegments());
 
             if ($shouldSkipToday) {
diff --git a/core/CronArchive/ArchiveFilter.php b/core/CronArchive/ArchiveFilter.php
index 081426e2495..7789ed556b2 100644
--- a/core/CronArchive/ArchiveFilter.php
+++ b/core/CronArchive/ArchiveFilter.php
@@ -8,11 +8,14 @@
 
 namespace Piwik\CronArchive;
 
+use Piwik\Archive;
 use Piwik\Container\StaticContainer;
 use Piwik\Date;
 use Piwik\Period\Factory as PeriodFactory;
 use Piwik\Piwik;
 use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel;
+use Piwik\Segment;
+use Piwik\Site;
 use Psr\Log\LoggerInterface;
 
 class ArchiveFilter
@@ -42,6 +45,13 @@ class ArchiveFilter
      */
     private $periodIdsToLabels;
 
+    /**
+     * If enabled, segments will be only archived for yesterday, but not today. If the segment was created recently,
+     * then it will still be archived for today and the setting will be ignored for this segment.
+     * @var bool
+     */
+    private $skipSegmentsForToday = false;
+
     public function __construct()
     {
         $this->setRestrictToPeriods('');
@@ -67,6 +77,18 @@ public function filterArchive($archive)
             }
         }
 
+        // Only segment archives can be skipped here, so do not build Site/Period/Segment objects for other archives.
+        // NOTE(review): this check does not look at whether the segment was created or changed recently -
+        // confirm the "unless created recently" exception is still honoured for archives rejected here.
+        if ($this->skipSegmentsForToday && !empty($segment)) {
+            $site = new Site($archive['idsite']);
+            $period = PeriodFactory::build($this->periodIdsToLabels[$archive['period']], $archive['date1']);
+            $segmentObj = new Segment($segment, [$archive['idsite']]);
+            if (Archive::shouldSkipArchiveIfSkippingSegmentArchiveForToday($site, $period, $segmentObj)) {
+                return "skipping segment archives for today";
+            }
+        }
+
         if (!empty($this->restrictToDateRange)
             && ($this->restrictToDateRange[0]->isLater(Date::factory($archive['date2']))
                 || $this->restrictToDateRange[1]->isEarlier(Date::factory($archive['date1']))
@@ -89,6 +111,7 @@ public function logFilterInfo(LoggerInterface $logger)
     {
         $this->logForcedSegmentInfo($logger);
         $this->logForcedPeriodInfo($logger);
+        $this->logSkipSegmentInfo($logger);
     }
 
     private function logForcedSegmentInfo(LoggerInterface $logger)
@@ -190,12 +213,30 @@ public function setSegmentsToForce(array $segments)
         $this->segmentsToForce = $segments;
     }
 
+    /**
+     * Enables or disables skipping of segment archives for today.
+     *
+     * @param bool $skipSegmentsForToday any value is accepted and cast to bool, so that the bool return type
+     *                                   of isSkipSegmentsForToday() cannot fail on a stored non-bool such as null
+     */
+    public function setSkipSegmentsForToday($skipSegmentsForToday)
+    {
+        $this->skipSegmentsForToday = (bool) $skipSegmentsForToday;
+    }
+
+    /**
+     * @return bool whether segment archives for today are skipped
+     */
+    public function isSkipSegmentsForToday(): bool
+    {
+        return $this->skipSegmentsForToday;
+    }
+
     /**
      * @return array
      */
     private function getPeriodsToProcess()
     {
-
         return $this->restrictToPeriods;
     }
 
@@ -229,4 +270,14 @@ public function setRestrictToPeriods($restrictToPeriods)
         $this->restrictToPeriods = array_intersect($this->restrictToPeriods, $this->getDefaultPeriodsToProcess());
         $this->restrictToPeriods = array_intersect($this->restrictToPeriods, PeriodFactory::getPeriodsEnabledForAPI());
     }
+
+    /**
+     * Logs an info message if skipping segment archives for today is enabled.
+     */
+    private function logSkipSegmentInfo(LoggerInterface $logger)
+    {
+        if ($this->skipSegmentsForToday) {
+            $logger->info('Will skip segments archiving for today unless they were created recently');
+        }
+    }
 }
\ No newline at end of file
diff --git a/plugins/CoreConsole/Commands/CoreArchiver.php b/plugins/CoreConsole/Commands/CoreArchiver.php
index 44719e7b64c..6407f4bbe2a 100644
--- a/plugins/CoreConsole/Commands/CoreArchiver.php
+++ b/plugins/CoreConsole/Commands/CoreArchiver.php
@@ -41,7 +41,6 @@ public static function makeArchiver($url, InputInterface $input)
         $archiver->dateLastForced = $input->getOption('force-date-last-n');
         $archiver->concurrentRequestsPerWebsite = $input->getOption('concurrent-requests-per-website');
         $archiver->maxConcurrentArchivers = $input->getOption('concurrent-archivers');
-        $archiver->skipSegmentsToday = $input->getOption('skip-segments-today');
         $archiver->shouldArchiveAllSites = $input->getOption('force-all-websites');
         $archiver->setUrlToPiwik($url);
 
@@ -49,6 +48,7 @@ public static function makeArchiver($url, InputInterface $input)
         $archiveFilter->setDisableSegmentsArchiving($input->getOption('skip-all-segments'));
         $archiveFilter->setRestrictToDateRange($input->getOption("force-date-range"));
         $archiveFilter->setRestrictToPeriods($input->getOption("force-periods"));
+        $archiveFilter->setSkipSegmentsForToday($input->getOption('skip-segments-today'));
 
         $segmentIds = $input->getOption('force-idsegments');
         $segmentIds = explode(',', $segmentIds);
diff --git a/tests/PHPUnit/Integration/CronArchive/ArchiveFilterTest.php b/tests/PHPUnit/Integration/CronArchive/ArchiveFilterTest.php
index 53a9b684491..3331cbc881c 100644
--- a/tests/PHPUnit/Integration/CronArchive/ArchiveFilterTest.php
+++ b/tests/PHPUnit/Integration/CronArchive/ArchiveFilterTest.php
@@ -11,6 +11,7 @@
 
 use Piwik\ArchiveProcessor\Rules;
 use Piwik\CronArchive\ArchiveFilter;
+use Piwik\Date;
 use Piwik\Plugins\SegmentEditor\API as SegmentAPI;
 use Piwik\Tests\Framework\Fixture;
 use Piwik\Tests\Framework\TestCase\IntegrationTestCase;
@@ -34,6 +35,28 @@ public function test_setSegmentsToForceFromSegmentIds_CorrectlyGetsSegmentDefini
         $this->assertEquals($expectedSegments, array_values($cronarchive->getSegmentsToForce()));
     }
 
+    public function test_filterArchive_filtersSegmentArchivesForToday_IfSkippingSegmentsForToday()
+    {
+        Date::$now = strtotime('2020-03-04 04:05:06');
+
+        Fixture::createWebsite('2014-12-12 00:01:02', 0, false, false, 1, null, null, 'America/Los_Angeles');
+
+        $filter = new ArchiveFilter();
+        $filter->setSkipSegmentsForToday(true);
+
+        $result = $filter->filterArchive(['idsite' => 1, 'period' => 1, 'date1' => '2020-03-04', 'segment' => 'browserCode==IE']);
+        $this->assertFalse($result);
+
+        $result = $filter->filterArchive(['idsite' => 1, 'period' => 1, 'date1' => '2020-03-03', 'segment' => 'browserCode==IE']);
+        $this->assertEquals('skipping segment archives for today', $result);
+
+        $result = $filter->filterArchive(['idsite' => 1, 'period' => 1, 'date1' => '2020-03-02', 'segment' => 'browserCode==IE']);
+        $this->assertFalse($result);
+
+        $result = $filter->filterArchive(['idsite' => 1, 'period' => 2, 'date1' => '2020-03-03', 'segment' => 'browserCode==IE']);
+        $this->assertFalse($result);
+    }
+
     public function test_filterArchive_filtersSegmentArchives_IfSegmentArchivingIsDisabled()
     {
         $filter = new ArchiveFilter();
diff --git a/tests/PHPUnit/Integration/CronArchive/QueueConsumerTest.php b/tests/PHPUnit/Integration/CronArchive/QueueConsumerTest.php
index bffdb0796a4..450648e030a 100644
--- a/tests/PHPUnit/Integration/CronArchive/QueueConsumerTest.php
+++ b/tests/PHPUnit/Integration/CronArchive/QueueConsumerTest.php
@@ -354,7 +354,137 @@ public function test_invalidateConsumeOrder()
         $this->assertEquals($uniqueInvalidationDescs, $invalidationDescs, "Found duplicate archives being processed.");
     }
 
-    private function makeTestArchiveFilter($restrictToDateRange = null, $restrictToPeriods = null, $segmentsToForce = null, $disableSegmentsArchiving = false)
+    public function test_skipSegmentsToday()
+    {
+        Date::$now = strtotime('2018-03-04 01:00:00');
+
+        Fixture::createWebsite('2015-02-03');
+
+        Rules::setBrowserTriggerArchiving(false);
+        API::getInstance()->add('testegment', 'browserCode==IE', false, true);
+        API::getInstance()->add('testegment', 'browserCode==FF', false, true);
+        Rules::setBrowserTriggerArchiving(true);
+
+        // force archiving so we don't skip those without visits
+        Piwik::addAction('Archiving.getIdSitesToArchiveWhenNoVisits', function (&$idSites) {
+            $idSites[] = 1;
+        });
+
+        $cronArchive = new CronArchive();
+        $cronArchive->init();
+
+        $archiveFilter = $this->makeTestArchiveFilter(null, null, null, false, true);
+
+        $queueConsumer = new QueueConsumer(
+            StaticContainer::get(LoggerInterface::class),
+            new FixedSiteIds([1]),
+            3,
+            24,
+            new Model(),
+            new SegmentArchiving('beginning_of_time'),
+            $cronArchive,
+            new RequestParser(true),
+            $archiveFilter
+        );
+
+        $segmentHash1 = (new Segment('browserCode==IE', [1]))->getHash();
+        $segmentHash2 = (new Segment('browserCode==FF', [1]))->getHash();
+
+        $invalidations = [
+            ['idarchive' => 1, 'name' => 'done' . $segmentHash1, 'idsite' => 1, 'date1' => '2018-03-04', 'date2' => '2018-03-04', 'period' => 1, 'report' => null],
+            ['idarchive' => 1, 'name' => 'done' . $segmentHash2, 'idsite' => 1, 'date1' => '2018-03-04', 'date2' => '2018-03-04', 'period' => 1, 'report' => null],
+            ['idarchive' => 1, 'name' => 'done' . $segmentHash1, 'idsite' => 1, 'date1' => '2018-03-03', 'date2' => '2018-03-03', 'period' => 1, 'report' => null],
+            ['idarchive' => 1, 'name' => 'done' . $segmentHash2 . '.ExamplePlugin', 'idsite' => 1, 'date1' => '2018-03-04', 'date2' => '2018-03-04', 'period' => 1, 'report' => null],
+            ['idarchive' => 1, 'name' => 'done' . $segmentHash1, 'idsite' => 1, 'date1' => '2018-03-01', 'date2' => '2018-03-31', 'period' => 3, 'report' => null],
+            ['idarchive' => 1, 'name' => 'done', 'idsite' => 1, 'date1' => '2018-03-04', 'date2' => '2018-03-04', 'period' => 1, 'report' => null],
+        ];
+        shuffle($invalidations);
+
+        $this->insertInvalidations($invalidations);
+
+        $iteratedInvalidations = [];
+        while (true) {
+            $next = $queueConsumer->getNextArchivesToProcess();
+            if ($next === null) {
+                break;
+            }
+
+            foreach ($next as &$item) {
+                Db::query("UPDATE " . Common::prefixTable('archive_invalidations') . " SET status = 1 WHERE idinvalidation = ?", [$item['idinvalidation']]);
+
+                unset($item['periodObj']);
+                unset($item['idinvalidation']);
+            }
+            unset($item); // drop the by-reference loop variable so it cannot alias the stored array
+
+            $iteratedInvalidations[] = $next;
+        }
+
+        $expectedInvalidationsFound = [
+            array (
+                array (
+                    'idarchive' => '1',
+                    'idsite' => '1',
+                    'date1' => '2018-03-04',
+                    'date2' => '2018-03-04',
+                    'period' => '1',
+                    'name' => 'done',
+                    'report' => NULL,
+                    'plugin' => NULL,
+                    'segment' => '',
+                ),
+                array (
+                    'idarchive' => '1',
+                    'idsite' => '1',
+                    'date1' => '2018-03-03',
+                    'date2' => '2018-03-03',
+                    'period' => '1',
+                    'name' => 'done5f4f9bafeda3443c3c2d4b2ef4dffadc',
+                    'report' => NULL,
+                    'plugin' => NULL,
+                    'segment' => 'browserCode==IE',
+                ),
+            ),
+            array (
+                0 =>
+                array (
+                    'idarchive' => '1',
+                    'idsite' => '1',
+                    'date1' => '2018-03-01',
+                    'date2' => '2018-03-31',
+                    'period' => '3',
+                    'name' => 'done5f4f9bafeda3443c3c2d4b2ef4dffadc',
+                    'report' => NULL,
+                    'plugin' => NULL,
+                    'segment' => 'browserCode==IE',
+                ),
+            ),
+            array (// end of idsite=1
+            ),
+        ];
+
+        try {
+            $this->assertEquals($expectedInvalidationsFound, $iteratedInvalidations);
+        } catch (\Exception $ex) {
+            print "\nInvalidations inserted:\n" . var_export($invalidations, true) . "\n";
+            throw $ex;
+        }
+
+        // automated check for no duplicates
+        $invalidationDescs = [];
+        foreach ($iteratedInvalidations as $group) {
+            foreach ($group as $invalidation) {
+                unset($invalidation['idarchive']);
+                $invalidationDescs[] = implode('.', $invalidation);
+            }
+        }
+        $uniqueInvalidationDescs = array_unique($invalidationDescs);
+
+        $this->assertEquals($uniqueInvalidationDescs, $invalidationDescs, "Found duplicate archives being processed.");
+    }
+
+    private function makeTestArchiveFilter($restrictToDateRange = null, $restrictToPeriods = null, $segmentsToForce = null,
+                                           $disableSegmentsArchiving = false, $skipSegmentsToday = false)
     {
         $archiveFilter = new CronArchive\ArchiveFilter();
         if ($restrictToDateRange) {
@@ -367,6 +497,9 @@ private function makeTestArchiveFilter($restrictToDateRange = null, $restrictToP
         if ($segmentsToForce) {
             $archiveFilter->setSegmentsToForceFromSegmentIds($segmentsToForce);
         }
+        if ($skipSegmentsToday) {
+            $archiveFilter->setSkipSegmentsForToday(true);
+        }
 
         return $archiveFilter;
     }
diff --git a/tests/PHPUnit/Integration/CronArchiveTest.php b/tests/PHPUnit/Integration/CronArchiveTest.php
index 2246dca80fd..8fde5fde7f1 100644
--- a/tests/PHPUnit/Integration/CronArchiveTest.php
+++ b/tests/PHPUnit/Integration/CronArchiveTest.php
@@ -226,7 +226,9 @@ public function test_skipSegmentsToday()
 
         $archiver = new CronArchive(null, $logger);
         $archiver->init();
-        $archiver->skipSegmentsToday = true;
+        $archiveFilter = new CronArchive\ArchiveFilter();
+        $archiveFilter->setSkipSegmentsForToday(true);
+        $archiver->setArchiveFilter($archiveFilter);
         $archiver->shouldArchiveAllSites = true;
         $archiver->shouldArchiveAllPeriodsSince = true;
         $archiver->init();