From b644addbef13cca5fe060fe5c7b2381b87c6170b Mon Sep 17 00:00:00 2001
From: David Roberts
Date: Sun, 19 May 2019 10:32:18 +0100
Subject: [PATCH] Revert "[7.2][ML] Improve hard_limit audit message (#486) (#487)"

This reverts commit 7a782a9715e36711740c78724f490e1516552973.
---
 docs/CHANGELOG.asciidoc                       |  2 -
 include/model/CResourceMonitor.h              | 15 ++-----
 lib/api/CModelSizeStatsJsonWriter.cc          | 20 +++++-----
 lib/api/unittest/CJsonOutputWriterTest.cc     | 27 +++++--------
 .../unittest/CModelSnapshotJsonWriterTest.cc  | 10 +----
 lib/model/CResourceMonitor.cc                 | 39 +++++--------------
 6 files changed, 35 insertions(+), 78 deletions(-)

diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc
index 454d67135b..f66937e080 100644
--- a/docs/CHANGELOG.asciidoc
+++ b/docs/CHANGELOG.asciidoc
@@ -44,8 +44,6 @@ to the model. (See {ml-pull}214[#214].)
 
 * Stop linking to libcrypt on Linux. (See {ml-pull}480[#480].)
 
-* Improvements to hard_limit audit message. (See {ml-pull}486[#486].)
-
 === Bug Fixes
 
 * Handle NaNs when detrending seasonal components. {ml-pull}408[#408]
diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h
index b6088d99de..f1c3077d64 100644
--- a/include/model/CResourceMonitor.h
+++ b/include/model/CResourceMonitor.h
@@ -34,15 +34,12 @@ class MODEL_EXPORT CResourceMonitor {
 public:
     struct MODEL_EXPORT SResults {
         std::size_t s_Usage;
-        std::size_t s_AdjustedUsage;
         std::size_t s_ByFields;
         std::size_t s_PartitionFields;
         std::size_t s_OverFields;
         std::size_t s_AllocationFailures;
         model_t::EMemoryStatus s_MemoryStatus;
         core_t::TTime s_BucketStartTime;
-        std::size_t s_BytesExceeded;
-        std::size_t s_BytesMemoryLimit;
     };
 
 public:
@@ -68,6 +65,10 @@ class MODEL_EXPORT CResourceMonitor {
     //! taking up too much memory and further allocations should be banned
     bool areAllocationsAllowed() const;
 
+    //! Query the resource monitor to found out if it's Ok to
+    //! create structures of a certain size
+    bool areAllocationsAllowed(std::size_t size) const;
+
     //! Return the amount of remaining space for allocations
     std::size_t allocationLimit() const;
 
@@ -163,11 +164,6 @@ class MODEL_EXPORT CResourceMonitor {
     //! Returns the sum of used memory plus any extra memory
     std::size_t totalMemory() const;
 
-    //! Adjusts the amount of memory reported to take into
-    //! account the current value of the byte limit margin and the effects
-    //! of background persistence.
-    std::size_t adjustedUsage(std::size_t usage) const;
-
private:
    //! The registered collection of components
    TDetectorPtrSizeUMap m_Detectors;
@@ -230,9 +226,6 @@ class MODEL_EXPORT CResourceMonitor {
    //! Don't do any sort of memory checking if this is set
    bool m_NoLimit;

-    //! The number of bytes over the high limit for memory usage at the last allocation failure
-    std::size_t m_CurrentBytesExceeded;
-
    //! Test friends
    friend class ::CResourceMonitorTest;
    friend class ::CResourceLimitTest;
diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc
index 0959812912..ff2b94286d 100644
--- a/lib/api/CModelSizeStatsJsonWriter.cc
+++ b/lib/api/CModelSizeStatsJsonWriter.cc
@@ -16,8 +16,6 @@ namespace {
 const std::string JOB_ID("job_id");
 const std::string MODEL_SIZE_STATS("model_size_stats");
 const std::string MODEL_BYTES("model_bytes");
-const std::string MODEL_BYTES_EXCEEDED("model_bytes_exceeded");
-const std::string MODEL_BYTES_MEMORY_LIMIT("model_bytes_memory_limit");
 const std::string TOTAL_BY_FIELD_COUNT("total_by_field_count");
 const std::string TOTAL_OVER_FIELD_COUNT("total_over_field_count");
 const std::string TOTAL_PARTITION_FIELD_COUNT("total_partition_field_count");
@@ -35,15 +33,17 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId,
     writer.String(JOB_ID);
     writer.String(jobId);
 
-    writer.String(MODEL_BYTES);
-    writer.Uint64(results.s_AdjustedUsage);
-
-    writer.String(MODEL_BYTES_EXCEEDED);
-    writer.Uint64(results.s_BytesExceeded);
-
-    writer.String(MODEL_BYTES_MEMORY_LIMIT);
-    writer.Uint64(results.s_BytesMemoryLimit);
+    // Background persist causes the memory size to double due to copying
+    // the models. On top of that, after the persist is done we may not
+    // be able to retrieve that memory back. Thus, we report twice the
+    // memory usage in order to allow for that.
+    // See https://github.com/elastic/x-pack-elasticsearch/issues/1020.
+    // Issue https://github.com/elastic/x-pack-elasticsearch/issues/857
+    // discusses adding an option to perform only foreground persist.
+    // If that gets implemented, we should only double when background
+    // persist is configured.
+    writer.Uint64(results.s_Usage * 2);
 
     writer.String(TOTAL_BY_FIELD_COUNT);
     writer.Uint64(results.s_ByFields);
diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc
index 741e45d68a..135f9267fa 100644
--- a/lib/api/unittest/CJsonOutputWriterTest.cc
+++ b/lib/api/unittest/CJsonOutputWriterTest.cc
@@ -1555,15 +1555,12 @@ void CJsonOutputWriterTest::testReportMemoryUsage() {
 
     ml::model::CResourceMonitor::SResults resourceUsage;
     resourceUsage.s_Usage = 1;
-    resourceUsage.s_AdjustedUsage = 2;
-    resourceUsage.s_ByFields = 3;
-    resourceUsage.s_PartitionFields = 4;
-    resourceUsage.s_OverFields = 5;
-    resourceUsage.s_AllocationFailures = 6;
+    resourceUsage.s_ByFields = 2;
+    resourceUsage.s_PartitionFields = 3;
+    resourceUsage.s_OverFields = 4;
+    resourceUsage.s_AllocationFailures = 5;
     resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit;
-    resourceUsage.s_BucketStartTime = 7;
-    resourceUsage.s_BytesExceeded = 8;
-    resourceUsage.s_BytesMemoryLimit = 9;
+    resourceUsage.s_BucketStartTime = 6;
 
     writer.reportMemoryUsage(resourceUsage);
     writer.endOutputBatch(false, 1ul);
@@ -1583,15 +1580,15 @@
     CPPUNIT_ASSERT(sizeStats.HasMember("model_bytes"));
     CPPUNIT_ASSERT_EQUAL(2, sizeStats["model_bytes"].GetInt());
     CPPUNIT_ASSERT(sizeStats.HasMember("total_by_field_count"));
-    CPPUNIT_ASSERT_EQUAL(3, sizeStats["total_by_field_count"].GetInt());
+    CPPUNIT_ASSERT_EQUAL(2, sizeStats["total_by_field_count"].GetInt());
     CPPUNIT_ASSERT(sizeStats.HasMember("total_partition_field_count"));
-    CPPUNIT_ASSERT_EQUAL(4, sizeStats["total_partition_field_count"].GetInt());
+    CPPUNIT_ASSERT_EQUAL(3, sizeStats["total_partition_field_count"].GetInt());
     CPPUNIT_ASSERT(sizeStats.HasMember("total_over_field_count"));
-    CPPUNIT_ASSERT_EQUAL(5, sizeStats["total_over_field_count"].GetInt());
+    CPPUNIT_ASSERT_EQUAL(4, sizeStats["total_over_field_count"].GetInt());
     CPPUNIT_ASSERT(sizeStats.HasMember("bucket_allocation_failures_count"));
-    CPPUNIT_ASSERT_EQUAL(6, sizeStats["bucket_allocation_failures_count"].GetInt());
+    CPPUNIT_ASSERT_EQUAL(5, sizeStats["bucket_allocation_failures_count"].GetInt());
     CPPUNIT_ASSERT(sizeStats.HasMember("timestamp"));
-    CPPUNIT_ASSERT_EQUAL(7000, sizeStats["timestamp"].GetInt());
+    CPPUNIT_ASSERT_EQUAL(6000, sizeStats["timestamp"].GetInt());
     CPPUNIT_ASSERT(sizeStats.HasMember("memory_status"));
     CPPUNIT_ASSERT_EQUAL(std::string("hard_limit"),
                          std::string(sizeStats["memory_status"].GetString()));
@@ -1599,10 +1596,6 @@
     int64_t nowMs = ml::core::CTimeUtils::now() * 1000ll;
     CPPUNIT_ASSERT(nowMs >= sizeStats["log_time"].GetInt64());
     CPPUNIT_ASSERT(nowMs + 1000ll >= sizeStats["log_time"].GetInt64());
-    CPPUNIT_ASSERT(sizeStats.HasMember("model_bytes_exceeded"));
-    CPPUNIT_ASSERT_EQUAL(8, sizeStats["model_bytes_exceeded"].GetInt());
-    CPPUNIT_ASSERT(sizeStats.HasMember("model_bytes_memory_limit"));
-    CPPUNIT_ASSERT_EQUAL(9, sizeStats["model_bytes_memory_limit"].GetInt());
 }
 
 void CJsonOutputWriterTest::testWriteScheduledEvent() {
diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc
index d0002aabef..9d493ea703 100644
--- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc
+++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc
@@ -33,15 +33,12 @@ void CModelSnapshotJsonWriterTest::testWrite() {
     {
         model::CResourceMonitor::SResults modelSizeStats{
             10000,                     // bytes used
-            20000,                     // bytes used (adjusted)
             3,                         // # by fields
             1,                         // # partition fields
             150,                       // # over fields
             4,                         // # allocation failures
             model_t::E_MemoryStatusOk, // memory status
-            core_t::TTime(1521046309), // bucket start time
-            0,                         // model bytes exceeded
-            50000                      // model bytes memory limit
+            core_t::TTime(1521046309)  // bucket start time
         };
 
         CModelSnapshotJsonWriter::SModelSnapshotReport report{
@@ -117,11 +114,6 @@ void CModelSnapshotJsonWriterTest::testWrite() {
     CPPUNIT_ASSERT(modelSizeStats.HasMember("timestamp"));
     CPPUNIT_ASSERT_EQUAL(int64_t(1521046309000), modelSizeStats["timestamp"].GetInt64());
     CPPUNIT_ASSERT(modelSizeStats.HasMember("log_time"));
-    CPPUNIT_ASSERT(modelSizeStats.HasMember("model_bytes_exceeded"));
-    CPPUNIT_ASSERT_EQUAL(int64_t(0), modelSizeStats["model_bytes_exceeded"].GetInt64());
-    CPPUNIT_ASSERT(modelSizeStats.HasMember("model_bytes_memory_limit"));
-    CPPUNIT_ASSERT_EQUAL(int64_t(50000),
-                         modelSizeStats["model_bytes_memory_limit"].GetInt64());
 
     CPPUNIT_ASSERT(snapshot.HasMember("quantiles"));
     const rapidjson::Value& quantiles = snapshot["quantiles"];
diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc
index 842bd9866c..29ddb502b0 100644
--- a/lib/model/CResourceMonitor.cc
+++ b/lib/model/CResourceMonitor.cc
@@ -35,8 +35,7 @@ CResourceMonitor::CResourceMonitor(double byteLimitMargin)
       m_HasPruningStarted(false), m_PruneThreshold(0), m_LastPruneTime(0),
       m_PruneWindow(std::numeric_limits<std::size_t>::max()),
       m_PruneWindowMaximum(std::numeric_limits<std::size_t>::max()),
-      m_PruneWindowMinimum(std::numeric_limits<std::size_t>::max()),
-      m_NoLimit(false), m_CurrentBytesExceeded(0) {
+      m_PruneWindowMinimum(std::numeric_limits<std::size_t>::max()), m_NoLimit(false) {
     this->updateMemoryLimitsAndPruneThreshold(DEFAULT_MEMORY_LIMIT_MB);
 }
 
@@ -109,21 +108,18 @@ void CResourceMonitor::refresh(CAnomalyDetector& detector) {
 
 void CResourceMonitor::forceRefresh(CAnomalyDetector& detector) {
     this->memUsage(&detector);
-
+    core::CProgramCounters::counter(counter_t::E_TSADMemoryUsage) = this->totalMemory();
+    LOG_TRACE(<< "Checking allocations: currently at " << this->totalMemory());
     this->updateAllowAllocations();
 }
 
 void CResourceMonitor::updateAllowAllocations() {
     std::size_t total{this->totalMemory()};
-    core::CProgramCounters::counter(counter_t::E_TSADMemoryUsage) = total;
-    LOG_TRACE(<< "Checking allocations: currently at " << total);
     if (m_AllowAllocations) {
         if (total > this->highLimit()) {
             LOG_INFO(<< "Over current allocation high limit. " << total
                      << " bytes used, the limit is " << this->highLimit());
             m_AllowAllocations = false;
-            std::size_t bytesExceeded{total - this->highLimit()};
-            m_CurrentBytesExceeded = this->adjustedUsage(bytesExceeded);
         }
     } else if (total < this->lowLimit()) {
         LOG_INFO(<< "Below current allocation low limit. " << total
@@ -208,6 +204,13 @@ bool CResourceMonitor::areAllocationsAllowed() const {
     return m_AllowAllocations;
 }
 
+bool CResourceMonitor::areAllocationsAllowed(std::size_t size) const {
+    if (m_AllowAllocations) {
+        return this->totalMemory() + size < this->highLimit();
+    }
+    return false;
+}
+
 std::size_t CResourceMonitor::allocationLimit() const {
     return this->highLimit() - std::min(this->highLimit(), this->totalMemory());
 }
@@ -265,9 +268,6 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
     res.s_OverFields = 0;
     res.s_PartitionFields = 0;
     res.s_Usage = this->totalMemory();
-    res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage);
-    res.s_BytesMemoryLimit = 2 * m_ByteLimitHigh;
-    res.s_BytesExceeded = m_CurrentBytesExceeded;
     res.s_AllocationFailures = 0;
     res.s_MemoryStatus = m_MemoryStatus;
     res.s_BucketStartTime = bucketStartTime;
@@ -281,25 +281,6 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
     return res;
 }
 
-std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const {
-    // Background persist causes the memory size to double due to copying
-    // the models. On top of that, after the persist is done we may not
-    // be able to retrieve that memory back. Thus, we report twice the
-    // memory usage in order to allow for that.
-    // See https://github.com/elastic/x-pack-elasticsearch/issues/1020.
-    // Issue https://github.com/elastic/x-pack-elasticsearch/issues/857
-    // discusses adding an option to perform only foreground persist.
-    // If that gets implemented, we should only double when background
-    // persist is configured.
-
-    // We also scale the reported memory usage by the inverse of the byte limit margin.
-    // This gives the user a fairer indication of how close the job is to hitting
-    // the model memory limit in a concise manner (as the limit is scaled down by
-    // the margin during the beginning period of the job's existence).
-    size_t adjustedUsage = static_cast<std::size_t>(2 * usage / m_ByteLimitMargin);
-    return adjustedUsage;
-}
-
 void CResourceMonitor::acceptAllocationFailureResult(core_t::TTime time) {
     m_MemoryStatus = model_t::E_MemoryStatusHardLimit;
     ++m_AllocationFailures[time];
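
Illustration of the two model_bytes calculations touched by this revert (not part of the patch; the margin and usage values below are made-up examples). The revert goes back to reporting twice the raw usage, while the reverted #486 change additionally divided the doubled figure by the byte limit margin and reported the separate model_bytes_exceeded and model_bytes_memory_limit fields. A minimal standalone C++ sketch of the arithmetic:

    #include <cstddef>
    #include <iostream>

    int main() {
        std::size_t usage = 10000;    // raw model memory usage in bytes (example value)
        double byteLimitMargin = 0.7; // hypothetical initial byte limit margin

        // Behaviour restored by this revert: report double the usage to leave
        // headroom for the model copy made during background persistence.
        std::size_t modelBytesAfterRevert = usage * 2;

        // Behaviour removed by this revert (#486): additionally scale by the
        // inverse of the margin, so the figure tracks the scaled-down limit
        // applied early in a job's lifetime.
        std::size_t modelBytesBeforeRevert =
            static_cast<std::size_t>(2 * usage / byteLimitMargin);

        std::cout << "model_bytes after revert:  " << modelBytesAfterRevert << '\n'   // 20000
                  << "model_bytes before revert: " << modelBytesBeforeRevert << '\n'; // 28571
        return 0;
    }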