Skip to content

Commit

Permalink
[ML] Adding failed_category_count to model_size_stats (#55761)
Browse files Browse the repository at this point in the history
The failed_category_count statistic records the number of times
categorization wanted to create a new category but couldn't
because the job had reached its model_memory_limit.

Backport of #55716
  • Loading branch information
droberts195 authored Apr 25, 2020
1 parent ad54cca commit 3ba44a5
Show file tree
Hide file tree
Showing 13 changed files with 89 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public class ModelSizeStats implements ToXContentObject {
public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count");
public static final ParseField RARE_CATEGORY_COUNT_FIELD = new ParseField("rare_category_count");
public static final ParseField DEAD_CATEGORY_COUNT_FIELD = new ParseField("dead_category_count");
public static final ParseField FAILED_CATEGORY_COUNT_FIELD = new ParseField("failed_category_count");
public static final ParseField CATEGORIZATION_STATUS_FIELD = new ParseField("categorization_status");
public static final ParseField LOG_TIME_FIELD = new ParseField("log_time");
public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp");
Expand All @@ -81,6 +82,7 @@ public class ModelSizeStats implements ToXContentObject {
PARSER.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD);
PARSER.declareLong(Builder::setRareCategoryCount, RARE_CATEGORY_COUNT_FIELD);
PARSER.declareLong(Builder::setDeadCategoryCount, DEAD_CATEGORY_COUNT_FIELD);
PARSER.declareLong(Builder::setFailedCategoryCount, FAILED_CATEGORY_COUNT_FIELD);
PARSER.declareField(Builder::setCategorizationStatus,
p -> CategorizationStatus.fromString(p.text()), CATEGORIZATION_STATUS_FIELD, ValueType.STRING);
PARSER.declareField(Builder::setLogTime,
Expand Down Expand Up @@ -143,15 +145,16 @@ public String toString() {
private final long frequentCategoryCount;
private final long rareCategoryCount;
private final long deadCategoryCount;
private final long failedCategoryCount;
private final CategorizationStatus categorizationStatus;
private final Date timestamp;
private final Date logTime;

private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, Long modelBytesMemoryLimit, long totalByFieldCount,
long totalOverFieldCount, long totalPartitionFieldCount, long bucketAllocationFailuresCount,
MemoryStatus memoryStatus, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount,
long rareCategoryCount, long deadCategoryCount, CategorizationStatus categorizationStatus,
Date timestamp, Date logTime) {
long rareCategoryCount, long deadCategoryCount, long failedCategoryCount,
CategorizationStatus categorizationStatus, Date timestamp, Date logTime) {
this.jobId = jobId;
this.modelBytes = modelBytes;
this.modelBytesExceeded = modelBytesExceeded;
Expand All @@ -166,6 +169,7 @@ private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, L
this.frequentCategoryCount = frequentCategoryCount;
this.rareCategoryCount = rareCategoryCount;
this.deadCategoryCount = deadCategoryCount;
this.failedCategoryCount = failedCategoryCount;
this.categorizationStatus = categorizationStatus;
this.timestamp = timestamp;
this.logTime = logTime;
Expand Down Expand Up @@ -194,6 +198,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount);
builder.field(RARE_CATEGORY_COUNT_FIELD.getPreferredName(), rareCategoryCount);
builder.field(DEAD_CATEGORY_COUNT_FIELD.getPreferredName(), deadCategoryCount);
builder.field(FAILED_CATEGORY_COUNT_FIELD.getPreferredName(), failedCategoryCount);
builder.field(CATEGORIZATION_STATUS_FIELD.getPreferredName(), categorizationStatus);
builder.timeField(LOG_TIME_FIELD.getPreferredName(), LOG_TIME_FIELD.getPreferredName() + "_string", logTime.getTime());
if (timestamp != null) {
Expand Down Expand Up @@ -260,6 +265,10 @@ public long getDeadCategoryCount() {
return deadCategoryCount;
}

/**
 * Returns the number of times categorization wanted to create a new category
 * but could not because the job had reached its {@code model_memory_limit}.
 *
 * @return the failed category count
 */
public long getFailedCategoryCount() {
    return this.failedCategoryCount;
}

/**
 * Returns the categorization status recorded in these model size stats.
 *
 * @return the categorization status
 */
public CategorizationStatus getCategorizationStatus() {
    return this.categorizationStatus;
}
Expand All @@ -286,7 +295,7 @@ public Date getLogTime() {
public int hashCode() {
return Objects.hash(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, this.bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
frequentCategoryCount, rareCategoryCount, deadCategoryCount, failedCategoryCount, categorizationStatus, timestamp, logTime);
}

/**
Expand Down Expand Up @@ -314,6 +323,7 @@ public boolean equals(Object other) {
&& this.frequentCategoryCount == that.frequentCategoryCount
&& this.rareCategoryCount == that.rareCategoryCount
&& this.deadCategoryCount == that.deadCategoryCount
&& this.failedCategoryCount == that.failedCategoryCount
&& Objects.equals(this.categorizationStatus, that.categorizationStatus)
&& Objects.equals(this.timestamp, that.timestamp)
&& Objects.equals(this.logTime, that.logTime)
Expand All @@ -336,6 +346,7 @@ public static class Builder {
private long frequentCategoryCount;
private long rareCategoryCount;
private long deadCategoryCount;
private long failedCategoryCount;
private CategorizationStatus categorizationStatus;
private Date timestamp;
private Date logTime;
Expand All @@ -362,6 +373,7 @@ public Builder(ModelSizeStats modelSizeStats) {
this.frequentCategoryCount = modelSizeStats.frequentCategoryCount;
this.rareCategoryCount = modelSizeStats.rareCategoryCount;
this.deadCategoryCount = modelSizeStats.deadCategoryCount;
this.failedCategoryCount = modelSizeStats.failedCategoryCount;
this.categorizationStatus = modelSizeStats.categorizationStatus;
this.timestamp = modelSizeStats.timestamp;
this.logTime = modelSizeStats.logTime;
Expand Down Expand Up @@ -433,6 +445,11 @@ public Builder setDeadCategoryCount(long deadCategoryCount) {
return this;
}

/**
 * Sets the number of times categorization wanted to create a new category
 * but could not because the job had reached its {@code model_memory_limit}.
 *
 * @param failedCategoryCount the failed category count to store in this builder
 * @return this builder, to allow call chaining
 */
public Builder setFailedCategoryCount(long failedCategoryCount) {
this.failedCategoryCount = failedCategoryCount;
return this;
}

public Builder setCategorizationStatus(CategorizationStatus categorizationStatus) {
Objects.requireNonNull(categorizationStatus, "[" + CATEGORIZATION_STATUS_FIELD.getPreferredName() + "] must not be null");
this.categorizationStatus = categorizationStatus;
Expand All @@ -452,7 +469,7 @@ public Builder setLogTime(Date logTime) {
public ModelSizeStats build() {
return new ModelSizeStats(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
frequentCategoryCount, rareCategoryCount, deadCategoryCount, failedCategoryCount, categorizationStatus, timestamp, logTime);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public void testDefaultConstructor() {
assertEquals(0, stats.getFrequentCategoryCount());
assertEquals(0, stats.getRareCategoryCount());
assertEquals(0, stats.getDeadCategoryCount());
assertEquals(0, stats.getFailedCategoryCount());
assertEquals(CategorizationStatus.OK, stats.getCategorizationStatus());
}

Expand Down Expand Up @@ -109,6 +110,9 @@ public static ModelSizeStats createRandomized() {
if (randomBoolean()) {
stats.setDeadCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setFailedCategoryCount(randomNonNegativeLong());
}
if (randomBoolean()) {
stats.setCategorizationStatus(randomFrom(CategorizationStatus.values()));
}
Expand Down
3 changes: 3 additions & 0 deletions docs/reference/cat/anomaly-detectors.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=categorized-doc-count]
`model.dead_category_count`, `mdcc`, `modelDeadCategoryCount`:::
include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count]

`model.failed_category_count`, `mdcc`, `modelFailedCategoryCount`:::
include::{docdir}/ml/ml-shared.asciidoc[tag=failed-category-count]

`model.frequent_category_count`, `mfcc`, `modelFrequentCategoryCount`:::
include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-status]
(long)
include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count]
`failed_category_count`:::
(long)
include::{docdir}/ml/ml-shared.asciidoc[tag=failed-category-count]
`frequent_category_count`:::
(long)
include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count]
Expand Down Expand Up @@ -409,6 +413,7 @@ The API returns the following results:
"frequent_category_count" : 0,
"rare_category_count" : 0,
"dead_category_count" : 0,
"failed_category_count" : 0,
"categorization_status" : "ok",
"log_time" : 1576017596000,
"timestamp" : 1580410800000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ never be assigned again because another category's definition
makes it a superset of the dead category. (Dead categories are a
side effect of the way categorization has no prior training.)
`failed_category_count`:::
(long)
include::{docdir}/ml/ml-shared.asciidoc[tag=failed-category-count]
`frequent_category_count`:::
(long) The number of categories that match more than 1% of categorized
documents.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ When the operation is complete, you receive the following results:
"frequent_category_count" : 0,
"rare_category_count" : 0,
"dead_category_count" : 0,
"failed_category_count" : 0,
"categorization_status" : "ok",
"log_time" : 1575402237000,
"timestamp" : 1576965600000
Expand Down
7 changes: 7 additions & 0 deletions docs/reference/ml/ml-shared.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,13 @@ If `true`, the output excludes interim results. By default, interim results are
included.
end::exclude-interim-results[]

tag::failed-category-count[]
The number of times that categorization wanted to create a new category but
couldn't because the job had hit its `model_memory_limit`. This count does not
track which specific categories failed to be created. Therefore you cannot use
this value to determine the number of unique categories that were missed.
end::failed-category-count[]

tag::feature-bag-fraction[]
Advanced configuration option. Defines the fraction of features that will be
used when selecting a random bag for each candidate split.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ public class ModelSizeStats implements ToXContentObject, Writeable {
public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count");
public static final ParseField RARE_CATEGORY_COUNT_FIELD = new ParseField("rare_category_count");
public static final ParseField DEAD_CATEGORY_COUNT_FIELD = new ParseField("dead_category_count");
public static final ParseField FAILED_CATEGORY_COUNT_FIELD = new ParseField("failed_category_count");
public static final ParseField CATEGORIZATION_STATUS_FIELD = new ParseField("categorization_status");
public static final ParseField LOG_TIME_FIELD = new ParseField("log_time");
public static final ParseField TIMESTAMP_FIELD = new ParseField("timestamp");
Expand All @@ -76,6 +77,7 @@ private static ConstructingObjectParser<Builder, Void> createParser(boolean igno
parser.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD);
parser.declareLong(Builder::setRareCategoryCount, RARE_CATEGORY_COUNT_FIELD);
parser.declareLong(Builder::setDeadCategoryCount, DEAD_CATEGORY_COUNT_FIELD);
parser.declareLong(Builder::setFailedCategoryCount, FAILED_CATEGORY_COUNT_FIELD);
parser.declareField(Builder::setCategorizationStatus,
p -> CategorizationStatus.fromString(p.text()), CATEGORIZATION_STATUS_FIELD, ValueType.STRING);
parser.declareField(Builder::setLogTime,
Expand Down Expand Up @@ -154,15 +156,16 @@ public String toString() {
private final long frequentCategoryCount;
private final long rareCategoryCount;
private final long deadCategoryCount;
private final long failedCategoryCount;
private final CategorizationStatus categorizationStatus;
private final Date timestamp;
private final Date logTime;

private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, Long modelBytesMemoryLimit, long totalByFieldCount,
long totalOverFieldCount, long totalPartitionFieldCount, long bucketAllocationFailuresCount,
MemoryStatus memoryStatus, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount,
long rareCategoryCount, long deadCategoryCount, CategorizationStatus categorizationStatus,
Date timestamp, Date logTime) {
long rareCategoryCount, long deadCategoryCount, long failedCategoryCount,
CategorizationStatus categorizationStatus, Date timestamp, Date logTime) {
this.jobId = jobId;
this.modelBytes = modelBytes;
this.modelBytesExceeded = modelBytesExceeded;
Expand All @@ -177,6 +180,7 @@ private ModelSizeStats(String jobId, long modelBytes, Long modelBytesExceeded, L
this.frequentCategoryCount = frequentCategoryCount;
this.rareCategoryCount = rareCategoryCount;
this.deadCategoryCount = deadCategoryCount;
this.failedCategoryCount = failedCategoryCount;
this.categorizationStatus = categorizationStatus;
this.timestamp = timestamp;
this.logTime = logTime;
Expand Down Expand Up @@ -206,13 +210,19 @@ public ModelSizeStats(StreamInput in) throws IOException {
frequentCategoryCount = in.readVLong();
rareCategoryCount = in.readVLong();
deadCategoryCount = in.readVLong();
if (in.getVersion().onOrAfter(Version.V_7_8_0)) {
failedCategoryCount = in.readVLong();
} else {
failedCategoryCount = 0;
}
categorizationStatus = CategorizationStatus.readFromStream(in);
} else {
categorizedDocCount = 0;
totalCategoryCount = 0;
frequentCategoryCount = 0;
rareCategoryCount = 0;
deadCategoryCount = 0;
failedCategoryCount = 0;
categorizationStatus = CategorizationStatus.OK;
}
logTime = new Date(in.readVLong());
Expand Down Expand Up @@ -248,6 +258,9 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeVLong(frequentCategoryCount);
out.writeVLong(rareCategoryCount);
out.writeVLong(deadCategoryCount);
if (out.getVersion().onOrAfter(Version.V_7_8_0)) {
out.writeVLong(failedCategoryCount);
}
categorizationStatus.writeTo(out);
}
out.writeVLong(logTime.getTime());
Expand Down Expand Up @@ -286,6 +299,7 @@ public XContentBuilder doXContentBody(XContentBuilder builder) throws IOExceptio
builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount);
builder.field(RARE_CATEGORY_COUNT_FIELD.getPreferredName(), rareCategoryCount);
builder.field(DEAD_CATEGORY_COUNT_FIELD.getPreferredName(), deadCategoryCount);
builder.field(FAILED_CATEGORY_COUNT_FIELD.getPreferredName(), failedCategoryCount);
builder.field(CATEGORIZATION_STATUS_FIELD.getPreferredName(), categorizationStatus);
builder.timeField(LOG_TIME_FIELD.getPreferredName(), LOG_TIME_FIELD.getPreferredName() + "_string", logTime.getTime());
if (timestamp != null) {
Expand Down Expand Up @@ -351,6 +365,10 @@ public long getDeadCategoryCount() {
return deadCategoryCount;
}

/**
 * Returns the number of times categorization wanted to create a new category
 * but could not because the job had reached its {@code model_memory_limit}.
 *
 * @return the failed category count
 */
public long getFailedCategoryCount() {
    // Previously returned deadCategoryCount (copy-paste error), which made the
    // getter disagree with the serialized, hashed and compared field value.
    return failedCategoryCount;
}

/**
 * Returns the categorization status recorded in these model size stats.
 *
 * @return the categorization status
 */
public CategorizationStatus getCategorizationStatus() {
    return this.categorizationStatus;
}
Expand All @@ -376,7 +394,7 @@ public int hashCode() {
// this.id excluded here as it is generated by the datastore
return Objects.hash(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
frequentCategoryCount, rareCategoryCount, deadCategoryCount, failedCategoryCount, categorizationStatus, timestamp, logTime);
}

/**
Expand Down Expand Up @@ -405,6 +423,7 @@ public boolean equals(Object other) {
&& Objects.equals(this.frequentCategoryCount, that.frequentCategoryCount)
&& Objects.equals(this.rareCategoryCount, that.rareCategoryCount)
&& Objects.equals(this.deadCategoryCount, that.deadCategoryCount)
&& Objects.equals(this.failedCategoryCount, that.failedCategoryCount)
&& Objects.equals(this.categorizationStatus, that.categorizationStatus)
&& Objects.equals(this.timestamp, that.timestamp)
&& Objects.equals(this.logTime, that.logTime)
Expand All @@ -427,6 +446,7 @@ public static class Builder {
private long frequentCategoryCount;
private long rareCategoryCount;
private long deadCategoryCount;
private long failedCategoryCount;
private CategorizationStatus categorizationStatus;
private Date timestamp;
private Date logTime;
Expand All @@ -453,6 +473,7 @@ public Builder(ModelSizeStats modelSizeStats) {
this.frequentCategoryCount = modelSizeStats.frequentCategoryCount;
this.rareCategoryCount = modelSizeStats.rareCategoryCount;
this.deadCategoryCount = modelSizeStats.deadCategoryCount;
this.failedCategoryCount = modelSizeStats.failedCategoryCount;
this.categorizationStatus = modelSizeStats.categorizationStatus;
this.timestamp = modelSizeStats.timestamp;
this.logTime = modelSizeStats.logTime;
Expand Down Expand Up @@ -524,6 +545,11 @@ public Builder setDeadCategoryCount(long deadCategoryCount) {
return this;
}

/**
 * Sets the number of times categorization wanted to create a new category
 * but could not because the job had reached its {@code model_memory_limit}.
 *
 * @param failedCategoryCount the failed category count to store in this builder
 * @return this builder, to allow call chaining
 */
public Builder setFailedCategoryCount(long failedCategoryCount) {
this.failedCategoryCount = failedCategoryCount;
return this;
}

public Builder setCategorizationStatus(CategorizationStatus categorizationStatus) {
Objects.requireNonNull(categorizationStatus, "[" + CATEGORIZATION_STATUS_FIELD.getPreferredName() + "] must not be null");
this.categorizationStatus = categorizationStatus;
Expand All @@ -543,7 +569,8 @@ public Builder setLogTime(Date logTime) {
public ModelSizeStats build() {
return new ModelSizeStats(jobId, modelBytes, modelBytesExceeded, modelBytesMemoryLimit, totalByFieldCount, totalOverFieldCount,
totalPartitionFieldCount, bucketAllocationFailuresCount, memoryStatus, categorizedDocCount, totalCategoryCount,
frequentCategoryCount, rareCategoryCount, deadCategoryCount, categorizationStatus, timestamp, logTime);
frequentCategoryCount, rareCategoryCount, deadCategoryCount, failedCategoryCount, categorizationStatus, timestamp,
logTime);
}
}
}
Loading

0 comments on commit 3ba44a5

Please sign in to comment.