-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix IndexAuditTrail rolling upgrade on rollover edge - take 2 #38286
Changes from all commits
126551a
cf3a839
bec26d4
26e30b5
962d19f
c955247
874c9d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,6 +109,7 @@ | |
import static org.elasticsearch.xpack.security.audit.AuditUtil.indices; | ||
import static org.elasticsearch.xpack.security.audit.AuditUtil.restRequestContent; | ||
import static org.elasticsearch.xpack.security.audit.index.IndexNameResolver.resolve; | ||
import static org.elasticsearch.xpack.security.audit.index.IndexNameResolver.resolveNext; | ||
import static org.elasticsearch.xpack.security.support.SecurityIndexManager.SECURITY_VERSION_STRING; | ||
|
||
/** | ||
|
@@ -308,6 +309,17 @@ private String getIndexName() { | |
return index; | ||
} | ||
|
||
private String getNextIndexName() { | ||
final Message first = peek(); | ||
final String index; | ||
if (first == null) { | ||
index = resolveNext(IndexAuditTrailField.INDEX_NAME_PREFIX, DateTime.now(DateTimeZone.UTC), rollover); | ||
} else { | ||
index = resolveNext(IndexAuditTrailField.INDEX_NAME_PREFIX, first.timestamp, rollover); | ||
} | ||
return index; | ||
} | ||
|
||
private boolean hasStaleMessage() { | ||
final Message first = peek(); | ||
if (first == null) { | ||
|
@@ -337,7 +349,7 @@ public void onResponse(ClusterStateResponse clusterStateResponse) { | |
updateCurrentIndexMappingsIfNecessary(clusterStateResponse.getState()); | ||
} else if (TemplateUtils.checkTemplateExistsAndVersionMatches(INDEX_TEMPLATE_NAME, | ||
SECURITY_VERSION_STRING, clusterStateResponse.getState(), logger, | ||
Version.CURRENT::onOrAfter) == false) { | ||
Version.CURRENT::onOrBefore) == false) { | ||
putTemplate(customAuditIndexSettings(settings, logger), | ||
e -> { | ||
logger.error("failed to put audit trail template", e); | ||
|
@@ -377,6 +389,7 @@ public void onFailure(Exception e) { | |
|
||
// pkg private for tests | ||
void updateCurrentIndexMappingsIfNecessary(ClusterState state) { | ||
final String nextIndex = getNextIndexName(); | ||
final String index = getIndexName(); | ||
|
||
AliasOrIndex aliasOrIndex = state.getMetaData().getAliasAndIndexLookup().get(index); | ||
|
@@ -391,48 +404,60 @@ void updateCurrentIndexMappingsIfNecessary(ClusterState state) { | |
MappingMetaData docMapping = indexMetaData.mapping("doc"); | ||
if (docMapping == null) { | ||
if (indexToRemoteCluster || state.nodes().isLocalNodeElectedMaster() || hasStaleMessage()) { | ||
putAuditIndexMappingsAndStart(index); | ||
putAuditIndexMappingsAndStart(index, nextIndex); | ||
} else { | ||
logger.trace("audit index [{}] is missing mapping for type [{}]", index, DOC_TYPE); | ||
logger.debug("audit index [{}] is missing mapping for type [{}]", index, DOC_TYPE); | ||
transitionStartingToInitialized(); | ||
} | ||
} else { | ||
@SuppressWarnings("unchecked") | ||
Map<String, Object> meta = (Map<String, Object>) docMapping.sourceAsMap().get("_meta"); | ||
if (meta == null) { | ||
logger.info("Missing _meta field in mapping [{}] of index [{}]", docMapping.type(), index); | ||
throw new IllegalStateException("Cannot read security-version string in index " + index); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixes #37062 (comment) . A non-master node detects an un-updated audit index and bails. Instead it should hold off, and retry. The index is un-updated because the master had updated the mapping for the index before it the rollover timeline ("the race" - the template upgrade happend after the rollover edge, but audit events on the master came before that). |
||
|
||
final String versionString = (String) meta.get(SECURITY_VERSION_STRING); | ||
if (versionString != null && Version.fromString(versionString).onOrAfter(Version.CURRENT)) { | ||
innerStart(); | ||
} else { | ||
logger.warn("Missing _meta field in mapping [{}] of index [{}]", docMapping.type(), index); | ||
if (indexToRemoteCluster || state.nodes().isLocalNodeElectedMaster() || hasStaleMessage()) { | ||
putAuditIndexMappingsAndStart(index); | ||
} else if (versionString == null) { | ||
logger.trace("audit index [{}] mapping is missing meta field [{}]", index, SECURITY_VERSION_STRING); | ||
transitionStartingToInitialized(); | ||
putAuditIndexMappingsAndStart(index, nextIndex); | ||
} else { | ||
logger.trace("audit index [{}] has the incorrect version [{}]", index, versionString); | ||
logger.debug("audit index [{}] is missing _meta for type [{}]", index, DOC_TYPE); | ||
transitionStartingToInitialized(); | ||
} | ||
} else { | ||
final String versionString = (String) meta.get(SECURITY_VERSION_STRING); | ||
if (versionString != null && Version.fromString(versionString).onOrAfter(Version.CURRENT)) { | ||
innerStart(); | ||
} else { | ||
if (indexToRemoteCluster || state.nodes().isLocalNodeElectedMaster() || hasStaleMessage()) { | ||
putAuditIndexMappingsAndStart(index, nextIndex); | ||
} else if (versionString == null) { | ||
logger.debug("audit index [{}] mapping is missing meta field [{}]", index, SECURITY_VERSION_STRING); | ||
transitionStartingToInitialized(); | ||
} else { | ||
logger.debug("audit index [{}] has the incorrect version [{}]", index, versionString); | ||
transitionStartingToInitialized(); | ||
} | ||
} | ||
} | ||
} | ||
} else { | ||
innerStart(); | ||
} | ||
} | ||
|
||
private void putAuditIndexMappingsAndStart(String index) { | ||
putAuditIndexMappings(index, getPutIndexTemplateRequest(Settings.EMPTY).mappings().get(DOC_TYPE), | ||
ActionListener.wrap(ignore -> { | ||
logger.trace("updated mappings on audit index [{}]", index); | ||
private void putAuditIndexMappingsAndStart(String index, String nextIndex) { | ||
final String docMapping = getPutIndexTemplateRequest(Settings.EMPTY).mappings().get(DOC_TYPE); | ||
putAuditIndexMappings(index, docMapping, ActionListener.wrap(ignore -> { | ||
logger.debug("updated mappings on audit index [{}]", index); | ||
putAuditIndexMappings(nextIndex, docMapping, ActionListener.wrap(ignoreToo -> { | ||
logger.debug("updated mappings on next audit index [{}]", nextIndex); | ||
innerStart(); | ||
}, e2 -> { | ||
// best effort only | ||
logger.debug("Failed to update mappings on next audit index [{}]", nextIndex); | ||
innerStart(); | ||
}, e -> { | ||
logger.error(new ParameterizedMessage("failed to update mappings on audit index [{}]", index), e); | ||
transitionStartingToInitialized(); // reset to initialized so we can retry | ||
})); | ||
}, e -> { | ||
logger.error(new ParameterizedMessage("failed to update mappings on audit index [{}]", index), e); | ||
transitionStartingToInitialized(); // reset to initialized so we can retry | ||
})); | ||
} | ||
|
||
private void transitionStartingToInitialized() { | ||
|
@@ -451,7 +476,7 @@ void innerStart() { | |
assert false : message; | ||
logger.error(message); | ||
} else { | ||
logger.trace("successful state transition from starting to started, current value: [{}]", state.get()); | ||
logger.debug("successful state transition from starting to started, current value: [{}]", state.get()); | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -157,6 +157,7 @@ subprojects { | |
setting 'xpack.security.audit.outputs', 'index' | ||
setting 'xpack.security.transport.ssl.keystore.path', 'testnode.jks' | ||
setting 'xpack.security.transport.ssl.keystore.password', 'testnode' | ||
setting 'logger.org.elasticsearch.xpack.security.audit.index', 'DEBUG' | ||
if (version.onOrAfter('6.0.0') == false) { | ||
// this is needed since in 5.6 we don't bootstrap the token service if there is no explicit initial password | ||
keystoreSetting 'xpack.security.authc.token.passphrase', 'xpack_token_passphrase' | ||
|
@@ -217,6 +218,7 @@ subprojects { | |
setting 'xpack.security.enabled', 'true' | ||
setting 'xpack.security.transport.ssl.enabled', 'true' | ||
setting 'xpack.security.transport.ssl.keystore.path', 'testnode.jks' | ||
setting 'logger.org.elasticsearch.xpack.security.audit.index', 'DEBUG' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should help in future possible failures! |
||
keystoreSetting 'xpack.security.transport.ssl.keystore.secure_password', 'testnode' | ||
if (version.onOrAfter('6.0.0') == false) { | ||
// this is needed since in 5.6 we don't bootstrap the token service if there is no explicit initial password | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ | |
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import static org.hamcrest.Matchers.hasSize; | ||
|
||
|
@@ -62,12 +63,11 @@ public void findMinVersionInCluster() throws IOException { | |
} | ||
} | ||
|
||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/33867") | ||
public void testAuditLogs() throws Exception { | ||
assertBusy(() -> { | ||
assertAuditDocsExist(); | ||
assertNumUniqueNodeNameBuckets(expectedNumUniqueNodeNameBuckets()); | ||
}); | ||
}, 30, TimeUnit.SECONDS); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. allows some slack for creating and allocating a new audit index by the old nodes while the master is down for upgrade. |
||
} | ||
|
||
private int expectedNumUniqueNodeNameBuckets() throws IOException { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Follow-up of #35988 , but does not affect the failures #33867 #37062 .