diff --git a/src/core/server/saved_objects/migrations/README.md b/src/core/server/saved_objects/migrations/README.md index 60bf84eef87a6..d8382e4f0e061 100644 --- a/src/core/server/saved_objects/migrations/README.md +++ b/src/core/server/saved_objects/migrations/README.md @@ -133,6 +133,14 @@ is left out of the description for brevity. ## INIT ### Next action +`initAction` + +Check that replica allocation is enabled from cluster settings (`cluster.routing.allocation.enable`). Migrations will fail when replica allocation is disabled during the bulk index operation that waits for all active shards. Migrations wait for all active shards to ensure that saved objects are replicated to protect against data loss. + +The Elasticsearch documentation mentions switching off replica allocation when restoring a cluster and this is a setting that might be overlooked when a restore is done. Migrations will fail early if replica allocation is incorrectly set to avoid adding a write block to the old index before running into a failure later. + +If replica allocation is set to 'all', the migration continues to fetch the saved object indices: + `fetchIndices` Fetch the saved object indices, mappings and aliases to find the source index @@ -140,17 +148,21 @@ and determine whether we’re migrating from a legacy index or a v1 migrations index. ### New control state -1. If `.kibana` and the version specific aliases both exists and are pointing +1. Two conditions have to be met before migrations begin: + 1. If replica allocation is set as a persistent or transient setting to "primaries", "new_primaries" or "none", fail the migration. If replica allocation is not enabled (i.e. not set to 'all'), the migration will time out when waiting for index yellow status before bulk indexing. The check only considers persistent and transient settings and does not take static configuration in `elasticsearch.yml` into account. 
If `cluster.routing.allocation.enable` is configured in `elasticsearch.yml` and not set to the default of 'all', the migration will time out. Static settings can only be returned from the `nodes/info` API. + → `FATAL` + + 2. If `.kibana` is pointing to an index that belongs to a later version of + Kibana, e.g. a 7.11.0 instance found the `.kibana` alias pointing to + `.kibana_7.12.0_001`, fail the migration + → `FATAL` + +2. If `.kibana` and the version specific aliases both exist and are pointing to the same index. This version's migration has already been completed. Since the same version could have plugins enabled at any time that would introduce new transforms or mappings. → `OUTDATED_DOCUMENTS_SEARCH` -2. If `.kibana` is pointing to an index that belongs to a later version of -Kibana .e.g. a 7.11.0 instance found the `.kibana` alias pointing to -`.kibana_7.12.0_001` fail the migration - → `FATAL` - 3. If the `.kibana` alias exists we’re migrating from either a v1 or v2 index and the migration source index is the index the `.kibana` alias points to. 
→ `WAIT_FOR_YELLOW_SOURCE` diff --git a/src/core/server/saved_objects/migrations/actions/index.ts b/src/core/server/saved_objects/migrations/actions/index.ts index 4e88e9c448d40..1123588309deb 100644 --- a/src/core/server/saved_objects/migrations/actions/index.ts +++ b/src/core/server/saved_objects/migrations/actions/index.ts @@ -20,6 +20,9 @@ export { export type { RetryableEsClientError }; // actions/* imports +export type { InitActionParams, UnsupportedClusterRoutingAllocation } from './initialize_action'; +export { initAction } from './initialize_action'; + export type { FetchIndexResponse, FetchIndicesParams } from './fetch_indices'; export { fetchIndices } from './fetch_indices'; @@ -81,6 +84,8 @@ export type { export { updateAndPickupMappings } from './update_and_pickup_mappings'; import type { UnknownDocsFound } from './check_for_unknown_docs'; +import type { UnsupportedClusterRoutingAllocation } from './initialize_action'; + export type { CheckForUnknownDocsParams, UnknownDocsFound, @@ -143,6 +148,7 @@ export interface ActionErrorTypeMap { documents_transform_failed: DocumentsTransformFailed; request_entity_too_large_exception: RequestEntityTooLargeException; unknown_docs_found: UnknownDocsFound; + unsupported_cluster_routing_allocation: UnsupportedClusterRoutingAllocation; } /** diff --git a/src/core/server/saved_objects/migrations/actions/initialize_action.test.ts b/src/core/server/saved_objects/migrations/actions/initialize_action.test.ts new file mode 100644 index 0000000000000..7c75470b890aa --- /dev/null +++ b/src/core/server/saved_objects/migrations/actions/initialize_action.test.ts @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +import { catchRetryableEsClientErrors } from './catch_retryable_es_client_errors'; +import { errors as EsErrors } from '@elastic/elasticsearch'; +jest.mock('./catch_retryable_es_client_errors'); +import { elasticsearchClientMock } from '../../../elasticsearch/client/mocks'; +import { initAction } from './initialize_action'; + +describe('initAction', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + const retryableError = new EsErrors.ResponseError( + elasticsearchClientMock.createApiResponse({ + statusCode: 503, + body: { error: { type: 'es_type', reason: 'es_reason' } }, + }) + ); + const client = elasticsearchClientMock.createInternalClient( + elasticsearchClientMock.createErrorTransportRequestPromise(retryableError) + ); + it('calls catchRetryableEsClientErrors when the promise rejects', async () => { + const task = initAction({ client, indices: ['my_index'] }); + try { + await task(); + } catch (e) { + /** ignore */ + } + expect(catchRetryableEsClientErrors).toHaveBeenCalledWith(retryableError); + }); +}); diff --git a/src/core/server/saved_objects/migrations/actions/initialize_action.ts b/src/core/server/saved_objects/migrations/actions/initialize_action.ts new file mode 100644 index 0000000000000..73502382c9ca0 --- /dev/null +++ b/src/core/server/saved_objects/migrations/actions/initialize_action.ts @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +import * as TaskEither from 'fp-ts/lib/TaskEither'; +import * as Either from 'fp-ts/lib/Either'; +import { pipe } from 'fp-ts/lib/pipeable'; +import { ElasticsearchClient } from '../../../elasticsearch'; +import { + catchRetryableEsClientErrors, + RetryableEsClientError, +} from './catch_retryable_es_client_errors'; + +import { FetchIndexResponse, fetchIndices } from './fetch_indices'; + +const routingAllocationEnable = 'cluster.routing.allocation.enable'; +export interface ClusterRoutingAllocationEnabled { + clusterRoutingAllocationEnabled: boolean; +} + +export interface InitActionParams { + client: ElasticsearchClient; + indices: string[]; +} + +export interface UnsupportedClusterRoutingAllocation { + type: 'unsupported_cluster_routing_allocation'; +} + +export const checkClusterRoutingAllocationEnabledTask = + ({ + client, + }: { + client: ElasticsearchClient; + }): TaskEither.TaskEither => + () => { + return client.cluster + .getSettings({ + flat_settings: true, + }) + .then((settings) => { + // flat_settings yields the value as one plain string (not a list); transient is checked before persistent + const clusterRoutingAllocation: string | undefined = + settings?.transient?.[routingAllocationEnable] ?? + settings?.persistent?.[routingAllocationEnable]; 
+ + const clusterRoutingAllocationEnabled = + clusterRoutingAllocation === undefined || + clusterRoutingAllocation === 'all'; // if set, only allow 'all' + + if (!clusterRoutingAllocationEnabled) { + return Either.left({ type: 'unsupported_cluster_routing_allocation' as const }); + } else { + return Either.right({}); + } + }) + .catch(catchRetryableEsClientErrors); + }; + +export const initAction = ({ + client, + indices, +}: InitActionParams): TaskEither.TaskEither< + RetryableEsClientError | UnsupportedClusterRoutingAllocation, + FetchIndexResponse +> => { + return pipe( + checkClusterRoutingAllocationEnabledTask({ client }), + TaskEither.chainW((value) => { + return fetchIndices({ client, indices }); + }) + ); +}; diff --git a/src/core/server/saved_objects/migrations/actions/integration_tests/actions.test.ts b/src/core/server/saved_objects/migrations/actions/integration_tests/actions.test.ts index ef84f0cb49231..bac8f491534f0 100644 --- a/src/core/server/saved_objects/migrations/actions/integration_tests/actions.test.ts +++ b/src/core/server/saved_objects/migrations/actions/integration_tests/actions.test.ts @@ -14,7 +14,6 @@ import { cloneIndex, closePit, createIndex, - fetchIndices, openPit, OpenPitResponse, reindex, @@ -35,6 +34,7 @@ import { removeWriteBlock, transformDocs, waitForIndexStatusYellow, + initAction, } from '../../actions'; import * as Either from 'fp-ts/lib/Either'; import * as Option from 'fp-ts/lib/Option'; @@ -111,10 +111,20 @@ describe('migration actions', () => { await esServer.stop(); }); - describe('fetchIndices', () => { + describe('initAction', () => { + afterAll(async () => { + await client.cluster.putSettings({ + body: { + persistent: { + // Remove persistent test settings + cluster: { routing: { allocation: { enable: null } } }, + }, + }, + }); + }); it('resolves right empty record if no indices were found', async () => { expect.assertions(1); - const task = fetchIndices({ client, indices: 
['no_such_index'] }); + const task = initAction({ client, indices: ['no_such_index'] }); await expect(task()).resolves.toMatchInlineSnapshot(` Object { "_tag": "Right", @@ -124,7 +134,7 @@ describe('migration actions', () => { }); it('resolves right record with found indices', async () => { expect.assertions(1); - const res = (await fetchIndices({ + const res = (await initAction({ client, indices: ['no_such_index', 'existing_index_with_docs'], })()) as Either.Right; @@ -139,6 +149,69 @@ describe('migration actions', () => { }) ); }); + it('resolves left with cluster routing allocation disabled', async () => { + expect.assertions(3); + await client.cluster.putSettings({ + body: { + persistent: { + // Disable all routing allocation + cluster: { routing: { allocation: { enable: 'none' } } }, + }, + }, + }); + const task = initAction({ + client, + indices: ['existing_index_with_docs'], + }); + await expect(task()).resolves.toMatchInlineSnapshot(` + Object { + "_tag": "Left", + "left": Object { + "type": "unsupported_cluster_routing_allocation", + }, + } + `); + await client.cluster.putSettings({ + body: { + persistent: { + // Allow routing to existing primaries only + cluster: { routing: { allocation: { enable: 'primaries' } } }, + }, + }, + }); + const task2 = initAction({ + client, + indices: ['existing_index_with_docs'], + }); + await expect(task2()).resolves.toMatchInlineSnapshot(` + Object { + "_tag": "Left", + "left": Object { + "type": "unsupported_cluster_routing_allocation", + }, + } + `); + await client.cluster.putSettings({ + body: { + persistent: { + // Allow routing to new primaries only + cluster: { routing: { allocation: { enable: 'new_primaries' } } }, + }, + }, + }); + const task3 = initAction({ + client, + indices: ['existing_index_with_docs'], + }); + await expect(task3()).resolves.toMatchInlineSnapshot(` + Object { + "_tag": "Left", + "left": Object { + "type": "unsupported_cluster_routing_allocation", + }, + } + `); + }); }); 
describe('setWriteBlock', () => { diff --git a/src/core/server/saved_objects/migrations/integration_tests/cluster_routing_allocation_disabled.test.ts b/src/core/server/saved_objects/migrations/integration_tests/cluster_routing_allocation_disabled.test.ts new file mode 100644 index 0000000000000..0f4522b156fe7 --- /dev/null +++ b/src/core/server/saved_objects/migrations/integration_tests/cluster_routing_allocation_disabled.test.ts @@ -0,0 +1,155 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import Path from 'path'; +import fs from 'fs/promises'; +import JSON5 from 'json5'; +import * as kbnTestServer from '../../../../test_helpers/kbn_server'; +import { Root } from '../../../root'; +import { ElasticsearchClient } from '../../../elasticsearch'; +import { LogRecord } from '@kbn/logging'; +import { retryAsync } from '../test_helpers/retry_async'; + +const logFilePath = Path.join(__dirname, 'unsupported_cluster_routing_allocation.log'); + +async function removeLogFile() { + // ignore errors if it doesn't exist + await fs.unlink(logFilePath).catch(() => void 0); +} + +const { startES } = kbnTestServer.createTestServers({ + adjustTimeout: (t: number) => jest.setTimeout(t), + settings: { + es: { + license: 'basic', + dataArchive: Path.join(__dirname, 'archives', '7.7.2_xpack_100k_obj.zip'), + }, + }, +}); + +function createKbnRoot() { + return kbnTestServer.createRootWithCorePlugins( + { + migrations: { + skip: false, + }, + logging: { + appenders: { + file: { + type: 'file', + fileName: logFilePath, + layout: { + type: 'json', + }, + }, + }, + loggers: [ + { + name: 'root', + level: 'info', + appenders: ['file'], + }, + ], + }, + }, + { + oss: false, + } + 
); +} +const getClusterRoutingAllocations = (settings: Record) => { + const routingAllocations = + settings?.transient?.['cluster.routing.allocation.enable'] ?? + settings?.persistent?.['cluster.routing.allocation.enable'] ?? + []; + return ( + [...routingAllocations].length === 0 || + [...routingAllocations].every((s: string) => s === 'all') + ); // if set, only allow 'all'; +}; +let esServer: kbnTestServer.TestElasticsearchUtils; + +async function updateRoutingAllocations( + esClient: ElasticsearchClient, + settingType: string = 'persistent', + value: string = 'none' +) { + return await esClient.cluster.putSettings({ + [settingType]: { cluster: { routing: { allocation: { enable: value } } } }, + }); +} + +describe('unsupported_cluster_routing_allocation', () => { + let client: ElasticsearchClient; + let root: Root; + + beforeAll(async () => { + await removeLogFile(); + esServer = await startES(); + client = esServer.es.getClient(); + }); + afterAll(async () => { + await esServer.stop(); + }); + + it('fails with a descriptive message when persistent replica allocation is not enabled', async () => { + const initialSettings = await client.cluster.getSettings({ flat_settings: true }); + + expect(getClusterRoutingAllocations(initialSettings)).toBe(true); + + await updateRoutingAllocations(client, 'persistent', 'none'); + + const updatedSettings = await client.cluster.getSettings({ flat_settings: true }); + + expect(getClusterRoutingAllocations(updatedSettings)).toBe(false); + + // now try to start Kibana + root = createKbnRoot(); + await root.preboot(); + await root.setup(); + + await expect(root.start()).rejects.toMatchInlineSnapshot( + `[Error: Unable to complete saved object migrations for the [.kibana] index: The elasticsearch cluster has cluster routing allocation incorrectly set for migrations to continue. 
To proceed, please remove the cluster routing allocation settings with PUT /_cluster/settings {"transient": {"cluster.routing.allocation.enable": null}, "persistent": {"cluster.routing.allocation.enable": null}}]` + ); + + await retryAsync( + async () => { + const logFileContent = await fs.readFile(logFilePath, 'utf-8'); + const records = logFileContent + .split('\n') + .filter(Boolean) + .map((str) => JSON5.parse(str)) as LogRecord[]; + expect( + records.find((rec) => + rec.message.startsWith( + `Unable to complete saved object migrations for the [.kibana] index: The elasticsearch cluster has cluster routing allocation incorrectly set for migrations to continue.` + ) + ) + ).toBeDefined(); + }, + { retryAttempts: 10, retryDelayMs: 200 } + ); + }); + + it('fails with a descriptive message when persistent replica allocation is set to "primaries"', async () => { + await updateRoutingAllocations(client, 'persistent', 'primaries'); + + const updatedSettings = await client.cluster.getSettings({ flat_settings: true }); + + expect(getClusterRoutingAllocations(updatedSettings)).toBe(false); + + // now try to start Kibana + root = createKbnRoot(); + await root.preboot(); + await root.setup(); + + await expect(root.start()).rejects.toMatchInlineSnapshot( + `[Error: Unable to complete saved object migrations for the [.kibana] index: The elasticsearch cluster has cluster routing allocation incorrectly set for migrations to continue. 
To proceed, please remove the cluster routing allocation settings with PUT /_cluster/settings {"transient": {"cluster.routing.allocation.enable": null}, "persistent": {"cluster.routing.allocation.enable": null}}]` + ); + }); +}); diff --git a/src/core/server/saved_objects/migrations/kibana_migrator.test.ts b/src/core/server/saved_objects/migrations/kibana_migrator.test.ts index 4bb24a3f8240d..2adf4d5dee184 100644 --- a/src/core/server/saved_objects/migrations/kibana_migrator.test.ts +++ b/src/core/server/saved_objects/migrations/kibana_migrator.test.ts @@ -110,10 +110,16 @@ describe('KibanaMigrator', () => { it('only runs migrations once if called multiple times', async () => { const options = mockOptions(); - options.client.indices.get.mockResponse({}, { statusCode: 404 }); options.client.indices.getAlias.mockResponse({}, { statusCode: 404 }); + options.client.cluster.getSettings.mockResponse( + { + transient: {}, + persistent: {}, + }, + { statusCode: 404 } + ); const migrator = new KibanaMigrator(options); migrator.prepareMigrations(); @@ -197,6 +203,13 @@ type MockedOptions = KibanaMigratorOptions & { const mockV2MigrationOptions = () => { const options = mockOptions(); + options.client.cluster.getSettings.mockResponse( + { + transient: {}, + persistent: {}, + }, + { statusCode: 200 } + ); options.client.indices.get.mockResponse( { diff --git a/src/core/server/saved_objects/migrations/model/model.test.ts b/src/core/server/saved_objects/migrations/model/model.test.ts index 5ca6713ca163f..de8483bb4abce 100644 --- a/src/core/server/saved_objects/migrations/model/model.test.ts +++ b/src/core/server/saved_objects/migrations/model/model.test.ts @@ -291,6 +291,17 @@ describe('migrations v2 model', () => { expect(newState.retryCount).toEqual(0); expect(newState.retryDelay).toEqual(0); }); + test('INIT -> FATAL when cluster routing allocation is not enabled', () => { + const res: ResponseType<'INIT'> = Either.left({ + type: 'unsupported_cluster_routing_allocation', + 
}); + const newState = model(initState, res) as FatalState; + + expect(newState.controlState).toEqual('FATAL'); + expect(newState.reason).toMatchInlineSnapshot( + `"The elasticsearch cluster has cluster routing allocation incorrectly set for migrations to continue. To proceed, please remove the cluster routing allocation settings with PUT /_cluster/settings {\\"transient\\": {\\"cluster.routing.allocation.enable\\": null}, \\"persistent\\": {\\"cluster.routing.allocation.enable\\": null}}"` + ); + }); test("INIT -> FATAL when .kibana points to newer version's index", () => { const res: ResponseType<'INIT'> = Either.right({ '.kibana_7.12.0_001': { diff --git a/src/core/server/saved_objects/migrations/model/model.ts b/src/core/server/saved_objects/migrations/model/model.ts index e9efb72bca6f5..c2f11ba18069c 100644 --- a/src/core/server/saved_objects/migrations/model/model.ts +++ b/src/core/server/saved_objects/migrations/model/model.ts @@ -72,7 +72,26 @@ export const model = (currentState: State, resW: ResponseType): if (stateP.controlState === 'INIT') { const res = resW as ExcludeRetryableEsError>; - if (Either.isRight(res)) { + if (Either.isLeft(res)) { + const left = res.left; + if (isLeftTypeof(left, 'unsupported_cluster_routing_allocation')) { + return { + ...stateP, + controlState: 'FATAL', + reason: `The elasticsearch cluster has cluster routing allocation incorrectly set for migrations to continue. To proceed, please remove the cluster routing allocation settings with PUT /_cluster/settings {"transient": {"cluster.routing.allocation.enable": null}, "persistent": {"cluster.routing.allocation.enable": null}}`, + logs: [ + ...stateP.logs, + { + level: 'error', + message: `The elasticsearch cluster has cluster routing allocation incorrectly set for migrations to continue. 
Ensure that the persistent and transient Elasticsearch configuration option 'cluster.routing.allocation.enable' is not set or set it to a value of 'all'.`, + }, + ], + }; + } else { + return throwBadResponse(stateP, left); + } + } else if (Either.isRight(res)) { + // cluster routing allocation is enabled and we can continue with the migration as normal const indices = res.right; const aliases = getAliases(indices); diff --git a/src/core/server/saved_objects/migrations/next.ts b/src/core/server/saved_objects/migrations/next.ts index 419b350a0b5f6..24a4204c3009e 100644 --- a/src/core/server/saved_objects/migrations/next.ts +++ b/src/core/server/saved_objects/migrations/next.ts @@ -59,7 +59,7 @@ export type ResponseType = Awaited< export const nextActionMap = (client: ElasticsearchClient, transformRawDocs: TransformRawDocs) => { return { INIT: (state: InitState) => - Actions.fetchIndices({ client, indices: [state.currentAlias, state.versionAlias] }), + Actions.initAction({ client, indices: [state.currentAlias, state.versionAlias] }), WAIT_FOR_YELLOW_SOURCE: (state: WaitForYellowSourceState) => Actions.waitForIndexStatusYellow({ client, index: state.sourceIndex.value }), CHECK_UNKNOWN_DOCUMENTS: (state: CheckUnknownDocumentsState) =>