diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..ef67ce89 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +# Ignore Gradle project-specific cache directory +.gradle + +# Ignore Gradle build output directory +build/ + +# Ignore intellij files +.idea/ \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 00000000..5b627cfa --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,4 @@ +## Code of Conduct +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +opensource-codeofconduct@amazon.com with any additional questions or comments. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 00000000..c4b6a1c5 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,59 @@ +# Contributing Guidelines + +Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional +documentation, we greatly value feedback and contributions from our community. + +Please read through this document before submitting any issues or pull requests to ensure we have all the necessary +information to effectively respond to your bug report or contribution. + + +## Reporting Bugs/Feature Requests + +We welcome you to use the GitHub issue tracker to report bugs or suggest features. + +When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already +reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: + +* A reproducible test case or series of steps +* The version of our code being used +* Any modifications you've made relevant to the bug +* Anything unusual about your environment or deployment + + +## Contributing via Pull Requests +Contributions via pull requests are much appreciated. 
Before sending us a pull request, please ensure that: + +1. You are working against the latest source on the *main* branch. +2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. +3. You open an issue to discuss any significant work - we would hate for your time to be wasted. + +To send us a pull request, please: + +1. Fork the repository. +2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. +3. Ensure local tests pass. +4. Commit to your fork using clear commit messages. +5. Send us a pull request, answering any default questions in the pull request interface. +6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. + +GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and +[creating a pull request](https://help.github.com/articles/creating-a-pull-request/). + + +## Finding contributions to work on +Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. + + +## Code of Conduct +This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). +For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact +opensource-codeofconduct@amazon.com with any additional questions or comments. + + +## Security issue notifications +If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 
+ + +## Licensing + +See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. diff --git a/HANDBOOK.md b/HANDBOOK.md new file mode 100644 index 00000000..92cde8be --- /dev/null +++ b/HANDBOOK.md @@ -0,0 +1,345 @@ +# Handbook for commands/API’s while using replication plugin + +- [Handbook for commands/API’s while using replication plugin](#handbook-for-commandsapis-while-using-replication-plugin) + - [Spin up two test clusters with Open Distro for Elasticsearch and install replication plugin](#spin-up-two-test-clusters-with-open-distro-for-elasticsearch-and-install-replication-plugin) + - [Setup cross-cluster connectivity](#setup-cross-cluster-connectivity) + - [Security](#security) + - [Required permissions on follower cluster](#required-permissions-on-follower-cluster) + - [Required permissions on leader cluster](#required-permissions-on-leader-cluster) + - [Populate it on test clusters](#populate-it-on-test-clusters) + - [Start replication](#start-replication) + - [Stop replication](#stop-replication) + - [Start replication via Autofollow pattern](#start-replication-via-autofollow-pattern) + - [Stop AutoFollow](#stop-autofollow) + - [Check ongoing replication tasks](#check-ongoing-replication-tasks) + - [Check completed and failed replication tasks](#check-completed-and-failed-replication-tasks) + +This document helps you with sample commands/api’s to run, for the various scenarios supported by replication plugin. + +The example uses docker based setup to spin up the clusters with OpenDistro for Elasticsearch security plugin. + +## Spin up two test clusters with Open Distro for Elasticsearch and install replication plugin + +Clone the cross-cluster-replication repository and spin up the clusters from the [packaged example](https://github.com/opendistro-for-elasticsearch/cross-cluster-replication/tree/main/examples/sample). + +```bash + +# 1. 
Clone the cross-cluster-replication repo +git clone https://github.com/opendistro-for-elasticsearch/cross-cluster-replication.git + +# 2. Navigate to example directory +cd cross-cluster-replication/examples/sample + +# 3. Build local image with replication plugin +docker build -t odfe-with-replication ./odfe-with-replication + +# 4. Bring up 2 clusters with replication plugin installed +docker-compose up + +# 5. Set variables for readability (in different terminal window/tab where you will run rest of the steps) +export LEADER=localhost:9200 +export FOLLOWER=localhost:9201 +export LEADER_IP=172.16.0.10 +``` + +If you are setting this up on your own Open Distro for Elasticsearch 1.13 clusters, you can install cross-cluster-replication plugin on every node of leader and follower clusters as follows + +```bash +sudo bin/elasticsearch-plugin install \ +https://github.com/opendistro-for-elasticsearch/cross-cluster-replication/releases/download/experimental-opendistro-1.9.0.2/replication-7.8.0.zip +``` + +Further you need to ensure *user_injection* is enabled in the elasticsearch.yml as follows in addition to other [security configuration](https://opendistro.github.io/for-elasticsearch-docs/docs/security/configuration/) to use it with Open Distro for Elasticsearch security plugin. + +```yml +opendistro_security.unsupported.inject_user.enabled: true +``` + +## Setup cross-cluster connectivity + +Setup remote cluster connection from follower cluster to the leader cluster. The Open Distro for Elasticsearch security plugin ensures the cross-cluster traffic is encrypted. 
+ +```bash +curl -k -u admin:admin -XPUT "https://${FOLLOWER}/_cluster/settings?pretty" \ +-H 'Content-Type: application/json' -d" +{ + \"persistent\": { + \"cluster\": { + \"remote\": { + \"leader-cluster\": { + \"seeds\": [ \"${LEADER_IP}:9300\" ] + } + } + } + } +} +" +``` + +## Security + +### Required permissions on follower cluster + +``` +# Index Level Permissions + +indices:admin/close +indices:admin/close[s] +indices:admin/create +indices:admin/mapping/put +indices:admin/open +indices:admin/opendistro/replication/index/start +indices:admin/opendistro/replication/index/stop +indices:data/read/opendistro/replication/file_metadata +indices:data/write/index +indices:data/write/opendistro/replication/changes +indices:data/write/replication +indices:monitor/stats + +# Cluster Level Permissions + +cluster:monitor/state +cluster:admin/snapshot/restore +cluster:admin/opendistro/replication/autofollow/update +``` + +### Required permissions on leader cluster + +``` +# Index Level Permissions +indices:data/read/opendistro/replication/file_chunk +indices:data/read/opendistro/replication/file_metadata +indices:admin/opendistro/replication/resources/release +indices:data/read/opendistro/replication/changes +indices:admin/mappings/get +indices:monitor/stats + +# Cluster Level Permissions +cluster:monitor/state +``` + +### Populate it on test clusters + +You can run the [example script](https://github.com/opendistro-for-elasticsearch/cross-cluster-replication/tree/main/examples/sample/setup_permissions.sh) to setup the required permissions on the test clusters. + +```bash +sh ./setup_permissions.sh "${LEADER}" +sh ./setup_permissions.sh "${FOLLOWER}" +``` + +## Start replication + +This API is used to initiate replication of an index from the leader cluster onto the follower cluster. The API is invoked on the follower and the desired remote index from leader cluster is provided as parameters. 
+
+**Signature**
+
+```bash
+# REQUEST
+
+PUT localhost:{{foll_port}}/_opendistro/_replication//_start
+Content-Type: application/json
+
+{ "remote_cluster" : "leader-cluster", "remote_index": ""}
+
+
+# RESPONSE
+
+{
+  "acknowledged": true
+}
+
+```
+
+**Example**
+```bash
+curl -k -u testuser:testuser -XPUT \
+"https://${FOLLOWER}/_opendistro/_replication/follower-01/_start?pretty" \
+-H 'Content-type: application/json' \
+-d'{"remote_cluster":"leader-cluster", "remote_index": "leader-01"}'
+
+# Now there should be a ReadOnly index named 'follower-01' on the follower cluster that should continuously stay updated with changes to 'leader-01' index on the leader cluster.
+```
+
+## Stop replication
+
+Replication can be stopped anytime by invocation of Stop API on the follower cluster. Stopping replication opens up the index for writes.
+
+Note that the follower index is NOT deleted on stopping replication.
+
+**Signature**
+
+```bash
+# REQUEST
+
+POST localhost:{{foll_port}}/_opendistro/_replication//_stop
+Content-Type: application/json
+{}
+
+# RESPONSE
+
+{
+  "acknowledged": true
+}
+```
+
+**Example**
+```bash
+curl -k -u testuser:testuser -XPOST \
+"https://${FOLLOWER}/_opendistro/_replication/follower-01/_stop?pretty" \
+-H 'Content-type: application/json' -d'{}'
+
+# You can confirm data isn't replicated any more by making modifications to
+# leader-01 index on $LEADER cluster
+```
+
+## Start replication via Autofollow pattern
+
+AutoFollow API helps to automatically start replication on indices matching a pattern.
+
+**Signature**
+
+```bash
+# REQUEST
+
+POST localhost:{{foll_port}}/_opendistro/_replication/_autofollow
+Content-Type: application/json
+
+{"connection" : "", "pattern": "", "name": ""}
+
+# RESPONSE
+
+{
+  "acknowledged": true
+}
+```
+
+**Example**
+```bash
+curl -k -u testuser:testuser -XPOST \
+"https://${FOLLOWER}/_opendistro/_replication/_autofollow?pretty" \
+-H 'Content-type: application/json' \
+-d'{"connection":"leader-cluster","pattern":"leader-*", "name":"my-replication"}'
+```
+
+## Stop AutoFollow
+
+AutoFollow can be removed by invoking API on the follower as follows. Invocation of the API is only to stop any new auto-follow activity and does NOT stop replication already initiated by the auto-follow.
+
+**Signature**
+
+```bash
+DELETE localhost:{{foll_port}}/_opendistro/_replication/_autofollow
+Content-Type: application/json
+
+{
+  "connection": "leader-cluster",
+  "name": "test"
+}
+```
+
+**Example**
+
+```bash
+curl -k -u testuser:testuser -XDELETE \
+"https://${FOLLOWER}/_opendistro/_replication/_autofollow?pretty" \
+-H 'Content-type: application/json' \
+-d'{"connection":"leader-cluster", "name":"my-replication"}'
+```
+
+## Check ongoing replication tasks
+
+Until a status API is added, you can check ongoing replication via the tasks API.
+ +```bash +curl -k -u admin:admin -XGET "https://${FOLLOWER}/_cat/tasks?v&actions=*replication*&detailed" + +action task_id parent_task_id type start_time timestamp running_time ip node description +cluster:indices/admin/replication[c] ltIs84uTRLOYnOr8Giu0VQ:118 cluster:1 persistent 1613651479438 12:31:19 17.7s 172.18.0.20 odfe-follower1 replication:leader-cluster:[leader-01/g3d9ddwZQHeuvEGEouQxDQ] -> follower-01 +cluster:indices/shards/replication[c] ltIs84uTRLOYnOr8Giu0VQ:147 cluster:2 persistent 1613651480095 12:31:20 17.1s 172.18.0.20 odfe-follower1 replication:leader-cluster:[leader-01][0] -> [follower-01][0] +``` + +## Check completed and failed replication tasks + +Failed and completed tasks are captured in '.tasks' index. For failed tasks, the failure reason is also captured. You can look for the replicated index name to identify the tasks corresponding to the index. + +```bash +curl -k -u admin:admin -XGET "https://${FOLLOWER}/.tasks/_search?pretty" + +{ + "took" : 5, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 2, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : ".tasks", + "_type" : "task", + "_id" : "ltIs84uTRLOYnOr8Giu0VQ:118", + "_score" : 1.0, + "_source" : { + "completed" : true, + "task" : { + "node" : "ltIs84uTRLOYnOr8Giu0VQ", + "id" : 118, + "type" : "persistent", + "action" : "cluster:indices/admin/replication[c]", + "status" : { + "state" : "STARTED" + }, + "description" : "replication:leader-cluster:[leader-01/g3d9ddwZQHeuvEGEouQxDQ] -> follower-01", + "start_time_in_millis" : 1613651479438, + "running_time_in_nanos" : 79627167100, + "cancellable" : true, + "parent_task_id" : "cluster:1", + "headers" : { } + }, + "response" : { + "index_task_status" : "COMPLETED", + "following_tasks" : { + "state" : "MONITORING" + } + } + } + }, + { + "_index" : ".tasks", + "_type" : "task", + "_id" : 
"ltIs84uTRLOYnOr8Giu0VQ:147", + "_score" : 1.0, + "_source" : { + "completed" : true, + "task" : { + "node" : "ltIs84uTRLOYnOr8Giu0VQ", + "id" : 147, + "type" : "persistent", + "action" : "cluster:indices/shards/replication[c]", + "status" : { + "state" : "STARTED" + }, + "description" : "replication:leader-cluster:[leader-01][0] -> [follower-01][0]", + "start_time_in_millis" : 1613651480095, + "running_time_in_nanos" : 78969894200, + "cancellable" : true, + "parent_task_id" : "cluster:2", + "headers" : { } + }, + "response" : { + "status" : "COMPLETED" + } + } + } + ] + } +} +``` + diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..67db8588 --- /dev/null +++ b/LICENSE @@ -0,0 +1,175 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. diff --git a/NOTICE b/NOTICE new file mode 100644 index 00000000..616fc588 --- /dev/null +++ b/NOTICE @@ -0,0 +1 @@ +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
diff --git a/README.md b/README.md new file mode 100644 index 00000000..27fbed8b --- /dev/null +++ b/README.md @@ -0,0 +1,151 @@ +# Open Distro Cross Cluster Replication Plugin + +- [Open Distro Cross Cluster Replication Plugin](#open-distro-cross-cluster-replication-plugin) + - [Build](#build) + - [Building from the command line](#building-from-the-command-line) + - [Intellij Setup](#intellij-setup) + - [Getting Started](#getting-started) + - [Step 1: Start test clusters with replication plugin locally](#step-1-start-test-clusters-with-replication-plugin-locally) + - [Step 2: Setup cross-cluster connectivity](#step-2-setup-cross-cluster-connectivity) + - [Step 3: Populate leader cluster with sample data](#step-3-populate-leader-cluster-with-sample-data) + - [Step 4: Start replication](#step-4-start-replication) + - [Step 5: Make changes to data on leader index and validate replication](#step-5-make-changes-to-data-on-leader-index-and-validate-replication) + - [Step 5: Stop replication](#step-5-stop-replication) + - [CONTRIBUTING GUIDELINES](#contributing-guidelines) + - [License](#license) + + +Cross-Cluster Replication Plugin enables users to replicate data across two elasticsearch clusters which enables a number of use cases such as + +- **Disaster Recovery (DR) or High Availability (HA):** For production systems with high availability requirements, cross-cluster replication provides the safety-net of being able to failover to an alternate cluster in case of failure or outages on the primary cluster. +- **Reduced Query Latency:** For critical business needs, responding to the customer query in the shortest time is critical. Replicating data to a cluster that is closest to the user can drastically reduce the query latency. Applications can redirect the customer query to the nearest data center where data has been replicated. +- **Scaling out query heavy workloads:** Splitting a query heavy workload across multiple replica clusters improves horizontal scalability. 
+- **Aggregated reports** - Enterprise customers can roll up reports continually from smaller clusters belonging to different lines of business into a central cluster for consolidated reports, dashboards or visualizations. + +Following are the tenets that guided our design: + +- **Secure**: Cross-cluster replication should offer strong security controls for all flows and APIs. +- **Correctness**: There must be no difference between the intended contents of the follower index and the leader index. +- **Performance**: Replication should not impact indexing rate of the leader cluster. +- **Lag**: The replication lag between the leader and the follower cluster should be under a few seconds. +- **Resource usage**: Replication should use minimal resources. + + +The replication machinery is implemented as an Elasticsearch plugin that exposes APIs to control replication, spawns background persistent tasks to asynchronously replicate indices and utilizes snapshot repository abstraction to facilitate bootstrap. Replication relies on cross cluster connection setup from the follower cluster to the leader cluster for connectivity. Once replication is initiated on an index, a background persistent task per primary shard on the follower cluster continuously polls corresponding shards from the leader index and applies the changes on to the follower shard. The replication plugin offers seamless integration with the Open Distro for Elasticsearch Security plugin for secure data transfer and access control. + + +## Build + +The project in this package uses the [Gradle](https://docs.gradle.org/current/userguide/userguide.html) build system. Gradle comes with excellent documentation that should be your first stop when trying to figure out how to operate or modify the build. + +### Building from the command line +Set JAVA_HOME to JDK-14 or above + +1. `./gradlew build` builds and tests project. +2. `./gradlew clean release` cleans previous builds, creates new build and tests project. +3. 
`./gradlew clean run -PnumNodes=3` launches a 3 node cluster of both leader and follower with replication plugin installed. +4. `./gradlew integTest` launches a single node cluster's and runs all integ tests. +5. `./gradlew integTest -Dtests.class=*{class-name}` runs a single integ class. +6. `./gradlew integTest -Dtests.class=*{class-name} -Dtests.method="{method-name}"` runs a single integ test method (remember to quote the test method name if it contains spaces). + +## Intellij Setup + +Launch Intellij IDEA, choose **Import Project**, and select the `settings.gradle` file in the root of this package. + +## Getting Started + +Following steps will help you install the replication plugin on a test cluster. + +### Step 1: Start test clusters with replication plugin locally + +```bash +./gradlew clean run -PnumNodes=3 + + +# Set variables for readability (in different terminal window/tab where you will run rest of the steps) +export LEADER=localhost:9200 +export FOLLOWER=localhost:9201 +export LEADER_TRANSPORT=localhost:9300 +``` + +### Step 2: Setup cross-cluster connectivity + +Setup remote cluster connection from follower cluster to the leader cluster + +```bash +curl -XPUT "http://${FOLLOWER}/_cluster/settings?pretty" \ +-H 'Content-Type: application/json' -d" +{ + \"persistent\": { + \"cluster\": { + \"remote\": { + \"leader-cluster\": { + \"seeds\": [ \"${LEADER_TRANSPORT}\" ] + } + } + } + } +} +" +``` + +### Step 3: Populate leader cluster with sample data + +```bash +curl -XPOST "http://${LEADER}/leader-01/_doc/1" -H 'Content-Type: application/json' -d '{"value" : "data1"}' +``` + +### Step 4: Start replication + +```bash +curl -XPUT "http://${FOLLOWER}/_opendistro/_replication/follower-01/_start?pretty" \ +-H 'Content-type: application/json' \ +-d'{"remote_cluster":"leader-cluster", "remote_index": "leader-01"}' +``` + +### Step 5: Make changes to data on leader index and validate replication + +```bash +# 1. 
Modify doc with id 1
+curl -XPOST "http://${LEADER}/leader-01/_doc/1" \
+-H 'Content-Type: application/json' -d '{"value" : "data1-modified"}'
+
+# 2. Add doc with id 2
+curl -XPOST "http://${LEADER}/leader-01/_doc/2" \
+-H 'Content-Type: application/json' -d '{"value" : "data2"}'
+
+# 3. Validate replicated index exists
+curl -XGET "http://${FOLLOWER}/_cat/indices"
+# The above should list "follower-01" as one of the indices as well
+
+# 4. Check content of follower-01
+curl -XGET "http://${FOLLOWER}/follower-01/_search"
+# The above should list 2 documents with id 1 and 2 and matching content of
+# leader-01 index on $LEADER cluster
+
+```
+
+At this point, any changes to leader-01 continue to be replicated to follower-01.
+
+### Step 5: Stop replication
+
+Stopping replication opens up the replicated index on the follower cluster for writes. This can be leveraged to failover to the follower cluster when the need arises.
+
+```bash
+curl -XPOST "http://${FOLLOWER}/_opendistro/_replication/follower-01/_stop?pretty" \
+-H 'Content-type: application/json' -d'{}'
+
+# You can confirm data isn't replicated any more by making modifications to
+# leader-01 index on $LEADER cluster
+```
+
+For more detailed instructions/examples please refer to [HANDBOOK](HANDBOOK.md) under examples.
+
+
+## CONTRIBUTING GUIDELINES
+
+See [CONTRIBUTING](CONTRIBUTING.md) for more information.
+
+## License
+
+This project is licensed under the Apache-2.0 License.
diff --git a/build.gradle b/build.gradle new file mode 100644 index 00000000..8e37b4af --- /dev/null +++ b/build.gradle @@ -0,0 +1,206 @@ +import java.nio.charset.StandardCharsets +import java.nio.file.Files +import java.util.concurrent.Callable +import org.elasticsearch.gradle.testclusters.TestClusterConfiguration +import java.util.function.Predicate +import java.util.concurrent.TimeUnit +import java.util.stream.Collectors + +plugins { + id "elasticsearch.esplugin" version "7.10.2" + id 'org.jetbrains.kotlin.jvm' version "1.3.72" +} + +group = "com.amazon.es" +version = "${version}.0" + +ext.kotlin_version = '1.3.72' +repositories { + mavenCentral() +} + +apply plugin: 'elasticsearch.testclusters' +apply plugin: 'elasticsearch.rest-test' + +dependencies { + // Elasticsearch nanny state marks all dependencies non-transitive forcing us to list them out. + compile "org.jetbrains.kotlin:kotlin-stdlib-jdk8" + compile "org.jetbrains.kotlin:kotlin-stdlib-jdk7" + compile "org.jetbrains.kotlin:kotlin-stdlib" + compile "org.jetbrains.kotlin:kotlin-stdlib-common" + compile "org.jetbrains:annotations:13.0" + compile "org.jetbrains.kotlinx:kotlinx-coroutines-core:1.3.5" + + testImplementation "org.assertj:assertj-core:3.17.2" + testImplementation "org.elasticsearch.client:elasticsearch-rest-high-level-client:${versions.elasticsearch}" + testImplementation "org.jetbrains.kotlinx:kotlinx-coroutines-test:1.3.5" +} + +// Elasticsearch nanny state forces us to manually resolve all conflicts +configurations.all { + if (it.state != Configuration.State.UNRESOLVED) return + resolutionStrategy { + force "org.jetbrains.kotlin:kotlin-stdlib:${kotlin_version}" + force "org.jetbrains.kotlin:kotlin-stdlib-common:${kotlin_version}" + } +} + +compileKotlin { + kotlinOptions { + // This should be 11, but the ES logger usage checker tool doesn't like classes > 1.8 + jvmTarget = "1.8" + freeCompilerArgs = ['-Xjsr305=strict'] // Handle Elasticsearch @Nullable annotation correctly + } +} + 
+compileTestKotlin { + kotlinOptions { + jvmTarget = "1.8" + freeCompilerArgs = ['-Xjsr305=strict'] + } +} + +esplugin { + name = project.name + description = "Open Distro Cross Cluster Replication Plugin" + classname = "com.amazon.elasticsearch.replication.ReplicationPlugin" +} + +ext { + licenseFile = rootProject.file('LICENSE') + noticeFile = rootProject.file('NOTICE') +} + +javadoc.enabled = false +licenseHeaders.enabled = false +dependencyLicenses.enabled = false +thirdPartyAudit.enabled = true +validateNebulaPom.enabled = false +loggerUsageCheck.enabled = false + +test { + systemProperty 'tests.security.manager', 'false' + if (System.getProperty("tests.debug") == "true") { + debug true + debugOptions { + port = 8000 + suspend = false + } + } +} + +// Setting RunTask.debug = true configures the JVM to use a debugger in listen mode (server=n,suspend=y). This is a +// pain for multi node clusters since the node startup fails if it can't connect to a debugger. So instead we manually +// configure the debugger in attach mode (server=y) so that we can attach to a specific node after it has been started. 
+static String getDebugJvmArgs(int debugPort) { + return " -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=${debugPort}" +} + +def securityPluginFile = new Callable() { + @Override + RegularFile call() throws Exception { + return new RegularFile() { + @Override + File getAsFile() { + return fileTree("$projectDir/src/test/resources/security/plugin").getSingleFile() + } + } + } + } + +// TODO: Remove this once the integration test framework supports configuring and installing other plugins +def isReleaseTask = "release" in gradle.startParameter.taskNames + +/* +* TODO: Default to false as it needs extending RunTask for automation +* If enabled, make sure to run initializeSecurityIndex task +*/ +def securityEnabled = findProperty("security") == "true" + +File repo = file("$buildDir/testclusters/repo") +def _numNodes = findProperty('numNodes') as Integer ?: 1 +testClusters { + leaderCluster { + plugin(project.tasks.bundlePlugin.archiveFile) + if(!isReleaseTask && securityEnabled) { + plugin(provider(securityPluginFile)) + cliSetup("opendistro_security/install_demo_configuration.sh", "-y") + } + int debugPort = 5005 + testDistribution = "INTEG_TEST" + if (_numNodes > 1) numberOfNodes = _numNodes + //numberOfNodes = 3 + setting 'path.repo', repo.absolutePath + if(_numNodes == 1) jvmArgs "${-> getDebugJvmArgs(debugPort++)}" + } + followCluster { + testDistribution = "INTEG_TEST" + plugin(project.tasks.bundlePlugin.archiveFile) + if(!isReleaseTask && securityEnabled) { + plugin(provider(securityPluginFile)) + cliSetup("opendistro_security/install_demo_configuration.sh", "-y") + } + int debugPort = 5010 + if (_numNodes > 1) numberOfNodes = _numNodes + //numberOfNodes = 3 + setting 'path.repo', repo.absolutePath + if(_numNodes == 1) jvmArgs "${-> getDebugJvmArgs(debugPort++)}" + } +} + +integTest { + useCluster testClusters.leaderCluster + useCluster testClusters.followCluster + doFirst { + getClusters().forEach { cluster -> + systemProperty 
"tests.cluster.${cluster.name}.http_hosts", "${-> cluster.allHttpSocketURI.join(',')}" + systemProperty "tests.cluster.${cluster.name}.transport_hosts", "${-> cluster.allTransportPortURI.join(',')}" + } + } +} + +run { + useCluster testClusters.leaderCluster + useCluster testClusters.followCluster + doFirst { + getClusters().forEach { cluster -> + LinkedHashMap> waitConditions = new LinkedHashMap<>() + cluster.waitForConditions(waitConditions, System.currentTimeMillis(), 40, TimeUnit.SECONDS, cluster) + // Write unicast file manually - we could't wait on internal method(waitForAllConditions) as + // cluster health needs changes based on security plugin installation. + String unicastUris = cluster.nodes.stream().flatMap { node -> + node.getAllTransportPortURI().stream() + }.collect(Collectors.joining("\n")) + cluster.nodes.forEach{node -> + try { + Files.write(node.getConfigDir().resolve("unicast_hosts.txt"), unicastUris.getBytes(StandardCharsets.UTF_8)); + } catch (IOException e) { + throw new java.io.UncheckedIOException("Failed to write unicast_hosts for " + this, e); + } + } + // TODO: Add health check and avoid wait for the cluster formation + cluster.waitForConditions(waitConditions, System.currentTimeMillis(), 40, TimeUnit.SECONDS, cluster) + } + } +} + +task initializeSecurityIndex { + doLast { + exec { + executable "src/test/resources/security/scripts/SecurityAdminWrapper.sh" + args "${buildDir}" + } + } +} + +testingConventions { + naming { + IT { + baseClass 'com.amazon.elasticsearch.replication.MultiClusterRestTestCase' + } + } +} + +task release { + dependsOn 'build' +} diff --git a/examples/sample/docker-compose.yml b/examples/sample/docker-compose.yml new file mode 100644 index 00000000..c9b27931 --- /dev/null +++ b/examples/sample/docker-compose.yml @@ -0,0 +1,72 @@ +version: '3' +services: + odfe-leader1: + image: odfe-with-replication + container_name: odfe-leader1 + environment: + - cluster.name=odfe-leader + - node.name=odfe-leader1 + - 
discovery.seed_hosts=odfe-leader1 + - cluster.initial_master_nodes=odfe-leader1 + - bootstrap.memory_lock=true # along with the memlock settings below, disables swapping + - opendistro_security.unsupported.inject_user.enabled=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" # minimum and maximum Java heap size, recommend setting both to 50% of system RAM + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 # maximum number of open files for the Elasticsearch user, set to at least 65536 on modern systems + hard: 65536 + volumes: + - odfe-leader-data1:/usr/share/elasticsearch/data + ports: + - 9200:9200 + - 9600:9600 # required for Performance Analyzer + networks: + odfe-net: + ipv4_address: 172.18.0.10 + odfe-follower1: + image: odfe-with-replication + build: + context: . + dockerfile: Dockerfile + container_name: odfe-follower1 + environment: + - cluster.name=odfe-follower + - node.name=odfe-follower1 + - discovery.seed_hosts=odfe-follower1 + - cluster.initial_master_nodes=odfe-follower1 + - opendistro_security.unsupported.inject_user.enabled=true + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - odfe-follower-data1:/usr/share/elasticsearch/data + ports: + - 9201:9200 + - 9601:9600 # required for Performance Analyzer + networks: + odfe-net: + ipv4_address: 172.18.0.20 + +volumes: + odfe-leader-data1: + odfe-follower-data1: + +networks: + odfe-net: + driver: bridge + driver_opts: + com.docker.network.enable_ipv6: "false" + ipam: + driver: default + config: + - subnet: 172.18.0.0/16 + diff --git a/examples/sample/odfe-with-replication/Dockerfile b/examples/sample/odfe-with-replication/Dockerfile new file mode 100644 index 00000000..584baebc --- /dev/null +++ b/examples/sample/odfe-with-replication/Dockerfile @@ -0,0 +1,15 @@ +FROM amazon/opendistro-for-elasticsearch:1.13.0 +COPY --chown=elasticsearch:elasticsearch 
opendistro-cross-cluster-replication-1.13.0.0.zip /tmp/opendistro-cross-cluster-replication-1.13.0.0.zip +RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install -v file:///tmp/opendistro-cross-cluster-replication-1.13.0.0.zip +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-performance-analyzer +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-alerting +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-anomaly-detection +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-asynchronous-search +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-index-management +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-reports-scheduler +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-job-scheduler +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-knn +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-sql +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin remove -v opendistro-security +#COPY --chown=elasticsearch:elasticsearch opendistro-cross-cluster-replication-1.13.0.0.zip /tmp/opendistro-cross-cluster-replication-1.13.0.0.zip +#RUN /usr/share/elasticsearch/bin/elasticsearch-plugin install -v file:///tmp/opendistro-cross-cluster-replication-1.13.0.0.zip diff --git a/examples/sample/odfe-with-replication/opendistro-cross-cluster-replication-1.13.0.0.zip b/examples/sample/odfe-with-replication/opendistro-cross-cluster-replication-1.13.0.0.zip new file mode 100644 index 00000000..2e9f0975 Binary files /dev/null and b/examples/sample/odfe-with-replication/opendistro-cross-cluster-replication-1.13.0.0.zip differ diff --git a/examples/sample/setup_permissions.sh b/examples/sample/setup_permissions.sh new file mode 100755 index 00000000..bf6b5109 --- /dev/null +++ b/examples/sample/setup_permissions.sh @@ -0,0 +1,149 @@ +#!/bin/bash + 
+admin='admin:admin' +testuser="testuser" + +if [ -z "$1" ]; then + echo "Please provide endpoint hostname:port" + exit 1 +fi + +endpoint="$1" + +echo "Creating user '${testuser}' and associating with replication_backend role" +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/internalusers/${testuser}?pretty" -H 'Content-Type: application/json' -d' +{ + "password": "testuser", + "backend_roles": ["replication_backend"] +} +' +echo +echo "-----" + +echo "Creating actiongroup 'follower-replication-action-group' and associating index level permissions to start/stop replication" +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/actiongroups/follower-replication-action-group" -H 'Content-Type: application/json' -d' +{ + "allowed_actions": [ + "indices:admin/close", + "indices:admin/close[s]", + "indices:admin/create", + "indices:admin/mapping/put", + "indices:admin/open", + "indices:admin/opendistro/replication/index/start", + "indices:admin/opendistro/replication/index/stop", + "indices:data/read/opendistro/replication/file_metadata", + "indices:data/write/index", + "indices:data/write/opendistro/replication/changes", + "indices:data/write/replication", + "indices:monitor/stats" + ] +} +' +echo +echo "-----" + +echo "Creating actiongroup 'follower-replication-cluster-action-group' and associating cluster level permissions for replication." +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/actiongroups/follower-replication-cluster-action-group" -H 'Content-Type: application/json' -d' +{ + "allowed_actions": [ + "cluster:monitor/state", + "cluster:admin/snapshot/restore", + "cluster:admin/opendistro/replication/autofollow/update" + ] +} +' +echo +echo "-----" + + +echo "Creating actiongroup 'leader-replication-action-group' and associating index level permissions for replication." 
+curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/actiongroups/leader-replication-action-group" -H 'Content-Type: application/json' -d' +{ + "allowed_actions": [ + "indices:data/read/opendistro/replication/file_chunk", + "indices:data/read/opendistro/replication/file_metadata", + "indices:admin/opendistro/replication/resources/release", + "indices:data/read/opendistro/replication/changes", + "indices:admin/mappings/get", + "indices:monitor/stats" + ] +} +' +echo +echo "-----" + + +echo "Creating actiongroup 'leader-replication-cluster-action-group' and associating cluster level permissions for replication." +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/actiongroups/leader-replication-cluster-action-group" -H 'Content-Type: application/json' -d' +{ + "allowed_actions": [ + "cluster:monitor/state" + ] +} +' +echo +echo "-----" + +echo "Creating role 'replication_follower_role' and associating for index pattern '*' and actiongroups ['follower-replication-action-group', 'follower-replication-cluster-action-group']" +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/roles/replication_follower_role" -H 'Content-Type: application/json' -d' +{ + "cluster_permissions": [ + "follower-replication-cluster-action-group" + ], + "index_permissions": [{ + "index_patterns": [ + "*" + ], + "allowed_actions": [ + "follower-replication-action-group" + ] + }] +} +' +echo +echo "-----" + +echo "Creating role 'replication_leader_role' and associating for index pattern '*' and actiongroup ['leader-replication-action-group', 'leader-replication-cluster-action-group']" +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/roles/replication_leader_role" -H 'Content-Type: application/json' -d' +{ + "cluster_permissions": [ + "leader-replication-cluster-action-group" + ], + "index_permissions": [{ + "index_patterns": [ + "*" + ], + "allowed_actions": [ + "leader-replication-action-group" + ] + }] +} +' 
+echo +echo "-----" + + +echo "Mapping role 'replication_follower_role' to 'replication_backend' backend role" +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/rolesmapping/replication_follower_role?pretty" -H 'Content-Type: application/json' -d' +{ + "backend_roles" : [ + "replication_backend" + ] +} +' +echo +echo "-----" + +echo "Mapping role 'replication_leader_role' to 'replication_backend' backend role" +curl -ks -u $admin -XPUT "https://${endpoint}/_opendistro/_security/api/rolesmapping/replication_leader_role?pretty" -H 'Content-Type: application/json' -d' +{ + "backend_roles" : [ + "replication_backend" + ] +} +' +echo +echo "-----" + +echo + diff --git a/gradle.properties b/gradle.properties new file mode 100644 index 00000000..d3899c90 --- /dev/null +++ b/gradle.properties @@ -0,0 +1 @@ +version = 1.13.0 \ No newline at end of file diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 00000000..87b738cb Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 00000000..33682bbb --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.6.1-all.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 00000000..af6708ff --- /dev/null +++ b/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. 
+while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? 
-eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 00000000..6d57edc7 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/http-client.env.json b/http-client.env.json new file mode 100644 index 00000000..f34e0c46 --- /dev/null +++ b/http-client.env.json @@ -0,0 +1,6 @@ +{ + "dev": { + "foll_port": "9201", + "leader_port": "9200" + } +} \ No newline at end of file diff --git a/replication.http b/replication.http new file mode 100644 index 00000000..fd673a53 --- /dev/null +++ b/replication.http @@ -0,0 +1,154 @@ +### Setup remote connection with security plugin +PUT localhost:{{foll_port}}/_cluster/settings?pretty +Authorization: Basic admin admin +Content-Type: application/json + + +{ + "persistent": { + "cluster": { + "remote": { + "source": { + "seeds": [ "127.0.0.1:9300" ] + } + } + } + } +} + +### Add a document to leader with security plugin +PUT localhost:{{leader_port}}/customer/_doc/1?pretty +Authorization: Basic admin admin +Content-Type: application/json + + +{ + "name": "John Doe" +} + +### request with security plugin +PUT https://localhost:{{foll_port}}/_opendistro/_replication/customer/_start?pretty +Authorization: Basic admin admin +Content-Type: application/json + + +{ + "remote_cluster": "remote-cluster", + "remote_index": "remote-index" +} + +### Setup remote connection +PUT localhost:{{foll_port}}/_cluster/settings?pretty +Content-Type: application/json + + +{ + "persistent": { + "cluster": { + "remote": { + "source": { + "seeds": [ "127.0.0.1:9300" ] + } + } + } + } +} + +### Add a document to leader +PUT localhost:{{leader_port}}/customer/_doc/1?pretty +Content-Type: application/json + + +{ + "name": "John Doe", + "age": 21 +} + +### Create empty index +PUT localhost:{{leader_port}}/customer +Content-Type: application/json + + +### flush index +POST localhost:{{leader_port}}/_flush + +### Start replication +PUT localhost:{{foll_port}}/_opendistro/_replication/customer/_start +Content-Type: application/json + + +{ + "remote_cluster" : "source", 
+ "remote_index": "customer" +} + +### Add another document after replication started +PUT localhost:{{leader_port}}/customer/_doc/2?pretty +Content-Type: application/json + + +{ + "name": "Jane Doe" +} + +### View leader data +GET localhost:{{leader_port}}/customer/_search?pretty&q=*:*&seq_no_primary_term=true + +### View follower data all nodes +GET localhost:{{foll_port}}/customer/_search?pretty&q=*:*&seq_no_primary_term=true + +### View follower data node-0 +GET localhost:{{foll_port}}/customer/_search?pretty&q=*:*&seq_no_primary_term=true&preference=_only_nodes:node-0 + +### View follower data node-1 +GET localhost:{{foll_port}}/customer/_search?pretty&q=*:*&seq_no_primary_term=true&preference=_only_nodes:node-1 + +### View follower data node-2 +GET localhost:{{foll_port}}/customer/_search?pretty&q=*:*&seq_no_primary_term=true&preference=_only_nodes:node-2 + +### Show follower cluster nodes +GET localhost:{{foll_port}}/_cat/nodes?v&h=name,p,ip,po,m,node.role + +### Show follower cluster shards +GET localhost:{{foll_port}}/_cat/shards?v + +### Get leader retention lease info +GET localhost:{{leader_port}}/customer/_stats/docs?pretty&level=shards + +### Show tasks +GET localhost:{{foll_port}}/_tasks?actions=*replication*&detailed + +### Get follower metadata +GET localhost:{{foll_port}}/_cluster/state/metadata + +### Leader shards info +GET localhost:{{leader_port}}/_cat/shards?v&h=i,s,pr,node,globalCheckpoint,maxSeqNo,segmentsCount,docs + +### Follower shards info +GET localhost:{{foll_port}}/_cat/shards?v&h=i,s,pr,node,globalCheckpoint,maxSeqNo,segmentsCount,docs + +### Update auto follow actions +POST localhost:{{foll_port}}/_opendistro/_replication/_autofollow +Content-Type: application/json + +{ + "connection": "source", + "name": "test", + "pattern": "*customer*" +} + +### Delete the auto follow pattern +DELETE localhost:{{foll_port}}/_opendistro/_replication/_autofollow +Content-Type: application/json + +{ + "connection": "source", + "name": "test" +} + 
+### Stop replication +POST localhost:{{foll_port}}/_opendistro/_replication/customer/_stop +Content-Type: application/json + +{ +} diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 00000000..87e45785 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,17 @@ +pluginManagement { + repositories { + mavenCentral() + jcenter() + gradlePluginPortal() + } + resolutionStrategy { + eachPlugin { + // ES not available in gradle plugin portal so hand code here + if(requested.id.namespace == "elasticsearch") { + useModule "org.elasticsearch.gradle:build-tools:${requested.version}" + } + } + } +} + +rootProject.name = "opendistro-cross-cluster-replication" \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationEngine.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationEngine.kt new file mode 100644 index 00000000..e0aaf9a5 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationEngine.kt @@ -0,0 +1,46 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication + +import org.elasticsearch.index.engine.EngineConfig +import org.elasticsearch.index.engine.InternalEngine +import org.elasticsearch.index.seqno.SequenceNumbers + +class ReplicationEngine(config: EngineConfig) : InternalEngine(config) { + + override fun assertPrimaryIncomingSequenceNumber(origin: Operation.Origin, seqNo: Long): Boolean { + assert(origin == Operation.Origin.PRIMARY) { "Expected origin PRIMARY for replicated ops but was $origin" } + assert(seqNo != SequenceNumbers.UNASSIGNED_SEQ_NO) { "Expected valid sequence number for replicated op but was unassigned" } + return true + } + + override fun generateSeqNoForOperationOnPrimary(operation: Operation): Long { + check(operation.seqNo() != SequenceNumbers.UNASSIGNED_SEQ_NO) { "Expected valid sequence number for replicate op but was unassigned"} + return operation.seqNo() + } + + override fun indexingStrategyForOperation(index: Index): IndexingStrategy { + return planIndexingAsNonPrimary(index) + } + + override fun deletionStrategyForOperation(delete: Delete): DeletionStrategy { + return planDeletionAsNonPrimary(delete) + } + + override fun assertNonPrimaryOrigin(operation: Operation): Boolean { + return true + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationException.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationException.kt new file mode 100644 index 00000000..7275ecb5 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationException.kt @@ -0,0 +1,42 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. 
This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication + +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.action.ShardOperationFailedException +import org.elasticsearch.cluster.metadata.IndexMetadata.INDEX_UUID_NA_VALUE +import org.elasticsearch.index.shard.ShardId + +/** + * Base class replication exceptions. Note: Replication process may throw exceptions that do not derive from this such as + * [org.elasticsearch.ResourceAlreadyExistsException], [org.elasticsearch.index.IndexNotFoundException] or + * [org.elasticsearch.index.shard.ShardNotFoundException]. + */ +class ReplicationException: ElasticsearchException { + + constructor(message: String, vararg args: Any) : super(message, *args) + + constructor(message: String, cause: Throwable, vararg args: Any) : super(message, cause, *args) + + constructor(message: String, shardFailures: Array) : super(message) { + shardFailures.firstOrNull()?.let { + setShard(ShardId(it.index(), INDEX_UUID_NA_VALUE, it.shardId())) + // Add first failure as cause and rest as suppressed... + initCause(it.cause) + shardFailures.drop(1).forEach { f -> addSuppressed(f.cause) } + } + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationPlugin.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationPlugin.kt new file mode 100644 index 00000000..a0b6c7f5 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/ReplicationPlugin.kt @@ -0,0 +1,246 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication + +import com.amazon.elasticsearch.replication.action.autofollow.TransportUpdateAutoFollowPatternAction +import com.amazon.elasticsearch.replication.action.autofollow.UpdateAutoFollowPatternAction +import com.amazon.elasticsearch.replication.action.changes.GetChangesAction +import com.amazon.elasticsearch.replication.action.changes.TransportGetChangesAction +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexAction +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexMasterNodeAction +import com.amazon.elasticsearch.replication.action.index.TransportReplicateIndexAction +import com.amazon.elasticsearch.replication.action.index.TransportReplicateIndexMasterNodeAction +import com.amazon.elasticsearch.replication.action.replay.ReplayChangesAction +import com.amazon.elasticsearch.replication.action.replay.TransportReplayChangesAction +import com.amazon.elasticsearch.replication.action.repository.GetFileChunkAction +import com.amazon.elasticsearch.replication.action.repository.GetStoreMetadataAction +import com.amazon.elasticsearch.replication.action.repository.ReleaseLeaderResourcesAction +import com.amazon.elasticsearch.replication.action.repository.TransportGetFileChunkAction +import com.amazon.elasticsearch.replication.action.repository.TransportGetStoreMetadataAction +import com.amazon.elasticsearch.replication.action.stop.StopIndexReplicationAction +import com.amazon.elasticsearch.replication.action.stop.TransportStopIndexReplicationAction +import 
com.amazon.elasticsearch.replication.action.repository.TransportReleaseLeaderResourcesAction +import com.amazon.elasticsearch.replication.metadata.ReplicationMetadata +import com.amazon.elasticsearch.replication.repository.REMOTE_REPOSITORY_TYPE +import com.amazon.elasticsearch.replication.repository.RemoteClusterRepositoriesService +import com.amazon.elasticsearch.replication.repository.RemoteClusterRepository +import com.amazon.elasticsearch.replication.repository.RemoteClusterRestoreLeaderService +import com.amazon.elasticsearch.replication.rest.ReplicateIndexHandler +import com.amazon.elasticsearch.replication.rest.StopIndexReplicationHandler +import com.amazon.elasticsearch.replication.rest.UpdateAutoFollowPatternsHandler +import com.amazon.elasticsearch.replication.task.IndexCloseListener +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowExecutor +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowParams +import com.amazon.elasticsearch.replication.task.index.IndexReplicationExecutor +import com.amazon.elasticsearch.replication.task.index.IndexReplicationParams +import com.amazon.elasticsearch.replication.task.index.IndexReplicationState +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationExecutor +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationParams +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationState +import com.amazon.elasticsearch.replication.util.Injectables +import org.elasticsearch.action.ActionRequest +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.NamedDiff +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver +import org.elasticsearch.cluster.metadata.Metadata +import org.elasticsearch.cluster.metadata.RepositoryMetadata +import org.elasticsearch.cluster.node.DiscoveryNodes +import org.elasticsearch.cluster.service.ClusterService +import 
org.elasticsearch.common.CheckedFunction +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.component.LifecycleComponent +import org.elasticsearch.common.io.stream.NamedWriteableRegistry +import org.elasticsearch.common.io.stream.Writeable +import org.elasticsearch.common.settings.ClusterSettings +import org.elasticsearch.common.settings.IndexScopedSettings +import org.elasticsearch.common.settings.Setting +import org.elasticsearch.common.settings.Settings +import org.elasticsearch.common.settings.SettingsFilter +import org.elasticsearch.common.settings.SettingsModule +import org.elasticsearch.common.unit.TimeValue +import org.elasticsearch.common.xcontent.NamedXContentRegistry +import org.elasticsearch.common.xcontent.XContentParser +import org.elasticsearch.env.Environment +import org.elasticsearch.env.NodeEnvironment +import org.elasticsearch.index.IndexModule +import org.elasticsearch.index.IndexSettings +import org.elasticsearch.index.engine.EngineFactory +import org.elasticsearch.indices.recovery.RecoverySettings +import org.elasticsearch.persistent.PersistentTaskParams +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksExecutor +import org.elasticsearch.plugins.ActionPlugin +import org.elasticsearch.plugins.ActionPlugin.ActionHandler +import org.elasticsearch.plugins.EnginePlugin +import org.elasticsearch.plugins.PersistentTaskPlugin +import org.elasticsearch.plugins.Plugin +import org.elasticsearch.plugins.RepositoryPlugin +import org.elasticsearch.repositories.RepositoriesService +import org.elasticsearch.repositories.Repository +import org.elasticsearch.rest.RestController +import org.elasticsearch.rest.RestHandler +import org.elasticsearch.script.ScriptService +import org.elasticsearch.threadpool.ExecutorBuilder +import org.elasticsearch.threadpool.ScalingExecutorBuilder +import org.elasticsearch.threadpool.ThreadPool +import 
org.elasticsearch.watcher.ResourceWatcherService
import java.util.Optional
import java.util.function.Supplier
import com.amazon.elasticsearch.replication.action.index.block.UpdateIndexBlockAction
import com.amazon.elasticsearch.replication.action.index.block.TransportUpddateIndexBlockAction

/**
 * Plugin entry point for cross-cluster replication: registers transport/REST actions,
 * persistent task executors (index/shard/auto-follow), the remote-cluster repository used
 * for bootstrap, and the [ReplicationEngine] for follower indices.
 *
 * NOTE(review): generic type parameters on the overridden Plugin/ActionPlugin signatures
 * below were stripped in the extracted source and have been reconstructed from the
 * Elasticsearch plugin API — confirm against the target ES version.
 */
internal class ReplicationPlugin : Plugin(), ActionPlugin, PersistentTaskPlugin, RepositoryPlugin, EnginePlugin {

    private lateinit var client: Client
    private lateinit var threadPool: ThreadPool

    companion object {
        const val REPLICATION_EXECUTOR_NAME = "replication"
        // Internal, index-scoped marker setting identifying an index as a replication follower.
        val REPLICATED_INDEX_SETTING = Setting.simpleString("index.opendistro.replicated",
                Setting.Property.InternalIndex, Setting.Property.IndexScope)
        // Batch size for fetching translog changes: default 512, minimum 16, dynamically updatable.
        val REPLICATION_CHANGE_BATCH_SIZE = Setting.intSetting("opendistro.replication.ops_batch_size", 512, 16,
                Setting.Property.Dynamic, Setting.Property.NodeScope)
    }

    override fun createComponents(client: Client, clusterService: ClusterService, threadPool: ThreadPool,
                                  resourceWatcherService: ResourceWatcherService, scriptService: ScriptService,
                                  xContentRegistry: NamedXContentRegistry, environment: Environment,
                                  nodeEnvironment: NodeEnvironment,
                                  namedWriteableRegistry: NamedWriteableRegistry,
                                  indexNameExpressionResolver: IndexNameExpressionResolver,
                                  repositoriesService: Supplier<RepositoriesService>): Collection<Any> {
        this.client = client
        this.threadPool = threadPool
        return listOf(RemoteClusterRepositoriesService(repositoriesService, clusterService))
    }

    override fun getGuiceServiceClasses(): Collection<Class<out LifecycleComponent>> {
        return listOf(Injectables::class.java, RemoteClusterRestoreLeaderService::class.java)
    }

    override fun getActions(): List<ActionPlugin.ActionHandler<out ActionRequest, out ActionResponse>> {
        return listOf(ActionHandler(GetChangesAction.INSTANCE, TransportGetChangesAction::class.java),
                ActionHandler(ReplicateIndexAction.INSTANCE, TransportReplicateIndexAction::class.java),
                ActionHandler(ReplicateIndexMasterNodeAction.INSTANCE, TransportReplicateIndexMasterNodeAction::class.java),
                ActionHandler(ReplayChangesAction.INSTANCE, TransportReplayChangesAction::class.java),
                ActionHandler(GetStoreMetadataAction.INSTANCE, TransportGetStoreMetadataAction::class.java),
                ActionHandler(GetFileChunkAction.INSTANCE, TransportGetFileChunkAction::class.java),
                ActionHandler(UpdateAutoFollowPatternAction.INSTANCE, TransportUpdateAutoFollowPatternAction::class.java),
                ActionHandler(StopIndexReplicationAction.INSTANCE, TransportStopIndexReplicationAction::class.java),
                // NOTE(review): "Upddate" typo is in the referenced class name itself; renaming here would break the reference.
                ActionHandler(UpdateIndexBlockAction.INSTANCE, TransportUpddateIndexBlockAction::class.java),
                ActionHandler(ReleaseLeaderResourcesAction.INSTANCE, TransportReleaseLeaderResourcesAction::class.java)
        )
    }

    override fun getRestHandlers(settings: Settings?, restController: RestController,
                                 clusterSettings: ClusterSettings?, indexScopedSettings: IndexScopedSettings,
                                 settingsFilter: SettingsFilter?,
                                 indexNameExpressionResolver: IndexNameExpressionResolver,
                                 nodesInCluster: Supplier<DiscoveryNodes>): List<RestHandler> {
        return listOf(ReplicateIndexHandler(),
                UpdateAutoFollowPatternsHandler(),
                StopIndexReplicationHandler())
    }

    override fun getExecutorBuilders(settings: Settings): List<ExecutorBuilder<*>> {
        //TODO: get the executor size from settings
        return listOf(ScalingExecutorBuilder(REPLICATION_EXECUTOR_NAME, 1, 10, TimeValue.timeValueMinutes(1)))
    }

    override fun getPersistentTasksExecutor(clusterService: ClusterService, threadPool: ThreadPool, client: Client,
                                            settingsModule: SettingsModule,
                                            expressionResolver: IndexNameExpressionResolver)
            : List<PersistentTasksExecutor<*>> {
        return listOf(
                ShardReplicationExecutor(REPLICATION_EXECUTOR_NAME, clusterService, threadPool, client),
                IndexReplicationExecutor(REPLICATION_EXECUTOR_NAME, clusterService, threadPool, client),
                AutoFollowExecutor(REPLICATION_EXECUTOR_NAME, clusterService, threadPool, client))
    }

    override fun getNamedWriteables(): List<NamedWriteableRegistry.Entry> {
        return listOf(
                NamedWriteableRegistry.Entry(PersistentTaskParams::class.java, ShardReplicationParams.NAME,
                        // can't directly pass in ::ReplicationTaskParams due to https://youtrack.jetbrains.com/issue/KT-35912
                        Writeable.Reader { inp -> ShardReplicationParams(inp) }),
                NamedWriteableRegistry.Entry(PersistentTaskState::class.java, ShardReplicationState.NAME,
                        Writeable.Reader { inp -> ShardReplicationState.reader(inp) }),

                NamedWriteableRegistry.Entry(PersistentTaskParams::class.java, IndexReplicationParams.NAME,
                        Writeable.Reader { inp -> IndexReplicationParams(inp) }),
                NamedWriteableRegistry.Entry(PersistentTaskState::class.java, IndexReplicationState.NAME,
                        Writeable.Reader { inp -> IndexReplicationState.reader(inp) }),

                NamedWriteableRegistry.Entry(PersistentTaskParams::class.java, AutoFollowParams.NAME,
                        Writeable.Reader { inp -> AutoFollowParams(inp) }),

                NamedWriteableRegistry.Entry(Metadata.Custom::class.java, ReplicationMetadata.NAME,
                        Writeable.Reader { inp -> ReplicationMetadata(inp) }),
                NamedWriteableRegistry.Entry(NamedDiff::class.java, ReplicationMetadata.NAME,
                        Writeable.Reader { inp -> ReplicationMetadata.Diff(inp) })
        )
    }

    override fun getNamedXContent(): List<NamedXContentRegistry.Entry> {
        return listOf(
                NamedXContentRegistry.Entry(PersistentTaskParams::class.java,
                        ParseField(IndexReplicationParams.NAME),
                        CheckedFunction { parser: XContentParser -> IndexReplicationParams.fromXContent(parser)}),
                NamedXContentRegistry.Entry(PersistentTaskState::class.java,
                        ParseField(IndexReplicationState.NAME),
                        CheckedFunction { parser: XContentParser -> IndexReplicationState.fromXContent(parser)}),
                NamedXContentRegistry.Entry(PersistentTaskParams::class.java,
                        ParseField(ShardReplicationParams.NAME),
                        CheckedFunction { parser: XContentParser -> ShardReplicationParams.fromXContent(parser)}),
                NamedXContentRegistry.Entry(PersistentTaskState::class.java,
                        ParseField(ShardReplicationState.NAME),
                        CheckedFunction { parser: XContentParser -> ShardReplicationState.fromXContent(parser)}),
                NamedXContentRegistry.Entry(PersistentTaskParams::class.java,
                        ParseField(AutoFollowParams.NAME),
                        CheckedFunction { parser: XContentParser -> AutoFollowParams.fromXContent(parser)}),
                NamedXContentRegistry.Entry(Metadata.Custom::class.java,
                        ParseField(ReplicationMetadata.NAME),
                        CheckedFunction { parser: XContentParser -> ReplicationMetadata.fromXContent(parser)})
        )
    }

    override fun getSettings(): List<Setting<*>> {
        return listOf(REPLICATED_INDEX_SETTING, REPLICATION_CHANGE_BATCH_SIZE)
    }

    override fun getInternalRepositories(env: Environment, namedXContentRegistry: NamedXContentRegistry,
                                         clusterService: ClusterService, recoverySettings: RecoverySettings): Map<String, Repository.Factory> {
        val repoFactory = Repository.Factory { repoMetadata: RepositoryMetadata ->
            RemoteClusterRepository(repoMetadata, client, clusterService, recoverySettings) }
        return mapOf(REMOTE_REPOSITORY_TYPE to repoFactory)
    }

    override fun getEngineFactory(indexSettings: IndexSettings): Optional<EngineFactory> {
        // Follower indices (marked via REPLICATED_INDEX_SETTING) get the ReplicationEngine.
        return if (indexSettings.settings.get(REPLICATED_INDEX_SETTING.key) != null) {
            Optional.of(EngineFactory { config -> ReplicationEngine(config) })
        } else {
            Optional.empty()
        }
    }

    override fun onIndexModule(indexModule: IndexModule) {
        super.onIndexModule(indexModule)
        if (indexModule.settings.get(REPLICATED_INDEX_SETTING.key) != null) {
            indexModule.addIndexEventListener(IndexCloseListener)
        }
    }
}
diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/TransportUpdateAutoFollowPatternAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/TransportUpdateAutoFollowPatternAction.kt
new file mode 100644
index 00000000..6de8e4bb
--- /dev/null
+++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/TransportUpdateAutoFollowPatternAction.kt
@@ -0,0 +1,141 @@
/*
 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.autofollow + +import com.amazon.elasticsearch.replication.metadata.ReplicationMetadata +import com.amazon.elasticsearch.replication.metadata.UpdateAutoFollowPattern +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowExecutor +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowParams +import com.amazon.elasticsearch.replication.util.SecurityContext +import com.amazon.elasticsearch.replication.util.completeWith +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.persistentTasksService +import com.amazon.elasticsearch.replication.util.removeTask +import com.amazon.elasticsearch.replication.util.startTask +import com.amazon.elasticsearch.replication.util.waitForClusterStateUpdate +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.ResourceAlreadyExistsException +import org.elasticsearch.ResourceNotFoundException +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.action.support.master.TransportMasterNodeAction +import org.elasticsearch.client.node.NodeClient +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.block.ClusterBlockException +import 
org.elasticsearch.cluster.block.ClusterBlockLevel
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver
import org.elasticsearch.cluster.service.ClusterService
import org.elasticsearch.common.inject.Inject
import org.elasticsearch.common.io.stream.StreamInput
import org.elasticsearch.threadpool.ThreadPool
import org.elasticsearch.transport.TransportService

/**
 * Master-node action that adds or removes an auto-follow pattern and starts/stops the
 * corresponding background auto-follow task. Generic type parameters on the superclass
 * were stripped during extraction and have been restored.
 */
class TransportUpdateAutoFollowPatternAction @Inject constructor(
        transportService: TransportService, clusterService: ClusterService, threadPool: ThreadPool,
        actionFilters: ActionFilters, indexNameExpressionResolver: IndexNameExpressionResolver,
        private val client: NodeClient) :
        TransportMasterNodeAction<UpdateAutoFollowPatternRequest, AcknowledgedResponse>(
                UpdateAutoFollowPatternAction.NAME, true, transportService, clusterService, threadPool, actionFilters,
                ::UpdateAutoFollowPatternRequest, indexNameExpressionResolver), CoroutineScope by GlobalScope {

    companion object {
        private val log = LogManager.getLogger(TransportUpdateAutoFollowPatternAction::class.java)
        const val AUTOFOLLOW_EXCEPTION_GENERIC_STRING = "Failed to update autofollow pattern"
    }

    override fun executor(): String = ThreadPool.Names.SAME

    override fun read(inp: StreamInput) = AcknowledgedResponse(inp)

    override fun masterOperation(request: UpdateAutoFollowPatternRequest, state: ClusterState,
                                 listener: ActionListener<AcknowledgedResponse>) {
        // simplest way to check if there's a connection with the given name. Throws NoSuchRemoteClusterException if not..
        try {
            client.getRemoteClusterClient(request.connection)
        } catch (e : Exception) {
            listener.onFailure(e)
            return
        }

        launch(threadPool.coroutineContext(ThreadPool.Names.MANAGEMENT)) {
            listener.completeWith {
                val injectedUser = SecurityContext.fromSecurityThreadContext(threadPool.threadContext)
                val replicationMetadata = clusterService.state().metadata.custom(ReplicationMetadata.NAME)
                        ?: ReplicationMetadata.EMPTY
                if (request.action == UpdateAutoFollowPatternRequest.Action.REMOVE) {
                    // Stopping the tasks and removing the context information from the cluster state
                    replicationMetadata.removePattern(request.connection, request.patternName).also {
                        val shouldStop = it.autoFollowPatterns[request.connection]?.get(request.patternName) == null
                        if (shouldStop) stopAutoFollowTask(request.connection, request.patternName)
                    }
                }

                val response: AcknowledgedResponse = clusterService.waitForClusterStateUpdate("update autofollow patterns") { l ->
                    UpdateAutoFollowPattern(request, threadPool, injectedUser, l)
                }

                if(!response.isAcknowledged) {
                    throw ElasticsearchException(AUTOFOLLOW_EXCEPTION_GENERIC_STRING)
                }

                if (request.action == UpdateAutoFollowPatternRequest.Action.ADD) {
                    // Should start the task if there were no follow patterns before adding this
                    val shouldStart = replicationMetadata.autoFollowPatterns[request.connection]?.get(request.patternName) == null
                    if (shouldStart) startAutoFollowTask(request.connection, request.patternName)
                }
                response
            }
        }
    }

    override fun checkBlock(request: UpdateAutoFollowPatternRequest, state: ClusterState): ClusterBlockException? {
        return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE)
    }

    private suspend fun startAutoFollowTask(clusterAlias: String, patternName: String) {
        try {
            val response = persistentTasksService.startTask("autofollow:$clusterAlias:$patternName",
                    AutoFollowExecutor.TASK_NAME,
                    AutoFollowParams(clusterAlias, patternName))
            if (!response.isAssigned) {
                log.warn("""Failed to assign auto follow task for cluster $clusterAlias:$patternName to any node. Check if any
                    |cluster blocks are active.""".trimMargin())
            }
        } catch(e: ResourceAlreadyExistsException) {
            // Log and bail as task is already running
            log.warn("Task already started for '$clusterAlias:$patternName'", e)
        } catch (e: Exception) {
            log.error("Failed to start auto follow task for cluster '$clusterAlias:$patternName'", e)
            throw ElasticsearchException(AUTOFOLLOW_EXCEPTION_GENERIC_STRING)
        }
    }

    private suspend fun stopAutoFollowTask(clusterAlias: String, patternName: String) {
        try {
            persistentTasksService.removeTask("autofollow:$clusterAlias:$patternName")
        } catch(e: ResourceNotFoundException) {
            // Log warn as the task is already removed
            log.warn("Task already stopped for '$clusterAlias:$patternName'", e)
        } catch (e: Exception) {
            log.error("Failed to stop auto follow task for cluster '$clusterAlias:$patternName'", e)
            throw ElasticsearchException(AUTOFOLLOW_EXCEPTION_GENERIC_STRING)
        }
    }
}
diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/UpdateAutoFollowPatternAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/UpdateAutoFollowPatternAction.kt
new file mode 100644
index 00000000..df2bc527
--- /dev/null
+++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/UpdateAutoFollowPatternAction.kt
@@ -0,0 +1,27 @@
/*
 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.autofollow + +import org.elasticsearch.action.ActionType +import org.elasticsearch.action.support.master.AcknowledgedResponse + +class UpdateAutoFollowPatternAction : ActionType(NAME, ::AcknowledgedResponse) { + + companion object { + const val NAME = "cluster:admin/opendistro/replication/autofollow/update" + val INSTANCE = UpdateAutoFollowPatternAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/UpdateAutoFollowPatternRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/UpdateAutoFollowPatternRequest.kt new file mode 100644 index 00000000..ba3f6f97 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/autofollow/UpdateAutoFollowPatternRequest.kt @@ -0,0 +1,87 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.autofollow + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.XContentParser +import org.elasticsearch.common.xcontent.XContentParser.Token +import org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken + +class UpdateAutoFollowPatternRequest: AcknowledgedRequest { + + companion object { + fun fromXContent(xcp: XContentParser, action: Action) : UpdateAutoFollowPatternRequest { + var connection: String? = null + var patternName: String? = null + var pattern: String? = null + + ensureExpectedToken(Token.START_OBJECT, xcp.nextToken(), xcp) + while (xcp.nextToken() != Token.END_OBJECT) { + val fieldName = xcp.currentName() + xcp.nextToken() + when (fieldName) { + "connection" -> connection = xcp.text() + "name" -> patternName = xcp.text() + "pattern" -> pattern = xcp.textOrNull() + } + } + requireNotNull(connection) { "missing connection" } + requireNotNull(patternName) { "missing pattern name" } + if (action == Action.REMOVE) { + require(pattern == null) { "unexpected pattern provided" } + } else { + requireNotNull(pattern) { "missing pattern" } + } + return UpdateAutoFollowPatternRequest(connection, patternName, pattern, action) + } + } + val connection: String + val patternName: String + val pattern: String? 
+ + enum class Action { + ADD, REMOVE + } + val action : Action + + constructor(connection: String, patternName: String, pattern: String?, action: Action) { + this.connection = connection + this.patternName = patternName + this.pattern = pattern + this.action = action + } + + constructor(inp: StreamInput) : super(inp) { + connection = inp.readString() + patternName = inp.readString() + pattern = inp.readOptionalString() + action = inp.readEnum(Action::class.java) + } + + + override fun validate(): ActionRequestValidationException? = null + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeString(connection) + out.writeString(patternName) + out.writeOptionalString(pattern) + out.writeEnum(action) + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesAction.kt new file mode 100644 index 00000000..1fbe202d --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesAction.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.changes + +import org.elasticsearch.action.ActionType + +class GetChangesAction private constructor() : ActionType(NAME, ::GetChangesResponse) { + + companion object { + const val NAME = "indices:data/read/opendistro/replication/changes" + val INSTANCE = GetChangesAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesRequest.kt new file mode 100644 index 00000000..23eb13d2 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesRequest.kt @@ -0,0 +1,62 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.changes + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.support.single.shard.SingleShardRequest +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.transport.RemoteClusterAwareRequest + +class GetChangesRequest : SingleShardRequest, RemoteClusterAwareRequest { + + val remoteNode: DiscoveryNode + val shardId : ShardId + val fromSeqNo: Long + val toSeqNo: Long + + constructor(remoteNode: DiscoveryNode, shardId: ShardId, fromSeqNo: Long, toSeqNo: Long) : super(shardId.indexName) { + this.remoteNode = remoteNode + this.shardId = shardId + this.fromSeqNo = fromSeqNo + this.toSeqNo = toSeqNo + } + + constructor(input : StreamInput) : super(input) { + this.remoteNode = DiscoveryNode(input) + this.shardId = ShardId(input) + this.fromSeqNo = input.readLong() + this.toSeqNo = input.readVLong() + } + + override fun validate(): ActionRequestValidationException? { + return super.validateNonNullIndex() + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + remoteNode.writeTo(out) + shardId.writeTo(out) + out.writeLong(fromSeqNo) + out.writeVLong(toSeqNo) + } + + override fun getPreferredTargetNode(): DiscoveryNode { + return remoteNode + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesResponse.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesResponse.kt new file mode 100644 index 00000000..d7a6947f --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/GetChangesResponse.kt @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.changes + +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.translog.Translog + +class GetChangesResponse(val changes: List, + val fromSeqNo: Long, + val maxSeqNoOfUpdatesOrDeletes: Long) : ActionResponse() { + + constructor(inp: StreamInput) : this(inp.readList(Translog.Operation::readOperation), inp.readVLong(), inp.readLong()) + + override fun writeTo(out: StreamOutput) { + out.writeCollection(changes, Translog.Operation::writeOperation) + out.writeVLong(fromSeqNo) + out.writeLong(maxSeqNoOfUpdatesOrDeletes) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/TransportGetChangesAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/TransportGetChangesAction.kt new file mode 100644 index 00000000..56fecd41 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/changes/TransportGetChangesAction.kt @@ -0,0 +1,112 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. 
This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package com.amazon.elasticsearch.replication.action.changes

import com.amazon.elasticsearch.replication.action.repository.GetFileChunkAction
import com.amazon.elasticsearch.replication.util.completeWith
import com.amazon.elasticsearch.replication.util.coroutineContext
import com.amazon.elasticsearch.replication.util.waitForGlobalCheckpoint
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.launch
import org.elasticsearch.ElasticsearchTimeoutException
import org.elasticsearch.action.ActionListener
import org.elasticsearch.action.support.ActionFilters
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction
import org.elasticsearch.cluster.ClusterState
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver
import org.elasticsearch.cluster.routing.ShardsIterator
import org.elasticsearch.cluster.service.ClusterService
import org.elasticsearch.common.inject.Inject
import org.elasticsearch.common.io.stream.StreamInput
import org.elasticsearch.common.io.stream.Writeable
import org.elasticsearch.common.unit.TimeValue
import org.elasticsearch.index.shard.ShardId
import org.elasticsearch.index.translog.Translog
import org.elasticsearch.indices.IndicesService
import org.elasticsearch.threadpool.ThreadPool
import org.elasticsearch.transport.TransportActionProxy
import org.elasticsearch.transport.TransportService
import kotlin.math.min

/**
 * Serves GetChanges requests on the leader: long-polls for the global checkpoint to advance
 * past fromSeqNo, then snapshots and returns translog operations up to
 * min(lastSyncedGlobalCheckpoint, toSeqNo). Generic type parameters on the superclass were
 * stripped during extraction and have been restored.
 */
class TransportGetChangesAction @Inject constructor(threadPool: ThreadPool, clusterService: ClusterService,
                                                    transportService: TransportService, actionFilters: ActionFilters,
                                                    indexNameExpressionResolver: IndexNameExpressionResolver,
                                                    private val indicesService: IndicesService) :
        TransportSingleShardAction<GetChangesRequest, GetChangesResponse>(
                GetChangesAction.NAME, threadPool, clusterService, transportService, actionFilters,
                indexNameExpressionResolver, ::GetChangesRequest, ThreadPool.Names.SEARCH) {

    init {
        // Allows this action to be invoked across clusters via the proxy transport.
        TransportActionProxy.registerProxyAction(transportService, GetChangesAction.NAME, ::GetChangesResponse)
    }

    companion object {
        val WAIT_FOR_NEW_OPS_TIMEOUT = TimeValue.timeValueMinutes(1)!!
    }

    override fun shardOperation(request: GetChangesRequest, shardId: ShardId): GetChangesResponse {
        throw UnsupportedOperationException("use asyncShardOperation")
    }

    @Suppress("BlockingMethodInNonBlockingContext")
    override fun asyncShardOperation(request: GetChangesRequest, shardId: ShardId, listener: ActionListener<GetChangesResponse>) {
        GlobalScope.launch(threadPool.coroutineContext(ThreadPool.Names.SEARCH)) {
            // TODO: Figure out if we need to acquire a primary permit here
            listener.completeWith {
                val indexShard = indicesService.indexServiceSafe(shardId.index).getShard(shardId.id)
                if (indexShard.lastSyncedGlobalCheckpoint < request.fromSeqNo) {
                    // There are no new operations to sync. Do a long poll and wait for GlobalCheckpoint to advance. If
                    // the checkpoint doesn't advance by the timeout this throws an ESTimeoutException which the caller
                    // should catch and start a new poll.
                    val gcp = indexShard.waitForGlobalCheckpoint(request.fromSeqNo, WAIT_FOR_NEW_OPS_TIMEOUT)

                    // At this point indexShard.lastKnownGlobalCheckpoint has advanced but it may not yet have been synced
                    // to the translog, which means we can't return those changes. Return to the caller to retry.
                    // TODO: Figure out a better way to wait for the global checkpoint to be synced to the translog
                    if (indexShard.lastSyncedGlobalCheckpoint < request.fromSeqNo) {
                        assert(gcp > indexShard.lastSyncedGlobalCheckpoint) { "Checkpoint didn't advance at all" }
                        throw ElasticsearchTimeoutException("global checkpoint not synced. Retry after a few miliseconds...")
                    }
                }

                // At this point lastSyncedGlobalCheckpoint is at least fromSeqNo
                val toSeqNo = min(indexShard.lastSyncedGlobalCheckpoint, request.toSeqNo)
                indexShard.newChangesSnapshot("odr", request.fromSeqNo, toSeqNo, true).use { snapshot ->
                    val ops = ArrayList<Translog.Operation>(snapshot.totalOperations())
                    var op = snapshot.next()
                    while (op != null) {
                        ops.add(op)
                        op = snapshot.next()
                    }
                    GetChangesResponse(ops, request.fromSeqNo, indexShard.maxSeqNoOfUpdatesOrDeletes)
                }
            }
        }
    }

    override fun resolveIndex(request: GetChangesRequest): Boolean {
        return true
    }

    override fun getResponseReader(): Writeable.Reader<GetChangesResponse> {
        return Writeable.Reader { inp: StreamInput -> GetChangesResponse(inp) }
    }

    override fun shards(state: ClusterState, request: InternalRequest): ShardsIterator {
        // TODO: Investigate using any active shards instead of just primary
        return state.routingTable().shardRoutingTable(request.request().shardId).primaryShardIt()
    }
}
\ No newline at end of file
diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexAction.kt
new file mode 100644
index 00000000..5af59697
--- /dev/null
+++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexAction.kt
@@ -0,0 +1,25 @@
/*
 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
+ */ + +package com.amazon.elasticsearch.replication.action.index + +import org.elasticsearch.action.ActionType + +class ReplicateIndexAction private constructor(): ActionType(NAME, ::ReplicateIndexResponse) { + companion object { + const val NAME = "indices:admin/opendistro/replication/index/start" + val INSTANCE: ReplicateIndexAction = ReplicateIndexAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexMasterNodeAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexMasterNodeAction.kt new file mode 100644 index 00000000..7d97722b --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexMasterNodeAction.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.index + +import org.elasticsearch.action.ActionType +import org.elasticsearch.action.support.master.AcknowledgedResponse + +class ReplicateIndexMasterNodeAction private constructor(): ActionType(NAME, ::AcknowledgedResponse) { + companion object { + const val NAME = "internal:indices/admin/opendistro/replication/index/start" + val INSTANCE: ReplicateIndexMasterNodeAction = ReplicateIndexMasterNodeAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexMasterNodeRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexMasterNodeRequest.kt new file mode 100644 index 00000000..b108b7d5 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexMasterNodeRequest.kt @@ -0,0 +1,58 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.index + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.support.master.MasterNodeRequest +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.ToXContentObject +import org.elasticsearch.common.xcontent.XContentBuilder + +class ReplicateIndexMasterNodeRequest: + MasterNodeRequest, ToXContentObject { + + var user: String? + var replicateIndexReq: ReplicateIndexRequest + + override fun validate(): ActionRequestValidationException? { + return null + } + + constructor(user: String?, replicateIndexReq: ReplicateIndexRequest): super() { + this.user = user + this.replicateIndexReq = replicateIndexReq + } + + constructor(inp: StreamInput) : super(inp) { + user = inp.readOptionalString() + replicateIndexReq = ReplicateIndexRequest(inp) + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeOptionalString(user) + replicateIndexReq.writeTo(out) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + val responseBuilder = builder.startObject() + .field("user", user) + .field("replication_request") + return replicateIndexReq.toXContent(responseBuilder, params).endObject() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexRequest.kt new file mode 100644 index 00000000..e4af8ffa --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexRequest.kt @@ -0,0 +1,113 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.index + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.IndicesRequest +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ObjectParser +import org.elasticsearch.common.xcontent.ToXContent.Params +import org.elasticsearch.common.xcontent.ToXContentObject +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.common.xcontent.XContentParser +import java.io.IOException + +class ReplicateIndexRequest : AcknowledgedRequest, IndicesRequest.Replaceable, ToXContentObject { + + lateinit var followerIndex: String + lateinit var remoteCluster: String + lateinit var remoteIndex: String + // Used for integ tests to wait until the restore from remote cluster completes + var waitForRestore: Boolean = false + + private constructor() { + } + + constructor(followerIndex: String, remoteCluster: String, remoteIndex: String) : super() { + this.followerIndex = followerIndex + this.remoteCluster = remoteCluster + this.remoteIndex = remoteIndex + } + + companion object { + private val PARSER = ObjectParser("FollowIndexRequestParser") { ReplicateIndexRequest() } + + init { + PARSER.declareString(ReplicateIndexRequest::remoteCluster::set, 
ParseField("remote_cluster")) + PARSER.declareString(ReplicateIndexRequest::remoteIndex::set, ParseField("remote_index")) + } + + @Throws(IOException::class) + fun fromXContent(parser: XContentParser, followerIndex: String): ReplicateIndexRequest { + val followIndexRequest = PARSER.parse(parser, null) + followIndexRequest.followerIndex = followerIndex + return followIndexRequest + } + } + + override fun validate(): ActionRequestValidationException? { + var validationException: ActionRequestValidationException? = null + if (!this::remoteCluster.isInitialized || + !this::remoteIndex.isInitialized || + !this::followerIndex.isInitialized) { + validationException = ActionRequestValidationException() + validationException.addValidationError("Mandatory params are missing for the request") + } + return validationException + } + + override fun indices(vararg indices: String?): IndicesRequest { + return this + } + + override fun indices(): Array { + return arrayOf(followerIndex) + } + + override fun indicesOptions(): IndicesOptions { + return IndicesOptions.strictSingleIndexNoExpandForbidClosed() + } + + constructor(inp: StreamInput) : super(inp) { + remoteCluster = inp.readString() + remoteIndex = inp.readString() + followerIndex = inp.readString() + waitForRestore = inp.readBoolean() + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeString(remoteCluster) + out.writeString(remoteIndex) + out.writeString(followerIndex) + out.writeBoolean(waitForRestore) + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: Params): XContentBuilder { + builder.startObject() + builder.field("remote_cluster", remoteCluster) + builder.field("remote_index", remoteIndex) + builder.field("follower_index", followerIndex) + builder.field("wait_for_restore", waitForRestore) + builder.endObject() + return builder + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexResponse.kt 
b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexResponse.kt new file mode 100644 index 00000000..6f4e83f8 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/ReplicateIndexResponse.kt @@ -0,0 +1,28 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.index + +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput + +class ReplicateIndexResponse(acknowledged: Boolean) : AcknowledgedResponse(acknowledged) { + constructor(inp: StreamInput) : this(inp.readBoolean()) + + override fun writeTo(out: StreamOutput) { + out.writeBoolean(acknowledged) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/TransportReplicateIndexAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/TransportReplicateIndexAction.kt new file mode 100644 index 00000000..f27a76e9 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/TransportReplicateIndexAction.kt @@ -0,0 +1,71 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.index + +import com.amazon.elasticsearch.replication.util.SecurityContext +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.suspendExecute +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.HandledTransportAction +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.client.Client +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.tasks.Task +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportService + +class TransportReplicateIndexAction @Inject constructor(transportService: TransportService, + val threadPool: ThreadPool, + actionFilters: ActionFilters, + private val client : Client) : + HandledTransportAction(ReplicateIndexAction.NAME, + transportService, actionFilters, ::ReplicateIndexRequest), + CoroutineScope by GlobalScope { + + companion object { + private val log = LogManager.getLogger(TransportReplicateIndexAction::class.java) + } + + override fun doExecute(task: Task, request: ReplicateIndexRequest, listener: ActionListener) { + log.trace("Starting replication for ${request.remoteCluster}:${request.remoteIndex} -> ${request.followerIndex}") + + // Captures the security 
context and triggers relevant operation on the master + try { + val user = SecurityContext.fromSecurityThreadContext(threadPool.threadContext) + log.debug("Obtained security context - $user") + val req = ReplicateIndexMasterNodeRequest(user, request) + client.execute(ReplicateIndexMasterNodeAction.INSTANCE, req, object: ActionListener{ + override fun onFailure(e: Exception) { + listener.onFailure(e) + } + + override fun onResponse(response: AcknowledgedResponse) { + listener.onResponse(ReplicateIndexResponse(response.isAcknowledged)) + } + }) + } catch (e: Exception) { + log.error("Failed to trigger replication for ${request.followerIndex} - $e") + listener.onFailure(e) + } + + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/TransportReplicateIndexMasterNodeAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/TransportReplicateIndexMasterNodeAction.kt new file mode 100644 index 00000000..b2f72316 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/TransportReplicateIndexMasterNodeAction.kt @@ -0,0 +1,156 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.index + +import com.amazon.elasticsearch.replication.action.replicationstatedetails.UpdateReplicationStateDetailsRequest +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_KEY +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_RUNNING_VALUE +import com.amazon.elasticsearch.replication.metadata.UpdateReplicatedIndices +import com.amazon.elasticsearch.replication.metadata.UpdateReplicationStateDetailsTaskExecutor +import com.amazon.elasticsearch.replication.task.ReplicationState +import com.amazon.elasticsearch.replication.task.index.IndexReplicationExecutor +import com.amazon.elasticsearch.replication.task.index.IndexReplicationParams +import com.amazon.elasticsearch.replication.task.index.IndexReplicationState +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.startTask +import com.amazon.elasticsearch.replication.util.submitClusterStateUpdateTask +import com.amazon.elasticsearch.replication.util.suspending +import com.amazon.elasticsearch.replication.util.waitForClusterStateUpdate +import com.amazon.elasticsearch.replication.util.waitForTaskCondition +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.action.support.master.TransportMasterNodeAction +import org.elasticsearch.client.node.NodeClient +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.ClusterStateTaskExecutor +import 
org.elasticsearch.cluster.block.ClusterBlockException +import org.elasticsearch.cluster.block.ClusterBlockLevel +import org.elasticsearch.cluster.metadata.IndexMetadata +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.index.IndexNotFoundException +import org.elasticsearch.persistent.PersistentTasksService +import org.elasticsearch.repositories.RepositoriesService +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportService +import java.io.IOException + +class TransportReplicateIndexMasterNodeAction @Inject constructor(transportService: TransportService, + clusterService: ClusterService, + threadPool: ThreadPool, + actionFilters: ActionFilters, + indexNameExpressionResolver: IndexNameExpressionResolver, + private val persistentTasksService: PersistentTasksService, + private val nodeClient : NodeClient, + private val repositoryService: RepositoriesService) : + TransportMasterNodeAction(ReplicateIndexMasterNodeAction.NAME, + transportService, clusterService, threadPool, actionFilters, ::ReplicateIndexMasterNodeRequest, indexNameExpressionResolver), + CoroutineScope by GlobalScope { + + companion object { + private val log = LogManager.getLogger(TransportReplicateIndexMasterNodeAction::class.java) + } + + override fun executor(): String { + return ThreadPool.Names.SAME + } + + @Throws(IOException::class) + override fun read(input: StreamInput): ReplicateIndexResponse { + return ReplicateIndexResponse(input) + } + + @Throws(Exception::class) + override fun masterOperation(request: ReplicateIndexMasterNodeRequest, state: ClusterState, + listener: ActionListener) { + val replicateIndexReq = request.replicateIndexReq + val user = request.user + log.trace("Triggering relevant tasks to start replication for " + + 
"${replicateIndexReq.remoteCluster}:${replicateIndexReq.remoteIndex} -> ${replicateIndexReq.followerIndex}") + + // For now this returns a response after creating the follower index and starting the replication tasks + // for each shard. If that takes too long we can start the task asynchronously and return the response first. + launch(Dispatchers.Unconfined + threadPool.coroutineContext()) { + try { + val remoteMetadata = getRemoteIndexMetadata(replicateIndexReq.remoteCluster, replicateIndexReq.remoteIndex) + val params = IndexReplicationParams(replicateIndexReq.remoteCluster, remoteMetadata.index, replicateIndexReq.followerIndex) + updateReplicationStateToStarted(replicateIndexReq.followerIndex) + + val response : ReplicateIndexResponse = + clusterService.waitForClusterStateUpdate("updating replicated indices") { l -> + UpdateReplicatedIndices(replicateIndexReq, user, l) + } + + val task = persistentTasksService.startTask("replication:index:${replicateIndexReq.followerIndex}", + IndexReplicationExecutor.TASK_NAME, params) + + if (!task.isAssigned) { + log.error("Failed to assign task") + listener.onResponse(ReplicateIndexResponse(false)) + } + + // Now wait for the replication to start and the follower index to get created before returning + persistentTasksService.waitForTaskCondition(task.id, replicateIndexReq.timeout()) { t -> + val replicationState = (t.state as IndexReplicationState?)?.state + replicationState == ReplicationState.FOLLOWING || + (!replicateIndexReq.waitForRestore && replicationState == ReplicationState.RESTORING) + } + + listener.onResponse(response) + } catch (e: Exception) { + log.error("Failed to trigger replication for ${replicateIndexReq.followerIndex} - $e") + listener.onFailure(e) + } + } + } + + private suspend fun updateReplicationStateToStarted(indexName: String) { + val replicationStateParamMap = HashMap() + replicationStateParamMap[REPLICATION_OVERALL_STATE_KEY] = REPLICATION_OVERALL_STATE_RUNNING_VALUE + val 
updateReplicationStateDetailsRequest = UpdateReplicationStateDetailsRequest(indexName, replicationStateParamMap, + UpdateReplicationStateDetailsRequest.UpdateType.ADD) + submitClusterStateUpdateTask(updateReplicationStateDetailsRequest, UpdateReplicationStateDetailsTaskExecutor.INSTANCE + as ClusterStateTaskExecutor>, + clusterService, + "remove-replication-state-params") + } + + private suspend fun getRemoteIndexMetadata(remoteCluster: String, remoteIndex: String): IndexMetadata { + val remoteClusterClient = nodeClient.getRemoteClusterClient(remoteCluster).admin().cluster() + val clusterStateRequest = remoteClusterClient.prepareState() + .clear() + .setIndices(remoteIndex) + .setMetadata(true) + .setIndicesOptions(IndicesOptions.strictSingleIndexNoExpandForbidClosed()) + .request() + val remoteState = suspending(remoteClusterClient::state)(clusterStateRequest).state + return remoteState.metadata.index(remoteIndex) ?: throw IndexNotFoundException("${remoteCluster}:${remoteIndex}") + } + + override fun checkBlock(request: ReplicateIndexMasterNodeRequest, state: ClusterState): ClusterBlockException? { + return state.blocks.globalBlockedException(ClusterBlockLevel.METADATA_WRITE) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/TransportUpddateIndexBlockAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/TransportUpddateIndexBlockAction.kt new file mode 100644 index 00000000..7aab567f --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/TransportUpddateIndexBlockAction.kt @@ -0,0 +1,102 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. 
This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.index.block + +import com.amazon.elasticsearch.replication.metadata.AddIndexBlockTask +import com.amazon.elasticsearch.replication.metadata.checkIfIndexBlockedWithLevel +import com.amazon.elasticsearch.replication.util.completeWith +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.waitForClusterStateUpdate +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.action.support.master.TransportMasterNodeAction +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.block.ClusterBlockException +import org.elasticsearch.cluster.block.ClusterBlockLevel +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportService +import java.io.IOException + + +class TransportUpddateIndexBlockAction @Inject constructor(transportService: TransportService, + clusterService: ClusterService, + threadPool: ThreadPool, + actionFilters: ActionFilters, + indexNameExpressionResolver: + IndexNameExpressionResolver, + val client: Client) : + 
TransportMasterNodeAction(UpdateIndexBlockAction.NAME, + transportService, clusterService, threadPool, actionFilters, ::UpdateIndexBlockRequest, + indexNameExpressionResolver), CoroutineScope by GlobalScope { + + companion object { + private val log = LogManager.getLogger(TransportUpddateIndexBlockAction::class.java) + } + + override fun checkBlock(request: UpdateIndexBlockRequest?, state: ClusterState): ClusterBlockException? { + try { + if (request != null) { + state.routingTable.index(request.indexName) ?: return null + checkIfIndexBlockedWithLevel(clusterService, request.indexName, ClusterBlockLevel.METADATA_WRITE) + } + } catch (exception: ClusterBlockException) { + return exception + } + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE) + } + + @Throws(Exception::class) + override fun masterOperation(request: UpdateIndexBlockRequest?, state: ClusterState?, listener: ActionListener) { + val followerIndexName = request!!.indexName + launch(threadPool.coroutineContext(ThreadPool.Names.MANAGEMENT)) { + listener.completeWith { addIndexBlockForReplication(followerIndexName) } + } + } + + private suspend fun addIndexBlockForReplication(indexName: String): AcknowledgedResponse { + val addIndexBlockTaskResponse : AcknowledgedResponse = + clusterService.waitForClusterStateUpdate("add-block") { + l -> + AddIndexBlockTask(UpdateIndexBlockRequest(indexName, IndexBlockUpdateType.ADD_BLOCK), l) + } + if (!addIndexBlockTaskResponse.isAcknowledged) { + throw ElasticsearchException("Failed to add index block to index:$indexName") + } + return addIndexBlockTaskResponse + } + + override fun executor(): String { + return ThreadPool.Names.SAME + } + + @Throws(IOException::class) + override fun read(inp: StreamInput?): AcknowledgedResponse { + return AcknowledgedResponse(inp) + } + + +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/UpdateIndexBlockAction.kt 
b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/UpdateIndexBlockAction.kt new file mode 100644 index 00000000..9073ae24 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/UpdateIndexBlockAction.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.index.block + +import org.elasticsearch.action.ActionType +import org.elasticsearch.action.support.master.AcknowledgedResponse + +class UpdateIndexBlockAction private constructor(): ActionType(NAME, ::AcknowledgedResponse) { + companion object { + const val NAME = "internal:indices/admin/opendistro/replication/index/add_block" + val INSTANCE: UpdateIndexBlockAction = UpdateIndexBlockAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/UpdateIndexBlockRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/UpdateIndexBlockRequest.kt new file mode 100644 index 00000000..ef6caf93 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/index/block/UpdateIndexBlockRequest.kt @@ -0,0 +1,78 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.index.block + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.IndicesRequest +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ObjectParser +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.ToXContentObject +import org.elasticsearch.common.xcontent.XContentBuilder +import java.util.function.Supplier + +enum class IndexBlockUpdateType { + ADD_BLOCK, REMOVE_BLOCK +} + +class UpdateIndexBlockRequest : AcknowledgedRequest, IndicesRequest, ToXContentObject { + + var indexName: String + var updateType: IndexBlockUpdateType + + constructor(index: String, updateType: IndexBlockUpdateType): super() { + this.indexName = index + this.updateType = updateType + } + + constructor(inp: StreamInput): super(inp) { + indexName = inp.readString() + updateType = inp.readEnum(IndexBlockUpdateType::class.java) + } + + override fun validate(): ActionRequestValidationException? { + /* No validation for now. Null checks are implicit as constructor doesn't + allow nulls to be passed into the request. 
+ */ + return null; + } + + override fun indices(): Array { + return arrayOf(indexName) + } + + override fun indicesOptions(): IndicesOptions { + return IndicesOptions.strictSingleIndexNoExpandForbidClosed() + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + builder.field("indexName", indexName) + builder.field("updateType", updateType) + builder.endObject() + return builder + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeString(indexName) + out.writeEnum(updateType) + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesAction.kt new file mode 100644 index 00000000..87fe8e38 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesAction.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.replay + +import org.elasticsearch.action.ActionType + +class ReplayChangesAction private constructor() : ActionType(NAME, ::ReplayChangesResponse) { + + companion object { + const val NAME = "indices:data/write/opendistro/replication/changes" + val INSTANCE = ReplayChangesAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesRequest.kt new file mode 100644 index 00000000..9e95c878 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesRequest.kt @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.replay + +import org.elasticsearch.action.support.replication.ReplicatedWriteRequest +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.index.translog.Translog + +class ReplayChangesRequest : ReplicatedWriteRequest { + + val remoteCluster: String + val remoteIndex: String + val changes: List + val maxSeqNoOfUpdatesOrDeletes: Long + + constructor(shardId: ShardId, + changes: List, + maxSeqNoOfUpdatesOrDeletes: Long, + remoteCluster: String, + remoteIndex: String) : super(shardId) { + this.changes = changes + this.maxSeqNoOfUpdatesOrDeletes = maxSeqNoOfUpdatesOrDeletes + this.remoteCluster = remoteCluster + this.remoteIndex = remoteIndex + } + + constructor(inp: StreamInput) : super(inp) { + remoteCluster = inp.readString() + remoteIndex = inp.readString() + changes = inp.readList(Translog.Operation::readOperation) + maxSeqNoOfUpdatesOrDeletes = inp.readLong() + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeString(remoteCluster) + out.writeString(remoteIndex) + out.writeCollection(changes, Translog.Operation::writeOperation) + out.writeLong(maxSeqNoOfUpdatesOrDeletes) + } + + override fun toString(): String { + return "ReplayChangesRequest[changes=<${changes.take(3)}]" + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesResponse.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesResponse.kt new file mode 100644 index 00000000..26dcea72 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/ReplayChangesResponse.kt @@ -0,0 +1,33 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.replay + +import org.elasticsearch.action.support.WriteResponse +import org.elasticsearch.action.support.replication.ReplicationResponse +import org.elasticsearch.common.io.stream.StreamInput + +class ReplayChangesResponse : ReplicationResponse, WriteResponse { + + constructor(inp: StreamInput) : super(inp) + + constructor(): super() + + override fun setForcedRefresh(forcedRefresh: Boolean) { + //no-op + } + + +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/TransportReplayChangesAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/TransportReplayChangesAction.kt new file mode 100644 index 00000000..61f1ed60 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replay/TransportReplayChangesAction.kt @@ -0,0 +1,247 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.replay + +import com.amazon.elasticsearch.replication.ReplicationException +import com.amazon.elasticsearch.replication.metadata.checkIfIndexBlockedWithLevel +import com.amazon.elasticsearch.replication.util.completeWith +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.suspending +import com.amazon.elasticsearch.replication.util.waitForNextChange +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.SupervisorJob +import kotlinx.coroutines.asCoroutineDispatcher +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsRequest +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest +import org.elasticsearch.action.bulk.TransportShardBulkAction +import org.elasticsearch.action.resync.TransportResyncReplicationAction +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.action.support.replication.TransportWriteAction +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.ClusterStateObserver +import org.elasticsearch.cluster.action.index.MappingUpdatedAction +import org.elasticsearch.cluster.action.shard.ShardStateAction +import org.elasticsearch.cluster.block.ClusterBlockLevel +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.bytes.BytesReference +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.Writeable +import org.elasticsearch.common.settings.Settings +import org.elasticsearch.common.xcontent.XContentType +import org.elasticsearch.index.IndexingPressure +import org.elasticsearch.index.engine.Engine +import 
org.elasticsearch.index.shard.IndexShard +import org.elasticsearch.index.translog.Translog +import org.elasticsearch.indices.IndicesService +import org.elasticsearch.indices.SystemIndices +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportService +import java.util.function.Function + +/** + * Similar to [TransportResyncReplicationAction] except it also writes the changes to the primary before replicating + * to the replicas. The source of changes is, of course, the remote cluster. + */ +class TransportReplayChangesAction @Inject constructor(settings: Settings, transportService: TransportService, + clusterService: ClusterService, indicesService: IndicesService, + threadPool: ThreadPool, shardStateAction: ShardStateAction, + actionFilters: ActionFilters, + indexingPressure: IndexingPressure, + systemIndices: SystemIndices, + private val client: Client, + // Unused for now because of a bug in creating the PutMappingRequest + private val mappingUpdatedAction: MappingUpdatedAction) : + TransportWriteAction( + settings, ACTION_NAME, transportService, clusterService, indicesService, threadPool, shardStateAction, + actionFilters, Writeable.Reader { inp -> ReplayChangesRequest(inp) }, Writeable.Reader { inp -> ReplayChangesRequest(inp) }, + EXECUTOR_NAME_FUNCTION, false, indexingPressure, systemIndices) { + + companion object { + const val ACTION_NAME = "indices:data/write/replication" + private val log = LogManager.getLogger(TransportReplayChangesAction::class.java)!! 
+ private val EXECUTOR_NAME_FUNCTION = Function { shard: IndexShard -> + if (shard.indexSettings().indexMetadata.isSystem) { + ThreadPool.Names.SYSTEM_WRITE + } else { + ThreadPool.Names.WRITE + } + } + } + + private val job = SupervisorJob() + private val scope = CoroutineScope(threadPool.executor(ThreadPool.Names.WRITE).asCoroutineDispatcher() + job) + + override fun newResponseInstance(inp: StreamInput): ReplayChangesResponse = ReplayChangesResponse(inp) + + override fun dispatchedShardOperationOnPrimary(request: ReplayChangesRequest, primaryShard: IndexShard, + listener: ActionListener>) { + + scope.launch(threadPool.coroutineContext()) { + listener.completeWith { + performOnPrimary(request, primaryShard) + } + } + } + + override fun dispatchedShardOperationOnReplica(request: ReplayChangesRequest, replica: IndexShard, + listener: ActionListener) { + scope.launch(threadPool.coroutineContext()) { + listener.completeWith { + performOnSecondary(request, replica) + } + } + } + + suspend fun performOnPrimary(request: ReplayChangesRequest, primaryShard: IndexShard) + : WritePrimaryResult { + + checkIfIndexBlockedWithLevel(clusterService, request.index(), ClusterBlockLevel.WRITE) + var location: Translog.Location? = null + request.changes.asSequence().map { + it.withPrimaryTerm(primaryShard.operationPrimaryTerm) + }.forEach { op -> + if(primaryShard.maxSeqNoOfUpdatesOrDeletes < request.maxSeqNoOfUpdatesOrDeletes) { + primaryShard.advanceMaxSeqNoOfUpdatesOrDeletes(request.maxSeqNoOfUpdatesOrDeletes) + } + var result = primaryShard.applyTranslogOperation(op, Engine.Operation.Origin.PRIMARY) + if (result.resultType == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) { + waitForMappingUpdate { + // fetch mappings from the remote cluster when applying on PRIMARY... 
+ syncRemoteMapping(request.remoteCluster, request.remoteIndex, request.shardId()!!.indexName, + op.docType()) + } + result = primaryShard.applyTranslogOperation(op, Engine.Operation.Origin.PRIMARY) + } + + location = syncOperationResultOrThrow(result, location) + } + val response = ReplayChangesResponse() // TODO: Figure out what to add to response + return WritePrimaryResult(request, response, location, null, primaryShard, log) + } + + /** + * This requires duplicating the code above due to mapping updates being asynchronous. + */ + suspend fun performOnSecondary(request: ReplayChangesRequest, replicaShard: IndexShard) + : WriteReplicaResult { + + checkIfIndexBlockedWithLevel(clusterService, request.index(), ClusterBlockLevel.WRITE) + var location: Translog.Location? = null + request.changes.asSequence().map { + it.withPrimaryTerm(replicaShard.operationPrimaryTerm) + }.forEach { op -> + var result = replicaShard.applyTranslogOperation(op, Engine.Operation.Origin.REPLICA) + if (result.resultType == Engine.Result.Type.MAPPING_UPDATE_REQUIRED) { + waitForMappingUpdate() + result = replicaShard.applyTranslogOperation(op, Engine.Operation.Origin.REPLICA) + } + location = syncOperationResultOrThrow(result, location) + } + return WriteReplicaResult(request, location, null, replicaShard, log) + } + + private fun Translog.Operation.docType(): String { + return when (this) { + is Translog.Index -> type() + is Translog.Delete -> type() + else -> TODO("Operation ${opType()} not expected to have a document type") + } + } + + /** + * Fetches the index mapping from the remote cluster, applies it to the local cluster's master and then waits + * for the mapping to become available on the current shard. Should only be called on the primary shard . 
+ */ + private suspend fun syncRemoteMapping(remoteCluster: String, remoteIndex: String, + followerIndex: String, type: String) { + log.debug("Syncing mappings from ${remoteCluster}:${remoteIndex}/${type} -> $followerIndex...") + val remoteClient = client.getRemoteClusterClient(remoteCluster) + val options = IndicesOptions.strictSingleIndexNoExpandForbidClosed() + val getMappingsRequest = GetMappingsRequest().indices(remoteIndex).indicesOptions(options) + val getMappingsResponse = suspending(remoteClient.admin().indices()::getMappings)(getMappingsRequest) + val mappingSource = getMappingsResponse.mappings().get(remoteIndex).get(type).source().string() + + // This should use MappingUpdateAction but that uses PutMappingRequest internally and + // PutMappingRequest#setConcreteIndex has a bug where it throws an NPE.This is fixed upstream in + // https://github.com/elastic/elasticsearch/pull/58419 and we should update to that when it is released. + val putMappingRequest = PutMappingRequest().indices(followerIndex).indicesOptions(options) + .type(type).source(mappingSource, XContentType.JSON) + //TODO: call .masterNodeTimeout() with the setting indices.mapping.dynamic_timeout + val putMappingResponse = suspending(client.admin().indices()::putMapping)(putMappingRequest) + if (!putMappingResponse.isAcknowledged) { + throw ReplicationException("failed to update mappings to match mapping in source clusters") + } else { + log.debug("Mappings synced for $followerIndex") + } + } + + /** + * Waits for an index mapping update to become available on the current shard. If a [mappingUpdater] is provided + * it will be called to fetch and update the mapping. The updater is normally run only on the primary shard to fetch + * mappings from the remote index. On replica shards an updater is not required as the primary should have already + * updated the mapping - we just have to wait for it to reach this node. 
+ */ + private suspend fun waitForMappingUpdate(mappingUpdater: suspend () -> Unit = {}) { + log.debug("Waiting for mapping update...") + val clusterStateObserver = ClusterStateObserver(clusterService, log, threadPool.threadContext) + mappingUpdater() + clusterStateObserver.waitForNextChange("mapping update on replica") + log.debug("Mapping updated.") + } + + private fun Translog.Operation.withPrimaryTerm(operationPrimaryTerm: Long): Translog.Operation { + @Suppress("DEPRECATION") + return when (opType()!!) { + Translog.Operation.Type.CREATE, Translog.Operation.Type.INDEX -> { + val sourceOp = this as Translog.Index + Translog.Index(sourceOp.type(), sourceOp.id(), sourceOp.seqNo(), operationPrimaryTerm, + sourceOp.version(), BytesReference.toBytes(sourceOp.source()), + sourceOp.routing(), sourceOp.autoGeneratedIdTimestamp) + } + Translog.Operation.Type.DELETE -> { + val sourceOp = this as Translog.Delete + Translog.Delete(sourceOp.type(), sourceOp.id(), sourceOp.uid(), sourceOp.seqNo(), operationPrimaryTerm, + sourceOp.version()) + } + Translog.Operation.Type.NO_OP -> { + val sourceOp = this as Translog.NoOp + Translog.NoOp(sourceOp.seqNo(), operationPrimaryTerm, sourceOp.reason()) + } + } + } + + override fun globalBlockLevel(): ClusterBlockLevel? { + return ClusterBlockLevel.WRITE + } + + override fun indexBlockLevel(): ClusterBlockLevel? { + /* Ideally, we want to block if there is already a WRITE block added to cluster. + However, we dont want to be blocked from our own replication write block. + + Since this method doesn't have access to actual block, we can't block on WRITE level + from here without getting blocked from our own cluster-block. To mitigate this we would + add code in our ReplayChanges action to check for other WRITE blocks(eg disk space block, etc) + before going ahead with shard update. 
+ */ + return null + } +} + diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/replicationstatedetails/UpdateReplicationStateDetailsRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replicationstatedetails/UpdateReplicationStateDetailsRequest.kt new file mode 100644 index 00000000..15e86f87 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/replicationstatedetails/UpdateReplicationStateDetailsRequest.kt @@ -0,0 +1,57 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.replicationstatedetails + +import com.amazon.elasticsearch.replication.metadata.ReplicationStateParams +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput + +class UpdateReplicationStateDetailsRequest: AcknowledgedRequest { + + val followIndexName : String + val replicationStateParams : ReplicationStateParams + val updateType : UpdateType + + enum class UpdateType { + ADD, REMOVE + } + + constructor(followIndexName : String, + replicationStateParams: ReplicationStateParams, + updateType: UpdateType) { + this.followIndexName = followIndexName + this.replicationStateParams = replicationStateParams + this.updateType = updateType + } + + override fun validate(): ActionRequestValidationException? { + return null + } + + constructor(inp: StreamInput) : super(inp) { + followIndexName = inp.readString() + replicationStateParams = inp.readMap(StreamInput::readString, StreamInput::readString) + updateType = inp.readEnum(UpdateType::class.java) + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeString(followIndexName) + out.writeMap(replicationStateParams) + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkAction.kt new file mode 100644 index 00000000..9f0948f2 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkAction.kt @@ -0,0 +1,25 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionType + +class GetFileChunkAction private constructor() : ActionType(NAME, ::GetFileChunkResponse) { + companion object { + const val NAME = "indices:data/read/opendistro/replication/file_chunk" + val INSTANCE = GetFileChunkAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkRequest.kt new file mode 100644 index 00000000..126f9496 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkRequest.kt @@ -0,0 +1,56 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.support.single.shard.SingleShardRequest +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.index.store.StoreFileMetadata +import org.elasticsearch.transport.RemoteClusterAwareRequest + +class GetFileChunkRequest : RemoteClusterRepositoryRequest { + val storeFileMetadata: StoreFileMetadata + val offset: Long + val length: Int + + constructor(restoreUUID: String, node: DiscoveryNode, leaderShardId: ShardId, storeFileMetaData: StoreFileMetadata, + offset: Long, length: Int, followerCluster: String, followerShardId: ShardId): + super(restoreUUID, node, leaderShardId, followerCluster, followerShardId) { + this.storeFileMetadata = storeFileMetaData + this.offset = offset + this.length = length + } + + constructor(input : StreamInput): super(input) { + storeFileMetadata = StoreFileMetadata(input) + offset = input.readLong() + length = input.readInt() + } + + override fun validate(): ActionRequestValidationException? { + return null + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + storeFileMetadata.writeTo(out) + out.writeLong(offset) + out.writeInt(length) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkResponse.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkResponse.kt new file mode 100644 index 00000000..c8ac18b2 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetFileChunkResponse.kt @@ -0,0 +1,47 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.common.bytes.BytesReference +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.store.StoreFileMetadata + +class GetFileChunkResponse : ActionResponse { + + val storeFileMetadata: StoreFileMetadata + val offset: Long + val data: BytesReference + + constructor(storeFileMetadata: StoreFileMetadata, offset: Long, data: BytesReference): super() { + this.storeFileMetadata = storeFileMetadata + this.offset = offset + this.data = data + } + + constructor(inp: StreamInput): super(inp) { + storeFileMetadata = StoreFileMetadata(inp) + offset = inp.readLong() + data = inp.readBytesReference() + } + + override fun writeTo(out: StreamOutput) { + storeFileMetadata.writeTo(out) + out.writeLong(offset) + out.writeBytesReference(data) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataAction.kt new file mode 100644 index 00000000..9e2432f4 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataAction.kt @@ -0,0 +1,25 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionType + +class GetStoreMetadataAction private constructor() : ActionType(NAME, ::GetStoreMetadataResponse) { + companion object { + const val NAME = "indices:data/read/opendistro/replication/file_metadata" + val INSTANCE = GetStoreMetadataAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataRequest.kt new file mode 100644 index 00000000..ca0e481e --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataRequest.kt @@ -0,0 +1,37 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.support.single.shard.SingleShardRequest +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.transport.RemoteClusterAwareRequest + +class GetStoreMetadataRequest : RemoteClusterRepositoryRequest { + + constructor(restoreUUID: String, node: DiscoveryNode, leaderShardId: ShardId, + followerCluster: String, followerShardId: ShardId): + super(restoreUUID, node, leaderShardId, followerCluster, followerShardId) + + constructor(input : StreamInput): super(input) + + override fun validate(): ActionRequestValidationException? { + return null + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataResponse.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataResponse.kt new file mode 100644 index 00000000..b36aa44b --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/GetStoreMetadataResponse.kt @@ -0,0 +1,38 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.store.Store + +class GetStoreMetadataResponse : ActionResponse { + + val metadataSnapshot : Store.MetadataSnapshot + + constructor(metadataSnapshot: Store.MetadataSnapshot): super() { + this.metadataSnapshot = metadataSnapshot + } + + constructor(inp: StreamInput) : super(inp) { + metadataSnapshot = Store.MetadataSnapshot(inp) + } + + override fun writeTo(out: StreamOutput) { + metadataSnapshot.writeTo(out) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/ReleaseLeaderResourcesAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/ReleaseLeaderResourcesAction.kt new file mode 100644 index 00000000..8a443ba5 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/ReleaseLeaderResourcesAction.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionType +import org.elasticsearch.action.support.master.AcknowledgedResponse + +class ReleaseLeaderResourcesAction private constructor() : ActionType(NAME, ::AcknowledgedResponse) { + companion object { + const val NAME = "indices:admin/opendistro/replication/resources/release" + val INSTANCE = ReleaseLeaderResourcesAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/ReleaseLeaderResourcesRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/ReleaseLeaderResourcesRequest.kt new file mode 100644 index 00000000..0ef25dc1 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/ReleaseLeaderResourcesRequest.kt @@ -0,0 +1,34 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.index.shard.ShardId + +class ReleaseLeaderResourcesRequest: RemoteClusterRepositoryRequest { + + constructor(restoreUUID: String, node: DiscoveryNode, leaderShardId: ShardId, + followerCluster: String, followerShardId: ShardId): + super(restoreUUID, node, leaderShardId, followerCluster, followerShardId) + + constructor(input : StreamInput): super(input) + + override fun validate(): ActionRequestValidationException? { + return null + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/RemoteClusterRepositoryRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/RemoteClusterRepositoryRequest.kt new file mode 100644 index 00000000..398b665e --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/RemoteClusterRepositoryRequest.kt @@ -0,0 +1,66 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import org.elasticsearch.action.support.single.shard.SingleShardRequest +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.transport.RemoteClusterAwareRequest + +abstract class RemoteClusterRepositoryRequest?>: + SingleShardRequest, RemoteClusterAwareRequest { + + val restoreUUID: String + val node: DiscoveryNode + val leaderShardId: ShardId + val followerCluster: String + val followerShardId: ShardId + + constructor(restoreUUID: String, + node: DiscoveryNode, + leaderShardId: ShardId, + followerCluster: String, + followerShardId: ShardId): super(leaderShardId.indexName) { + this.restoreUUID = restoreUUID + this.node = node + this.leaderShardId = leaderShardId + this.followerCluster = followerCluster + this.followerShardId = followerShardId + } + + constructor(input: StreamInput) { + restoreUUID = input.readString() + node = DiscoveryNode(input) + leaderShardId = ShardId(input) + followerCluster = input.readString() + followerShardId = ShardId(input) + super.index = leaderShardId.indexName + } + + override fun writeTo(out: StreamOutput) { + out.writeString(restoreUUID) + node.writeTo(out) + leaderShardId.writeTo(out) + out.writeString(followerCluster) + followerShardId.writeTo(out) + } + + override fun getPreferredTargetNode(): DiscoveryNode { + return node + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportGetFileChunkAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportGetFileChunkAction.kt new file mode 100644 index 00000000..4cdf7843 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportGetFileChunkAction.kt @@ -0,0 +1,87 @@ +/* + * Copyright 2020 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.action.repository + +import com.amazon.elasticsearch.replication.repository.RemoteClusterRestoreLeaderService +import com.amazon.elasticsearch.replication.util.performOp +import org.apache.logging.log4j.LogManager +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.single.shard.TransportSingleShardAction +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver +import org.elasticsearch.cluster.routing.ShardsIterator +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.bytes.BytesArray +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.Writeable +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.indices.IndicesService +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportActionProxy +import org.elasticsearch.transport.TransportService + +class TransportGetFileChunkAction @Inject constructor(threadPool: ThreadPool, clusterService: ClusterService, + transportService: TransportService, actionFilters: ActionFilters, + indexNameExpressionResolver: IndexNameExpressionResolver, + private val indicesService: IndicesService, + private val restoreLeaderService: 
RemoteClusterRestoreLeaderService) : + TransportSingleShardAction(GetFileChunkAction.NAME, + threadPool, clusterService, transportService, actionFilters, + indexNameExpressionResolver, ::GetFileChunkRequest, ThreadPool.Names.GET) { + + init { + TransportActionProxy.registerProxyAction(transportService, GetFileChunkAction.NAME, ::GetFileChunkResponse) + } + + companion object { + private val log = LogManager.getLogger(TransportGetFileChunkAction::class.java) + } + + override fun shardOperation(request: GetFileChunkRequest, shardId: ShardId): GetFileChunkResponse { + log.debug(request.toString()) + val indexShard = indicesService.indexServiceSafe(shardId.index).getShard(shardId.id) + val store = indexShard.store() + val buffer = ByteArray(request.length) + var bytesRead = 0 + + store.performOp({ + val fileMetaData = request.storeFileMetadata + val currentInput = restoreLeaderService.openInputStream(request.restoreUUID, request, + fileMetaData.name(), fileMetaData.length()) + val offset = request.offset + if (offset < fileMetaData.length()) { + currentInput.skip(offset) + bytesRead = currentInput.read(buffer) + } + }) + + return GetFileChunkResponse(request.storeFileMetadata, request.offset, BytesArray(buffer, 0, bytesRead)) + } + + override fun resolveIndex(request: GetFileChunkRequest): Boolean { + return true + } + + override fun getResponseReader(): Writeable.Reader { + return Writeable.Reader { inp: StreamInput -> GetFileChunkResponse(inp) } + } + + override fun shards(state: ClusterState, request: InternalRequest): ShardsIterator? 
{ + return state.routingTable().shardRoutingTable(request.request().leaderShardId).primaryShardIt() + } + +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportGetStoreMetadataAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportGetStoreMetadataAction.kt new file mode 100644 index 00000000..a113e925 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportGetStoreMetadataAction.kt @@ -0,0 +1,66 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import com.amazon.elasticsearch.replication.repository.RemoteClusterRestoreLeaderService +import org.apache.logging.log4j.LogManager +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.single.shard.TransportSingleShardAction +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver +import org.elasticsearch.cluster.routing.ShardsIterator +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.Writeable +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportActionProxy +import org.elasticsearch.transport.TransportService + +class TransportGetStoreMetadataAction @Inject constructor(threadPool: ThreadPool, clusterService: ClusterService, + transportService: TransportService, actionFilters: ActionFilters, + indexNameExpressionResolver: IndexNameExpressionResolver, + private val restoreLeaderService: RemoteClusterRestoreLeaderService) : + TransportSingleShardAction(GetStoreMetadataAction.NAME, + threadPool, clusterService, transportService, actionFilters, + indexNameExpressionResolver, ::GetStoreMetadataRequest, ThreadPool.Names.GET) { + init { + TransportActionProxy.registerProxyAction(transportService, GetStoreMetadataAction.NAME, ::GetStoreMetadataResponse) + } + + companion object { + private val log = LogManager.getLogger(TransportGetStoreMetadataAction::class.java) + } + + override fun shardOperation(request: GetStoreMetadataRequest, shardId: ShardId): GetStoreMetadataResponse { + log.debug(request.toString()) + var metadataSnapshot = restoreLeaderService.addRemoteClusterRestore(request.restoreUUID, request).metadataSnapshot + return 
GetStoreMetadataResponse(metadataSnapshot) + } + + override fun resolveIndex(request: GetStoreMetadataRequest): Boolean { + return true + } + + override fun getResponseReader(): Writeable.Reader { + return Writeable.Reader { inp: StreamInput -> GetStoreMetadataResponse(inp) } + } + + override fun shards(state: ClusterState, request: InternalRequest): ShardsIterator { + return state.routingTable().shardRoutingTable(request.request().leaderShardId).primaryShardIt() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportReleaseLeaderResourcesAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportReleaseLeaderResourcesAction.kt new file mode 100644 index 00000000..60bf7bad --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/repository/TransportReleaseLeaderResourcesAction.kt @@ -0,0 +1,67 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.repository + +import com.amazon.elasticsearch.replication.repository.RemoteClusterRestoreLeaderService +import org.apache.logging.log4j.LogManager +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.action.support.single.shard.TransportSingleShardAction +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver +import org.elasticsearch.cluster.routing.ShardsIterator +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.Writeable +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportActionProxy +import org.elasticsearch.transport.TransportService + +class TransportReleaseLeaderResourcesAction @Inject constructor(threadPool: ThreadPool, clusterService: ClusterService, + transportService: TransportService, actionFilters: ActionFilters, + indexNameExpressionResolver: IndexNameExpressionResolver, + private val restoreLeaderService: RemoteClusterRestoreLeaderService) : + TransportSingleShardAction(ReleaseLeaderResourcesAction.NAME, + threadPool, clusterService, transportService, actionFilters, + indexNameExpressionResolver, ::ReleaseLeaderResourcesRequest, ThreadPool.Names.GET) { + init { + TransportActionProxy.registerProxyAction(transportService, ReleaseLeaderResourcesAction.NAME, ::AcknowledgedResponse) + } + + companion object { + private val log = LogManager.getLogger(TransportReleaseLeaderResourcesAction::class.java) + } + + override fun shardOperation(request: ReleaseLeaderResourcesRequest, shardId: ShardId): AcknowledgedResponse { + log.info("Releasing resources for $shardId with restore-id as ${request.restoreUUID}") + 
restoreLeaderService.removeRemoteClusterRestore(request.restoreUUID) + return AcknowledgedResponse(true) + } + + override fun resolveIndex(request: ReleaseLeaderResourcesRequest?): Boolean { + return true + } + + override fun getResponseReader(): Writeable.Reader { + return Writeable.Reader { inp: StreamInput -> AcknowledgedResponse(inp) } + } + + override fun shards(state: ClusterState, request: InternalRequest): ShardsIterator? { + return state.routingTable().shardRoutingTable(request.request().leaderShardId).primaryShardIt() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/StopIndexReplicationAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/StopIndexReplicationAction.kt new file mode 100644 index 00000000..ec36782c --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/StopIndexReplicationAction.kt @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.stop + +import org.elasticsearch.action.ActionType +import org.elasticsearch.action.support.master.AcknowledgedResponse + +class StopIndexReplicationAction private constructor(): ActionType(NAME, ::AcknowledgedResponse) { + companion object { + const val NAME = "indices:admin/opendistro/replication/index/stop" + val INSTANCE: StopIndexReplicationAction = StopIndexReplicationAction() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/StopIndexReplicationRequest.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/StopIndexReplicationRequest.kt new file mode 100644 index 00000000..afacc6df --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/StopIndexReplicationRequest.kt @@ -0,0 +1,82 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.stop + +import org.elasticsearch.action.ActionRequestValidationException +import org.elasticsearch.action.IndicesRequest +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.* + +class StopIndexReplicationRequest : AcknowledgedRequest, IndicesRequest.Replaceable, ToXContentObject { + + lateinit var indexName: String + + constructor(indexName: String) { + this.indexName = indexName + } + + private constructor() { + } + + constructor(inp: StreamInput): super(inp) { + indexName = inp.readString() + } + + companion object { + private val PARSER = ObjectParser("StopReplicationRequestParser") { + StopIndexReplicationRequest() + } + + fun fromXContent(parser: XContentParser, followerIndex: String): StopIndexReplicationRequest { + val stopIndexReplicationRequest = PARSER.parse(parser, null) + stopIndexReplicationRequest.indexName = followerIndex + return stopIndexReplicationRequest + } + } + + override fun validate(): ActionRequestValidationException? 
{ + return null + } + + override fun indices(vararg indices: String?): IndicesRequest { + return this + } + + override fun indices(): Array { + return arrayOf(indexName) + } + + override fun indicesOptions(): IndicesOptions { + return IndicesOptions.strictSingleIndexNoExpandForbidClosed() + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + builder.field("indexName", indexName) + builder.endObject() + return builder + } + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeString(indexName) + } + +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/TransportStopIndexReplicationAction.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/TransportStopIndexReplicationAction.kt new file mode 100644 index 00000000..6815434b --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/action/stop/TransportStopIndexReplicationAction.kt @@ -0,0 +1,185 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.action.stop + +import com.amazon.elasticsearch.replication.ReplicationPlugin.Companion.REPLICATED_INDEX_SETTING +import com.amazon.elasticsearch.replication.metadata.INDEX_REPLICATION_BLOCK +import com.amazon.elasticsearch.replication.metadata.checkIfIndexBlockedWithLevel +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_KEY +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_RUNNING_VALUE +import com.amazon.elasticsearch.replication.metadata.ReplicationMetadata +import com.amazon.elasticsearch.replication.metadata.getReplicationStateParamsForIndex +import com.amazon.elasticsearch.replication.util.completeWith +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.suspending +import com.amazon.elasticsearch.replication.util.waitForClusterStateUpdate +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.admin.indices.close.CloseIndexRequest +import org.elasticsearch.action.admin.indices.open.OpenIndexRequest +import org.elasticsearch.action.support.ActionFilters +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.action.support.master.TransportMasterNodeAction +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.AckedClusterStateUpdateTask +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.RestoreInProgress +import org.elasticsearch.cluster.block.ClusterBlockException +import org.elasticsearch.cluster.block.ClusterBlockLevel +import org.elasticsearch.cluster.block.ClusterBlocks +import org.elasticsearch.cluster.metadata.IndexMetadata 
+import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver +import org.elasticsearch.cluster.metadata.Metadata +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.settings.Settings +import org.elasticsearch.index.IndexNotFoundException +import org.elasticsearch.threadpool.ThreadPool +import org.elasticsearch.transport.TransportService +import java.io.IOException + +class TransportStopIndexReplicationAction @Inject constructor(transportService: TransportService, + clusterService: ClusterService, + threadPool: ThreadPool, + actionFilters: ActionFilters, + indexNameExpressionResolver: + IndexNameExpressionResolver, + val client: Client) : + TransportMasterNodeAction (StopIndexReplicationAction.NAME, + transportService, clusterService, threadPool, actionFilters, ::StopIndexReplicationRequest, + indexNameExpressionResolver), CoroutineScope by GlobalScope { + + companion object { + private val log = LogManager.getLogger(TransportStopIndexReplicationAction::class.java) + } + + override fun checkBlock(request: StopIndexReplicationRequest, state: ClusterState): ClusterBlockException? 
{ + try { + checkIfIndexBlockedWithLevel(clusterService, request.indexName, ClusterBlockLevel.METADATA_WRITE) + } catch (exception: ClusterBlockException) { + return exception + } catch (exception: IndexNotFoundException) { + log.warn("Index ${request.indexName} is deleted") + } + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE) + } + + @Throws(Exception::class) + override fun masterOperation(request: StopIndexReplicationRequest, state: ClusterState, + listener: ActionListener) { + launch(Dispatchers.Unconfined + threadPool.coroutineContext()) { + listener.completeWith { + log.info("Stopping index replication on index:" + request.indexName) + validateStopReplicationRequest(request) + + // Index will be deleted if replication is stopped while it is restoring. So no need to close/reopen + val restoring = clusterService.state().custom(RestoreInProgress.TYPE).any { entry -> + entry.indices().any { it == request.indexName } + } + if (!restoring && + state.routingTable.hasIndex(request.indexName)) { + val closeResponse = suspending(client.admin().indices()::close)(CloseIndexRequest(request.indexName)) + if (!closeResponse.isAcknowledged) { + throw ElasticsearchException("Unable to close index: ${request.indexName}") + } + } + + val stateUpdateResponse : AcknowledgedResponse = + clusterService.waitForClusterStateUpdate("stop_replication") { l -> StopReplicationTask(request, l)} + if (!stateUpdateResponse.isAcknowledged) { + throw ElasticsearchException("Failed to update cluster state") + } + + // Index will be deleted if stop is called while it is restoring. 
So no need to reopen + if (!restoring && + state.routingTable.hasIndex(request.indexName)) { + val reopenResponse = suspending(client.admin().indices()::open)(OpenIndexRequest(request.indexName)) + if (!reopenResponse.isAcknowledged) { + throw ElasticsearchException("Failed to reopen index: ${request.indexName}") + } + } + AcknowledgedResponse(true) + } + } + } + + private fun validateStopReplicationRequest(request: StopIndexReplicationRequest) { + val replicationStateParams = getReplicationStateParamsForIndex(clusterService, request.indexName) + ?: + throw IllegalArgumentException("No replication in progress for index:${request.indexName}") + val replicationOverallState = replicationStateParams[REPLICATION_OVERALL_STATE_KEY] + if (replicationOverallState == REPLICATION_OVERALL_STATE_RUNNING_VALUE) + return + throw IllegalStateException("Unknown value of replication state:$replicationOverallState") + } + + override fun executor(): String { + return ThreadPool.Names.SAME + } + + @Throws(IOException::class) + override fun read(inp: StreamInput): AcknowledgedResponse { + return AcknowledgedResponse(inp) + } + + class StopReplicationTask(val request: StopIndexReplicationRequest, listener: ActionListener) : + AckedClusterStateUpdateTask(request, listener) { + + override fun execute(currentState: ClusterState): ClusterState { + val newState = ClusterState.builder(currentState) + + // remove index block + if (currentState.blocks.hasIndexBlock(request.indexName, INDEX_REPLICATION_BLOCK)) { + val newBlocks = ClusterBlocks.builder().blocks(currentState.blocks) + .removeIndexBlock(request.indexName, INDEX_REPLICATION_BLOCK) + newState.blocks(newBlocks) + } + + // remove replication metadata and state params + val mdBuilder = Metadata.builder(currentState.metadata) + val currentReplicationMetadata = currentState.metadata().custom(ReplicationMetadata.NAME) + ?: ReplicationMetadata.EMPTY + val clusterAlias = currentReplicationMetadata.replicatedIndices.entries.firstOrNull { + 
it.value.containsKey(request.indexName) + }?.key + if (clusterAlias != null) { + val newMetadata = currentReplicationMetadata.removeIndex(clusterAlias, request.indexName) + .removeReplicationStateParams(request.indexName) + .removeSecurityContext(clusterAlias, request.indexName) + mdBuilder.putCustom(ReplicationMetadata.NAME, newMetadata) + } + + // remove replicated index setting + val currentIndexMetadata = currentState.metadata.index(request.indexName) + if (currentIndexMetadata != null) { + val newIndexMetadata = IndexMetadata.builder(currentIndexMetadata) + .settings(Settings.builder().put(currentIndexMetadata.settings).putNull(REPLICATED_INDEX_SETTING.key)) + .settingsVersion(1 + currentIndexMetadata.settingsVersion) + mdBuilder.put(newIndexMetadata) + } + newState.metadata(mdBuilder) + return newState.build() + } + + override fun newResponse(acknowledged: Boolean) = AcknowledgedResponse(acknowledged) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/AddIndexBlockTask.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/AddIndexBlockTask.kt new file mode 100644 index 00000000..3fd56113 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/AddIndexBlockTask.kt @@ -0,0 +1,83 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.metadata + +import com.amazon.elasticsearch.replication.action.index.block.UpdateIndexBlockRequest +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.cluster.AckedClusterStateUpdateTask +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.block.ClusterBlock +import org.elasticsearch.cluster.block.ClusterBlockException +import org.elasticsearch.cluster.block.ClusterBlockLevel +import org.elasticsearch.cluster.block.ClusterBlocks +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.collect.ImmutableOpenMap +import org.elasticsearch.index.IndexNotFoundException +import org.elasticsearch.rest.RestStatus +import java.util.* + + +/* This is our custom index block to prevent changes to follower + index while replication is in progress. + */ +val INDEX_REPLICATION_BLOCK = ClusterBlock( + 1000, + "index read-only(cross-cluster-replication)", + false, + false, + false, + RestStatus.FORBIDDEN, + /* Follower index deletion is allowed in the absence of Metadata block. */ + // TODO: Add METADATA_WRITE to the list of blocked actions once we have a way for the replication tasks + // to make metadata changes like updating document mappings. 
+ EnumSet.of(ClusterBlockLevel.WRITE)) + +/* This function checks the local cluster state to see if given + index is blocked with given level with any block other than + our own INDEX_REPLICATION_BLOCK +*/ +fun checkIfIndexBlockedWithLevel(clusterService: ClusterService, + indexName: String, + clusterBlockLevel: ClusterBlockLevel) { + clusterService.state().routingTable.index(indexName) ?: + throw IndexNotFoundException("Index with name:$indexName doesn't exist") + val writeIndexBlockMap : ImmutableOpenMap> = clusterService.state().blocks() + .indices(clusterBlockLevel) + if (!writeIndexBlockMap.containsKey(indexName)) + return + val clusterBlocksSet : Set = writeIndexBlockMap.get(indexName) + if (clusterBlocksSet.contains(INDEX_REPLICATION_BLOCK) + && clusterBlocksSet.size > 1) + throw ClusterBlockException(clusterBlocksSet) +} + +class AddIndexBlockTask(val request: UpdateIndexBlockRequest, listener: ActionListener) : + AckedClusterStateUpdateTask(request, listener) +{ + override fun execute(currentState: ClusterState): ClusterState { + val newState = ClusterState.builder(currentState) + + if (!currentState.blocks.hasIndexBlock(request.indexName, INDEX_REPLICATION_BLOCK)) { + val newBlocks = ClusterBlocks.builder().blocks(currentState.blocks) + .addIndexBlock(request.indexName, INDEX_REPLICATION_BLOCK) + newState.blocks(newBlocks) + } + return newState.build() + } + + override fun newResponse(acknowledged: Boolean) = AcknowledgedResponse(acknowledged) +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/AutoFollowPattern.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/AutoFollowPattern.kt new file mode 100644 index 00000000..cf164572 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/AutoFollowPattern.kt @@ -0,0 +1,39 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.metadata + +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.io.stream.Writeable +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.XContentBuilder + +data class AutoFollowPattern(val name: String, val pattern: String?) : Writeable, ToXContent { + + constructor(inp : StreamInput) : this(inp.readString(), inp.readOptionalString()) + + override fun writeTo(out: StreamOutput) { + out.writeString(name) + out.writeOptionalString(pattern) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return builder.startObject() + .field("name", name) + .field("pattern", pattern) + .endObject() + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/ReplicationMetadata.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/ReplicationMetadata.kt new file mode 100644 index 00000000..fba7acce --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/ReplicationMetadata.kt @@ -0,0 +1,381 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.metadata + +import org.elasticsearch.Version +import org.elasticsearch.cluster.DiffableUtils +import org.elasticsearch.cluster.DiffableUtils.NonDiffableValueSerializer +import org.elasticsearch.cluster.DiffableUtils.getStringKeySerializer +import org.elasticsearch.cluster.NamedDiff +import org.elasticsearch.cluster.metadata.Metadata +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.common.xcontent.XContentParser +import java.io.IOException +import java.util.EnumSet +import kotlin.collections.HashMap +import org.elasticsearch.cluster.Diff as ESDiff + +// some descriptive type aliases to make it easier to read the code. 
+typealias AutoFollowPatterns = Map // { pattern name -> pattern } +typealias ReplicatedIndices = Map // { follower index name -> remote index name } +typealias SecurityContexts = Map // { follower index name -> User detail string } +typealias ClusterAlias = String +typealias ReplicationStateParams = Map +typealias FollowIndexName = String + +data class ReplicationMetadata(val autoFollowPatterns: Map, + val replicatedIndices: Map, + val replicationDetails: Map, + val securityContexts: Map) : Metadata.Custom { + + companion object { + const val NAME = "replication_metadata" + const val AUTO_FOLLOW_PATTERNS_KEY = "auto_follow_patterns" + const val REPLICATED_INDICES_KEY = "replicated_indices" + const val REPLICATION_DETAILS_KEY = "replication_details" + const val SECURITY_CONTEXTS_KEY = "security_contexts" + const val AUTOFOLLOW_SECURITY_CONTEXT_PATTERN_PREFIX = "odfe_autofollow_security_context_" + + val EMPTY = ReplicationMetadata(mapOf(), mapOf(), mapOf(), mapOf()) + + val patternsSerializer = object : NonDiffableValueSerializer() { + override fun write(value: AutoFollowPatterns, out: StreamOutput) { + out.writeMap(value, StreamOutput::writeString) { o, v -> v.writeTo(o) } + } + + override fun read(inp: StreamInput, key: String): AutoFollowPatterns { + return inp.readMap(StreamInput::readString, ::AutoFollowPattern) + } + } + + val indicesSerializer = object: NonDiffableValueSerializer() { + override fun write(value: ReplicatedIndices, out: StreamOutput) { + out.writeMap(value, StreamOutput::writeString, StreamOutput::writeString) + } + + override fun read(inp: StreamInput, key: String): ReplicatedIndices { + return inp.readMap(StreamInput::readString, StreamInput::readString) + } + } + + val replicationDetailsSerializer = object: NonDiffableValueSerializer() { + override fun write(value: ReplicationStateParams, out: StreamOutput) { + out.writeMap(value, StreamOutput::writeString, StreamOutput::writeString) + } + + override fun read(inp: StreamInput, key: String): 
ReplicationStateParams { + return inp.readMap(StreamInput::readString, StreamInput::readString) + } + } + + val securityContextsSerializer : NonDiffableValueSerializer = indicesSerializer + + @Throws(IOException::class) + fun fromXContent(parser: XContentParser): ReplicationMetadata { + var builder = Builder() + if (parser.currentToken() == null) { + parser.nextToken() + } + var token = parser.currentToken() + require(token == XContentParser.Token.START_OBJECT) { "expected start object but got a $token"} + + var currentField: String? = null + while (parser.nextToken().also { token = it } !== XContentParser.Token.END_OBJECT) { + if(token == XContentParser.Token.FIELD_NAME) { + currentField = parser.currentName() + } else if (AUTO_FOLLOW_PATTERNS_KEY == currentField) { + val allPatterns = HashMap() + while(parser.nextToken().also { token = it } != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentField = parser.currentName() + } + else if(token == XContentParser.Token.START_OBJECT) { + var patternsMap = parser.mapStrings() + var connectionPatterns = HashMap() + // It is converted to patternName -> pattern under cluster alias on disk + patternsMap.forEach{ (patternName, pattern) -> + connectionPatterns[patternName] = AutoFollowPattern(patternName, pattern) + } + allPatterns[currentField!!] = connectionPatterns + } + else { + throw IllegalArgumentException("Unexpected token during parsing " + + "replication_metadata[$AUTO_FOLLOW_PATTERNS_KEY] - $token") + } + } + builder.autoFollowPatterns(allPatterns) + } else if (REPLICATED_INDICES_KEY == currentField) { + val allreplicatedIndices = HashMap() + while(parser.nextToken().also { token = it } != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentField = parser.currentName() + } else if (token == XContentParser.Token.START_OBJECT) { + var replicatedIndices = parser.mapStrings() + allreplicatedIndices[currentField!!] 
= replicatedIndices + } else { + throw IllegalArgumentException("Unexpected token during parsing " + + "replication_metadata[$REPLICATED_INDICES_KEY] - $token") + } + } + builder.replicatedIndices(allreplicatedIndices) + } else if (REPLICATION_DETAILS_KEY == currentField) { + val onGoingReplicationDetails = HashMap() + while(parser.nextToken().also { token = it } != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentField = parser.currentName() + } + else if(token == XContentParser.Token.START_OBJECT) { + var replicationDetails = parser.mapStrings() + onGoingReplicationDetails[currentField!!] = replicationDetails + } else { + throw IllegalArgumentException("Unexpected token during parsing " + + "replication_metadata[$REPLICATED_INDICES_KEY] - $token") + } + } + builder.replicationDetails(onGoingReplicationDetails) + } else if (SECURITY_CONTEXTS_KEY == currentField) { + val allSecurityContexts = HashMap() + while(parser.nextToken().also { token = it } != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentField = parser.currentName() + } + else if(token == XContentParser.Token.START_OBJECT) { + var securityContexts = parser.mapStrings() + allSecurityContexts[currentField!!] 
= securityContexts + } else { + throw IllegalArgumentException("Unexpected token during parsing " + + "replication_metadata[$REPLICATED_INDICES_KEY] - $token") + } + } + builder.securityContexts(allSecurityContexts) + } + } + return builder.build() + } + } + + class Builder { + private var autoFollowPattern: Map = mapOf() + private var replicatedIndices: Map = mapOf() + private var replicationDetails: Map = mapOf() + private var securityContexts: Map = mapOf() + + fun autoFollowPatterns(patterns: Map): Builder { + this.autoFollowPattern = patterns + return this + } + + fun replicatedIndices(replicatedIndices: Map): Builder { + this.replicatedIndices = replicatedIndices + return this + } + + fun replicationDetails(replicationDetails: Map): Builder { + this.replicationDetails = replicationDetails + return this + } + + fun securityContexts(securityContexts: Map): Builder { + this.securityContexts = securityContexts + return this + } + + fun build(): ReplicationMetadata { + return ReplicationMetadata(autoFollowPattern, replicatedIndices, replicationDetails, securityContexts) + } + } + + constructor(inp: StreamInput) : this( + inp.readMap(StreamInput::readString) { i -> patternsSerializer.read(i, "") }, + inp.readMap(StreamInput::readString) { i -> indicesSerializer.read(i, "") }, + inp.readMap(StreamInput::readString) {i -> replicationDetailsSerializer.read(i, "")}, + inp.readMap(StreamInput::readString) { i -> securityContextsSerializer.read(i, "") } + ) + + override fun writeTo(out: StreamOutput) { + out.writeMap(autoFollowPatterns, StreamOutput::writeString) { o, v -> patternsSerializer.write(v, o) } + out.writeMap(replicatedIndices, StreamOutput::writeString) { o, v -> indicesSerializer.write(v, o) } + out.writeMap(replicationDetails, StreamOutput::writeString) { o, v -> replicationDetailsSerializer.write(v, o) } + out.writeMap(securityContexts, StreamOutput::writeString) { o, v -> securityContextsSerializer.write(v, o)} + } + + override fun diff(previousState: 
Metadata.Custom) = Diff(previousState as ReplicationMetadata, this) + + override fun getWriteableName(): String = NAME + + override fun getMinimalSupportedVersion(): Version = Version.V_7_1_0 + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject(AUTO_FOLLOW_PATTERNS_KEY) + autoFollowPatterns.forEach { (connectionName, patterns) -> + builder.field(connectionName, patterns.values.associate { it.name to it.pattern }) + } + builder.endObject() + builder.startObject(REPLICATED_INDICES_KEY) + replicatedIndices.forEach { (connectionName, indices) -> + builder.field(connectionName, indices) + } + builder.endObject() + builder.startObject(REPLICATION_DETAILS_KEY) + replicationDetails.forEach { (followIndex, replicationParams) -> + builder.field(followIndex, replicationParams) + } + builder.endObject() + builder.startObject(SECURITY_CONTEXTS_KEY) + securityContexts.forEach { (connectionName, securityContext) -> + builder.field(connectionName, securityContext) + } + return builder.endObject() + } + + override fun context(): EnumSet = Metadata.ALL_CONTEXTS + + fun removeRemoteCluster(clusterAlias: ClusterAlias) : ReplicationMetadata { + if (clusterAlias !in autoFollowPatterns && clusterAlias !in replicatedIndices) { + return this + } + return ReplicationMetadata(autoFollowPatterns.minus(clusterAlias), replicatedIndices.minus(clusterAlias), + replicationDetails, securityContexts.minus(clusterAlias)) + } + + fun removePattern(clusterAlias: ClusterAlias, patternName: String): ReplicationMetadata { + val currentPatterns = autoFollowPatterns.getOrDefault(clusterAlias, emptyMap()) + if (patternName !in currentPatterns) { + return this + } + val newPatterns = autoFollowPatterns.plus(clusterAlias to currentPatterns.minus(patternName)) + return copy(autoFollowPatterns = newPatterns) + } + + fun removeIndex(clusterAlias: ClusterAlias, index: String) : ReplicationMetadata { + val currentIndices = 
replicatedIndices.getOrDefault(clusterAlias, emptyMap()) + if (index !in currentIndices) { + return this + } + val newIndices = replicatedIndices.plus(clusterAlias to currentIndices.minus(index)) + return copy(replicatedIndices = newIndices) + } + + fun removeSecurityContext(clusterAlias: ClusterAlias, index: String) : ReplicationMetadata { + val currentIndices = securityContexts.getOrDefault(clusterAlias, emptyMap()) + if(index !in currentIndices) { + return this + } + val newSecurityContext = securityContexts.plus(clusterAlias to currentIndices.minus(index)) + return copy(securityContexts = newSecurityContext) + } + + fun addPattern(clusterAlias: ClusterAlias, newPattern: AutoFollowPattern) : ReplicationMetadata { + val currentPatterns = autoFollowPatterns.getOrDefault(clusterAlias, emptyMap()) + val currentPattern = currentPatterns[newPattern.name] + if (currentPattern == newPattern) { + return this + } + val newPatterns = autoFollowPatterns.plus(clusterAlias to currentPatterns.plus(newPattern.name to newPattern)) + return copy(autoFollowPatterns = newPatterns) + } + + fun addIndex(clusterAlias: ClusterAlias, index: String, remoteIndex: String) : ReplicationMetadata { + val currentIndices = replicatedIndices.getOrDefault(clusterAlias, emptyMap()) + if (index in currentIndices) { + check(currentIndices[index] == remoteIndex) { + "$index is already replicating ${currentIndices[index]}, can't replicate $remoteIndex." 
+ } + return this + } + val newIndices = replicatedIndices.plus(clusterAlias to currentIndices.plus(index to remoteIndex)) + return copy(replicatedIndices = newIndices) + } + + fun addReplicationStateParams(followIndexName: String, replicationParams: ReplicationStateParams) + : ReplicationMetadata { + val currentStateParamsForIndex = replicationDetails.getOrDefault(followIndexName, emptyMap()) + val newStateParamsForIndex = currentStateParamsForIndex.plus(replicationParams) + val newReplicationDetails = replicationDetails.plus(followIndexName to newStateParamsForIndex) + return copy(replicationDetails = newReplicationDetails) + } + + fun removeReplicationStateParams(followIndexName: String) : + ReplicationMetadata { + replicationDetails[followIndexName] ?: return this + return copy(replicationDetails = replicationDetails.minus(followIndexName)) + } + + fun addSecurityContext(clusterAlias: ClusterAlias, index: String, injectedUser: String?) : ReplicationMetadata { + val currentIndices = securityContexts.getOrDefault(clusterAlias, emptyMap()) + if((index in currentIndices && injectedUser.equals(currentIndices[index])) || injectedUser == null) { + return this + } + val newSecurityContext = securityContexts.plus(clusterAlias to currentIndices.plus(index to injectedUser)) + return copy(securityContexts = newSecurityContext) + } + + class Diff : NamedDiff { + + private val autoFollowPatterns : ESDiff> + private val replicatedIndices : ESDiff> + private val replicationDetails : ESDiff> + private val securityContexts : ESDiff> + + constructor(previous: ReplicationMetadata, current: ReplicationMetadata) { + autoFollowPatterns = DiffableUtils.diff(previous.autoFollowPatterns, current.autoFollowPatterns, + getStringKeySerializer(), patternsSerializer) + replicatedIndices = DiffableUtils.diff(previous.replicatedIndices, current.replicatedIndices, + getStringKeySerializer(), indicesSerializer) + replicationDetails = DiffableUtils.diff(previous.replicationDetails, 
current.replicationDetails, + getStringKeySerializer(), replicationDetailsSerializer) + securityContexts = DiffableUtils.diff(previous.securityContexts, current.securityContexts, + getStringKeySerializer(), securityContextsSerializer) + } + + constructor(inp: StreamInput) { + autoFollowPatterns = DiffableUtils.readJdkMapDiff(inp, getStringKeySerializer(), patternsSerializer) + replicatedIndices = DiffableUtils.readJdkMapDiff(inp, getStringKeySerializer(), indicesSerializer) + replicationDetails = DiffableUtils.readJdkMapDiff(inp, getStringKeySerializer(), replicationDetailsSerializer) + securityContexts = DiffableUtils.readJdkMapDiff(inp, getStringKeySerializer(), securityContextsSerializer) + } + + override fun writeTo(out: StreamOutput) { + autoFollowPatterns.writeTo(out) + replicatedIndices.writeTo(out) + replicationDetails.writeTo(out) + securityContexts.writeTo(out) + } + + override fun getWriteableName() = NAME + + override fun apply(part: Metadata.Custom): Metadata.Custom { + part as ReplicationMetadata + return ReplicationMetadata(autoFollowPatterns.apply(part.autoFollowPatterns), + replicatedIndices.apply(part.replicatedIndices), + replicationDetails.apply(part.replicationDetails), + securityContexts.apply(part.securityContexts)) + } + } +} + +const val REPLICATION_OVERALL_STATE_KEY = "REPLICATION_OVERALL_STATE_KEY" +const val REPLICATION_OVERALL_STATE_RUNNING_VALUE = "RUNNING" + +fun getReplicationStateParamsForIndex(clusterService: ClusterService, + followerIndex: String) : ReplicationStateParams? 
{ + return clusterService.state().metadata.custom(ReplicationMetadata.NAME) + ?.replicationDetails?.get(followerIndex) +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/UpdateReplicationMetadata.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/UpdateReplicationMetadata.kt new file mode 100644 index 00000000..94cf4d80 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/metadata/UpdateReplicationMetadata.kt @@ -0,0 +1,153 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.metadata + +import com.amazon.elasticsearch.replication.action.autofollow.UpdateAutoFollowPatternRequest +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexRequest +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexResponse +import com.amazon.elasticsearch.replication.action.replicationstatedetails.UpdateReplicationStateDetailsRequest +import com.amazon.elasticsearch.replication.action.stop.StopIndexReplicationRequest +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowExecutor +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowParams +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.persistentTasksService +import com.amazon.elasticsearch.replication.util.removeTask +import com.amazon.elasticsearch.replication.util.startTask +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.action.support.master.AcknowledgedResponse +import org.elasticsearch.action.support.master.MasterNodeRequest +import org.elasticsearch.cluster.AckedClusterStateUpdateTask +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.ClusterStateTaskExecutor +import org.elasticsearch.cluster.ack.AckedRequest +import org.elasticsearch.cluster.metadata.Metadata +import org.elasticsearch.threadpool.ThreadPool + +abstract class UpdateReplicationMetadata(request: AckedRequest, listener: ActionListener) + : AckedClusterStateUpdateTask(request, listener) { + + override fun execute(currentState: ClusterState): ClusterState { + val currentMetadata = currentState.metadata().custom(ReplicationMetadata.NAME) ?: ReplicationMetadata.EMPTY + val newMetadata = updateMetadata(currentMetadata) 
+ return if (currentMetadata == newMetadata) { + currentState // no change + } else { + val mdBuilder = Metadata.builder(currentState.metadata) + .putCustom(ReplicationMetadata.NAME, newMetadata) + ClusterState.Builder(currentState).metadata(mdBuilder).build() + } + } + + abstract fun updateMetadata(currentMetadata: ReplicationMetadata): ReplicationMetadata +} + +class UpdateAutoFollowPattern(val request: UpdateAutoFollowPatternRequest, + val threadPool: ThreadPool, + val injectedUser: String?, + listener: ActionListener) + : UpdateReplicationMetadata(request, listener) { + + override fun updateMetadata(currentMetadata: ReplicationMetadata) : ReplicationMetadata { + return when (request.action) { + UpdateAutoFollowPatternRequest.Action.REMOVE -> { + currentMetadata.removePattern(request.connection, request.patternName) + .removeSecurityContext(request.connection, ReplicationMetadata.AUTOFOLLOW_SECURITY_CONTEXT_PATTERN_PREFIX + + request.patternName) + } + + UpdateAutoFollowPatternRequest.Action.ADD -> { + val newPattern = AutoFollowPattern(request.patternName, + checkNotNull(request.pattern) { "null pattern" }) + currentMetadata.addPattern(request.connection, newPattern) + .addSecurityContext(request.connection, ReplicationMetadata.AUTOFOLLOW_SECURITY_CONTEXT_PATTERN_PREFIX + + request.patternName, injectedUser) + } + } + } + + override fun newResponse(acknowledged: Boolean) = AcknowledgedResponse(acknowledged) +} + +class UpdateReplicatedIndices>(val request: AcknowledgedRequest, + val injectedUser: String?, + listener: ActionListener) + : UpdateReplicationMetadata(request, listener) { + + override fun updateMetadata(currentMetadata: ReplicationMetadata): ReplicationMetadata { + if (request is ReplicateIndexRequest) + return currentMetadata.addIndex(request.remoteCluster, request.followerIndex, request.remoteIndex) + .addSecurityContext(request.remoteCluster, request.followerIndex, injectedUser) + else if(request is StopIndexReplicationRequest) { + val 
clusterAlias = currentMetadata.replicatedIndices.entries.firstOrNull { + it.value.containsKey(request.indexName)}?.key + clusterAlias?: throw IllegalStateException("Cant find cluster alias for follower index:${request.indexName}") + return currentMetadata.removeIndex(clusterAlias, request.indexName) + .removeSecurityContext(clusterAlias, request.indexName) + } + throw IllegalArgumentException("Unrecognised request:$request") + } + + override fun newResponse(acknowledged: Boolean): ReplicateIndexResponse = ReplicateIndexResponse(acknowledged) +} + +class UpdateReplicationStateDetailsTaskExecutor private constructor() + : ClusterStateTaskExecutor { + + companion object { + private val log = LogManager.getLogger(UpdateReplicationStateDetailsTaskExecutor::class.java) + val INSTANCE = UpdateReplicationStateDetailsTaskExecutor() + } + + override fun execute(currentState: ClusterState, tasks: List) + : ClusterStateTaskExecutor.ClusterTasksResult { + return getClusterStateUpdateTaskResult(tasks[0], currentState) + } + + private fun getClusterStateUpdateTaskResult(request: UpdateReplicationStateDetailsRequest, + currentState: ClusterState) + : ClusterStateTaskExecutor.ClusterTasksResult { + val currentMetadata = currentState.metadata().custom(ReplicationMetadata.NAME) ?: ReplicationMetadata.EMPTY + val newMetadata = getUpdatedReplicationMetadata(request, currentMetadata) + if (currentMetadata == newMetadata) { + return getStateUpdateTaskResultForClusterState(request, currentState) // no change + } else { + val mdBuilder = Metadata.builder(currentState.metadata) + .putCustom(ReplicationMetadata.NAME, newMetadata) + val newClusterState = ClusterState.Builder(currentState).metadata(mdBuilder).build() + return getStateUpdateTaskResultForClusterState(request, newClusterState) + } + } + + private fun getStateUpdateTaskResultForClusterState(request: UpdateReplicationStateDetailsRequest, + clusterState: ClusterState) + : ClusterStateTaskExecutor.ClusterTasksResult { + return 
ClusterStateTaskExecutor.ClusterTasksResult.builder() + .success(request).build(clusterState) + } + + private fun getUpdatedReplicationMetadata(request: UpdateReplicationStateDetailsRequest, + currentMetadata: ReplicationMetadata) + : ReplicationMetadata { + if (request.updateType == UpdateReplicationStateDetailsRequest.UpdateType.ADD) + return currentMetadata.addReplicationStateParams(request.followIndexName, + request.replicationStateParams) + return currentMetadata.removeReplicationStateParams(request.followIndexName) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterMultiChunkTransfer.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterMultiChunkTransfer.kt new file mode 100644 index 00000000..75ac3a53 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterMultiChunkTransfer.kt @@ -0,0 +1,114 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.repository + +import com.amazon.elasticsearch.replication.action.repository.GetFileChunkAction +import com.amazon.elasticsearch.replication.action.repository.GetFileChunkRequest +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.suspendExecute +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.GlobalScope +import kotlinx.coroutines.launch +import kotlinx.coroutines.sync.Mutex +import kotlinx.coroutines.sync.withLock +import org.apache.logging.log4j.Logger +import org.elasticsearch.action.ActionListener +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.common.unit.ByteSizeValue +import org.elasticsearch.common.util.concurrent.ThreadContext +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.index.store.Store +import org.elasticsearch.index.store.StoreFileMetadata +import org.elasticsearch.indices.recovery.MultiChunkTransfer +import org.elasticsearch.indices.recovery.MultiFileWriter +import org.elasticsearch.indices.recovery.RecoveryState + +class RemoteClusterMultiChunkTransfer(val logger: Logger, + val followerClusterName: String, + threadContext: ThreadContext, + val localStore: Store, + maxConcurrentFileChunks: Int, + val restoreUUID: String, + val remoteNode: DiscoveryNode, + val remoteShardId: ShardId, + val remoteFiles: List, + val remoteClusterClient: Client, + val recoveryState: RecoveryState, + val chunkSize: ByteSizeValue, + listener: ActionListener) : + MultiChunkTransfer(logger, + threadContext, listener, maxConcurrentFileChunks, remoteFiles), CoroutineScope by GlobalScope { + + private var offset = 0L + private val tempFilePrefix = "${RESTORE_SHARD_TEMP_FILE_PREFIX}${restoreUUID}." 
+ private val multiFileWriter = MultiFileWriter(localStore, recoveryState.index, tempFilePrefix, logger) {} + private val mutex = Mutex() + + init { + // Add all the available files to show the recovery status + for(fileMetadata in remoteFiles) { + recoveryState.index.addFileDetail(fileMetadata.name(), fileMetadata.length(), false) + } + recoveryState.index.setFileDetailsComplete() + } + + companion object { + const val RESTORE_SHARD_TEMP_FILE_PREFIX = "CLUSTER_REPO_TEMP_" + } + + override fun handleError(md: StoreFileMetadata, e: Exception) { + logger.error("Error while transferring segments $e") + } + + override fun onNewResource(md: StoreFileMetadata) { + // Reset the values for the next file + offset = 0L + } + + override fun executeChunkRequest(request: RemoteClusterRepositoryFileChunk, listener: ActionListener) { + val getFileChunkRequest = GetFileChunkRequest(restoreUUID, remoteNode, remoteShardId, request.storeFileMetadata, + request.offset, request.length, followerClusterName, recoveryState.shardId) + + launch(Dispatchers.IO + remoteClusterClient.threadPool().coroutineContext()) { + try { + val response = remoteClusterClient.suspendExecute(GetFileChunkAction.INSTANCE, getFileChunkRequest) + logger.debug("Filename: ${request.storeFileMetadata.name()}, " + + "response_size: ${response.data.length()}, response_offset: ${response.offset}") + mutex.withLock { + multiFileWriter.writeFileChunk(response.storeFileMetadata, response.offset, response.data, request.lastChunk()) + listener.onResponse(null) + } + } catch (e: Exception) { + logger.error("Failed to fetch file chunk for ${request.storeFileMetadata.name()} with offset ${request.offset}: $e") + listener.onFailure(e) + } + } + + } + + override fun nextChunkRequest(md: StoreFileMetadata): RemoteClusterRepositoryFileChunk { + val chunkReq = RemoteClusterRepositoryFileChunk(md, offset, chunkSize.bytesAsInt()) + offset += chunkSize.bytesAsInt() + return chunkReq + } + + override fun close() { + 
multiFileWriter.renameAllTempFiles() + multiFileWriter.close() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepositoriesService.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepositoriesService.kt new file mode 100644 index 00000000..c0e1b8f6 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepositoriesService.kt @@ -0,0 +1,44 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.repository +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.settings.ClusterSettings +import org.elasticsearch.repositories.RepositoriesService +import org.elasticsearch.transport.SniffConnectionStrategy.REMOTE_CLUSTER_SEEDS +import java.util.function.Supplier + +class RemoteClusterRepositoriesService(private val repositoriesService: Supplier, + clusterService: ClusterService) { + + init { + listenForUpdates(clusterService.clusterSettings) + } + + private fun listenForUpdates(clusterSettings: ClusterSettings) { + // TODO: Proxy support from ES 7.7. Needs additional handling based on those settings + clusterSettings.addAffixUpdateConsumer(REMOTE_CLUSTER_SEEDS, this::updateRepositoryDetails) { _, _ -> Unit } + } + + private fun updateRepositoryDetails(alias: String, seeds: List?) 
{ + if(seeds == null || seeds.isEmpty()) { + repositoriesService.get().unregisterInternalRepository(REMOTE_REPOSITORY_PREFIX + alias) + return + } + //TODO: Check to see if register should happen based on every seed node update + repositoriesService.get().registerInternalRepository(REMOTE_REPOSITORY_PREFIX + alias, REMOTE_REPOSITORY_TYPE) + } + +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepository.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepository.kt new file mode 100644 index 00000000..46308c4a --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepository.kt @@ -0,0 +1,357 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.repository + +import com.amazon.elasticsearch.replication.ReplicationPlugin +import com.amazon.elasticsearch.replication.action.repository.GetStoreMetadataAction +import com.amazon.elasticsearch.replication.action.repository.GetStoreMetadataRequest +import com.amazon.elasticsearch.replication.action.repository.ReleaseLeaderResourcesAction +import com.amazon.elasticsearch.replication.util.SecurityContext +import com.amazon.elasticsearch.replication.action.repository.ReleaseLeaderResourcesRequest +import com.amazon.elasticsearch.replication.util.executeUnderSecurityContext +import org.apache.logging.log4j.LogManager +import org.apache.lucene.index.IndexCommit +import org.elasticsearch.Version +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.ActionRequest +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.action.ActionType +import org.elasticsearch.action.admin.indices.stats.IndicesStatsAction +import org.elasticsearch.action.admin.indices.stats.IndicesStatsRequest +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.ClusterStateUpdateTask +import org.elasticsearch.cluster.metadata.IndexMetadata +import org.elasticsearch.cluster.metadata.Metadata +import org.elasticsearch.cluster.metadata.RepositoryMetadata +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.Nullable +import org.elasticsearch.common.UUIDs +import org.elasticsearch.common.component.AbstractLifecycleComponent +import org.elasticsearch.common.metrics.CounterMetric +import org.elasticsearch.common.settings.Settings +import org.elasticsearch.index.mapper.MapperService +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus +import 
org.elasticsearch.index.store.Store +import org.elasticsearch.index.store.StoreStats +import org.elasticsearch.indices.recovery.RecoverySettings +import org.elasticsearch.indices.recovery.RecoveryState +import org.elasticsearch.repositories.IndexId +import org.elasticsearch.repositories.Repository +import org.elasticsearch.repositories.RepositoryData +import org.elasticsearch.repositories.RepositoryShardId +import org.elasticsearch.repositories.ShardGenerations +import org.elasticsearch.snapshots.SnapshotId +import org.elasticsearch.snapshots.SnapshotInfo +import org.elasticsearch.snapshots.SnapshotState +import java.util.UUID +import java.util.function.Consumer +import java.util.function.Function +import kotlin.collections.ArrayList + +const val REMOTE_REPOSITORY_PREFIX = "opendistro-remote-repo-" +const val REMOTE_REPOSITORY_TYPE = "opendistro-remote-repository" +const val REMOTE_SNAPSHOT_NAME = "opendistro-remote-snapshot" + +class RemoteClusterRepository(private val repositoryMetadata: RepositoryMetadata, + private val client: Client, + private val clusterService: ClusterService, + private val recoverySettings: RecoverySettings): AbstractLifecycleComponent(), Repository { + + // Lazy init because we initialize when a remote cluster seed setting is added at which point the remote + // cluster connection might not be available yet + private val remoteClusterClient by lazy { client.getRemoteClusterClient(repositoryMetadata.remoteClusterName()) } + + companion object { + private val log = LogManager.getLogger(RemoteClusterRepository::class.java) + private val restoreRateLimitingTimeInNanos = CounterMetric() + private fun String.asUUID() : String = UUID.nameUUIDFromBytes(toByteArray()).toString() + private fun RepositoryMetadata.remoteClusterName() : String = this.name().split(REMOTE_REPOSITORY_PREFIX)[1] + const val REMOTE_CLUSTER_REPO_REQ_TIMEOUT_IN_MILLI_SEC = 60000L + + fun clusterForRepo(remoteRepoName: String) = 
remoteRepoName.split(REMOTE_REPOSITORY_PREFIX)[1] + fun repoForCluster(remoteClusterName: String) : String = REMOTE_REPOSITORY_PREFIX + remoteClusterName + } + + @Volatile private var parallelChunks = recoverySettings.maxConcurrentFileChunks + @Volatile private var chunkSize = recoverySettings.chunkSize + + override fun getRestoreThrottleTimeInNanos(): Long { + return restoreRateLimitingTimeInNanos.count() + } + + override fun finalizeSnapshot(shardGenerations: ShardGenerations?, repositoryStateId: Long, clusterMetadata: Metadata?, + snapshotInfo: SnapshotInfo?, repositoryMetaVersion: Version?, + stateTransformer: Function?, + listener: ActionListener?) { + throw UnsupportedOperationException("Operation not permitted") + } + + override fun deleteSnapshots(snapshotIds: MutableCollection?, repositoryStateId: Long, + repositoryMetaVersion: Version?, listener: ActionListener?) { + throw UnsupportedOperationException("Operation not permitted") + } + + override fun initializeSnapshot(snapshotId: SnapshotId, indices: MutableList, metadata: Metadata) { + throw UnsupportedOperationException("Operation not permitted") + } + + override fun startVerification(): String { + throw UnsupportedOperationException("Operation not permitted") + } + + override fun snapshotShard(store: Store?, mapperService: MapperService?, snapshotId: SnapshotId?, indexId: IndexId?, + snapshotIndexCommit: IndexCommit?, @Nullable shardStateIdentifier: String?, + snapshotStatus: IndexShardSnapshotStatus?, repositoryMetaVersion: Version?, + userMetadata: MutableMap?, listener: ActionListener?) { + throw UnsupportedOperationException("Operation not permitted") + } + + override fun getMetadata(): RepositoryMetadata { + return repositoryMetadata + } + + override fun verify(verificationToken: String, localNode: DiscoveryNode) { + } + + override fun cloneShardSnapshot(source: SnapshotId?, target: SnapshotId?, shardId: RepositoryShardId?, shardGeneration: String?, listener: ActionListener?) 
{ + throw UnsupportedOperationException("Operation not permitted") + } + + override fun doStart() { + } + + override fun doStop() { + } + + override fun doClose() { + } + + override fun endVerification(verificationToken: String) { + } + + override fun getSnapshotThrottleTimeInNanos(): Long { + throw UnsupportedOperationException("Operation not permitted") + } + + override fun getShardSnapshotStatus(snapshotId: SnapshotId, indexId: IndexId, + shardId: ShardId): IndexShardSnapshotStatus? { + val indicesStatsRequest = IndicesStatsRequest().all().indices(indexId.name) + val indicesStatsResponse = remoteClusterGetAction(IndicesStatsAction.INSTANCE, indicesStatsRequest, shardId.indexName) + for(i in indicesStatsResponse.shards.indices) { + if(indicesStatsResponse.shards[i].shardRouting.shardId().id == shardId.id) { + val sizeInBytes = indicesStatsResponse.shards[i].stats?.store?.sizeInBytes!! + // Filling in dummy values except size + return IndexShardSnapshotStatus.newDone(0L, 3L, 1, + 1, sizeInBytes, sizeInBytes , "") + } + } + return null + } + + override fun updateState(state: ClusterState) { + // TODO: Update any state as required + } + + override fun executeConsistentStateUpdate(createUpdateTask: Function?, + source: String?, onFailure: Consumer?) { + throw UnsupportedOperationException("Operation not permitted") + } + + /* + * Step 1: Gets all the indices from the remote cluster. + * At this point, we don't have information on targeted index for restore. + * Fetches all the information and creates a repository data object for the restore workflow. 
+ */ + override fun getRepositoryData(listener: ActionListener) { + val clusterState = getRemoteClusterState(false, false) + val shardGenerations = ShardGenerations.builder() + clusterState.metadata.indices.values() + .map { it.value } + .forEach { indexMetadata -> + val indexId = IndexId(indexMetadata.index.name, indexMetadata.indexUUID) + for (i in 0 until indexMetadata.numberOfShards) { + // Generations only make sense for eventually consistent BlobStores so just use a dummy value here. + shardGenerations.put(indexId, i, "dummy") + } + } + val snapshotId = SnapshotId(REMOTE_SNAPSHOT_NAME, REMOTE_SNAPSHOT_NAME.asUUID()) + val repositoryData = RepositoryData.EMPTY + .addSnapshot(snapshotId, SnapshotState.SUCCESS, Version.CURRENT, shardGenerations.build(), null, null) + listener.onResponse(repositoryData) + } + + /* + * Step 2: Creates the Snapshot object to give information + * on the indices present against the snapshotId + */ + override fun getSnapshotInfo(snapshotId: SnapshotId): SnapshotInfo { + val remoteClusterState = getRemoteClusterState(false, false) + assert(REMOTE_SNAPSHOT_NAME.equals(snapshotId.name), { "SnapshotName differs" }) + val indices = remoteClusterState.metadata().indices().keys().map { x -> x.value } + return SnapshotInfo(snapshotId, indices, emptyList(), SnapshotState.SUCCESS, Version.CURRENT) + } + + /* + * Step 3: Global metadata params are not passed in the restore workflow for this use-case + * TODO: Implement this after analysing all the use-cases + */ + override fun getSnapshotGlobalMetadata(snapshotId: SnapshotId): Metadata { + TODO("not implemented") //To change body of created functions use File | Settings | File Templates. 
+ } + + /* + * Step 4: Constructs the index metadata object for the index requested + */ + override fun getSnapshotIndexMetaData(repositoryData: RepositoryData, snapshotId: SnapshotId, index: IndexId): IndexMetadata { + assert(REMOTE_SNAPSHOT_NAME.equals(snapshotId.name), { "SnapshotName differs" }) + val remoteClusterState = getRemoteClusterState(false, false, index.name) + val indexMetadata = remoteClusterState.metadata.index(index.name) + + // Add replication specific settings + val builder = Settings.builder().put(indexMetadata.settings) + val replicatedIndex = "${repositoryMetadata.remoteClusterName()}:${index.name}" + builder.put(ReplicationPlugin.REPLICATED_INDEX_SETTING.key, replicatedIndex) + val indexMdBuilder = IndexMetadata.builder(indexMetadata).settings(builder) + indexMetadata.aliases.valuesIt().forEach { + indexMdBuilder.putAlias(it) + } + return indexMdBuilder.build() + } + + /* + * Step 5: restore shard by fetching the lucene segments from the remote cluster + */ + override fun restoreShard(store: Store, snapshotId: SnapshotId, indexId: IndexId, snapshotShardId: ShardId, + recoveryState: RecoveryState, listener: ActionListener) { + var multiChunkTransfer: RemoteClusterMultiChunkTransfer? + var restoreUUID: String? + var remoteShardNode: DiscoveryNode? + var remoteShardId: ShardId? + try { + store.incRef() + val followerIndexName = store.shardId().indexName + val followerShardId = store.shardId() + // 1. 
Get all the files info from the remote cluster for this shardId + // Node containing the shard + val remoteClusterState = getRemoteClusterState(true, true, indexId.name) + val remoteShardRouting = remoteClusterState.routingTable.shardRoutingTable(snapshotShardId.indexName, + snapshotShardId.id).primaryShard() + remoteShardNode = remoteClusterState.nodes.get(remoteShardRouting.currentNodeId()) + + // Get the index UUID of the remote cluster for the metadata request + remoteShardId = ShardId(snapshotShardId.indexName, + remoteClusterState.metadata.index(indexId.name).indexUUID, + snapshotShardId.id) + restoreUUID = UUIDs.randomBase64UUID() + val getStoreMetadataRequest = GetStoreMetadataRequest(restoreUUID, remoteShardNode, remoteShardId, + clusterService.clusterName.value(), followerShardId) + + // Gets the remote store metadata + val metadataResponse = remoteClusterGetAction(GetStoreMetadataAction.INSTANCE, getStoreMetadataRequest, followerIndexName) + val metadataSnapshot = metadataResponse.metadataSnapshot + + // 2. Request for individual files from remote cluster for this shardId + // make sure the store is not released until we are done. + val fileMetadata = ArrayList(metadataSnapshot.asMap().values) + multiChunkTransfer = RemoteClusterMultiChunkTransfer(log, clusterService.clusterName.value(), client.threadPool().threadContext, + store, parallelChunks, restoreUUID, remoteShardNode, + remoteShardId, fileMetadata, remoteClusterClient, recoveryState, chunkSize, + object: ActionListener{ + override fun onFailure(e: java.lang.Exception?) { + log.error("Restore of ${store.shardId()} failed due to $e") + store.decRef() + releaseLeaderResources(restoreUUID, remoteShardNode, remoteShardId, followerShardId, followerIndexName) + listener.onFailure(e) + } + override fun onResponse(response: Void?) 
{ + log.info("Restore successful for ${store.shardId()}") + store.decRef() + releaseLeaderResources(restoreUUID, remoteShardNode, remoteShardId, followerShardId, followerIndexName) + listener.onResponse(null) + } + }) + if(fileMetadata.isEmpty()) { + log.info("Initializing with empty store for shard:" + snapshotShardId.id) + store.createEmpty(store.indexSettings().indexVersionCreated.luceneVersion) + store.decRef() + releaseLeaderResources(restoreUUID, remoteShardNode, remoteShardId, followerShardId, followerIndexName) + listener.onResponse(null) + } + else { + remoteClusterClient.executeUnderSecurityContext(clusterService, repositoryMetadata.remoteClusterName(), followerIndexName) { + multiChunkTransfer.start() + } + } + } catch (e: Exception) { + log.error("Restore of shard from remote cluster repository failed due to $e") + store.decRef() + listener.onFailure(e) + } + } + + private fun releaseLeaderResources(restoreUUID: String, remoteShardNode: DiscoveryNode, + remoteShardId: ShardId, followerShardId: ShardId, followerIndexName: String) { + val releaseResourcesReq = ReleaseLeaderResourcesRequest(restoreUUID, remoteShardNode, remoteShardId, + clusterService.clusterName.value(), followerShardId) + if(remoteClusterGetAction(ReleaseLeaderResourcesAction.INSTANCE, releaseResourcesReq, followerIndexName).isAcknowledged) { + log.info("Successfully released resources at the leader cluster for $remoteShardId at $remoteShardNode") + } + } + + override fun isReadOnly(): Boolean { + return true + } + + /* + * This method makes a blocking call to the remote cluster + * For restore workflow this is expected. 
+ */ + private fun getRemoteClusterState(includeNodes: Boolean, includeRoutingTable: Boolean, vararg remoteIndices: String): ClusterState { + val clusterStateRequest = remoteClusterClient.admin().cluster().prepareState() + .clear() + .setIndices(*remoteIndices) + .setMetadata(true) + .setNodes(includeNodes) + .setRoutingTable(includeRoutingTable) + .setIndicesOptions(IndicesOptions.strictSingleIndexNoExpandForbidClosed()) + .request() + + val remoteState = remoteClusterClient.admin().cluster().state(clusterStateRequest) + .actionGet(REMOTE_CLUSTER_REPO_REQ_TIMEOUT_IN_MILLI_SEC).state + log.trace("Successfully fetched the cluster state from remote repository ${remoteState}") + return remoteState + } + + /* + * Makes transport action to the remote cluster by making a blocking call + * For restore workflow this is expected. + */ + private fun remoteClusterGetAction(actionType: ActionType, + actionRequest: ActionRequest, + followerIndex: String): T { + val userString = SecurityContext.fromClusterState(clusterService.state(), + repositoryMetadata.remoteClusterName(), + followerIndex) + remoteClusterClient.threadPool().threadContext.newStoredContext(true).use { + SecurityContext.toThreadContext(remoteClusterClient.threadPool().threadContext, userString) + return remoteClusterClient.execute(actionType, actionRequest).actionGet(REMOTE_CLUSTER_REPO_REQ_TIMEOUT_IN_MILLI_SEC) + } + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepositoryFileChunk.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepositoryFileChunk.kt new file mode 100644 index 00000000..296a56e6 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRepositoryFileChunk.kt @@ -0,0 +1,28 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.repository + +import org.elasticsearch.index.store.StoreFileMetadata +import org.elasticsearch.indices.recovery.MultiChunkTransfer.ChunkRequest + +class RemoteClusterRepositoryFileChunk constructor(val storeFileMetadata: StoreFileMetadata, + val offset: Long, + val length: Int): ChunkRequest { + + override fun lastChunk(): Boolean { + return storeFileMetadata.length() <= offset + length + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRestoreLeaderService.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRestoreLeaderService.kt new file mode 100644 index 00000000..22b4d343 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RemoteClusterRestoreLeaderService.kt @@ -0,0 +1,153 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.repository + +import com.amazon.elasticsearch.replication.action.repository.RemoteClusterRepositoryRequest +import com.amazon.elasticsearch.replication.seqno.RemoteClusterRetentionLeaseHelper +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationTask +import com.amazon.elasticsearch.replication.util.performOp +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.action.support.single.shard.SingleShardRequest +import org.elasticsearch.client.node.NodeClient +import org.elasticsearch.common.component.AbstractLifecycleComponent +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.common.inject.Singleton +import org.elasticsearch.common.lucene.store.InputStreamIndexInput +import org.elasticsearch.core.internal.io.IOUtils +import org.elasticsearch.index.engine.Engine +import org.elasticsearch.index.seqno.RetentionLeaseActions +import org.elasticsearch.index.seqno.SequenceNumbers +import org.elasticsearch.index.shard.IndexShard +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.index.store.Store +import org.elasticsearch.indices.IndicesService +import java.io.Closeable +import java.io.IOException + +/* + * Restore source service tracks all the ongoing restore operations + * relying on the leader shards. Once the restore is completed the + * relevant resources are released. Also, listens on the index events + * to update the resources + */ +@Singleton +class RemoteClusterRestoreLeaderService @Inject constructor(private val indicesService: IndicesService, + private val nodeClient : NodeClient) : + AbstractLifecycleComponent() { + + // TODO: Listen for the index events and release relevant resources. 
+ private val onGoingRestores: MutableMap = mutableMapOf() + private val closableResources: MutableList = mutableListOf() + + override fun doStart() { + } + + override fun doStop() { + } + + override fun doClose() { + // Obj in the list being null or closed has no effect + IOUtils.close(closableResources) + } + + @Synchronized + fun ?> addRemoteClusterRestore(restoreUUID: String, + request: RemoteClusterRepositoryRequest): RestoreContext { + return onGoingRestores.getOrPut(restoreUUID) { constructRestoreContext(restoreUUID, request)} + } + + private fun getRemoteClusterRestore(restoreUUID: String): RestoreContext { + return onGoingRestores[restoreUUID] ?: throw IllegalStateException("missing restoreContext") + } + + @Synchronized + fun ?> openInputStream(restoreUUID: String, + request: RemoteClusterRepositoryRequest, + fileName: String, + length: Long): InputStreamIndexInput { + val leaderIndexShard = indicesService.getShardOrNull(request.leaderShardId) + ?: throw ElasticsearchException("Shard [$request.leaderShardId] missing") + val store = leaderIndexShard.store() + val restoreContext = getRemoteClusterRestore(restoreUUID) + val indexInput = restoreContext.openInput(store, fileName) + + return object : InputStreamIndexInput(indexInput, length) { + @Throws(IOException::class) + override fun close() { + IOUtils.close(indexInput, Closeable { super.close() }) // InputStreamIndexInput's close is a noop + } + } + } + + private fun ?> constructRestoreContext(restoreUUID: String, + request: RemoteClusterRepositoryRequest): RestoreContext { + val leaderIndexShard = indicesService.getShardOrNull(request.leaderShardId) + ?: throw ElasticsearchException("Shard [$request.leaderShardId] missing") + // Passing nodeclient of the leader to acquire the retention lease on leader shard + val retentionLeaseHelper = RemoteClusterRetentionLeaseHelper(request.followerCluster, nodeClient) + /** + * ODFE Replication supported for >= ES 7.8. 
History of operations directly from + * lucene index. With the retention lock set - safe commit should have all the history + * upto the current retention leases. + */ + val retentionLock = leaderIndexShard.acquireHistoryRetentionLock(Engine.HistorySource.INDEX) + closableResources.add(retentionLock) + + /** + * Construct restore via safe index commit + * at the leader cluster. All the references from this commit + * should be available until it is closed. + */ + val indexCommitRef = leaderIndexShard.acquireSafeIndexCommit() + + val store = leaderIndexShard.store() + var metadataSnapshot = Store.MetadataSnapshot.EMPTY + store.performOp({ + metadataSnapshot = store.getMetadata(indexCommitRef.indexCommit) + }) + + // Identifies the seq no to start the replication operations from + var fromSeqNo = RetentionLeaseActions.RETAIN_ALL + + // Adds the retention lease for fromSeqNo for the next stage of the replication. + retentionLeaseHelper.addRetentionLease(request.leaderShardId, fromSeqNo, + request.followerShardId, RemoteClusterRepository.REMOTE_CLUSTER_REPO_REQ_TIMEOUT_IN_MILLI_SEC) + + /** + * At this point, it should be safe to release retention lock as the retention lease + * is acquired from the local checkpoint and the rest of the follower replay actions + * can be performed using this retention lease. 
+ */ + retentionLock.close() + + var restoreContext = RestoreContext(restoreUUID, leaderIndexShard, + indexCommitRef, metadataSnapshot, fromSeqNo) + onGoingRestores[restoreUUID] = restoreContext + + closableResources.add(restoreContext) + return restoreContext + } + + @Synchronized + fun removeRemoteClusterRestore(restoreUUID: String) { + val restoreContext = onGoingRestores.remove(restoreUUID) + /** + * cleaning the resources - Closing only index safe commit + * as retention lease will be updated in the GetChanges flow + */ + restoreContext?.close() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RestoreContext.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RestoreContext.kt new file mode 100644 index 00000000..156807a4 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/repository/RestoreContext.kt @@ -0,0 +1,60 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.repository + +import com.amazon.elasticsearch.replication.util.performOp +import org.apache.lucene.store.IOContext +import org.apache.lucene.store.IndexInput +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.index.engine.Engine +import org.elasticsearch.index.shard.IndexShard +import org.elasticsearch.index.store.Store +import java.io.Closeable + +class RestoreContext(val restoreUUID: String, + val shard: IndexShard, + val indexCommitRef: Engine.IndexCommitRef, + val metadataSnapshot: Store.MetadataSnapshot, + val replayOperationsFrom: Long): Closeable { + + companion object { + private const val INITIAL_FILE_CACHE_CAPACITY = 20 + } + private val currentFiles = LinkedHashMap(INITIAL_FILE_CACHE_CAPACITY) + + fun openInput(store: Store, fileName: String): IndexInput { + var currentIndexInput = currentFiles.getOrDefault(fileName, null) + if(currentIndexInput != null) { + return currentIndexInput.clone() + } + store.performOp({ + currentIndexInput = store.directory().openInput(fileName, IOContext.READONCE) + }) + + currentFiles[fileName] = currentIndexInput!! + return currentIndexInput!!.clone() + } + + override fun close() { + // Close all the open index input obj + currentFiles.entries.forEach { + it.value.close() + } + currentFiles.clear() + indexCommitRef.close() + } + +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/rest/ReplicateIndexHandler.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/rest/ReplicateIndexHandler.kt new file mode 100644 index 00000000..a4ac7331 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/rest/ReplicateIndexHandler.kt @@ -0,0 +1,51 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.rest + +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexAction +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexRequest +import org.elasticsearch.client.node.NodeClient +import org.elasticsearch.rest.BaseRestHandler +import org.elasticsearch.rest.BaseRestHandler.RestChannelConsumer +import org.elasticsearch.rest.RestChannel +import org.elasticsearch.rest.RestHandler +import org.elasticsearch.rest.RestRequest +import org.elasticsearch.rest.action.RestToXContentListener +import java.io.IOException + +class ReplicateIndexHandler : BaseRestHandler() { + + override fun routes(): List { + return listOf(RestHandler.Route(RestRequest.Method.PUT, "/_opendistro/_replication/{index}/_start")) + } + + override fun getName(): String { + return "opendistro_index_start_replicate_action" + } + + @Throws(IOException::class) + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + request.contentOrSourceParamParser().use { parser -> + val followerIndex = request.param("index") + val followIndexRequest = ReplicateIndexRequest.fromXContent(parser, followerIndex) + followIndexRequest.waitForRestore = request.paramAsBoolean("wait_for_restore", false) + return RestChannelConsumer { + channel: RestChannel? 
-> client.admin().cluster() + .execute(ReplicateIndexAction.INSTANCE, followIndexRequest, RestToXContentListener(channel)) + } + } + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/rest/StopIndexReplicationHandler.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/rest/StopIndexReplicationHandler.kt new file mode 100644 index 00000000..28180fc9 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/rest/StopIndexReplicationHandler.kt @@ -0,0 +1,49 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.rest + +import com.amazon.elasticsearch.replication.action.stop.StopIndexReplicationAction +import com.amazon.elasticsearch.replication.action.stop.StopIndexReplicationRequest +import org.elasticsearch.client.node.NodeClient +import org.elasticsearch.rest.BaseRestHandler +import org.elasticsearch.rest.RestChannel +import org.elasticsearch.rest.RestHandler +import org.elasticsearch.rest.RestRequest +import org.elasticsearch.rest.action.RestToXContentListener +import java.io.IOException + +class StopIndexReplicationHandler : BaseRestHandler() { + + override fun routes(): List { + return listOf(RestHandler.Route(RestRequest.Method.POST, "/_opendistro/_replication/{index}/_stop")) + } + + override fun getName(): String { + return "opendistro_index_stop_replicate_action" + } + + @Throws(IOException::class) + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + request.contentOrSourceParamParser().use { parser -> + val followIndex = request.param("index") + val stopReplicationRequest = StopIndexReplicationRequest.fromXContent(parser, followIndex) + return RestChannelConsumer { channel: RestChannel? -> + client.admin().cluster() + .execute(StopIndexReplicationAction.INSTANCE, stopReplicationRequest, RestToXContentListener(channel)) + } + } + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/rest/UpdateAutoFollowPatternsHandler.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/rest/UpdateAutoFollowPatternsHandler.kt new file mode 100644 index 00000000..4cb5757f --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/rest/UpdateAutoFollowPatternsHandler.kt @@ -0,0 +1,57 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. 
+ * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.rest + +import com.amazon.elasticsearch.replication.action.autofollow.UpdateAutoFollowPatternAction +import com.amazon.elasticsearch.replication.action.autofollow.UpdateAutoFollowPatternRequest +import org.elasticsearch.ElasticsearchStatusException +import org.elasticsearch.client.node.NodeClient +import org.elasticsearch.rest.BaseRestHandler +import org.elasticsearch.rest.BaseRestHandler.RestChannelConsumer +import org.elasticsearch.rest.RestHandler +import org.elasticsearch.rest.RestRequest +import org.elasticsearch.rest.RestStatus +import org.elasticsearch.rest.action.RestToXContentListener + +class UpdateAutoFollowPatternsHandler : BaseRestHandler() { + + companion object { + const val PATH = "/_opendistro/_replication/_autofollow" + } + + override fun routes(): List { + return listOf(RestHandler.Route(RestRequest.Method.POST, PATH), + RestHandler.Route(RestRequest.Method.DELETE, PATH)) + } + + override fun getName() = "opendistro_replication_autofollow_update" + + override fun prepareRequest(request: RestRequest, client: NodeClient): RestChannelConsumer { + val action = when { + request.method() == RestRequest.Method.POST -> UpdateAutoFollowPatternRequest.Action.ADD + request.method() == RestRequest.Method.DELETE -> UpdateAutoFollowPatternRequest.Action.REMOVE + // Should not be reached unless someone updates the restController with a new method but forgets to add it here. 
+ else -> + throw ElasticsearchStatusException("Unsupported method ", RestStatus.METHOD_NOT_ALLOWED, request.method()) + } + + val updateRequest = UpdateAutoFollowPatternRequest.fromXContent(request.contentParser(), action) + return RestChannelConsumer { channel -> + client.admin().cluster() + .execute(UpdateAutoFollowPatternAction.INSTANCE, updateRequest, RestToXContentListener(channel)) + } + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/seqno/RemoteClusterRetentionLeaseHelper.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/seqno/RemoteClusterRetentionLeaseHelper.kt new file mode 100644 index 00000000..70c28ffe --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/seqno/RemoteClusterRetentionLeaseHelper.kt @@ -0,0 +1,98 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.seqno + +import com.amazon.elasticsearch.replication.util.suspendExecute +import org.apache.logging.log4j.LogManager +import org.elasticsearch.client.Client +import org.elasticsearch.common.logging.Loggers +import org.elasticsearch.index.seqno.RetentionLeaseActions +import org.elasticsearch.index.seqno.RetentionLeaseAlreadyExistsException +import org.elasticsearch.index.seqno.RetentionLeaseNotFoundException +import org.elasticsearch.index.shard.ShardId + +class RemoteClusterRetentionLeaseHelper constructor(val followerClusterName: String, val client: Client) { + + private val retentionLeaseSource = retentionLeaseSource(followerClusterName) + + companion object { + private val log = LogManager.getLogger(RemoteClusterRetentionLeaseHelper::class.java) + fun retentionLeaseSource(followerClusterName: String): String = "replication:${followerClusterName}" + + fun retentionLeaseIdForShard(followerClusterName: String, followerShardId: ShardId): String { + val retentionLeaseSource = retentionLeaseSource(followerClusterName) + return "$retentionLeaseSource:${followerShardId}" + } + } + + public suspend fun addRetentionLease(remoteShardId: ShardId, seqNo: Long, followerShardId: ShardId) { + val retentionLeaseId = retentionLeaseIdForShard(followerClusterName, followerShardId) + val request = RetentionLeaseActions.AddRequest(remoteShardId, retentionLeaseId, seqNo, retentionLeaseSource) + try { + client.suspendExecute(RetentionLeaseActions.Add.INSTANCE, request) + } catch (e: RetentionLeaseAlreadyExistsException) { + log.error("${e.message}") + log.info("Renew retention lease as it already exists $retentionLeaseId with $seqNo") + // Only one retention lease should exists for the follower shard + // Ideally, this should have got cleaned-up + renewRetentionLease(remoteShardId, seqNo, followerShardId) + } + } + + public suspend fun renewRetentionLease(remoteShardId: ShardId, seqNo: Long, followerShardId: ShardId) { + val 
retentionLeaseId = retentionLeaseIdForShard(followerClusterName, followerShardId) + val request = RetentionLeaseActions.RenewRequest(remoteShardId, retentionLeaseId, seqNo, retentionLeaseSource) + client.suspendExecute(RetentionLeaseActions.Renew.INSTANCE, request) + } + + public suspend fun removeRetentionLease(remoteShardId: ShardId, followerShardId: ShardId) { + val retentionLeaseId = retentionLeaseIdForShard(followerClusterName, followerShardId) + val request = RetentionLeaseActions.RemoveRequest(remoteShardId, retentionLeaseId) + try { + client.suspendExecute(RetentionLeaseActions.Remove.INSTANCE, request) + log.info("Removed retention lease with id - $retentionLeaseId") + } catch(e: RetentionLeaseNotFoundException) { + // log error and bail + log.error("${e.message}") + } + } + + + /** + * Remove these once the callers are moved to above APIs + */ + public fun addRetentionLease(remoteShardId: ShardId, seqNo: Long, + followerShardId: ShardId, timeout: Long) { + val retentionLeaseId = retentionLeaseIdForShard(followerClusterName, followerShardId) + val request = RetentionLeaseActions.AddRequest(remoteShardId, retentionLeaseId, seqNo, retentionLeaseSource) + try { + client.execute(RetentionLeaseActions.Add.INSTANCE, request).actionGet(timeout) + } catch (e: RetentionLeaseAlreadyExistsException) { + log.error("${e.message}") + log.info("Renew retention lease as it already exists $retentionLeaseId with $seqNo") + // Only one retention lease should exists for the follower shard + // Ideally, this should have got cleaned-up + renewRetentionLease(remoteShardId, seqNo, followerShardId, timeout) + } + } + + public fun renewRetentionLease(remoteShardId: ShardId, seqNo: Long, + followerShardId: ShardId, timeout: Long) { + val retentionLeaseId = retentionLeaseIdForShard(followerClusterName, followerShardId) + val request = RetentionLeaseActions.RenewRequest(remoteShardId, retentionLeaseId, seqNo, retentionLeaseSource) + client.execute(RetentionLeaseActions.Renew.INSTANCE, 
request).actionGet(timeout) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/CrossClusterReplicationTask.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/CrossClusterReplicationTask.kt new file mode 100644 index 00000000..400b192e --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/CrossClusterReplicationTask.kt @@ -0,0 +1,185 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.task + +import com.amazon.elasticsearch.replication.util.SecurityContext +import com.amazon.elasticsearch.replication.util.coroutineContext +import com.amazon.elasticsearch.replication.util.suspending +import kotlinx.coroutines.CancellationException +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.NonCancellable +import kotlinx.coroutines.cancel +import kotlinx.coroutines.launch +import kotlinx.coroutines.withContext +import kotlinx.coroutines.withTimeoutOrNull +import org.apache.logging.log4j.Logger +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.ToXContentObject +import org.elasticsearch.common.xcontent.XContentBuilder 
+import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.persistent.AllocatedPersistentTask +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksService +import org.elasticsearch.tasks.TaskId +import org.elasticsearch.tasks.TaskManager +import org.elasticsearch.threadpool.ThreadPool + +abstract class CrossClusterReplicationTask(id: Long, type: String, action: String, description: String, parentTask: TaskId, + headers: Map, + protected val executor: String, + protected val clusterService: ClusterService, + protected val threadPool: ThreadPool, + protected val client: Client) : + AllocatedPersistentTask(id, type, action, description, parentTask, headers) { + + protected val scope = CoroutineScope(threadPool.coroutineContext(executor)) + protected abstract val log : Logger + protected abstract val followerIndexName: String + protected abstract val remoteCluster: String + @Volatile private lateinit var taskManager: TaskManager + + override fun init(persistentTasksService: PersistentTasksService, taskManager: TaskManager, + persistentTaskId: String, allocationId: Long) { + super.init(persistentTasksService, taskManager, persistentTaskId, allocationId) + this.taskManager = taskManager + } + + override fun onCancelled() { + super.onCancelled() + scope.cancel() + } + + fun run(initialState: PersistentTaskState? = null) { + scope.launch { + var exception : Throwable? = null + try { + registerCloseListeners() + setSecurityContext() + execute(initialState) + markAsCompleted() + } catch (e: Exception) { + if (isCancelled || e is CancellationException) { + markAsCompleted() + log.info("Completed the task with id:$id") + } else { + exception = e + markAsFailed(e) + } + } finally { + unregisterCloseListeners() + // Need to execute cleanup regardless of cancellation so run in NonCancellable context but with a + // timeout. 
See https://kotlinlang.org/docs/reference/coroutines/cancellation-and-timeouts.html#run-non-cancellable-block + withContext(NonCancellable) { + withTimeoutOrNull(60000) { + cleanupFinally(exception) + } + } + } + } + } + + protected abstract fun replicationTaskResponse(): CrossClusterReplicationTaskResponse + + override fun markAsCompleted() { + taskManager.storeResult(this, replicationTaskResponse(), ActionListener.wrap( + {log.info("Successfully persisted task status")}, + {e -> log.warn("Error storing result $e")} + )) + super.markAsCompleted() + } + + override fun markAsFailed(e: Exception) { + taskManager.storeResult(this, e, ActionListener.wrap( + {log.info("Successfully persisted failure")}, + {log.error("Task failed due to $e")} + )) + super.markAsFailed(e) + } + + /** + * A list of [ShardId]s or index names for which this task's [onIndexOrShardClosed] method should be called when + * closed. + */ + protected open fun indicesOrShards() : List = emptyList() + + private fun registerCloseListeners() { + for (indexOrShard in indicesOrShards()) { + IndexCloseListener.addCloseListener(indexOrShard, this) + } + } + + private fun unregisterCloseListeners() { + for (indexOrShard in indicesOrShards()) { + IndexCloseListener.removeCloseListener(indexOrShard, this) + } + } + + fun onIndexOrShardClosed(indexOrShardId: Any) { + scope.cancel("$indexOrShardId was closed.") + } + + /** + * Persists the state of the task in the cluster metadata. If the task is resumed on a different node then this + * will be used to restart the task from the correct state. + */ + protected suspend fun updateTaskState(state: PersistentTaskState) { + suspending(::updatePersistentTaskState)(state) + } + + protected abstract suspend fun execute(initialState: PersistentTaskState?) + + protected open suspend fun cleanup() {} + + /** + * Handles case where a suspending finally block throws an exception. + */ + private suspend fun cleanupFinally(cause: Throwable?) 
{ + if (cause == null) { + cleanup() + } else { + try { + cleanup() + } catch(e: Exception) { + cause.addSuppressed(e) + } + } + } + + /** + * Sets the security context + */ + protected open fun setSecurityContext() { + val injectedUser = SecurityContext.fromClusterState(clusterService.state(), remoteCluster, followerIndexName) + SecurityContext.toThreadContext(threadPool.threadContext, injectedUser) + } + + open class CrossClusterReplicationTaskResponse(val status: String): ActionResponse(), ToXContentObject { + override fun writeTo(out: StreamOutput) { + out.writeString(status) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return builder.startObject() + .field("status", status) + .endObject() + } + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/IndexCloseListener.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/IndexCloseListener.kt new file mode 100644 index 00000000..4b3cef81 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/IndexCloseListener.kt @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task + +import org.elasticsearch.common.settings.Settings +import org.elasticsearch.index.IndexService +import org.elasticsearch.index.shard.IndexEventListener +import org.elasticsearch.index.shard.IndexShard +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.indices.cluster.IndicesClusterStateService +import java.util.Collections +import java.util.concurrent.ConcurrentHashMap + +object IndexCloseListener : IndexEventListener { + + private val tasks = ConcurrentHashMap>() + + fun addCloseListener(indexOrShardId: Any, task: CrossClusterReplicationTask) { + require(indexOrShardId is String || indexOrShardId is ShardId) { + "Can't register a close listener for ${indexOrShardId}. Only Index or ShardIds are allowed." + } + tasks.computeIfAbsent(indexOrShardId) { Collections.synchronizedSet(mutableSetOf()) }.add(task) + } + + fun removeCloseListener(indexOrShardId: Any, task: CrossClusterReplicationTask) { + tasks.computeIfPresent(indexOrShardId) { _, v -> + v.remove(task) + if (v.isEmpty()) null else v + } + } + + override fun beforeIndexShardClosed(shardId: ShardId, indexShard: IndexShard?, indexSettings: Settings) { + super.beforeIndexShardClosed(shardId, indexShard, indexSettings) + val tasksToCancel = tasks.remove(shardId) + if (tasksToCancel != null) { + for (task in tasksToCancel) { + task.onIndexOrShardClosed(shardId) + } + } + } + + override fun beforeIndexRemoved(indexService: IndexService, + reason: IndicesClusterStateService.AllocatedIndices.IndexRemovalReason) { + super.beforeIndexRemoved(indexService, reason) + val tasksToCancel = tasks.remove(indexService.index().name) + if (tasksToCancel != null) { + for (task in tasksToCancel) { + task.onIndexOrShardClosed(indexService.index().name) + } + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/ReplicationState.kt 
b/src/main/kotlin/com/amazon/elasticsearch/replication/task/ReplicationState.kt new file mode 100644 index 00000000..e8f6fa55 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/ReplicationState.kt @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.task + +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.io.stream.Writeable +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.ToXContentFragment +import org.elasticsearch.common.xcontent.XContentBuilder + +/** + * Enum that represents the state of replication of either shards or indices. 
+ */ +enum class ReplicationState : Writeable, ToXContentFragment { + + INIT, RESTORING, INIT_FOLLOW, FOLLOWING, MONITORING, FAILED, COMPLETED; // TODO: Add PAUSED state + + override fun writeTo(out: StreamOutput) { + out.writeEnum(this) + fun readState(inp : StreamInput) : ReplicationState = inp.readEnum(ReplicationState::class.java) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + return builder.value(toString()) + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowExecutor.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowExecutor.kt new file mode 100644 index 00000000..8b7caf55 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowExecutor.kt @@ -0,0 +1,54 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.autofollow + +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.persistent.AllocatedPersistentTask +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask +import org.elasticsearch.persistent.PersistentTasksExecutor +import org.elasticsearch.tasks.TaskId +import org.elasticsearch.threadpool.ThreadPool + +class AutoFollowExecutor(executor: String, private val clusterService: ClusterService, + private val threadPool: ThreadPool, private val client: Client) : + PersistentTasksExecutor(TASK_NAME, executor) { + + companion object { + const val TASK_NAME = "cluster:opendistro/admin/replication/autofollow" + } + + override fun nodeOperation(task: AllocatedPersistentTask, params: AutoFollowParams, state: PersistentTaskState?) { + if (task is AutoFollowTask) { + task.run() + } else { + task.markAsFailed(IllegalArgumentException("unknown task type : ${task::class.java}")) + } + } + + override fun createTask(id: Long, type: String, action: String, parentTaskId: TaskId, + taskInProgress: PersistentTask, + headers: Map): AllocatedPersistentTask { + return AutoFollowTask(id, type, action, getDescription(taskInProgress), parentTaskId, headers, + executor, clusterService, threadPool, client, taskInProgress.params!!) 
+ } + + override fun getDescription(taskInProgress: PersistentTask): String { + return "replication auto follow task for remote cluster: ${taskInProgress.params?.remoteCluster} with pattern " + + "${taskInProgress.params?.patternName}" + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowParams.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowParams.kt new file mode 100644 index 00000000..bbbf6c01 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowParams.kt @@ -0,0 +1,74 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.autofollow + +import org.elasticsearch.Version +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ObjectParser +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.common.xcontent.XContentParser +import org.elasticsearch.persistent.PersistentTaskParams +import java.io.IOException + +class AutoFollowParams : PersistentTaskParams { + + lateinit var remoteCluster: String + lateinit var patternName: String + + companion object { + const val NAME = AutoFollowExecutor.TASK_NAME + + private val PARSER = ObjectParser(NAME, true) { AutoFollowParams() } + init { + PARSER.declareString(AutoFollowParams::remoteCluster::set, ParseField("remote_cluster")) + PARSER.declareString(AutoFollowParams::patternName::set, ParseField("pattern_name")) + } + + @Throws(IOException::class) + fun fromXContent(parser: XContentParser): AutoFollowParams { + return PARSER.parse(parser, null) + } + } + + private constructor() { + } + + constructor(remoteCluster: String, patternName: String) { + this.remoteCluster = remoteCluster + this.patternName = patternName + } + + constructor(inp: StreamInput) : this(inp.readString(), inp.readString()) + + override fun writeTo(out: StreamOutput) { + out.writeString(remoteCluster) + out.writeString(patternName) + } + + override fun getWriteableName() = NAME + + override fun getMinimalSupportedVersion() = Version.V_7_1_0 + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + return builder.startObject() + .field("remote_cluster", remoteCluster) + .field("pattern_name", patternName) + .endObject() + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowTask.kt 
b/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowTask.kt new file mode 100644 index 00000000..b3de2a39 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/autofollow/AutoFollowTask.kt @@ -0,0 +1,129 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.task.autofollow + +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexAction +import com.amazon.elasticsearch.replication.action.index.ReplicateIndexRequest +import com.amazon.elasticsearch.replication.metadata.ReplicationMetadata +import com.amazon.elasticsearch.replication.task.CrossClusterReplicationTask +import com.amazon.elasticsearch.replication.task.ReplicationState +import com.amazon.elasticsearch.replication.util.suspendExecute +import com.amazon.elasticsearch.replication.util.suspending +import kotlinx.coroutines.delay +import kotlinx.coroutines.isActive +import org.elasticsearch.action.admin.indices.get.GetIndexRequest +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.logging.Loggers +import org.elasticsearch.common.unit.TimeValue +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.tasks.TaskId +import org.elasticsearch.threadpool.ThreadPool + +class AutoFollowTask(id: Long, 
type: String, action: String, description: String, parentTask: TaskId, + headers: Map, + executor: String, + clusterService: ClusterService, + threadPool: ThreadPool, + client: Client, + val params: AutoFollowParams) : + CrossClusterReplicationTask(id, type, action, description, parentTask, headers, + executor, clusterService, threadPool, client) { + + override val remoteCluster = params.remoteCluster + val patternName = params.patternName + override val followerIndexName: String = ReplicationMetadata.AUTOFOLLOW_SECURITY_CONTEXT_PATTERN_PREFIX + + params.patternName //Special case for auto follow + override val log = Loggers.getLogger(javaClass, remoteCluster) + private var trackingIndicesOnTheCluster = setOf() + + companion object { + //TODO: Convert to setting + val AUTO_FOLLOW_CHECK_DELAY = TimeValue.timeValueSeconds(30)!! + } + + override suspend fun execute(initialState: PersistentTaskState?) { + while (scope.isActive) { + autoFollow() + delay(AUTO_FOLLOW_CHECK_DELAY.millis) + } + } + + private suspend fun autoFollow() { + log.debug("Checking $remoteCluster under pattern name $patternName for new indices to auto follow") + val replicationMetadata = clusterService.state().metadata.custom(ReplicationMetadata.NAME) ?: ReplicationMetadata.EMPTY + val entry = replicationMetadata.autoFollowPatterns[remoteCluster]?.get(patternName) + if (entry?.pattern == null) { + log.debug("No auto follow patterns setup for cluster $remoteCluster with pattern name $followerIndexName") + return + } + + // Fetch remote indices matching auto follow pattern + val remoteClient = client.getRemoteClusterClient(remoteCluster) + val indexReq = GetIndexRequest().features(*emptyArray()) + .indices(entry.pattern) + .indicesOptions(IndicesOptions.lenientExpandOpen()) + val response = suspending(remoteClient.admin().indices()::getIndex)(indexReq) + var remoteIndices = response.indices.asIterable() + + val replicatedRemoteIndices = replicationMetadata.replicatedIndices + 
.getOrDefault(remoteCluster, emptyMap()).values + remoteIndices = remoteIndices.minus(replicatedRemoteIndices) + + var currentIndices = clusterService.state().metadata().concreteAllIndices.asIterable() // All indices - open and closed on the cluster + if(remoteIndices.intersect(currentIndices).isNotEmpty()) { + // Log this once when we see any update on indices on the follower cluster to prevent log flood + if(currentIndices.toSet() != trackingIndicesOnTheCluster) { + log.info("Cannot initiate replication for the following indices from remote ($remoteCluster) as indices with " + + "same name already exists on the cluster ${remoteIndices.intersect(currentIndices)}") + trackingIndicesOnTheCluster = currentIndices.toSet() + } + } + remoteIndices = remoteIndices.minus(currentIndices) + + for (newRemoteIndex in remoteIndices) { + startReplication(newRemoteIndex) + } + } + + private suspend fun startReplication(remoteIndex: String) { + if (clusterService.state().metadata().hasIndex(remoteIndex)) { + log.info("""Cannot replicate $remoteCluster:$remoteIndex as an index with the same name already + |exists.""".trimMargin()) + return + } + + try { + log.info("Auto follow starting replication from ${remoteCluster}:$remoteIndex -> $remoteIndex") + val request = ReplicateIndexRequest(remoteIndex, remoteCluster, remoteIndex) + val response = client.suspendExecute(ReplicateIndexAction.INSTANCE, request) + if (!response.isAcknowledged) { + log.warn("Failed to auto follow remote index $remoteIndex") + } + } catch (e: Exception) { + log.warn("Failed to start replication for $remoteCluster:$remoteIndex -> $remoteIndex.", e) + } + } + + override fun toString(): String { + return "AutoFollowTask(from=${remoteCluster} with pattern=${params.patternName})" + } + + override fun replicationTaskResponse(): CrossClusterReplicationTaskResponse { + return CrossClusterReplicationTaskResponse(ReplicationState.COMPLETED.name) + } +} \ No newline at end of file diff --git 
a/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationExecutor.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationExecutor.kt new file mode 100644 index 00000000..8dab25b7 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationExecutor.kt @@ -0,0 +1,78 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.task.index + +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_KEY +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_RUNNING_VALUE +import com.amazon.elasticsearch.replication.metadata.getReplicationStateParamsForIndex +import com.amazon.elasticsearch.replication.util.persistentTasksService +import org.apache.logging.log4j.LogManager +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.persistent.AllocatedPersistentTask +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask +import org.elasticsearch.persistent.PersistentTasksExecutor +import org.elasticsearch.tasks.TaskId +import org.elasticsearch.threadpool.ThreadPool + +class IndexReplicationExecutor(executor: String, private val clusterService: ClusterService, 
+ private val threadPool: ThreadPool, private val client: Client) + : PersistentTasksExecutor(TASK_NAME, executor) { + + companion object { + const val TASK_NAME = "cluster:indices/admin/replication" + val INITIAL_STATE = InitialState + val log = LogManager.getLogger(IndexReplicationExecutor::class.java) + } + + override fun validate(params: IndexReplicationParams, clusterState: ClusterState) { + if (clusterState.routingTable.hasIndex(params.followerIndexName)) { + throw IllegalArgumentException("Cant use same index again for replication. Either close or " + + "delete the index:${params.followerIndexName}") + } + val replicationStateParams = getReplicationStateParamsForIndex(clusterService, params.followerIndexName) + ?: + throw IllegalStateException("Index task started without replication state in cluster metadata") + if (replicationStateParams[REPLICATION_OVERALL_STATE_KEY] != REPLICATION_OVERALL_STATE_RUNNING_VALUE) { + throw IllegalArgumentException("Replication state for index:${params.followerIndexName} should be RUNNING, " + + "but was:${replicationStateParams[REPLICATION_OVERALL_STATE_KEY]}") + } + } + + override fun nodeOperation(task: AllocatedPersistentTask, params: IndexReplicationParams, + state: PersistentTaskState?) 
{ + if (task is IndexReplicationTask) { + task.run(state ?: INITIAL_STATE) + } else { + task.markAsFailed(IllegalArgumentException("Unknown task class ${task::class.java}")) + } + } + + override fun createTask(id: Long, type: String, action: String, parentTaskId: TaskId, + taskInProgress: PersistentTask, + headers: MutableMap?): AllocatedPersistentTask { + return IndexReplicationTask(id, type, action, getDescription(taskInProgress), parentTaskId, + executor, clusterService, threadPool, client, requireNotNull(taskInProgress.params), + persistentTasksService) + } + + override fun getDescription(taskInProgress: PersistentTask): String { + val params = requireNotNull(taskInProgress.params) + return "replication:${params.remoteCluster}:${params.remoteIndex} -> ${params.followerIndexName}" + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationParams.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationParams.kt new file mode 100644 index 00000000..5823e9d7 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationParams.kt @@ -0,0 +1,90 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.index + +import org.elasticsearch.Version +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.Strings +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ContextParser +import org.elasticsearch.common.xcontent.ObjectParser +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.common.xcontent.XContentParser +import org.elasticsearch.index.Index +import org.elasticsearch.persistent.PersistentTaskParams +import java.io.IOException + +class IndexReplicationParams : PersistentTaskParams { + + lateinit var remoteCluster: String + lateinit var remoteIndex: Index + lateinit var followerIndexName: String + + companion object { + const val NAME = IndexReplicationExecutor.TASK_NAME + + private val PARSER = ObjectParser(NAME, true) { IndexReplicationParams() } + init { + PARSER.declareString(IndexReplicationParams::remoteCluster::set, ParseField("remote_cluster")) + PARSER.declareObject(IndexReplicationParams::remoteIndex::set, + { parser: XContentParser, _ -> Index.fromXContent(parser) }, + ParseField("remote_index")) + PARSER.declareString(IndexReplicationParams::followerIndexName::set, ParseField("follower_index")) + } + + @Throws(IOException::class) + fun fromXContent(parser: XContentParser): IndexReplicationParams { + return PARSER.parse(parser, null) + } + } + + constructor(remoteCluster: String, remoteIndex: Index, followerIndexName: String) { + this.remoteCluster = remoteCluster + this.remoteIndex = remoteIndex + this.followerIndexName = followerIndexName + } + + constructor(inp: StreamInput) : this(inp.readString(), Index(inp), inp.readString()) + + private constructor() { + } + + override fun getWriteableName(): String = NAME + + override fun toXContent(builder: XContentBuilder, params: 
ToXContent.Params?): XContentBuilder { + return builder.startObject() + .field("remote_cluster", remoteCluster) + .field("remote_index", remoteIndex) + .field("follower_index", followerIndexName) + .endObject() + } + + override fun writeTo(out: StreamOutput) { + out.writeString(remoteCluster) + remoteIndex.writeTo(out) + out.writeString(followerIndexName) + } + + override fun getMinimalSupportedVersion(): Version { + return Version.V_7_1_0 + } + + override fun toString(): String { + return Strings.toString(this) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationState.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationState.kt new file mode 100644 index 00000000..d7d8216f --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationState.kt @@ -0,0 +1,168 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.index + +import com.amazon.elasticsearch.replication.task.ReplicationState +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowParams +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationParams +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ObjectParser +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.common.xcontent.XContentParser +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask +import java.io.IOException +import java.lang.IllegalArgumentException + +sealed class IndexReplicationState : PersistentTaskState { + var state: ReplicationState + + companion object { + const val NAME = IndexReplicationExecutor.TASK_NAME + + fun reader(inp : StreamInput) : IndexReplicationState { + val state = inp.readEnum(ReplicationState::class.java)!! 
+ return when (state) { + ReplicationState.INIT -> InitialState + ReplicationState.RESTORING -> RestoreState + ReplicationState.INIT_FOLLOW -> InitFollowState + ReplicationState.FOLLOWING -> FollowingState(inp) + ReplicationState.COMPLETED -> CompletedState + ReplicationState.MONITORING -> MonitoringState + ReplicationState.FAILED -> FailedState(inp) + } + } + + private val PARSER = ObjectParser(NAME, true) { Builder() } + + init { + PARSER.declareString(Builder::state, ParseField("state")) + } + + @Throws(IOException::class) + fun fromXContent(parser: XContentParser): IndexReplicationState { + return PARSER.parse(parser, null).build() + } + } + + constructor(state: ReplicationState) { + this.state = state + } + + override fun writeTo(out: StreamOutput) { + out.writeEnum(state) + } + + final override fun getWriteableName(): String = NAME + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + return builder.startObject() + .field("state", state) + .endObject() + } + + class Builder { + lateinit var state: String + + fun state(state: String) { + this.state = state + } + + fun build(): IndexReplicationState { + return when (state) { + ReplicationState.INIT.name -> InitialState + ReplicationState.RESTORING.name -> RestoreState + ReplicationState.INIT_FOLLOW.name -> InitFollowState + ReplicationState.FOLLOWING.name -> FollowingState(mapOf()) + ReplicationState.COMPLETED.name -> CompletedState + ReplicationState.MONITORING.name -> MonitoringState + ReplicationState.FAILED.name -> FailedState(mapOf(), "") + else -> throw IllegalArgumentException("$state - Not a valid state for index replication task") + } + } + } +} + +/** + * Singleton that represent initial state. + */ +object InitialState : IndexReplicationState(ReplicationState.INIT) + +/** + * Singleton that represents an in-progress restore. 
+ */ +object RestoreState : IndexReplicationState(ReplicationState.RESTORING) + +/** + * Singleton that represents initial follow. + */ +object InitFollowState : IndexReplicationState(ReplicationState.INIT_FOLLOW) + +/** + * Singleton that represents completed task state. + */ +object CompletedState : IndexReplicationState(ReplicationState.COMPLETED) + +/** + * Singleton that represents monitoring state. + */ +object MonitoringState : IndexReplicationState(ReplicationState.MONITORING) + +/** + * State when index task is in failed state. + */ +data class FailedState(val failedShards: Map>, val errorMsg: String) + : IndexReplicationState(ReplicationState.FAILED) { + constructor(inp: StreamInput) : this(inp.readMap(::ShardId, ::PersistentTask), "") + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeMap(failedShards, { o, k -> k.writeTo(o) }, { o, v -> v.writeTo(o) }) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + return builder.startObject() + .field("error_message", errorMsg) + .field("failed_shard_replication_tasks").map(failedShards.mapKeys { it.key.toString() }) + .field("state", state) + .endObject() + } +} + +/** + * State when index is being actively replicated. 
+ */ +data class FollowingState(val shardReplicationTasks: Map>) + : IndexReplicationState(ReplicationState.FOLLOWING) { + + constructor(inp: StreamInput) : this(inp.readMap(::ShardId, ::PersistentTask)) + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + out.writeMap(shardReplicationTasks, { o, k -> k.writeTo(o) }, { o, v -> v.writeTo(o) }) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + return builder.startObject() + .field("shard_replication_tasks").map(shardReplicationTasks.mapKeys { it.key.toString() }) + .field("state", state) + .endObject() + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationTask.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationTask.kt new file mode 100644 index 00000000..6087ed82 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/index/IndexReplicationTask.kt @@ -0,0 +1,327 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.index + +import com.amazon.elasticsearch.replication.ReplicationException +import com.amazon.elasticsearch.replication.action.index.block.IndexBlockUpdateType +import com.amazon.elasticsearch.replication.action.index.block.UpdateIndexBlockRequest +import com.amazon.elasticsearch.replication.action.stop.StopIndexReplicationAction +import com.amazon.elasticsearch.replication.action.stop.StopIndexReplicationRequest +import com.amazon.elasticsearch.replication.metadata.getReplicationStateParamsForIndex +import com.amazon.elasticsearch.replication.repository.REMOTE_SNAPSHOT_NAME +import com.amazon.elasticsearch.replication.repository.RemoteClusterRepository +import com.amazon.elasticsearch.replication.seqno.RemoteClusterRetentionLeaseHelper +import com.amazon.elasticsearch.replication.task.CrossClusterReplicationTask +import com.amazon.elasticsearch.replication.task.ReplicationState +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationExecutor +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationParams +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationTask +import com.amazon.elasticsearch.replication.util.suspending +import com.amazon.elasticsearch.replication.util.waitForNextChange +import com.amazon.elasticsearch.replication.util.startTask +import com.amazon.elasticsearch.replication.util.suspendExecute +import kotlinx.coroutines.cancel +import kotlinx.coroutines.delay +import kotlinx.coroutines.isActive +import org.elasticsearch.ElasticsearchTimeoutException +import org.elasticsearch.ResourceNotFoundException +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.ClusterChangedEvent +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.ClusterStateListener +import 
org.elasticsearch.cluster.ClusterStateObserver +import org.elasticsearch.cluster.RestoreInProgress +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.logging.Loggers +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.ToXContentObject +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.index.Index +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksCustomMetadata +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask +import org.elasticsearch.persistent.PersistentTasksNodeService +import org.elasticsearch.persistent.PersistentTasksService +import org.elasticsearch.tasks.TaskId +import org.elasticsearch.threadpool.ThreadPool +import java.util.function.Predicate +import java.util.stream.Collectors +import kotlin.coroutines.resume +import kotlin.coroutines.resumeWithException +import kotlin.coroutines.suspendCoroutine +import com.amazon.elasticsearch.replication.action.index.block.UpdateIndexBlockAction + +class IndexReplicationTask(id: Long, type: String, action: String, description: String, + parentTask: TaskId, + executor: String, + clusterService: ClusterService, + threadPool: ThreadPool, + client: Client, + params: IndexReplicationParams, + private val persistentTasksService: PersistentTasksService) + : CrossClusterReplicationTask(id, type, action, description, parentTask, emptyMap(), executor, + clusterService, threadPool, client), ClusterStateListener { + private lateinit var currentTaskState : IndexReplicationState + private lateinit var followingTaskState : IndexReplicationState + + override val remoteCluster = params.remoteCluster + + private val remoteClient = client.getRemoteClusterClient(remoteCluster) + val remoteIndex = params.remoteIndex + override val 
followerIndexName = params.followerIndexName + + override val log = Loggers.getLogger(javaClass, Index(params.followerIndexName, ClusterState.UNKNOWN_UUID)) + private val cso = ClusterStateObserver(clusterService, log, threadPool.threadContext) + private val retentionLeaseHelper = RemoteClusterRetentionLeaseHelper(clusterService.clusterName.value(), remoteClient) + + private val SLEEP_TIME_BETWEEN_POLL_MS = 5000L + + override fun indicesOrShards(): List = listOf(followerIndexName) + + override suspend fun execute(initialState: PersistentTaskState?) { + checkNotNull(initialState) { "Missing initial state" } + followingTaskState = FollowingState(emptyMap()) + currentTaskState = initialState as IndexReplicationState + while (scope.isActive) { + val newState = when (currentTaskState.state) { + ReplicationState.INIT -> { + addListenerToInterruptTask() + startRestore() + } + ReplicationState.RESTORING -> { + waitForRestore() + } + ReplicationState.INIT_FOLLOW -> { + startShardFollowTasks(emptyMap()) + } + ReplicationState.FOLLOWING -> { + if (currentTaskState is FollowingState) { + followingTaskState = (currentTaskState as FollowingState) + addIndexBlockForReplication() + } else { + throw ReplicationException("Wrong state type: ${currentTaskState::class}") + } + } + ReplicationState.MONITORING -> { + pollShardTaskStatus((followingTaskState as FollowingState).shardReplicationTasks) + } + ReplicationState.FAILED -> { + stopReplicationTasks() + currentTaskState + } + ReplicationState.COMPLETED -> { + markAsCompleted() + CompletedState + } + } + if (newState != currentTaskState) { + currentTaskState = updateState(newState) + } + if (isCompleted) break + } + } + + private fun addListenerToInterruptTask() { + clusterService.addListener(this) + } + + private suspend fun pollShardTaskStatus(shardTasks: Map>): IndexReplicationState { + val failedShardTasks = findFailedShardTasks(shardTasks, clusterService.state()) + if (failedShardTasks.isNotEmpty()) + return 
FailedState(failedShardTasks, "At least one of the shard replication task has failed") + delay(SLEEP_TIME_BETWEEN_POLL_MS) + return MonitoringState + } + + private suspend fun stopReplicationTasks() { + val stopReplicationResponse = client.suspendExecute(StopIndexReplicationAction.INSTANCE, StopIndexReplicationRequest(followerIndexName)) + if (!stopReplicationResponse.isAcknowledged) + throw ReplicationException("Failed to gracefully stop replication after one or more shard tasks failed. " + + "Replication tasks may need to be stopped manually.") + } + + private fun findFailedShardTasks(shardTasks: Map>, clusterState: ClusterState) + :Map> { + + val persistentTasks = clusterState.metadata.custom(PersistentTasksCustomMetadata.TYPE) + val runningShardTasks = persistentTasks.findTasks(ShardReplicationExecutor.TASK_NAME, Predicate { true }).stream() + .map { task -> task.params as ShardReplicationParams } + .collect(Collectors.toList()) + return shardTasks.filterKeys { shardId -> + runningShardTasks.find { task -> task.followerShardId == shardId } == null} + } + + override suspend fun cleanup() { + if (currentTaskState.state == ReplicationState.INIT || currentTaskState.state == ReplicationState.RESTORING) { + log.info("Replication stopped before restore could finish, so removing partial restore..") + cancelRestore() + } + /* This is to minimise overhead of calling an additional listener as + * it continues to be called even after the task is completed. 
+ */ + clusterService.removeListener(this) + } + + private suspend fun addIndexBlockForReplication(): IndexReplicationState { + val request = UpdateIndexBlockRequest(followerIndexName, IndexBlockUpdateType.ADD_BLOCK) + client.suspendExecute(UpdateIndexBlockAction.INSTANCE, request) + return MonitoringState + } + + private suspend fun updateState(newState: IndexReplicationState) : IndexReplicationState { + return suspendCoroutine { cont -> + updatePersistentTaskState(newState, object : ActionListener> { + override fun onFailure(e: Exception) { + cont.resumeWithException(e) + } + + override fun onResponse(response: PersistentTask<*>) { + cont.resume(response.state as IndexReplicationState) + } + }) + } + } + + private suspend fun + startShardFollowTasks(tasks: Map>): FollowingState { + assert(clusterService.state().routingTable.hasIndex(followerIndexName)) { "Can't find index $followerIndexName" } + val shards = clusterService.state().routingTable.indicesRouting().get(followerIndexName).shards() + val newTasks = shards.map { + it.value.shardId + }.associate { shardId -> + val task = tasks.getOrElse(shardId) { + startReplicationTask(ShardReplicationParams(remoteCluster, ShardId(remoteIndex, shardId.id), shardId)) + } + return@associate shardId to task + } + return FollowingState(newTasks) + } + + private suspend fun cancelRestore() { + /* + * Should be safe to delete the retention leases here for all the shards + * as the restore is not yet completed + */ + val shards = clusterService.state().routingTable.indicesRouting().get(followerIndexName).shards() + shards.forEach { + val followerShardId = it.value.shardId + retentionLeaseHelper.removeRetentionLease(ShardId(remoteIndex, followerShardId.id), followerShardId) + } + + /* As given here + * (https://www.elastic.co/guide/en/elasticsearch/reference/6.8/modules-snapshots.html#_stopping_currently_running_snapshot_and_restore_operations) + * a snapshot restore can be cancelled by deleting the indices being restored. 
+ */ + log.info("Deleting the index $followerIndexName") + suspending(client.admin().indices()::delete)(DeleteIndexRequest(followerIndexName)) + } + + private suspend fun startRestore(): IndexReplicationState { + val restoreRequest = client.admin().cluster() + .prepareRestoreSnapshot(RemoteClusterRepository.repoForCluster(remoteCluster), REMOTE_SNAPSHOT_NAME) + .setIndices(remoteIndex.name) + .request() + if (remoteIndex.name != followerIndexName) { + restoreRequest.renamePattern(remoteIndex.name) + .renameReplacement(followerIndexName) + } + val response = suspending(client.admin().cluster()::restoreSnapshot)(restoreRequest) + if (response.restoreInfo != null) { + if (response.restoreInfo.failedShards() != 0) { + throw ReplicationException("Restore failed: $response") + } + return FollowingState(emptyMap()) + } + cso.waitForNextChange("remote restore start") { inProgressRestore(it) != null } + return RestoreState + } + + private suspend fun waitForRestore(): IndexReplicationState { + var restore = inProgressRestore() ?: throw ResourceNotFoundException(""" + Unable to find in progress restore for remote index: $remoteCluster:$remoteIndex. + This can happen if there was a badly timed master node failure. + """.trimIndent()) + while (restore.state() != RestoreInProgress.State.FAILURE && restore.state() != RestoreInProgress.State.SUCCESS) { + try { + cso.waitForNextChange("remote restore finish") + } catch(e: ElasticsearchTimeoutException) { + log.info("Waiting for restore to complete") + } + restore = inProgressRestore() ?: throw ResourceNotFoundException(""" + Unable to find in progress restore for remote index: $remoteCluster:$remoteIndex. + This can happen if there was a badly timed master node failure. 
+ """.trimIndent()) + } + + if (restore.state() == RestoreInProgress.State.FAILURE) { + val failureReason = restore.shards().values().find { + it.value.state() == RestoreInProgress.State.FAILURE + }!!.value.reason() + throw ReplicationException("Remote restore failed: $failureReason") + } else { + return InitFollowState + } + } + + private fun inProgressRestore(cs: ClusterState = clusterService.state()): RestoreInProgress.Entry? { + return cs.custom(RestoreInProgress.TYPE).singleOrNull { entry -> + entry.snapshot().repository == RemoteClusterRepository.repoForCluster(remoteCluster) && + entry.indices().singleOrNull { idx -> idx == followerIndexName } != null + } + } + + private suspend + fun startReplicationTask(replicationParams : ShardReplicationParams) : PersistentTask { + return persistentTasksService.startTask(ShardReplicationTask.taskIdForShard(replicationParams.followerShardId), + ShardReplicationExecutor.TASK_NAME, replicationParams) + } + + override fun clusterChanged(event: ClusterChangedEvent) { + log.debug("Cluster metadata listener invoked on index task...") + if (event.metadataChanged()) { + val replicationStateParams = getReplicationStateParamsForIndex(clusterService, followerIndexName) + if (replicationStateParams == null) { + if (PersistentTasksNodeService.Status(State.STARTED) == status) + scope.cancel("Index replication task received an interrupt.") + } + } + } + + override fun replicationTaskResponse(): CrossClusterReplicationTaskResponse { + return IndexReplicationTaskResponse(currentTaskState) + } + + class IndexReplicationTaskResponse(private val taskState : IndexReplicationState): + CrossClusterReplicationTaskResponse(ReplicationState.COMPLETED.name), ToXContentObject { + + override fun writeTo(out: StreamOutput) { + super.writeTo(out) + taskState.writeTo(out) + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + var responseBuilder = builder.startObject() + .field("index_task_status", 
ReplicationState.COMPLETED.name) + .field("following_tasks") + return taskState.toXContent(responseBuilder, params).endObject() + } + } +} + diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationExecutor.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationExecutor.kt new file mode 100644 index 00000000..1247315b --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationExecutor.kt @@ -0,0 +1,83 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.shard + +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_KEY +import com.amazon.elasticsearch.replication.metadata.REPLICATION_OVERALL_STATE_RUNNING_VALUE +import com.amazon.elasticsearch.replication.metadata.getReplicationStateParamsForIndex +import org.apache.logging.log4j.LogManager +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.ClusterState +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.persistent.AllocatedPersistentTask +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.Assignment +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask +import org.elasticsearch.persistent.PersistentTasksExecutor +import org.elasticsearch.tasks.TaskId +import org.elasticsearch.threadpool.ThreadPool + +class ShardReplicationExecutor(executor: String, private val clusterService : ClusterService, + private val threadPool: ThreadPool, private val client: Client) : + PersistentTasksExecutor(TASK_NAME, executor) { + + companion object { + const val TASK_NAME = "cluster:indices/shards/replication" + val SHARD_NOT_ACTIVE = Assignment(null, "No active shard found") + val log = LogManager.getLogger(ShardReplicationExecutor::class.java) + } + + override fun validate(params: ShardReplicationParams, clusterState: ClusterState) { + // Checks that there is a primary shard. Side-effect will check that the index and shard exists. 
+ clusterState.routingTable.shardRoutingTable(params.followerShardId) + .primaryShard() ?: throw ElasticsearchException("no primary shard available for ${params.followerShardId}") + val replicationStateParams = getReplicationStateParamsForIndex(clusterService, params.followerShardId.indexName) + ?: + throw IllegalStateException("Cant find replication details metadata for followIndex:${params.followerShardId.indexName}. " + + "Seems like replication is not in progress, so not starting shard task for shardId:${params.followerShardId}") + if (replicationStateParams[REPLICATION_OVERALL_STATE_KEY] != REPLICATION_OVERALL_STATE_RUNNING_VALUE) + throw IllegalStateException("Unknown replication state metadata:${replicationStateParams[REPLICATION_OVERALL_STATE_KEY]} " + + " followIndex:${params.followerShardId.indexName}") + } + + override fun getAssignment(params: ShardReplicationParams, clusterState: ClusterState) : Assignment { + val primaryShard = clusterState.routingTable().shardRoutingTable(params.followerShardId).primaryShard() + if (!primaryShard.active()) return SHARD_NOT_ACTIVE + return Assignment(primaryShard.currentNodeId(), "node with primary shard") + } + + override fun nodeOperation(task: AllocatedPersistentTask, params: ShardReplicationParams, state: PersistentTaskState?) 
{ + if (task is ShardReplicationTask) { + log.info("starting persistent replication task: $params, $state, ${task.allocationId}, ${task.status}") + task.run() + } else { + task.markAsFailed(IllegalArgumentException("Unknown task class ${task::class.java}")) + } + } + + override fun createTask(id: Long, type: String, action: String, parentTaskId: TaskId, + taskInProgress: PersistentTask, + headers: Map): AllocatedPersistentTask { + return ShardReplicationTask(id, type, action, getDescription(taskInProgress), parentTaskId, + taskInProgress.params!!, executor, clusterService, threadPool, client) + } + + override fun getDescription(taskInProgress: PersistentTask): String { + val params = requireNotNull(taskInProgress.params) + return "replication:${params.remoteCluster}:${params.remoteShardId} -> ${params.followerShardId}" + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationParams.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationParams.kt new file mode 100644 index 00000000..1612d75f --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationParams.kt @@ -0,0 +1,129 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.shard + +import org.elasticsearch.Version +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.Strings +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ObjectParser +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.common.xcontent.XContentParser +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.persistent.PersistentTaskParams +import java.io.IOException +import org.elasticsearch.index.Index + + +class ShardReplicationParams : PersistentTaskParams { + + var remoteCluster: String + var remoteShardId: ShardId + var followerShardId: ShardId + + constructor(remoteCluster: String, remoteShardId: ShardId, followerShardId: ShardId) { + this.remoteCluster = remoteCluster + this.remoteShardId = remoteShardId + this.followerShardId = followerShardId + } + + constructor(inp : StreamInput) : this(inp.readString(), ShardId(inp), ShardId(inp)) + + companion object { + const val NAME = ShardReplicationExecutor.TASK_NAME + + private val PARSER = ObjectParser(ShardReplicationExecutor.TASK_NAME, true) { Builder() } + init { + PARSER.declareString(Builder::remoteCluster, ParseField("remote_cluster")) + // ShardId is converted to String - parsing from the same format to construct the params + PARSER.declareString(Builder::remoteShardId, ParseField("remote_shard")) + PARSER.declareString(Builder::remoteIndexUUID, ParseField("remote_index_uuid")) + PARSER.declareString(Builder::followerShardId, ParseField("follower_shard")) + PARSER.declareString(Builder::followerIndexUUID, ParseField("follower_index_uuid")) + } + + @Throws(IOException::class) + fun fromXContent(parser: XContentParser): ShardReplicationParams { + return PARSER.parse(parser, null).build() + } + } + + override fun getWriteableName(): 
String { + return NAME + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + return builder.startObject() + .field("remote_cluster", remoteCluster) + .field("remote_shard", remoteShardId) + .field("remote_index_uuid", remoteShardId.index.uuid) // The XContent of ShardId doesn't serialize index uuid + .field("follower_shard", followerShardId) + .field("follower_index_uuid", followerShardId.index.uuid) + .endObject() + } + + override fun writeTo(out: StreamOutput) { + out.writeString(remoteCluster) + remoteShardId.writeTo(out) + followerShardId.writeTo(out) + } + + override fun getMinimalSupportedVersion(): Version { + return Version.V_7_1_0 + } + + override fun toString(): String { + return Strings.toString(this) + } + + class Builder { + lateinit var remoteCluster: String + lateinit var remoteShardId: String + lateinit var remoteIndexUUID: String + lateinit var followerShardId: String + lateinit var followerIndexUUID: String + + fun remoteCluster(remoteCluster: String) { + this.remoteCluster = remoteCluster + } + + fun remoteShardId(remoteShardId: String) { + this.remoteShardId = remoteShardId + } + + fun remoteIndexUUID(remoteIndexUUID: String) { + this.remoteIndexUUID = remoteIndexUUID + } + + fun followerShardId(followerShardId: String) { + this.followerShardId = followerShardId + } + + fun followerIndexUUID(followerIndexUUID: String) { + this.followerIndexUUID = followerIndexUUID + } + + fun build(): ShardReplicationParams { + val remoteShardIdObj = ShardId.fromString(remoteShardId) + val followerShardIdObj = ShardId.fromString(followerShardId) + return ShardReplicationParams(remoteCluster, ShardId(Index(remoteShardIdObj.indexName, remoteIndexUUID), + remoteShardIdObj.id), ShardId(Index(followerShardIdObj.indexName, followerIndexUUID), + followerShardIdObj.id)) + } + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationState.kt 
b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationState.kt new file mode 100644 index 00000000..0c5163ff --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationState.kt @@ -0,0 +1,99 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.task.shard + +import com.amazon.elasticsearch.replication.task.ReplicationState +import org.elasticsearch.common.ParseField +import org.elasticsearch.common.io.stream.StreamInput +import org.elasticsearch.common.io.stream.StreamOutput +import org.elasticsearch.common.xcontent.ObjectParser +import org.elasticsearch.common.xcontent.ToXContent +import org.elasticsearch.common.xcontent.XContentBuilder +import org.elasticsearch.common.xcontent.XContentParser +import org.elasticsearch.persistent.PersistentTaskState +import java.io.IOException +import java.lang.IllegalArgumentException +import java.lang.IllegalStateException + +sealed class ShardReplicationState : PersistentTaskState { + + var state: ReplicationState + companion object { + const val NAME = ShardReplicationExecutor.TASK_NAME + fun reader(inp : StreamInput): ShardReplicationState { + val state = inp.readEnum(ReplicationState::class.java)!! 
+ return when(state) { + ReplicationState.INIT -> throw IllegalStateException("INIT - Illegal state for shard replication task") + ReplicationState.RESTORING -> throw IllegalStateException("RESTORING - Illegal state for shard replication task") + ReplicationState.INIT_FOLLOW -> throw IllegalStateException("INIT_FOLLOW - Illegal state for shard replication task") + ReplicationState.FOLLOWING -> FollowingState + ReplicationState.COMPLETED -> CompletedState + else -> throw IllegalArgumentException("$state - Not a valid state for shard replication task") + } + } + + private val PARSER = ObjectParser(NAME, true) { Builder() } + init { + PARSER.declareString(Builder::state, ParseField("state")) + } + + @Throws(IOException::class) + fun fromXContent(parser: XContentParser): ShardReplicationState { + return PARSER.parse(parser, null).build() + } + } + + constructor(state: ReplicationState) { + this.state = state + } + + override fun writeTo(out: StreamOutput) { + out.writeEnum(state) + } + + override fun getWriteableName(): String { + return NAME + } + + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder { + return builder.startObject() + .field("state", state) + .endObject() + } + + class Builder { + lateinit var state: String + + fun state(state: String) { + this.state = state + } + fun build(): ShardReplicationState { + return when (state) { + ReplicationState.INIT.name -> throw IllegalArgumentException("INIT - Illegal state for shard replication task") + ReplicationState.RESTORING.name -> throw IllegalArgumentException("RESTORING - Illegal state for shard replication task") + ReplicationState.INIT_FOLLOW.name -> throw IllegalArgumentException("INIT_FOLLOW - Illegal state for shard replication task") + ReplicationState.FOLLOWING.name -> FollowingState + ReplicationState.COMPLETED.name -> CompletedState + else -> throw IllegalArgumentException("$state - Not a valid state for shard replication task") + } + } + } +} + + +object 
FollowingState : ShardReplicationState(ReplicationState.FOLLOWING) +object CompletedState : ShardReplicationState(ReplicationState.COMPLETED) + diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationTask.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationTask.kt new file mode 100644 index 00000000..a012dc77 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/ShardReplicationTask.kt @@ -0,0 +1,175 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.shard + +import com.amazon.elasticsearch.replication.ReplicationPlugin.Companion.REPLICATION_CHANGE_BATCH_SIZE +import com.amazon.elasticsearch.replication.action.changes.GetChangesAction +import com.amazon.elasticsearch.replication.action.changes.GetChangesRequest +import com.amazon.elasticsearch.replication.action.changes.GetChangesResponse +import com.amazon.elasticsearch.replication.metadata.getReplicationStateParamsForIndex +import com.amazon.elasticsearch.replication.seqno.RemoteClusterRetentionLeaseHelper +import com.amazon.elasticsearch.replication.task.CrossClusterReplicationTask +import com.amazon.elasticsearch.replication.task.ReplicationState +import com.amazon.elasticsearch.replication.util.indicesService +import com.amazon.elasticsearch.replication.util.suspendExecuteWithRetries +import com.amazon.elasticsearch.replication.util.suspending +import kotlinx.coroutines.ObsoleteCoroutinesApi +import kotlinx.coroutines.cancel +import kotlinx.coroutines.isActive +import kotlinx.coroutines.sync.Semaphore +import org.elasticsearch.ElasticsearchTimeoutException +import org.elasticsearch.action.NoSuchNodeException +import org.elasticsearch.action.support.IndicesOptions +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.ClusterChangedEvent +import org.elasticsearch.cluster.ClusterStateListener +import org.elasticsearch.cluster.node.DiscoveryNode +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.logging.Loggers +import org.elasticsearch.index.seqno.RetentionLeaseActions +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.index.shard.ShardNotFoundException +import org.elasticsearch.persistent.PersistentTaskState +import org.elasticsearch.persistent.PersistentTasksNodeService +import org.elasticsearch.tasks.TaskId +import org.elasticsearch.threadpool.ThreadPool + +class ShardReplicationTask(id: Long, type: String, action: 
String, description: String, parentTask: TaskId, + params: ShardReplicationParams, executor: String, clusterService: ClusterService, + threadPool: ThreadPool, client: Client) + : CrossClusterReplicationTask(id, type, action, description, parentTask, emptyMap(), + executor, clusterService, threadPool, client) { + + override val remoteCluster: String = params.remoteCluster + override val followerIndexName: String = params.followerShardId.indexName + private val remoteShardId = params.remoteShardId + private val followerShardId = params.followerShardId + private val remoteClient = client.getRemoteClusterClient(remoteCluster) + private val retentionLeaseHelper = RemoteClusterRetentionLeaseHelper(clusterService.clusterName.value(), remoteClient) + + private val clusterStateListenerForTaskInterruption = ClusterStateListenerForTaskInterruption() + + @Volatile private var batchSize = clusterService.clusterSettings.get(REPLICATION_CHANGE_BATCH_SIZE) + init { + clusterService.clusterSettings.addSettingsUpdateConsumer(REPLICATION_CHANGE_BATCH_SIZE) { batchSize = it } + } + + override val log = Loggers.getLogger(javaClass, followerShardId)!! + + companion object { + fun taskIdForShard(shardId: ShardId) = "replication:${shardId}" + const val CONCURRENT_REQUEST_RATE_LIMIT = 10 + } + + @ObsoleteCoroutinesApi + override suspend fun execute(initialState: PersistentTaskState?) { + replicate() + } + + override suspend fun cleanup() { + retentionLeaseHelper.removeRetentionLease(remoteShardId, followerShardId) + /* This is to minimise overhead of calling an additional listener as + * it continues to be called even after the task is completed. 
+ */ + clusterService.removeListener(clusterStateListenerForTaskInterruption) + } + + private fun addListenerToInterruptTask() { + clusterService.addListener(clusterStateListenerForTaskInterruption) + } + + inner class ClusterStateListenerForTaskInterruption : ClusterStateListener { + override fun clusterChanged(event: ClusterChangedEvent) { + log.debug("Cluster metadata listener invoked on shard task...") + if (event.metadataChanged()) { + val replicationStateParams = getReplicationStateParamsForIndex(clusterService, followerShardId.indexName) + if (replicationStateParams == null) { + if (PersistentTasksNodeService.Status(State.STARTED) == status) + scope.cancel("Shard replication task received an interrupt.") + } + } + } + } + + override fun indicesOrShards() = listOf(followerShardId) + + @ObsoleteCoroutinesApi + private suspend fun replicate() { + updateTaskState(FollowingState) + // TODO: Acquire retention lease prior to initiating remote recovery + retentionLeaseHelper.addRetentionLease(remoteShardId, RetentionLeaseActions.RETAIN_ALL, followerShardId) + val followerIndexService = indicesService.indexServiceSafe(followerShardId.index) + val indexShard = followerIndexService.getShard(followerShardId.id) + // After restore, persisted localcheckpoint is matched with maxSeqNo. + // Fetch the operations after localCheckpoint from the leader + var seqNo = indexShard.localCheckpoint + 1 + val node = primaryShardNode() + addListenerToInterruptTask() + + // Not really used yet as we only have one get changes action at a time. + val rateLimiter = Semaphore(CONCURRENT_REQUEST_RATE_LIMIT) + val sequencer = TranslogSequencer(scope, followerShardId, remoteCluster, remoteShardId.indexName, + TaskId(clusterService.nodeName, id), client, rateLimiter, seqNo - 1) + + // TODO: Redesign this to avoid sharing the rateLimiter between this block and the sequencer. + // This was done as a stopgap to work around a concurrency bug that needed to be fixed fast. 
+ while (scope.isActive) { + rateLimiter.acquire() + try { + val changesResponse = getChanges(node, seqNo) + log.info("Got ${changesResponse.changes.size} changes starting from seqNo: $seqNo") + sequencer.send(changesResponse) + seqNo = changesResponse.changes.lastOrNull()?.seqNo()?.inc() ?: seqNo + } catch (e: ElasticsearchTimeoutException) { + log.info("Timed out waiting for new changes. Current seqNo: $seqNo") + rateLimiter.release() + continue + } + retentionLeaseHelper.renewRetentionLease(remoteShardId, seqNo, followerShardId) + } + sequencer.close() + } + + private suspend fun primaryShardNode(): DiscoveryNode { + val clusterStateRequest = remoteClient.admin().cluster().prepareState() + .clear() + .setIndices(remoteShardId.indexName) + .setRoutingTable(true) + .setNodes(true) + .setIndicesOptions(IndicesOptions.strictSingleIndexNoExpandForbidClosed()) + .request() + val remoteState = suspending(remoteClient.admin().cluster()::state)(clusterStateRequest).state + val shardRouting = remoteState.routingNodes.activePrimary(remoteShardId) + ?: throw ShardNotFoundException(remoteShardId, "cluster: $remoteCluster") + return remoteState.nodes().get(shardRouting.currentNodeId()) + ?: throw NoSuchNodeException("remote: $remoteCluster:${shardRouting.currentNodeId()}") + } + + private suspend fun getChanges(remoteNode: DiscoveryNode, fromSeqNo: Long): GetChangesResponse { + val remoteClient = client.getRemoteClusterClient(remoteCluster) + val request = GetChangesRequest(remoteNode, remoteShardId, fromSeqNo, fromSeqNo + batchSize) + return remoteClient.suspendExecuteWithRetries(action = GetChangesAction.INSTANCE, req = request, log = log) + } + + override fun toString(): String { + return "ShardReplicationTask(from=${remoteCluster}$remoteShardId to=$followerShardId)" + } + + override fun replicationTaskResponse(): CrossClusterReplicationTaskResponse { + // Cancellation and valid executions are marked as completed + return 
CrossClusterReplicationTaskResponse(ReplicationState.COMPLETED.name) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/TranslogSequencer.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/TranslogSequencer.kt new file mode 100644 index 00000000..fb52cb0f --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/task/shard/TranslogSequencer.kt @@ -0,0 +1,96 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.task.shard + +import com.amazon.elasticsearch.replication.ReplicationException +import com.amazon.elasticsearch.replication.action.changes.GetChangesResponse +import com.amazon.elasticsearch.replication.action.replay.ReplayChangesAction +import com.amazon.elasticsearch.replication.action.replay.ReplayChangesRequest +import com.amazon.elasticsearch.replication.util.suspendExecute +import kotlinx.coroutines.CompletableDeferred +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.ObsoleteCoroutinesApi +import kotlinx.coroutines.channels.Channel +import kotlinx.coroutines.channels.actor +import kotlinx.coroutines.sync.Semaphore +import org.elasticsearch.client.Client +import org.elasticsearch.common.logging.Loggers +import org.elasticsearch.index.shard.ShardId +import org.elasticsearch.index.translog.Translog +import org.elasticsearch.tasks.TaskId +import java.util.concurrent.ConcurrentHashMap + +/** + * A TranslogSequencer allows multiple producers of [Translog.Operation]s to write them in sequence number order to an + * index. It internally uses an [actor] to serialize writes to the index. Producer can call the [send] method + * to add a batch of operations to the queue. If the queue is full the producer will be suspended. Operations can be + * sent out of order i.e. the operation with sequence number 2 can be sent before the operation with sequence number 1. + * In this case the Sequencer will internally buffer the operations that cannot be delivered until the missing in-order + * operations arrive. + * + * This uses the ObsoleteCoroutinesApi actor API. As described in the [actor] docs there is no current replacement for + * this API and a new one is being worked on to which we can migrate when needed. 
+ */ +@ObsoleteCoroutinesApi +class TranslogSequencer(scope: CoroutineScope, private val followerShardId: ShardId, + private val remoteCluster: String, private val remoteIndexName: String, + private val parentTaskId: TaskId, private val client: Client, + private val rateLimiter: Semaphore, initialSeqNo: Long) { + + private val unAppliedChanges = ConcurrentHashMap() + private val log = Loggers.getLogger(javaClass, followerShardId)!! + private val completed = CompletableDeferred() + + // Channel is unlimited capacity as changes can arrive out of order but must be applied in-order. If the channel + // had limited capacity it could deadlock. Instead we use a separate rate limiter Semaphore whose permits are + // always acquired in order of sequence number to avoid deadlock. + private val sequencer = scope.actor(capacity = Channel.UNLIMITED) { + // Exceptions thrown here will mark the channel as failed and the next attempt to send to the channel will + // raise the same exception. See [SendChannel.close] method for details. + var highWatermark = initialSeqNo + for (m in channel) { + while (unAppliedChanges.containsKey(highWatermark + 1)) { + try { + val next = unAppliedChanges.remove(highWatermark + 1)!! + val replayRequest = ReplayChangesRequest(followerShardId, next.changes, next.maxSeqNoOfUpdatesOrDeletes, + remoteCluster, remoteIndexName) + replayRequest.parentTask = parentTaskId + val replayResponse = client.suspendExecute(ReplayChangesAction.INSTANCE, replayRequest) + if (replayResponse.shardInfo.failed > 0) { + replayResponse.shardInfo.failures.forEachIndexed { i, failure -> + log.error("Failed replaying changes. 
Failure:$i:$failure") + } + throw ReplicationException("failed to replay changes", replayResponse.shardInfo.failures) + } + highWatermark = next.changes.lastOrNull()?.seqNo() ?: highWatermark + } finally { + rateLimiter.release() + } + } + } + completed.complete(Unit) + } + + suspend fun close() { + sequencer.close() + completed.await() + } + + suspend fun send(changes : GetChangesResponse) { + unAppliedChanges[changes.fromSeqNo] = changes + sequencer.send(Unit) + } +} diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/util/Coroutines.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/util/Coroutines.kt new file mode 100644 index 00000000..1279ef05 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/util/Coroutines.kt @@ -0,0 +1,198 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.util + +import kotlinx.coroutines.* +import org.elasticsearch.ElasticsearchTimeoutException +import org.elasticsearch.ExceptionsHelper +import org.elasticsearch.action.ActionListener +import org.elasticsearch.action.ActionRequest +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.action.ActionType +import org.elasticsearch.action.support.master.AcknowledgedRequest +import org.elasticsearch.action.support.master.MasterNodeRequest +import org.elasticsearch.client.Client +import org.elasticsearch.client.ElasticsearchClient +import org.elasticsearch.cluster.* +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.common.Priority +import org.elasticsearch.common.unit.TimeValue +import org.elasticsearch.common.util.concurrent.ThreadContext +import org.elasticsearch.index.shard.GlobalCheckpointListeners +import org.elasticsearch.index.shard.IndexShard +import org.elasticsearch.persistent.PersistentTaskParams +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask +import org.elasticsearch.persistent.PersistentTasksService +import org.elasticsearch.threadpool.ThreadPool +import java.util.concurrent.TimeoutException +import kotlin.coroutines.* + +/** + * Converts methods that take an ActionListener callback into a suspending function. Any method in [Client] that takes + * an [ActionListener] callback can be translated to a suspending function as follows: + * + * client.get(GetRequest("index", "id"), object: ActionListener { + * override fun onResponse(resp: GetResponse) { ... } + * override fun onFailure(e: Exception) { ... } + * } + * ) + + * // becomes the much more readable (and chain-able)... + * val response = suspending(client::get)(GetRequest("index", "id")) // throws exception on failure that can be caught + * + * @param fn - a block of code that is passed an [ActionListener] that should be passed to the ES client API. 
+ */ +fun suspending(fn: (Req, ActionListener) -> Unit): suspend (Req) -> Resp { + return { req: Req -> suspendCancellableCoroutine { cont -> fn(req, CoroutineActionListener(cont)) } } +} + +suspend fun + ElasticsearchClient.suspendExecute(action: ActionType, req: Req) : Resp { + return suspendCancellableCoroutine { cont -> execute(action, req, CoroutineActionListener(cont)) } +} + +suspend fun IndexShard.waitForGlobalCheckpoint(waitingForGlobalCheckpoint: Long, timeout: TimeValue?) : Long { + return suspendCancellableCoroutine { cont -> + val listener = object : GlobalCheckpointListeners.GlobalCheckpointListener { + + // The calling coroutine context should be configured with an explicit executor so this choice shouldn't matter + override fun executor() = Dispatchers.Default.asExecutor() + + override fun accept(gcp: Long, e: Exception?) { + when { + e is TimeoutException -> cont.resumeWithException(ElasticsearchTimeoutException(e.message)) + e != null -> cont.resumeWithException(e) + else -> cont.resume(gcp) + } + } + + } + addGlobalCheckpointListener(waitingForGlobalCheckpoint, listener, timeout) + } +} + +suspend fun ClusterStateObserver.waitForNextChange(reason: String, predicate: (ClusterState) -> Boolean = { true }) { + return suspendCancellableCoroutine { cont -> + waitForNextChange(object : ClusterStateObserver.Listener { + override fun onClusterServiceClose() { + cont.cancel() + } + + override fun onNewClusterState(state: ClusterState?) { + cont.resume(Unit) + } + + override fun onTimeout(timeout: TimeValue?) 
{ + cont.resumeWithException(ElasticsearchTimeoutException("timed out waiting for $reason")) + } + }, predicate) + } +} + +suspend fun ClusterService.waitForClusterStateUpdate(source: String, + updateTaskFactory: (ActionListener) -> + AckedClusterStateUpdateTask) : T = + suspendCoroutine { cont -> submitStateUpdateTask(source, updateTaskFactory(CoroutineActionListener(cont))) } + +suspend fun + PersistentTasksService.startTask(taskId: String, taskName: String, params : T): PersistentTask { + return suspendCoroutine { cont -> this.sendStartRequest(taskId, taskName, params, CoroutineActionListener(cont)) } +} + +suspend fun PersistentTasksService.removeTask(taskId: String): PersistentTask<*> { + return suspendCoroutine { cont -> this.sendRemoveRequest(taskId, CoroutineActionListener(cont)) } +} + +suspend fun PersistentTasksService.waitForTaskCondition(taskId: String, timeout: TimeValue, + condition: (PersistentTask<*>) -> Boolean) : PersistentTask<*> { + return suspendCancellableCoroutine { cont -> + val listener = object : PersistentTasksService.WaitForPersistentTaskListener { + override fun onResponse(response: PersistentTask) = cont.resume(response) + override fun onFailure(e: Exception) = cont.resumeWithException(e) + } + waitForPersistentTaskCondition(taskId, { p -> condition(p) }, timeout, listener) + } +} + +class CoroutineActionListener(private val continuation: Continuation) : ActionListener { + override fun onResponse(result: T) = continuation.resume(result) + override fun onFailure(e: Exception) = continuation.resumeWithException(ExceptionsHelper.unwrapCause(e)) +} + +/** + * Extension function variant of [ActionListener.completeWith] + */ +inline fun ActionListener.completeWith(block : () -> T) { + try { + onResponse(block()) + } catch (e: Exception) { + onFailure(e) + } +} + +/** + * Stores and restores the Elasticsearch [ThreadContext] when the coroutine is suspended and resumed. 
+ * + * The implementation is a little confusing because Elasticsearch and Kotlin uses [ThreadContext.stashContext] to + * restore the default context. + * + * @param threadContext - a [ThreadContext] instance + */ +class ElasticThreadContextElement(private val threadContext: ThreadContext) : ThreadContextElement { + + companion object Key : CoroutineContext.Key + private var context: ThreadContext.StoredContext = threadContext.newStoredContext(true) + + override val key: CoroutineContext.Key<*> + get() = Key + + override fun restoreThreadContext(context: CoroutineContext, oldState: Unit) { + this.context = threadContext.stashContext() + } + + override fun updateThreadContext(context: CoroutineContext) = this.context.close() +} + +fun ThreadPool.coroutineContext() : CoroutineContext = ElasticThreadContextElement(threadContext) + +/** + * Captures the current Elastic [ThreadContext] in the coroutine context as well as sets the given executor as the dispatcher + */ +fun ThreadPool.coroutineContext(executorName: String) : CoroutineContext = + executor(executorName).asCoroutineDispatcher() + coroutineContext() + +suspend fun > submitClusterStateUpdateTask(request: AcknowledgedRequest, + taskExecutor: ClusterStateTaskExecutor>, + clusterService: ClusterService, + source: String): ClusterState { + return suspendCoroutine { continuation -> + clusterService.submitStateUpdateTask( + source, + request, + ClusterStateTaskConfig.build(Priority.NORMAL), + taskExecutor, + object : ClusterStateTaskListener { + override fun onFailure(source: String, e: java.lang.Exception) { + continuation.resumeWithException(e) + } + + override fun clusterStateProcessed(source: String?, oldState: ClusterState?, newState: ClusterState) { + continuation.resume(newState) + } + }) + } + +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/util/Extensions.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/util/Extensions.kt new file mode 100644 index 
00000000..cd12fab3 --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/util/Extensions.kt @@ -0,0 +1,103 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.util + +import kotlinx.coroutines.delay +import org.apache.logging.log4j.Logger +import org.elasticsearch.ElasticsearchException +import org.elasticsearch.action.ActionRequest +import org.elasticsearch.action.ActionResponse +import org.elasticsearch.action.ActionType +import org.elasticsearch.client.Client +import org.elasticsearch.cluster.service.ClusterService +import org.elasticsearch.index.store.Store +import org.elasticsearch.transport.ConnectTransportException +import org.elasticsearch.transport.NodeDisconnectedException + +/* + * Extension function to use the store object + */ +fun Store.performOp(tryBlock: () -> Unit, finalBlock: () -> Unit = {}) { + incRef() + try { + tryBlock() + } + finally { + finalBlock() + decRef() + } +} + +fun Client.executeUnderSecurityContext(clusterService: ClusterService, + remoteClusterName: String, + followerIndexName: String, + block: () -> T) { + val userString = SecurityContext.fromClusterState(clusterService.state(), + remoteClusterName, followerIndexName) + this.threadPool().threadContext.newStoredContext(true).use { + SecurityContext.toThreadContext(this.threadPool().threadContext, userString) + block() + } +} + + +/** + * Retries a given block of code. 
+ * Only specified error are retried + * + * @param numberOfRetries - Number of retries + * @param backoff - Retry interval + * @param maxTimeOut - Time out for retries + * @param factor - ExponentialBackoff factor + * @param log - logger used to log intermediate failures + * @param retryOn - javaClass name of Elasticsearch exceptions that should be retried along with default retryable exceptions + * @param block - the block of code to retry. This should be a suspend function. + */ +suspend fun Client.suspendExecuteWithRetries( + action: ActionType, + req: Req, + numberOfRetries: Int = 5, + backoff: Long = 10000, // 10 seconds + maxTimeOut: Long = 600000, // 10 minutes + factor: Double = 2.0, + log: Logger, + retryOn: ArrayList> = ArrayList()) : Resp { + var currentBackoff = backoff + retryOn.addAll(defaultRetryableExceptions()) + repeat(numberOfRetries - 1) { + try { + return suspendExecute(action, req) + } catch (e: ElasticsearchException) { + if (retryOn.contains(e.javaClass)) { + log.warn("Encountered a failure. Retrying in ${currentBackoff/1000} seconds.", e) + delay(currentBackoff) + currentBackoff = (currentBackoff * factor).toLong().coerceAtMost(maxTimeOut) + } else { + throw e + } + } + } + return suspendExecute(action, req) // last attempt +} + +private fun defaultRetryableExceptions(): ArrayList> { + val retryableExceptions = ArrayList>() + retryableExceptions.add(NodeDisconnectedException::class.java) + retryableExceptions.add(ConnectTransportException::class.java) + return retryableExceptions +} + + diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/util/Injectables.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/util/Injectables.kt new file mode 100644 index 00000000..2dfd5c0b --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/util/Injectables.kt @@ -0,0 +1,47 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.util + +import org.elasticsearch.common.component.AbstractLifecycleComponent +import org.elasticsearch.common.inject.Inject +import org.elasticsearch.indices.IndicesService +import org.elasticsearch.persistent.PersistentTasksService + +lateinit var indicesService: IndicesService +lateinit var persistentTasksService: PersistentTasksService + +/** + * Provides access to services and components that are not directly available via the [Plugin] interface. This class + * simply get the required instances via the injector and saves them to static variables for access elsewhere. + */ +class Injectables @Inject constructor(indicesSvc: IndicesService, + persistentTasksSvc: PersistentTasksService) + : AbstractLifecycleComponent() { + + init { + indicesService = indicesSvc + persistentTasksService = persistentTasksSvc + } + + override fun doStart() { + } + + override fun doStop() { + } + + override fun doClose() { + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/amazon/elasticsearch/replication/util/SecurityContext.kt b/src/main/kotlin/com/amazon/elasticsearch/replication/util/SecurityContext.kt new file mode 100644 index 00000000..3df6ae1e --- /dev/null +++ b/src/main/kotlin/com/amazon/elasticsearch/replication/util/SecurityContext.kt @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
package com.amazon.elasticsearch.replication.util

import com.amazon.elasticsearch.replication.metadata.ReplicationMetadata
import org.apache.logging.log4j.LogManager
import org.elasticsearch.cluster.ClusterState
import org.elasticsearch.common.util.concurrent.ThreadContext

/**
 * Helpers for propagating the security (user) context between the thread
 * context, the cluster state replication metadata, and back.
 */
interface SecurityContext {
    companion object {
        const val INJECTED_USER = "injected_user"
        const val OPENDISTRO_USER_INFO = "_opendistro_security_user_info"
        const val OPENDISTRO_USER_INFO_DELIMITOR = "|"

        private val log = LogManager.getLogger(SecurityContext::class.java)

        /**
         * Extracts the current user as "userName|backendRoles" from the thread context.
         *
         * Falls back to the raw [INJECTED_USER] transient when the security plugin has
         * not populated [OPENDISTRO_USER_INFO]. Returns null when the user info is
         * present but has no principal.
         */
        fun fromSecurityThreadContext(threadContext: ThreadContext): String? {
            // Directly return injected_user from the thread context if the user info is not set.
            val userInfo = threadContext.getTransient<String?>(OPENDISTRO_USER_INFO)
                    ?: return threadContext.getTransient(INJECTED_USER)
            val usersAndRoles = userInfo.split(OPENDISTRO_USER_INFO_DELIMITOR)
            // FIX: String.split with a non-empty delimiter never yields an empty list,
            // so the previous `usersAndRoles.isEmpty()` guard was unreachable. Reject
            // malformed user info (blank principal) instead, which is what the
            // warn-and-return-null path was clearly intended for.
            val userName = usersAndRoles[0]
            if (userName.isEmpty()) {
                log.warn("Failed to parse security user info - $userInfo")
                return null
            }
            val userBackendRoles = if (usersAndRoles.size >= 2) usersAndRoles[1] else ""
            return "${userName}${OPENDISTRO_USER_INFO_DELIMITOR}${userBackendRoles}"
        }

        /**
         * Looks up the stored security context for [remoteCluster]/[followerIndex]
         * from the replication custom metadata, or null when none is recorded.
         */
        fun fromClusterState(clusterState: ClusterState, remoteCluster: String, followerIndex: String): String? {
            val replicationMetadata = clusterState.metadata.custom<ReplicationMetadata>(ReplicationMetadata.NAME)
            return replicationMetadata?.securityContexts?.get(remoteCluster)?.get(followerIndex)
        }

        /**
         * Injects [injectedUser] into the thread context unless a user is already
         * present there (an existing value is preserved and a warning is logged).
         */
        fun toThreadContext(threadContext: ThreadContext, injectedUser: String?) {
            if (injectedUser != null) {
                val userInfo = threadContext.getTransient<String?>(INJECTED_USER)
                if (userInfo != null) {
                    log.warn("Injected user not empty in thread context $userInfo")
                } else {
                    threadContext.putTransient(INJECTED_USER, injectedUser)
                }
            }
        }
    }
}
package com.amazon.elasticsearch.replication

import com.amazon.elasticsearch.replication.MultiClusterAnnotations.ClusterConfiguration
import com.amazon.elasticsearch.replication.MultiClusterAnnotations.ClusterConfigurations
import org.assertj.core.api.Assertions.assertThat
import org.elasticsearch.ElasticsearchStatusException
import org.elasticsearch.action.DocWriteResponse.Result
import org.elasticsearch.action.get.GetRequest
import org.elasticsearch.action.index.IndexRequest
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.indices.CreateIndexRequest
import org.elasticsearch.test.ESTestCase.assertBusy
import org.junit.Assert
import java.util.Locale

const val LEADER = "leaderCluster"
const val FOLL = "followCluster"

/**
 * End-to-end replication checks between a leader and a follower cluster:
 * documents written on the leader must become visible on the follower.
 */
@ClusterConfigurations(
    ClusterConfiguration(clusterName = LEADER),
    ClusterConfiguration(clusterName = FOLL)
)
class BasicReplicationIT : MultiClusterRestTestCase() {

    fun `test empty index replication`() {
        val followerClient = getClientForCluster(FOLL)
        val leaderClient = getClientForCluster(LEADER)
        createConnectionBetweenClusters(FOLL, LEADER)

        val leaderIndex = randomAlphaOfLength(10).toLowerCase(Locale.ROOT)
        val followerIndex = randomAlphaOfLength(10).toLowerCase(Locale.ROOT)

        // Start replication against an index that exists but holds no documents yet.
        val created = leaderClient.indices().create(CreateIndexRequest(leaderIndex), RequestOptions.DEFAULT)
        assertThat(created.isAcknowledged).isTrue()
        followerClient.startReplication(StartReplicationRequest("source", leaderIndex, followerIndex), waitForRestore = true)

        // A document written after replication begins must show up on the follower.
        val doc = mapOf("name" to randomAlphaOfLength(20), "age" to randomInt().toString())
        val indexed = leaderClient.index(IndexRequest(leaderIndex).id("1").source(doc), RequestOptions.DEFAULT)
        assertThat(indexed.result).isEqualTo(Result.CREATED)

        assertBusy {
            val fetched = followerClient.get(GetRequest(followerIndex, "1"), RequestOptions.DEFAULT)
            assertThat(fetched.isExists).isTrue()
            assertThat(fetched.sourceAsMap).isEqualTo(doc)
        }
        followerClient.stopReplication(followerIndex)
    }

    fun `test existing index replication`() {
        val followerClient = getClientForCluster(FOLL)
        val leaderClient = getClientForCluster(LEADER)
        createConnectionBetweenClusters(FOLL, LEADER)

        // Seed the leader index with a document before replication begins.
        val leaderIndex = randomAlphaOfLength(10).toLowerCase(Locale.ROOT)
        val followerIndex = randomAlphaOfLength(10).toLowerCase(Locale.ROOT)
        val doc = mapOf("name" to randomAlphaOfLength(20), "age" to randomInt().toString())
        val indexed = leaderClient.index(IndexRequest(leaderIndex).id("1").source(doc), RequestOptions.DEFAULT)
        assertThat(indexed.result).withFailMessage("Failed to create leader data").isEqualTo(Result.CREATED)

        followerClient.startReplication(StartReplicationRequest("source", leaderIndex, followerIndex), waitForRestore = true)

        // The pre-existing document must be restored onto the follower.
        assertBusy {
            val fetched = followerClient.get(GetRequest(followerIndex, "1"), RequestOptions.DEFAULT)
            assertThat(fetched.isExists).isTrue()
            assertThat(fetched.sourceAsMap).isEqualTo(doc)
        }
        followerClient.stopReplication(followerIndex)
    }
}
package com.amazon.elasticsearch.replication

import java.util.ArrayList

/**
 * This object defines annotations that configure the multi-cluster
 * topology required by an integration test class.
 */
object MultiClusterAnnotations {

    /**
     * Returns all annotations of type [annotationClass] declared on [declaringClass],
     * including repeated ones.
     */
    // NOTE(review): generic parameters reconstructed from usage — verify against original.
    @JvmStatic
    fun <T : Annotation> getAnnotationsFromClass(declaringClass: Class<*>,
                                                 annotationClass: Class<T>): List<T> =
        declaringClass.getAnnotationsByType(annotationClass).map { annotationClass.cast(it) }

    @Retention(AnnotationRetention.RUNTIME)
    @Target(AnnotationTarget.ANNOTATION_CLASS, AnnotationTarget.CLASS)
    @Repeatable
    annotation class ClusterConfiguration(
        /* The name of the cluster which is being configured in this configuration. */
        val clusterName: String,
        /* Controls whether indices created by one test are preserved
         * for other tests in the same test class. */
        val preserveIndices: Boolean = false,
        /* Controls whether snapshots created by one test are preserved
         * for other tests in the same test class. */
        val preserveSnapshots: Boolean = false,
        /* Controls whether cluster settings set up by one test are preserved
         * for other tests in the same test class. */
        val preserveClusterSettings: Boolean = false)

    /** Container annotation, needed because Kotlin classes can't repeat [ClusterConfiguration] directly. */
    @Retention(AnnotationRetention.RUNTIME)
    @Target(AnnotationTarget.ANNOTATION_CLASS, AnnotationTarget.CLASS)
    annotation class ClusterConfigurations(vararg val value: ClusterConfiguration)
}
00000000..93638c64 --- /dev/null +++ b/src/test/kotlin/com/amazon/elasticsearch/replication/MultiClusterRestTestCase.kt @@ -0,0 +1,337 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication + +import com.amazon.elasticsearch.replication.MultiClusterAnnotations.ClusterConfiguration +import com.amazon.elasticsearch.replication.MultiClusterAnnotations.ClusterConfigurations +import com.amazon.elasticsearch.replication.MultiClusterAnnotations.getAnnotationsFromClass +import com.amazon.elasticsearch.replication.task.index.IndexReplicationExecutor +import com.amazon.elasticsearch.replication.task.shard.ShardReplicationExecutor +import org.apache.http.Header +import org.apache.http.HttpHost +import org.apache.http.HttpStatus +import org.apache.http.client.config.RequestConfig +import org.apache.http.entity.ContentType +import org.apache.http.impl.nio.client.HttpAsyncClientBuilder +import org.apache.http.message.BasicHeader +import org.apache.http.nio.conn.ssl.SSLIOSessionStrategy +import org.apache.http.nio.entity.NStringEntity +import org.apache.http.ssl.SSLContexts +import org.apache.lucene.util.SetOnce +import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksRequest +import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksRequestBuilder +import org.elasticsearch.bootstrap.BootstrapInfo +import org.elasticsearch.client.* +import 
import org.elasticsearch.common.Strings
import org.elasticsearch.common.io.PathUtils
import org.elasticsearch.common.settings.Settings
import org.elasticsearch.common.unit.TimeValue
import org.elasticsearch.common.util.concurrent.ThreadContext
import org.elasticsearch.common.xcontent.XContentHelper
import org.elasticsearch.common.xcontent.XContentType
import org.elasticsearch.common.xcontent.json.JsonXContent
import org.elasticsearch.snapshots.SnapshotState
import org.elasticsearch.tasks.TaskInfo
import org.elasticsearch.test.ESTestCase
import org.elasticsearch.test.ESTestCase.assertBusy
import org.elasticsearch.test.rest.ESRestTestCase
import org.hamcrest.Matchers
import org.junit.After
import org.junit.AfterClass
import org.junit.BeforeClass
import java.nio.file.Files
import java.security.KeyManagementException
import java.security.KeyStore
import java.security.KeyStoreException
import java.security.NoSuchAlgorithmException
import java.security.cert.CertificateException
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicReference

/**
 * This class provides basic support of managing life-cycle of
 * multiple clusters defined as part of ES build.
 *
 * NOTE(review): generic type arguments below were reconstructed from usage
 * (the source was whitespace-mangled) — verify against the original file.
 */
abstract class MultiClusterRestTestCase : ESTestCase() {

    /** Handle to one externally-started test cluster plus the clients used to reach it. */
    class TestCluster(clusterName: String, val httpHosts: List<HttpHost>, val transportPorts: List<String>,
                      val preserveSnapshots: Boolean, val preserveIndices: Boolean,
                      val preserveClusterSettings: Boolean) {
        val restClient : RestHighLevelClient
        init {
            val builder = RestClient.builder(*httpHosts.toTypedArray())
            configureClient(builder, getClusterSettings(clusterName))
            builder.setStrictDeprecationMode(true)
            restClient = RestHighLevelClient(builder)
        }
        // Low-level client shares the high-level client's connections.
        val lowLevelClient = restClient.lowLevelClient!!
    }

    companion object {
        // Populated once per suite in [setupTestClustersForSuite], keyed by cluster name.
        protected lateinit var testClusters : Map<String, TestCluster>

        /**
         * Builds a [TestCluster] from the http/transport host system properties
         * that the Gradle build exports for [configuration.clusterName].
         */
        private fun createTestCluster(configuration: ClusterConfiguration) : TestCluster {
            val cluster = configuration.clusterName
            val systemProperties = BootstrapInfo.getSystemProperties()
            val httpHostsProp = systemProperties.get("tests.cluster.${cluster}.http_hosts") as String?
            val transportHostsProp = systemProperties.get("tests.cluster.${cluster}.transport_hosts") as String?

            requireNotNull(httpHostsProp) { "Missing http hosts property for cluster: $cluster."}
            requireNotNull(transportHostsProp) { "Missing transport hosts property for cluster: $cluster."}

            val httpHosts = httpHostsProp.split(',').map { HttpHost.create("http://$it") }
            val transportPorts = transportHostsProp.split(',')
            return TestCluster(cluster, httpHosts, transportPorts, configuration.preserveSnapshots,
                    configuration.preserveIndices, configuration.preserveClusterSettings)
        }

        /**
         * Collects [ClusterConfiguration]s declared on the test class, either
         * repeated directly or wrapped in a [ClusterConfigurations] container.
         */
        private fun getClusterConfigurations(): List<ClusterConfiguration> {
            val repeatedAnnotation = (getAnnotationsFromClass(getTestClass(),ClusterConfiguration::class.java))
            if (repeatedAnnotation.isNotEmpty()) {
                return repeatedAnnotation
            }

            // Kotlin classes don't support repeatable annotations yet
            val groupedAnnotation = getTestClass().getAnnotationsByType(ClusterConfigurations::class.java)
            return if (groupedAnnotation.isNotEmpty()) {
                groupedAnnotation[0].value.toList()
            } else {
                emptyList()
            }
        }

        // Creates one TestCluster (and REST clients) per declared configuration.
        @BeforeClass @JvmStatic
        fun setupTestClustersForSuite() {
            testClusters = getClusterConfigurations().associate { it.clusterName to createTestCluster(it) }
        }

        // Releases the REST client connections after the whole suite has run.
        @AfterClass @JvmStatic
        fun cleanUpRestClients() {
            testClusters.values.forEach {
                it.restClient.close()
            }
        }

        protected fun getClusterSettings(clusterName: String): Settings {
            /* The default implementation is to return default settings from [ESRestTestCase].
             * This method can be overridden in base classes to allow different settings
             * for specific cluster. */
            val builder = Settings.builder()
            if (System.getProperty("tests.rest.client_path_prefix") != null) {
                builder.put(ESRestTestCase.CLIENT_PATH_PREFIX, System.getProperty("tests.rest.client_path_prefix"))
            }
            return builder.build()
        }

        /* Copied this method from [ESRestCase]: configures TLS trust material,
         * default headers, socket timeout and path prefix on the client builder. */
        protected fun configureClient(builder: RestClientBuilder, settings: Settings) {
            val keystorePath = settings[ESRestTestCase.TRUSTSTORE_PATH]
            if (keystorePath != null) {
                val keystorePass = settings[ESRestTestCase.TRUSTSTORE_PASSWORD]
                        ?: throw IllegalStateException(ESRestTestCase.TRUSTSTORE_PATH
                                + " is provided but not " + ESRestTestCase.TRUSTSTORE_PASSWORD)
                val path = PathUtils.get(keystorePath)
                check(
                        Files.exists(path)) { ESRestTestCase.TRUSTSTORE_PATH + " is set but points to a non-existing file" }
                try {
                    // Keystore type is inferred from the file extension (.p12 => PKCS12, else JKS).
                    val keyStoreType = if (keystorePath.endsWith(".p12")) "PKCS12" else "jks"
                    val keyStore = KeyStore.getInstance(keyStoreType)
                    Files.newInputStream(path).use { `is` -> keyStore.load(`is`, keystorePass.toCharArray()) }
                    val sslcontext = SSLContexts.custom().loadTrustMaterial(keyStore, null).build()
                    val sessionStrategy = SSLIOSessionStrategy(sslcontext)
                    builder.setHttpClientConfigCallback { httpClientBuilder: HttpAsyncClientBuilder ->
                        httpClientBuilder.setSSLStrategy(sessionStrategy)
                    }
                } catch (e: KeyStoreException) {
                    throw RuntimeException("Error setting up ssl", e)
                } catch (e: NoSuchAlgorithmException) {
                    throw RuntimeException("Error setting up ssl", e)
                } catch (e: KeyManagementException) {
                    throw RuntimeException("Error setting up ssl", e)
                } catch (e: CertificateException) {
                    throw RuntimeException("Error setting up ssl", e)
                }
            }
            val headers = ThreadContext.buildDefaultHeaders(settings)
            val defaultHeaders = arrayOfNulls<Header>(headers.size)
            var i = 0
            for ((key, value) in headers) {
                defaultHeaders[i++] = BasicHeader(key, value)
            }
            builder.setDefaultHeaders(defaultHeaders)
            val socketTimeoutString = settings[ESRestTestCase.CLIENT_SOCKET_TIMEOUT]
            val socketTimeout = TimeValue.parseTimeValue(socketTimeoutString ?: "60s",
                    ESRestTestCase.CLIENT_SOCKET_TIMEOUT)
            builder.setRequestConfigCallback { conf: RequestConfig.Builder ->
                conf.setSocketTimeout(
                        Math.toIntExact(socketTimeout.millis))
            }
            if (settings.hasValue(ESRestTestCase.CLIENT_PATH_PREFIX)) {
                builder.setPathPrefix(settings[ESRestTestCase.CLIENT_PATH_PREFIX])
            }
        }
    }

    // After each test, clean every cluster unless its configuration asked for preservation.
    @After
    fun wipeClusters() {
        testClusters.values.forEach { wipeCluster(it) }
    }

    private fun wipeCluster(testCluster: TestCluster) {
        if (!testCluster.preserveSnapshots) waitForSnapshotWiping(testCluster)
        if (!testCluster.preserveIndices) wipeIndicesFromCluster(testCluster)
        if (!testCluster.preserveClusterSettings) wipeClusterSettings(testCluster)
    }

    /**
     * Repeatedly deletes snapshots until none remain (or the 2-minute budget
     * is exhausted); assertion/exception failures are swallowed deliberately
     * so the rest of the cleanup still runs.
     */
    private fun waitForSnapshotWiping(testCluster: TestCluster) {
        val inProgressSnapshots = SetOnce<Map<String, MutableList<Map<String, Any?>>>>()
        val snapshots = AtomicReference<Map<String, MutableList<Map<String, Any?>>>>()
        try {
            // Repeatedly delete the snapshots until there aren't any
            assertBusy({
                snapshots.set(_wipeSnapshots(testCluster))
                assertThat(snapshots.get(), Matchers.anEmptyMap())
            }, 2, TimeUnit.MINUTES)
            // At this point there should be no snapshots
            inProgressSnapshots.set(snapshots.get())
        } catch (e: AssertionError) {
            // This will cause an error at the end of this method, but do the rest of the cleanup first
            inProgressSnapshots.set(snapshots.get())
        } catch (e: Exception) {
            inProgressSnapshots.set(snapshots.get())
        }
    }

    /** Resets every persistent/transient cluster setting by PUT-ting nulls for all present keys. */
    protected fun wipeClusterSettings(testCluster: TestCluster) {
        val getResponse: Map<String, Any> = ESRestTestCase.entityAsMap(testCluster.lowLevelClient.performRequest(
                Request("GET", "/_cluster/settings")))
        var mustClear = false
        val clearCommand = JsonXContent.contentBuilder()
        clearCommand.startObject()
        for ((key1, value) in getResponse) {
            val type = key1
            val settings = value as Map<*, *>
            if (settings.isEmpty()) {
                continue
            }
            mustClear = true
            clearCommand.startObject(type)
            for (key in settings.keys) {
                // "<key>.*": null clears the setting (and any children) on the cluster.
                clearCommand.field("$key.*").nullValue()
            }
            clearCommand.endObject()
        }
        clearCommand.endObject()
        if (mustClear) {
            val request = Request("PUT", "/_cluster/settings")
            request.setJsonEntity(Strings.toString(clearCommand))
            testCluster.lowLevelClient.performRequest(request)
        }
    }

    /** Deletes all non-system indices; a 404 simply means there was nothing to delete. */
    protected fun wipeIndicesFromCluster(testCluster: TestCluster) {
        try {
            val deleteRequest = Request("DELETE", "*,-.*") // All except system indices
            val response = testCluster.lowLevelClient.performRequest(deleteRequest)
            response.entity.content.use { `is` ->
                assertTrue(
                        XContentHelper.convertToMap(XContentType.JSON.xContent(), `is`, true)["acknowledged"] as Boolean)
            }
        } catch (e: ResponseException) {
            // 404 here just means we had no indexes
            if (e.response.statusLine.statusCode != 404) {
                throw e
            }
        }
    }

    /**
     * Deletes all snapshots in all "fs" repositories (then the repositories
     * themselves) and returns any snapshots that were still in progress,
     * keyed by repository name.
     */
    protected fun _wipeSnapshots(testCluster: TestCluster): Map<String, MutableList<Map<String, Any?>>> {
        val inProgressSnapshots: MutableMap<String, MutableList<Map<String, Any?>>> = mutableMapOf()
        for ((repoName, value) in ESRestTestCase.entityAsMap(
                testCluster.lowLevelClient.performRequest(Request("GET", "/_snapshot/_all")))) {
            val repoSpec = value as Map<*, *>
            val repoType = repoSpec["type"] as String
            if (repoType == "fs") {
                // All other repo types we really don't have a chance of being able to iterate properly, sadly.
                val listRequest = Request("GET", "/_snapshot/$repoName/_all")
                listRequest.addParameter("ignore_unavailable", "true")
                val snapshots = ESRestTestCase.entityAsMap(
                        testCluster.lowLevelClient.performRequest(listRequest))["snapshots"] as List<Map<String, Any?>>
                for (snapshot in snapshots) {
                    val snapshotInfo = snapshot
                    val name = snapshotInfo["snapshot"] as String?
                    if (!SnapshotState.valueOf((snapshotInfo["state"] as String?)!!).completed()) {
                        inProgressSnapshots.computeIfAbsent(repoName) { mutableListOf() }
                                .add(snapshotInfo)
                    }
                    logger.debug("wiping snapshot [{}/{}]", repoName, name)
                    testCluster.lowLevelClient.performRequest(Request(
                            "DELETE", "/_snapshot/$repoName/$name"))
                }
            }
            deleteRepository(testCluster, repoName)
        }
        return inProgressSnapshots
    }

    protected fun deleteRepository(testCluster: TestCluster, repoName: String) {
        testCluster.lowLevelClient.performRequest(Request("DELETE", "_snapshot/$repoName"))
    }

    /** Returns the cluster registered under [clusterName], failing with guidance if absent. */
    fun getNamedCluster(clusterName: String): TestCluster {
        return testClusters[clusterName] ?: error("""Given clusterName:$clusterName was not found.
            |Please confirm if it is defined in build.gradle file and included in clusterConfiguration
            |annotation in test class.""".trimMargin())
    }

    fun getClientForCluster(clusterName: String): RestHighLevelClient {
        return getNamedCluster(clusterName).restClient
    }

    /** GETs [endpoint] on [client] and returns the response body as a map. */
    fun getAsMap(client: RestClient, endpoint: String): Map<String, Any> {
        return ESRestTestCase.entityAsMap(client.performRequest(Request("GET", endpoint)))
    }

    /**
     * Registers [toClusterName] as a remote cluster named [connectionName] on
     * [fromClusterName] via a persistent cluster-settings update, seeded with
     * the target's first transport host.
     */
    protected fun createConnectionBetweenClusters(fromClusterName: String, toClusterName: String, connectionName: String="source") {
        val toCluster = getNamedCluster(toClusterName)
        val fromCluster = getNamedCluster(fromClusterName)
        val persistentConnectionRequest = Request("PUT", "_cluster/settings")
        val toClusterHostSeed = toCluster.transportPorts[0]
        val entityAsString = """
                        {
                          "persistent": {
                             "cluster": {
                               "remote": {
                                 "$connectionName": {
                                   "seeds": [ "$toClusterHostSeed" ]
                                 }
                               }
                             }
                          }
                        }""".trimMargin()

        persistentConnectionRequest.entity = NStringEntity(entityAsString, ContentType.APPLICATION_JSON)
        val persistentConnectionResponse = fromCluster.lowLevelClient.performRequest(persistentConnectionRequest)
        assertEquals(HttpStatus.SC_OK.toLong(), persistentConnectionResponse.statusLine.statusCode.toLong())
    }

    /** Lists tasks on [clusterName] matching [action] (defaults to all replication tasks). */
    fun getReplicationTaskList(clusterName: String, action: String="*replication*"): List<TaskInfo> {
        val client = getClientForCluster(clusterName)
        val request = ListTasksRequest().setDetailed(true).setActions(action)
        val response = client.tasks().list(request,RequestOptions.DEFAULT)
        return response.tasks
    }
}
package com.amazon.elasticsearch.replication

import com.amazon.elasticsearch.replication.MultiClusterAnnotations.ClusterConfiguration
import org.apache.http.util.EntityUtils
import org.elasticsearch.client.Request

/**
 * Sanity checks that the replication plugin is installed on every node of both
 * clusters and that a remote-cluster connection can be established.
 */
@MultiClusterAnnotations.ClusterConfigurations(
    ClusterConfiguration(clusterName = "leaderCluster"),
    ClusterConfiguration(clusterName = "followCluster")
)
class MultiClusterSetupIT : MultiClusterRestTestCase() {

    fun testRepPluginLoadedOnLeaderCluster() {
        assertReplicationPluginInstalledOnAllNodes("leaderCluster")
    }

    fun testRepPluginInstalledOnFollowerCluster() {
        assertReplicationPluginInstalledOnAllNodes("followCluster")
    }

    /**
     * Asserts that every node of [clusterName] lists the cross-cluster
     * replication plugin in its _nodes/plugins output.
     *
     * FIX: the follower-cluster test previously duplicated this logic and
     * misleadingly named the follower's client `restClientForLeader`; both
     * tests now share this helper with a neutral name.
     */
    private fun assertReplicationPluginInstalledOnAllNodes(clusterName: String) {
        val lowLevelClient = getNamedCluster(clusterName).lowLevelClient
        val installedPlugins = getAsMap(lowLevelClient, "_nodes/plugins")
        val nodes = installedPlugins["nodes"] as Map<String, Map<String, Any>>?
        for (node in nodes!!.values) {
            val nodePlugins = node["plugins"] as List<Map<String, Any>>?
            assertTrue("Cross cluster plugin wasn't installed on node: " + node["name"],
                    isReplicationPluginInstalledOnNode(nodePlugins))
        }
    }

    // Returns true when the node's plugin list contains the replication plugin.
    private fun isReplicationPluginInstalledOnNode(nodePlugins: List<Map<String, Any>>?): Boolean {
        for (plugin in nodePlugins!!)
            if (plugin["name"] == "opendistro-cross-cluster-replication") return true
        return false
    }

    fun testClusterConnection() {
        createConnectionBetweenClusters("followCluster", "leaderCluster")
        // The follower's settings must now reference the leader as a remote cluster.
        val getSettingsRequest = Request("GET", "/_cluster/settings")
        val settingsResponse = getNamedCluster("followCluster").lowLevelClient.performRequest(getSettingsRequest)
        val responseString = EntityUtils.toString(settingsResponse.entity)
        assertTrue(responseString.contains("remote"))
        assertTrue(responseString.contains(getNamedCluster("leaderCluster").transportPorts[0]))
    }
}
package com.amazon.elasticsearch.replication

import com.amazon.elasticsearch.replication.task.index.IndexReplicationExecutor
import com.amazon.elasticsearch.replication.task.shard.ShardReplicationExecutor
import org.assertj.core.api.Assertions.assertThat
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest
import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksRequest
import org.elasticsearch.action.support.master.AcknowledgedResponse
import org.elasticsearch.client.Request
import org.elasticsearch.client.RequestOptions
import org.elasticsearch.client.Response
import org.elasticsearch.client.RestHighLevelClient
import org.elasticsearch.common.unit.TimeValue
import org.elasticsearch.common.xcontent.DeprecationHandler
import org.elasticsearch.common.xcontent.NamedXContentRegistry
import org.elasticsearch.common.xcontent.XContentType
import org.elasticsearch.test.ESTestCase.assertBusy
import java.util.concurrent.TimeUnit

/** Parameters needed to start replicating [remoteIndex] on [remoteClusterAlias] into [toIndex]. */
data class StartReplicationRequest(val remoteClusterAlias: String, val remoteIndex: String, val toIndex: String)

const val REST_REPLICATION_PREFIX = "/_opendistro/_replication/"
const val REST_REPLICATION_START = "$REST_REPLICATION_PREFIX{index}/_start"
const val REST_REPLICATION_STOP = "$REST_REPLICATION_PREFIX{index}/_stop"
const val REST_AUTO_FOLLOW_PATTERN = "_opendistro/_replication/_autofollow"

/**
 * Starts replication via the plugin's REST _start endpoint, asserts the call
 * was acknowledged, and waits until the replication tasks show up (and,
 * optionally, until no shards are initializing).
 */
fun RestHighLevelClient.startReplication(request: StartReplicationRequest,
                                         waitFor: TimeValue = TimeValue.timeValueSeconds(10),
                                         waitForShardsInit: Boolean = true,
                                         waitForRestore: Boolean = false) {
    val lowLevelRequest = Request("PUT", REST_REPLICATION_START.replace("{index}", request.toIndex, true)
            + "?wait_for_restore=${waitForRestore}")
    lowLevelRequest.setJsonEntity("""{
                                       "remote_cluster" : "${request.remoteClusterAlias}",
                                       "remote_index": "${request.remoteIndex}"
                                     }
                                  """)
    val lowLevelResponse = lowLevelClient.performRequest(lowLevelRequest)
    val response = getAckResponse(lowLevelResponse)
    assertThat(response.isAcknowledged).withFailMessage("Replication not started.").isTrue()
    waitForReplicationStart(request.toIndex, waitFor)
    if (waitForShardsInit)
        waitForNoInitializingShards()
}

/**
 * Parses the low-level REST response body into an [AcknowledgedResponse].
 *
 * FIX: the parser is Closeable and was previously leaked; `use` now closes it
 * (releasing the underlying entity stream) after parsing.
 */
fun getAckResponse(lowLevelResponse: Response): AcknowledgedResponse {
    val xContentType = XContentType.fromMediaTypeOrFormat(lowLevelResponse.entity.contentType.value)
    return xContentType.xContent()
            .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS,
                    lowLevelResponse.entity.content)
            .use { parser -> AcknowledgedResponse.fromXContent(parser) }
}

/** Stops replication for [index] via the REST _stop endpoint and waits for the tasks to go away. */
fun RestHighLevelClient.stopReplication(index: String) {
    val lowLevelStopRequest = Request("POST", REST_REPLICATION_STOP.replace("{index}", index,true))
    lowLevelStopRequest.setJsonEntity("{}")
    val lowLevelStopResponse = lowLevelClient.performRequest(lowLevelStopRequest)
    val response = getAckResponse(lowLevelStopResponse)
    assertThat(response.isAcknowledged).withFailMessage("Replication could not be stopped").isTrue()
    waitForReplicationStop(index)
}

/** Waits until at least one index/shard replication task is listed by the tasks API. */
fun RestHighLevelClient.waitForReplicationStart(index: String, waitFor : TimeValue = TimeValue.timeValueSeconds(10)) {
    assertBusy(
            {
                // Persistent tasks service appends identifiers like '[c]' to indicate child task hence the '*' wildcard
                val request = ListTasksRequest().setDetailed(true).setActions(ShardReplicationExecutor.TASK_NAME + "*",
                        IndexReplicationExecutor.TASK_NAME + "*")
                val response = tasks().list(request,RequestOptions.DEFAULT)
                assertThat(response.tasks)
                        .withFailMessage("replication tasks not started")
                        .isNotEmpty
            }, waitFor.seconds, TimeUnit.SECONDS)
}

/** Blocks (up to 70s) until the cluster reports no initializing shards. */
fun RestHighLevelClient.waitForNoInitializingShards() {
    val request = ClusterHealthRequest().waitForNoInitializingShards(true)
            .timeout(TimeValue.timeValueSeconds(70))
    request.level(ClusterHealthRequest.Level.SHARDS)
    this.cluster().health(request, RequestOptions.DEFAULT)
}

/** Blocks (up to 70s) until the cluster reports no relocating shards. */
fun RestHighLevelClient.waitForNoRelocatingShards() {
    val request = ClusterHealthRequest().waitForNoRelocatingShards(true)
            .timeout(TimeValue.timeValueSeconds(70))
    request.level(ClusterHealthRequest.Level.SHARDS)
    this.cluster().health(request, RequestOptions.DEFAULT)
}

/** Waits until no index/shard replication tasks remain in the tasks API. */
fun RestHighLevelClient.waitForReplicationStop(index: String, waitFor : TimeValue = TimeValue.timeValueSeconds(10)) {
    assertBusy(
            {
                // Persistent tasks service appends modifiers to task action hence the '*'
                val request = ListTasksRequest().setDetailed(true).setActions(ShardReplicationExecutor.TASK_NAME + "*",
                        IndexReplicationExecutor.TASK_NAME + "*")

                val response = tasks().list(request,RequestOptions.DEFAULT)
                assertThat(response.tasks)
                        .withFailMessage("replication tasks not stopped.")
                        .isEmpty()
            }, waitFor.seconds, TimeUnit.SECONDS)
}

/** Creates/updates an auto-follow [pattern] named [patternName] on [connection] and asserts the ack. */
fun RestHighLevelClient.updateAutoFollowPattern(connection: String, patternName: String, pattern: String) {
    val lowLevelRequest = Request("POST", REST_AUTO_FOLLOW_PATTERN)
    lowLevelRequest.setJsonEntity("""{
                                       "connection" : "${connection}",
                                       "name" : "${patternName}",
                                       "pattern": "${pattern}"
                                     }""")
    val lowLevelResponse = lowLevelClient.performRequest(lowLevelRequest)
    val response = getAckResponse(lowLevelResponse)
    assertThat(response.isAcknowledged).isTrue()
}

/** Deletes the auto-follow pattern [patternName] on [connection] and asserts the ack. */
fun RestHighLevelClient.deleteAutoFollowPattern(connection: String, patternName: String) {
    val lowLevelRequest = Request("DELETE", REST_AUTO_FOLLOW_PATTERN)
    lowLevelRequest.setJsonEntity("""{
                                       "connection" : "${connection}",
                                       "name" : "${patternName}"
                                     }""")
    val lowLevelResponse = lowLevelClient.performRequest(lowLevelRequest)
    val response = getAckResponse(lowLevelResponse)
    assertThat(response.isAcknowledged).isTrue()
}
package com.amazon.elasticsearch.replication

import org.elasticsearch.test.ESIntegTestCase

/**
 * Placeholder integration-test case; no tests are defined here yet.
 */
class ReplicationIntegTestCaseIT : ESIntegTestCase() {

}
+ */ + +package com.amazon.elasticsearch.replication + +import org.elasticsearch.test.ESTestCase + +class ReplicationPluginTests : ESTestCase() { + fun testNothing() { + assertTrue("Write some tests!", true) + } +} \ No newline at end of file diff --git a/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/StartReplicationIT.kt b/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/StartReplicationIT.kt new file mode 100644 index 00000000..8f6a6d95 --- /dev/null +++ b/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/StartReplicationIT.kt @@ -0,0 +1,218 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +package com.amazon.elasticsearch.replication.integ.rest + + +import com.amazon.elasticsearch.replication.MultiClusterAnnotations +import com.amazon.elasticsearch.replication.MultiClusterRestTestCase +import com.amazon.elasticsearch.replication.StartReplicationRequest +import com.amazon.elasticsearch.replication.startReplication +import org.apache.http.HttpStatus +import org.apache.http.entity.ContentType +import org.apache.http.nio.entity.NStringEntity +import org.assertj.core.api.Assertions.assertThat +import org.assertj.core.api.Assertions.assertThatThrownBy +import org.elasticsearch.action.admin.indices.alias.Alias +import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest +import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest +import org.elasticsearch.client.Request +import org.elasticsearch.client.RequestOptions +import org.elasticsearch.client.ResponseException +import org.elasticsearch.client.indices.CreateIndexRequest +import org.elasticsearch.client.indices.GetIndexRequest +import org.elasticsearch.client.indices.GetMappingsRequest +import org.elasticsearch.cluster.metadata.IndexMetadata +import org.elasticsearch.common.settings.Settings +import org.elasticsearch.test.ESTestCase.assertBusy +import org.junit.Assert + + +@MultiClusterAnnotations.ClusterConfigurations( + MultiClusterAnnotations.ClusterConfiguration(clusterName = LEADER), + MultiClusterAnnotations.ClusterConfiguration(clusterName = FOLLOWER) +) +class StartReplicationIT: MultiClusterRestTestCase() { + private val leaderIndexName = "leader_index" + private val followerIndexName = "follower_index" + + fun `test start replication in following state and empty index`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + 
assertThat(createIndexResponse.isAcknowledged).isTrue() + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + assertBusy { + assertThat(followerClient.indices() + .exists(GetIndexRequest(followerIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + } + } + + fun `test start replication fails when replication has already been started for the same index`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + assertThatThrownBy { + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + }.isInstanceOf(ResponseException::class.java).hasMessageContaining("{\"error\":{\"root_cause\":[{\"type\":\"resource_already_exists_exception\"," + + "\"reason\":\"task with id {replication:index:follower_index} already exist\"}]") + } + + fun `test start replication fails when remote cluster alias does not exist`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + assertThatThrownBy { + followerClient.startReplication(StartReplicationRequest("doesNotExist", leaderIndexName, followerIndexName)) + }.isInstanceOf(ResponseException::class.java).hasMessageContaining("{\"error\":{\"root_cause\":[{\"type\":\"no_such_remote_cluster_exception\"," + + "\"reason\":\"no such remote cluster: [doesNotExist]\"}]") + } + + fun 
`test start replication fails when index does not exist`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + assertThatThrownBy { + followerClient.startReplication(StartReplicationRequest("source", "doesNotExist", followerIndexName)) + }.isInstanceOf(ResponseException::class.java).hasMessageContaining("{\"error\":{\"root_cause\":[{\"type\":\"index_not_found_exception\"," + + "\"reason\":\"no such index [doesNotExist]\"") + } + + fun `test start replication fails when the follower cluster is write blocked or metadata blocked`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + addClusterMetadataBlock(FOLLOWER, "true") + assertThatThrownBy { + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + }.isInstanceOf(ResponseException::class.java).hasMessageContaining("{\"error\":{\"root_cause\":[{\"type\":\"cluster_block_exception\"," + + "\"reason\":\"blocked by: [FORBIDDEN/6/cluster read-only (api)];\"}]") + // Removing the metadata block for cleanup + addClusterMetadataBlock(FOLLOWER, "false") + } + + fun `test that follower index has same mapping as leader index`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + 
assertThat(createIndexResponse.isAcknowledged).isTrue() + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + assertBusy { + assertThat(followerClient.indices() + .exists(GetIndexRequest(followerIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + } + Assert.assertEquals( + leaderClient.indices().getMapping(GetMappingsRequest().indices(leaderIndexName), RequestOptions.DEFAULT) + .mappings()[leaderIndexName], + followerClient.indices().getMapping(GetMappingsRequest().indices(followerIndexName), RequestOptions.DEFAULT) + .mappings()[followerIndexName] + ) + } + + fun `test that index settings are getting replicated`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build() + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName).settings(settings), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + assertBusy { + assertThat(followerClient.indices() + .exists(GetIndexRequest(followerIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + } + val getSettingsRequest = GetSettingsRequest() + getSettingsRequest.indices(followerIndexName) + getSettingsRequest.names(IndexMetadata.SETTING_NUMBER_OF_REPLICAS) + Assert.assertEquals( + "0", + followerClient.indices() + .getSettings(getSettingsRequest, RequestOptions.DEFAULT) + .indexToSettings[followerIndexName][IndexMetadata.SETTING_NUMBER_OF_REPLICAS] + ) + } + + fun `test that aliases settings are getting replicated`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val 
createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName).alias(Alias("leaderAlias")), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + assertBusy { + assertThat(followerClient.indices() + .exists(GetIndexRequest(followerIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + } + Assert.assertEquals( + leaderClient.indices().getAlias(GetAliasesRequest().indices(leaderIndexName), + RequestOptions.DEFAULT).aliases[leaderIndexName], + followerClient.indices().getAlias(GetAliasesRequest().indices(followerIndexName), + RequestOptions.DEFAULT).aliases[followerIndexName] + ) + } + + private fun addClusterMetadataBlock(clusterName: String, blockValue: String) { + val cluster = getNamedCluster(clusterName) + val persistentConnectionRequest = Request("PUT", "_cluster/settings") + val entityAsString = """ + { + "persistent": { + "cluster": { + "blocks": { + "read_only": $blockValue + } + } + } + }""".trimMargin() + + persistentConnectionRequest.entity = NStringEntity(entityAsString, ContentType.APPLICATION_JSON) + val persistentConnectionResponse = cluster.lowLevelClient.performRequest(persistentConnectionRequest) + assertEquals(HttpStatus.SC_OK.toLong(), persistentConnectionResponse.statusLine.statusCode.toLong()) + } +} \ No newline at end of file diff --git a/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/StopReplicationIT.kt b/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/StopReplicationIT.kt new file mode 100644 index 00000000..5943f73f --- /dev/null +++ b/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/StopReplicationIT.kt @@ -0,0 +1,231 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). 
+ * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.integ.rest + +import com.amazon.elasticsearch.replication.MultiClusterAnnotations +import com.amazon.elasticsearch.replication.MultiClusterRestTestCase +import com.amazon.elasticsearch.replication.StartReplicationRequest +import com.amazon.elasticsearch.replication.startReplication +import com.amazon.elasticsearch.replication.stopReplication +import org.apache.http.util.EntityUtils +import org.assertj.core.api.Assertions.assertThat +import org.assertj.core.api.Assertions.assertThatThrownBy +import org.elasticsearch.ElasticsearchStatusException +import org.elasticsearch.action.DocWriteResponse +import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest +import org.elasticsearch.action.admin.indices.flush.FlushRequest +import org.elasticsearch.action.index.IndexRequest +import org.elasticsearch.client.Request +import org.elasticsearch.client.RequestOptions +import org.elasticsearch.client.ResponseException +import org.elasticsearch.client.RestHighLevelClient +import org.elasticsearch.client.indices.CreateIndexRequest +import org.elasticsearch.client.indices.GetIndexRequest +import org.elasticsearch.cluster.metadata.IndexMetadata +import org.elasticsearch.common.settings.Settings +import org.elasticsearch.common.unit.TimeValue +import org.elasticsearch.index.mapper.MapperService +import org.elasticsearch.test.ESTestCase.assertBusy +import java.util.concurrent.TimeUnit + + +const val LEADER = "leaderCluster" +const val FOLLOWER = "followCluster" + 
+@MultiClusterAnnotations.ClusterConfigurations( + MultiClusterAnnotations.ClusterConfiguration(clusterName = LEADER), + MultiClusterAnnotations.ClusterConfiguration(clusterName = FOLLOWER) +) +class StopReplicationIT: MultiClusterRestTestCase() { + private val leaderIndexName = "leader_index" + private val followerIndexName = "follower_index" + + fun `test stop replication in following state and empty index`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName)) + + /* At this point, the follower cluster should be in FOLLOWING state. Next, we stop replication + and verify the same + */ + followerClient.stopReplication(followerIndexName) + // Since, we were still in FOLLOWING phase when stop was called, the index + // in follower index should not have been deleted in follower cluster + assertBusy { + assertThat(followerClient.indices() + .exists(GetIndexRequest(followerIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + } + } + + fun `test stop replication in restoring state with multiple shards`() { + val settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 20) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING.key, Long.MAX_VALUE) + .build() + testStopReplicationInRestoringState(settings, 5000, 1000, 1000) + } + + private fun testStopReplicationInRestoringState(settings: Settings, + nFields: Int, + fieldLength: Int, + stepSize: Int) { + logger.info("""Testing stop replication in restoring state with params: + | shards:$settings[IndexMetadata.SETTING_NUMBER_OF_SHARDS] + | nFields:$nFields + 
| fieldLength:$fieldLength + | stepSize:$stepSize + | """.trimMargin()) + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + createConnectionBetweenClusters(FOLLOWER, LEADER) + + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName).settings(settings), + RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + // Put a large amount of data into the index + fillIndex(leaderClient, leaderIndexName, nFields, fieldLength, stepSize) + assertBusy { + assertThat(leaderClient.indices() + .exists(GetIndexRequest(leaderIndexName), RequestOptions.DEFAULT)) + } + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName), + TimeValue.timeValueSeconds(10), + false) + //Given the size of index, the replication should be in RESTORING phase at this point + followerClient.stopReplication(followerIndexName) + // Since, we were still in RESTORING phase when stop was called, the index + // in follower index should have been deleted in follower cluster + assertBusy { + assertThat(followerClient.indices() + .exists(GetIndexRequest(followerIndexName), RequestOptions.DEFAULT)) + .isEqualTo(false) + } + } + + /* What we want to test here is that STOP replication + is called while shard tasks were starting. Since we can't have this situation + deterministically, we have a high number of shards and repeated tests. This is so that + there is some shard task in follower index which gets started after STOP api has closed + existing shard tasks. This is how it was tested manually. 
*/ + // TODO: Figure out a way without using @Repeat(iterations = 5) + fun `test stop replication in restoring state while shards are starting`() { + val settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 50) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .build() + testStopReplicationInRestoringState(settings, 5, 10, 5) + } + + private fun fillIndex(clusterClient: RestHighLevelClient, + indexName : String, + nFields: Int, + fieldLength: Int, + stepSize: Int) { + for (i in nFields downTo 1 step stepSize) { + val sourceMap : MutableMap = HashMap() + for (j in stepSize downTo 1) + sourceMap[(i-j).toString()] = randomAlphaOfLength(fieldLength) + logger.info("Updating index with map of size:${sourceMap.size}") + val indexResponse = clusterClient.index(IndexRequest(indexName).id(i.toString()).source(sourceMap), RequestOptions.DEFAULT) + assertThat(indexResponse.result).isIn(DocWriteResponse.Result.CREATED, DocWriteResponse.Result.UPDATED) + } + //flush the index + clusterClient.indices().flush(FlushRequest(indexName), RequestOptions.DEFAULT) + } + + fun `test follower index unblocked after stop replication`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + createConnectionBetweenClusters(FOLLOWER, LEADER) + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + val sourceMap = mapOf("name" to randomAlphaOfLength(5)) + leaderClient.index(IndexRequest(leaderIndexName).id("1").source(sourceMap), RequestOptions.DEFAULT) + // Need to set waitForRestore=true as the cluster blocks are added only + // after restore is completed. 
+ followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName), + waitForRestore = true) + // Need to wait till index blocks appear into state + assertBusy ({ + val clusterBlocksResponse = followerClient.lowLevelClient.performRequest(Request("GET", "/_cluster/state/blocks")) + val clusterResponseString = EntityUtils.toString(clusterBlocksResponse.entity) + assertThat(clusterResponseString.contains("cross-cluster-replication")) + .withFailMessage("Cant find replication block afer starting replication") + .isTrue() + }, 10, TimeUnit.SECONDS) + + assertThatThrownBy { + followerClient.index(IndexRequest(followerIndexName).id("blocked").source(sourceMap), RequestOptions + .DEFAULT) + }.isInstanceOf(ElasticsearchStatusException::class.java) + .hasMessage("Elasticsearch exception [type=cluster_block_exception, reason=index [$followerIndexName] " + + "blocked by: [FORBIDDEN/1000/index read-only(cross-cluster-replication)];]") + + //Stop replication and verify that index is not blocked any more + followerClient.stopReplication(followerIndexName) + //Following line shouldn't throw any exception + followerClient.index(IndexRequest(followerIndexName).id("2").source(sourceMap), RequestOptions.DEFAULT) + } + + fun `test stop without replication in progress`() { + val followerClient = getClientForCluster(FOLLOWER) + assertThatThrownBy { + followerClient.stopReplication(followerIndexName) + }.isInstanceOf(ResponseException::class.java) + .hasMessageContaining("No replication in progress for index:follower_index") + } + + fun `test stop with deleted follower index`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + createConnectionBetweenClusters(FOLLOWER, LEADER) + val createIndexResponse = leaderClient.indices().create(CreateIndexRequest(leaderIndexName), RequestOptions.DEFAULT) + assertThat(createIndexResponse.isAcknowledged).isTrue() + val sourceMap = mapOf("name" to 
randomAlphaOfLength(5)) + leaderClient.index(IndexRequest(leaderIndexName).id("1").source(sourceMap), RequestOptions.DEFAULT) + // Need to set waitForRestore=true as the cluster blocks are added only + // after restore is completed. + followerClient.startReplication(StartReplicationRequest("source", leaderIndexName, followerIndexName), + waitForRestore = true) + // Need to wait till index blocks appear into state + assertBusy ({ + val clusterBlocksResponse = followerClient.lowLevelClient.performRequest(Request("GET", "/_cluster/state/blocks")) + val clusterResponseString = EntityUtils.toString(clusterBlocksResponse.entity) + assertThat(clusterResponseString.contains("cross-cluster-replication")) + .withFailMessage("Cant find replication block afer starting replication") + .isTrue() + }, 10, TimeUnit.SECONDS) + //Now delete the follower index + val deleteIndexResponse = followerClient.indices().delete(DeleteIndexRequest(followerIndexName), RequestOptions.DEFAULT) + assertThat(deleteIndexResponse.isAcknowledged).isTrue() + //Stop the replication + followerClient.stopReplication(followerIndexName) + //verify that replication metadata state is cleared from cluster state + val followerClusterState = followerClient.lowLevelClient.performRequest(Request("GET", "/_cluster/state/metadata")) + val followerClusterStateString = EntityUtils.toString(followerClusterState.entity) + assertThat(followerClusterStateString.contains("REPLICATION_OVERALL_STATE_KEY")) + .isFalse() + .withFailMessage("Replication params existing after stop is called") + + } +} diff --git a/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/UpdateAutoFollowPatternIT.kt b/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/UpdateAutoFollowPatternIT.kt new file mode 100644 index 00000000..89a06289 --- /dev/null +++ b/src/test/kotlin/com/amazon/elasticsearch/replication/integ/rest/UpdateAutoFollowPatternIT.kt @@ -0,0 +1,199 @@ +/* + * Copyright 2020 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.integ.rest + +import com.amazon.elasticsearch.replication.MultiClusterAnnotations +import com.amazon.elasticsearch.replication.MultiClusterRestTestCase +import com.amazon.elasticsearch.replication.StartReplicationRequest +import com.amazon.elasticsearch.replication.deleteAutoFollowPattern +import com.amazon.elasticsearch.replication.startReplication +import com.amazon.elasticsearch.replication.stopReplication +import com.amazon.elasticsearch.replication.task.autofollow.AutoFollowExecutor +import com.amazon.elasticsearch.replication.task.index.IndexReplicationExecutor +import com.amazon.elasticsearch.replication.updateAutoFollowPattern +import org.apache.http.HttpStatus +import org.apache.http.entity.ContentType +import org.apache.http.nio.entity.NStringEntity +import org.assertj.core.api.Assertions +import org.elasticsearch.client.Request +import org.elasticsearch.client.RequestOptions +import org.elasticsearch.client.ResponseException +import org.elasticsearch.client.RestHighLevelClient +import org.elasticsearch.client.indices.CreateIndexRequest +import org.elasticsearch.client.indices.GetIndexRequest +import org.elasticsearch.common.unit.TimeValue +import org.elasticsearch.tasks.TaskInfo +import java.util.Locale + +import java.util.concurrent.TimeUnit + + +@MultiClusterAnnotations.ClusterConfigurations( + MultiClusterAnnotations.ClusterConfiguration(clusterName = 
LEADER), + MultiClusterAnnotations.ClusterConfiguration(clusterName = FOLLOWER) +) +class UpdateAutoFollowPatternIT: MultiClusterRestTestCase() { + private val indexPrefix = "leader_index_" + private val indexPattern = "leader_index*" + private val indexPatternName = "test_pattern" + private val connectionAlias = "test_conn" + + fun `test auto follow pattern`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + val leaderIndexName = createRandomIndex(leaderClient) + createConnectionBetweenClusters(FOLLOWER, LEADER, connectionAlias) + + try { + followerClient.updateAutoFollowPattern(connectionAlias, indexPatternName, indexPattern) + + // Verify that existing index matching the pattern are replicated. + assertBusy ({ + Assertions.assertThat(followerClient.indices() + .exists(GetIndexRequest(leaderIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + }, 30, TimeUnit.SECONDS) + Assertions.assertThat(getAutoFollowTasks(FOLLOWER).size).isEqualTo(1) + + + // Verify that newly created index on leader which match the pattern are also replicated. 
+ val leaderIndexNameNew = createRandomIndex(leaderClient) + assertBusy ({ + Assertions.assertThat(followerClient.indices() + .exists(GetIndexRequest(leaderIndexNameNew), RequestOptions.DEFAULT)) + .isEqualTo(true) + }, 30, TimeUnit.SECONDS) + } finally { + followerClient.deleteAutoFollowPattern(connectionAlias, indexPatternName) + } + } + + fun `test auto follow shouldn't add already triggered index`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + val leaderIndexName = createRandomIndex(leaderClient) + createConnectionBetweenClusters(FOLLOWER, LEADER, connectionAlias) + + try { + followerClient.startReplication(StartReplicationRequest(connectionAlias, leaderIndexName, leaderIndexName), + TimeValue.timeValueSeconds(10),true) + + assertBusy({ + Assertions.assertThat(followerClient.indices() + .exists(GetIndexRequest(leaderIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + }, 30, TimeUnit.SECONDS) + + // Assert that there is no auto follow task & one index replication task + Assertions.assertThat(getAutoFollowTasks(FOLLOWER).size).isEqualTo(0) + Assertions.assertThat(getIndexReplicationTasks(FOLLOWER).size).isEqualTo(1) + + try { + followerClient.updateAutoFollowPattern(connectionAlias, indexPatternName, indexPattern) + + // Assert that there is still only one index replication task + Assertions.assertThat(getAutoFollowTasks(FOLLOWER).size).isEqualTo(1) + Assertions.assertThat(getIndexReplicationTasks(FOLLOWER).size).isEqualTo(1) + } finally { + followerClient.deleteAutoFollowPattern(connectionAlias, indexPatternName) + } + } finally { + followerClient.stopReplication(leaderIndexName) + } + } + + fun `test auto follow should fail if remote connection doesn't exist`() { + val followerClient = getClientForCluster(FOLLOWER) + // Call autofollow pattern without setting up remote connection. 
+ Assertions.assertThatThrownBy { + followerClient.updateAutoFollowPattern(connectionAlias, indexPatternName, indexPattern) + }.isInstanceOf(ResponseException::class.java) + .hasMessageContaining("no such remote cluster") + } + + fun `test removing autofollow pattern stop autofollow task`() { + val followerClient = getClientForCluster(FOLLOWER) + val leaderClient = getClientForCluster(LEADER) + createConnectionBetweenClusters(FOLLOWER, LEADER, connectionAlias) + + val leaderIndexName = createRandomIndex(leaderClient) + + try { + followerClient.updateAutoFollowPattern(connectionAlias, indexPatternName, indexPattern) + + // Verify that existing index matching the pattern are replicated. + assertBusy { + Assertions.assertThat(followerClient.indices() + .exists(GetIndexRequest(leaderIndexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + } + + Assertions.assertThat(getAutoFollowTasks(FOLLOWER).size).isEqualTo(1) + Assertions.assertThat(getIndexReplicationTasks(FOLLOWER).size).isEqualTo(1) + } finally { + followerClient.deleteAutoFollowPattern(connectionAlias, indexPatternName) + } + + // Verify that auto follow tasks is stopped but the shard replication task remains. 
+ assertBusy ({ + Assertions.assertThat(getAutoFollowTasks(FOLLOWER).size).isEqualTo(0) + }, 30, TimeUnit.SECONDS) + + Assertions.assertThat(getIndexReplicationTasks(FOLLOWER).size).isEqualTo(1) + } + + fun createRandomIndex(client: RestHighLevelClient): String { + val indexName = indexPrefix + randomAlphaOfLength(6).toLowerCase(Locale.ROOT) + val createIndexResponse = client.indices().create(CreateIndexRequest(indexName), RequestOptions.DEFAULT) + Assertions.assertThat(createIndexResponse.isAcknowledged).isTrue() + assertBusy { + Assertions.assertThat(client.indices() + .exists(GetIndexRequest(indexName), RequestOptions.DEFAULT)) + .isEqualTo(true) + } + return indexName + } + fun getAutoFollowTasks(clusterName: String): List { + return getReplicationTaskList(clusterName, AutoFollowExecutor.TASK_NAME + "*") + } + + fun getIndexReplicationTasks(clusterName: String): List { + return getReplicationTaskList(clusterName, IndexReplicationExecutor.TASK_NAME + "*") + } + + fun createDummyConnection(fromClusterName: String, connectionName: String="source") { + val fromCluster = getNamedCluster(fromClusterName) + val persistentConnectionRequest = Request("PUT", "_cluster/settings") + val toClusterDummyHostSeed = "localhost:65536" + val entityAsString = """ + { + "persistent": { + "cluster": { + "remote": { + "$connectionName": { + "seeds": [ "$toClusterDummyHostSeed" ] + } + } + } + } + }""".trimMargin() + + persistentConnectionRequest.entity = NStringEntity(entityAsString, ContentType.APPLICATION_JSON) + val persistentConnectionResponse = fromCluster.lowLevelClient.performRequest(persistentConnectionRequest) + assertEquals(HttpStatus.SC_OK.toLong(), persistentConnectionResponse.statusLine.statusCode.toLong()) + } + +} \ No newline at end of file diff --git a/src/test/kotlin/com/amazon/elasticsearch/replication/metadata/ReplicationMetadataTests.kt b/src/test/kotlin/com/amazon/elasticsearch/replication/metadata/ReplicationMetadataTests.kt new file mode 100644 index 
00000000..43824846 --- /dev/null +++ b/src/test/kotlin/com/amazon/elasticsearch/replication/metadata/ReplicationMetadataTests.kt @@ -0,0 +1,100 @@ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +package com.amazon.elasticsearch.replication.metadata + +import org.assertj.core.api.Assertions.assertThat +import org.elasticsearch.common.io.stream.BytesStreamOutput +import org.elasticsearch.test.ESTestCase +import org.elasticsearch.test.XContentTestUtils + +class ReplicationMetadataTests : ESTestCase() { + + companion object { + val TEST_PATTERN = AutoFollowPattern("2020 logs", "logs-2020*") + val REMOTE_CLUSTER_ALIAS = "leader" + const val INDEX = "index" + const val REMOTE_INDEX = "remoteIndex" + } + fun `test add autofollow pattern and diff`() { + val expected = ReplicationMetadata.EMPTY.addPattern(REMOTE_CLUSTER_ALIAS, TEST_PATTERN) + assertEquals(TEST_PATTERN, expected.autoFollowPatterns.getValue(REMOTE_CLUSTER_ALIAS).getValue("2020 logs")) + + val diff = expected.diff(ReplicationMetadata.EMPTY) + val actual = diff.apply(ReplicationMetadata.EMPTY) + assertEquals(expected, actual) + } + + fun `test remove autofollow pattern and diff`() { + val metadata = ReplicationMetadata.EMPTY.addPattern(REMOTE_CLUSTER_ALIAS, TEST_PATTERN) + val expected = metadata.removePattern(REMOTE_CLUSTER_ALIAS, TEST_PATTERN.name) + assertThat(expected.autoFollowPatterns[REMOTE_CLUSTER_ALIAS]).isEmpty() + + val diff = expected.diff(metadata) + 
val actual = diff.apply(metadata) + assertEquals(expected, actual) + } + + fun `test add replicated index and diff`() { + val expected = ReplicationMetadata.EMPTY.addIndex(REMOTE_CLUSTER_ALIAS, INDEX, REMOTE_INDEX) + assertEquals(REMOTE_INDEX, expected.replicatedIndices.getValue(REMOTE_CLUSTER_ALIAS).getValue(INDEX)) + + val diff = expected.diff(ReplicationMetadata.EMPTY) + val actual = diff.apply(ReplicationMetadata.EMPTY) + assertEquals(expected, actual) + } + + fun `test remove replicated index and diff`() { + val metadata = ReplicationMetadata.EMPTY.addIndex(REMOTE_CLUSTER_ALIAS, INDEX, REMOTE_INDEX) + val expected = metadata.removeIndex(REMOTE_CLUSTER_ALIAS, INDEX) + assertThat(expected.replicatedIndices[REMOTE_CLUSTER_ALIAS]).isEmpty() + + val diff = expected.diff(metadata) + val actual = diff.apply(metadata) + assertEquals(expected, actual) + } + + fun `test remove cluster`() { + val metadata = ReplicationMetadata.EMPTY.addIndex(REMOTE_CLUSTER_ALIAS, INDEX, REMOTE_INDEX) + .addPattern(REMOTE_CLUSTER_ALIAS, TEST_PATTERN) + assertThat(metadata.autoFollowPatterns).containsKey(REMOTE_CLUSTER_ALIAS) + assertThat(metadata.replicatedIndices).containsKey(REMOTE_CLUSTER_ALIAS) + + val removed = metadata.removeRemoteCluster(REMOTE_CLUSTER_ALIAS) + assertThat(removed.autoFollowPatterns).doesNotContainKey(REMOTE_CLUSTER_ALIAS) + assertThat(removed.replicatedIndices).doesNotContainKey(REMOTE_CLUSTER_ALIAS) + } + + fun `test serialization`() { + val expected = ReplicationMetadata.EMPTY + .addPattern(REMOTE_CLUSTER_ALIAS, TEST_PATTERN) + .addIndex(REMOTE_CLUSTER_ALIAS, INDEX, REMOTE_INDEX) + val output = BytesStreamOutput() + expected.writeTo(output) + val deserialized = ReplicationMetadata(output.bytes().streamInput()) + assertEquals(expected, deserialized) + } + + fun `test json serialization`() { + val expected = ReplicationMetadata.EMPTY + .addPattern(REMOTE_CLUSTER_ALIAS, TEST_PATTERN) + .addIndex(REMOTE_CLUSTER_ALIAS, INDEX, REMOTE_INDEX) + + val actual = 
XContentTestUtils.convertToMap(expected)
+        assertThat(actual).containsKey("auto_follow_patterns")
+        assertThat(actual["auto_follow_patterns"] as Map<String, Map<String, String>>)
+            .containsEntry(REMOTE_CLUSTER_ALIAS, mapOf(TEST_PATTERN.name to TEST_PATTERN.pattern))
+    }
+}
\ No newline at end of file
diff --git a/src/test/kotlin/com/amazon/elasticsearch/replication/task/shard/TranslogSequencerTests.kt b/src/test/kotlin/com/amazon/elasticsearch/replication/task/shard/TranslogSequencerTests.kt
new file mode 100644
index 00000000..fb129db5
--- /dev/null
+++ b/src/test/kotlin/com/amazon/elasticsearch/replication/task/shard/TranslogSequencerTests.kt
@@ -0,0 +1,115 @@
+/*
+ * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License").
+ * You may not use this file except in compliance with the License.
+ * A copy of the License is located at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * or in the "license" file accompanying this file. This file is distributed
+ * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+package com.amazon.elasticsearch.replication.task.shard
+
+import com.amazon.elasticsearch.replication.action.changes.GetChangesResponse
+import com.amazon.elasticsearch.replication.action.replay.ReplayChangesAction
+import com.amazon.elasticsearch.replication.action.replay.ReplayChangesRequest
+import com.amazon.elasticsearch.replication.action.replay.ReplayChangesResponse
+import kotlinx.coroutines.ExperimentalCoroutinesApi
+import kotlinx.coroutines.ObsoleteCoroutinesApi
+import kotlinx.coroutines.sync.Semaphore
+import kotlinx.coroutines.test.runBlockingTest
+import org.assertj.core.api.Assertions.assertThat
+import org.elasticsearch.action.ActionListener
+import org.elasticsearch.action.ActionRequest
+import org.elasticsearch.action.ActionResponse
+import org.elasticsearch.action.ActionType
+import org.elasticsearch.action.support.replication.ReplicationResponse.ShardInfo
+import org.elasticsearch.index.shard.ShardId
+import org.elasticsearch.index.translog.Translog
+import org.elasticsearch.tasks.TaskId.EMPTY_TASK_ID
+import org.elasticsearch.test.ESTestCase
+import org.elasticsearch.test.ESTestCase.randomList
+import org.elasticsearch.test.client.NoOpClient
+import java.util.Locale
+
+@ObsoleteCoroutinesApi
+class TranslogSequencerTests : ESTestCase() {
+
+    class RequestCapturingClient : NoOpClient(TranslogSequencerTests::class.java.simpleName) {
+        val requestsReceived = mutableListOf<ReplayChangesRequest>()
+
+        override fun <Req : ActionRequest, Resp : ActionResponse> doExecute(action: ActionType<Resp>,
+                                                                            request: Req,
+                                                                            listener: ActionListener<Resp>) {
+            if (action === ReplayChangesAction.INSTANCE) {
+                requestsReceived.add(request as ReplayChangesRequest)
+                val resp = ReplayChangesResponse()
+                resp.shardInfo = ShardInfo(1, 1)
+                @Suppress("UNCHECKED_CAST")
+                listener.onResponse(resp as Resp)
+            } else {
+                super.doExecute(action, request, listener)
+            }
+        }
+
+        fun reset() {
+            requestsReceived.clear()
+        }
+    }
+
+
+    val remoteCluster = "remoteCluster"
+    val remoteIndex = "remoteIndex"
+    val followerShardId = ShardId("follower", 
"follower_uuid", 0)
+    val client = RequestCapturingClient()
+    init {
+        closeAfterSuite(client)
+    }
+
+    override fun tearDown() {
+        client.reset()
+        super.tearDown()
+    }
+
+    @ExperimentalCoroutinesApi
+    fun `test sequencer out of order`() = runBlockingTest {
+        val startSeqNo = randomNonNegativeLong()
+        val rateLimiter = Semaphore(10)
+        val sequencer = TranslogSequencer(this, followerShardId, remoteCluster, remoteIndex, EMPTY_TASK_ID,
+                                          client, rateLimiter, startSeqNo)
+
+        // Send requests out of order (shuffled seqNo) and await for them to be processed.
+        var batchSeqNo = startSeqNo
+        val batches = randomList(1, rateLimiter.availablePermits) {
+            val (batch, lastSeqNo) = randomChangesResponse(batchSeqNo)
+            batchSeqNo = lastSeqNo
+            batch
+        }
+        batches.shuffled().forEach {
+            rateLimiter.acquire()
+            sequencer.send(it)
+        }
+        sequencer.close()
+
+        // Now verify that there was one replay request for every batch of changes that was sent
+        assertThat(client.requestsReceived.size).isEqualTo(batches.size)
+        batches.zip(client.requestsReceived).forEach { (batch, req) ->
+            assertThat(batch.changes.first().seqNo()).isEqualTo(req.changes.first().seqNo())
+        }
+    }
+
+    fun randomChangesResponse(startSeqNo: Long) : Pair<GetChangesResponse, Long> {
+        var seqNo = startSeqNo
+        val changes = randomList(1, randomIntBetween(1, 512)) {
+            seqNo = seqNo.inc()
+            Translog.Index("_doc", randomAlphaOfLength(10).toLowerCase(Locale.ROOT), seqNo,
+                           1L, "{}".toByteArray(Charsets.UTF_8))
+        }
+        return Pair(GetChangesResponse(changes, startSeqNo.inc(), startSeqNo), seqNo)
+    }
+}
\ No newline at end of file
diff --git a/src/test/resources/security/scripts/SecurityAdminWrapper.sh b/src/test/resources/security/scripts/SecurityAdminWrapper.sh
new file mode 100755
index 00000000..30a09a13
--- /dev/null
+++ b/src/test/resources/security/scripts/SecurityAdminWrapper.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+BUILD_DIR=$1
+LEADER_CONFIG_DIR=$BUILD_DIR/testclusters/leaderCluster-0/config
+LEADER_PLUGIN_DIR=$BUILD_DIR/testclusters/leaderCluster-0/distro/7.10.2-INTEG_TEST/plugins/opendistro_security/ +FOLLOWER_CONFIG_DIR=$BUILD_DIR/testclusters/followCluster-0/config +FOLLOWER_PLUGIN_DIR=$BUILD_DIR/testclusters/followCluster-0/distro/7.10.2-INTEG_TEST/plugins/opendistro_security/ + +"$LEADER_PLUGIN_DIR/tools/securityadmin.sh" -p 9300 \ +-cd "$LEADER_PLUGIN_DIR/securityconfig" \ +-icl -key "$LEADER_CONFIG_DIR/kirk-key.pem" \ +-cert "$LEADER_CONFIG_DIR/kirk.pem" \ +-cacert "$LEADER_CONFIG_DIR/root-ca.pem" -nhnv + +"$FOLLOWER_PLUGIN_DIR/tools/securityadmin.sh" -p 9301 \ +-cd "$FOLLOWER_PLUGIN_DIR/securityconfig" \ +-icl -key "$FOLLOWER_CONFIG_DIR/kirk-key.pem" \ +-cert "$FOLLOWER_CONFIG_DIR/kirk.pem" \ +-cacert "$FOLLOWER_CONFIG_DIR/root-ca.pem" -nhnv