From e2125d61116259f590259ee56f8b2936d5175b55 Mon Sep 17 00:00:00 2001 From: Serge Huber Date: Tue, 7 Jan 2025 10:53:48 +0100 Subject: [PATCH] - Make the search port configurable so that we can avoid conflicts between the ElasticSearch and OpenSearch integration tests - Add documentation on how to migrate from ElasticSearch to OpenSearch (not tested yet) --- .github/workflows/unomi-ci-build-tests.yml | 14 +- .gitignore | 1 + itests/pom.xml | 11 +- .../java/org/apache/unomi/itests/BaseIT.java | 16 +- manual/src/main/asciidoc/index.adoc | 4 + .../migrate-elasticsearch-to-opensearch.adoc | 212 ++++++++++++++++++ 6 files changed, 248 insertions(+), 10 deletions(-) create mode 100644 manual/src/main/asciidoc/migrations/migrate-elasticsearch-to-opensearch.adoc diff --git a/.github/workflows/unomi-ci-build-tests.yml b/.github/workflows/unomi-ci-build-tests.yml index be06c53b3..948fa6660 100644 --- a/.github/workflows/unomi-ci-build-tests.yml +++ b/.github/workflows/unomi-ci-build-tests.yml @@ -20,7 +20,6 @@ jobs: uses: actions/setup-java@v1 with: java-version: 11 - cache: maven - name: Build and Unit tests run: mvn -U -ntp -e clean install @@ -30,18 +29,23 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - search-engine: [elasticsearch, opensearch] + include: + - search-engine: elasticsearch + port: 9400 + - search-engine: opensearch + port: 9401 fail-fast: false - max-parallel: 1 # Run tests sequentially to avoid port conflicts steps: - uses: actions/checkout@v4 - name: Set up JDK 11 uses: actions/setup-java@v1 with: java-version: 11 - cache: maven - name: Integration tests - run: mvn -ntp clean install -Pintegration-tests,${{ matrix.search-engine }} + run: | + mvn -ntp clean install -Pintegration-tests,${{ matrix.search-engine }} \ + -Dopensearch.port=${{ matrix.port }} \ + -Delasticsearch.port=${{ matrix.port }} - name: Archive code coverage logs uses: actions/upload-artifact@v3 if: false # UNOMI-746 Reactivate if necessary diff --git a/.gitignore b/.gitignore index 
1d2e17afa..f8c30c07e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /NOTICE-generated .settings +.vscode target .classpath .project diff --git a/itests/pom.xml b/itests/pom.xml index b8414186c..5868eee0d 100644 --- a/itests/pom.xml +++ b/itests/pom.xml @@ -354,9 +354,12 @@ opensearch + + 9401 + - use.opensearch + use.opensearch @@ -372,6 +375,7 @@ foo opensearch + localhost:${opensearch.port} @@ -401,7 +405,7 @@ opensearch - 9400:9200 + ${opensearch.port}:9200 single-node @@ -417,12 +421,13 @@ - http://localhost:9400 + http://localhost:${opensearch.port} GET 200 + ${project.build.directory}/opensearch-port.properties diff --git a/itests/src/test/java/org/apache/unomi/itests/BaseIT.java b/itests/src/test/java/org/apache/unomi/itests/BaseIT.java index 497841b31..76de1cd23 100644 --- a/itests/src/test/java/org/apache/unomi/itests/BaseIT.java +++ b/itests/src/test/java/org/apache/unomi/itests/BaseIT.java @@ -307,10 +307,10 @@ public Option[] config() { editConfigurationFilePut("etc/system.properties", SEARCH_ENGINE_PROPERTY, System.getProperty(SEARCH_ENGINE_PROPERTY, SEARCH_ENGINE_ELASTICSEARCH)), editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.graphql.feature.activated", "true"), editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.elasticsearch.cluster.name", "contextElasticSearchITests"), - editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.elasticsearch.addresses", "localhost:9400"), + editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.elasticsearch.addresses", "localhost:" + getSearchPort()), editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.elasticsearch.taskWaitingPollingInterval", "50"), editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.opensearch.cluster.name", "contextElasticSearchITests"), - editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.opensearch.addresses", 
"localhost:9400"), + editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.opensearch.addresses", "localhost:" + getSearchPort()), editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.opensearch.username", "admin"), editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.opensearch.password", "Unomi.1ntegrat10n.Tests"), editConfigurationFilePut("etc/custom.system.properties", "org.apache.unomi.opensearch.sslEnable", "false"), @@ -686,4 +686,16 @@ public BasicCredentialsProvider getHttpClientCredentialProvider() { credsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(BASIC_AUTH_USER_NAME, BASIC_AUTH_PASSWORD)); return credsProvider; } + + protected static String getSearchPort() { + String searchEngine = System.getProperty(SEARCH_ENGINE_PROPERTY, SEARCH_ENGINE_ELASTICSEARCH); + if (SEARCH_ENGINE_OPENSEARCH.equals(searchEngine)) { + // For OpenSearch, get the port from the system property set by maven-failsafe-plugin + return System.getProperty("org.apache.unomi.opensearch.addresses", "localhost:9401") + .split(":")[1]; // Extract port number from "localhost:9401" + } else { + // For Elasticsearch, use the default port or system property if set + return System.getProperty("elasticsearch.port", "9400"); + } + } } diff --git a/manual/src/main/asciidoc/index.adoc b/manual/src/main/asciidoc/index.adoc index 49ada8ad2..eaffa08b0 100644 --- a/manual/src/main/asciidoc/index.adoc +++ b/manual/src/main/asciidoc/index.adoc @@ -60,6 +60,10 @@ include::graphql-examples.adoc[] include::migrations/migrations.adoc[] +include::migrations/migrate-1.4-to-1.5.adoc[leveloffset=+1] + +include::migrations/migrate-elasticsearch-to-opensearch.adoc[leveloffset=+1] + == Queries and aggregations include::queries-and-aggregations.adoc[] diff --git a/manual/src/main/asciidoc/migrations/migrate-elasticsearch-to-opensearch.adoc b/manual/src/main/asciidoc/migrations/migrate-elasticsearch-to-opensearch.adoc new file 
mode 100644 index 000000000..a728fb24f --- /dev/null +++ b/manual/src/main/asciidoc/migrations/migrate-elasticsearch-to-opensearch.adoc @@ -0,0 +1,212 @@ +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + += Migrating from Elasticsearch to OpenSearch +:toc: macro +:toclevels: 4 +:toc-title: Table of contents +:numbered: + +toc::[] + +== Overview + +This guide describes how to migrate your Apache Unomi data from Elasticsearch to OpenSearch. It covers two approaches: the OpenSearch Replication Tool, which is designed to handle large-scale migrations efficiently while maintaining data consistency, and a Logstash pipeline for smaller deployments or when more control over the migration is needed. This procedure has not been tested end to end yet, so validate it in a non-production environment before relying on it. + +== Prerequisites + +Before starting the migration, ensure you have: + +* A running Elasticsearch cluster containing your Unomi data +* A target OpenSearch cluster that is set up and running +* Sufficient disk space on the target cluster +* Java 11 or later installed +* Network connectivity between the source and target clusters + +== Migration Options + +=== Option 1: OpenSearch Replication Tool (Recommended) + +The OpenSearch Replication Tool is the recommended approach for production environments, especially for large datasets. 
+ +==== Installation + +[source,bash] +---- +git clone https://github.com/opensearch-project/opensearch-migrations.git +cd opensearch-migrations/replication-tool +./gradlew build +---- + +==== Configuration + +Create a configuration file `config.yml`: + +[source,yaml] +---- +source: + hosts: ["source-elasticsearch-host:9200"] + user: "elastic_user" # if authentication is enabled + password: "elastic_pass" # if authentication is enabled + +destination: + hosts: ["target-opensearch-host:9200"] + user: "opensearch_user" # if authentication is enabled + password: "opensearch_pass" # if authentication is enabled + +indices: + - name: "context-*" # Unomi context indices + - name: "segment-*" # Unomi segment indices + - name: "profile-*" # Unomi profile indices + - name: "session-*" # Unomi session indices +---- + +==== Running the Migration + +[source,bash] +---- +./bin/replication-tool --config config.yml +---- + +The tool provides progress updates and ensures data consistency during the migration. + +=== Option 2: Logstash Pipeline + +For smaller deployments or when more control over the migration process is needed, you can use Logstash. + +==== Logstash Configuration + +Create a file named `logstash-migration.conf`: + +[source,ruby] +---- +input { + elasticsearch { + hosts => ["source-elasticsearch-host:9200"] + index => "context-*" # Repeat for other indices + size => 5000 + scroll => "5m" + docinfo => true + user => "elastic_user" # if authentication is enabled + password => "elastic_pass" # if authentication is enabled + } +} + +output { + opensearch { + hosts => ["target-opensearch-host:9200"] + index => "%{[@metadata][_index]}" + document_id => "%{[@metadata][_id]}" + user => "opensearch_user" # if authentication is enabled + password => "opensearch_pass" # if authentication is enabled + } +} +---- + +==== Running Logstash Migration + +[source,bash] +---- +logstash -f logstash-migration.conf +---- + +== Post-Migration Steps + +1. 
Verify Data Integrity ++ +[source,bash] +---- +# Check document counts +curl -X GET "source-elasticsearch-host:9200/_cat/indices/context-*?v" +curl -X GET "target-opensearch-host:9200/_cat/indices/context-*?v" +---- + +2. Update Unomi Configuration ++ +Edit `etc/custom.system.properties`: ++ +[source,properties] +---- +# Comment out or remove Elasticsearch properties +#org.apache.unomi.elasticsearch.addresses=localhost:9200 +#org.apache.unomi.elasticsearch.cluster.name=contextElasticSearch + +# Add OpenSearch properties +org.apache.unomi.opensearch.addresses=localhost:9200 +org.apache.unomi.opensearch.cluster.name=contextOpenSearch +org.apache.unomi.opensearch.sslEnable=false +org.apache.unomi.opensearch.username=admin +org.apache.unomi.opensearch.password=admin +---- + +3. Restart Apache Unomi ++ +[source,bash] +---- +./bin/stop +./bin/start +---- + +== Troubleshooting + +=== Common Issues + +1. Connection Timeouts +* Increase the timeout settings in your configuration +* Check network connectivity between clusters + +2. Memory Issues +* Adjust JVM heap size for the migration tool +* Consider reducing batch sizes + +3. Missing Indices +* Verify index patterns in configuration +* Check source cluster health + +=== Monitoring Progress + +The OpenSearch Replication Tool provides progress information during migration: + +* Documents copied +* Time elapsed +* Current transfer rate +* Estimated completion time + +== Best Practices + +1. *Testing* +* Always test the migration process in a non-production environment first +* Verify all Unomi features work with migrated data + +2. *Performance* +* Run migration during off-peak hours +* Monitor system resources during migration +* Use appropriate batch sizes based on document size + +3. *Backup* +* Create backups of your Elasticsearch indices before migration +* Keep source cluster running until verification is complete + +4. 
*Validation* +* Compare document counts between source and target +* Verify index mappings and settings +* Test Unomi functionality with migrated data + +== Support + +For additional support: + +* OpenSearch Replication Tool: https://github.com/opensearch-project/opensearch-migrations +* Apache Unomi Community: https://unomi.apache.org/community.html +* OpenSearch Forum: https://forum.opensearch.org/ \ No newline at end of file