From 5db3425ba3f24fbcdd214ccf0eeb8f99bacde6eb Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 5 Sep 2019 14:24:04 +0200 Subject: [PATCH 01/24] Split up Solr schema.xml. Moving all and for custom metadata block indexing into separate files for easier deployment and maintainability. See #6142 for more. --- conf/solr/7.3.1/schema.xml | 460 +---------------------- conf/solr/7.3.1/schema_dv_cmb_copies.xml | 229 +++++++++++ conf/solr/7.3.1/schema_dv_cmb_fields.xml | 229 +++++++++++ 3 files changed, 462 insertions(+), 456 deletions(-) create mode 100644 conf/solr/7.3.1/schema_dv_cmb_copies.xml create mode 100644 conf/solr/7.3.1/schema_dv_cmb_fields.xml diff --git a/conf/solr/7.3.1/schema.xml b/conf/solr/7.3.1/schema.xml index deabc789e2e..37a4a1eb3bc 100644 --- a/conf/solr/7.3.1/schema.xml +++ b/conf/solr/7.3.1/schema.xml @@ -220,236 +220,9 @@ + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + @@ -506,233 +279,8 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + @@ -279,7 +279,7 @@ - + diff --git a/conf/solr/7.3.1/schema_dv_cmb_copies.xml b/conf/solr/7.3.1/schema_dv_mdb_copies.xml similarity index 100% rename from conf/solr/7.3.1/schema_dv_cmb_copies.xml rename to conf/solr/7.3.1/schema_dv_mdb_copies.xml diff --git a/conf/solr/7.3.1/schema_dv_cmb_fields.xml b/conf/solr/7.3.1/schema_dv_mdb_fields.xml similarity index 100% rename from conf/solr/7.3.1/schema_dv_cmb_fields.xml rename to conf/solr/7.3.1/schema_dv_mdb_fields.xml diff --git a/conf/solr/7.3.1/updateSchemaMDB.sh b/conf/solr/7.3.1/updateSchemaMDB.sh index b89a5cf739c..bf7efbb0c97 100755 --- a/conf/solr/7.3.1/updateSchemaMDB.sh +++ b/conf/solr/7.3.1/updateSchemaMDB.sh @@ -65,15 +65,15 @@ curl -f -sS "${DATAVERSE_URL}/api/admin/index/solr/schema${UNBLOCK_KEY}" > $TMPF echo "Splitting up based on \"---\" marker" csplit -f"$TMPFILE" --suppress-matched -s $TMPFILE "/---/" '{*}' -echo "Writing ${TARGET}/schema_dv_cmb_fields.xml" -echo "" > ${TARGET}/schema_dv_cmb_fields.xml -cat ${TMPFILE}00 >> ${TARGET}/schema_dv_cmb_fields.xml -echo "" >> ${TARGET}/schema_dv_cmb_fields.xml +echo "Writing ${TARGET}/schema_dv_mdb_fields.xml" +echo "" > ${TARGET}/schema_dv_mdb_fields.xml +cat ${TMPFILE}00 >> ${TARGET}/schema_dv_mdb_fields.xml +echo "" >> ${TARGET}/schema_dv_mdb_fields.xml -echo "Writing ${TARGET}/schema_dv_cmb_copies.xml" -echo "" > ${TARGET}/schema_dv_cmb_copies.xml -cat ${TMPFILE}01 >> ${TARGET}/schema_dv_cmb_copies.xml -echo "" >> ${TARGET}/schema_dv_cmb_copies.xml +echo "Writing ${TARGET}/schema_dv_mdb_copies.xml" +echo "" > ${TARGET}/schema_dv_mdb_copies.xml +cat ${TMPFILE}01 >> ${TARGET}/schema_dv_mdb_copies.xml +echo "" >> ${TARGET}/schema_dv_mdb_copies.xml rm ${TMPFILE}* diff --git a/doc/sphinx-guides/source/developers/dev-environment.rst b/doc/sphinx-guides/source/developers/dev-environment.rst index e3b43b8d664..82b2f0bcc56 100755 --- a/doc/sphinx-guides/source/developers/dev-environment.rst +++ b/doc/sphinx-guides/source/developers/dev-environment.rst @@ -151,9 +151,9 @@ To install Solr, execute the following commands: ``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/7.3.1/schema.xml`` -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/7.3.1/schema_dv_cmb_fields.xml`` +``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/7.3.1/schema_dv_mdb_fields.xml`` -``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/7.3.1/schema_dv_cmb_copies.xml`` +``curl -O https://raw.githubusercontent.com/IQSS/dataverse/develop/conf/solr/7.3.1/schema_dv_mdb_copies.xml`` ``mv schema*.xml collection1/conf`` diff --git a/scripts/installer/Makefile b/scripts/installer/Makefile index ee87c311bcd..fda5c7cf3a6 100644 --- a/scripts/installer/Makefile +++ b/scripts/installer/Makefile @@ -6,7 +6,7 @@ API_SCRIPTS=${INSTALLER_ZIP_DIR}/setup-datasetfields.sh ${INSTALLER_ZIP_DIR}/set DB_SCRIPT=${INSTALLER_ZIP_DIR}/reference_data.sql JHOVE_CONFIG=${INSTALLER_ZIP_DIR}/jhove.conf JHOVE_SCHEMA=${INSTALLER_ZIP_DIR}/jhoveConfig.xsd -SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/schema_dv_cmb_fields.xml ${INSTALLER_ZIP_DIR}/schema_dv_cmb_copies.xml ${INSTALLER_ZIP_DIR}/updateSchemaMDB.sh +SOLR_SCHEMA=${INSTALLER_ZIP_DIR}/schema.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_fields.xml ${INSTALLER_ZIP_DIR}/schema_dv_mdb_copies.xml ${INSTALLER_ZIP_DIR}/updateSchemaMDB.sh SOLR_CONFIG=${INSTALLER_ZIP_DIR}/solrconfig.xml INSTALL_SCRIPT=${INSTALLER_ZIP_DIR}/install @@ -70,7 +70,7 @@ ${JHOVE_SCHEMA}: ../../conf/jhove/jhoveConfig.xsd @mkdir -p ${INSTALLER_ZIP_DIR} /bin/cp ../../conf/jhove/jhoveConfig.xsd ${INSTALLER_ZIP_DIR} -${SOLR_SCHEMA}: ../../conf/solr/7.3.1/schema.xml ../../conf/solr/7.3.1/schema_dv_cmb_fields.xml ../../conf/solr/7.3.1/schema_dv_cmb_copies.xml ../../conf/solr/7.3.1/updateSchemaMDB.sh +${SOLR_SCHEMA}: ../../conf/solr/7.3.1/schema.xml ../../conf/solr/7.3.1/schema_dv_mdb_fields.xml ../../conf/solr/7.3.1/schema_dv_mdb_copies.xml ../../conf/solr/7.3.1/updateSchemaMDB.sh @echo copying Solr schema file @mkdir -p ${INSTALLER_ZIP_DIR} /bin/cp ../../conf/solr/7.3.1/schema*.xml ../../conf/solr/7.3.1/updateSchemaMDB.sh ${INSTALLER_ZIP_DIR} From ecbaca87bdec9cd0ae0edaa98979bce93f02c60f Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Wed, 11 Sep 2019 15:57:22 +0200 Subject: [PATCH 19/24] Add release notes for Solr schema.xml separation. Relates to #6142. --- doc/release-notes/6142-flex-solr-schema.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 doc/release-notes/6142-flex-solr-schema.md diff --git a/doc/release-notes/6142-flex-solr-schema.md b/doc/release-notes/6142-flex-solr-schema.md new file mode 100644 index 00000000000..6f2d56eb3e2 --- /dev/null +++ b/doc/release-notes/6142-flex-solr-schema.md @@ -0,0 +1,17 @@ +### Upgrading: optionally reconfigure Solr + +With this release, we moved all fields in Solr search index that relate to the default metadata schemas to separate +files. Custom metadata block configuration of the search index is getting more flexible that way. For details, +see admin/metadatacustomization.html#updating-the-solr-schema. + +This is optional, but all future changes will go to these files. It might be a good idea to adapt now or be aware to +look for changes to these files in the future, too. + +**When you do want to benefit as of now:** + +1. You will need to replace or modify your `schema.xml` with the recent one (containing XML includes) +2. Copy the schema_dv_mdb_XXX.xml files to the same location as the `schema.xml` +3. A re-index is not necessary as long no other changes happened, as this is only a configuration moving. + +In case you use custom metadata blocks, you might find the new `updateSchemaMDB.sh` script beneficial. Again, +see admin/metadatacustomization.html#updating-the-solr-schema. \ No newline at end of file From 87a8e535f735fc1eb1c383af9e7f2406b766cfb9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 11 Sep 2019 11:03:55 -0400 Subject: [PATCH 20/24] tweak the release notes #6142 --- doc/release-notes/6142-flex-solr-schema.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/release-notes/6142-flex-solr-schema.md b/doc/release-notes/6142-flex-solr-schema.md index 6f2d56eb3e2..bc1c5fe3db5 100644 --- a/doc/release-notes/6142-flex-solr-schema.md +++ b/doc/release-notes/6142-flex-solr-schema.md @@ -1,17 +1,17 @@ ### Upgrading: optionally reconfigure Solr -With this release, we moved all fields in Solr search index that relate to the default metadata schemas to separate -files. Custom metadata block configuration of the search index is getting more flexible that way. For details, +With this release, we moved all fields in Solr search index that relate to the default metadata schemas from schema.xml to separate +files. Custom metadata block configuration of the search index can be more easily automated that way. For details, see admin/metadatacustomization.html#updating-the-solr-schema. -This is optional, but all future changes will go to these files. It might be a good idea to adapt now or be aware to +This is optional, but all future changes will go to these files. It might be a good idea to reconfigure Solr now or be aware to look for changes to these files in the future, too. **When you do want to benefit as of now:** 1. You will need to replace or modify your `schema.xml` with the recent one (containing XML includes) -2. Copy the schema_dv_mdb_XXX.xml files to the same location as the `schema.xml` -3. A re-index is not necessary as long no other changes happened, as this is only a configuration moving. +2. Copy the two schema_dv_mdb_XXX.xml files to the same location as the `schema.xml` +3. A re-index is not necessary as long no other changes happened, as this is only a reorganization of Solr fields from a single schema.xml file into multiple files. In case you use custom metadata blocks, you might find the new `updateSchemaMDB.sh` script beneficial. Again, -see admin/metadatacustomization.html#updating-the-solr-schema. \ No newline at end of file +see admin/metadatacustomization.html#updating-the-solr-schema. From d8ae3255a3dfc37321b1375c97c8b36b2963ffa9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 11 Sep 2019 16:12:19 -0400 Subject: [PATCH 21/24] enumerate files, fix formatting #6142 --- doc/release-notes/6142-flex-solr-schema.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/release-notes/6142-flex-solr-schema.md b/doc/release-notes/6142-flex-solr-schema.md index bc1c5fe3db5..24fe9c6a3f8 100644 --- a/doc/release-notes/6142-flex-solr-schema.md +++ b/doc/release-notes/6142-flex-solr-schema.md @@ -1,16 +1,14 @@ ### Upgrading: optionally reconfigure Solr -With this release, we moved all fields in Solr search index that relate to the default metadata schemas from schema.xml to separate +With this release, we moved all fields in Solr search index that relate to the default metadata schemas from `schema.xml` to separate files. Custom metadata block configuration of the search index can be more easily automated that way. For details, see admin/metadatacustomization.html#updating-the-solr-schema. This is optional, but all future changes will go to these files. It might be a good idea to reconfigure Solr now or be aware to -look for changes to these files in the future, too. - -**When you do want to benefit as of now:** +look for changes to these files in the future, too. Here's how: 1. You will need to replace or modify your `schema.xml` with the recent one (containing XML includes) -2. Copy the two schema_dv_mdb_XXX.xml files to the same location as the `schema.xml` +2. Copy `schema_dv_mdb_fields.xml` and `schema_dv_mdb_copies.xml` to the same location as the `schema.xml` 3. A re-index is not necessary as long no other changes happened, as this is only a reorganization of Solr fields from a single schema.xml file into multiple files. In case you use custom metadata blocks, you might find the new `updateSchemaMDB.sh` script beneficial. Again, From aebc850da6d30f7e57ebeedd24d40620accf9c00 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 11 Sep 2019 16:21:24 -0400 Subject: [PATCH 22/24] echo a *suggestion* to run updateSchemaMDB.sh #6142 --- scripts/api/setup-optional-harvard.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/api/setup-optional-harvard.sh b/scripts/api/setup-optional-harvard.sh index 21aaa93ba41..39ccbb310b6 100755 --- a/scripts/api/setup-optional-harvard.sh +++ b/scripts/api/setup-optional-harvard.sh @@ -55,6 +55,5 @@ curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @da curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/customCHIA.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/customDigaai.tsv -H "Content-type: text/tab-separated-values" curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @data/metadatablocks/custom_hbgdki.tsv -H "Content-type: text/tab-separated-values" -# setup/update local solr instance with custom metadata block fields -sudo -u solr ../../conf/solr/7.3.1/updateSchemaMDB.sh -t /usr/local/solr/server/solr/collection1/conf +echo "Because you have loaded custom metadata blocks, you need to update the include files pulled in by Solr's schema.xml. On the Solr server, you can try running the updateSchemaMDB.sh script mentioned in the Metadata Customization section of the Admin Guide or follow the manual steps listed there." echo From 7b2becd9cb53c0b6433fbb03f6c4a00abcae9cc9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 11 Sep 2019 16:34:51 -0400 Subject: [PATCH 23/24] explain updateSchemaMDB.sh must be run on Solr server #6142 --- doc/sphinx-guides/source/admin/metadatacustomization.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 5422cf9c7c5..1a41d329b3b 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -601,8 +601,7 @@ configuration, including any enabled metadata schemas: ``curl http://localhost:8080/api/admin/index/solr/schema`` -For convenience and automation you can download :download:`updateSchemaMDB.sh <../../../../conf/solr/7.3.1/updateSchemaMDB.sh>`. It downloads, parses and writes the schema -files before triggering a Solr reload. **You should run this as the Solr user, not root.** +For convenience and automation you can download and consider running :download:`updateSchemaMDB.sh <../../../../conf/solr/7.3.1/updateSchemaMDB.sh>`. It uses the API endpoint above and writes schema files to the filesystem (so be sure to run it on the Solr server itself as the Unix user who owns the Solr files) and then triggers a Solr reload. By default, it will download from Dataverse at `http://localhost:8080` and reload Solr at `http://localhost:8983`. You may use the following environment variables with this script or mix'n'match with options: From 863af4fc5e81a5ac5c1fecc31a1aafb1d9ff7cb7 Mon Sep 17 00:00:00 2001 From: Oliver Bertuch Date: Thu, 12 Sep 2019 00:38:11 +0200 Subject: [PATCH 24/24] Refactor update script to avoid csplit and use plain grep. Relates to #6142. --- conf/solr/7.3.1/updateSchemaMDB.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/conf/solr/7.3.1/updateSchemaMDB.sh b/conf/solr/7.3.1/updateSchemaMDB.sh index bf7efbb0c97..e4446083442 100755 --- a/conf/solr/7.3.1/updateSchemaMDB.sh +++ b/conf/solr/7.3.1/updateSchemaMDB.sh @@ -62,17 +62,14 @@ TMPFILE=`mktemp` curl -f -sS "${DATAVERSE_URL}/api/admin/index/solr/schema${UNBLOCK_KEY}" > $TMPFILE ### Processing -echo "Splitting up based on \"---\" marker" -csplit -f"$TMPFILE" --suppress-matched -s $TMPFILE "/---/" '{*}' - echo "Writing ${TARGET}/schema_dv_mdb_fields.xml" echo "" > ${TARGET}/schema_dv_mdb_fields.xml -cat ${TMPFILE}00 >> ${TARGET}/schema_dv_mdb_fields.xml +cat ${TMPFILE} | grep ".*> ${TARGET}/schema_dv_mdb_fields.xml echo "" >> ${TARGET}/schema_dv_mdb_fields.xml echo "Writing ${TARGET}/schema_dv_mdb_copies.xml" echo "" > ${TARGET}/schema_dv_mdb_copies.xml -cat ${TMPFILE}01 >> ${TARGET}/schema_dv_mdb_copies.xml +cat ${TMPFILE} | grep ".*> ${TARGET}/schema_dv_mdb_copies.xml echo "" >> ${TARGET}/schema_dv_mdb_copies.xml rm ${TMPFILE}*