From 9600343e088b98d54072691ba379bec84595ad8b Mon Sep 17 00:00:00 2001 From: David Pilato Date: Fri, 11 Feb 2022 00:50:04 +0100 Subject: [PATCH] Fix Workplace Search integration tests Also make sure that we don't try to delete existing documents, as this is not supported yet by FSCrawler. --- .../thirdparty/wpsearch/WPSearchClient.java | 11 ++++---- docs/source/admin/fs/local-fs.rst | 5 ++++ .../fs/client/WorkplaceSearchClient.java | 2 +- .../AbstractWorkplaceSearchITCase.java | 2 +- .../workplacesearch/WPSearchClientIT.java | 22 ++++++++-------- .../src/test/resources/log4j2.xml | 11 +++++--- .../fs/settings/FsCrawlerValidator.java | 6 +++++ .../fs/settings/FsCrawlerValidatorTest.java | 25 +++++++++++++++++++ 8 files changed, 60 insertions(+), 24 deletions(-) diff --git a/3rdparty/workplacesearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/thirdparty/wpsearch/WPSearchClient.java b/3rdparty/workplacesearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/thirdparty/wpsearch/WPSearchClient.java index 0672300e2..486c332ff 100644 --- a/3rdparty/workplacesearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/thirdparty/wpsearch/WPSearchClient.java +++ b/3rdparty/workplacesearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/thirdparty/wpsearch/WPSearchClient.java @@ -85,6 +85,7 @@ public class WPSearchClient implements Closeable { private String sourceId; private final Path rootDir; private final Path jobMappingDir; + private String version; /** * Create a client @@ -191,7 +192,7 @@ public void start() { // We check that the service is available try { - String version = getVersion(); + version = getVersion(); logger.info("Wokplace Search Client connected to a service running version {}", version); } catch (Exception e) { logger.warn("failed to create workplace search client on {}, disabling crawler...", host); @@ -205,10 +206,9 @@ public void start() { * Configure the custom source for this client * @param id custom source id * @param 
name custom source name - * @param version workplace search server version * @throws IOException in case of communication error */ - public void configureCustomSource(final String id, final String name, String version) throws IOException { + public void configureCustomSource(final String id, final String name) throws IOException { checkStarted(); // Let's check that the source exists if (id != null) { @@ -224,7 +224,7 @@ public void configureCustomSource(final String id, final String name, String ver List customSourceIds = getCustomSourcesByName(name); if (customSourceIds.isEmpty()) { // Let's create a new source - sourceId = createCustomSource(name, version); + sourceId = createCustomSource(name); logger.debug("Custom source [{}] created with id [{}].", name, sourceId); } else { sourceId = customSourceIds.get(0); @@ -372,11 +372,10 @@ public String listAllCustomSources(int page) { /** * Create a custom source by using the built-in template * @param sourceName the source name to build - * @param version version of the workplace search server * @return the id of the source * @throws IOException in case something goes wrong */ - public String createCustomSource(String sourceName, String version) throws IOException { + public String createCustomSource(String sourceName) throws IOException { checkStarted(); // If needed, we create the new settings for this files index diff --git a/docs/source/admin/fs/local-fs.rst b/docs/source/admin/fs/local-fs.rst index 49b0701bc..39d411260 100644 --- a/docs/source/admin/fs/local-fs.rst +++ b/docs/source/admin/fs/local-fs.rst @@ -549,6 +549,11 @@ a directory, you can set ``remove_deleted`` to ``false`` (default to fs: remove_deleted: false +.. note:: + + Setting ``remove_deleted`` is forced to ``false`` when using the Workplace Search output (:ref:`wpsearch-settings`). 
+ + Ignore content ^^^^^^^^^^^^^^ diff --git a/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/WorkplaceSearchClient.java b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/WorkplaceSearchClient.java index 8acccd41b..dd307b215 100644 --- a/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/WorkplaceSearchClient.java +++ b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/WorkplaceSearchClient.java @@ -75,7 +75,7 @@ public void start() throws IOException { sourceName = generateDefaultCustomSourceName(settings.getName()); } - wpSearchClient.configureCustomSource(settings.getWorkplaceSearch().getId(), sourceName, version); + wpSearchClient.configureCustomSource(settings.getWorkplaceSearch().getId(), sourceName); } catch (ServiceUnavailableException e) { logger.fatal("Can not connect to Workplace Search service. " + "Check that you have workplace search running at {}: {}", diff --git a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/AbstractWorkplaceSearchITCase.java b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/AbstractWorkplaceSearchITCase.java index a6436b8a8..9e509d2a7 100644 --- a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/AbstractWorkplaceSearchITCase.java +++ b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/AbstractWorkplaceSearchITCase.java @@ -221,7 +221,7 @@ protected static String initSource(String sourceName) throws Exception { try (WPSearchClient client = createClient()) { cleanExistingCustomSources(sourceName); // Let's create a new source - String customSourceId = client.createCustomSource(sourceName, null); + String customSourceId = client.createCustomSource(sourceName); assertThat(customSourceId, not(isEmptyOrNullString())); 
staticLogger.debug(" --> we will be using custom source {}.", customSourceId); diff --git a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/WPSearchClientIT.java b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/WPSearchClientIT.java index e7be17f8b..fd4771ef3 100644 --- a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/WPSearchClientIT.java +++ b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/workplacesearch/WPSearchClientIT.java @@ -22,12 +22,10 @@ import com.carrotsearch.randomizedtesting.RandomizedTest; import com.jayway.jsonpath.JsonPath; import fr.pilato.elasticsearch.crawler.fs.client.ESSearchRequest; -import fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClientException; import fr.pilato.elasticsearch.crawler.fs.framework.JsonUtil; import fr.pilato.elasticsearch.crawler.fs.framework.TimeValue; import fr.pilato.elasticsearch.crawler.fs.thirdparty.wpsearch.WPSearchClient; import jakarta.ws.rs.ProcessingException; -import org.hamcrest.Matchers; import org.junit.Test; import java.net.ConnectException; @@ -77,7 +75,7 @@ public void testNonRunningService() { public void testGetSourceById() throws Exception { try (WPSearchClient client = createClient()) { // We first create a source so we can use it later. - String id = client.createCustomSource(sourceName, client.getVersion()); + String id = client.createCustomSource(sourceName); // This is what we want to test actually String source = client.getCustomSourceById(id); @@ -89,7 +87,7 @@ public void testGetSourceById() throws Exception { public void testGetSourceByName() throws Exception { try (WPSearchClient client = createClient()) { // We first create a source so we can use it later. 
- String id = client.createCustomSource(sourceName, client.getVersion()); + String id = client.createCustomSource(sourceName); // This is what we want to test actually List sourceIds = client.getCustomSourcesByName(sourceName); @@ -102,8 +100,8 @@ public void testGetSourceByName() throws Exception { public void testWithSomeFakeDocuments() throws Exception { try (WPSearchClient client = createClient()) { // We configure the custom source. - String customSourceId = client.createCustomSource(sourceName, client.getVersion()); - client.configureCustomSource(customSourceId, sourceName, client.getVersion()); + String customSourceId = client.createCustomSource(sourceName); + client.configureCustomSource(customSourceId, sourceName); // Index some documents client.indexDocument(fakeDocumentAsMap(RandomizedTest.randomAsciiLettersOfLength(10), "Foo", "EN", "foo", "Foo")); @@ -144,8 +142,8 @@ public void testWithSomeFakeDocuments() throws Exception { public void testGetDocument() throws Exception { try (WPSearchClient client = createClient()) { // We configure the custom source. - String customSourceId = client.createCustomSource(sourceName, client.getVersion()); - client.configureCustomSource(customSourceId, sourceName, client.getVersion()); + String customSourceId = client.createCustomSource(sourceName); + client.configureCustomSource(customSourceId, sourceName); String id = RandomizedTest.randomAsciiLettersOfLength(10); @@ -168,8 +166,8 @@ public void testGetDocument() throws Exception { public void testSearch() throws Exception { try (WPSearchClient client = createClient()) { // We first create a source so we can use it later. 
- String customSourceId = client.createCustomSource(sourceName, client.getVersion()); - client.configureCustomSource(customSourceId, sourceName, client.getVersion()); + String customSourceId = client.createCustomSource(sourceName); + client.configureCustomSource(customSourceId, sourceName); String uniqueId1 = RandomizedTest.randomAsciiLettersOfLength(10); { @@ -242,8 +240,8 @@ public void testSearch() throws Exception { public void testSendAndRemoveADocument() throws Exception { try (WPSearchClient client = createClient()) { // We first create a source so we can use it later. - String customSourceId = client.createCustomSource(sourceName, client.getVersion()); - client.configureCustomSource(customSourceId, sourceName, client.getVersion()); + String customSourceId = client.createCustomSource(sourceName); + client.configureCustomSource(customSourceId, sourceName); Map document = new HashMap<>(); document.put("id", "testSendAndRemoveADocument"); diff --git a/integration-tests/src/test/resources/log4j2.xml b/integration-tests/src/test/resources/log4j2.xml index f5b77d4db..3b2d36f22 100644 --- a/integration-tests/src/test/resources/log4j2.xml +++ b/integration-tests/src/test/resources/log4j2.xml @@ -15,19 +15,22 @@ + + + - + - + - + @@ -42,7 +45,7 @@ - + diff --git a/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidator.java b/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidator.java index eb661b30a..604537076 100644 --- a/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidator.java +++ b/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidator.java @@ -115,6 +115,12 @@ public static boolean validateSettings(Logger logger, FsSettings settings, boole settings.setRest(Rest.DEFAULT); } + if (settings.getWorkplaceSearch() != null && settings.getFs().isRemoveDeleted()) { + logger.warn("Workplace Search integration does not support removing existing 
documents. " + + "but fs.remove_deleted is set to true. We are forcing it to false."); + settings.getFs().setRemoveDeleted(false); + } + return false; } } diff --git a/settings/src/test/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidatorTest.java b/settings/src/test/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidatorTest.java index 70214a59a..a05aedc8c 100644 --- a/settings/src/test/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidatorTest.java +++ b/settings/src/test/java/fr/pilato/elasticsearch/crawler/fs/settings/FsCrawlerValidatorTest.java @@ -88,6 +88,31 @@ public void testSettingsValidation() { assertThat(settings.getRest(), notNullValue()); } + @Test + public void testSettingsValidationWithWorkplaceSearch() { + // Checking default values + FsSettings settings = buildSettings(Fs.builder().build(), null); + settings.setWorkplaceSearch(WorkplaceSearch.builder().build()); + assertThat(settings.getFs().getUrl(), is(Fs.DEFAULT_DIR)); + + // Check that the default fs.remove_deleted is true + assertThat(settings.getFs().isRemoveDeleted(), is(true)); + + assertThat(FsCrawlerValidator.validateSettings(logger, settings, false), is(false)); + assertThat(settings.getFs().getUrl(), is(Fs.DEFAULT_DIR)); + assertThat(settings.getElasticsearch().getNodes(), hasItem(Elasticsearch.NODE_DEFAULT)); + assertThat(settings.getElasticsearch().getIndex(), is(getCurrentTestName())); + assertThat(settings.getElasticsearch().getIndexFolder(), is(getCurrentTestName() + INDEX_SUFFIX_FOLDER)); + assertThat(settings.getServer(), nullValue()); + assertThat(settings.getRest(), nullValue()); + assertThat(settings.getWorkplaceSearch(), notNullValue()); + assertThat(settings.getWorkplaceSearch().getServer(), is(WorkplaceSearch.DEFAULT_SERVER)); + assertThat(settings.getWorkplaceSearch().getUrlPrefix(), is(WorkplaceSearch.DEFAULT_URL_PREFIX)); + + // Check that after the validation of settings we have modified fs.remove_deleted to false + 
assertThat(settings.getFs().isRemoveDeleted(), is(false)); + } + private FsSettings buildSettings(Fs fs, Server server) { FsSettings.Builder settingsBuilder = FsSettings.builder(getCurrentTestName()); settingsBuilder.setFs(fs == null ? Fs.DEFAULT : fs);