ids) {
}
return lidvids;
}
- /**
- * FIXME: This is a hack to keep changes limited to harvest.
- * Replace the whole registry initiation with something different
- * when moving to multitenancy
- * @param xml2bean
- * @return
- */
- static public RegistryCfg exchangeRegistry (RegistryType xml) {
- RegistryCfg bean = new RegistryCfg();
- bean.authFile = xml.getAuth();
- bean.indexName = xml.getIndex();
- bean.url = xml.getServerUrl();
- return bean;
+ static public ConnectionFactory exchangeRegistry (RegistryType xml) throws Exception {
+ return EstablishConnectionFactory.from (xml.getValue(), xml.getAuth());
}
static public HarvestConfigurationType read(File file) throws JAXBException {
JAXBContext jaxbContext = new JAXBContextFactory().createContext(new Class[]{Harvest.class}, null);
diff --git a/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java b/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java
index b0ee7d6b..0c135eaa 100644
--- a/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java
+++ b/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java
@@ -14,6 +14,14 @@
/**
+ *
+ * This terrible construct is so that xjc can autodetect this as the
+ * root node for processing. Many things would be better but this is
+ * the most workable solution especially if the making of the binding
+ * code is automated in the pom. The only other real solution is to
+ * modify one of the classes generated by hand.
+ *
+ *
* Java class for anonymous complex type.
*
*
The following schema fragment specifies the expected content contained within this class.
diff --git a/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java b/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java
index 8d4c0483..716eb7d4 100644
--- a/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java
+++ b/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java
@@ -9,8 +9,8 @@
import jakarta.xml.bind.annotation.XmlAccessType;
import jakarta.xml.bind.annotation.XmlAccessorType;
-import jakarta.xml.bind.annotation.XmlAttribute;
import jakarta.xml.bind.annotation.XmlElement;
+import jakarta.xml.bind.annotation.XmlSchemaType;
import jakarta.xml.bind.annotation.XmlSeeAlso;
import jakarta.xml.bind.annotation.XmlType;
@@ -19,11 +19,11 @@
*
* These are the basic options for the harvest configuration file.
*
- * @nodeName: the PDS node that this harvest run applies to
*
* autogenFields: should not be used except in development testing
- * do: tells where and how to harvest PDS4 labels
+ * load: tells where and how to harvest PDS4 labels
* fileInfo: option allowing filename prefixes to be replaced
+ * nodeName: the PDS node that this harvest run applies to
* productFilter: should not be used except in development testing
* registry: define the server harvest should use
* xpathMaps: allow constraints in the PDS4 label to control harvesting
@@ -41,12 +41,12 @@
*
*
*
+ *
*
*
*
*
*
- *
*
*
*
@@ -67,13 +67,14 @@ public class HarvestConfigurationType {
@XmlElement(required = true)
protected LoadType load;
protected FileInfoType fileInfo;
+ @XmlElement(required = true)
+ @XmlSchemaType(name = "normalizedString")
+ protected NodeNameEnum nodeName;
protected FilterType productFilter;
protected ReferencesType references;
@XmlElement(required = true)
protected RegistryType registry;
protected XpathMapsType xpathMaps;
- @XmlAttribute(name = "nodeName", required = true)
- protected NodeNameEnum nodeName;
/**
* Gets the value of the autogenFields property.
@@ -147,6 +148,30 @@ public void setFileInfo(FileInfoType value) {
this.fileInfo = value;
}
+ /**
+ * Gets the value of the nodeName property.
+ *
+ * @return
+ * possible object is
+ * {@link NodeNameEnum }
+ *
+ */
+ public NodeNameEnum getNodeName() {
+ return nodeName;
+ }
+
+ /**
+ * Sets the value of the nodeName property.
+ *
+ * @param value
+ * allowed object is
+ * {@link NodeNameEnum }
+ *
+ */
+ public void setNodeName(NodeNameEnum value) {
+ this.nodeName = value;
+ }
+
/**
* Gets the value of the productFilter property.
*
@@ -243,28 +268,4 @@ public void setXpathMaps(XpathMapsType value) {
this.xpathMaps = value;
}
- /**
- * Gets the value of the nodeName property.
- *
- * @return
- * possible object is
- * {@link NodeNameEnum }
- *
- */
- public NodeNameEnum getNodeName() {
- return nodeName;
- }
-
- /**
- * Sets the value of the nodeName property.
- *
- * @param value
- * allowed object is
- * {@link NodeNameEnum }
- *
- */
- public void setNodeName(NodeNameEnum value) {
- this.nodeName = value;
- }
-
}
diff --git a/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java b/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java
index 732be31a..ebd10c76 100644
--- a/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java
+++ b/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java
@@ -10,9 +10,9 @@
import jakarta.xml.bind.annotation.XmlAccessType;
import jakarta.xml.bind.annotation.XmlAccessorType;
import jakarta.xml.bind.annotation.XmlAttribute;
-import jakarta.xml.bind.annotation.XmlElement;
import jakarta.xml.bind.annotation.XmlSchemaType;
import jakarta.xml.bind.annotation.XmlType;
+import jakarta.xml.bind.annotation.XmlValue;
import jakarta.xml.bind.annotation.adapters.NormalizedStringAdapter;
import jakarta.xml.bind.annotation.adapters.XmlJavaTypeAdapter;
@@ -20,15 +20,12 @@
/**
*
* Define the connection to the registry, security for the connection, and
- * the index within the registry.
+ * the index within the registry. The value of this tag is a pointer to
+ * a registry connection like app://known/direct/localhost.xml or
+ * app://known/cognito/first_test.xml
*
* @auth: a java property file containing a username and password
- * @index: the index to be used by harvest whose default is registry
- * @trust_self_signed: all self signed certificates for https
- *
- * cognito_client_id: the cognito client ID for AWS based instances of opensearch
- * server_url: the opensearch URL when not using AWS services
- *
+ *
*
*
Java class for registry_type complex type.
*
@@ -36,17 +33,11 @@
*
*
{@code
*
- *
- *
- *
- *
- *
- *
+ *
+ *
*
- *
- *
- *
- *
+ *
+ *
*
* }
*
@@ -54,76 +45,41 @@
*/
@XmlAccessorType(XmlAccessType.FIELD)
@XmlType(name = "registry_type", propOrder = {
- "cognitoClientId",
- "serverUrl"
+ "value"
})
public class RegistryType {
- @XmlElement(name = "cognito_client_id")
- @XmlJavaTypeAdapter(NormalizedStringAdapter.class)
- @XmlSchemaType(name = "normalizedString")
- protected String cognitoClientId;
- @XmlElement(name = "server_url")
+ @XmlValue
@XmlJavaTypeAdapter(NormalizedStringAdapter.class)
@XmlSchemaType(name = "normalizedString")
- protected String serverUrl;
+ protected String value;
@XmlAttribute(name = "auth", required = true)
@XmlJavaTypeAdapter(NormalizedStringAdapter.class)
@XmlSchemaType(name = "normalizedString")
protected String auth;
- @XmlAttribute(name = "index")
- @XmlJavaTypeAdapter(NormalizedStringAdapter.class)
- @XmlSchemaType(name = "normalizedString")
- protected String index;
- @XmlAttribute(name = "trust_self_signed")
- protected Boolean trustSelfSigned;
/**
- * Gets the value of the cognitoClientId property.
+ * Gets the value of the value property.
*
* @return
* possible object is
* {@link String }
*
*/
- public String getCognitoClientId() {
- return cognitoClientId;
+ public String getValue() {
+ return value;
}
/**
- * Sets the value of the cognitoClientId property.
+ * Sets the value of the value property.
*
* @param value
* allowed object is
* {@link String }
*
*/
- public void setCognitoClientId(String value) {
- this.cognitoClientId = value;
- }
-
- /**
- * Gets the value of the serverUrl property.
- *
- * @return
- * possible object is
- * {@link String }
- *
- */
- public String getServerUrl() {
- return serverUrl;
- }
-
- /**
- * Sets the value of the serverUrl property.
- *
- * @param value
- * allowed object is
- * {@link String }
- *
- */
- public void setServerUrl(String value) {
- this.serverUrl = value;
+ public void setValue(String value) {
+ this.value = value;
}
/**
@@ -150,60 +106,4 @@ public void setAuth(String value) {
this.auth = value;
}
- /**
- * Gets the value of the index property.
- *
- * @return
- * possible object is
- * {@link String }
- *
- */
- public String getIndex() {
- if (index == null) {
- return "registry";
- } else {
- return index;
- }
- }
-
- /**
- * Sets the value of the index property.
- *
- * @param value
- * allowed object is
- * {@link String }
- *
- */
- public void setIndex(String value) {
- this.index = value;
- }
-
- /**
- * Gets the value of the trustSelfSigned property.
- *
- * @return
- * possible object is
- * {@link Boolean }
- *
- */
- public boolean isTrustSelfSigned() {
- if (trustSelfSigned == null) {
- return false;
- } else {
- return trustSelfSigned;
- }
- }
-
- /**
- * Sets the value of the trustSelfSigned property.
- *
- * @param value
- * allowed object is
- * {@link Boolean }
- *
- */
- public void setTrustSelfSigned(Boolean value) {
- this.trustSelfSigned = value;
- }
-
}
diff --git a/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java b/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java
index 9beeeaa6..0dd548bf 100644
--- a/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java
+++ b/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java
@@ -19,7 +19,6 @@
import gov.nasa.pds.harvest.dao.RegistryDao;
import gov.nasa.pds.harvest.dao.RegistryManager;
import gov.nasa.pds.harvest.util.xml.XmlIs;
-import gov.nasa.pds.registry.common.cfg.RegistryCfg;
import gov.nasa.pds.registry.common.es.service.CollectionInventoryWriter;
import gov.nasa.pds.registry.common.meta.CollectionMetadataExtractor;
import gov.nasa.pds.registry.common.meta.Metadata;
@@ -59,12 +58,7 @@ public class CollectionProcessor extends BaseProcessor
public CollectionProcessor(HarvestConfigurationType config) throws Exception
{
super(config);
- // FIXME: multitenancy
- RegistryCfg fixme = new RegistryCfg();
- fixme.url = config.getRegistry().getServerUrl();
- fixme.indexName = config.getRegistry().getIndex();
- fixme.authFile = config.getRegistry().getAuth();
- invWriter = new CollectionInventoryWriter(fixme);
+ invWriter = new CollectionInventoryWriter(ConfigManager.exchangeRegistry(config.getRegistry()));
this.invProc = new CollectionInventoryProcessor(config.getReferences().isPrimaryOnly());
collectionExtractor = new CollectionMetadataExtractor();
}
@@ -137,7 +131,6 @@ private void processMetadata(File file, Document doc, BundleType bCfg) throws Ex
meta.setNodeName(config.getNodeName().toString());
// Collection filter
- // FIXME: this needs to be changed and may relate to other fixme in this file
List lids = ConfigManager.exchangeLids (bCfg.getCollection());
List lidvids = ConfigManager.exchangeLidvids (bCfg.getCollection());
if(!lids.isEmpty() && !lids.contains(meta.lid)) return;
diff --git a/src/main/java/gov/nasa/pds/harvest/dao/EsRequestBuilder.java b/src/main/java/gov/nasa/pds/harvest/dao/EsRequestBuilder.java
deleted file mode 100644
index 4348d271..00000000
--- a/src/main/java/gov/nasa/pds/harvest/dao/EsRequestBuilder.java
+++ /dev/null
@@ -1,96 +0,0 @@
-package gov.nasa.pds.harvest.dao;
-
-import java.io.IOException;
-import java.io.StringWriter;
-import java.io.Writer;
-import java.util.Collection;
-
-import com.google.gson.stream.JsonWriter;
-import gov.nasa.pds.harvest.exception.InvalidPDS4ProductException;
-
-
-/**
- * Elasticsearch request / query builder.
- *
- * @author karpenko
- */
-public class EsRequestBuilder
-{
- private boolean pretty;
-
-
- /**
- * Constructor.
- * @param pretty Generate pretty-formatted JSON
- */
- public EsRequestBuilder(boolean pretty)
- {
- this.pretty = pretty;
- }
-
-
- /**
- * Construcotr
- */
- public EsRequestBuilder()
- {
- this(false);
- }
-
-
- private JsonWriter createJsonWriter(Writer writer)
- {
- JsonWriter jw = new JsonWriter(writer);
- if (pretty)
- {
- jw.setIndent(" ");
- }
-
- return jw;
- }
-
-
- /**
- * Create Elasticsearch query to search for product IDs (lidvids)
- * @param ids Collection of product IDs (lidvids)
- * @param pageSize Number of records to return. Usually pageSize = ids.size().
- * @return JSON Elasticsearch request
- * @throws InvalidPDS4ProductException Invalid PDS4 Product Exception
- * @throws IOException IOException
- */
- public String createSearchIdsRequest(Collection ids, int pageSize) throws InvalidPDS4ProductException, IOException {
- if(ids == null || ids.isEmpty()) throw new InvalidPDS4ProductException("Error reading bundle/collection references. " +
- "Verify the bundle/collection is valid prior to loading the data.");
-
- StringWriter out = new StringWriter();
- JsonWriter writer = createJsonWriter(out);
-
- // Create ids query
- writer.beginObject();
-
- // Exclude source from response
- writer.name("_source").value(false);
- writer.name("size").value(pageSize);
-
- writer.name("query");
- writer.beginObject();
- writer.name("ids");
- writer.beginObject();
-
- writer.name("values");
- writer.beginArray();
- for(String id: ids)
- {
- writer.value(id);
- }
- writer.endArray();
-
- writer.endObject();
- writer.endObject();
- writer.endObject();
-
- writer.close();
- return out.toString();
- }
-
-}
diff --git a/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java b/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java
index a777bdf9..b0700c22 100644
--- a/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java
+++ b/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java
@@ -12,7 +12,7 @@
import gov.nasa.pds.harvest.crawler.Counter;
import gov.nasa.pds.harvest.util.PackageIdGenerator;
-import gov.nasa.pds.registry.common.cfg.RegistryCfg;
+import gov.nasa.pds.registry.common.ConnectionFactory;
import gov.nasa.pds.registry.common.es.dao.DataLoader;
import gov.nasa.pds.registry.common.meta.Metadata;
@@ -22,7 +22,7 @@ public class MetadataWriter implements Closeable
private final static String WARN_SKIP_PRE = "Skipping registered product ";
private final static String WARN_SKIP_POST = " (LIDVID/LID already exists in registry database)";
private final static int ES_DOC_BATCH_SIZE = 50;
-
+ private final ConnectionFactory conFact;
private Logger log;
private RegistryDao registryDao;
@@ -40,10 +40,11 @@ public class MetadataWriter implements Closeable
* @param cfg registry configuration
* @throws Exception an exception
*/
- public MetadataWriter(RegistryCfg cfg, RegistryDao dao, Counter counter) throws Exception
+ public MetadataWriter(ConnectionFactory conFact, RegistryDao dao, Counter counter) throws Exception
{
+ this.conFact = conFact;
log = LogManager.getLogger(this.getClass());
- loader = new DataLoader(cfg.url, cfg.indexName, cfg.authFile);
+ loader = new DataLoader(conFact);
docBatch = new RegistryDocBatch();
jobId = PackageIdGenerator.getInstance().getPackageId();
@@ -60,7 +61,7 @@ public void setOverwriteExisting(boolean b)
public void write(Metadata meta) throws Exception
{
- docBatch.write(meta, jobId);
+ docBatch.write(this.conFact, meta, jobId);
if(docBatch.size() % ES_DOC_BATCH_SIZE == 0)
{
diff --git a/src/main/java/gov/nasa/pds/harvest/dao/NonExistingIdsResponse.java b/src/main/java/gov/nasa/pds/harvest/dao/NonExistingIdsResponse.java
deleted file mode 100644
index 450d2990..00000000
--- a/src/main/java/gov/nasa/pds/harvest/dao/NonExistingIdsResponse.java
+++ /dev/null
@@ -1,50 +0,0 @@
-package gov.nasa.pds.harvest.dao;
-
-import java.util.Collection;
-import java.util.Set;
-import java.util.TreeSet;
-
-import gov.nasa.pds.registry.common.es.client.SearchResponseParser;
-
-
-/**
- * Helper class to process Elasticsearch response from "search IDs" query.
- *
- * @author karpenko
- */
-public class NonExistingIdsResponse implements SearchResponseParser.Callback
-{
- private Set retIds;
-
-
- /**
- * Constructor
- * @param ids Product IDs (lidvids) sent to Elasticsearch in "search IDs" query.
- * IDs are copied to internal collection.
- * After processing Elasticsearch response, all IDs existing in Elasticsearch
- * "registry" index will be removed from this internal collection.
- */
- public NonExistingIdsResponse(Collection ids)
- {
- retIds = new TreeSet<>(ids);
- }
-
- /**
- * Return collection of product IDs (lidvids) non-existing in Elasticsearch.
- * @return a collection of product IDs (lidvids)
- */
- public Set getIds()
- {
- return retIds;
- }
-
-
- /**
- * This method is called for each record in Elasticsearch response
- */
- @Override
- public void onRecord(String id, Object src) throws Exception
- {
- retIds.remove(id);
- }
-}
diff --git a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java
index 1fe32f07..892c494a 100644
--- a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java
+++ b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java
@@ -5,12 +5,10 @@
import java.util.HashSet;
import java.util.List;
import java.util.Set;
-
-import org.elasticsearch.client.Request;
-import org.elasticsearch.client.Response;
-import org.elasticsearch.client.RestClient;
-
-import gov.nasa.pds.registry.common.es.client.SearchResponseParser;
+import gov.nasa.pds.harvest.exception.InvalidPDS4ProductException;
+import gov.nasa.pds.registry.common.Request;
+import gov.nasa.pds.registry.common.Response;
+import gov.nasa.pds.registry.common.RestClient;
/**
@@ -24,10 +22,6 @@ public class RegistryDao
private String indexName;
private boolean pretty;
- private EsRequestBuilder requestBld;
- private SearchResponseParser parser;
-
-
/**
* Constructor
* @param client Elasticsearch client
@@ -50,9 +44,6 @@ public RegistryDao(RestClient client, String indexName, boolean pretty)
this.client = client;
this.indexName = indexName;
this.pretty = pretty;
-
- requestBld = new EsRequestBuilder();
- parser = new SearchResponseParser();
}
@@ -82,25 +73,20 @@ public boolean idExists(String id) throws Exception
public Set getNonExistingIds(Collection ids) throws Exception
{
if(ids == null || ids.isEmpty()) return new HashSet<>();
- Response resp = searchIds(ids);
-
- NonExistingIdsResponse idsResp = new NonExistingIdsResponse(ids);
- parser.parseResponse(resp, idsResp);
-
- return idsResp.getIds();
+ return searchIds(ids).nonExistingIds(ids);
}
- private Response searchIds(Collection ids) throws Exception
+ private Response.Search searchIds(Collection ids) throws Exception
{
- String json = requestBld.createSearchIdsRequest(ids, ids.size());
-
- String reqUrl = "/" + indexName + "/_search";
- if(pretty) reqUrl += "?pretty";
-
- Request req = new Request("GET", reqUrl);
- req.setJsonEntity(json);
- Response resp = client.performRequest(req);
+ if(ids == null || ids.isEmpty()) throw new InvalidPDS4ProductException("Error reading bundle/collection references. " +
+ "Verify the bundle/collection is valid prior to loading the data.");
+
+ Request.Search req = client.createSearchRequest()
+ .buildTheseIds(ids)
+ .setIndex(this.indexName)
+ .setPretty(pretty);
+ Response.Search resp = client.performRequest(req);
return resp;
}
diff --git a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java
index fa48e31e..48b3c30b 100644
--- a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java
+++ b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java
@@ -1,9 +1,14 @@
package gov.nasa.pds.harvest.dao;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
-
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import gov.nasa.pds.registry.common.ConnectionFactory;
+import gov.nasa.pds.registry.common.RestClient;
import gov.nasa.pds.registry.common.meta.Metadata;
+import gov.nasa.pds.registry.common.util.Tuple;
import gov.nasa.pds.registry.common.util.json.RegistryDocBuilder;
/**
@@ -20,7 +25,8 @@ public static class NJsonItem
public String dataJson; // Data JSON (line 2)
}
-
+ final private static HashSet alreadyLearned = new HashSet();
+ final private Logger log = LogManager.getLogger(RegistryDocBatch.class);
private List items;
@@ -32,15 +38,43 @@ public RegistryDocBatch()
items = new ArrayList<>();
}
-
- public void write(Metadata meta, String jobId) throws Exception
+ /* hack for PDS-NASA/harvest#127
+ * search the JSON string for any ref_lid_ and add if necessary to index so that it is searchable
+ */
+ private void updateIndex(ConnectionFactory conFact, String json) {
+ int begin_index = json.indexOf("ref_lid_"), end_index;
+ String name;
+ if (-1 < begin_index && alreadyLearned.isEmpty()) {
+ try (RestClient client = conFact.createRestClient()) {
+ alreadyLearned.addAll(client.performRequest(client.createMappingRequest().setIndex(conFact.getIndexName())).fieldNames());
+ } catch (Exception e) {
+ log.error("Unexpected error (should not have made it here) while getting index " + conFact.getIndexName(),e);
+ }
+ }
+ while (-1 < begin_index) {
+ end_index = json.indexOf('"', begin_index+5);
+ name = json.substring(begin_index, end_index);
+ if (!alreadyLearned.contains(name)) {
+ try (RestClient client = conFact.createRestClient()) {
+ ArrayList new_item = new ArrayList();
+ new_item.add(new Tuple(name, "keyword"));
+ client.performRequest(client.createMappingRequest().setIndex(conFact.getIndexName()).buildUpdateFieldSchema(new_item));
+ alreadyLearned.add(name);
+ } catch (Exception e) {
+ log.error("Unexpected error (should not have made it here) while updating index with " + name,e);
+ }
+ }
+ begin_index = json.indexOf("ref_lid_", end_index);
+ }
+ }
+ public void write(ConnectionFactory conFact, Metadata meta, String jobId) throws Exception
{
NJsonItem item = new NJsonItem();
item.lidvid = meta.lidvid;
item.prodClass = meta.prodClass;
item.pkJson = RegistryDocBuilder.createPKJson(meta);
item.dataJson = RegistryDocBuilder.createDataJson(meta, jobId);
-
+ this.updateIndex(conFact, item.dataJson);
items.add(item);
}
diff --git a/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java b/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java
index d770348e..efeaef13 100644
--- a/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java
+++ b/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java
@@ -2,12 +2,11 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
-import org.elasticsearch.client.RestClient;
import gov.nasa.pds.harvest.crawler.Counter;
import gov.nasa.pds.harvest.util.log.LogUtils;
-import gov.nasa.pds.registry.common.cfg.RegistryCfg;
-import gov.nasa.pds.registry.common.es.client.EsClientFactory;
+import gov.nasa.pds.registry.common.ConnectionFactory;
+import gov.nasa.pds.registry.common.RestClient;
import gov.nasa.pds.registry.common.util.CloseUtils;
import gov.nasa.pds.registry.common.es.dao.dd.DataDictionaryDao;
import gov.nasa.pds.registry.common.es.dao.schema.SchemaDao;
@@ -27,10 +26,10 @@ public class RegistryManager
{
private static RegistryManager instance = null;
- private RegistryCfg cfg;
+ private ConnectionFactory conFact;
private boolean overwriteFlag;
- private RestClient esClient;
+ private RestClient client;
private RegistryDao registryDao;
private SchemaDao schemaDao;
@@ -49,30 +48,25 @@ public class RegistryManager
* @param cfg Registry (Elasticsearch) configuration parameters.
* @throws Exception Generic exception
*/
- private RegistryManager(RegistryCfg cfg, boolean overwriteFlag) throws Exception
+ private RegistryManager(ConnectionFactory conFact, boolean overwriteFlag) throws Exception
{
- this.cfg = cfg;
+ this.conFact = conFact;
this.overwriteFlag = overwriteFlag;
this.counter = new Counter();
-
Logger log = LogManager.getLogger(this.getClass());
- log.log(LogUtils.LEVEL_SUMMARY, "Elasticsearch URL: " + cfg.url + ", index: " + cfg.indexName);
-
- esClient = EsClientFactory.createRestClient(cfg.url, cfg.authFile);
-
- String indexName = cfg.indexName;
- if(indexName == null || indexName.isEmpty()) indexName = "registry";
-
- registryDao = new RegistryDao(esClient, indexName);
- schemaDao = new SchemaDao(esClient, indexName);
- ddDao = new DataDictionaryDao(esClient, indexName);
-
+ log.log(LogUtils.LEVEL_SUMMARY, "Connection: " + conFact);
+ client = conFact.createRestClient();
+ registryDao = new RegistryDao(client, conFact.getIndexName());
+ schemaDao = new SchemaDao(client, conFact.getIndexName());
+ ddDao = new DataDictionaryDao(client, conFact.getIndexName());
fieldNameCache = new FieldNameCache(ddDao, schemaDao);
-
- registryWriter = new MetadataWriter(cfg, registryDao, counter);
+ registryWriter = new MetadataWriter(conFact, registryDao, counter);
registryWriter.setOverwriteExisting(overwriteFlag);
+ invWriter = new CollectionInventoryWriter(conFact);
- invWriter = new CollectionInventoryWriter(cfg);
+ if (!this.client.exists(this.conFact.getIndexName())) {
+ throw new RuntimeException("The index '" + this.conFact.getIndexName() + "' does not exist. Please create it first.");
+ }
}
@@ -82,13 +76,9 @@ private RegistryManager(RegistryCfg cfg, boolean overwriteFlag) throws Exception
* @param overwriteFlag overwrite registered products
* @throws Exception Generic exception
*/
- public static void init(RegistryCfg cfg, boolean overwriteFlag) throws Exception
+ public static void init(ConnectionFactory conFact, boolean overwriteFlag) throws Exception
{
- // Registry is not configured. Run Harvest without Registry.
- if(cfg == null) throw new IllegalArgumentException("Registry is not configuraed.");
- if(cfg.url == null || cfg.url.isEmpty()) throw new IllegalArgumentException("Missing Registry URL");
-
- instance = new RegistryManager(cfg, overwriteFlag);
+ instance = new RegistryManager(conFact, overwriteFlag);
}
@@ -100,7 +90,7 @@ public static void destroy()
if(instance == null) return;
CloseUtils.close(instance.registryWriter);
- CloseUtils.close(instance.esClient);
+ CloseUtils.close(instance.client);
instance = null;
}
@@ -116,16 +106,6 @@ public static RegistryManager getInstance()
}
- /**
- * Get registry configuration.
- * @return Registry configuration
- */
- public RegistryCfg getRegistryConfiguration()
- {
- return cfg;
- }
-
-
/**
* Get overwrite flag
* @return if true, overwrite already registered documents
@@ -183,7 +163,7 @@ public FieldNameCache getFieldNameCache()
*/
public MissingFieldsProcessor createMissingFieldsProcessor() throws Exception
{
- SchemaUpdater su = new SchemaUpdater(cfg, ddDao, schemaDao);
+ SchemaUpdater su = new SchemaUpdater(conFact, ddDao, schemaDao);
return new MissingFieldsProcessor(su, fieldNameCache);
}
diff --git a/src/main/resources/conf/configuration.xsd b/src/main/resources/conf/configuration.xsd
index 910ea1f8..ece01b01 100644
--- a/src/main/resources/conf/configuration.xsd
+++ b/src/main/resources/conf/configuration.xsd
@@ -177,11 +177,11 @@
These are the basic options for the harvest configuration file.
- @nodeName: the PDS node that this harvest run applies to
autogenFields: should not be used except in development testing
- do: tells where and how to harvest PDS4 labels
+ load: tells where and how to harvest PDS4 labels
fileInfo: option allowing filename prefixes to be replaced
+ nodeName: the PDS node that this harvest run applies to
productFilter: should not be used except in development testing
registry: define the server harvest should use
xpathMaps: allow constraints in the PDS4 label to control harvesting
@@ -192,12 +192,12 @@
name="autogenFields" type="autogen_fields_type"/>
+
-
@@ -236,23 +236,18 @@
Define the connection to the registry, security for the connection, and
- the index within the registry.
+ the index within the registry. The value of this tag is a pointer to
+ a registry connection like app://known/direct/localhost.xml or
+ app://known/cognito/first_test.xml
@auth: a java property file containing a username and password
- @index: the index to be used by harvest whose default is registry
- @trust_self_signed: all self signed certificates for https
-
- cognito_client_id: the cognito client ID for AWS based instances of opensearch
- server_url: the opensearch URL when not using AWS services
-
+
-
-
-
-
-
-
-
+
+
+
+
+
diff --git a/src/main/resources/conf/examples/bundles.xml b/src/main/resources/conf/examples/bundles.xml
index 9dde2331..959216a7 100644
--- a/src/main/resources/conf/examples/bundles.xml
+++ b/src/main/resources/conf/examples/bundles.xml
@@ -17,13 +17,13 @@
* PSA - Planetary Science Archive
* JAXA - Japan Aerospace Exploration Agency
-->
-
-
+
+ CHANGE_ME
-
- http://localhost:9200
-
+ app://localhost.xml
-
+
+ CHANGE_ME
-
- http://localhost:9200
-
+ app://localhost.xml
diff --git a/src/main/resources/conf/examples/files.xml b/src/main/resources/conf/examples/files.xml
index 758d2f29..854d9789 100644
--- a/src/main/resources/conf/examples/files.xml
+++ b/src/main/resources/conf/examples/files.xml
@@ -14,13 +14,14 @@
* PSA - Planetary Science Archive
* JAXA - Japan Aerospace Exploration Agency
-->
-
+
+ CHANGE_ME
-
- http://localhost:9200
-
+ app://localhost.xml
diff --git a/src/main/resources/conf/examples/xpaths.xml b/src/main/resources/conf/examples/xpaths.xml
index 6637bfbb..bd43895a 100644
--- a/src/main/resources/conf/examples/xpaths.xml
+++ b/src/main/resources/conf/examples/xpaths.xml
@@ -1,6 +1,9 @@
-
+
+ CHANGE_ME
@@ -22,8 +25,6 @@
-
- jdoawierllksjdfkla
-
+ app://cognito.xml
diff --git a/src/test/java/tt/es/TestRegistryDAO.java b/src/test/java/tt/es/TestRegistryDAO.java
index b09e7cbb..3a82b947 100644
--- a/src/test/java/tt/es/TestRegistryDAO.java
+++ b/src/test/java/tt/es/TestRegistryDAO.java
@@ -6,7 +6,7 @@
import gov.nasa.pds.harvest.dao.RegistryDao;
import gov.nasa.pds.harvest.dao.RegistryManager;
-import gov.nasa.pds.registry.common.cfg.RegistryCfg;
+import gov.nasa.pds.registry.common.ConnectionFactory;
public class TestRegistryDAO
@@ -17,10 +17,9 @@ public class TestRegistryDAO
public static void main(String[] args) throws Exception
{
- RegistryCfg cfg = new RegistryCfg();
- cfg.url = "http://localhost:9200";
+ ConnectionFactory conFact = null;//"app:/connections/direct/localhost.xml";
- RegistryManager.init(cfg, true);
+ RegistryManager.init(conFact, true);
try
{
diff --git a/src/test/java/tt/es/TestRequestBuilder.java b/src/test/java/tt/es/TestRequestBuilder.java
deleted file mode 100644
index ea93e8ba..00000000
--- a/src/test/java/tt/es/TestRequestBuilder.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package tt.es;
-
-import java.util.Set;
-import java.util.TreeSet;
-
-import gov.nasa.pds.harvest.dao.EsRequestBuilder;
-
-public class TestRequestBuilder
-{
-
- public static void main(String[] args) throws Exception
- {
- EsRequestBuilder bld = new EsRequestBuilder(true);
-
- Set ids = new TreeSet<>();
- ids.add("id123");
-
- String json = bld.createSearchIdsRequest(ids, 100);
- System.out.println(json);
- }
-
-}