diff --git a/pom.xml b/pom.xml index 6ffa650e..5b066977 100644 --- a/pom.xml +++ b/pom.xml @@ -113,7 +113,7 @@ gov.nasa.pds registry-common - 1.5.1 + 1.6.0-SNAPSHOT @@ -128,8 +128,8 @@ 5.7.0 test - - + + jakarta.xml.bind jakarta.xml.bind-api 4.0.1 diff --git a/src/main/java/gov/nasa/pds/harvest/cfg/ConfigManager.java b/src/main/java/gov/nasa/pds/harvest/cfg/ConfigManager.java index 68050ee7..ba438111 100644 --- a/src/main/java/gov/nasa/pds/harvest/cfg/ConfigManager.java +++ b/src/main/java/gov/nasa/pds/harvest/cfg/ConfigManager.java @@ -6,7 +6,8 @@ import jakarta.xml.bind.JAXBContext; import jakarta.xml.bind.JAXBException; import org.glassfish.jaxb.runtime.v2.JAXBContextFactory; -import gov.nasa.pds.registry.common.cfg.RegistryCfg; +import gov.nasa.pds.registry.common.ConnectionFactory; +import gov.nasa.pds.registry.common.EstablishConnectionFactory; import gov.nasa.pds.registry.common.meta.cfg.FileRefRule; /** @@ -41,19 +42,8 @@ static public List exchangeLidvids (List ids) { } return lidvids; } - /** - * FIXME: This is a hack to keep changes limited to harvest. - * Replace the whole registry initiation with something different - * when moving to multitenancy - * @param xml2bean - * @return - */ - static public RegistryCfg exchangeRegistry (RegistryType xml) { - RegistryCfg bean = new RegistryCfg(); - bean.authFile = xml.getAuth(); - bean.indexName = xml.getIndex(); - bean.url = xml.getServerUrl(); - return bean; + static public ConnectionFactory exchangeRegistry (RegistryType xml) throws Exception { + return EstablishConnectionFactory.from (xml.getValue(), xml.getAuth()); } static public HarvestConfigurationType read(File file) throws JAXBException { JAXBContext jaxbContext = new JAXBContextFactory().createContext(new Class[]{Harvest.class}, null); diff --git a/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java b/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java index b0ee7d6b..0c135eaa 100644 --- a/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java +++ b/src/main/java/gov/nasa/pds/harvest/cfg/Harvest.java @@ -14,6 +14,14 @@ /** + * + * This terrible construct is so that xjc can autodetect this as the + * root node for processing. Many things would be better but this is + * the most workable solution especially if the making of the binding + * code is automated in the pom. The only other real solution is to + * modify one of the classes generated by hand. + * + * *

Java class for anonymous complex type. * *

The following schema fragment specifies the expected content contained within this class. diff --git a/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java b/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java index 8d4c0483..716eb7d4 100644 --- a/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java +++ b/src/main/java/gov/nasa/pds/harvest/cfg/HarvestConfigurationType.java @@ -9,8 +9,8 @@ import jakarta.xml.bind.annotation.XmlAccessType; import jakarta.xml.bind.annotation.XmlAccessorType; -import jakarta.xml.bind.annotation.XmlAttribute; import jakarta.xml.bind.annotation.XmlElement; +import jakarta.xml.bind.annotation.XmlSchemaType; import jakarta.xml.bind.annotation.XmlSeeAlso; import jakarta.xml.bind.annotation.XmlType; @@ -19,11 +19,11 @@ * * These are the basic options for the harvest configuration file. * - * @nodeName: the PDS node that this harvest run applies to * * autogenFields: should not be used except in development testing - * do: tells where and how to harvest PDS4 labels + * load: tells where and how to harvest PDS4 labels * fileInfo: option allowing filename prefixes to be replaced + * nodeName: the PDS node that this harvest run applies to * productFilter: should not be used except in development testing * registry: define the server harvest should use * xpathMaps: allow constraints in the PDS4 label to control harvesting @@ -41,12 +41,12 @@ * * * + * * * * * * - * * * * @@ -67,13 +67,14 @@ public class HarvestConfigurationType { @XmlElement(required = true) protected LoadType load; protected FileInfoType fileInfo; + @XmlElement(required = true) + @XmlSchemaType(name = "normalizedString") + protected NodeNameEnum nodeName; protected FilterType productFilter; protected ReferencesType references; @XmlElement(required = true) protected RegistryType registry; protected XpathMapsType xpathMaps; - @XmlAttribute(name = "nodeName", required = true) - protected NodeNameEnum nodeName; /** * Gets the value of the autogenFields property. @@ -147,6 +148,30 @@ public void setFileInfo(FileInfoType value) { this.fileInfo = value; } + /** + * Gets the value of the nodeName property. + * + * @return + * possible object is + * {@link NodeNameEnum } + * + */ + public NodeNameEnum getNodeName() { + return nodeName; + } + + /** + * Sets the value of the nodeName property. + * + * @param value + * allowed object is + * {@link NodeNameEnum } + * + */ + public void setNodeName(NodeNameEnum value) { + this.nodeName = value; + } + /** * Gets the value of the productFilter property. * @@ -243,28 +268,4 @@ public void setXpathMaps(XpathMapsType value) { this.xpathMaps = value; } - /** - * Gets the value of the nodeName property. - * - * @return - * possible object is - * {@link NodeNameEnum } - * - */ - public NodeNameEnum getNodeName() { - return nodeName; - } - - /** - * Sets the value of the nodeName property. - * - * @param value - * allowed object is - * {@link NodeNameEnum } - * - */ - public void setNodeName(NodeNameEnum value) { - this.nodeName = value; - } - } diff --git a/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java b/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java index 732be31a..ebd10c76 100644 --- a/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java +++ b/src/main/java/gov/nasa/pds/harvest/cfg/RegistryType.java @@ -10,9 +10,9 @@ import jakarta.xml.bind.annotation.XmlAccessType; import jakarta.xml.bind.annotation.XmlAccessorType; import jakarta.xml.bind.annotation.XmlAttribute; -import jakarta.xml.bind.annotation.XmlElement; import jakarta.xml.bind.annotation.XmlSchemaType; import jakarta.xml.bind.annotation.XmlType; +import jakarta.xml.bind.annotation.XmlValue; import jakarta.xml.bind.annotation.adapters.NormalizedStringAdapter; import jakarta.xml.bind.annotation.adapters.XmlJavaTypeAdapter; @@ -20,15 +20,12 @@ /** * * Define the connection to the registry, security for the connection, and - * the index within the registry. + * the index within the registry. The value of this tag is a pointer to + * a registry connection like app://known/direct/localhost.xml or + * app://known/cognito/first_test.xml * * @auth: a java property file containing a username and password - * @index: the index to be used by harvest whose default is registry - * @trust_self_signed: all self signed certificates for https - * - * cognito_client_id: the cognito client ID for AWS based instances of opensearch - * server_url: the opensearch URL when not using AWS services - * + * * *

Java class for registry_type complex type. * @@ -36,17 +33,11 @@ * *

{@code
  * 
- *   
- *     
- *       
- *         
- *         
- *       
+ *   
+ *     
  *       
- *       
- *       
- *     
- *   
+ *     
+ *   
  * 
  * }
* @@ -54,76 +45,41 @@ */ @XmlAccessorType(XmlAccessType.FIELD) @XmlType(name = "registry_type", propOrder = { - "cognitoClientId", - "serverUrl" + "value" }) public class RegistryType { - @XmlElement(name = "cognito_client_id") - @XmlJavaTypeAdapter(NormalizedStringAdapter.class) - @XmlSchemaType(name = "normalizedString") - protected String cognitoClientId; - @XmlElement(name = "server_url") + @XmlValue @XmlJavaTypeAdapter(NormalizedStringAdapter.class) @XmlSchemaType(name = "normalizedString") - protected String serverUrl; + protected String value; @XmlAttribute(name = "auth", required = true) @XmlJavaTypeAdapter(NormalizedStringAdapter.class) @XmlSchemaType(name = "normalizedString") protected String auth; - @XmlAttribute(name = "index") - @XmlJavaTypeAdapter(NormalizedStringAdapter.class) - @XmlSchemaType(name = "normalizedString") - protected String index; - @XmlAttribute(name = "trust_self_signed") - protected Boolean trustSelfSigned; /** - * Gets the value of the cognitoClientId property. + * Gets the value of the value property. * * @return * possible object is * {@link String } * */ - public String getCognitoClientId() { - return cognitoClientId; + public String getValue() { + return value; } /** - * Sets the value of the cognitoClientId property. + * Sets the value of the value property. * * @param value * allowed object is * {@link String } * */ - public void setCognitoClientId(String value) { - this.cognitoClientId = value; - } - - /** - * Gets the value of the serverUrl property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getServerUrl() { - return serverUrl; - } - - /** - * Sets the value of the serverUrl property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setServerUrl(String value) { - this.serverUrl = value; + public void setValue(String value) { + this.value = value; } /** @@ -150,60 +106,4 @@ public void setAuth(String value) { this.auth = value; } - /** - * Gets the value of the index property. - * - * @return - * possible object is - * {@link String } - * - */ - public String getIndex() { - if (index == null) { - return "registry"; - } else { - return index; - } - } - - /** - * Sets the value of the index property. - * - * @param value - * allowed object is - * {@link String } - * - */ - public void setIndex(String value) { - this.index = value; - } - - /** - * Gets the value of the trustSelfSigned property. - * - * @return - * possible object is - * {@link Boolean } - * - */ - public boolean isTrustSelfSigned() { - if (trustSelfSigned == null) { - return false; - } else { - return trustSelfSigned; - } - } - - /** - * Sets the value of the trustSelfSigned property. - * - * @param value - * allowed object is - * {@link Boolean } - * - */ - public void setTrustSelfSigned(Boolean value) { - this.trustSelfSigned = value; - } - } diff --git a/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java b/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java index 9beeeaa6..0dd548bf 100644 --- a/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java +++ b/src/main/java/gov/nasa/pds/harvest/crawler/CollectionProcessor.java @@ -19,7 +19,6 @@ import gov.nasa.pds.harvest.dao.RegistryDao; import gov.nasa.pds.harvest.dao.RegistryManager; import gov.nasa.pds.harvest.util.xml.XmlIs; -import gov.nasa.pds.registry.common.cfg.RegistryCfg; import gov.nasa.pds.registry.common.es.service.CollectionInventoryWriter; import gov.nasa.pds.registry.common.meta.CollectionMetadataExtractor; import gov.nasa.pds.registry.common.meta.Metadata; @@ -59,12 +58,7 @@ public class CollectionProcessor extends BaseProcessor public CollectionProcessor(HarvestConfigurationType config) throws Exception { super(config); - // FIXME: multitenancy - RegistryCfg fixme = new RegistryCfg(); - fixme.url = config.getRegistry().getServerUrl(); - fixme.indexName = config.getRegistry().getIndex(); - fixme.authFile = config.getRegistry().getAuth(); - invWriter = new CollectionInventoryWriter(fixme); + invWriter = new CollectionInventoryWriter(ConfigManager.exchangeRegistry(config.getRegistry())); this.invProc = new CollectionInventoryProcessor(config.getReferences().isPrimaryOnly()); collectionExtractor = new CollectionMetadataExtractor(); } @@ -137,7 +131,6 @@ private void processMetadata(File file, Document doc, BundleType bCfg) throws Ex meta.setNodeName(config.getNodeName().toString()); // Collection filter - // FIXME: this needs to be changed and may relate to other fixme in this file List lids = ConfigManager.exchangeLids (bCfg.getCollection()); List lidvids = ConfigManager.exchangeLidvids (bCfg.getCollection()); if(!lids.isEmpty() && !lids.contains(meta.lid)) return; diff --git a/src/main/java/gov/nasa/pds/harvest/dao/EsRequestBuilder.java b/src/main/java/gov/nasa/pds/harvest/dao/EsRequestBuilder.java deleted file mode 100644 index 4348d271..00000000 --- a/src/main/java/gov/nasa/pds/harvest/dao/EsRequestBuilder.java +++ /dev/null @@ -1,96 +0,0 @@ -package gov.nasa.pds.harvest.dao; - -import java.io.IOException; -import java.io.StringWriter; -import java.io.Writer; -import java.util.Collection; - -import com.google.gson.stream.JsonWriter; -import gov.nasa.pds.harvest.exception.InvalidPDS4ProductException; - - -/** - * Elasticsearch request / query builder. - * - * @author karpenko - */ -public class EsRequestBuilder -{ - private boolean pretty; - - - /** - * Constructor. - * @param pretty Generate pretty-formatted JSON - */ - public EsRequestBuilder(boolean pretty) - { - this.pretty = pretty; - } - - - /** - * Construcotr - */ - public EsRequestBuilder() - { - this(false); - } - - - private JsonWriter createJsonWriter(Writer writer) - { - JsonWriter jw = new JsonWriter(writer); - if (pretty) - { - jw.setIndent(" "); - } - - return jw; - } - - - /** - * Create Elasticsearch query to search for product IDs (lidvids) - * @param ids Collection of product IDs (lidvids) - * @param pageSize Number of records to return. Usually pageSize = ids.size(). - * @return JSON Elasticsearch request - * @throws InvalidPDS4ProductException Invalid PDS4 Product Exception - * @throws IOException IOException - */ - public String createSearchIdsRequest(Collection ids, int pageSize) throws InvalidPDS4ProductException, IOException { - if(ids == null || ids.isEmpty()) throw new InvalidPDS4ProductException("Error reading bundle/collection references. " + - "Verify the bundle/collection is valid prior to loading the data."); - - StringWriter out = new StringWriter(); - JsonWriter writer = createJsonWriter(out); - - // Create ids query - writer.beginObject(); - - // Exclude source from response - writer.name("_source").value(false); - writer.name("size").value(pageSize); - - writer.name("query"); - writer.beginObject(); - writer.name("ids"); - writer.beginObject(); - - writer.name("values"); - writer.beginArray(); - for(String id: ids) - { - writer.value(id); - } - writer.endArray(); - - writer.endObject(); - writer.endObject(); - writer.endObject(); - - writer.close(); - return out.toString(); - } - -} diff --git a/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java b/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java index a777bdf9..b0700c22 100644 --- a/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java +++ b/src/main/java/gov/nasa/pds/harvest/dao/MetadataWriter.java @@ -12,7 +12,7 @@ import gov.nasa.pds.harvest.crawler.Counter; import gov.nasa.pds.harvest.util.PackageIdGenerator; -import gov.nasa.pds.registry.common.cfg.RegistryCfg; +import gov.nasa.pds.registry.common.ConnectionFactory; import gov.nasa.pds.registry.common.es.dao.DataLoader; import gov.nasa.pds.registry.common.meta.Metadata; @@ -22,7 +22,7 @@ public class MetadataWriter implements Closeable private final static String WARN_SKIP_PRE = "Skipping registered product "; private final static String WARN_SKIP_POST = " (LIDVID/LID already exists in registry database)"; private final static int ES_DOC_BATCH_SIZE = 50; - + private final ConnectionFactory conFact; private Logger log; private RegistryDao registryDao; @@ -40,10 +40,11 @@ public class MetadataWriter implements Closeable * @param cfg registry configuration * @throws Exception an exception */ - public MetadataWriter(RegistryCfg cfg, RegistryDao dao, Counter counter) throws Exception + public MetadataWriter(ConnectionFactory conFact, RegistryDao dao, Counter counter) throws Exception { + this.conFact = conFact; log = LogManager.getLogger(this.getClass()); - loader = new DataLoader(cfg.url, cfg.indexName, cfg.authFile); + loader = new DataLoader(conFact); docBatch = new RegistryDocBatch(); jobId = PackageIdGenerator.getInstance().getPackageId(); @@ -60,7 +61,7 @@ public void setOverwriteExisting(boolean b) public void write(Metadata meta) throws Exception { - docBatch.write(meta, jobId); + docBatch.write(this.conFact, meta, jobId); if(docBatch.size() % ES_DOC_BATCH_SIZE == 0) { diff --git a/src/main/java/gov/nasa/pds/harvest/dao/NonExistingIdsResponse.java b/src/main/java/gov/nasa/pds/harvest/dao/NonExistingIdsResponse.java deleted file mode 100644 index 450d2990..00000000 --- a/src/main/java/gov/nasa/pds/harvest/dao/NonExistingIdsResponse.java +++ /dev/null @@ -1,50 +0,0 @@ -package gov.nasa.pds.harvest.dao; - -import java.util.Collection; -import java.util.Set; -import java.util.TreeSet; - -import gov.nasa.pds.registry.common.es.client.SearchResponseParser; - - -/** - * Helper class to process Elasticsearch response from "search IDs" query. - * - * @author karpenko - */ -public class NonExistingIdsResponse implements SearchResponseParser.Callback -{ - private Set retIds; - - - /** - * Constructor - * @param ids Product IDs (lidvids) sent to Elasticsearch in "search IDs" query. - * IDs are copied to internal collection. - * After processing Elasticsearch response, all IDs existing in Elasticsearch - * "registry" index will be removed from this internal collection. - */ - public NonExistingIdsResponse(Collection ids) - { - retIds = new TreeSet<>(ids); - } - - /** - * Return collection of product IDs (lidvids) non-existing in Elasticsearch. - * @return a collection of product IDs (lidvids) - */ - public Set getIds() - { - return retIds; - } - - - /** - * This method is called for each record in Elasticsearch response - */ - @Override - public void onRecord(String id, Object src) throws Exception - { - retIds.remove(id); - } -} diff --git a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java index 1fe32f07..892c494a 100644 --- a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java +++ b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDao.java @@ -5,12 +5,10 @@ import java.util.HashSet; import java.util.List; import java.util.Set; - -import org.elasticsearch.client.Request; -import org.elasticsearch.client.Response; -import org.elasticsearch.client.RestClient; - -import gov.nasa.pds.registry.common.es.client.SearchResponseParser; +import gov.nasa.pds.harvest.exception.InvalidPDS4ProductException; +import gov.nasa.pds.registry.common.Request; +import gov.nasa.pds.registry.common.Response; +import gov.nasa.pds.registry.common.RestClient; /** @@ -24,10 +22,6 @@ public class RegistryDao private String indexName; private boolean pretty; - private EsRequestBuilder requestBld; - private SearchResponseParser parser; - - /** * Constructor * @param client Elasticsearch client @@ -50,9 +44,6 @@ public RegistryDao(RestClient client, String indexName, boolean pretty) this.client = client; this.indexName = indexName; this.pretty = pretty; - - requestBld = new EsRequestBuilder(); - parser = new SearchResponseParser(); } @@ -82,25 +73,20 @@ public boolean idExists(String id) throws Exception public Set getNonExistingIds(Collection ids) throws Exception { if(ids == null || ids.isEmpty()) return new HashSet<>(); - Response resp = searchIds(ids); - - NonExistingIdsResponse idsResp = new NonExistingIdsResponse(ids); - parser.parseResponse(resp, idsResp); - - return idsResp.getIds(); + return searchIds(ids).nonExistingIds(ids); } - private Response searchIds(Collection ids) throws Exception + private Response.Search searchIds(Collection ids) throws Exception { - String json = requestBld.createSearchIdsRequest(ids, ids.size()); - - String reqUrl = "/" + indexName + "/_search"; - if(pretty) reqUrl += "?pretty"; - - Request req = new Request("GET", reqUrl); - req.setJsonEntity(json); - Response resp = client.performRequest(req); + if(ids == null || ids.isEmpty()) throw new InvalidPDS4ProductException("Error reading bundle/collection references. " + + "Verify the bundle/collection is valid prior to loading the data."); + + Request.Search req = client.createSearchRequest() + .buildTheseIds(ids) + .setIndex(this.indexName) + .setPretty(pretty); + Response.Search resp = client.performRequest(req); return resp; } diff --git a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java index fa48e31e..48b3c30b 100644 --- a/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java +++ b/src/main/java/gov/nasa/pds/harvest/dao/RegistryDocBatch.java @@ -1,9 +1,14 @@ package gov.nasa.pds.harvest.dao; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; - +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import gov.nasa.pds.registry.common.ConnectionFactory; +import gov.nasa.pds.registry.common.RestClient; import gov.nasa.pds.registry.common.meta.Metadata; +import gov.nasa.pds.registry.common.util.Tuple; import gov.nasa.pds.registry.common.util.json.RegistryDocBuilder; /** @@ -20,7 +25,8 @@ public static class NJsonItem public String dataJson; // Data JSON (line 2) } - + final private static HashSet alreadyLearned = new HashSet(); + final private Logger log = LogManager.getLogger(RegistryDocBatch.class); private List items; @@ -32,15 +38,43 @@ public RegistryDocBatch() items = new ArrayList<>(); } - - public void write(Metadata meta, String jobId) throws Exception + /* hack for PDS-NASA/harvest#127 + * search the JSON string for any ref_lid_ and add if necessary to index so that it is searchable + */ + private void updateIndex(ConnectionFactory conFact, String json) { + int begin_index = json.indexOf("ref_lid_"), end_index; + String name; + if (-1 < begin_index && alreadyLearned.isEmpty()) { + try (RestClient client = conFact.createRestClient()) { + alreadyLearned.addAll(client.performRequest(client.createMappingRequest().setIndex(conFact.getIndexName())).fieldNames()); + } catch (Exception e) { + log.error("Unexpected error (should not have made it here) while getting index " + conFact.getIndexName(),e); + } + } + while (-1 < begin_index) { + end_index = json.indexOf('"', begin_index+5); + name = json.substring(begin_index, end_index); + if (!alreadyLearned.contains(name)) { + try (RestClient client = conFact.createRestClient()) { + ArrayList new_item = new ArrayList(); + new_item.add(new Tuple(name, "keyword")); + client.performRequest(client.createMappingRequest().setIndex(conFact.getIndexName()).buildUpdateFieldSchema(new_item)); + alreadyLearned.add(name); + } catch (Exception e) { + log.error("Unexpected error (should not have made it here) while updating index with " + name,e); + } + } + begin_index = json.indexOf("ref_lid_", end_index); + } + } + public void write(ConnectionFactory conFact, Metadata meta, String jobId) throws Exception { NJsonItem item = new NJsonItem(); item.lidvid = meta.lidvid; item.prodClass = meta.prodClass; item.pkJson = RegistryDocBuilder.createPKJson(meta); item.dataJson = RegistryDocBuilder.createDataJson(meta, jobId); - + this.updateIndex(conFact, item.dataJson); items.add(item); } diff --git a/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java b/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java index d770348e..efeaef13 100644 --- a/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java +++ b/src/main/java/gov/nasa/pds/harvest/dao/RegistryManager.java @@ -2,12 +2,11 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.client.RestClient; import gov.nasa.pds.harvest.crawler.Counter; import gov.nasa.pds.harvest.util.log.LogUtils; -import gov.nasa.pds.registry.common.cfg.RegistryCfg; -import gov.nasa.pds.registry.common.es.client.EsClientFactory; +import gov.nasa.pds.registry.common.ConnectionFactory; +import gov.nasa.pds.registry.common.RestClient; import gov.nasa.pds.registry.common.util.CloseUtils; import gov.nasa.pds.registry.common.es.dao.dd.DataDictionaryDao; import gov.nasa.pds.registry.common.es.dao.schema.SchemaDao; @@ -27,10 +26,10 @@ public class RegistryManager { private static RegistryManager instance = null; - private RegistryCfg cfg; + private ConnectionFactory conFact; private boolean overwriteFlag; - private RestClient esClient; + private RestClient client; private RegistryDao registryDao; private SchemaDao schemaDao; @@ -49,30 +48,25 @@ public class RegistryManager * @param cfg Registry (Elasticsearch) configuration parameters. * @throws Exception Generic exception */ - private RegistryManager(RegistryCfg cfg, boolean overwriteFlag) throws Exception + private RegistryManager(ConnectionFactory conFact, boolean overwriteFlag) throws Exception { - this.cfg = cfg; + this.conFact = conFact; this.overwriteFlag = overwriteFlag; this.counter = new Counter(); - Logger log = LogManager.getLogger(this.getClass()); - log.log(LogUtils.LEVEL_SUMMARY, "Elasticsearch URL: " + cfg.url + ", index: " + cfg.indexName); - - esClient = EsClientFactory.createRestClient(cfg.url, cfg.authFile); - - String indexName = cfg.indexName; - if(indexName == null || indexName.isEmpty()) indexName = "registry"; - - registryDao = new RegistryDao(esClient, indexName); - schemaDao = new SchemaDao(esClient, indexName); - ddDao = new DataDictionaryDao(esClient, indexName); - + log.log(LogUtils.LEVEL_SUMMARY, "Connection: " + conFact); + client = conFact.createRestClient(); + registryDao = new RegistryDao(client, conFact.getIndexName()); + schemaDao = new SchemaDao(client, conFact.getIndexName()); + ddDao = new DataDictionaryDao(client, conFact.getIndexName()); fieldNameCache = new FieldNameCache(ddDao, schemaDao); - - registryWriter = new MetadataWriter(cfg, registryDao, counter); + registryWriter = new MetadataWriter(conFact, registryDao, counter); registryWriter.setOverwriteExisting(overwriteFlag); + invWriter = new CollectionInventoryWriter(conFact); - invWriter = new CollectionInventoryWriter(cfg); + if (!this.client.exists(this.conFact.getIndexName())) { + throw new RuntimeException("The index '" + this.conFact.getIndexName() + "' does not exist. Please create it first."); + } } @@ -82,13 +76,9 @@ private RegistryManager(RegistryCfg cfg, boolean overwriteFlag) throws Exception * @param overwriteFlag overwrite registered products * @throws Exception Generic exception */ - public static void init(RegistryCfg cfg, boolean overwriteFlag) throws Exception + public static void init(ConnectionFactory conFact, boolean overwriteFlag) throws Exception { - // Registry is not configured. Run Harvest without Registry. - if(cfg == null) throw new IllegalArgumentException("Registry is not configuraed."); - if(cfg.url == null || cfg.url.isEmpty()) throw new IllegalArgumentException("Missing Registry URL"); - - instance = new RegistryManager(cfg, overwriteFlag); + instance = new RegistryManager(conFact, overwriteFlag); } @@ -100,7 +90,7 @@ public static void destroy() if(instance == null) return; CloseUtils.close(instance.registryWriter); - CloseUtils.close(instance.esClient); + CloseUtils.close(instance.client); instance = null; } @@ -116,16 +106,6 @@ public static RegistryManager getInstance() } - /** - * Get registry configuration. - * @return Registry configuration - */ - public RegistryCfg getRegistryConfiguration() - { - return cfg; - } - - /** * Get overwrite flag * @return if true, overwrite already registered documents @@ -183,7 +163,7 @@ public FieldNameCache getFieldNameCache() */ public MissingFieldsProcessor createMissingFieldsProcessor() throws Exception { - SchemaUpdater su = new SchemaUpdater(cfg, ddDao, schemaDao); + SchemaUpdater su = new SchemaUpdater(conFact, ddDao, schemaDao); return new MissingFieldsProcessor(su, fieldNameCache); } diff --git a/src/main/resources/conf/configuration.xsd b/src/main/resources/conf/configuration.xsd index 910ea1f8..ece01b01 100644 --- a/src/main/resources/conf/configuration.xsd +++ b/src/main/resources/conf/configuration.xsd @@ -177,11 +177,11 @@ These are the basic options for the harvest configuration file. - @nodeName: the PDS node that this harvest run applies to autogenFields: should not be used except in development testing - do: tells where and how to harvest PDS4 labels + load: tells where and how to harvest PDS4 labels fileInfo: option allowing filename prefixes to be replaced + nodeName: the PDS node that this harvest run applies to productFilter: should not be used except in development testing registry: define the server harvest should use xpathMaps: allow constraints in the PDS4 label to control harvesting @@ -192,12 +192,12 @@ name="autogenFields" type="autogen_fields_type"/> + - @@ -236,23 +236,18 @@ Define the connection to the registry, security for the connection, and - the index within the registry. + the index within the registry. The value of this tag is a pointer to + a registry connection like app://known/direct/localhost.xml or + app://known/cognito/first_test.xml @auth: a java property file containing a username and password - @index: the index to be used by harvest whose default is registry - @trust_self_signed: all self signed certificates for https - - cognito_client_id: the cognito client ID for AWS based instances of opensearch - server_url: the opensearch URL when not using AWS services - + - - - - - - - + + + + + diff --git a/src/main/resources/conf/examples/bundles.xml b/src/main/resources/conf/examples/bundles.xml index 9dde2331..959216a7 100644 --- a/src/main/resources/conf/examples/bundles.xml +++ b/src/main/resources/conf/examples/bundles.xml @@ -17,13 +17,13 @@ * PSA - Planetary Science Archive * JAXA - Japan Aerospace Exploration Agency --> - - + + CHANGE_ME - - http://localhost:9200 - + app://localhost.xml - + + CHANGE_ME - - http://localhost:9200 - + app://localhost.xml diff --git a/src/main/resources/conf/examples/files.xml b/src/main/resources/conf/examples/files.xml index 758d2f29..854d9789 100644 --- a/src/main/resources/conf/examples/files.xml +++ b/src/main/resources/conf/examples/files.xml @@ -14,13 +14,14 @@ * PSA - Planetary Science Archive * JAXA - Japan Aerospace Exploration Agency --> - + + CHANGE_ME - - http://localhost:9200 - + app://localhost.xml diff --git a/src/main/resources/conf/examples/xpaths.xml b/src/main/resources/conf/examples/xpaths.xml index 6637bfbb..bd43895a 100644 --- a/src/main/resources/conf/examples/xpaths.xml +++ b/src/main/resources/conf/examples/xpaths.xml @@ -1,6 +1,9 @@ - + + CHANGE_ME @@ -22,8 +25,6 @@ - - jdoawierllksjdfkla - + app://cognito.xml diff --git a/src/test/java/tt/es/TestRegistryDAO.java b/src/test/java/tt/es/TestRegistryDAO.java index b09e7cbb..3a82b947 100644 --- a/src/test/java/tt/es/TestRegistryDAO.java +++ b/src/test/java/tt/es/TestRegistryDAO.java @@ -6,7 +6,7 @@ import gov.nasa.pds.harvest.dao.RegistryDao; import gov.nasa.pds.harvest.dao.RegistryManager; -import gov.nasa.pds.registry.common.cfg.RegistryCfg; +import gov.nasa.pds.registry.common.ConnectionFactory; public class TestRegistryDAO @@ -17,10 +17,9 @@ public class TestRegistryDAO public static void main(String[] args) throws Exception { - RegistryCfg cfg = new RegistryCfg(); - cfg.url = "http://localhost:9200"; + ConnectionFactory conFact = null;//"app:/connections/direct/localhost.xml"; - RegistryManager.init(cfg, true); + RegistryManager.init(conFact, true); try { diff --git a/src/test/java/tt/es/TestRequestBuilder.java b/src/test/java/tt/es/TestRequestBuilder.java deleted file mode 100644 index ea93e8ba..00000000 --- a/src/test/java/tt/es/TestRequestBuilder.java +++ /dev/null @@ -1,22 +0,0 @@ -package tt.es; - -import java.util.Set; -import java.util.TreeSet; - -import gov.nasa.pds.harvest.dao.EsRequestBuilder; - -public class TestRequestBuilder -{ - - public static void main(String[] args) throws Exception - { - EsRequestBuilder bld = new EsRequestBuilder(true); - - Set ids = new TreeSet<>(); - ids.add("id123"); - - String json = bld.createSearchIdsRequest(ids, 100); - System.out.println(json); - } - -}