Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Baseline refactoring to support OpenSearch Serverless and OpenSearch Java JDK #47

Merged
merged 41 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
76e092f
refactoring has started
Jan 25, 2024
6f6c804
start of a facade
Jan 25, 2024
7bd3eb2
clean up tests
Jan 25, 2024
e29de52
some fixes for ESClient authentication
Jan 26, 2024
246179a
refactory es to start hiding it
Jan 26, 2024
3723706
refactor code to isolate elasticsearch dependencies
Jan 30, 2024
951eec9
back together again but need wrappers
Jan 30, 2024
5c1dadb
wrappers are now in place as well
Jan 30, 2024
8709e60
fixes for harvest to work
Jan 30, 2024
7f5a2e1
new functionality for registry-mgr
Jan 30, 2024
c701f0d
work in progress
Feb 3, 2024
06079fd
new code for new arg handling
Feb 9, 2024
138f0bc
add new registrty connection
Feb 13, 2024
5fac3cc
debug help
Feb 19, 2024
669fa72
update to new connection URL
Feb 19, 2024
08a15dc
testing in progress
Feb 23, 2024
0e57b33
wip
Mar 27, 2024
16566e6
add a bulk wrapper for serverless and harvest
Mar 30, 2024
e744a5a
major refactor
Mar 31, 2024
36d533b
made it part way through serverless but still WIP
Mar 31, 2024
9f9a35d
add handler
Apr 3, 2024
e4bbfe1
correct parsing of app://
Apr 3, 2024
d37e781
add controlling the java SDK to the options
Apr 3, 2024
7338646
make it more clear for localhost
Apr 3, 2024
c13338e
refactored a bit
Apr 3, 2024
4183c15
add configuration value for endpoint in AWS
Apr 4, 2024
9c7d52c
wip: chaning up the returns to be interfaces too
Apr 11, 2024
7fd42e5
WIP
Apr 16, 2024
09b617d
new elements
Apr 16, 2024
0014e52
harvest is working
Apr 17, 2024
1694a68
fix fixme
Apr 17, 2024
0df3fce
add cognito
Apr 17, 2024
090aab2
clean up path for harvest #158
Apr 19, 2024
8d760fe
moved over bad references from mgr
Apr 29, 2024
2c7d15f
moved over bad references from mgr
Apr 29, 2024
6af2a31
add new functionality to aws
Apr 29, 2024
04a4ae4
updates for ref integrity
Jun 18, 2024
a53c6c2
fix for -dd-dd-dd
Jun 18, 2024
a7b344f
sdk1 unimplemented functionality
Jun 18, 2024
a017519
fix not filling -refs index
Jun 19, 2024
ba0a495
fix the unimplemented portion for harvest
Jun 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,43 @@ POSSIBILITY OF SUCH DAMAGE.
<artifactId>tika-core</artifactId>
<version>1.28.3</version>
</dependency>
<!-- three artifacts for complete configuration parsing -->
<dependency>
<groupId>jakarta.xml.bind</groupId>
<artifactId>jakarta.xml.bind-api</artifactId>
<version>4.0.1</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>4.0.4</version>
</dependency>
<dependency>
<groupId>jakarta.activation</groupId>
<artifactId>jakarta.activation-api</artifactId>
<version>2.1.2</version>
</dependency>
<!-- four artifacts for opensearch serverless -->
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.3.1</version>
</dependency>
<dependency>
<groupId>org.opensearch.client</groupId>
<artifactId>opensearch-java</artifactId>
<version>2.10.0</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>opensearch</artifactId>
<version>2.25.31</version>
</dependency>
<dependency>
<groupId>software.amazon.awssdk</groupId>
<artifactId>apache-client</artifactId>
<version>2.25.31</version>
</dependency>
</dependencies>

<build>
Expand Down
17 changes: 17 additions & 0 deletions src/main/java/gov/nasa/pds/registry/common/ConnectionFactory.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package gov.nasa.pds.registry.common;

import org.apache.http.HttpHost;
import org.apache.http.client.CredentialsProvider;

/**
 * Describes a connection to a registry and creates {@link RestClient} instances for it.
 *
 * <p>Host and credentials are exposed twice: once with the {@code org.apache.http} types and once
 * with the {@code org.apache.hc} (version 5) types, as indicated by the {@code 5} suffix.
 */
public interface ConnectionFactory {

  /**
   * Produces another {@code ConnectionFactory}.
   * NOTE(review): presumably a copy of this instance - confirm against the implementations.
   */
  ConnectionFactory clone();

  /**
   * Creates a client for this connection.
   *
   * @return the new client
   * @throws Exception if the client cannot be created
   */
  RestClient createRestClient() throws Exception;

  /** @return the credentials as an {@code org.apache.http} credentials provider */
  CredentialsProvider getCredentials();

  /** @return the credentials as an {@code org.apache.hc.client5} credentials provider */
  org.apache.hc.client5.http.auth.CredentialsProvider getCredentials5();

  /** @return the host as an {@code org.apache.http} host */
  HttpHost getHost();

  /** @return the host as an {@code org.apache.hc.core5} host */
  org.apache.hc.core5.http.HttpHost getHost5();

  /** @return the host name */
  String getHostName();

  /** @return the index name */
  String getIndexName();

  /** @return whether self-signed certificates are trusted */
  boolean isTrustingSelfSigned();

  /**
   * Sets the index name.
   *
   * @param idxName the index name to use
   * @return a {@code ConnectionFactory}, which allows the call to be chained
   */
  ConnectionFactory setIndexName(String idxName);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package gov.nasa.pds.registry.common;

import java.net.URL;
import gov.nasa.pds.registry.common.connection.AuthContent;
import gov.nasa.pds.registry.common.connection.KnownRegistryConnections;
import gov.nasa.pds.registry.common.connection.UseOpensearchSDK1;
import gov.nasa.pds.registry.common.connection.UseOpensearchSDK2;
import gov.nasa.pds.registry.common.connection.RegistryConnectionContent;

/**
 * Static entry points that turn the URL of a registry connection description into a
 * {@link ConnectionFactory}.
 */
public class EstablishConnectionFactory {

  /**
   * Builds a factory using {@code AuthContent.DEFAULT} for authentication.
   *
   * @param urlToRegistryConnection URL, as text, of the registry connection description
   * @return the factory with its index name already set from the connection description
   * @throws Exception if the URL is malformed or the connection description cannot be processed
   */
  public static ConnectionFactory from(String urlToRegistryConnection) throws Exception {
    return from(urlToRegistryConnection, AuthContent.DEFAULT);
  }

  /**
   * Builds a factory using the authentication content obtained from {@code authFile}.
   *
   * @param urlToRegistryConnection URL, as text, of the registry connection description
   * @param authFile passed to {@code AuthContent.from} to obtain the authentication content
   * @return the factory with its index name already set from the connection description
   * @throws Exception if the URL is malformed or either input cannot be processed
   */
  public static ConnectionFactory from(String urlToRegistryConnection, String authFile)
      throws Exception {
    return from(urlToRegistryConnection, AuthContent.from(authFile));
  }

  /**
   * Shared implementation: loads the connection description and picks the matching builder.
   * Declared {@code synchronized}, so only one thread at a time runs this method.
   */
  private static synchronized ConnectionFactory from(
      String urlToRegistryConnection, AuthContent auth) throws Exception {
    KnownRegistryConnections.initialzeAppHandler();
    final URL location = new URL(urlToRegistryConnection);
    final RegistryConnectionContent conn = RegistryConnectionContent.from(location);

    if (conn.isDirectConnection()) {
      // The connection description selects which SDK flavor to build with.
      final int sdk = conn.getServerUrl().getSdk().intValue();
      switch (sdk) {
        case 1:
          return UseOpensearchSDK1.build(conn.getServerUrl(), auth).setIndexName(conn.getIndex());
        case 2:
          return UseOpensearchSDK2.build(conn.getServerUrl(), auth).setIndexName(conn.getIndex());
        default:
          throw new RuntimeException("The SDK version '" + sdk + "' is not supported");
      }
    }

    if (conn.isCognitoConnection()) {
      return UseOpensearchSDK2.build(conn.getCognitoClientId(), auth)
          .setIndexName(conn.getIndex());
    }

    // Neither known kind of connection matched.
    throw new RuntimeException("New XML/Java choices in src/main/resources/registry_connection.xsd that are not handled.");
  }
}
57 changes: 57 additions & 0 deletions src/main/java/gov/nasa/pds/registry/common/Request.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package gov.nasa.pds.registry.common;

import java.util.Collection;
import java.util.List;
import gov.nasa.pds.registry.common.util.Tuple;

/**
 * Groups the request types accepted by {@code RestClient}. Each nested interface is labelled
 * with the endpoint name it was annotated with. Methods that return the request type allow
 * calls to be chained.
 */
public interface Request {

  /** Bulk request ({@code _bulk}). */
  interface Bulk {
    /** Refresh choices for a bulk request. */
    enum Refresh {
      False,
      True,
      WaitFor
    }

    /** Adds a statement and its document; annotated for create, index and update. */
    void add(String statement, String document);

    Bulk buildUpdateStatus(Collection<String> lidvids, String status);

    Bulk setIndex(String name);

    Bulk setRefresh(Refresh type);
  }

  /** Count request ({@code _count}). */
  interface Count {
    Count setIndex(String name);

    Count setQuery(String q);
  }

  /** Delete-by-query request ({@code delete_by_query}). */
  interface DeleteByQuery {
    DeleteByQuery createFilterQuery(String key, String value);

    DeleteByQuery createMatchAllQuery();

    DeleteByQuery setIndex(String name);

    DeleteByQuery setRefresh(boolean state);
  }

  /** Document get request ({@code _doc}). */
  interface Get {
    Get excludeField(String field);

    Get excludeFields(List<String> fields);

    Get includeField(String field);

    Get includeFields(List<String> fields);

    Get setId(String id);

    Get setIndex(String index);
  }

  /** Mapping request ({@code _mapping}). */
  interface Mapping {
    Mapping buildUpdateFieldSchema(Collection<Tuple> pairs);

    Mapping setIndex(String name);
  }

  /** Multi-get request ({@code _mget}); adds a collection of ids to {@link Get}. */
  interface MGet extends Get {
    MGet setIds(Collection<String> ids);
  }

  /** Search request ({@code _search}). */
  interface Search {
    Search all(String sortField, int size, String searchAfter);

    Search all(
        String filterField, String filterValue, String sortField, int size, String searchAfter);

    Search buildAlternativeIds(Collection<String> lids);

    Search buildGetField(String field_name, String lidvid);

    Search buildLatestLidVids(Collection<String> lids);

    Search buildListFields(String dataType);

    Search buildListLdds(String namespace);

    Search buildTermQuery(String fieldname, String value);

    Search buildTheseIds(Collection<String> lids);

    Search setIndex(String name);

    Search setPretty(boolean pretty);

    Search setSize(int hitsperpage);
  }

  /** Settings request ({@code _settings}). */
  interface Setting {
    Setting setIndex(String name);
  }
}
62 changes: 62 additions & 0 deletions src/main/java/gov/nasa/pds/registry/common/Response.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package gov.nasa.pds.registry.common;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import gov.nasa.pds.registry.common.es.dao.dd.DataTypeNotFoundException;
import gov.nasa.pds.registry.common.es.dao.dd.LddInfo;
import gov.nasa.pds.registry.common.es.dao.dd.LddVersions;
import gov.nasa.pds.registry.common.util.Tuple;

/**
 * Groups the response types returned by {@code RestClient}. Each nested interface exposes the
 * content of one kind of response through accessor methods.
 */
public interface Response {

  /** Result of a bulk request. */
  interface Bulk {
    /** One entry of a bulk response. */
    interface Item {
      boolean error();

      String id();

      String index();

      String operation();

      String reason();

      String result();

      int status();
    }

    boolean errors();

    List<Item> items();

    void logErrors();

    long took();
  }

  /** Result of creating an index. */
  interface CreatedIndex {
    boolean acknowledge();

    boolean acknowledgeShards();

    String getIndex();
  }

  /** Result of a get request. */
  interface Get {
    /** Pair of id sets: lids and lidvids. */
    interface IdSets {
      Set<String> lids();

      Set<String> lidvids();
    }

    List<Tuple> dataTypes(boolean stringForMissing) throws IOException, DataTypeNotFoundException;

    /** @return the id sets, or {@code null} if nothing is found in the returned content */
    IdSets ids();

    /** @return the product class, or {@code null} if it is not in the returned content */
    String productClass();

    /** @return the references, or {@code null} if nothing is found in the returned content */
    List<String> refs();
  }

  /** Result of a mapping request. */
  interface Mapping {
    Set<String> fieldNames();
  }

  /** Result of a search request. */
  interface Search {
    Map<String, Set<String>> altIds() throws UnsupportedOperationException, IOException;

    List<Object> batch() throws UnsupportedOperationException, IOException;

    List<Map<String, Object>> documents();

    /**
     * @param name the field to read
     * @return the field value; {@code null} means the blob is not in the document
     * @throws NoSuchFieldException if the document is not found
     */
    String field(String name) throws NoSuchFieldException;

    Set<String> fields() throws UnsupportedOperationException, IOException;

    /** @return the latest lidvids, or {@code null} if nothing is found in the returned content */
    List<String> latestLidvids();

    LddVersions lddInfo() throws UnsupportedOperationException, IOException;

    List<LddInfo> ldds() throws UnsupportedOperationException, IOException;

    Set<String> nonExistingIds(Collection<String> from_ids)
        throws UnsupportedOperationException, IOException;
  }

  /** Result of a settings request. */
  interface Settings {
    int replicas();

    int shards();
  }
}
10 changes: 10 additions & 0 deletions src/main/java/gov/nasa/pds/registry/common/ResponseException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package gov.nasa.pds.registry.common;

import java.io.IOException;

/**
 * Abstract {@link IOException} that additionally exposes an error message and a status code.
 * Concrete subclasses decide where both values come from.
 */
public abstract class ResponseException extends IOException {
  private static final long serialVersionUID = 8629769947735587642L;

  /** @return the error message extracted by the subclass */
  public abstract String extractErrorMessage();

  /** @return the status code, or {@code -1} if not known */
  public abstract int statusCode();

  //abstract public Response getResponse();
}
74 changes: 74 additions & 0 deletions src/main/java/gov/nasa/pds/registry/common/RestClient.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package gov.nasa.pds.registry.common;

import java.io.Closeable;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.StringWriter;
import java.util.Map;
import java.util.TreeMap;
import com.google.gson.Gson;
import com.google.gson.stream.JsonWriter;
import gov.nasa.pds.registry.common.util.CloseUtils;

/**
 * Client for a registry: creates the {@link Request} objects, performs them, and manages
 * indices. Being {@link Closeable}, a client should be closed when no longer needed.
 */
public interface RestClient extends Closeable {
  // Factories for the request objects accepted by the performRequest overloads below.
  public Request.Bulk createBulkRequest();
  public Request.Count createCountRequest();
  public Request.DeleteByQuery createDeleteByQuery();
  public Request.Get createGetRequest();
  public Request.Mapping createMappingRequest();
  public Request.MGet createMGetRequest();
  public Request.Search createSearchRequest();
  public Request.Setting createSettingRequest();

  // Index management.
  public Response.CreatedIndex create (String indexName, String configAsJson) throws IOException,ResponseException;
  public void delete (String indexName) throws IOException,ResponseException;
  public boolean exists (String indexName) throws IOException,ResponseException;

  // Request execution; the overload is selected by the static type of the request.
  public Response.Bulk performRequest(Request.Bulk request) throws IOException,ResponseException;
  public long performRequest(Request.Count request) throws IOException,ResponseException;
  public long performRequest(Request.DeleteByQuery request) throws IOException,ResponseException;
  public Response.Get performRequest(Request.Get request) throws IOException,ResponseException;
  public Response.Mapping performRequest(Request.Mapping request) throws IOException,ResponseException;
  public Response.Search performRequest(Request.Search request) throws IOException,ResponseException;
  public Response.Settings performRequest(Request.Setting request) throws IOException,ResponseException;

  /**
   * Build create index request.
   *
   * <p>Reads the schema file as JSON, overrides {@code number_of_shards} and
   * {@code number_of_replicas} inside its {@code settings} object (created when absent), and
   * writes a JSON object holding only {@code settings} and {@code mappings}.
   *
   * @param schemaFile index schema file; its top level must be a JSON object with a
   *        {@code mappings} member
   * @param shards number of shards
   * @param replicas number of replicas
   * @return JSON
   * @throws Exception if the file cannot be read, is empty, is not a JSON object, has no
   *         {@code mappings}, or has a {@code settings} member that is not a JSON object
   */
  @SuppressWarnings({"rawtypes", "unchecked"})
  public static String createCreateIndexRequest(File schemaFile, int shards, int replicas)
      throws Exception {
    Gson gson = new Gson();

    // Read schema template; the reader is released even when parsing fails.
    Object rootObj;
    FileReader rd = new FileReader(schemaFile);
    try {
      rootObj = gson.fromJson(rd, Object.class);
    } finally {
      CloseUtils.close(rd);
    }

    // An empty file parses to null and a top-level array/scalar is not a Map; report both
    // with the file name instead of failing later with an anonymous NPE/ClassCastException.
    if (!(rootObj instanceof Map)) {
      throw new Exception(
          "Schema file " + schemaFile.getAbsolutePath() + " does not contain a JSON object");
    }
    Map rootMap = (Map) rootObj;

    Object settingsObj = rootMap.get("settings");
    if (settingsObj == null) {
      settingsObj = new TreeMap();
    }
    if (!(settingsObj instanceof Map)) {
      throw new Exception(
          "Settings is not a JSON object in schema file " + schemaFile.getAbsolutePath());
    }

    Object mappingsObj = rootMap.get("mappings");
    if (mappingsObj == null) {
      throw new Exception("Missing mappings in schema file " + schemaFile.getAbsolutePath());
    }

    // The caller-supplied sizing always wins over whatever the template contains.
    Map settingsMap = (Map) settingsObj;
    settingsMap.put("number_of_shards", shards);
    settingsMap.put("number_of_replicas", replicas);

    StringWriter out = new StringWriter();
    JsonWriter writer = new JsonWriter(out);
    writer.beginObject();
    // Settings
    writer.name("settings");
    gson.toJson(settingsMap, Object.class, writer);
    // Mappings
    writer.name("mappings");
    gson.toJson(mappingsObj, Object.class, writer);
    writer.endObject();
    writer.close();
    return out.toString();
  }
}
32 changes: 32 additions & 0 deletions src/main/java/gov/nasa/pds/registry/common/app/Handler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package gov.nasa.pds.registry.common.app;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLStreamHandler;

/**
 * {@link URLStreamHandler} that only parses URLs; it cannot open connections.
 * NOTE(review): given the enclosing package name this is presumably the handler for the
 * {@code app:} scheme - confirm against the code that registers it.
 */
public class Handler extends URLStreamHandler {
  /**
   * Parses {@code spec} after replacing every {@link File#separatorChar} with a forward slash.
   *
   * @param u the URL receiving the parse result
   * @param spec the text to parse
   * @param start index in {@code spec} where parsing begins
   * @param limit index in {@code spec} where parsing ends
   */
  @Override
  protected void parseURL(URL u, String spec, int start, int limit) {
    /*
     * Ugly backwards compatibility. Flip any file separator
     * characters to be forward slashes. This is a nop on Unix
     * and "fixes" win32 file paths. According to RFC 2396,
     * only forward slashes may be used to represent hierarchy
     * separation in a URL but previous releases unfortunately
     * performed this "fixup" behavior in the file URL parsing code
     * rather than forcing this to be fixed in the caller of the URL
     * class where it belongs. Since backslash is an "unwise"
     * character that would normally be encoded if literally intended
     * as a non-separator character the damage of veering away from the
     * specification is presumably limited.
     */
    super.parseURL(u, spec.replace(File.separatorChar, '/'), start, limit);
  }

  /**
   * Always fails: this handler does not know how to open a connection.
   *
   * <p>Throwing here, rather than returning {@code null}, lets callers such as
   * {@link URL#openStream()} see a descriptive {@link IOException} instead of a
   * {@link NullPointerException}.
   *
   * @param u the URL a connection was requested for
   * @return never returns normally
   * @throws IOException always
   */
  @Override
  protected URLConnection openConnection(URL u) throws IOException {
    throw new IOException(
        "Cannot open a connection for URL '" + u + "': this handler only parses URLs");
  }
}
12 changes: 0 additions & 12 deletions src/main/java/gov/nasa/pds/registry/common/cfg/RegistryCfg.java

This file was deleted.

Loading
Loading