-
Notifications
You must be signed in to change notification settings - Fork 38
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix bug in get datasource API and improve memory usage #313
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,8 +20,10 @@ | |
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.Iterator; | ||
import java.util.LinkedList; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Queue; | ||
import java.util.stream.Collectors; | ||
import java.util.zip.ZipEntry; | ||
import java.util.zip.ZipInputStream; | ||
|
@@ -35,6 +37,7 @@ | |
import org.opensearch.OpenSearchException; | ||
import org.opensearch.SpecialPermission; | ||
import org.opensearch.action.ActionListener; | ||
import org.opensearch.action.DocWriteRequest; | ||
import org.opensearch.action.admin.indices.create.CreateIndexRequest; | ||
import org.opensearch.action.bulk.BulkRequest; | ||
import org.opensearch.action.bulk.BulkResponse; | ||
|
@@ -51,8 +54,10 @@ | |
import org.opensearch.common.collect.Tuple; | ||
import org.opensearch.common.settings.ClusterSettings; | ||
import org.opensearch.common.unit.TimeValue; | ||
import org.opensearch.common.xcontent.XContentFactory; | ||
import org.opensearch.common.xcontent.XContentHelper; | ||
import org.opensearch.common.xcontent.XContentType; | ||
import org.opensearch.core.xcontent.XContentBuilder; | ||
import org.opensearch.geospatial.annotation.VisibleForTesting; | ||
import org.opensearch.geospatial.shared.Constants; | ||
import org.opensearch.geospatial.shared.StashedThreadContext; | ||
|
@@ -187,7 +192,7 @@ protected CSVParser internalGetDatabaseReader(final DatasourceManifest manifest, | |
} | ||
|
||
/** | ||
* Create a document in json string format to ingest in datasource database index | ||
* Create a document to ingest in datasource database index | ||
* | ||
* It assumes the first field as ip_range. The rest is added under data field. | ||
* | ||
|
@@ -204,31 +209,23 @@ protected CSVParser internalGetDatabaseReader(final DatasourceManifest manifest, | |
* @param fields a list of field name | ||
* @param values a list of values | ||
* @return Document as an XContentBuilder holding the JSON content | ||
* @throws IOException the exception | ||
*/ | ||
public String createDocument(final String[] fields, final String[] values) { | ||
public XContentBuilder createDocument(final String[] fields, final String[] values) throws IOException { | ||
if (fields.length != values.length) { | ||
throw new OpenSearchException("header[{}] and record[{}] length does not match", fields, values); | ||
} | ||
StringBuilder sb = new StringBuilder(); | ||
sb.append("{\""); | ||
sb.append(IP_RANGE_FIELD_NAME); | ||
sb.append("\":\""); | ||
sb.append(values[0]); | ||
sb.append("\",\""); | ||
sb.append(DATA_FIELD_NAME); | ||
sb.append("\":{"); | ||
XContentBuilder builder = XContentFactory.jsonBuilder(); | ||
builder.startObject(); | ||
builder.field(IP_RANGE_FIELD_NAME, values[0]); | ||
builder.startObject(DATA_FIELD_NAME); | ||
for (int i = 1; i < fields.length; i++) { | ||
if (i != 1) { | ||
sb.append(","); | ||
} | ||
sb.append("\""); | ||
sb.append(fields[i]); | ||
sb.append("\":\""); | ||
sb.append(values[i]); | ||
sb.append("\""); | ||
builder.field(fields[i], values[i]); | ||
} | ||
sb.append("}}"); | ||
return sb.toString(); | ||
builder.endObject(); | ||
builder.endObject(); | ||
builder.close(); | ||
return builder; | ||
} | ||
|
||
/** | ||
|
@@ -368,14 +365,20 @@ public void putGeoIpData( | |
@NonNull final Iterator<CSVRecord> iterator, | ||
final int bulkSize, | ||
@NonNull final Runnable renewLock | ||
) { | ||
) throws IOException { | ||
TimeValue timeout = clusterSettings.get(Ip2GeoSettings.TIMEOUT); | ||
final BulkRequest bulkRequest = new BulkRequest(); | ||
Queue<DocWriteRequest> requests = new LinkedList<>(); | ||
for (int i = 0; i < bulkSize; i++) { | ||
requests.add(Requests.indexRequest(indexName)); | ||
} | ||
while (iterator.hasNext()) { | ||
CSVRecord record = iterator.next(); | ||
String document = createDocument(fields, record.values()); | ||
IndexRequest request = Requests.indexRequest(indexName).source(document, XContentType.JSON); | ||
bulkRequest.add(request); | ||
XContentBuilder document = createDocument(fields, record.values()); | ||
IndexRequest indexRequest = (IndexRequest) requests.poll(); | ||
indexRequest.source(document); | ||
indexRequest.id(record.get(0)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this get by index safe, or should we check for size first? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is safe. If size is wrong, it will fail in createDocument [inferred; the original inline reference was lost in extraction]. |
||
bulkRequest.add(indexRequest); | ||
if (iterator.hasNext() == false || bulkRequest.requests().size() == bulkSize) { | ||
BulkResponse response = StashedThreadContext.run(client, () -> client.bulk(bulkRequest).actionGet(timeout)); | ||
if (response.hasFailures()) { | ||
|
@@ -385,6 +388,7 @@ public void putGeoIpData( | |
response.buildFailureMessage() | ||
); | ||
} | ||
requests.addAll(bulkRequest.requests()); | ||
bulkRequest.requests().clear(); | ||
} | ||
renewLock.run(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,7 @@ | |
import org.opensearch.action.search.SearchResponse; | ||
import org.opensearch.action.support.IndicesOptions; | ||
import org.opensearch.common.Randomness; | ||
import org.opensearch.common.Strings; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please make sure it works with the latest update from core; they started moving classes around (opensearch-project/OpenSearch#7508). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes. There is another PR to fix it: #314 |
||
import org.opensearch.common.SuppressForbidden; | ||
import org.opensearch.common.bytes.BytesReference; | ||
import org.opensearch.geospatial.GeospatialTestHelper; | ||
|
@@ -103,12 +104,13 @@ public void testCreateIndexIfNotExistsWithoutExistingIndex() { | |
verifyingGeoIpDataFacade.createIndexIfNotExists(index); | ||
} | ||
|
||
@SneakyThrows | ||
public void testCreateDocument() { | ||
String[] names = { "ip", "country", "city" }; | ||
String[] values = { "1.0.0.0/25", "USA", "Seattle" }; | ||
assertEquals( | ||
"{\"_cidr\":\"1.0.0.0/25\",\"_data\":{\"country\":\"USA\",\"city\":\"Seattle\"}}", | ||
noOpsGeoIpDataFacade.createDocument(names, values) | ||
Strings.toString(noOpsGeoIpDataFacade.createDocument(names, values)) | ||
); | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you please add a comment with an example of the JSON doc that we're building here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is already a comment on top.