forked from opensearch-project/index-management
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Transform maxclauses fix (opensearch-project#477)
* transform maxClauses fix
  Signed-off-by: Petar Dzepina <[email protected]>
* added bucket log to track processed buckets
  Signed-off-by: Petar Dzepina <[email protected]>
* various renames/changes
  Signed-off-by: Petar Dzepina <[email protected]>
* fixed detekt issues
  Signed-off-by: Petar Dzepina <[email protected]>
* added comments to test
  Signed-off-by: Petar Dzepina <[email protected]>
* removed debug logging
  Signed-off-by: Petar Dzepina <[email protected]>
* empty commit to trigger checks
  Signed-off-by: Petar Dzepina <[email protected]>
* reduced pageSize to 1 in few ITs to avoid flaky tests; fixed bug where pagesProcessed was calculated incorrectly
  Signed-off-by: Petar Dzepina <[email protected]>
* reverted pagesProcessed change; fixed few ITs
  Signed-off-by: Petar Dzepina <[email protected]>
  Signed-off-by: Petar Dzepina <[email protected]>
- Loading branch information
Showing
5 changed files
with
215 additions
and
20 deletions.
There are no files selected for viewing
49 changes: 49 additions & 0 deletions
49
src/main/kotlin/org/opensearch/indexmanagement/transform/TransformProcessedBucketLog.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.indexmanagement.transform | ||
|
||
import java.math.BigInteger | ||
import java.security.MessageDigest | ||
|
||
/**
 * In-memory log of buckets that have already been processed.
 *
 * Each bucket is reduced to an MD5 fingerprint (see [computeBucketHash]) and the fingerprint is
 * kept in a hash set. At most [MAX_SIZE] fingerprints are retained; once that limit is reached,
 * further buckets are silently not recorded, so [isProcessed] returns `false` for them.
 *
 * No synchronization is performed by this class.
 */
class TransformProcessedBucketLog {

    private val processedBuckets: MutableSet<String> = HashSet()

    /** Records every bucket in [buckets], subject to the [MAX_SIZE] limit. */
    fun addBuckets(buckets: List<Map<String, Any>>) {
        buckets.forEach(::addBucket)
    }

    /** Records [bucket] as processed; does nothing once [MAX_SIZE] fingerprints are stored. */
    fun addBucket(bucket: Map<String, Any>) {
        if (processedBuckets.size >= MAX_SIZE) return
        processedBuckets.add(computeBucketHash(bucket))
    }

    /** Returns `true` if a bucket with the same fingerprint as [bucket] has been recorded. */
    fun isProcessed(bucket: Map<String, Any>): Boolean =
        computeBucketHash(bucket) in processedBuckets

    /** Negation of [isProcessed]. */
    fun isNotProcessed(bucket: Map<String, Any>) = !isProcessed(bucket)

    /**
     * Computes an MD5 fingerprint of [bucket], rendered as a hexadecimal string.
     *
     * Entries are visited in key order so the result does not depend on the map's iteration
     * order. Both the key and the value's `toString()` are hashed, each as a length-prefixed
     * field; without that framing, buckets such as `{a=12, b=3}` and `{a=1, b=23}` would feed
     * identical bytes to the digest and collide.
     *
     * The hex string is produced through [BigInteger], so leading zeros of the digest are not
     * preserved and the result may be shorter than 32 characters.
     *
     * @param bucket bucket key/value pairs to fingerprint
     * @return hexadecimal MD5 fingerprint of the bucket
     */
    fun computeBucketHash(bucket: Map<String, Any>): String {
        val md5Crypt = MessageDigest.getInstance("MD5")
        bucket.entries.sortedBy { it.key }.forEach { entry ->
            // Values may still be null at runtime when the map originates from Java code;
            // widening to Any? selects the null-safe toString(), which yields "null".
            val value: Any? = entry.value
            md5Crypt.updateField(entry.key)
            md5Crypt.updateField(value.toString())
        }
        return BigInteger(1, md5Crypt.digest()).toString(HEX_RADIX)
    }

    /** Feeds [field] into the digest as `<byteLength>:<bytes>` so field boundaries are unambiguous. */
    private fun MessageDigest.updateField(field: String) {
        val bytes = field.toByteArray()
        update("${bytes.size}:".toByteArray())
        update(bytes)
    }

    companion object {
        /** Maximum number of bucket fingerprints retained in memory. */
        const val MAX_SIZE = 100_000_000

        /** Radix used to render the digest as a hexadecimal string. */
        const val HEX_RADIX = 16
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters