From c08d9f143b691308320a61f94b5bc9b4a6d06687 Mon Sep 17 00:00:00 2001 From: Avihai Ezaguy <32809676+AvihaiSam@users.noreply.github.com> Date: Tue, 1 Oct 2024 17:35:42 +0300 Subject: [PATCH 1/2] feature/opensearch: modify_refresh_interval flag --- awswrangler/opensearch/_write.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 459f56c18..2ba7f6f85 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -504,6 +504,7 @@ def index_documents( initial_backoff: int | None = None, max_backoff: int | None = None, use_threads: bool | int = False, + modify_refresh_interval: bool = True, **kwargs: Any, ) -> dict[str, Any]: """ @@ -559,6 +560,8 @@ def index_documents( True to enable concurrent requests, False to disable multiple threads. If enabled os.cpu_count() will be used as the max number of threads. If integer is provided, specified number is used. + modify_refresh_interval + True (default) to enable ``refresh_interval`` modification to ``-1`` (disabled) while indexing documents **kwargs KEYWORD arguments forwarded to bulk operation elasticsearch >= 7.10.2 / opensearch: \ @@ -614,7 +617,7 @@ def index_documents( widgets=widgets, max_value=total_documents, prefix="Indexing: " ).start() for i, bulk_chunk_documents in enumerate(actions): - if i == 1: # second bulk iteration, in case the index didn't exist before + if i == 1 and modify_refresh_interval: # second bulk iteration, in case the index didn't exist before refresh_interval = _get_refresh_interval(client, index) _disable_refresh_interval(client, index) _logger.debug("running bulk index of %s documents", len(bulk_chunk_documents)) @@ -655,6 +658,7 @@ def index_documents( raise e finally: - _set_refresh_interval(client, index, refresh_interval) + if modify_refresh_interval: + _set_refresh_interval(client, index, refresh_interval) return {"success": success, "errors": errors} From f66c3582a20623757d497d31f628e8c482ea5ba2 Mon Sep 17 00:00:00 2001 From: Avihai Ezaguy <32809676+AvihaiSam@users.noreply.github.com> Date: Sat, 5 Oct 2024 23:33:10 +0300 Subject: [PATCH 2/2] CR change: rename modify_refresh_interval to enable_refresh_interval --- awswrangler/opensearch/_write.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/awswrangler/opensearch/_write.py b/awswrangler/opensearch/_write.py index 2ba7f6f85..87955d1c2 100644 --- a/awswrangler/opensearch/_write.py +++ b/awswrangler/opensearch/_write.py @@ -504,7 +504,7 @@ def index_documents( initial_backoff: int | None = None, max_backoff: int | None = None, use_threads: bool | int = False, - modify_refresh_interval: bool = True, + enable_refresh_interval: bool = True, **kwargs: Any, ) -> dict[str, Any]: """ @@ -560,7 +560,7 @@ def index_documents( True to enable concurrent requests, False to disable multiple threads. If enabled os.cpu_count() will be used as the max number of threads. If integer is provided, specified number is used. - modify_refresh_interval + enable_refresh_interval True (default) to enable ``refresh_interval`` modification to ``-1`` (disabled) while indexing documents **kwargs KEYWORD arguments forwarded to bulk operation @@ -617,7 +617,7 @@ def index_documents( widgets=widgets, max_value=total_documents, prefix="Indexing: " ).start() for i, bulk_chunk_documents in enumerate(actions): - if i == 1 and modify_refresh_interval: # second bulk iteration, in case the index didn't exist before + if i == 1 and enable_refresh_interval: # second bulk iteration, in case the index didn't exist before refresh_interval = _get_refresh_interval(client, index) _disable_refresh_interval(client, index) _logger.debug("running bulk index of %s documents", len(bulk_chunk_documents)) @@ -658,7 +658,7 @@ def index_documents( raise e finally: - if modify_refresh_interval: + if enable_refresh_interval: _set_refresh_interval(client, index, refresh_interval) return {"success": success, "errors": errors}