Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: modify_refresh_interval flag in opensearch index_documents #2980

Merged
merged 3 commits into from
Oct 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions awswrangler/opensearch/_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,7 @@ def index_documents(
initial_backoff: int | None = None,
max_backoff: int | None = None,
use_threads: bool | int = False,
enable_refresh_interval: bool = True,
**kwargs: Any,
) -> dict[str, Any]:
"""
Expand Down Expand Up @@ -559,6 +560,8 @@ def index_documents(
True to enable concurrent requests, False to disable multiple threads.
If enabled os.cpu_count() will be used as the max number of threads.
If integer is provided, specified number is used.
enable_refresh_interval
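True (default) to temporarily set the index ``refresh_interval`` to ``-1`` (refresh disabled) while indexing documents and restore the previous value afterwards. False to leave ``refresh_interval`` unchanged.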
**kwargs
KEYWORD arguments forwarded to bulk operation
elasticsearch >= 7.10.2 / opensearch: \
Expand Down Expand Up @@ -614,7 +617,7 @@ def index_documents(
widgets=widgets, max_value=total_documents, prefix="Indexing: "
).start()
for i, bulk_chunk_documents in enumerate(actions):
if i == 1: # second bulk iteration, in case the index didn't exist before
if i == 1 and enable_refresh_interval: # second bulk iteration, in case the index didn't exist before
refresh_interval = _get_refresh_interval(client, index)
_disable_refresh_interval(client, index)
_logger.debug("running bulk index of %s documents", len(bulk_chunk_documents))
Expand Down Expand Up @@ -655,6 +658,7 @@ def index_documents(
raise e

finally:
_set_refresh_interval(client, index, refresh_interval)
if enable_refresh_interval:
_set_refresh_interval(client, index, refresh_interval)

return {"success": success, "errors": errors}
Loading