From 322d7cdc90484546048276e103c931676678a428 Mon Sep 17 00:00:00 2001 From: hagen-danswer Date: Wed, 20 Nov 2024 15:58:28 -0800 Subject: [PATCH] brought the timeout changes too --- .../connectors/confluence/connector.py | 17 ++++++++++++++++ .../connectors/confluence/onyx_confluence.py | 8 +++++--- .../danswer/connectors/confluence/utils.py | 2 +- .../confluence/group_sync.py | 20 +++++++++++++++++-- 4 files changed, 41 insertions(+), 6 deletions(-) diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index 54419e2d374..ea6a5eecb37 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -3,6 +3,8 @@ from typing import Any from urllib.parse import quote +from atlassian import Confluence # type: ignore + from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE from danswer.configs.app_configs import INDEX_BATCH_SIZE @@ -116,6 +118,21 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None is_cloud=self.is_cloud, wiki_base=self.wiki_base, ) + + client_without_retries = Confluence( + api_version="cloud" if self.is_cloud else "latest", + url=self.wiki_base.rstrip("/"), + username=credentials["confluence_username"] if self.is_cloud else None, + password=credentials["confluence_access_token"] if self.is_cloud else None, + token=credentials["confluence_access_token"] if not self.is_cloud else None, + ) + spaces = client_without_retries.get_all_spaces(limit=1) + if not spaces: + raise RuntimeError( + f"No spaces found at {self.wiki_base}! " + "Check your credentials and wiki_base and make sure " + "is_cloud is set correctly." + ) return None def _get_comment_string_for_page_id(self, page_id: str) -> str: diff --git a/backend/danswer/connectors/confluence/onyx_confluence.py b/backend/danswer/connectors/confluence/onyx_confluence.py index c6a4d3857b1..eeb7e7158f9 100644 --- a/backend/danswer/connectors/confluence/onyx_confluence.py +++ b/backend/danswer/connectors/confluence/onyx_confluence.py @@ -84,7 +84,7 @@ def handle_confluence_rate_limit(confluence_call: F) -> F: def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: MAX_RETRIES = 5 - TIMEOUT = 3600 + TIMEOUT = 600 timeout_at = time.monotonic() + TIMEOUT for attempt in range(MAX_RETRIES): @@ -92,13 +92,16 @@ def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: raise TimeoutError( f"Confluence call attempts took longer than {TIMEOUT} seconds." ) - try: # we're relying more on the client to rate limit itself # and applying our own retries in a more specific set of circumstances return confluence_call(*args, **kwargs) except HTTPError as e: delay_until = _handle_http_error(e, attempt) + logger.warning( + f"HTTPError in confluence call. " + f"Retrying in {delay_until} seconds..." + ) while time.monotonic() < delay_until: # in the future, check a signal here to exit time.sleep(1) @@ -107,7 +110,6 @@ def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: # Users reported it to be intermittent, so just retry if attempt == MAX_RETRIES - 1: raise e - logger.exception( "Confluence Client raised an AttributeError. Retrying..." ) diff --git a/backend/danswer/connectors/confluence/utils.py b/backend/danswer/connectors/confluence/utils.py index 9deff36b269..cb5253f4c14 100644 --- a/backend/danswer/connectors/confluence/utils.py +++ b/backend/danswer/connectors/confluence/utils.py @@ -283,6 +283,6 @@ def build_confluence_client( password=credentials_json["confluence_access_token"] if is_cloud else None, token=credentials_json["confluence_access_token"] if not is_cloud else None, backoff_and_retry=True, - max_backoff_retries=60, + max_backoff_retries=10, max_backoff_seconds=60, ) diff --git a/backend/ee/danswer/external_permissions/confluence/group_sync.py b/backend/ee/danswer/external_permissions/confluence/group_sync.py index 9ff32f21e71..dd372265819 100644 --- a/backend/ee/danswer/external_permissions/confluence/group_sync.py +++ b/backend/ee/danswer/external_permissions/confluence/group_sync.py @@ -1,3 +1,4 @@ +from atlassian import Confluence # type: ignore from sqlalchemy.orm import Session from danswer.connectors.confluence.onyx_confluence import OnyxConfluence @@ -37,11 +38,26 @@ def confluence_group_sync( db_session: Session, cc_pair: ConnectorCredentialPair, ) -> None: + credentials = cc_pair.credential.credential_json is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False) + wiki_base = cc_pair.connector.connector_specific_config["wiki_base"] + + # test connection with direct client, no retries + confluence_client = Confluence( + api_version="cloud" if is_cloud else "latest", + url=wiki_base.rstrip("/"), + username=credentials["confluence_username"] if is_cloud else None, + password=credentials["confluence_access_token"] if is_cloud else None, + token=credentials["confluence_access_token"] if not is_cloud else None, + ) + spaces = confluence_client.get_all_spaces(limit=1) + if not spaces: + raise RuntimeError(f"No spaces found at {wiki_base}!") + confluence_client = build_confluence_client( - credentials_json=cc_pair.credential.credential_json, + credentials_json=credentials, is_cloud=is_cloud, - wiki_base=cc_pair.connector.connector_specific_config["wiki_base"], + wiki_base=wiki_base, ) # Get all group names