-
Notifications
You must be signed in to change notification settings - Fork 88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Timed out after retries #383
Comments
FastLee
pushed a commit
that referenced
this issue
Oct 25, 2023
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The tool failed while creating backup groups even though retries are in place:
`TimeoutError: Timed out after 0:20:00
DatabricksError Traceback (most recent call last)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/retries.py:29, in retried.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
28 try:
---> 29 return func(*args, **kwargs)
30 except Exception as err:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/mixins/hardening.py:57, in rate_limited.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
56 rate_limiter.throttle()
---> 57 return func(*args, **kwargs)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/groups.py:106, in GroupManager._get_or_create_backup_group(self, source_group_name, source_group)
105 logger.info(f"Creating backup group {backup_group_name}")
--> 106 backup_group = self._ws.groups.create(
107 display_name=backup_group_name,
108 meta=source_group.meta,
109 entitlements=source_group.entitlements,
110 roles=source_group.roles,
111 members=source_group.members,
112 )
113 self._workspace_groups.append(backup_group)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/service/iam.py:1716, in GroupsAPI.create(self, display_name, entitlements, external_id, groups, id, members, meta, roles)
1715 headers = {'Accept': 'application/json', 'Content-Type': 'application/json', }
-> 1716 res = self._api.do('POST', '/api/2.0/preview/scim/v2/Groups', body=body, headers=headers)
1717 return Group.from_dict(res)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/core.py:1061, in ApiClient.do(self, method, path, query, headers, body, raw, files, data)
1059 retryable = retried(timeout=timedelta(seconds=self._retry_timeout_seconds),
1060 is_retryable=self._is_retryable)
-> 1061 return retryable(self._perform)(method,
1062 path,
1063 query=query,
1064 headers=headers,
1065 body=body,
1066 raw=raw,
1067 files=files,
1068 data=data)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/retries.py:47, in retried.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
45 if retry_reason is None:
46 # raise if exception is not retryable
---> 47 raise err
49 logger.debug(f'Retrying: {retry_reason} (sleeping ~{sleep}s)')
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/retries.py:29, in retried.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
28 try:
---> 29 return func(*args, **kwargs)
30 except Exception as err:
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/core.py:1150, in ApiClient._perform(self, method, path, query, headers, body, raw, files, data)
1149 payload = response.json()
-> 1150 raise self._make_nicer_error(response=response, **payload) from None
1151 if raw:
DatabricksError: None Group with name db-temp-idm2bcd_dssi03prod_crin13_read already exists.
The above exception was the direct cause of the following exception:
TimeoutError Traceback (most recent call last)
File ~/.ipykernel/1030/command--1-1764370551:18
15 entry = [ep for ep in metadata.distribution("databricks_labs_ucx").entry_points if ep.name == "runtime"]
16 if entry:
17 # Load and execute the entrypoint, assumes no parameters
---> 18 entry[0].load()()
19 else:
20 import databricks_labs_ucx
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/runtime.py:215, in main()
214 def main():
--> 215 trigger(*sys.argv)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/tasks.py:93, in trigger(*argv)
90 cfg = WorkspaceConfig.from_file(Path(args["config"]))
91 logging.getLogger("databricks").setLevel(cfg.log_level)
---> 93 current_task.fn(cfg)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/runtime.py:192, in migrate_permissions(cfg)
168 """As we embark on the complex journey of migrating from Hive Metastore to the Databricks Unity Catalog,
169 a crucial phase in this transition involves the careful management of permissions.
170 This intricate process entails several key steps: first, applying permissions to designated backup groups;
(...)
189
190 See interactive tutorial here."""
191 toolkit = GroupMigrationToolkit(cfg)
--> 192 toolkit.prepare_environment()
193 if toolkit.has_groups():
194 toolkit.apply_permissions_to_backup_groups()
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/migration.py:121, in GroupMigrationToolkit.prepare_environment(self)
120 def prepare_environment(self):
--> 121 self._group_manager.prepare_groups_in_environment()
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/groups.py:221, in GroupManager.prepare_groups_in_environment(self)
218 valid_group_names = list(ws_group_names.intersection(ac_group_names))
219 logger.info(f"Found {len(valid_group_names)} workspace groups that have corresponding account groups")
--> 221 self._set_migration_groups(valid_group_names)
222 logger.info("Environment prepared successfully")
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/groups.py:127, in GroupManager._set_migration_groups(self, groups_names)
124 backup_group = self._get_or_create_backup_group(source_group_name=name, source_group=ws_group)
125 return MigrationGroupInfo(workspace=ws_group, backup=backup_group, account=acc_group)
--> 127 collected_groups = ThreadedExecution.gather(
128 "get group info", [partial(get_group_info, group_name) for group_name in groups_names]
129 )
130 for g in collected_groups:
131 self._migration_state.add(g)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/parallel.py:48, in ThreadedExecution.gather(cls, name, tasks)
45 @classmethod
46 def gather(cls, name: str, tasks: list[ExecutableFunction]) -> list[ExecutableResult]:
47 reporter = ProgressReporter(len(tasks), f"{name}: ")
---> 48 return cls(tasks, num_threads=4, progress_reporter=reporter).run()
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/parallel.py:63, in ThreadedExecution.run(self)
60 results = concurrent.futures.wait(self._futures, return_when=ALL_COMPLETED)
62 logger.debug("Collecting the results from threaded execution")
---> 63 collected = [future.result() for future in results.done]
64 return collected
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/framework/parallel.py:63, in <listcomp>(.0)
60 results = concurrent.futures.wait(self._futures, return_when=ALL_COMPLETED)
62 logger.debug("Collecting the results from threaded execution")
---> 63 collected = [future.result() for future in results.done]
64 return collected
File /usr/lib/python3.10/concurrent/futures/_base.py:451, in Future.result(self, timeout)
449 raise CancelledError()
450 elif self._state == FINISHED:
--> 451 return self.__get_result()
453 self._condition.wait(timeout)
455 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
File /usr/lib/python3.10/concurrent/futures/_base.py:403, in Future.__get_result(self)
401 if self._exception:
402 try:
--> 403 raise self._exception
404 finally:
405 # Break a reference cycle with the exception in self._exception
406 self = None
File /usr/lib/python3.10/concurrent/futures/thread.py:58, in _WorkItem.run(self)
55 return
57 try:
---> 58 result = self.fn(*self.args, **self.kwargs)
59 except BaseException as exc:
60 self.future.set_exception(exc)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/labs/ucx/workspace_access/groups.py:124, in GroupManager._set_migration_groups.<locals>.get_group_info(name)
122 acc_group = self._get_group(name, "account")
123 assert acc_group, f"Group {name} not found on the account level"
--> 124 backup_group = self._get_or_create_backup_group(source_group_name=name, source_group=ws_group)
125 return MigrationGroupInfo(workspace=ws_group, backup=backup_group, account=acc_group)
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.10/site-packages/databricks/sdk/retries.py:52, in retried.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
50 time.sleep(sleep + random())
51 attempt += 1
---> 52 raise TimeoutError(f'Timed out after {timeout}') from last_err
TimeoutError: Timed out after 0:20:00`
Last log entries:
INFO [d.l.ucx.workspace_access.groups] Backup group db-temp-idm2bcd_dssi03prod_zme4_dev successfully created
22:58 INFO [d.l.ucx.framework.parallel] get group info: 1084/1084, rps: 0.050/sec
The text was updated successfully, but these errors were encountered: