Skip to content

Commit

Permalink
Gateway only: Ensure launch and request timeouts are in sync (#5317)
Browse files Browse the repository at this point in the history
Prior to this change, the request timeout for a Gateway request
was synchronized with KERNEL_LAUNCH_TIMEOUT only if KLT
was greater. However, the two are closely associated and KLT
should be adjusted if the configurable request_timeout is greater.
This change ensures that the two values are synchronized to the
greater value. It changes the two configurable timeouts to default
to 40 (to match that of KLT) and removes the 2-second pad,
since that wasn't helpful and only confused the situation.

These changes were prompted by this issue: jupyter-server/enterprise_gateway#792
  • Loading branch information
kevin-bates committed Nov 15, 2020
1 parent fcf3070 commit 573adc3
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 11 deletions.
18 changes: 10 additions & 8 deletions jupyter_server/gateway/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def _kernelspecs_endpoint_default(self):
def _kernelspecs_resource_endpoint_default(self):
return os.environ.get(self.kernelspecs_resource_endpoint_env, self.kernelspecs_resource_endpoint_default_value)

connect_timeout_default_value = 60.0
connect_timeout_default_value = 40.0
connect_timeout_env = 'JUPYTER_GATEWAY_CONNECT_TIMEOUT'
connect_timeout = Float(default_value=connect_timeout_default_value, config=True,
help="""The time allowed for HTTP connection establishment with the Gateway server.
Expand All @@ -112,7 +112,7 @@ def _kernelspecs_resource_endpoint_default(self):
def connect_timeout_default(self):
return float(os.environ.get('JUPYTER_GATEWAY_CONNECT_TIMEOUT', self.connect_timeout_default_value))

request_timeout_default_value = 60.0
request_timeout_default_value = 40.0
request_timeout_env = 'JUPYTER_GATEWAY_REQUEST_TIMEOUT'
request_timeout = Float(default_value=request_timeout_default_value, config=True,
help="""The time allowed for HTTP request completion. (JUPYTER_GATEWAY_REQUEST_TIMEOUT env var)""")
Expand Down Expand Up @@ -226,18 +226,20 @@ def gateway_enabled(self):

# Ensure KERNEL_LAUNCH_TIMEOUT has a default value.
KERNEL_LAUNCH_TIMEOUT = int(os.environ.get('KERNEL_LAUNCH_TIMEOUT', 40))
os.environ['KERNEL_LAUNCH_TIMEOUT'] = str(KERNEL_LAUNCH_TIMEOUT)

LAUNCH_TIMEOUT_PAD = int(os.environ.get('LAUNCH_TIMEOUT_PAD', 2))

This comment has been minimized.

Copy link
@CiprianAnton

CiprianAnton Jul 5, 2022

Contributor

@kevin-bates are you sure LAUNCH_TIMEOUT_PAD wasn't helpful? I'm thinking that since KERNEL_LAUNCH_TIMEOUT is forwarded to EG, the request_timeout should always be bigger. I'm thinking at the following scenario:

  1. Make a POST request to EG, to startup a kernel
  2. Let's suppose the EG is able to start a kernel just before timeout
  3. The client will just close connection because it didn't received a reply in the same timeout
  4. From EG side, the created kernel is perfectly valid. The kernel is leaked until culling comes and cleans it up.

This comment has been minimized.

Copy link
@kevin-bates

kevin-bates Jul 5, 2022

Author Member

Hi @CiprianAnton - thanks for bringing this up.

Yes, your assessment is correct. I just felt it was a better simplification to always use the larger value for the two settings and be done. This decision was based on observations that these values tend to dramatically exceed the actual time so the pad wouldn't typically come into play. I agree that having a pad is more correct and would be okay restoring its usage. It was purely a UX simplification.

Are you offering to restore pad's usage?

This comment has been minimized.

Copy link
@CiprianAnton

CiprianAnton Jul 5, 2022

Contributor

@kevin-bates Thanks! I will setup a PR to restore pad's usage


def init_static_args(self):
"""Initialize arguments used on every request. Since these are static values, we'll
perform this operation once.
"""
# Ensure that request timeout is at least "pad" greater than launch timeout.
if self.request_timeout < float(GatewayClient.KERNEL_LAUNCH_TIMEOUT + GatewayClient.LAUNCH_TIMEOUT_PAD):
self.request_timeout = float(GatewayClient.KERNEL_LAUNCH_TIMEOUT + GatewayClient.LAUNCH_TIMEOUT_PAD)
# Ensure that request timeout and KERNEL_LAUNCH_TIMEOUT are the same, taking the
# greater value of the two.
if self.request_timeout < float(GatewayClient.KERNEL_LAUNCH_TIMEOUT):
self.request_timeout = float(GatewayClient.KERNEL_LAUNCH_TIMEOUT)
elif self.request_timeout > float(GatewayClient.KERNEL_LAUNCH_TIMEOUT):
GatewayClient.KERNEL_LAUNCH_TIMEOUT = int(self.request_timeout)
# Ensure any adjustments are reflected in env.
os.environ['KERNEL_LAUNCH_TIMEOUT'] = str(GatewayClient.KERNEL_LAUNCH_TIMEOUT)

self._static_args['headers'] = json.loads(self.headers)
if 'Authorization' not in self._static_args['headers'].keys():
Expand Down
10 changes: 7 additions & 3 deletions tests/test_gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,24 +152,28 @@ async def test_gateway_env_options(init_gateway, jp_serverapp):
assert jp_serverapp.gateway_config.connect_timeout == jp_serverapp.gateway_config.request_timeout
assert jp_serverapp.gateway_config.connect_timeout == 44.4

GatewayClient.instance().init_static_args()
assert GatewayClient.instance().KERNEL_LAUNCH_TIMEOUT == int(jp_serverapp.gateway_config.request_timeout)


async def test_gateway_cli_options(jp_configurable_serverapp):
argv = [
'--gateway-url=' + mock_gateway_url,
'--GatewayClient.http_user=' + mock_http_user,
'--GatewayClient.connect_timeout=44.4',
'--GatewayClient.request_timeout=44.4'
'--GatewayClient.request_timeout=96.0'
]


GatewayClient.clear_instance()
app = jp_configurable_serverapp(argv=argv)

assert app.gateway_config.gateway_enabled is True
assert app.gateway_config.url == mock_gateway_url
assert app.gateway_config.http_user == mock_http_user
assert app.gateway_config.connect_timeout == app.gateway_config.request_timeout
assert app.gateway_config.connect_timeout == 44.4
assert app.gateway_config.request_timeout == 96.0
GatewayClient.instance().init_static_args()
assert GatewayClient.instance().KERNEL_LAUNCH_TIMEOUT == 96 # Ensure KLT gets set from request-timeout
GatewayClient.clear_instance()


Expand Down

0 comments on commit 573adc3

Please sign in to comment.