Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Gateway] Remove redundant list kernels request during session poll #1112

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 10 additions & 7 deletions jupyter_server/gateway/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,18 +297,21 @@ async def get_kernel_spec_resource(self, kernel_name, path):
class GatewaySessionManager(SessionManager):
kernel_manager = Instance("jupyter_server.gateway.managers.GatewayMappingKernelManager")

async def kernel_culled(self, kernel_id):
async def kernel_culled(self, kernel_id: str) -> bool:
"""Checks if the kernel is still considered alive and returns true if it's not found."""
kernel = None
km: Optional[GatewayKernelManager] = None
try:
# Since we keep the models up-to-date via client polling, use that state to determine
# if this kernel no longer exists on the gateway server rather than perform a redundant
# fetch operation - especially since this is called at approximately the same interval.
# This has the effect of reducing GET /api/kernels requests against the gateway server
# by 50%!
# Note that should the redundant polling be consolidated, or replaced with an event-based
# notification model, this will need to be revisited.
km = self.kernel_manager.get_kernel(kernel_id)
kernel = await km.refresh_model()
except Exception: # Let exceptions here reflect culled kernel
pass
return kernel is None


"""KernelManager class to manage a kernel running on a Gateway Server via the REST API"""
return km is None


class GatewayKernelManager(AsyncKernelManager):
Expand Down
66 changes: 55 additions & 11 deletions tests/test_gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,33 +427,49 @@ async def test_gateway_get_named_kernelspec(init_gateway, jp_fetch):
assert expected_http_error(e, 404)


async def test_gateway_session_lifecycle(init_gateway, jp_root_dir, jp_fetch):
@pytest.mark.parametrize("cull_kernel", [False, True])
async def test_gateway_session_lifecycle(init_gateway, jp_root_dir, jp_fetch, cull_kernel):
# Validate session lifecycle functions; create and delete.

# create
session_id, kernel_id = await create_session(jp_root_dir, jp_fetch, "kspec_foo")

# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True
assert await is_session_active(jp_fetch, session_id) is True

# interrupt
await interrupt_kernel(jp_fetch, kernel_id)

# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True
assert await is_session_active(jp_fetch, session_id) is True

# restart
await restart_kernel(jp_fetch, kernel_id)

# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True
assert await is_session_active(jp_fetch, session_id) is True

# delete
await delete_session(jp_fetch, session_id)
assert await is_kernel_running(jp_fetch, kernel_id) is False
if cull_kernel:
running_kernels.pop(kernel_id)

# fetch kernel and session and ensure not considered running
assert await is_kernel_running(jp_fetch, kernel_id) is not cull_kernel
assert await is_session_active(jp_fetch, session_id) is not cull_kernel

# delete session. If culled, ensure 404 is raised
if cull_kernel:
with pytest.raises(tornado.httpclient.HTTPClientError) as e:
await delete_session(jp_fetch, session_id)
assert expected_http_error(e, 404)
else:
await delete_session(jp_fetch, session_id)

assert await is_session_active(jp_fetch, session_id) is False


async def test_gateway_kernel_lifecycle(init_gateway, jp_serverapp, jp_ws_fetch, jp_fetch):
@pytest.mark.parametrize("cull_kernel", [False, True])
async def test_gateway_kernel_lifecycle(
init_gateway, jp_serverapp, jp_ws_fetch, jp_fetch, cull_kernel
):
# Validate kernel lifecycle functions; create, interrupt, restart and delete.

# create
Expand All @@ -480,8 +496,20 @@ async def test_gateway_kernel_lifecycle(init_gateway, jp_serverapp, jp_ws_fetch,
# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True

# delete
await delete_kernel(jp_fetch, kernel_id)
if cull_kernel:
running_kernels.pop(kernel_id)

# fetch kernel and session and ensure not considered running
assert await is_kernel_running(jp_fetch, kernel_id) is not cull_kernel

# delete kernel. If culled, ensure 404 is raised
if cull_kernel:
with pytest.raises(tornado.httpclient.HTTPClientError) as e:
await delete_kernel(jp_fetch, kernel_id)
assert expected_http_error(e, 404)
else:
await delete_kernel(jp_fetch, kernel_id)

assert await is_kernel_running(jp_fetch, kernel_id) is False


Expand Down Expand Up @@ -583,6 +611,22 @@ async def test_channel_queue_get_msg_when_response_router_had_finished():
#
# Test methods below...
#


async def is_session_active(jp_fetch, session_id):
"""Issues request to get the set of running kernels"""
with mocked_gateway:
# Get list of running kernels
r = await jp_fetch("api", "sessions", method="GET")
assert r.code == 200
sessions = json.loads(r.body.decode("utf-8"))
assert len(sessions) == len(running_kernels) # Use running_kernels as truth
for model in sessions:
if model.get("id") == session_id:
return True
return False


async def create_session(root_dir, jp_fetch, kernel_name):
"""Creates a session for a kernel. The session is created against the server
which then uses the gateway for kernel management.
Expand Down