Skip to content

Commit

Permalink
Warn if session is missing after reboot, closes #2075
Browse files Browse the repository at this point in the history
  • Loading branch information
josevalim committed Aug 1, 2023
1 parent 3741193 commit 5a367e5
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 22 deletions.
2 changes: 2 additions & 0 deletions lib/livebook.ex
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ defmodule Livebook do
def config_runtime do
import Config

config :livebook, :random_boot_id, Base.encode64(:crypto.strong_rand_bytes(3))

config :livebook, LivebookWeb.Endpoint,
secret_key_base:
Livebook.Config.secret!("LIVEBOOK_SECRET_KEY_BASE") ||
Expand Down
7 changes: 7 additions & 0 deletions lib/livebook/config.ex
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,13 @@ defmodule Livebook.Config do
Application.fetch_env!(:livebook, :allowed_uri_schemes)
end

@doc """
Returns a random id set on boot.
"""
def random_boot_id() do
Application.fetch_env!(:livebook, :random_boot_id)
end

## Parsing

@doc """
Expand Down
28 changes: 19 additions & 9 deletions lib/livebook/sessions.ex
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,31 @@ defmodule Livebook.Sessions do
@doc """
Returns tracked session with the given id.
"""
@spec fetch_session(Session.id()) :: {:ok, Session.t()} | :error
@spec fetch_session(Session.id()) :: {:ok, Session.t()} | {:error, :not_found | :maybe_crashed}
def fetch_session(id) do
case Livebook.Tracker.fetch_session(id) do
{:ok, session} ->
{:ok, session}

:error ->
# The local tracker server doesn't know about this session,
# but it may not have propagated yet, so we extract the session
# node from id and ask the corresponding tracker directly
with {:ok, other_node} when other_node != node() <- Utils.node_from_node_aware_id(id),
{:ok, session} <- :rpc.call(other_node, Livebook.Tracker, :fetch_session, [id]) do
{:ok, session}
else
_ -> :error
boot_id = Livebook.Config.random_boot_id()

case Utils.node_from_node_aware_id(id) do
# The local tracker server doesn't know about this session,
# but it may not have propagated yet, so we extract the session
# node from id and ask the corresponding tracker directly
{:ok, other_node, _other_boot_id} when other_node != node() ->
case :rpc.call(other_node, Livebook.Tracker, :fetch_session, [id]) do
{:ok, session} -> {:ok, session}
_ -> {:error, :not_found}
end

{:ok, other_node, other_boot_id}
when other_node == node() and other_boot_id != boot_id ->
{:error, :maybe_crashed}

_ ->
{:error, :not_found}
end
end
end
Expand Down
22 changes: 13 additions & 9 deletions lib/livebook/utils.ex
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ defmodule Livebook.Utils do
random_part = :crypto.strong_rand_bytes(9)
binary = <<node_part::binary, random_part::binary>>
# 16B + 9B = 25B is suitable for base32 encoding without padding
Base.encode32(binary, case: :lower)
Livebook.Config.random_boot_id() <> Base.encode32(binary, case: :lower)
end

# Note: the result is always 16 bytes long
Expand All @@ -63,14 +63,18 @@ defmodule Livebook.Utils do
"""
@spec node_from_node_aware_id(id()) :: {:ok, node()} | :error
def node_from_node_aware_id(id) do
binary = Base.decode32!(id, case: :lower)
<<node_part::binary-size(16), _random_part::binary-size(9)>> = binary

known_nodes = [node() | Node.list()]

Enum.find_value(known_nodes, :error, fn node ->
node_hash(node) == node_part && {:ok, node}
end)
with <<boot_id::binary-size(4), id::binary>> <- id,
{:ok, binary} <- Base.decode32(id, case: :lower),
<<node_part::binary-size(16), _random_part::binary-size(9)>> <-
binary do
known_nodes = [node() | Node.list()]

Enum.find_value(known_nodes, :error, fn node ->
node_hash(node) == node_part && {:ok, node, boot_id}
end)
else
_ -> :error
end
end

@doc """
Expand Down
4 changes: 2 additions & 2 deletions lib/livebook_web/controllers/session_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ defmodule LivebookWeb.SessionController do
file = FileSystem.File.resolve(images_dir, name)
serve_static(conn, file)

:error ->
{:error, _} ->
send_resp(conn, 404, "Not found")
end
end
Expand All @@ -57,7 +57,7 @@ defmodule LivebookWeb.SessionController do

send_notebook_source(conn, notebook, file_name, format)

:error ->
{:error, _} ->
send_resp(conn, 404, "Not found")
end
end
Expand Down
12 changes: 11 additions & 1 deletion lib/livebook_web/live/session_live.ex
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,18 @@ defmodule LivebookWeb.SessionLive do
|> prune_outputs()
|> prune_cell_sources()}

:error ->
{:error, :not_found} ->
{:ok, redirect(socket, to: ~p"/")}

{:error, :maybe_crashed} ->
{:ok,
socket
|> put_flash(
:error,
"Could not find notebook session because Livebook has rebooted. " <>
"This may happen if Livebook runs out of memory while installing dependencies or executing code."
)
|> redirect(to: ~p"/")}
end
end

Expand Down
5 changes: 4 additions & 1 deletion test/livebook/sessions_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,17 @@ defmodule Livebook.SessionsTest do
describe "fetch_session/1" do
test "returns an error if no session with the given id exists" do
id = Livebook.Utils.random_node_aware_id()
assert :error = Sessions.fetch_session(id)
assert Sessions.fetch_session(id) == {:error, :not_found}
end

test "returns session matching the given id" do
{:ok, session} = Sessions.create_session()
assert {:ok, ^session} = Sessions.fetch_session(session.id)

Session.close(session.pid)
assert Sessions.fetch_session(session.id) == {:error, :not_found}
Application.put_env(:livebook, :random_boot_id, "^^^^")
assert Sessions.fetch_session(session.id) == {:error, :maybe_crashed}
end
end

Expand Down

0 comments on commit 5a367e5

Please sign in to comment.