Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Warn if session is missing after reboot #2128

Merged
merged 7 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/livebook.ex
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ defmodule Livebook do
def config_runtime do
import Config

config :livebook, :random_boot_id, :crypto.strong_rand_bytes(3)

config :livebook, LivebookWeb.Endpoint,
secret_key_base:
Livebook.Config.secret!("LIVEBOOK_SECRET_KEY_BASE") ||
Expand Down
7 changes: 7 additions & 0 deletions lib/livebook/config.ex
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,13 @@ defmodule Livebook.Config do
Application.fetch_env!(:livebook, :allowed_uri_schemes)
end

@doc """
Returns a random id set on boot.
"""
def random_boot_id() do
Application.fetch_env!(:livebook, :random_boot_id)
end

## Parsing

@doc """
Expand Down
28 changes: 19 additions & 9 deletions lib/livebook/sessions.ex
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,31 @@ defmodule Livebook.Sessions do
@doc """
Returns tracked session with the given id.
"""
@spec fetch_session(Session.id()) :: {:ok, Session.t()} | :error
@spec fetch_session(Session.id()) :: {:ok, Session.t()} | {:error, :not_found | :maybe_crashed}
def fetch_session(id) do
case Livebook.Tracker.fetch_session(id) do
{:ok, session} ->
{:ok, session}

:error ->
# The local tracker server doesn't know about this session,
# but it may not have propagated yet, so we extract the session
# node from id and ask the corresponding tracker directly
with {:ok, other_node} when other_node != node() <- Utils.node_from_node_aware_id(id),
{:ok, session} <- :rpc.call(other_node, Livebook.Tracker, :fetch_session, [id]) do
{:ok, session}
else
_ -> :error
boot_id = Livebook.Config.random_boot_id()

case Utils.node_from_node_aware_id(id) do
# The local tracker server doesn't know about this session,
# but it may not have propagated yet, so we extract the session
# node from id and ask the corresponding tracker directly
{:ok, other_node, _other_boot_id} when other_node != node() ->
case :rpc.call(other_node, Livebook.Tracker, :fetch_session, [id]) do
{:ok, session} -> {:ok, session}
_ -> {:error, :not_found}
end

{:ok, other_node, other_boot_id}
when other_node == node() and other_boot_id != boot_id ->
{:error, :maybe_crashed}
jonatanklosko marked this conversation as resolved.
Show resolved Hide resolved

_ ->
{:error, :not_found}
end
end
end
Expand Down
23 changes: 14 additions & 9 deletions lib/livebook/utils.ex
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,11 @@ defmodule Livebook.Utils do
"""
@spec random_node_aware_id() :: id()
def random_node_aware_id() do
boot_id = Livebook.Config.random_boot_id()
node_part = node_hash(node())
random_part = :crypto.strong_rand_bytes(9)
binary = <<node_part::binary, random_part::binary>>
# 16B + 9B = 25B is suitable for base32 encoding without padding
random_part = :crypto.strong_rand_bytes(11)
binary = <<boot_id::binary, node_part::binary, random_part::binary>>
# 3B + 16B + 11B = 30B is suitable for base32 encoding without padding
josevalim marked this conversation as resolved.
Show resolved Hide resolved
Base.encode32(binary, case: :lower)
end

Expand All @@ -63,14 +64,18 @@ defmodule Livebook.Utils do
"""
@spec node_from_node_aware_id(id()) :: {:ok, node()} | :error
josevalim marked this conversation as resolved.
Show resolved Hide resolved
def node_from_node_aware_id(id) do
binary = Base.decode32!(id, case: :lower)
<<node_part::binary-size(16), _random_part::binary-size(9)>> = binary
case Base.decode32(id, case: :lower) do
{:ok,
<<boot_id::binary-size(3), node_part::binary-size(16), _random_part::binary-size(11)>>} ->
known_nodes = [node() | Node.list()]

known_nodes = [node() | Node.list()]
Enum.find_value(known_nodes, :error, fn node ->
node_hash(node) == node_part && {:ok, node, boot_id}
end)

Enum.find_value(known_nodes, :error, fn node ->
node_hash(node) == node_part && {:ok, node}
end)
_ ->
:error
end
end

@doc """
Expand Down
4 changes: 2 additions & 2 deletions lib/livebook_web/controllers/session_controller.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ defmodule LivebookWeb.SessionController do
file = FileSystem.File.resolve(images_dir, name)
serve_static(conn, file)

:error ->
{:error, _} ->
send_resp(conn, 404, "Not found")
end
end
Expand All @@ -57,7 +57,7 @@ defmodule LivebookWeb.SessionController do

send_notebook_source(conn, notebook, file_name, format)

:error ->
{:error, _} ->
send_resp(conn, 404, "Not found")
end
end
Expand Down
12 changes: 11 additions & 1 deletion lib/livebook_web/live/session_live.ex
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,18 @@ defmodule LivebookWeb.SessionLive do
|> prune_outputs()
|> prune_cell_sources()}

:error ->
{:error, :not_found} ->
{:ok, redirect(socket, to: ~p"/")}

{:error, :maybe_crashed} ->
{:ok,
socket
|> put_flash(
:error,
"Could not find notebook session because Livebook has rebooted. " <>
"This may happen if Livebook runs out of memory while installing dependencies or executing code."
)
|> redirect(to: ~p"/")}
end
end

Expand Down
11 changes: 10 additions & 1 deletion test/livebook/sessions_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ defmodule Livebook.SessionsTest do
describe "fetch_session/1" do
test "returns an error if no session with the given id exists" do
id = Livebook.Utils.random_node_aware_id()
assert :error = Sessions.fetch_session(id)
assert Sessions.fetch_session(id) == {:error, :not_found}
end

test "returns session matching the given id" do
Expand All @@ -41,6 +41,15 @@ defmodule Livebook.SessionsTest do

Session.close(session.pid)
end

test "returns an error if session comes from a different boot" do
Application.put_env(:livebook, :random_boot_id, "aaa")
id = Livebook.Utils.random_node_aware_id()

assert Sessions.fetch_session(id) == {:error, :not_found}
Application.put_env(:livebook, :random_boot_id, "bbb")
jonatanklosko marked this conversation as resolved.
Show resolved Hide resolved
assert Sessions.fetch_session(id) == {:error, :maybe_crashed}
end
end

describe "update_session/1" do
Expand Down