Skip to content

Commit

Permalink
Fix race condition in container starting (#1356)
Browse files Browse the repository at this point in the history
  • Loading branch information
kasperl authored Jan 20, 2023
1 parent 5ab8627 commit 7a01d7e
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 51 deletions.
28 changes: 17 additions & 11 deletions lib/system/api/containers.toit
Original file line number Diff line number Diff line change
Expand Up @@ -9,31 +9,34 @@ import system.containers show ContainerImage
interface ContainerService:
static UUID /string ::= "358ee529-45a4-409e-8fab-7a28f71e5c51"
static MAJOR /int ::= 0
static MINOR /int ::= 5
static MINOR /int ::= 6

static FLAG_RUN_BOOT /int ::= 1 << 0
static FLAG_RUN_CRITICAL /int ::= 1 << 1

static LIST_IMAGES_INDEX /int ::= 0
list_images -> List
static LIST_IMAGES_INDEX /int ::= 0

static START_IMAGE_INDEX /int ::= 1
start_image id/uuid.Uuid arguments/any -> int?
load_image id/uuid.Uuid -> int?
static LOAD_IMAGE_INDEX /int ::= 1

start_container handle/int arguments/any -> none
static START_CONTAINER_INDEX /int ::= 7

static STOP_CONTAINER_INDEX /int ::= 6
stop_container handle/int -> none
static STOP_CONTAINER_INDEX /int ::= 6

static UNINSTALL_IMAGE_INDEX /int ::= 2
uninstall_image id/uuid.Uuid -> none
static UNINSTALL_IMAGE_INDEX /int ::= 2

static IMAGE_WRITER_OPEN_INDEX /int ::= 3
image_writer_open size/int -> int
static IMAGE_WRITER_OPEN_INDEX /int ::= 3

static IMAGE_WRITER_WRITE_INDEX /int ::= 4
image_writer_write handle/int bytes/ByteArray -> none
static IMAGE_WRITER_WRITE_INDEX /int ::= 4

static IMAGE_WRITER_COMMIT_INDEX /int ::= 5
image_writer_commit handle/int flags/int data/int -> uuid.Uuid
static IMAGE_WRITER_COMMIT_INDEX /int ::= 5

class ContainerServiceClient extends ServiceClient implements ContainerService:
constructor --open/bool=true:
Expand All @@ -48,8 +51,11 @@ class ContainerServiceClient extends ServiceClient implements ContainerService:
cursor := it * 3
ContainerImage (uuid.Uuid array[cursor]) array[cursor + 1] array[cursor + 2]

start_image id/uuid.Uuid arguments/any -> int?:
return invoke_ ContainerService.START_IMAGE_INDEX [id.to_byte_array, arguments]
load_image id/uuid.Uuid -> int?:
return invoke_ ContainerService.LOAD_IMAGE_INDEX id.to_byte_array

start_container handle/int arguments/any -> none:
invoke_ ContainerService.START_CONTAINER_INDEX [handle, arguments]

stop_container handle/int -> none:
invoke_ ContainerService.STOP_CONTAINER_INDEX handle
Expand Down
11 changes: 8 additions & 3 deletions lib/system/containers.toit
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@ current -> uuid.Uuid:
return uuid.Uuid current_image_id_

start id/uuid.Uuid arguments/any=[] -> Container:
handle/int? := _client_.start_image id arguments
if handle: return Container id handle
throw "No such container: $id"
handle/int? := _client_.load_image id
if not handle: throw "No such container: $id"
container := Container id handle
try:
_client_.start_container handle arguments
return container
finally: | is_exception exception |
if is_exception: container.close

uninstall id/uuid.Uuid -> none:
_client_.uninstall_image id
Expand Down
4 changes: 2 additions & 2 deletions system/boot.toit
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ boot container_manager/ContainerManager -> int:
// Always start the system image which is technically already
// running. This allows the container manager to correctly keep
// track of the number of running processes.
container_manager.system_image.start
container_manager.system_image.load.start
container_manager.images.do: | image/ContainerImage |
if image.run_boot: image.start
if image.run_boot: image.load.start
return container_manager.wait_until_done
56 changes: 36 additions & 20 deletions system/containers.toit
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,24 @@ import .services
class Container:
image/ContainerImage ::= ?
gid_/int ::= ?
pids_/Set ::= ? // Set<int>
pids_/Set? := null
resources/Set ::= {}

constructor .image .gid_ pid/int:
pids_ = { pid }
constructor .image .gid_:

id -> int:
return gid_

is_process_running pid/int -> bool:
return pids_.contains pid

start arguments/any=image.default_arguments -> none:
if pids_: throw "Already started"
pids_ = {}
pids_.add (image.spawn this arguments)

stop -> none:
if not pids_: throw "Not started"
if pids_.is_empty: return
pids_.do: container_kill_pid_ it
pids_.clear
Expand Down Expand Up @@ -92,8 +97,18 @@ abstract class ContainerImage:

abstract id -> uuid.Uuid

start -> Container:
return start default_arguments
load -> Container:
// We load the container without starting it, so we can
// register the container correctly before we receive the
// first events from it. This avoids a race condition
// that could occur because spawning a new process is
// inherently asynchronous and we need the client and the
// container manager to be ready for processing messages
// and close notifications.
gid ::= container_next_gid_
container := Container this gid
manager.on_container_load_ container
return container

trace encoded/ByteArray -> bool:
return false
Expand All @@ -116,7 +131,7 @@ abstract class ContainerImage:
// consider making it null instead.
return []

abstract start arguments/any -> Container
abstract spawn container/Container arguments/any -> int
abstract stop_all -> none
abstract delete -> none

Expand All @@ -135,13 +150,8 @@ class ContainerImageFlash extends ContainerImage:
data -> int:
return binary.LITTLE_ENDIAN.uint32 allocation_.metadata 1

start arguments/any -> Container:
gid ::= container_next_gid_
pid ::= container_spawn_ allocation_.offset allocation_.size gid arguments
// TODO(kasper): Can the container stop before we even get it created?
container := Container this gid pid
manager.on_container_start_ container
return container
spawn container/Container arguments/any:
return container_spawn_ allocation_.offset allocation_.size container.id arguments

stop_all -> none:
attempts := 0
Expand Down Expand Up @@ -172,8 +182,11 @@ abstract class ContainerServiceDefinition extends ServiceDefinition
handle pid/int client/int index/int arguments/any -> any:
if index == ContainerService.LIST_IMAGES_INDEX:
return list_images
if index == ContainerService.START_IMAGE_INDEX:
return start_image client (uuid.Uuid arguments[0]) arguments[1]
if index == ContainerService.LOAD_IMAGE_INDEX:
return load_image client (uuid.Uuid arguments)
if index == ContainerService.START_CONTAINER_INDEX:
resource ::= (resource client arguments[0]) as ContainerResource
return start_container resource arguments[1]
if index == ContainerService.STOP_CONTAINER_INDEX:
resource ::= (resource client arguments) as ContainerResource
return stop_container resource
Expand Down Expand Up @@ -203,15 +216,18 @@ abstract class ContainerServiceDefinition extends ServiceDefinition
result.add image.data
return result

start_image id/uuid.Uuid arguments/any -> int:
load_image id/uuid.Uuid -> int:
unreachable // <-- TODO(kasper): Nasty.
start_image client/int id/uuid.Uuid arguments/any -> ContainerResource?:
load_image client/int id/uuid.Uuid -> ContainerResource?:
image/ContainerImage? := lookup_image id
if not image: return null
return ContainerResource (image.start arguments) this client
return ContainerResource image.load this client

start_container resource/ContainerResource arguments/any -> none:
resource.container.start arguments

stop_container resource/ContainerResource? -> none:
stop_container resource/ContainerResource -> none:
resource.container.stop

uninstall_image id/uuid.Uuid -> none:
Expand Down Expand Up @@ -297,7 +313,7 @@ class ContainerManager extends ContainerServiceDefinition implements SystemMessa
if containers_by_id_.is_empty: return 0
return done_.get

on_container_start_ container/Container -> none:
on_container_load_ container/Container -> none:
containers/Map ::= containers_by_image_.get container.image.id --init=: {:}
containers[container.id] = container
containers_by_id_[container.id] = container
Expand Down
6 changes: 2 additions & 4 deletions system/extensions/esp32/boot.toit
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,9 @@ class SystemImage extends ContainerImage:
constructor manager/ContainerManager:
super manager

start arguments/any -> Container:
spawn container/Container arguments/any -> int:
// This container is already running as the system process.
container := Container this 0 Process.current.id
manager.on_container_start_ container
return container
return Process.current.id

stop_all -> none:
unreachable // Not implemented yet.
Expand Down
2 changes: 1 addition & 1 deletion tests/negative/gold/illegal_system_call_test.gold
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
As check failed: null is not a ServiceResource.
0: ServiceDefinition.resource <sdk>/system/services.toit:170:5
1: ContainerServiceDefinition.handle <...>/system/containers.toit:185:19
1: ContainerServiceDefinition.handle <...>/system/containers.toit:198:19
2: ServiceManager_.<lambda> <sdk>/system/services.toit:357:15
3: RpcRequest_.process.<block> <sdk>/rpc/broker.toit:98:26
4: RpcRequest_.process <sdk>/rpc/broker.toit:95:3
Expand Down
14 changes: 4 additions & 10 deletions tools/toit.run.toit
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,9 @@ class SystemContainerImage extends ContainerImageFromSnapshot:
constructor manager/ContainerManager bundle/ByteArray:
super manager bundle

start arguments/any -> Container:
spawn container/Container arguments/any -> int:
// This container is already running as the system process.
container := Container this 0 Process.current.id
manager.on_container_start_ container
return container
return Process.current.id

class ApplicationContainerImage extends ContainerImageFromSnapshot:
snapshot/ByteArray? := null
Expand All @@ -85,12 +83,8 @@ class ApplicationContainerImage extends ContainerImageFromSnapshot:
// the archive.
super reader

start arguments/any -> Container:
gid ::= container_next_gid_
pid ::= launch_snapshot_ snapshot gid id.to_byte_array arguments
container := Container this gid pid
manager.on_container_start_ container
return container
spawn container/Container arguments/any -> int:
return launch_snapshot_ snapshot container.id id.to_byte_array arguments

static launch_snapshot_ snapshot/ByteArray gid/int id/ByteArray arguments/any -> int:
#primitive.snapshot.launch
Expand Down

0 comments on commit 7a01d7e

Please sign in to comment.