Rework container start and remember reason #786

Merged: 2 commits, Aug 31, 2023
93 changes: 64 additions & 29 deletions src/service/containers.toit
@@ -12,7 +12,6 @@ import .esp32.pin-trigger
 import .scheduler
 import ..shared.utils as utils
 
-
 class ContainerManager:
   logger_/log.Logger
   scheduler_/Scheduler
@@ -45,7 +44,10 @@ class ContainerManager:
     // very clear how we should handle it if we cannot. Should
     // we drop such an app from the current state? Seems like
     // the right thing to do.
-    if job: add_ job --message="load"
+    if job:
+      if job.trigger-boot_:
+        job.trigger (encode-trigger-reason_ --boot)
+      add_ job --message="load"
 
     // Mark containers that are needed by connections as runlevel safemode.
     // TODO(florian): should required containers be started on-demand?
@@ -98,9 +100,11 @@ class ContainerManager:
         --id=id
         --description=description
         --pin-trigger-manager=pin-trigger-manager_
+        --logger=logger_
 
   install job/ContainerJob -> none:
-    job.has-run-after-install_ = false
+    if job.trigger-install_:
+      job.trigger (encode-trigger-reason_ --install)
     add_ job --message="install"
 
   uninstall job/ContainerJob -> none:
@@ -119,11 +123,10 @@
     scheduler_.remove-job job
     job.update description
     pin-trigger-manager_.update-job job
-    // After updating the description of an app, we
-    // mark it as being newly installed for the purposes
-    // of scheduling. This means that it will start
-    // again if it has an install trigger.
-    job.has-run-after-install_ = false
+    // After updating the description of an app, we consider it
+    // as newly installed. Rearm it if it has an install trigger.
+    if job.trigger-install_:
+      job.trigger (encode-trigger-reason_ --install)
     logger_.info "update" --tags=job.tags
     scheduler_.add-job job
 
@@ -137,11 +140,31 @@
     scheduler_.remove-job job
     logger_.info message --tags=job.tags
 
+encode-trigger-reason_ -> int
+    --boot/bool=false
+    --install/bool=false
+    --interval/bool=false
+    --restart/bool=false
+    --critical/bool=false
+    --pin/int?=null
+    --touch/int?=null:
+  // These constants must be kept in sync with the ones in the
+  // Artemis package.
+  if boot: return 0
+  if install: return 1
+  if interval: return 2
+  if restart: return 3
+  if critical: return 4
+  if pin: return (pin << 8) | 10
+  if touch: return (touch << 8) | 11
+  throw "invalid trigger"
+
 class ContainerJob extends Job:
   // The key of the ID in the $description.
   static KEY-ID ::= "id"
 
   pin-trigger-manager_/PinTriggerManager
+  logger_/log.Logger
 
   id/uuid.Uuid
   description_/Map := ?

Review thread on the "kept in sync" comment:
Contributor: Can't we just use the constants from the package?
Member (author): I will try to move the function into the package in a follow-up PR.
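The reason codes above pack the trigger kind into the low byte and, for pin and touch triggers, the pin number into the upper bits. A minimal decoding sketch (not part of this PR; the helper name is illustrative), useful when reading reasons back out of logs:

decode-trigger-reason reason/int -> string:
  // The low byte selects the trigger kind; see encode-trigger-reason_.
  kind := reason & 0xff
  if kind == 0: return "boot"
  if kind == 1: return "install"
  if kind == 2: return "interval"
  if kind == 3: return "restart"
  if kind == 4: return "critical"
  // Pin and touch reasons carry the pin number in the upper bits.
  if kind == 10: return "pin $(reason >> 8)"
  if kind == 11: return "touch $(reason >> 8)"
  throw "invalid trigger reason: $reason"

For example, a pin trigger on GPIO 17 encodes as (17 << 8) | 10 = 4362 and decodes back to "pin 17".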
@@ -155,16 +178,19 @@ class ContainerJob extends Job:
   trigger-gpio-levels_/Map? := null
   trigger-gpio-touch_/Set? := null
 
-  // The $ContainerManager is responsible for scheduling
-  // newly installed containers, so it manipulates this
-  // field directly.
-  has-run-after-install_/bool := true
-
   is-triggered_/bool := false
 
-  constructor --name/string --.id --description/Map --pin-trigger-manager/PinTriggerManager:
+  // The reason for why this job was triggered.
+  last-trigger-reason_/int? := null
+
+  constructor
+      --name/string
+      --.id
+      --description/Map
+      --pin-trigger-manager/PinTriggerManager
+      --logger/log.Logger:
     description_ = description
     pin-trigger-manager_ = pin-trigger-manager
+    logger_ = logger.with-name name
     super name
     update description
 
@@ -199,23 +225,29 @@
     // should probably think about how we want to access
     // the scheduler state here.
     if delayed-until := scheduler-delayed-until_:
-      return delayed-until
+      if delayed-until > now: return delayed-until
+      trigger (encode-trigger-reason_ --restart)
     else if is-critical:
       // TODO(kasper): Find a way to reboot the device if
       // a critical container keeps restarting.
-      return now
-    else if trigger-boot_ and not has-run-after-boot:
-      return now
-    else if trigger-install_ and not has-run-after-install_:
-      return now
+      trigger (encode-trigger-reason_ --critical)
     else if trigger-interval_:
-      return last ? last + trigger-interval_ : now
-    else if is-triggered_:
-      return now
-    else:
-      // TODO(kasper): Don't run at all. Maybe that isn't
-      // a great default when you have no triggers?
-      return null
+      result := last ? last + trigger-interval_ : now
+      if result > now: return result
+      trigger (encode-trigger-reason_ --interval)
+
+    if is-triggered_: return now
+    // TODO(kasper): Don't run at all. Maybe that isn't
+    // a great default when you have no triggers?
+    return null
+
+  /**
+  Triggers this job, making it run as soon as possible.
+  */
+  trigger reason/int:
+    if is-running or is-triggered_: return
+    is-triggered_ = true
+    last-trigger-reason_ = reason
 
   schedule-tune last/JobTime -> JobTime:
     // If running the container took a long time, we tune the
@@ -226,7 +258,10 @@
   start -> none:
     if running_: return
     arguments := description_.get "arguments"
-    has-run-after-install_ = true
+    logger_.debug "starting" --tags={
+      "reason": last-trigger-reason_,
+      "arguments": arguments
+    }
     running_ = containers.start id arguments
 
     scheduler_.on-job-started this
 
9 changes: 6 additions & 3 deletions src/service/esp32/pin-trigger.toit
@@ -185,7 +185,10 @@ class PinTriggerManager:
     if level: tags["level"] = level
     else: tags["touch"] = touch
     logger_.info "triggered by pin" --tags=tags
-    job.is-triggered_ = true
+    reason := touch
+        ? encode-trigger-reason_ --touch=pin-number
+        : encode-trigger-reason_ --pin=pin-number
+    job.trigger reason
     // We need a critical_do as `update_` might kill the currently running
     // task.
     critical-do:
@@ -346,7 +349,7 @@ class PinTriggerManager:
         is-triggered = true
       if is-triggered:
         job-was-triggered = true
-        job.is-triggered_ = true
+        job.trigger (encode-trigger-reason_ --pin=pin)
         tags := job.tags.copy
         tags["pin"] = pin
         tags["level"] = level
@@ -355,7 +358,7 @@
     job.trigger-gpio-touch_.do: | pin |
       if touch-wakeup-pin == pin:
         job-was-triggered = true
-        job.is-triggered_ = true
+        job.trigger (encode-trigger-reason_ --touch=pin)
         tags := job.tags.copy
         tags["pin"] = pin
         logger_.info "triggered by touch" --tags=tags
 
10 changes: 6 additions & 4 deletions src/service/jobs.toit
@@ -16,7 +16,6 @@ abstract class Job:
   // information with jobs.
   scheduler_/Scheduler? := null
   scheduler-ran-last_/JobTime? := null
-  scheduler-ran-after-boot_/bool := false
 
   // TODO(kasper): Maybe this should be called run-after? The
   // way this interacts with the other triggers isn't super
@@ -33,9 +32,6 @@
   runlevel -> int: return RUNLEVEL-NORMAL
   stringify -> string: return name
 
-  has-run-after-boot -> bool:
-    return scheduler-ran-after-boot_
-
   abstract schedule now/JobTime last/JobTime? -> JobTime?
 
   schedule-tune last/JobTime -> JobTime:
@@ -141,6 +137,12 @@ class JobTime implements Comparable:
   operator < other/JobTime -> bool:
     return us < other.us
 
+  operator >= other/JobTime -> bool:
+    return us >= other.us
+
+  operator > other/JobTime -> bool:
+    return us > other.us
+
   operator + duration/Duration -> JobTime:
     return JobTime us + duration.in-us
 
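The new >= and > operators support the reworked schedule logic above, which compares wake-up times directly (delayed-until > now, result > now). A tiny usage sketch, assuming JobTime is imported from jobs.toit and using the core-library Duration constructor:

main:
  now := JobTime.now
  deadline := now + (Duration --s=30)
  // Mirrors the schedule logic: a delay only counts while it is
  // still in the future; otherwise the job gets triggered instead.
  if deadline > now: print "still delayed"
  else: print "due now"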
1 change: 0 additions & 1 deletion src/service/scheduler.toit
@@ -97,7 +97,6 @@ class Scheduler:
     logger_.warn "runlevel decreasing timed out" --tags={"runlevel": runlevel}
 
   on-job-started job/Job -> none:
-    job.scheduler-ran-after-boot_ = true
     job.scheduler-ran-last_ = JobTime.now
     job.scheduler-delayed-until_ = null
     logger_.info "job started" --tags={"job": job}