Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Discovery: Initial connection check #2459

Merged
merged 10 commits into from
Feb 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions acceptance/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,10 @@ fail() {
echo "$(date -u +'%F %T.%6N%z') $@" >&2
exit 1
}

#######################################
# Reports whether PID 1 is placed in a docker cgroup.
# Succeeds when the third colon-separated field of some line in
# /proc/1/cgroup starts with "/docker/"; fails otherwise.
#######################################
is_running_in_docker() {
    local cgroup_file=/proc/1/cgroup
    cut -d: -f 3 "$cgroup_file" | grep -q '^/docker/'
}
2 changes: 2 additions & 0 deletions acceptance/discovery_br_fetches_dynamic_acceptance/test
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ test_run() {
# Wait until dynamic topology expires.
sleep 10
check_connectivity "dynamic topology expired"

check_br_fail_action "dynamic"
}

check_connectivity() {
Expand Down
2 changes: 2 additions & 0 deletions acceptance/discovery_br_fetches_static_acceptance/test
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ test_run() {
cp $TOPO $STATIC_FULL
sleep 5
check_connectivity "serve original static topology"

check_br_fail_action "static"
}

check_connectivity_broken() {
Expand Down
2 changes: 2 additions & 0 deletions acceptance/discovery_infra_fetches_dynamic_acceptance/test
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ test_run() {
check_logs "ps$IA_FILE-1"
check_logs "cs$IA_FILE-1"
check_logs "sd$IA_FILE"

check_infra_fail_action "dynamic"
}

check_logs() {
Expand Down
2 changes: 2 additions & 0 deletions acceptance/discovery_infra_fetches_static_acceptance/test
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ test_run() {
check_diff "ps$IA_FILE-1"
check_diff "cs$IA_FILE-1"
check_diff "sd$IA_FILE"

check_infra_fail_action "static"
}

check_logs() {
Expand Down
63 changes: 63 additions & 0 deletions acceptance/discovery_util/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,74 @@ set_interval() {
sed -i -e "/\[discovery.$2]/a Interval = \"1s\"" "$1"
}

# Appends a [discovery.<mode>.Connect] section containing an InitialPeriod
# entry to the end of a config file.
# Arguments:
#   $1 - path of the config file to append to
#   $2 - discovery mode used in the section name
#   $3 - value written as InitialPeriod (e.g. "5s")
set_connect() {
    # The arguments are passed as printf operands rather than being embedded
    # in the format string, so '%' or backslashes in them are written
    # literally. No trailing newline is emitted.
    printf '\n[discovery.%s.Connect]\nInitialPeriod = "%s"' "$2" "$3" >> "$1"
}

# Inserts a FailAction entry right below every line matching the
# [discovery.<mode>.Connect] header, editing the config file in place.
# Arguments:
#   $1 - path of the config file to edit
#   $2 - discovery mode used in the section name
#   $3 - value written as FailAction
set_fail_action() {
    local cfg_file="$1" mode="$2" action="$3"
    sed -i -e "/\[discovery.${mode}.Connect]/a FailAction = \"${action}\"" "$cfg_file"
}

# Requests full.json and then default.json under /discovery/v1/$1/ from the
# discovery service address read out of $TOPO, discarding the response body.
# The exit status is that of the last curl invocation.
check_file() {
    local variant
    for variant in full default; do
        curl -f -s -S "$( jq -r '.DiscoveryService[].Addrs[].Public | "\(.Addr):\(.L4Port)"' "$TOPO" )/discovery/v1/$1/${variant}.json" > /dev/null
    done
}


# Checks how the path server, certificate server and sciond containers behave
# when the mock discovery service is stopped.
#   1. With only InitialPeriod = "5s" configured (no FailAction), all three
#      services must still be running 10s after a restart.
#   2. With FailAction = "Fatal" added, none of them may be running 10s after
#      a restart.
# Calls fail (which exits) on the first violated expectation.
# Arguments:
#   $1 - discovery mode whose Connect section is written to the configs
# Globals read: AS_FILE, IA_FILE
check_infra_fail_action() {
    stop_mock_ds

    local mode="$1"
    local services=("ps$IA_FILE-1" "cs$IA_FILE-1" "sd$IA_FILE")
    # Loop variables are local so they cannot clobber a caller's globals.
    local cfg svc

    # Check that services continue if fail action is not set.
    for cfg in "gen/ISD1/AS$AS_FILE"/*/{{cs,ps}config,sciond}.toml; do
        set_connect "$cfg" "$mode" "5s"
    done
    # ${services[@]/#/scion_} prefixes every element with "scion_".
    ./tools/dc scion restart "${services[@]/#/scion_}"
    sleep 10
    for svc in "${services[@]}"; do
        check_running "$svc" || fail "Error: $svc not running"
    done

    # Check that services exit if fail action is fatal
    for cfg in "gen/ISD1/AS$AS_FILE"/*/{{cs,ps}config,sciond}.toml; do
        set_fail_action "$cfg" "$mode" "Fatal"
    done
    ./tools/dc scion restart "${services[@]/#/scion_}"
    sleep 10
    for svc in "${services[@]}"; do
        check_not_running "$svc" || fail "Error: $svc still running"
    done
}

# Checks how the border router container behaves when the mock discovery
# service is stopped: it must keep running when only InitialPeriod is
# configured, and must not be running once FailAction is set to "Fatal".
# Calls fail on the first violated expectation.
# Arguments:
#   $1 - discovery mode whose Connect section is written to brconfig.toml
# Globals read: AS_FILE, IA_FILE
check_br_fail_action() {
    stop_mock_ds

    local br_name="br$IA_FILE-1"
    local br_cfg="gen/ISD1/AS$AS_FILE/$br_name/brconfig.toml"

    # Check that border router continues if fail action is not set.
    set_connect "$br_cfg" "$1" "5s"
    ./tools/dc scion restart "scion_$br_name"
    sleep 10
    check_running "$br_name" || fail "Error: $br_name not running"

    # Check that border router exits if fail action is fatal
    set_fail_action "$br_cfg" "$1" "Fatal"
    ./tools/dc scion restart "scion_$br_name"
    sleep 10
    check_not_running "$br_name" || fail "Error: $br_name still running"
}

# Stops the mock discovery service container through the tools/dc wrapper.
stop_mock_ds() {
    local mock_ds='mock_ds1-ff00_0_111-1'
    ./tools/dc scion stop "$mock_ds"
}

# Succeeds if `docker top` can list the processes of the container
# "scion_<prefix>$1", where <prefix> is "docker_" when this script itself
# runs inside docker and empty otherwise.
# Arguments:
#   $1 - container name without the "scion_" prefix
check_running() {
    # Initialise the prefix unconditionally so that an inherited global
    # variable can never end up in the container name and the expansion is
    # safe under `set -u`.
    local prefix=""
    if is_running_in_docker; then
        prefix="docker_"
    fi
    docker top "scion_${prefix}$1"
}

# Inverse of check_running: succeeds when check_running fails for the given
# container and returns 1 when it succeeds.
# Arguments:
#   $1 - container name without the "scion_" prefix
check_not_running() {
    # "$1" is quoted so the name is always passed on as a single argument.
    ! check_running "$1"
}

print_help() {
echo
cat <<-_EOF
Expand Down
25 changes: 25 additions & 0 deletions go/border/brconf/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ const Sample = `[general]
# empty string, the updated topologies are not written. (default "")
Filename = ""

[discovery.static.connect]
# Maximum time spent attempting to fetch the topology from the
# discovery service on start. If no topology is successfully fetched
# in this period, the FailAction is executed. (default 20s)
InitialPeriod = "20s"

# The action to take if no topology is successfully fetched in
# the InitialPeriod.
# - Fatal: Exit process.
# - Continue: Log error and continue with execution.
# (Fatal | Continue) (default Continue)
FailAction = "Continue"

[discovery.dynamic]
# Enable periodic fetching of the dynamic topology. (default false)
Enable = false
Expand All @@ -91,6 +104,18 @@ const Sample = `[general]
# Require https connection. (default false)
Https = false

[discovery.dynamic.connect]
# Maximum time spent attempting to fetch the topology from the
# discovery service on start. If no topology is successfully fetched
# in this period, the FailAction is executed. (default 20s)
InitialPeriod = "20s"

# The action to take if no topology is successfully fetched in InitialPeriod.
# - Fatal: Exit process.
# - Continue: Log error and continue with execution.
# (Fatal | Continue) (default Continue)
FailAction = "Continue"

[br]
# Enable cpu and memory profiling. (default false)
Profile = false
Expand Down
22 changes: 3 additions & 19 deletions go/cert_srv/internal/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,19 +22,15 @@ import (
"github.com/BurntSushi/toml"
. "github.com/smartystreets/goconvey/convey"

"github.com/scionproto/scion/go/lib/infra/modules/idiscovery"
"github.com/scionproto/scion/go/lib/infra/modules/idiscovery/idiscoverytest"
)

func TestSampleCorrect(t *testing.T) {
Convey("Load", t, func() {
var cfg Config
// Make sure AutomaticRenewal is set during decoding.
cfg.CS.AutomaticRenewal = true
cfg.Discovery.Dynamic.Enable = true
cfg.Discovery.Dynamic.Https = true
cfg.Discovery.Static.Enable = true
cfg.Discovery.Static.Https = true
cfg.Discovery.Static.Filename = "topology.json"
idiscoverytest.InitTestConfig(&cfg.Discovery)
_, err := toml.Decode(Sample, &cfg)
SoMsg("err", err, ShouldBeNil)

Expand All @@ -48,19 +44,7 @@ func TestSampleCorrect(t *testing.T) {
SoMsg("TrustDB.Backend correct", cfg.TrustDB.Backend, ShouldEqual, "sqlite")
SoMsg("TrustDB.Connection correct", cfg.TrustDB.Connection, ShouldEqual,
"/var/lib/scion/spki/cs-1.trust.db")
SoMsg("Discovery.Static.Enable correct", cfg.Discovery.Static.Enable, ShouldBeFalse)
SoMsg("Discovery.Static.Interval correct", cfg.Discovery.Static.Interval.Duration,
ShouldEqual, idiscovery.DefaultStaticFetchInterval)
SoMsg("Discovery.Static.Timeout correct", cfg.Discovery.Static.Timeout.Duration,
ShouldEqual, idiscovery.DefaultFetchTimeout)
SoMsg("Discovery.Static.Https correct", cfg.Discovery.Static.Https, ShouldBeFalse)
SoMsg("Discovery.Static.Filename correct", cfg.Discovery.Static.Filename, ShouldBeBlank)
SoMsg("Discovery.Dynamic.Enable correct", cfg.Discovery.Dynamic.Enable, ShouldBeFalse)
SoMsg("Discovery.Dynamic.Interval correct", cfg.Discovery.Dynamic.Interval.Duration,
ShouldEqual, idiscovery.DefaultDynamicFetchInterval)
SoMsg("Discovery.Dynamic.Timeout correct", cfg.Discovery.Dynamic.Timeout.Duration,
ShouldEqual, idiscovery.DefaultFetchTimeout)
SoMsg("Discovery.Dynamic.Https correct", cfg.Discovery.Dynamic.Https, ShouldBeFalse)
idiscoverytest.CheckTestConfig(cfg.Discovery)

// csconfig specific
SoMsg("LeafReissueLeadTime correct", cfg.CS.LeafReissueLeadTime.Duration,
Expand Down
25 changes: 25 additions & 0 deletions go/cert_srv/internal/config/sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,19 @@ const Sample = `[general]
# empty string, the updated topologies are not written. (default "")
Filename = ""

[discovery.static.connect]
# Maximum time spent attempting to fetch the topology from the
# discovery service on start. If no topology is successfully fetched
# in this period, the FailAction is executed. (default 20s)
InitialPeriod = "20s"

# The action to take if no topology is successfully fetched in
# the InitialPeriod.
# - Fatal: Exit process.
# - Continue: Log error and continue with execution.
# (Fatal | Continue) (default Continue)
FailAction = "Continue"

[discovery.dynamic]
# Enable periodic fetching of the dynamic topology. (default false)
Enable = false
Expand All @@ -103,6 +116,18 @@ const Sample = `[general]
# Require https connection. (default false)
Https = false

[discovery.dynamic.connect]
# Maximum time spent attempting to fetch the topology from the
# discovery service on start. If no topology is successfully fetched
# in this period, the FailAction is executed. (default 20s)
InitialPeriod = "20s"

# The action to take if no topology is successfully fetched in InitialPeriod.
# - Fatal: Exit process.
# - Continue: Log error and continue with execution.
# (Fatal | Continue) (default Continue)
FailAction = "Continue"

[cs]
# Time between starting reissue requests and leaf cert expiration. If not
# specified, this is set to PathSegmentTTL.
Expand Down
51 changes: 50 additions & 1 deletion go/lib/infra/modules/idiscovery/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
package idiscovery

import (
"strings"
"time"

"github.com/scionproto/scion/go/lib/common"
"github.com/scionproto/scion/go/lib/util"
)

Expand All @@ -27,6 +29,9 @@ var (
DefaultStaticFetchInterval = 5 * time.Minute
// DefaultFetchTimeout is the default timeout for a query.
DefaultFetchTimeout = 1 * time.Second
// DefaultInitialConnectPeriod is the default total amount of time spent attempting
// to connect to the discovery service on start.
DefaultInitialConnectPeriod = 20 * time.Second
)

type Config struct {
Expand All @@ -51,6 +56,7 @@ type StaticConfig struct {
}

func (s *StaticConfig) InitDefaults() {
s.Connect.InitDefaults()
if s.Interval.Duration == 0 {
s.Interval.Duration = DefaultStaticFetchInterval
}
Expand All @@ -65,17 +71,60 @@ type FetchConfig struct {
Enable bool
// Interval specifies the time between two queries.
Interval util.DurWrap
// Timeout specifies the timout for a single query.
// Timeout specifies the timeout for a single query.
Timeout util.DurWrap
// Https indicates whether https must be used to fetch the topology.
Https bool
// Connect contains the parameters for the initial connection
// check to the discovery service.
Connect ConnectParams
}

// InitDefaults populates unset fields with their defaults: a zero Timeout
// becomes DefaultFetchTimeout, a zero Interval becomes
// DefaultDynamicFetchInterval, and the Connect parameters are defaulted
// through their own InitDefaults.
func (f *FetchConfig) InitDefaults() {
	if f.Timeout.Duration == 0 {
		f.Timeout.Duration = DefaultFetchTimeout
	}
	if f.Interval.Duration == 0 {
		f.Interval.Duration = DefaultDynamicFetchInterval
	}
	f.Connect.InitDefaults()
}

// ConnectParams contains the parameters for the initial connection check
// to the discovery service.
type ConnectParams struct {
// InitialPeriod indicates for how long the process tries to get a valid
// response from the discovery service until FailAction is executed.
InitialPeriod util.DurWrap
// FailAction indicates the action that should be taken if no topology can
// be fetched from the discovery service within the InitialPeriod.
FailAction FailAction
}

// InitDefaults populates unset fields with their defaults. Any FailAction
// other than FailActionFatal (including the empty value) is set to
// FailActionContinue, and a zero InitialPeriod becomes
// DefaultInitialConnectPeriod.
func (c *ConnectParams) InitDefaults() {
	if c.FailAction != FailActionFatal {
		c.FailAction = FailActionContinue
	}
	if c.InitialPeriod.Duration == 0 {
		c.InitialPeriod.Duration = DefaultInitialConnectPeriod
	}
}

// FailAction names the action that is taken if no topology can be fetched
// from the discovery service within the initial connect period.
type FailAction string

const (
// FailActionFatal indicates that the process exits on error.
FailActionFatal FailAction = "Fatal"
// FailActionContinue indicates that the process continues on error.
FailActionContinue FailAction = "Continue"
)

// UnmarshalText parses text into a FailAction. The comparison against the
// known actions is done on lower-cased strings, so the input is matched
// case-insensitively and the canonical constant is stored. An input that
// matches neither FailActionFatal nor FailActionContinue yields an error
// and leaves the receiver untouched.
func (f *FailAction) UnmarshalText(text []byte) error {
	lowered := strings.ToLower(string(text))
	for _, action := range [...]FailAction{FailActionFatal, FailActionContinue} {
		if lowered == strings.ToLower(string(action)) {
			*f = action
			return nil
		}
	}
	return common.NewBasicError("Unknown FailAction", nil, "input", string(text))
}
Loading