From cecaa8332184b7d170308a2d10477563d4d9f3fb Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 26 Sep 2023 21:28:51 +0200 Subject: [PATCH 1/5] chore(enginenetx): write more tests This diff attempts to improve the code quality of enginenetx by identifying cases where the code could be made to crash with specially written input and adds regress tests and checks to avoid these kind of crashes to happen. While there, perform a code review and improve comments. Part of https://github.com/ooni/probe/issues/2531 --- internal/enginenetx/beaconspolicy.go | 14 ++-- internal/enginenetx/dnspolicy.go | 2 + internal/enginenetx/httpsdialer.go | 57 ++++++++++------ internal/enginenetx/httpsdialer_test.go | 34 +++++----- internal/enginenetx/network.go | 16 +++-- internal/enginenetx/staticpolicy.go | 11 +++- internal/enginenetx/staticpolicy_test.go | 25 +++++-- internal/enginenetx/statsmanager.go | 84 +++++++++++++++++------- internal/enginenetx/statsmanager_test.go | 82 ++++++++++++++++++++++- internal/enginenetx/statspolicy.go | 27 ++++++-- internal/enginenetx/statspolicy_test.go | 55 ++++++++++++++++ 11 files changed, 317 insertions(+), 90 deletions(-) diff --git a/internal/enginenetx/beaconspolicy.go b/internal/enginenetx/beaconspolicy.go index a42f10e975..0ae84804fa 100644 --- a/internal/enginenetx/beaconspolicy.go +++ b/internal/enginenetx/beaconspolicy.go @@ -30,17 +30,19 @@ func (p *beaconsPolicy) LookupTactics(ctx context.Context, domain, port string) out := make(chan *httpsDialerTactic) go func() { - defer close(out) + defer close(out) // tell the parent when we're done index := 0 - // emit beacons related tactics first + // emit beacons related tactics first which is empty if there are + // no beacons for the givend domain and port for tx := range p.tacticsForDomain(domain, port) { tx.InitialDelay = happyEyeballsDelay(index) index += 1 out <- tx } - // now emit tactics using the DNS + // now fallback to get more tactics (typically here the fallback + // uses the DNS and obtains some extra tactic) for tx := range p.Fallback.LookupTactics(ctx, domain, port) { tx.InitialDelay = happyEyeballsDelay(index) index += 1 @@ -55,7 +57,7 @@ func (p *beaconsPolicy) tacticsForDomain(domain, port string) <-chan *httpsDiale out := make(chan *httpsDialerTactic) go func() { - defer close(out) + defer close(out) // tell the parent when we're done // we currently only have beacons for api.ooni.io if domain != "api.ooni.io" { @@ -68,9 +70,7 @@ func (p *beaconsPolicy) tacticsForDomain(domain, port string) <-chan *httpsDiale snis[i], snis[j] = snis[j], snis[i] }) - ipAddrs := p.beaconsAddrs() - - for _, ipAddr := range ipAddrs { + for _, ipAddr := range p.beaconsAddrs() { for _, sni := range snis { out <- &httpsDialerTactic{ Address: ipAddr, diff --git a/internal/enginenetx/dnspolicy.go b/internal/enginenetx/dnspolicy.go index c7d64e2693..3812230739 100644 --- a/internal/enginenetx/dnspolicy.go +++ b/internal/enginenetx/dnspolicy.go @@ -35,6 +35,7 @@ func (p *dnsPolicy) LookupTactics( go func() { // make sure we close the output channel when done + // so the reader knows that we're done defer close(out) // Do not even start the DNS lookup if the context has already been canceled, which @@ -54,6 +55,7 @@ func (p *dnsPolicy) LookupTactics( return } + // The tactics we generate here have SNI == VerifyHostname == domain for idx, addr := range addrs { tactic := &httpsDialerTactic{ Address: addr, diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index f02d06b87d..51a86c2a80 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -55,8 +55,7 @@ func (dt *httpsDialerTactic) String() string { return string(runtimex.Try1(json.Marshal(dt))) } -// tacticSummaryKey returns a string summarizing this [httpsDialerTactic] for -// the specific purpose of inserting the struct into a map. +// tacticSummaryKey returns a string summarizing the tactic's features. // // The fields used to compute the summary are: // @@ -68,33 +67,45 @@ func (dt *httpsDialerTactic) String() string { // // - VerifyHostname // -// The returned string contains the above fields separated by space. +// The returned string contains the above fields separated by space with +// `sni=` before the SNI and `verify=` before the verify hostname. +// +// We should be careful not to change this format unless we also change the +// format version used by static policies and by the state management. func (dt *httpsDialerTactic) tacticSummaryKey() string { - return fmt.Sprintf("%v sni=%v verify=%v", net.JoinHostPort(dt.Address, dt.Port), dt.SNI, dt.VerifyHostname) + return fmt.Sprintf( + "%v sni=%v verify=%v", + net.JoinHostPort(dt.Address, dt.Port), + dt.SNI, + dt.VerifyHostname, + ) } -// domainEndpointKey returns the domain's endpoint string key for storing into a map. +// domainEndpointKey returns a string consisting of the domain endpoint only. +// +// We always use the VerifyHostname to construct the domain endpoint. func (dt *httpsDialerTactic) domainEndpointKey() string { return net.JoinHostPort(dt.VerifyHostname, dt.Port) } -// httpsDialerPolicy describes the policy used by the [*httpsDialer]. +// httpsDialerPolicy is a policy used by the [*httpsDialer]. type httpsDialerPolicy interface { - // LookupTactics returns zero or more tactics for the given host and port. + // LookupTactics emits zero or more tactics for the given host and port + // through the returned channel, which is closed when done. LookupTactics(ctx context.Context, domain, port string) <-chan *httpsDialerTactic } -// httpsDialerStatsTracker tracks what happens while dialing TLS connections. -type httpsDialerStatsTracker interface { +// httpsDialerEventsHandler handles events occurring while we try dialing TLS. +type httpsDialerEventsHandler interface { // These callbacks are invoked during the TLS handshake to inform this - // tactic about events that occurred. A tactic SHOULD keep track of which + // interface about events that occurred. A policy SHOULD keep track of which // addresses, SNIs, etc. work and return them more frequently. // // Callbacks that take an error as argument also take a context as // argument and MUST check whether the context has been canceled or // its timeout has expired (i.e., using ctx.Err()) to determine // whether the operation failed or was merely canceled. In the latter - // case, obviously, the policy MUST NOT consider the tactic failed. + // case, obviously, you MUST NOT consider the tactic failed. OnStarting(tactic *httpsDialerTactic) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) @@ -125,7 +136,7 @@ type httpsDialer struct { rootCAs *x509.CertPool // stats tracks what happens while dialing. - stats httpsDialerStatsTracker + stats httpsDialerEventsHandler } // newHTTPSDialer constructs a new [*httpsDialer] instance. @@ -146,7 +157,7 @@ func newHTTPSDialer( logger model.Logger, netx *netxlite.Netx, policy httpsDialerPolicy, - stats httpsDialerStatsTracker, + stats httpsDialerEventsHandler, ) *httpsDialer { return &httpsDialer{ idGenerator: &atomic.Int64{}, @@ -196,8 +207,8 @@ func (hd *httpsDialer) DialTLSContext(ctx context.Context, network string, endpo ctx, cancel := context.WithCancel(ctx) defer cancel() - // The emitter will emit tactics and then close the channel when done. We spawn 1+ workers - // that handle tactics in paralellel and posts on the collector channel. + // The emitter will emit tactics and then close the channel when done. We spawn 16 workers + // that handle tactics in parallel and post results on the collector channel. emitter := hd.policy.LookupTactics(ctx, hostname, port) collector := make(chan *httpsDialerErrorOrConn) joiner := make(chan any) @@ -252,8 +263,10 @@ func httpsDialerReduceResult(connv []model.TLSConn, errorv []error) (model.TLSCo } } -// worker attempts to establish a TLS connection using and emits a single -// [*httpsDialerErrorOrConn] for each tactic. +// worker attempts to establish a TLS connection and emits the result using +// a [*httpsDialerErrorOrConn] for each tactic, until there are no more tactics +// and the reader channel is closed. At which point it posts on joiner to let +// the parent know that this goroutine has done its job. func (hd *httpsDialer) worker( ctx context.Context, joiner chan<- any, @@ -270,8 +283,10 @@ func (hd *httpsDialer) worker( Logger: hd.logger, } + // perform the actual dial conn, err := hd.dialTLS(ctx, prefixLogger, t0, tactic) + // send result to the parent writer <- &httpsDialerErrorOrConn{Conn: conn, Err: err} } } @@ -283,12 +298,12 @@ func (hd *httpsDialer) dialTLS( t0 time.Time, tactic *httpsDialerTactic, ) (model.TLSConn, error) { - // wait for the tactic to be ready to run + // honor happy-eyeballs delays and wait for the tactic to be ready to run if err := httpsDialerTacticWaitReady(ctx, t0, tactic); err != nil { return nil, err } - // tell the tactic that we're starting + // tell the observer that we're starting hd.stats.OnStarting(tactic) // create dialer and establish TCP connection @@ -306,7 +321,7 @@ func (hd *httpsDialer) dialTLS( // create TLS configuration tlsConfig := &tls.Config{ - InsecureSkipVerify: true, // Note: we're going to verify at the end of the func + InsecureSkipVerify: true, // Note: we're going to verify at the end of the func! NextProtos: []string{"h2", "http/1.1"}, RootCAs: hd.rootCAs, ServerName: tactic.SNI, @@ -343,7 +358,7 @@ func (hd *httpsDialer) dialTLS( return nil, err } - // make sure the tactic know it worked + // make sure the observer knows it worked hd.stats.OnSuccess(tactic) return tlsConn, nil diff --git a/internal/enginenetx/httpsdialer_test.go b/internal/enginenetx/httpsdialer_test.go index 57e3a1cbf1..e6010a0a7b 100644 --- a/internal/enginenetx/httpsdialer_test.go +++ b/internal/enginenetx/httpsdialer_test.go @@ -26,7 +26,7 @@ const ( httpsDialerCancelingContextStatsTrackerOnSuccess ) -// httpsDialerCancelingContextStatsTracker is an [httpsDialerStatsTracker] with a cancel +// httpsDialerCancelingContextStatsTracker is an [httpsDialerEventsHandler] with a cancel // function that causes the context to be canceled once we start dialing. // // This struct helps with testing [httpsDialer] is WAI when the context @@ -36,31 +36,31 @@ type httpsDialerCancelingContextStatsTracker struct { flags int } -var _ httpsDialerStatsTracker = &httpsDialerCancelingContextStatsTracker{} +var _ httpsDialerEventsHandler = &httpsDialerCancelingContextStatsTracker{} -// OnStarting implements httpsDialerStatsTracker. +// OnStarting implements httpsDialerEventsHandler. func (st *httpsDialerCancelingContextStatsTracker) OnStarting(tactic *httpsDialerTactic) { if (st.flags & httpsDialerCancelingContextStatsTrackerOnStarting) != 0 { st.cancel() } } -// OnTCPConnectError implements httpsDialerStatsTracker. +// OnTCPConnectError implements httpsDialerEventsHandler. func (*httpsDialerCancelingContextStatsTracker) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing } -// OnTLSHandshakeError implements httpsDialerStatsTracker. +// OnTLSHandshakeError implements httpsDialerEventsHandler. func (*httpsDialerCancelingContextStatsTracker) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing } -// OnTLSVerifyError implements httpsDialerStatsTracker. +// OnTLSVerifyError implements httpsDialerEventsHandler. func (*httpsDialerCancelingContextStatsTracker) OnTLSVerifyError(tactic *httpsDialerTactic, err error) { // nothing } -// OnSuccess implements httpsDialerStatsTracker. +// OnSuccess implements httpsDialerEventsHandler. func (st *httpsDialerCancelingContextStatsTracker) OnSuccess(tactic *httpsDialerTactic) { if (st.flags & httpsDialerCancelingContextStatsTrackerOnSuccess) != 0 { st.cancel() @@ -78,7 +78,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { short bool // stats is the stats tracker to use. - stats httpsDialerStatsTracker + stats httpsDialerEventsHandler // endpoint is the endpoint to connect to consisting of a domain // name or IP address followed by a TCP port @@ -101,7 +101,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "net.SplitHostPort failure", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "www.example.com", // note: here the port is missing scenario: netemx.InternetScenario, configureDPI: func(dpi *netem.DPIEngine) { @@ -117,7 +117,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "hd.policy.LookupTactics failure", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "www.example.nonexistent:443", // note: the domain does not exist scenario: netemx.InternetScenario, configureDPI: func(dpi *netem.DPIEngine) { @@ -131,7 +131,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "successful dial with multiple addresses", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "www.example.com:443", scenario: []*netemx.ScenarioDomainAddresses{{ Domains: []string{ @@ -157,7 +157,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "with TCP connect errors", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "www.example.com:443", scenario: []*netemx.ScenarioDomainAddresses{{ Domains: []string{ @@ -191,7 +191,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "with TLS handshake errors", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "www.example.com:443", scenario: []*netemx.ScenarioDomainAddresses{{ Domains: []string{ @@ -221,7 +221,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "with a TLS certificate valid for ANOTHER domain", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "wrong.host.badssl.com:443", scenario: []*netemx.ScenarioDomainAddresses{{ Domains: []string{ @@ -246,7 +246,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "with TLS certificate signed by an unknown authority", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "untrusted-root.badssl.com:443", scenario: []*netemx.ScenarioDomainAddresses{{ Domains: []string{ @@ -271,7 +271,7 @@ func TestHTTPSDialerNetemQA(t *testing.T) { { name: "with expired TLS certificate", short: true, - stats: &nullStatsTracker{}, + stats: &nullStatsManager{}, endpoint: "expired.badssl.com:443", scenario: []*netemx.ScenarioDomainAddresses{{ Domains: []string{ @@ -512,7 +512,7 @@ func TestHTTPSDialerHostNetworkQA(t *testing.T) { Logger: log.Log, Resolver: resolver, }, - &nullStatsTracker{}, + &nullStatsManager{}, ) URL := runtimex.Try1(url.Parse(server.URL)) diff --git a/internal/enginenetx/network.go b/internal/enginenetx/network.go index bd7a99ab49..58f4a0f4d6 100644 --- a/internal/enginenetx/network.go +++ b/internal/enginenetx/network.go @@ -18,18 +18,20 @@ import ( ) // Network is the network abstraction used by the OONI engine. +// +// The zero value is invalid; construct using the [NewNetwork] func. type Network struct { reso model.Resolver stats *statsManager txp model.HTTPTransport } -// HTTPTransport returns the [model.HTTPTransport] that the engine should use. +// HTTPTransport returns the underlying [model.HTTPTransport]. func (n *Network) HTTPTransport() model.HTTPTransport { return n.txp } -// NewHTTPClient is a convenience function for building a [model.HTTPClient] using +// NewHTTPClient is a convenience function for building an [*http.Client] using // the underlying [model.HTTPTransport] and the correct cookies configuration. func (n *Network) NewHTTPClient() *http.Client { // Note: cookiejar.New cannot fail, so we're using runtimex.Try1 here @@ -43,7 +45,8 @@ func (n *Network) NewHTTPClient() *http.Client { // Close ensures that we close idle connections and persist statistics. func (n *Network) Close() error { - // TODO(bassosimone): do we want to introduce "once" semantics in this method? + // TODO(bassosimone): do we want to introduce "once" semantics in this method? It + // does not seem necessary since there's no resource we can close just once. // make sure we close the transport's idle connections n.txp.CloseIdleConnections() @@ -63,7 +66,7 @@ func (n *Network) Close() error { // // Arguments: // -// - counter is the [*bytecounter.Counter] to use. +// - counter is the [*bytecounter.Counter] to use; // // - kvStore is a [model.KeyValueStore] for persisting stats; // @@ -73,7 +76,7 @@ func (n *Network) Close() error { // // - resolver is the [model.Resolver] to use. // -// The presence of the proxyURL will cause this function to possibly build a +// The presence of the proxyURL MAY cause this function to possibly build a // network with different behavior with respect to circumvention. If there is // an upstream proxy we're going to trust it is doing circumvention for us. func NewNetwork( @@ -91,6 +94,9 @@ func NewNetwork( // Create manager for keeping track of statistics stats := newStatsManager(kvStore, logger) + // TODO(bassosimone): the documentation says we MAY avoid specific policies + // when using a proxy, should we actually implement that? + // Create a TLS dialer ONLY used for dialing TLS connections. This dialer will use // happy-eyeballs and possibly custom policies for dialing TLS connections. httpsDialer := newHTTPSDialer( diff --git a/internal/enginenetx/staticpolicy.go b/internal/enginenetx/staticpolicy.go index 5ca84a5c1f..80ddea7a00 100644 --- a/internal/enginenetx/staticpolicy.go +++ b/internal/enginenetx/staticpolicy.go @@ -90,15 +90,24 @@ var _ httpsDialerPolicy = &staticPolicy{} // LookupTactics implements httpsDialerPolicy. func (ldp *staticPolicy) LookupTactics( ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + // check whether an entry exists in the user-provided map tactics, found := ldp.Root.DomainEndpoints[net.JoinHostPort(domain, port)] if !found { return ldp.Fallback.LookupTactics(ctx, domain, port) } + // emit the resuults, which may possibly be empty out := make(chan *httpsDialerTactic) go func() { - defer close(out) + defer close(out) // let the caller know we're done for _, tactic := range tactics { + + // We read this data from disk, we se cannot exclude the case where a user + // provides a file containing an explicitly nil tactic + if tactic == nil { + continue + } + out <- tactic } }() diff --git a/internal/enginenetx/staticpolicy_test.go b/internal/enginenetx/staticpolicy_test.go index 51875873f9..f783b29026 100644 --- a/internal/enginenetx/staticpolicy_test.go +++ b/internal/enginenetx/staticpolicy_test.go @@ -65,13 +65,16 @@ func TestStaticPolicy(t *testing.T) { input: (func() []byte { return runtimex.Try1(json.Marshal(&staticPolicyRoot{ DomainEndpoints: map[string][]*httpsDialerTactic{ + + // Please, note how the input includes explicitly nil entries + // with the purpose of making sure the code can handle them "api.ooni.io:443": {{ Address: "162.55.247.208", InitialDelay: 0, Port: "443", SNI: "api.ooni.io", VerifyHostname: "api.ooni.io", - }, { + }, nil, { Address: "46.101.82.151", InitialDelay: 300 * time.Millisecond, Port: "443", @@ -83,7 +86,7 @@ func TestStaticPolicy(t *testing.T) { Port: "443", SNI: "api.ooni.io", VerifyHostname: "api.ooni.io", - }, { + }, nil, { Address: "46.101.82.151", InitialDelay: 3000 * time.Millisecond, Port: "443", @@ -95,7 +98,9 @@ func TestStaticPolicy(t *testing.T) { Port: "443", SNI: "www.example.com", VerifyHostname: "api.ooni.io", - }}, + }, nil}, + // + }, Version: staticPolicyVersion, })) @@ -111,7 +116,7 @@ func TestStaticPolicy(t *testing.T) { Port: "443", SNI: "api.ooni.io", VerifyHostname: "api.ooni.io", - }, { + }, nil, { Address: "46.101.82.151", InitialDelay: 300 * time.Millisecond, Port: "443", @@ -123,7 +128,7 @@ func TestStaticPolicy(t *testing.T) { Port: "443", SNI: "api.ooni.io", VerifyHostname: "api.ooni.io", - }, { + }, nil, { Address: "46.101.82.151", InitialDelay: 3000 * time.Millisecond, Port: "443", @@ -135,7 +140,7 @@ func TestStaticPolicy(t *testing.T) { Port: "443", SNI: "www.example.com", VerifyHostname: "api.ooni.io", - }}, + }, nil}, }, Version: staticPolicyVersion, }, @@ -182,7 +187,13 @@ func TestStaticPolicy(t *testing.T) { } staticPolicyRoot := &staticPolicyRoot{ DomainEndpoints: map[string][]*httpsDialerTactic{ - "api.ooni.io:443": {expectedTactic}, + // Note that here we're adding explicitly nil entries + // to make sure that the code correctly handles 'em + "api.ooni.io:443": { + nil, + expectedTactic, + nil, + }, }, Version: staticPolicyVersion, } diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index fba166cbb4..c429c66efd 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -18,33 +18,33 @@ import ( "github.com/ooni/probe-cli/v3/internal/runtimex" ) -// nullStatsTracker is the "null" [httpsDialerStatsTracker]. -type nullStatsTracker struct{} +// nullStatsManager is the "null" [httpsDialerEventsHandler]. +type nullStatsManager struct{} -var _ httpsDialerStatsTracker = &nullStatsTracker{} +var _ httpsDialerEventsHandler = &nullStatsManager{} -// OnStarting implements httpsDialerStatsTracker. -func (*nullStatsTracker) OnStarting(tactic *httpsDialerTactic) { +// OnStarting implements httpsDialerEventsHandler. +func (*nullStatsManager) OnStarting(tactic *httpsDialerTactic) { // nothing } -// OnSuccess implements httpsDialerStatsTracker. -func (*nullStatsTracker) OnSuccess(tactic *httpsDialerTactic) { +// OnSuccess implements httpsDialerEventsHandler. +func (*nullStatsManager) OnSuccess(tactic *httpsDialerTactic) { // nothing } -// OnTCPConnectError implements httpsDialerStatsTracker. -func (*nullStatsTracker) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) { +// OnTCPConnectError implements httpsDialerEventsHandler. +func (*nullStatsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing } -// OnTLSHandshakeError implements httpsDialerStatsTracker. -func (*nullStatsTracker) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { +// OnTLSHandshakeError implements httpsDialerEventsHandler. +func (*nullStatsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // nothing } -// OnTLSVerifyError implements httpsDialerStatsTracker. -func (*nullStatsTracker) OnTLSVerifyError(tactic *httpsDialerTactic, err error) { +// OnTLSVerifyError implements httpsDialerEventsHandler. +func (*nullStatsManager) OnTLSVerifyError(tactic *httpsDialerTactic, err error) { // nothing } @@ -88,6 +88,8 @@ type statsTactic struct { } func statsCloneMapStringInt64(input map[string]int64) (output map[string]int64) { + // ~BUG: this function always returns an initialized map even when the + // original map is nil--is this an issue? No! output = make(map[string]int64) for key, value := range input { output[key] = value @@ -97,6 +99,9 @@ func statsCloneMapStringInt64(input map[string]int64) (output map[string]int64) // Clone clones a given [*statsTactic] func (st *statsTactic) Clone() *statsTactic { + // Implementation note: a time.Time consists of an uint16, an int64 and + // a pointer to a location which is typically immutable, so it's perfectly + // fine to copy the LastUpdate field by assignment. return &statsTactic{ CountStarted: st.CountStarted, CountTCPConnectError: st.CountTCPConnectError, @@ -126,6 +131,13 @@ func statsDomainEndpointRemoveOldEntries(input *statsDomainEndpoint) (output *st oneWeek := 7 * 24 * time.Hour now := time.Now() for summary, tactic := range input.Tactics { + + // we serialize stats to disk, so we cannot rule out the case where the user + // explicitly edits the stats to include a malformed entry + if tactic == nil || tactic.Tactic == nil { + continue + } + if delta := now.Sub(tactic.LastUpdated); delta > oneWeek { continue } @@ -152,10 +164,21 @@ type statsContainer struct { func statsContainerRemoveOldEntries(input *statsContainer) (output *statsContainer) { output = newStatsContainer() for domainEpnt, inputStats := range input.DomainEndpoints { + + // We serialize this data to disk, so we need to account for the case + // where a user has manually edited the JSON to add a nil value + if inputStats == nil { + continue + } + prunedStats := statsDomainEndpointRemoveOldEntries(inputStats) + + // We don't want to include an entry when it's not empty because all the + // stats inside it have just been pruned if len(prunedStats.Tactics) <= 0 { continue } + output.DomainEndpoints[domainEpnt] = prunedStats } return @@ -183,7 +206,8 @@ func (c *statsContainer) SetStatsTacticLocked(tactic *httpsDialerTactic, record Tactics: map[string]*statsTactic{}, } - // make sure the map is initialized + // make sure the map is initialized -- not a void concern given that we're + // reading this structure from the disk if len(c.DomainEndpoints) <= 0 { c.DomainEndpoints = make(map[string]*statsDomainEndpoint) } @@ -202,8 +226,8 @@ func newStatsContainer() *statsContainer { } } -// statsManager implements [httpsDialerStatsTracker] by storing -// the relevant statistics in a [model.KeyValueStore]. +// statsManager implements [httpsDialerEventsHandler] by storing the +// relevant statistics in a [model.KeyValueStore]. // // The zero value of this structure is not ready to use; please, use the // [newStatsManager] factory to create a new instance. @@ -273,9 +297,9 @@ func newStatsManager(kvStore model.KeyValueStore, logger model.Logger) *statsMan } } -var _ httpsDialerStatsTracker = &statsManager{} +var _ httpsDialerEventsHandler = &statsManager{} -// OnStarting implements httpsDialerStatsManager. +// OnStarting implements httpsDialerEventsHandler. func (mt *statsManager) OnStarting(tactic *httpsDialerTactic) { // get exclusive access defer mt.mu.Unlock() @@ -306,7 +330,7 @@ func (mt *statsManager) OnStarting(tactic *httpsDialerTactic) { record.LastUpdated = time.Now() } -// OnTCPConnectError implements httpsDialerStatsManager. +// OnTCPConnectError implements httpsDialerEventsHandler. func (mt *statsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) { // get exclusive access defer mt.mu.Unlock() @@ -329,7 +353,7 @@ func (mt *statsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDial record.HistoTCPConnectError[err.Error()]++ } -// OnTLSHandshakeError implements httpsDialerStatsManager. +// OnTLSHandshakeError implements httpsDialerEventsHandler. func (mt *statsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDialerTactic, err error) { // get exclusive access defer mt.mu.Unlock() @@ -352,7 +376,7 @@ func (mt *statsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDi record.HistoTLSHandshakeError[err.Error()]++ } -// OnTLSVerifyError implements httpsDialerStatsManager. +// OnTLSVerifyError implements httpsDialerEventsHandler. func (mt *statsManager) OnTLSVerifyError(tactic *httpsDialerTactic, err error) { // get exclusive access defer mt.mu.Unlock() @@ -371,7 +395,7 @@ func (mt *statsManager) OnTLSVerifyError(tactic *httpsDialerTactic, err error) { record.LastUpdated = time.Now() } -// OnSuccess implements httpsSDialerStatsManager. +// OnSuccess implements httpsDialerEventsHandler. func (mt *statsManager) OnSuccess(tactic *httpsDialerTactic) { // get exclusive access defer mt.mu.Unlock() @@ -391,7 +415,8 @@ func (mt *statsManager) OnSuccess(tactic *httpsDialerTactic) { // Close implements io.Closer func (mt *statsManager) Close() error { - // TODO(bassosimone): do we need to apply a "once" semantics to this method? + // TODO(bassosimone): do we need to apply a "once" semantics to this method? Perhaps no + // given that there is no resource that we can close only once... // get exclusive access defer mt.mu.Unlock() @@ -411,12 +436,21 @@ func (mt *statsManager) LookupTactics(domain string, port string) ([]*statsTacti mt.mu.Lock() // check whether we have information on this endpoint + // + // Note: in case mt.container.DomainEndpoints is nil, this access pattern + // will return to us a nil pointer and false + // + // we also protect against the case where a user has configured a nil + // domainEpnts value inside the serialized JSON to crash us domainEpnts, good := mt.container.DomainEndpoints[net.JoinHostPort(domain, port)] - if !good { - return out, len(out) > 0 + if !good || domainEpnts == nil { + return out, false } // return a copy of each entry + // + // Note: if Tactics here is nil, we're just not going to have + // anything to include into the out list for _, entry := range domainEpnts.Tactics { out = append(out, entry.Clone()) } diff --git a/internal/enginenetx/statsmanager_test.go b/internal/enginenetx/statsmanager_test.go index 9c8d50e4ee..f633acf46e 100644 --- a/internal/enginenetx/statsmanager_test.go +++ b/internal/enginenetx/statsmanager_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "sort" + "sync" "testing" "time" @@ -15,6 +16,7 @@ import ( "github.com/ooni/probe-cli/v3/internal/bytecounter" "github.com/ooni/probe-cli/v3/internal/kvstore" "github.com/ooni/probe-cli/v3/internal/mocks" + "github.com/ooni/probe-cli/v3/internal/model" "github.com/ooni/probe-cli/v3/internal/netemx" "github.com/ooni/probe-cli/v3/internal/netxlite" "github.com/ooni/probe-cli/v3/internal/runtimex" @@ -383,6 +385,25 @@ func TestLoadStatsContainer(t *testing.T) { VerifyHostname: "api.ooni.io", }, }, + "162.55.247.208:443 sni=www.example.xyz verify=api.ooni.io": nil, // should be skipped because nil + "162.55.247.208:443 sni=www.example.it verify=api.ooni.io": { // should be skipped because nil tactic + CountStarted: 4, + CountTCPConnectError: 1, + CountTLSHandshakeError: 1, + CountTLSVerificationError: 1, + CountSuccess: 1, + HistoTCPConnectError: map[string]int64{ + "connection_refused": 1, + }, + HistoTLSHandshakeError: map[string]int64{ + "generic_timeout_error": 1, + }, + HistoTLSVerificationError: map[string]int64{ + "ssl_invalid_hostname": 1, + }, + LastUpdated: fourtyFiveMinutesAgo, + Tactic: nil, + }, }, }, "www.kernel.org:443": { // this whole entry should be skipped because it's too old @@ -413,6 +434,7 @@ func TestLoadStatsContainer(t *testing.T) { }, }, }, + "www.kerneltrap.org:443": nil, // this whole entry should be skipped because it's nil }, Version: statsContainerVersion, } @@ -762,7 +784,7 @@ func TestStatsManagerCallbacks(t *testing.T) { } // Make sure that we can safely obtain statistics for a domain and a port. -func TestStatsManagerLookupTacticsStats(t *testing.T) { +func TestStatsManagerLookupTactics(t *testing.T) { // prepare the content of the stats twentyMinutesAgo := time.Now().Add(-20 * time.Minute) @@ -884,4 +906,62 @@ func TestStatsManagerLookupTacticsStats(t *testing.T) { t.Fatal("unexpected tactics length") } }) + + t.Run("when the stats manager is manually configured to have an empty container", func(t *testing.T) { + stats := &statsManager{ + container: &statsContainer{ /* explicitly empty */ }, + kvStore: kvStore, + logger: model.DiscardLogger, + mu: sync.Mutex{}, + } + tactics, good := stats.LookupTactics("api.ooni.io", "443") + if good { + t.Fatal("expected !good") + } + if len(tactics) != 0 { + t.Fatal("unexpected tactics length") + } + }) + + t.Run("when the stats manager is manually configured to have nil tactics", func(t *testing.T) { + stats := &statsManager{ + container: &statsContainer{ + DomainEndpoints: map[string]*statsDomainEndpoint{ + "api.ooni.io:443": nil, + }, + Version: 0, + }, + kvStore: kvStore, + logger: model.DiscardLogger, + mu: sync.Mutex{}, + } + tactics, good := stats.LookupTactics("api.ooni.io", "443") + if good { + t.Fatal("expected !good") + } + if len(tactics) != 0 { + t.Fatal("unexpected tactics length") + } + }) + + t.Run("when the stats manager is manually configured to have empty tactics", func(t *testing.T) { + stats := &statsManager{ + container: &statsContainer{ + DomainEndpoints: map[string]*statsDomainEndpoint{ + "api.ooni.io:443": { /* explicitly left empty */ }, + }, + Version: 0, + }, + kvStore: kvStore, + logger: model.DiscardLogger, + mu: sync.Mutex{}, + } + tactics, good := stats.LookupTactics("api.ooni.io", "443") + if good { + t.Fatal("expected !good") + } + if len(tactics) != 0 { + t.Fatal("unexpected tactics length") + } + }) } diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index ff0e43cb7a..6adad4af4b 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -32,19 +32,28 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str out := make(chan *httpsDialerTactic) go func() { + defer close(out) // make sure the parent knows when we're done index := 0 - defer close(out) - // make sure we don't emit two equal policy in a single run + // useful to make sure we don't emit two equal policy in a single run uniq := make(map[string]int) // function that emits a given tactic unless we already emitted it maybeEmitTactic := func(t *httpsDialerTactic) { + // as a safety mechanism let's gracefully handle the + // case in which the tactic is nil + if t == nil { + return + } + + // handle the case in which we already emitted a policy key := t.tacticSummaryKey() if uniq[key] > 0 { return } uniq[key]++ + + // 🚀!!! t.InitialDelay = happyEyeballsDelay(index) index += 1 out <- t @@ -65,11 +74,15 @@ func (p *statsPolicy) LookupTactics(ctx context.Context, domain string, port str } func (p *statsPolicy) statsLookupTactics(domain string, port string) (out []*httpsDialerTactic) { + + // obtain information from the stats--here the result may be false if the + // stats do not contain any information about the domain and port tactics, good := p.Stats.LookupTactics(domain, port) if !good { return } + // successRate is a convenience function for computing the success rate successRate := func(t *statsTactic) (rate float64) { if t.CountStarted > 0 { rate = float64(t.CountSuccess) / float64(t.CountStarted) @@ -77,10 +90,9 @@ func (p *statsPolicy) statsLookupTactics(domain string, port string) (out []*htt return } + // Implementation note: the function should implement the "less" semantics + // but we want descending sort, so we're using a "more" semantics sort.SliceStable(tactics, func(i, j int) bool { - // Implementation note: the function should implement the "less" semantics - // but we want descending sort, so we're using a "more" semantics - // // TODO(bassosimone): should we also consider the number of samples // we have and how recent a sample is? return successRate(tactics[i]) > successRate(tactics[j]) @@ -90,7 +102,10 @@ func (p *statsPolicy) statsLookupTactics(domain string, port string) (out []*htt // make sure we only include samples with 1+ successes; we don't want this policy // to return what we already know it's not working and it will be the purpose of the // fallback policy to generate new tactics to test - if t.CountSuccess > 0 { + // + // additionally, as a precautionary and defensive measure, make sure t.Tactic + // is nil before adding the real tactic to the return list + if t.CountSuccess > 0 && t.Tactic != nil { out = append(out, t.Tactic) } } diff --git a/internal/enginenetx/statspolicy_test.go b/internal/enginenetx/statspolicy_test.go index 5031ae1a96..6d7f7f0fe8 100644 --- a/internal/enginenetx/statspolicy_test.go +++ b/internal/enginenetx/statspolicy_test.go @@ -222,4 +222,59 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { t.Fatal(diff) } }) + + t.Run("we avoid manipulating nil tactics", func(t *testing.T) { + // create stats manager + stats := createStatsManager("api.ooni.io:443", expectTacticsStats...) + + // create the composed policy + policy := &statsPolicy{ + Fallback: &mocksPolicy{ + MockLookupTactics: func(ctx context.Context, domain, port string) <-chan *httpsDialerTactic { + out := make(chan *httpsDialerTactic) + go func() { + defer close(out) + out <- nil // explicitly send nil on the channel + }() + return out + }, + }, + Stats: stats, + } + + // obtain the tactics from the saved stats + var tactics []*httpsDialerTactic + for entry := range policy.LookupTactics(context.Background(), "api.ooni.io", "443") { + tactics = append(tactics, entry) + } + + // compute the list of results we expect to see from the stats data + var expect []*httpsDialerTactic + idx := 0 + for _, entry := range expectTacticsStats { + if entry.CountSuccess <= 0 { + continue // we SHOULD NOT include entries that systematically failed + } + t := entry.Tactic.Clone() + t.InitialDelay = happyEyeballsDelay(idx) + expect = append(expect, t) + idx++ + } + + // perform the actual comparison + if diff := cmp.Diff(expect, tactics); diff != "" { + t.Fatal(diff) + } + }) +} + +type mocksPolicy struct { + MockLookupTactics func(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic +} + +var _ httpsDialerPolicy = &mocksPolicy{} + +// LookupTactics implements httpsDialerPolicy. +func (p *mocksPolicy) LookupTactics(ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { + return p.MockLookupTactics(ctx, domain, port) } From aaec5f69f5973a67c95fe0b0ffb2367188faa1a4 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 26 Sep 2023 21:39:59 +0200 Subject: [PATCH 2/5] Apply suggestions from code review --- internal/enginenetx/beaconspolicy.go | 4 ++-- internal/enginenetx/httpsdialer.go | 4 ++-- internal/enginenetx/staticpolicy.go | 2 +- internal/enginenetx/statspolicy.go | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/enginenetx/beaconspolicy.go b/internal/enginenetx/beaconspolicy.go index 0ae84804fa..a2a1fbcf4a 100644 --- a/internal/enginenetx/beaconspolicy.go +++ b/internal/enginenetx/beaconspolicy.go @@ -33,7 +33,7 @@ func (p *beaconsPolicy) LookupTactics(ctx context.Context, domain, port string) defer close(out) // tell the parent when we're done index := 0 - // emit beacons related tactics first which is empty if there are + // emit beacons related tactics first which are empty if there are // no beacons for the givend domain and port for tx := range p.tacticsForDomain(domain, port) { tx.InitialDelay = happyEyeballsDelay(index) @@ -42,7 +42,7 @@ func (p *beaconsPolicy) LookupTactics(ctx context.Context, domain, port string) } // now fallback to get more tactics (typically here the fallback - // uses the DNS and obtains some extra tactic) + // uses the DNS and obtains some extra tactics) for tx := range p.Fallback.LookupTactics(ctx, domain, port) { tx.InitialDelay = happyEyeballsDelay(index) index += 1 diff --git a/internal/enginenetx/httpsdialer.go b/internal/enginenetx/httpsdialer.go index 51a86c2a80..6d4f1bf072 100644 --- a/internal/enginenetx/httpsdialer.go +++ b/internal/enginenetx/httpsdialer.go @@ -83,7 +83,7 @@ func (dt *httpsDialerTactic) tacticSummaryKey() string { // domainEndpointKey returns a string consisting of the domain endpoint only. // -// We always use the VerifyHostname to construct the domain endpoint. +// We always use the VerifyHostname and the Port to construct the domain endpoint. func (dt *httpsDialerTactic) domainEndpointKey() string { return net.JoinHostPort(dt.VerifyHostname, dt.Port) } @@ -286,7 +286,7 @@ func (hd *httpsDialer) worker( // perform the actual dial conn, err := hd.dialTLS(ctx, prefixLogger, t0, tactic) - // send result to the parent + // send results to the parent writer <- &httpsDialerErrorOrConn{Conn: conn, Err: err} } } diff --git a/internal/enginenetx/staticpolicy.go b/internal/enginenetx/staticpolicy.go index 80ddea7a00..24f7279996 100644 --- a/internal/enginenetx/staticpolicy.go +++ b/internal/enginenetx/staticpolicy.go @@ -90,7 +90,7 @@ var _ httpsDialerPolicy = &staticPolicy{} // LookupTactics implements httpsDialerPolicy. func (ldp *staticPolicy) LookupTactics( ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { - // check whether an entry exists in the user-provided map + // check whether an entry exists in the user-provided map, which MAY be nil tactics, found := ldp.Root.DomainEndpoints[net.JoinHostPort(domain, port)] if !found { return ldp.Fallback.LookupTactics(ctx, domain, port) diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 6adad4af4b..1daab75a01 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -104,7 +104,7 @@ func (p *statsPolicy) statsLookupTactics(domain string, port string) (out []*htt // fallback policy to generate new tactics to test // // additionally, as a precautionary and defensive measure, make sure t.Tactic - // is nil before adding the real tactic to the return list + // is not nil before adding the real tactic to the return list if t.CountSuccess > 0 && t.Tactic != nil { out = append(out, t.Tactic) } From 1226f0751032248d277fd91ef36fe7dd2bd9ade0 Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 26 Sep 2023 22:27:21 +0200 Subject: [PATCH 3/5] x --- internal/enginenetx/staticpolicy.go | 24 ++++++++--- internal/enginenetx/staticpolicy_test.go | 51 +++++++++++++++++++++- internal/enginenetx/statsmanager.go | 55 +++++++++++++++++++----- internal/enginenetx/statsmanager_test.go | 53 ++++++++++++++++++++++- 4 files changed, 164 insertions(+), 19 deletions(-) diff --git a/internal/enginenetx/staticpolicy.go b/internal/enginenetx/staticpolicy.go index 24f7279996..90a33c2305 100644 --- a/internal/enginenetx/staticpolicy.go +++ b/internal/enginenetx/staticpolicy.go @@ -91,25 +91,35 @@ var _ httpsDialerPolicy = &staticPolicy{} func (ldp *staticPolicy) LookupTactics( ctx context.Context, domain string, port string) <-chan *httpsDialerTactic { // check whether an entry exists in the user-provided map, which MAY be nil + // if/when the user has chosen the static policy to be as such tactics, found := ldp.Root.DomainEndpoints[net.JoinHostPort(domain, port)] if !found { return ldp.Fallback.LookupTactics(ctx, domain, port) } + // note that we also need to fallback when the tactics contains an empty list + // or a list that only contains nil entries + tactics = staticPolicyRemoveNilEntries(tactics) + if len(tactics) <= 0 { + return ldp.Fallback.LookupTactics(ctx, domain, port) + } + // emit the resuults, which may possibly be empty out := make(chan *httpsDialerTactic) go func() { defer close(out) // let the caller know we're done for _, tactic := range tactics { - - // We read this data from disk, we se cannot exclude the case where a user - // provides a file containing an explicitly nil tactic - if tactic == nil { - continue - } - out <- tactic } }() return out } + +func staticPolicyRemoveNilEntries(input []*httpsDialerTactic) (output []*httpsDialerTactic) { + for _, entry := range input { + if entry != nil { + output = append(output, entry) + } + } + return +} diff --git a/internal/enginenetx/staticpolicy_test.go b/internal/enginenetx/staticpolicy_test.go index f783b29026..baf2010c4d 100644 --- a/internal/enginenetx/staticpolicy_test.go +++ b/internal/enginenetx/staticpolicy_test.go @@ -194,6 +194,13 @@ func TestStaticPolicy(t *testing.T) { expectedTactic, nil, }, + + // We add additional entries to make sure that in those + // cases we are going to fallback as they're basically empty + // and so non-actionable for us. + "api.ooni.xyz:443": nil, + "api.ooni.org:443": {}, + "api.ooni.com:443": {nil, nil, nil}, }, Version: staticPolicyVersion, } @@ -225,7 +232,7 @@ func TestStaticPolicy(t *testing.T) { } }) - t.Run("we fallback if needed", func(t *testing.T) { + t.Run("we fallback if there is no entry in the static policy", func(t *testing.T) { ctx := context.Background() fallback := &dnsPolicy{ @@ -261,5 +268,47 @@ func TestStaticPolicy(t *testing.T) { t.Fatal(diff) } }) + + t.Run("we fallback if the entry in the static policy is ~empty", func(t *testing.T) { + ctx := context.Background() + + fallback := &dnsPolicy{ + Logger: log.Log, + Resolver: &mocks.Resolver{ + MockLookupHost: func(ctx context.Context, domain string) ([]string, error) { + return []string{"93.184.216.34"}, nil + }, + }, + } + + policy, err := newStaticPolicy(kvStore, fallback) + if err != nil { + t.Fatal(err) + } + + // these cases are specially constructed to be empty/invalid static policies + for _, domain := range []string{"api.ooni.xyz", "api.ooni.org", "api.ooni.com"} { + t.Run(domain, func(t *testing.T) { + tactics := policy.LookupTactics(ctx, domain, "443") + got := []*httpsDialerTactic{} + for tactic := range tactics { + t.Logf("%+v", tactic) + got = append(got, tactic) + } + + expect := []*httpsDialerTactic{{ + Address: "93.184.216.34", + InitialDelay: 0, + Port: "443", + SNI: domain, + VerifyHostname: domain, + }} + + if diff := cmp.Diff(expect, got); diff != "" { + t.Fatal(diff) + } + }) + } + }) }) } diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index c429c66efd..ce7f0fcb6a 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -87,9 +87,11 @@ type statsTactic struct { Tactic *httpsDialerTactic } -func statsCloneMapStringInt64(input map[string]int64) (output map[string]int64) { - // ~BUG: this function always returns an initialized map even when the - // original map is nil--is this an issue? No! +func statsMaybeCloneMapStringInt64(input map[string]int64) (output map[string]int64) { + // distinguish and preserve nil versus empty + if input == nil { + return + } output = make(map[string]int64) for key, value := range input { output[key] = value @@ -97,11 +99,21 @@ func statsCloneMapStringInt64(input map[string]int64) (output map[string]int64) return } +func statsMaybeCloneTactic(input *httpsDialerTactic) (output *httpsDialerTactic) { + if input != nil { + output = input.Clone() + } + return +} + // Clone clones a given [*statsTactic] func (st *statsTactic) Clone() *statsTactic { // Implementation note: a time.Time consists of an uint16, an int64 and // a pointer to a location which is typically immutable, so it's perfectly // fine to copy the LastUpdate field by assignment. + // + // here we're using a bunch of robustness aware mechanisms to clone + // considering that the struct may be edited by the user return &statsTactic{ CountStarted: st.CountStarted, CountTCPConnectError: st.CountTCPConnectError, @@ -110,11 +122,11 @@ func (st *statsTactic) Clone() *statsTactic { CountTLSHandshakeInterrupt: st.CountTLSHandshakeInterrupt, CountTLSVerificationError: st.CountTLSVerificationError, CountSuccess: st.CountSuccess, - HistoTCPConnectError: statsCloneMapStringInt64(st.HistoTCPConnectError), - HistoTLSHandshakeError: statsCloneMapStringInt64(st.HistoTLSHandshakeError), - HistoTLSVerificationError: statsCloneMapStringInt64(st.HistoTLSVerificationError), + HistoTCPConnectError: statsMaybeCloneMapStringInt64(st.HistoTCPConnectError), + HistoTLSHandshakeError: statsMaybeCloneMapStringInt64(st.HistoTLSHandshakeError), + HistoTLSVerificationError: statsMaybeCloneMapStringInt64(st.HistoTLSVerificationError), LastUpdated: st.LastUpdated, - Tactic: st.Tactic.Clone(), + Tactic: statsMaybeCloneTactic(st.Tactic), } } @@ -130,6 +142,8 @@ func statsDomainEndpointRemoveOldEntries(input *statsDomainEndpoint) (output *st } oneWeek := 7 * 24 * time.Hour now := time.Now() + + // if .Tactics is empty here we're just going to do nothing for summary, tactic := range input.Tactics { // we serialize stats to disk, so we cannot rule out the case where the user @@ -138,6 +152,12 @@ func statsDomainEndpointRemoveOldEntries(input *statsDomainEndpoint) (output *st continue } + // When .LastUpdated is the zero time.Time value, the check is going to fail + // exactly like the time was 1 or 5 or 10 years ago instead. + // + // See https://go.dev/play/p/HGQT17ueIkq where we show that the zero time + // is handled exactly like any time in the past (it was kinda obvious, but + // sometimes it also make sense to double check assumptions!) if delta := now.Sub(tactic.LastUpdated); delta > oneWeek { continue } @@ -163,6 +183,8 @@ type statsContainer struct { // statsDomainRemoveOldEntries returns a copy of a [*statsContainer] with old entries removed. func statsContainerRemoveOldEntries(input *statsContainer) (output *statsContainer) { output = newStatsContainer() + + // if .DomainEndpoints is nil here we're just going to do nothing for domainEpnt, inputStats := range input.DomainEndpoints { // We serialize this data to disk, so we need to account for the case @@ -330,6 +352,14 @@ func (mt *statsManager) OnStarting(tactic *httpsDialerTactic) { record.LastUpdated = time.Now() } +func statsSafeIncrementMapStringInt64(input *map[string]int64, value string) { + runtimex.Assert(input != nil, "passed nil pointer to a map") + if *input == nil { + *input = make(map[string]int64) + } + (*input)[value]++ +} + // OnTCPConnectError implements httpsDialerEventsHandler. func (mt *statsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDialerTactic, err error) { // get exclusive access @@ -349,8 +379,10 @@ func (mt *statsManager) OnTCPConnectError(ctx context.Context, tactic *httpsDial record.CountTCPConnectInterrupt++ return } + + runtimex.Assert(err != nil, "OnTCPConnectError passed a nil error") record.CountTCPConnectError++ - record.HistoTCPConnectError[err.Error()]++ + statsSafeIncrementMapStringInt64(&record.HistoTCPConnectError, err.Error()) } // OnTLSHandshakeError implements httpsDialerEventsHandler. @@ -372,8 +404,10 @@ func (mt *statsManager) OnTLSHandshakeError(ctx context.Context, tactic *httpsDi record.CountTLSHandshakeInterrupt++ return } + + runtimex.Assert(err != nil, "OnTLSHandshakeError passed a nil error") record.CountTLSHandshakeError++ - record.HistoTLSHandshakeError[err.Error()]++ + statsSafeIncrementMapStringInt64(&record.HistoTLSHandshakeError, err.Error()) } // OnTLSVerifyError implements httpsDialerEventsHandler. @@ -390,8 +424,9 @@ func (mt *statsManager) OnTLSVerifyError(tactic *httpsDialerTactic, err error) { } // update stats + runtimex.Assert(err != nil, "OnTLSVerifyError passed a nil error") record.CountTLSVerificationError++ - record.HistoTLSVerificationError[err.Error()]++ + statsSafeIncrementMapStringInt64(&record.HistoTLSVerificationError, err.Error()) record.LastUpdated = time.Now() } diff --git a/internal/enginenetx/statsmanager_test.go b/internal/enginenetx/statsmanager_test.go index f633acf46e..4714129790 100644 --- a/internal/enginenetx/statsmanager_test.go +++ b/internal/enginenetx/statsmanager_test.go @@ -385,6 +385,31 @@ func TestLoadStatsContainer(t *testing.T) { VerifyHostname: "api.ooni.io", }, }, + "162.55.247.208:443 sni=www.example.tk verify=api.ooni.io": { // should be skipped b/c time is zero + CountStarted: 4, + CountTCPConnectError: 1, + CountTLSHandshakeError: 1, + CountTLSVerificationError: 1, + CountSuccess: 1, + HistoTCPConnectError: map[string]int64{ + "connection_refused": 1, + }, + HistoTLSHandshakeError: map[string]int64{ + "generic_timeout_error": 1, + }, + HistoTLSVerificationError: map[string]int64{ + "ssl_invalid_hostname": 1, + }, + LastUpdated: time.Time{}, // zero value! + Tactic: &httpsDialerTactic{ + Address: "162.55.247.208", + InitialDelay: 0, + Port: "443", + SNI: "www.example.org", + VerifyHostname: "api.ooni.io", + }, + }, + "162.55.247.208:443 sni=www.example.xyz verify=api.ooni.io": nil, // should be skipped because nil "162.55.247.208:443 sni=www.example.it verify=api.ooni.io": { // should be skipped because nil tactic CountStarted: 4, @@ -769,7 +794,6 @@ func TestStatsManagerCallbacks(t *testing.T) { // make sure the stats are the ones we expect diffOptions := []cmp.Option{ cmpopts.IgnoreFields(statsTactic{}, "LastUpdated"), - cmpopts.EquateEmpty(), } if diff := cmp.Diff(tc.expectRoot, root, diffOptions...); diff != "" { t.Fatal(diff) @@ -965,3 +989,30 @@ func TestStatsManagerLookupTactics(t *testing.T) { } }) } + +func TestStatsSafeIncrementMapStringInt64(t *testing.T) { + t.Run("with a nil map", func(t *testing.T) { + var m map[string]int64 + statsSafeIncrementMapStringInt64(&m, "foo") + if m["foo"] != 1 { + t.Fatal("unexpected result") + } + }) + + t.Run("with a non-nil map", func(t *testing.T) { + m := make(map[string]int64) + statsSafeIncrementMapStringInt64(&m, "foo") + if m["foo"] != 1 { + t.Fatal("unexpected result") + } + }) + + t.Run("with an already-initialized map", func(t *testing.T) { + m := make(map[string]int64) + m["foo"] = 16 + statsSafeIncrementMapStringInt64(&m, "foo") + if m["foo"] != 17 { + t.Fatal("unexpected result") + } + }) +} From 9a87823c7876c254626e7e4d139d7c626f6d160f Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 26 Sep 2023 22:36:04 +0200 Subject: [PATCH 4/5] Apply suggestions from code review --- internal/enginenetx/statsmanager.go | 2 +- internal/enginenetx/statspolicy.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/enginenetx/statsmanager.go b/internal/enginenetx/statsmanager.go index ce7f0fcb6a..287434c68a 100644 --- a/internal/enginenetx/statsmanager.go +++ b/internal/enginenetx/statsmanager.go @@ -195,7 +195,7 @@ func statsContainerRemoveOldEntries(input *statsContainer) (output *statsContain prunedStats := statsDomainEndpointRemoveOldEntries(inputStats) - // We don't want to include an entry when it's not empty because all the + // We don't want to include an entry when it's empty because all the // stats inside it have just been pruned if len(prunedStats.Tactics) <= 0 { continue diff --git a/internal/enginenetx/statspolicy.go b/internal/enginenetx/statspolicy.go index 1daab75a01..9b886e1dbe 100644 --- a/internal/enginenetx/statspolicy.go +++ b/internal/enginenetx/statspolicy.go @@ -91,7 +91,7 @@ func (p *statsPolicy) statsLookupTactics(domain string, port string) (out []*htt } // Implementation note: the function should implement the "less" semantics - // but we want descending sort, so we're using a "more" semantics + // but we want descending sorting not ascending, so we're using a "more" semantics sort.SliceStable(tactics, func(i, j int) bool { // TODO(bassosimone): should we also consider the number of samples // we have and how recent a sample is? From f2964a8ba579de50056d17d630919487bff9f8fd Mon Sep 17 00:00:00 2001 From: Simone Basso Date: Tue, 26 Sep 2023 22:43:54 +0200 Subject: [PATCH 5/5] x --- internal/enginenetx/statspolicy_test.go | 52 +++++++++++++++++++++---- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/internal/enginenetx/statspolicy_test.go b/internal/enginenetx/statspolicy_test.go index 6d7f7f0fe8..68530f226d 100644 --- a/internal/enginenetx/statspolicy_test.go +++ b/internal/enginenetx/statspolicy_test.go @@ -28,7 +28,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { CountTLSHandshakeError: 0, CountTLSHandshakeInterrupt: 0, CountTLSVerificationError: 0, - CountSuccess: 5, + CountSuccess: 5, // this one always succeeds, so it should be there HistoTCPConnectError: map[string]int64{}, HistoTLSHandshakeError: map[string]int64{}, HistoTLSVerificationError: map[string]int64{}, @@ -47,7 +47,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { CountTLSHandshakeError: 1, CountTLSHandshakeInterrupt: 0, CountTLSVerificationError: 0, - CountSuccess: 2, + CountSuccess: 2, // this one sometimes succeded so it should be added HistoTCPConnectError: map[string]int64{}, HistoTLSHandshakeError: map[string]int64{}, HistoTLSVerificationError: map[string]int64{}, @@ -63,7 +63,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { CountStarted: 3, CountTCPConnectError: 0, CountTCPConnectInterrupt: 0, - CountTLSHandshakeError: 3, + CountTLSHandshakeError: 3, // this one always failed, so should not be added CountTLSHandshakeInterrupt: 0, CountTLSVerificationError: 0, CountSuccess: 0, @@ -78,6 +78,38 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { SNI: "theconversation.com", VerifyHostname: "api.ooni.io", }, + }, { + CountStarted: 4, + CountTCPConnectError: 0, + CountTCPConnectInterrupt: 0, + CountTLSHandshakeError: 0, + CountTLSHandshakeInterrupt: 0, + CountTLSVerificationError: 0, + CountSuccess: 4, + HistoTCPConnectError: map[string]int64{}, + HistoTLSHandshakeError: map[string]int64{}, + HistoTLSVerificationError: map[string]int64{}, + LastUpdated: twentyMinutesAgo, + Tactic: nil, // the nil policy here should cause this entry to be filtered out + }, { + CountStarted: 0, + CountTCPConnectError: 0, + CountTCPConnectInterrupt: 0, + CountTLSHandshakeError: 0, + CountTLSHandshakeInterrupt: 0, + CountTLSVerificationError: 0, + CountSuccess: 0, + HistoTCPConnectError: map[string]int64{}, + HistoTLSHandshakeError: map[string]int64{}, + HistoTLSVerificationError: map[string]int64{}, + LastUpdated: time.Time{}, // the zero time should exclude this one + Tactic: &httpsDialerTactic{ + Address: beaconAddress, + InitialDelay: 0, + Port: "443", + SNI: "ilpost.it", + VerifyHostname: "api.ooni.io", + }, }} // createStatsManager creates a stats manager given some baseline stats @@ -92,7 +124,9 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { } for _, tx := range tactics { - container.DomainEndpoints[domainEndpoint].Tactics[tx.Tactic.tacticSummaryKey()] = tx + if tx.Tactic != nil { + container.DomainEndpoints[domainEndpoint].Tactics[tx.Tactic.tacticSummaryKey()] = tx + } } kvStore := &kvstore.Memory{} @@ -135,7 +169,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { var expect []*httpsDialerTactic idx := 0 for _, entry := range expectTacticsStats { - if entry.CountSuccess <= 0 { + if entry.CountSuccess <= 0 || entry.Tactic == nil { continue // we SHOULD NOT include entries that systematically failed } t := entry.Tactic.Clone() @@ -199,7 +233,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { var expect []*httpsDialerTactic idx := 0 for _, entry := range expectTacticsStats { - if entry.CountSuccess <= 0 { + if entry.CountSuccess <= 0 || entry.Tactic == nil { continue // we SHOULD NOT include entries that systematically failed } t := entry.Tactic.Clone() @@ -234,7 +268,9 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { out := make(chan *httpsDialerTactic) go func() { defer close(out) - out <- nil // explicitly send nil on the channel + + // explicitly send nil on the channel + out <- nil }() return out }, @@ -252,7 +288,7 @@ func TestStatsPolicyWorkingAsIntended(t *testing.T) { var expect []*httpsDialerTactic idx := 0 for _, entry := range expectTacticsStats { - if entry.CountSuccess <= 0 { + if entry.CountSuccess <= 0 || entry.Tactic == nil { continue // we SHOULD NOT include entries that systematically failed } t := entry.Tactic.Clone()