Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Agents platform and new CLI #104

Merged
merged 39 commits into from
Apr 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
eb4da9b
Refactor of agents: new snapshot store client, task manager, and agent…
gdiazlo Mar 29, 2019
8a610d9
Redesign gossip package to refine the API
gdiazlo Apr 7, 2019
40d10bb
Redesign cli flag parsing. Use new library to generate options from c…
gdiazlo Apr 7, 2019
6ec665d
Update scripts to launch qed components
gdiazlo Apr 7, 2019
58844f9
Objects with start / stop should not block
gdiazlo Apr 7, 2019
8729a04
Include sender into the server package.
gdiazlo Apr 7, 2019
066b569
Server start() does not block. Use stop() to shut it down
gdiazlo Apr 7, 2019
4810039
Change internal server error by bad request errors.
gdiazlo Apr 7, 2019
542d9f0
Remove metadata from BatchSnapshots as it is now in gossip.Message
gdiazlo Apr 7, 2019
940a34b
Comment old testing code until it is refactored with the new gossip api
gdiazlo Apr 7, 2019
59df917
Add documentation to config options for the cli
gdiazlo Apr 8, 2019
8c11b0a
Remove old files
gdiazlo Apr 8, 2019
2a256fd
Mark event as required option for qed client add subcommand
gdiazlo Apr 8, 2019
d6a8b68
Add publisher agent
gdiazlo Apr 8, 2019
dfc0417
Move test service to tests folder
gdiazlo Apr 8, 2019
2a5a133
Update startup scripts to support new cli options
gdiazlo Apr 8, 2019
e86fec6
Move test_service to notifierstore in testutils
gdiazlo Apr 9, 2019
d36dc3d
Update start scripts with new options
gdiazlo Apr 9, 2019
42a152f
Add default configurations for simple implementations of notifier, st…
gdiazlo Apr 9, 2019
9161b1f
Update comments
gdiazlo Apr 9, 2019
397d95d
Add error to message bus publish api
gdiazlo Apr 9, 2019
cf1b2a0
Check for tasks errors in batchProcessor
gdiazlo Apr 9, 2019
4f3ad48
Remove old test_service e2e implementation
gdiazlo Apr 9, 2019
d63a6c1
Increase batch size to 500, as the qed log performance has increased.
gdiazlo Apr 9, 2019
db80337
remove old agents code
gdiazlo Apr 10, 2019
e3c511b
Fix version checking in balloon consistency query
gdiazlo Apr 10, 2019
5a400d2
Fix default endpoint in client config
gdiazlo Apr 10, 2019
e3ca384
Configure QED client for monitor agent needs
gdiazlo Apr 10, 2019
b004e3c
Remove unneeded debug messages
gdiazlo Apr 10, 2019
f973299
Add var to allow cflags usage in go runs inside the script
gdiazlo Apr 10, 2019
87a9b0f
remove old e2d code
gdiazlo Apr 10, 2019
a089e95
Handle cancel function in metrics server shutdown
gdiazlo Apr 10, 2019
8e1e874
Update tests to reflect latest changes
gdiazlo Apr 10, 2019
3348034
increase timeout in endpoint test
gdiazlo Apr 10, 2019
ec66aac
AWS deploy: add panel with disk throughput in grafana dashboard
panchoh Apr 10, 2019
8f967d0
Fix: server and agents deployment on AWS without config file
Apr 11, 2019
d08fad8
register taskFactory metrics in agent metrics server if it is present
gdiazlo Apr 11, 2019
8fb9d65
Fix: prometheus agent metrics port
Apr 11, 2019
85b4389
Fix: use metrics counters
gdiazlo Apr 11, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ RUN mv /tmp/qed/c-deps/* c-deps/
# Build QED, Storage binary and riot
RUN go build -o /usr/local/bin/qed &&\
go build -o /usr/local/bin/riot tests/riot.go &&\
go build -o /usr/local/bin/storage tests/gossip/test_service.go
go build -o /usr/local/bin/storage testutils/notifierstore.go

# Clean
RUN rm -rf /var/lib/apt/lists/* /tmp/qed
4 changes: 2 additions & 2 deletions api/apihttp/apihttp.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,13 @@ func Incremental(balloon raftwal.RaftBalloonApi) http.HandlerFunc {
// Wait for the response
proof, err := balloon.QueryConsistency(request.Start, request.End)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
http.Error(w, err.Error(), http.StatusBadRequest)
return
}

out, err := json.Marshal(protocol.ToIncrementalResponse(proof))
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
http.Error(w, err.Error(), http.StatusBadRequest)
return
}

Expand Down
5 changes: 1 addition & 4 deletions balloon/balloon.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,7 @@ func (b Balloon) QueryConsistency(start, end uint64) (*IncrementalProof, error)
stats.AddFloat("QueryConsistency", 1)
var proof IncrementalProof

if start >= b.version ||
end >= b.version ||
start >= end {

if start >= b.version || end >= b.version || start > end {
return nil, errors.New("unable to process proof from history tree: invalid range")
}

Expand Down
38 changes: 21 additions & 17 deletions client/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,52 +89,56 @@ const (

// Config sets the HTTP client configuration
type Config struct {
// Log level
Log string `desc:"Set log level to info, error or debug"`

// Endpoints [host:port,host:port,...] to ask for QED cluster-topology.
Endpoints []string
Endpoints []string `desc:"REST QED Log service endpoint list http://ip1:port1,http://ip2:port2... "`

// ApiKey to query the server endpoint.
APIKey string
APIKey string `desc:"Set API Key to talk to QED Log service"`

// Insecure disables the verification of the server's certificate chain
// and host name, allowing MiTM vector attacks.
Insecure bool
Insecure bool `desc:"Set it to true to disable the verification of the server's certificate chain"`

// Timeout is the number of seconds to wait for a request to QED.
Timeout time.Duration
// Timeout is the time to wait for a request to QED.
Timeout time.Duration `desc:"Time to wait for a request to QED"`

// DialTimeout is the number of seconds to wait for the connection to be established.
DialTimeout time.Duration
// DialTimeout is the time to wait for the connection to be established.
DialTimeout time.Duration `desc:"Time to wait for the connection to be established"`

// HandshakeTimeout is the number of seconds to wait for a handshake negotiation.
HandshakeTimeout time.Duration
// HandshakeTimeout is the time to wait for a handshake negotiation.
HandshakeTimeout time.Duration `desc:"Time to wait for a handshake negotiation"`

// Controls how the client will route all queries to members of the cluster.
ReadPreference ReadPref
ReadPreference ReadPref `flag:"-"`

// MaxRetries sets the maximum number of retries before giving up
// when performing an HTTP request to QED.
MaxRetries int
MaxRetries int `desc:"Sets the maximum number of retries before giving up"`

// EnableTopologyDiscovery enables the process of discovering the cluster
// topology when requests fail.
EnableTopologyDiscovery bool
EnableTopologyDiscovery bool `desc:"Enables the process of discovering the cluster topology when requests fail"`

// EnableHealthChecks enables healthchecks of all endpoints in the current cluster topology.
EnableHealthChecks bool
EnableHealthChecks bool `desc:"Enables helthchecks of all endpoints in the current cluster topology"`

// HealthCheckTimeout is the timeout in seconds the healthcheck waits for a response
// HealthCheckTimeout is the time the healthcheck waits for a response
// from a QED server.
HealthCheckTimeout time.Duration

HealthCheckTimeout time.Duration `desc:"Time the healthcheck waits for a response from QED"`

// AttemptToReviveEndpoints sets if dead endpoints will be marked alive again after a
// round-robin round. This way, they will be picked up in the next try.
AttemptToReviveEndpoints bool
AttemptToReviveEndpoints bool `desc:"Set if dead endpoints will be marked alive again after a round-robin round"`
}

// DefaultConfig creates a Config structure with default values.
func DefaultConfig() *Config {
return &Config{
Endpoints: []string{"127.0.0.1:8800"},
Endpoints: []string{"http://127.0.0.1:8800"},
APIKey: "my-key",
Insecure: DefaultInsecure,
Timeout: DefaultTimeout,
Expand Down
95 changes: 32 additions & 63 deletions cmd/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,76 +17,45 @@
package cmd

import (
"regexp"

"github.com/spf13/cobra"
v "github.com/spf13/viper"
"context"

"github.com/bbva/qed/gossip"
"github.com/bbva/qed/log"
"github.com/octago/sflags/gen/gpflag"
"github.com/spf13/cobra"
)

func newAgentCommand(cmdCtx *cmdContext, args []string) *cobra.Command {

config := gossip.DefaultConfig()

cmd := &cobra.Command{
Use: "agent",
Short: "Start a gossip agent for the verifiable log QED",
}

f := cmd.PersistentFlags()
f.StringVar(&config.NodeName, "node", "", "Unique name for node. If not set, fallback to hostname")
f.StringVar(&config.BindAddr, "bind", "", "Bind address for TCP/UDP gossip on (host:port)")
f.StringVar(&config.AdvertiseAddr, "advertise", "", "Address to advertise to cluster")
f.StringVar(&config.MetricsAddr, "metrics", "", "Address to bind metrics endpoint")
f.StringSliceVar(&config.StartJoin, "join", []string{}, "Comma-delimited list of nodes ([host]:port), through which a cluster can be joined")
f.StringSliceVar(&config.AlertsUrls, "alertsUrls", []string{}, "Comma-delimited list of Alert servers ([host]:port), through which an agent can post alerts")

// Lookups
v.BindPFlag("agent.node", f.Lookup("node"))
v.BindPFlag("agent.bind", f.Lookup("bind"))
v.BindPFlag("agent.advertise", f.Lookup("advertise"))
v.BindPFlag("agent.metrics", f.Lookup("metrics"))
v.BindPFlag("agent.join", f.Lookup("join"))
v.BindPFlag("agent.alerts_urls", f.Lookup("alertsUrls"))

agentPreRun := func(config gossip.Config) gossip.Config {
config.EnableCompression = true
config.NodeName = v.GetString("agent.node")
config.BindAddr = v.GetString("agent.bind")
config.AdvertiseAddr = v.GetString("agent.advertise")
config.MetricsAddr = v.GetString("agent.metrics")
config.StartJoin = v.GetStringSlice("agent.join")
config.AlertsUrls = v.GetStringSlice("agent.alerts_urls")

markStringRequired(config.NodeName, "node")
markStringRequired(config.BindAddr, "bind")
markSliceStringRequired(config.StartJoin, "join")
markSliceStringRequired(config.AlertsUrls, "alertsUrls")

return config
}

var kind string
re := regexp.MustCompile("^monitor$|^auditor$|^publisher$")
for _, arg := range args {
if re.MatchString(arg) {
kind = arg
break
}
}
var agentCmd *cobra.Command = &cobra.Command{
Use: "agent",
Short: "Provides access to the QED gossip agents",
Long: `QED provides standalone agents to help maintain QED security. We have included
three agents into the distribution:
* Monitor agent: checks the lag of the system between the QED Log and the
Snapshot Store as seen by the gossip network
* Auditor agent: verifies QED membership proofs of the snapshots received
throught the gossip network
* Publisher agent: publish snapshots to the snapshot store`,
TraverseChildren: true,
}

switch kind {
case "publisher":
cmd.AddCommand(newAgentPublisherCommand(cmdCtx, *config, agentPreRun))
var agentCtx context.Context = configAgent()

case "auditor":
cmd.AddCommand(newAgentAuditorCommand(cmdCtx, *config, agentPreRun))
func init() {
agentCmd.MarkFlagRequired("bind-addr")
agentCmd.MarkFlagRequired("metrics-addr")
agentCmd.MarkFlagRequired("node-name")
agentCmd.MarkFlagRequired("role")
agentCmd.MarkFlagRequired("log")
Root.AddCommand(agentCmd)
}

case "monitor":
cmd.AddCommand(newAgentMonitorCommand(cmdCtx, *config, agentPreRun))
func configAgent() context.Context {
conf := gossip.DefaultConfig()
err := gpflag.ParseTo(conf, agentCmd.PersistentFlags())
if err != nil {
log.Fatalf("err: %v", err)
}

return cmd

return context.WithValue(Ctx, k("agent.config"), conf)
}

Loading