Skip to content

Commit

Permalink
Merge #33435
Browse files Browse the repository at this point in the history
33435: cli/start,server: clarify init vs join r=knz a=knz

Fixes #33302.

Regarding the behavior of `cockroach start --join` upon first
start (before bootstrap).

Prior to this patch:

```
*
* INFO: initial startup completed, will now wait for `cockroach init`
* or a join to a running cluster to start accepting clients.
* Check the log file(s) for progress.
*
```

The emphasis was incorrectly put on the waiting ("will now wait") and
on init vs join. In practice, waiting is uncommon and a join is more
frequent than init.

After this patch:

```
*
* INFO: initial startup completed.
* Node will now attempt to join a running cluster, or wait for `cockroach init`.
* Client connections will be accepted after this completes successfully.
* Check the log file(s) for progress.
*
```

Release note (cli change): The informational message printed upon
`cockroach start --join` when a node is started the first time has
been reworded, to emphasize that joining a running cluster is the
preferred course of action.

Co-authored-by: Raphael 'kena' Poss <[email protected]>
  • Loading branch information
craig[bot] and knz committed Jan 24, 2019
2 parents 295b6ae + b27d10f commit 645c0c9
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 11 deletions.
3 changes: 2 additions & 1 deletion pkg/cli/interactive_tests/test_init_command.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ source [file join [file dirnam $argv0] common.tcl]
system "$argv start --insecure --pid-file=server_pid -s=path=logs/db --listen-addr=localhost --background --join=localhost:26258 >>logs/expect-cmd.log 2>&1"

start_test "Check that the server has informed us and the log file that it was ready before forking off in the background"
system "grep -q 'initial startup completed, will now wait' logs/db/logs/cockroach.log"
system "grep -q 'initial startup completed' logs/db/logs/cockroach.log"
system "grep -q 'will now attempt to join a running cluster, or wait' logs/db/logs/cockroach.log"
end_test

start_test "Check that the SQL shell successfully times out upon connecting to an uninitialized node"
Expand Down
7 changes: 4 additions & 3 deletions pkg/cli/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -504,9 +504,10 @@ func runStart(cmd *cobra.Command, args []string) error {

if waitForInit {
log.Shout(ctx, log.Severity_INFO,
"initial startup completed, will now wait for `cockroach init`\n"+
"or a join to a running cluster to start accepting clients.\n"+
"Check the log file(s) for progress.")
"initial startup completed.\n"+
"Node will now attempt to join a running cluster, or wait for `cockroach init`.\n"+
"Client connections will be accepted after this completes successfully.\n"+
"Check the log file(s) for progress. ")
}

// Ensure the configuration logging is written to disk in case a
Expand Down
7 changes: 4 additions & 3 deletions pkg/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,11 @@ type Config struct {
EventLogEnabled bool

// ReadyFn is called when the server has started listening on its
// sockets. The boolean argument indicates (iff true) that the
// sockets.
// The argument waitForInit indicates (iff true) that the
// server is not bootstrapped yet, will not bootstrap itself and
// will be waiting for an `init` command. This can be used to inform
// the user.
// will be waiting for an `init` command or accept bootstrapping
// from a joined node.
ReadyFn func(waitForInit bool)

// DelayedBootstrapFn is called if the boostrap process does not complete
Expand Down
34 changes: 30 additions & 4 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -1416,21 +1416,47 @@ func (s *Server) Start(ctx context.Context) error {
// We have no existing stores and we've been told to join a cluster. Wait
// for the initServer to bootstrap the cluster or connect to an existing
// one.
if s.cfg.ReadyFn != nil {
s.cfg.ReadyFn(true /*waitForInit*/)
}
log.Info(ctx, "no stores bootstrapped and --join flag specified, awaiting init command.")
//
// TODO(knz): This may need tweaking when #24118 is addressed.

s.stopper.RunWorker(workersCtx, func(context.Context) {
serveOnMux.Do(func() {
netutil.FatalIfUnexpected(m.Serve())
})
})

ready := make(chan struct{})
if s.cfg.ReadyFn != nil {
// s.cfg.ReadyFn must be called in any case because the `start`
// command requires it to signal readiness to a process manager.
//
// However we want to be somewhat precisely informative to the user
// about whether the node is waiting on init / join, or whether
// the join was successful straight away. So we spawn this gogoroutine
// and either:
// - its timer will fire after 2 seconds and we call ReadyFn(true)
// - bootstrap completes earlier and the ready chan gets closed,
// then we call ReadyFn(false).
go func() {
waitForInit := false
tm := time.After(2 * time.Second)
select {
case <-tm:
waitForInit = true
case <-ready:
}
s.cfg.ReadyFn(waitForInit)
}()
}

log.Info(ctx, "no stores bootstrapped and --join flag specified, awaiting init command or join with an already initialized node.")

initRes, err := s.initServer.awaitBootstrap()
close(ready)
if err != nil {
return err
}

doBootstrap = initRes == needBootstrap
if doBootstrap {
if err := s.bootstrapCluster(ctx); err != nil {
Expand Down

0 comments on commit 645c0c9

Please sign in to comment.