Skip to content

Commit

Permalink
cmd/coordinator: only bound old revdial builds
Browse files Browse the repository at this point in the history
Revert CL 173517 and replace with a similar but different mechanism.

Now that the new revdial is out, only penalize the old revdial users
(a few stragglers who haven't updated). In practice the limit of 10 at
once won't be a problem but will protect the coordinator during submit
floods.

Fixes golang/go#31639

Change-Id: I6b6c3567205fdd98e0b80def96d75827e986fe4f
Reviewed-on: https://go-review.googlesource.com/c/build/+/174325
Reviewed-by: Dmitri Shuralyov <[email protected]>
  • Loading branch information
bradfitz committed Apr 29, 2019
1 parent 4f0f4bb commit 61a567e
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 35 deletions.
7 changes: 0 additions & 7 deletions buildenv/envs.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,6 @@ type Environment struct {
// in a development or staging environment.
MaxBuilds int

// MaxReverseBuilds is the maximum number of concurrent builds
// that can be run for reverse buildlets. Zero means unlimited.
// This is being used to mitigate revdial memory consumption
// problems in issue 31639.
MaxReverseBuilds int

// AutoCertCacheBucket is the GCS bucket to use for the
// golang.org/x/crypto/acme/autocert (LetsEncrypt) cache.
// If empty, LetsEncrypt isn't used.
Expand Down Expand Up @@ -291,7 +285,6 @@ var Production = &Environment{
LogBucket: "go-build-log",
SnapBucket: "go-build-snap",
AutoCertCacheBucket: "farmer-golang-org-autocert-cache",
MaxReverseBuilds: 10,
}

var Development = &Environment{
Expand Down
15 changes: 0 additions & 15 deletions cmd/coordinator/coordinator.go
Original file line number Diff line number Diff line change
Expand Up @@ -448,17 +448,6 @@ func numCurrentBuildsOfType(typ string) (n int) {
return
}

func numCurrentReverseBuilds() (n int) {
statusMu.Lock()
defer statusMu.Unlock()
for _, bs := range status {
if bs.conf.IsReverse() {
n++
}
}
return
}

func isBuilding(work buildgo.BuilderRev) bool {
statusMu.Lock()
defer statusMu.Unlock()
Expand Down Expand Up @@ -488,10 +477,6 @@ func mayBuildRev(rev buildgo.BuilderRev) bool {
if buildEnv.MaxBuilds > 0 && numCurrentBuilds() >= buildEnv.MaxBuilds {
return false
}
if buildEnv.MaxReverseBuilds > 0 && buildConf.IsReverse() && numCurrentReverseBuilds() >= buildEnv.MaxReverseBuilds {
// Issue 31639.
return false
}
if buildConf.MaxAtOnce > 0 && numCurrentBuildsOfType(rev.Name) >= buildConf.MaxAtOnce {
return false
}
Expand Down
60 changes: 47 additions & 13 deletions cmd/coordinator/reverse.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,16 +53,31 @@ import (

const minBuildletVersion = 1

var reversePool = new(reverseBuildletPool)
var reversePool = &reverseBuildletPool{
oldInUse: make(map[*buildlet.Client]bool),
}

const maxOldRevdialUsers = 10

type token struct{}

type reverseBuildletPool struct {
mu sync.Mutex // guards all fields, including fields of *reverseBuildlet
// mu guards all 4 fields below and also fields of
// *reverseBuildlet in buildlets
mu sync.Mutex

// buildlets are the currently connected buildlets.
// TODO: switch to a map[hostType][]buildlets or map of set.
buildlets []*reverseBuildlet
wakeChan map[string]chan token // hostType => best-effort wake-up chan when buildlet free
waiters map[string]int // hostType => number waiters blocked in GetBuildlet

wakeChan map[string]chan token // hostType => best-effort wake-up chan when buildlet free

waiters map[string]int // hostType => number waiters blocked in GetBuildlet

// oldInUse tracks which buildlets with the old revdial code are currently in use.
// These are a liability due to runaway memory issues (Issue 31639) so
// we bound how many can be running at once. Fortunately there aren't many left.
oldInUse map[*buildlet.Client]bool
}

func (p *reverseBuildletPool) ServeReverseStatusJSON(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -126,9 +141,15 @@ func (p *reverseBuildletPool) tryToGrab(hostType string) (bc *buildlet.Client, b
busy++
continue
}
if b.isOldRevDial && len(p.oldInUse) >= maxOldRevdialUsers {
continue
}
// Found an unused match.
b.inUse = true
b.inUseTime = time.Now()
if b.isOldRevDial {
p.oldInUse[b.client] = true
}
return b.client, 0
}
return nil, busy
Expand Down Expand Up @@ -162,6 +183,7 @@ func (p *reverseBuildletPool) noteBuildletAvailable(hostType string) {
func (p *reverseBuildletPool) nukeBuildlet(victim *buildlet.Client) {
p.mu.Lock()
defer p.mu.Unlock()
delete(p.oldInUse, victim)
for i, rb := range p.buildlets {
if rb.client == victim {
defer rb.conn.Close()
Expand Down Expand Up @@ -345,10 +367,12 @@ func (p *reverseBuildletPool) WriteHTMLStatus(w io.Writer) {
p.mu.Lock()
buildlets := append([]*reverseBuildlet(nil), p.buildlets...)
sort.Sort(byTypeThenHostname(buildlets))
numInUse := 0
for _, b := range buildlets {
machStatus := "<i>idle</i>"
if b.inUse {
machStatus = "working"
numInUse++
}
fmt.Fprintf(&buf, "<li>%s (%s) version %s, %s: connected %s, %s for %s</li>\n",
b.hostname,
Expand All @@ -363,6 +387,8 @@ func (p *reverseBuildletPool) WriteHTMLStatus(w io.Writer) {
inUse[b.hostType]++
}
}
numOldInUse := len(p.oldInUse)
numConnected := len(buildlets)
p.mu.Unlock()

var typs []string
Expand All @@ -371,7 +397,13 @@ func (p *reverseBuildletPool) WriteHTMLStatus(w io.Writer) {
}
sort.Strings(typs)

io.WriteString(w, "<b>Reverse pool summary</b> (in use / total)<ul>")
io.WriteString(w, "<b>Reverse pool stats</b><ul>")
fmt.Fprintf(w, "<li>Buildlets connected: %d</li>\n", numConnected)
fmt.Fprintf(w, "<li>Buildlets in use: %d</li>\n", numInUse)
fmt.Fprintf(w, "<li>Old revdial buildlets in use: %d</li>\n", numOldInUse)
io.WriteString(w, "</ul>")

io.WriteString(w, "<b>Reverse pool by host type</b> (in use / total)<ul>")
if len(typs) == 0 {
io.WriteString(w, "<li>no connections</li>")
}
Expand Down Expand Up @@ -450,7 +482,8 @@ type reverseBuildlet struct {
// It doesn't have to be a complete DNS name.
hostname string
// version is the reverse buildlet's version.
version string
version string
isOldRevDial bool // version 22 or under: using the v1 revdial package (Issue 31639)

// sessRand is the unique random number for every unique buildlet session.
sessRand string
Expand Down Expand Up @@ -609,13 +642,14 @@ func handleReverse(w http.ResponseWriter, r *http.Request) {

now := time.Now()
b := &reverseBuildlet{
hostname: hostname,
version: buildletVersion,
hostType: hostType,
client: client,
conn: conn,
inUseTime: now,
regTime: now,
hostname: hostname,
version: buildletVersion,
isOldRevDial: status.Version < 23,
hostType: hostType,
client: client,
conn: conn,
inUseTime: now,
regTime: now,
}
reversePool.addBuildlet(b)
registerBuildlet(modes) // testing only
Expand Down

0 comments on commit 61a567e

Please sign in to comment.