diff --git a/buildenv/envs.go b/buildenv/envs.go index 98a2fd3afe..7199b2f309 100644 --- a/buildenv/envs.go +++ b/buildenv/envs.go @@ -127,12 +127,6 @@ type Environment struct { // in a development or staging environment. MaxBuilds int - // MaxReverseBuilds is the maximum number of concurrent builds - // that can be run for reverse buildlets. Zero means unlimited. - // This is being used to mitigate revdial memory consumption - // problems in issue 31639. - MaxReverseBuilds int - // AutoCertCacheBucket is the GCS bucket to use for the // golang.org/x/crypto/acme/autocert (LetsEncrypt) cache. // If empty, LetsEncrypt isn't used. @@ -291,7 +285,6 @@ var Production = &Environment{ LogBucket: "go-build-log", SnapBucket: "go-build-snap", AutoCertCacheBucket: "farmer-golang-org-autocert-cache", - MaxReverseBuilds: 10, } var Development = &Environment{ diff --git a/cmd/coordinator/coordinator.go b/cmd/coordinator/coordinator.go index 6c310d38b6..5a9a89559b 100644 --- a/cmd/coordinator/coordinator.go +++ b/cmd/coordinator/coordinator.go @@ -448,17 +448,6 @@ func numCurrentBuildsOfType(typ string) (n int) { return } -func numCurrentReverseBuilds() (n int) { - statusMu.Lock() - defer statusMu.Unlock() - for _, bs := range status { - if bs.conf.IsReverse() { - n++ - } - } - return -} - func isBuilding(work buildgo.BuilderRev) bool { statusMu.Lock() defer statusMu.Unlock() @@ -488,10 +477,6 @@ func mayBuildRev(rev buildgo.BuilderRev) bool { if buildEnv.MaxBuilds > 0 && numCurrentBuilds() >= buildEnv.MaxBuilds { return false } - if buildEnv.MaxReverseBuilds > 0 && buildConf.IsReverse() && numCurrentReverseBuilds() >= buildEnv.MaxReverseBuilds { - // Issue 31639. - return false - } if buildConf.MaxAtOnce > 0 && numCurrentBuildsOfType(rev.Name) >= buildConf.MaxAtOnce { return false } diff --git a/cmd/coordinator/reverse.go b/cmd/coordinator/reverse.go index f289b5575d..beba59a00e 100644 --- a/cmd/coordinator/reverse.go +++ b/cmd/coordinator/reverse.go @@ -53,16 +53,31 @@ import ( const minBuildletVersion = 1 -var reversePool = new(reverseBuildletPool) +var reversePool = &reverseBuildletPool{ + oldInUse: make(map[*buildlet.Client]bool), +} + +const maxOldRevdialUsers = 10 type token struct{} type reverseBuildletPool struct { - mu sync.Mutex // guards all fields, including fields of *reverseBuildlet + // mu guards all 4 fields below and also fields of + // *reverseBuildlet in buildlets + mu sync.Mutex + + // buildlets are the currently connected buildlets. // TODO: switch to a map[hostType][]buildlets or map of set. buildlets []*reverseBuildlet - wakeChan map[string]chan token // hostType => best-effort wake-up chan when buildlet free - waiters map[string]int // hostType => number waiters blocked in GetBuildlet + + wakeChan map[string]chan token // hostType => best-effort wake-up chan when buildlet free + + waiters map[string]int // hostType => number waiters blocked in GetBuildlet + + // oldInUse tracks which buildlets with the old revdial code are currently in use. + // These are a liability due to runaway memory issues (Issue 31639) so + // we bound how many can be running at once. Fortunately there aren't many left. + oldInUse map[*buildlet.Client]bool } func (p *reverseBuildletPool) ServeReverseStatusJSON(w http.ResponseWriter, r *http.Request) { @@ -126,9 +141,15 @@ func (p *reverseBuildletPool) tryToGrab(hostType string) (bc *buildlet.Client, b busy++ continue } + if b.isOldRevDial && len(p.oldInUse) >= maxOldRevdialUsers { + continue + } // Found an unused match. b.inUse = true b.inUseTime = time.Now() + if b.isOldRevDial { + p.oldInUse[b.client] = true + } return b.client, 0 } return nil, busy @@ -162,6 +183,7 @@ func (p *reverseBuildletPool) noteBuildletAvailable(hostType string) { func (p *reverseBuildletPool) nukeBuildlet(victim *buildlet.Client) { p.mu.Lock() defer p.mu.Unlock() + delete(p.oldInUse, victim) for i, rb := range p.buildlets { if rb.client == victim { defer rb.conn.Close() @@ -345,10 +367,12 @@ func (p *reverseBuildletPool) WriteHTMLStatus(w io.Writer) { p.mu.Lock() buildlets := append([]*reverseBuildlet(nil), p.buildlets...) sort.Sort(byTypeThenHostname(buildlets)) + numInUse := 0 for _, b := range buildlets { machStatus := "idle" if b.inUse { machStatus = "working" + numInUse++ } fmt.Fprintf(&buf, "