Skip to content

Commit

Permalink
Merge #36418
Browse files Browse the repository at this point in the history
36418:  roachprod: create terraform-based API resource management for AWS r=ajwerner a=ajwerner

In order to run a roachprod cluster in AWS one needs to configure vms with
resources which exist on a per-region or per-availability zone basis.

Each region requires a VPC with security groups, AMIs and subnets which define
the availability zones within the region. Furthermore, each region's VPC must be
configured with peering to each other VPC.

Currently this is set up just for 3 regions (us-east-2, us-west-2, eu-west-2).
Using additional regions requires setting up the required resources manually
and then connecting them through a variety of less than ergonomic flags.

In gcloud we have a different story; users can just specify a list of
availability zones in which to place the instances. This could yet be made
better by providing an easy way to distribute nodes within regions over
availability zones (see #36400).

This PR creates the machinery to set up AWS for convenient use with arbitrary
regions.

The basic architecture is that there's a collection of terraform files under
`cmd/roachprod/vm/aws/terraform` which defines resources and modules to manage
the above described required AWS resources.

The `main.tf` for the terraform project is generated by a go program
`terraformgen` which allows users to specify the account number, resource name
prefix and the set of regions. The templating is especially important because
of the need for pair-wise peerings between each region. The terraform project
is set up to output the relevant IDs which, in combination with the
`--json` flag, will be used to produce an artifact which roachprod will consume.

The second commit adopts this information from the first by setting up `go:generate` directives to:

  1. Generate the terraform `main.tf` which is used to generate `config.json`.
  2. Generate the `embedded.go` bindata file to contain the default json value.

The commit then defines the `awsConfig` struct to usefully expose the
information produced by terraform to create instances. Users can override the
embedded default configuration by providing similarly formatted json in a file
passed by path to the `--aws-config` flag.


Co-authored-by: Andrew Werner <[email protected]>
  • Loading branch information
craig[bot] and ajwerner committed Apr 3, 2019
2 parents 12b0114 + 9563efa commit c6df752
Show file tree
Hide file tree
Showing 14 changed files with 2,664 additions and 140 deletions.
5 changes: 3 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ dupl: bin/.bootstrap

.PHONY: generate
generate: ## Regenerate generated code.
generate: protobuf $(DOCGEN_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS) $(SQLPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen
generate: protobuf $(DOCGEN_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS) $(SQLPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen bin/terraformgen
$(GO) generate $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' $(PKG)

lint lintshort: TESTTIMEOUT := $(LINTTIMEOUT)
Expand Down Expand Up @@ -1487,6 +1487,7 @@ bins = \
bin/docgen \
bin/execgen \
bin/generate-binary \
bin/terraformgen \
bin/github-post \
bin/github-pull-request-make \
bin/gossipsim \
Expand Down Expand Up @@ -1517,7 +1518,7 @@ optgen-package = ./pkg/sql/opt/optgen/cmd/optgen
logictest-package = ./pkg/sql/logictest
logictestccl-package = ./pkg/ccl/logictestccl
logictestopt-package = ./pkg/sql/opt/exec/execbuilder

terraformgen-package = ./pkg/cmd/roachprod/vm/aws/terraformgen
logictest-bins := bin/logictest bin/logictestopt bin/logictestccl

# Additional dependencies for binaries that depend on generated code.
Expand Down
1 change: 1 addition & 0 deletions pkg/cmd/roachprod/vm/aws/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
embedded.go -diff
166 changes: 64 additions & 102 deletions pkg/cmd/roachprod/vm/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package aws

import (
"encoding/json"
"fmt"
"log"
"math"
Expand Down Expand Up @@ -63,17 +64,18 @@ func init() {
} else {
p = flagstub.New(p, unimplemented)
}

vm.Providers[ProviderName] = p
}

// providerOpts implements the vm.ProviderFlags interface for aws.Provider.
type providerOpts struct {
Profile string
AMI []string
Profile string
Config *awsConfig
Zones []string

MachineType string
SecurityGroups []string
SSDMachineType string
Subnets []string
RemoteUserName string
EBSVolumeType string
EBSVolumeSize int
Expand All @@ -85,21 +87,32 @@ const (
defaultMachineType = "m5.xlarge"
)

// defaultConfig is the awsConfig decoded from the embedded "config.json"
// asset. The closure runs once, when the package-level variable is
// initialized, and panics if the embedded JSON cannot be unmarshaled.
var defaultConfig = func() (cfg *awsConfig) {
	cfg = new(awsConfig)
	if err := json.Unmarshal(MustAsset("config.json"), cfg); err != nil {
		panic(errors.Wrap(err, "failed to unmarshal embedded configuration"))
	}
	return cfg
}()

// defaultZones is the default value of the ProviderName+"-zones" flag: the
// a, b and c availability zones of us-east-2, us-west-2 and eu-west-2.
var defaultZones = []string{
"us-east-2a",
"us-east-2b",
"us-east-2c",
"us-west-2a",
"us-west-2b",
"us-west-2c",
"eu-west-2a",
"eu-west-2b",
"eu-west-2c",
}

// ConfigureCreateFlags is part of the vm.ProviderFlags interface.
// This method sets up a lot of maps between the various EC2
// regions and the ids of the things we want to use there. This is
// somewhat complicated because different EC2 regions may as well
// be parallel universes.
func (o *providerOpts) ConfigureCreateFlags(flags *pflag.FlagSet) {
// You can find AMI ids here https://cloud-images.ubuntu.com/locator/ec2/
// Ubuntu Server 16.04 LTS (HVM), SSD Volume Type
flags.StringSliceVar(&o.AMI, ProviderName+"-ami",
[]string{
"us-east-2:ami-965e6bf3",
"us-west-2:ami-79873901",
"eu-west-2:ami-941e04f0",
},
"AMI images for each region")

// m5.xlarge is a 4core, 16Gb instance, approximately equal to a GCE n1-standard-4
flags.StringVar(&o.MachineType, ProviderName+"-machine-type", defaultMachineType,
Expand All @@ -110,30 +123,6 @@ func (o *providerOpts) ConfigureCreateFlags(flags *pflag.FlagSet) {
flags.StringVar(&o.SSDMachineType, ProviderName+"-machine-type-ssd", defaultSSDMachineType,
"Machine type for --local-ssd (see https://aws.amazon.com/ec2/instance-types/)")

// The subnet actually controls placement into a particular AZ
flags.StringSliceVar(&o.Subnets, ProviderName+"-subnet",
[]string{
// m5 machines not yet available in us-east-2a.
// "us-east-2a:subnet-3ea05c57",
"us-east-2b:subnet-49170331",
"us-east-2c:subnet-46c7f20c",
"us-west-2a:subnet-0ffd1c2a34c9231ca",
"us-west-2b:subnet-0e6c3c944d64cdcaf",
"us-west-2c:subnet-0987b45308598f96a",
"eu-west-2a:subnet-056b3d8c21c5ea593",
"eu-west-2b:subnet-018fa0ae185054048",
"eu-west-2c:subnet-0678178e17d36f556",
},
"Subnet id for zones in each region")

// Set up a roachprod security group in each region
flags.StringSliceVar(&o.SecurityGroups, ProviderName+"-sg",
[]string{
"us-east-2:sg-06a4c809644e32920",
"us-west-2:sg-03548a0ccc7870601",
"eu-west-2:sg-0ebb21d61843dd82f"},
"Security group id in each region")

// AWS images generally use "ubuntu" or "ec2-user"
flags.StringVar(&o.RemoteUserName, ProviderName+"-user",
"ubuntu", "Name of the remote user to SSH as")
Expand All @@ -145,12 +134,19 @@ func (o *providerOpts) ConfigureCreateFlags(flags *pflag.FlagSet) {
flags.IntVar(&o.EBSProvisionedIOPs, ProviderName+"-ebs-iops",
1000, "Number of IOPs to provision, only used if "+ProviderName+
"-ebs-volume-type=io1")

}

// ConfigureClusterFlags registers the cluster-level AWS flags on flags:
// the profile (defaulting to $AWS_DEFAULT_PROFILE), the path to a JSON
// configuration, and the list of availability zones.
// NOTE(review): presumably part of the vm.ProviderFlags interface, like
// ConfigureCreateFlags — confirm against the interface definition.
func (o *providerOpts) ConfigureClusterFlags(flags *pflag.FlagSet) {
	profile := os.Getenv("AWS_DEFAULT_PROFILE") // "" if unset
	flags.StringVar(&o.Profile, ProviderName+"-profile", profile,
		"Profile to manage cluster in")

	// Seed the flag value with a copy of the embedded defaults and point
	// o.Config at the copy held inside the flag value, so that anything the
	// flag writes into configFlagVal.awsConfig is visible through o.Config.
	// NOTE(review): presumably awsConfigValue.Set loads the file at the given
	// path into that struct — confirm against its definition.
	configFlagVal := awsConfigValue{awsConfig: *defaultConfig}
	o.Config = &configFlagVal.awsConfig
	flags.Var(&configFlagVal, ProviderName+"-config",
		"Path to json for aws configuration, defaults to predefined configuration")

	flags.StringSliceVar(&o.Zones, ProviderName+"-zones", defaultZones,
		"aws availability zones")
}

// Provider implements the vm.Provider interface for AWS.
Expand Down Expand Up @@ -424,46 +420,38 @@ func (p *Provider) Name() string {

// allRegions returns the regions that have been configured with
// AMI and SecurityGroup instances.
func (p *Provider) allRegions() ([]string, error) {
// We're using an ordered list instead of a map here to guarantee
// the same ordering between calls.
regionList, err := orderedKeyList(p.opts.AMI)
if err != nil {
return nil, err
}

securityMap, err := splitMap(p.opts.SecurityGroups)
if err != nil {
return nil, err
}

var keys []string
for _, region := range regionList {
if _, ok := securityMap[region]; ok {
keys = append(keys, region)
} else {
log.Printf("ignoring region %s because it has no associated SecurityGroup", region)
func (p *Provider) allRegions() (regions []string, err error) {
byName := make(map[string]struct{})
for _, z := range p.opts.Zones {
az := p.opts.Config.getAvailabilityZone(z)
if az == nil {
return nil, fmt.Errorf("unknown availability zone %v, please provide a "+
"correct value or update your config accordingly", z)
}
if _, have := byName[az.region.Name]; !have {
byName[az.region.Name] = struct{}{}
regions = append(regions, az.region.Name)
}
}
return keys, nil
return regions, nil
}

// allZones returns all AWS availability zones which have been correctly
// configured within the given region.
func (p *Provider) allZones(region string) ([]string, error) {
subnetMap, err := splitMap(p.opts.Subnets)
if err != nil {
return nil, err
}

var ret []string
for zone := range subnetMap {
if strings.Index(zone, region) == 0 && len(zone) == len(region)+1 {
ret = append(ret, zone)
func (p *Provider) allZones(region string) (zones []string, _ error) {
r := p.opts.Config.getRegion(region)
if r == nil {
return nil, fmt.Errorf("region %s not found", region)
}
for _, z := range p.opts.Zones {
for _, az := range r.AvailabilityZones {
if az.name == z {
zones = append(zones, z)
break
}
}
}

return ret, nil
return zones, nil
}

// listRegion extracts the roachprod-managed instances in the
Expand Down Expand Up @@ -584,18 +572,10 @@ func (p *Provider) runInstance(name string, zone string, opts vm.CreateOpts) err
"machine type when --local-ssd=false")
}

region, err := zoneToRegion(zone)
if err != nil {
return err
}

amiMap, err := splitMap(p.opts.AMI)
if err != nil {
return err
}
amiID, ok := amiMap[region]
az, ok := p.opts.Config.azByName[zone]
if !ok {
return errors.Errorf("could not find an AMI image id for region %s", region)
return fmt.Errorf("no region in %v corresponds to availability zone %v",
p.opts.Config.regionNames(), zone)
}

keyName, err := p.sshKeyName()
Expand All @@ -610,24 +590,6 @@ func (p *Provider) runInstance(name string, zone string, opts vm.CreateOpts) err
machineType = p.opts.MachineType
}

sgMap, err := splitMap(p.opts.SecurityGroups)
if err != nil {
return err
}
sgID, ok := sgMap[region]
if !ok {
return errors.Errorf("could not find a security group id for region %s", region)
}

subnetMap, err := splitMap(p.opts.Subnets)
if err != nil {
return err
}
subnetID, ok := subnetMap[zone]
if !ok {
return errors.Errorf("could not find a subnet id for zone %s", zone)
}

// We avoid the need to make a second call to set the tags by jamming
// all of our metadata into the TagSpec.
tagSpecs := fmt.Sprintf(
Expand Down Expand Up @@ -667,12 +629,12 @@ func (p *Provider) runInstance(name string, zone string, opts vm.CreateOpts) err
"ec2", "run-instances",
"--associate-public-ip-address",
"--count", "1",
"--image-id", amiID,
"--image-id", az.region.AMI,
"--instance-type", machineType,
"--key-name", keyName,
"--region", region,
"--security-group-ids", sgID,
"--subnet-id", subnetID,
"--region", az.region.Name,
"--security-group-ids", az.region.SecurityGroup,
"--subnet-id", az.subnetID,
"--tag-specifications", tagSpecs,
"--user-data", "file://" + filename,
}
Expand Down
Loading

0 comments on commit c6df752

Please sign in to comment.