Skip to content

Commit

Permalink
Allow user to choose between multiple endpoint lists at runtime (#168)
Browse files Browse the repository at this point in the history
* Add CONFIG_PATH to userdata template

* 1st draft of platformType dataflow (AWS/HCP only)

* Dynamically build absolute path of config

* Docs and defaults

* Addressing style feedback

* --help/docs fixes

* Handle invalid platformType & error msg fmt fixes

* Added platformType registration note to README
  • Loading branch information
abyrne55 authored Apr 25, 2023
1 parent dc06d8b commit c4170ae
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 53 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@ The recommended workflow of diagnostic use of ONV is shown in the following flow
If interested, please fork this repo and create pull requests to the `main` branch.

### Golden AMI
osd-network-verifier depends on these publicly available [AMIs](https://github.com/openshift/osd-network-verifier/blob/612be8c5b0ef8ac01e8018eca15dd143ab31cd1f/pkg/verifier/aws/aws_verifier.go#L25-L45) built from the [osd-network-verifier-golden-ami](https://gitlab.cee.redhat.com/service/osd-network-verifier-golden-ami) repo.
osd-network-verifier depends on these publicly available [AMIs](pkg/verifier/aws/aws_verifier.go#L24-L45) built from the [osd-network-verifier-golden-ami](https://gitlab.cee.redhat.com/service/osd-network-verifier-golden-ami) repo.

Golden AMI provides the following:
- runtime environment setup (such as container engine, configurations, etc)
- runtime environment setup (such as container engine, configurations, etc.)
- building and embedding the validator binary which performs the individual checks to the endpoints

### Egress List
### Egress Lists

This list of essential domains for egress verification should be maintained in [gitlab repo](https://gitlab.cee.redhat.com/service/osd-network-verifier-golden-ami/-/blob/master/build/config/config.yaml).
These lists of essential domains for egress verification should be maintained in the [GitLab repo](https://gitlab.cee.redhat.com/service/osd-network-verifier-golden-ami/-/blob/master/build/config/). Newly-added lists should be registered as "platform types" in [`helpers.go`](pkg/helpers/helpers.go#L46) using the list file's extensionless name as the value (e.g., abc.yaml should be registered as `PlatformABC string = "abc"`). Finally, the `--platform` help message and value handling logic in [`cmd.go`](cmd/egress/cmd.go) should also be updated.

### IAM Permission Requirement List

Expand Down
57 changes: 34 additions & 23 deletions cmd/egress/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/openshift/osd-network-verifier/cmd/utils"
"github.com/openshift/osd-network-verifier/pkg/helpers"
"github.com/openshift/osd-network-verifier/pkg/proxy"
"github.com/openshift/osd-network-verifier/pkg/verifier"
gcpverifier "github.com/openshift/osd-network-verifier/pkg/verifier/gcp"
Expand All @@ -17,12 +18,13 @@ import (
)

var (
awsDefaultTags = map[string]string{"osd-network-verifier": "owned", "red-hat-managed": "true", "Name": "osd-network-verifier"}
gcpDefaultTags = map[string]string{"osd-network-verifier": "owned", "red-hat-managed": "true", "name": "osd-network-verifier"}
awsRegionEnvVarStr = "AWS_REGION"
awsRegionDefault = "us-east-2"
gcpRegionEnvVarStr = "GCP_REGION"
gcpRegionDefault = "us-east1"
awsDefaultTags = map[string]string{"osd-network-verifier": "owned", "red-hat-managed": "true", "Name": "osd-network-verifier"}
gcpDefaultTags = map[string]string{"osd-network-verifier": "owned", "red-hat-managed": "true", "name": "osd-network-verifier"}
awsRegionEnvVarStr = "AWS_REGION"
awsRegionDefault = "us-east-2"
gcpRegionEnvVarStr = "GCP_REGION"
gcpRegionDefault = "us-east1"
platformTypeDefault = helpers.PlatformAWS
)

type egressConfig struct {
Expand All @@ -39,16 +41,16 @@ type egressConfig struct {
httpsProxy string
CaCert string
noTls bool
gcp bool
platformType string
awsProfile string
gcpVpcName string
skipAWSInstanceTermination bool
terminateDebugInstance string
}

func getDefaultRegion(isGCP bool) string {
func getDefaultRegion(platformType string) string {

if isGCP {
if platformType == helpers.PlatformGCP {
//gcp region
dRegion, ok := os.LookupEnv(gcpRegionEnvVarStr)
if !ok {
Expand Down Expand Up @@ -81,7 +83,7 @@ are set correctly before execution.

// Set Region
if config.region == "" {
config.region = getDefaultRegion(config.gcp)
config.region = getDefaultRegion(config.platformType)
}

// Set Up Proxy
Expand Down Expand Up @@ -112,11 +114,12 @@ are set correctly before execution.
Timeout: config.timeout,
Tags: config.cloudTags,
InstanceType: config.instanceType,
PlatformType: config.platformType,
Proxy: p,
}

// AWS workflow
if !config.gcp {
if config.platformType == helpers.PlatformAWS || config.platformType == helpers.PlatformHostedCluster {

if len(vei.Tags) == 0 {
vei.Tags = awsDefaultTags
Expand All @@ -130,7 +133,7 @@ are set correctly before execution.

awsVerifier, err := utils.GetAwsVerifier(config.region, config.awsProfile, config.debug)
if err != nil {
fmt.Printf("could not build awsVerifier %v", err)
fmt.Printf("could not build awsVerifier %v\n", err)
os.Exit(1)
}

Expand All @@ -149,8 +152,11 @@ are set correctly before execution.

awsVerifier.Logger.Info(context.TODO(), "Success")
os.Exit(0)
} else {
// GCP workflow
}

// GCP workflow
if config.platformType == helpers.PlatformGCP {

if len(vei.Tags) == 0 {
vei.Tags = gcpDefaultTags
}
Expand Down Expand Up @@ -180,12 +186,12 @@ are set correctly before execution.
// Tries to find Google credentials in all known locations, starting with env "GOOGLE_APPLICATION_CREDENTIALS"
creds, err := google.FindDefaultCredentials(context.TODO())
if err != nil {
fmt.Printf("could not find gcp Credentials file %v", err)
fmt.Printf("could not find GCP credentials file: %v\n", err)
os.Exit(1)
}
gcpVerifier, err := gcpverifier.NewGcpVerifier(creds, config.debug)
if err != nil {
fmt.Printf("could not build gcpVerifier %v", err)
fmt.Printf("could not build GcpVerifier: %v\n", err)
os.Exit(1)
}

Expand All @@ -201,13 +207,18 @@ are set correctly before execution.
gcpVerifier.Logger.Info(context.TODO(), "Success")
os.Exit(0)
}

// Unknown platformType specified
fmt.Printf("unknown platform type '%v'\n", config.platformType)
os.Exit(1)
},
}

validateEgressCmd.Flags().StringVar(&config.platformType, "platform", platformTypeDefault, fmt.Sprintf("(optional) infra platform type, which determines which endpoints to test. Either '%v', '%v', or '%v' (hypershift)", helpers.PlatformAWS, helpers.PlatformGCP, helpers.PlatformHostedCluster))
validateEgressCmd.Flags().StringVar(&config.vpcSubnetID, "subnet-id", "", "source subnet ID")
validateEgressCmd.Flags().StringVar(&config.cloudImageID, "image-id", "", "(optional) cloud image for the compute instance")
validateEgressCmd.Flags().StringVar(&config.instanceType, "instance-type", "", "(optional) compute instance type")
validateEgressCmd.Flags().StringVar(&config.securityGroupId, "security-group-id", "", "security group id to attach to the created EC2 instance")
validateEgressCmd.Flags().StringVar(&config.securityGroupId, "security-group-id", "", "security group ID to attach to the created EC2 instance")
validateEgressCmd.Flags().StringVar(&config.region, "region", "", fmt.Sprintf("(optional) compute instance region. If absent, environment var %[1]v = %[2]v and %[3]v = %[4]v will be used", awsRegionEnvVarStr, awsRegionDefault, gcpRegionEnvVarStr, gcpRegionDefault))
validateEgressCmd.Flags().StringToStringVar(&config.cloudTags, "cloud-tags", map[string]string{}, "(optional) comma-seperated list of tags to assign to cloud resources e.g. --cloud-tags key1=value1,key2=value2")
validateEgressCmd.Flags().BoolVar(&config.debug, "debug", false, "(optional) if true, enable additional debug-level logging")
Expand All @@ -216,12 +227,12 @@ are set correctly before execution.
validateEgressCmd.Flags().StringVar(&config.httpProxy, "http-proxy", "", "(optional) http-proxy to be used upon http requests being made by verifier, format: http://user:[email protected]:8978")
validateEgressCmd.Flags().StringVar(&config.httpsProxy, "https-proxy", "", "(optional) https-proxy to be used upon https requests being made by verifier, format: https://user:[email protected]:8978")
validateEgressCmd.Flags().StringVar(&config.CaCert, "cacert", "", "(optional) path to cacert file to be used upon https requests being made by verifier")
validateEgressCmd.Flags().BoolVar(&config.noTls, "no-tls", false, "(optional) if true, ignore all ssl certificate validations on client-side.")
validateEgressCmd.Flags().BoolVar(&config.gcp, "gcp", false, "Set to true if cluster is GCP")
validateEgressCmd.Flags().StringVar(&config.awsProfile, "profile", "", "(optional) AWS profile. If present, any credentials passed with CLI will be ignored.")
validateEgressCmd.Flags().StringVar(&config.gcpVpcName, "vpc-name", "", "(optional) Vpc Name where GCP cluster is installed mandatory if --gcp=True")
validateEgressCmd.Flags().BoolVar(&config.skipAWSInstanceTermination, "skip-termination", false, "(optional) Skip Debug Instance Termination to allow further debug.")
validateEgressCmd.Flags().StringVar(&config.terminateDebugInstance, "terminate-debug", "", "(optional) Takes the debug instance ID and terminates it.")
validateEgressCmd.Flags().BoolVar(&config.noTls, "no-tls", false, "(optional) if true, skip client-side SSL certificate validation")
validateEgressCmd.Flags().StringVar(&config.awsProfile, "profile", "", "(optional) AWS profile. If present, any credentials passed with CLI will be ignored")
validateEgressCmd.Flags().StringVar(&config.gcpVpcName, "vpc-name", "", "(optional unless --platform='gcp') VPC name where GCP cluster is installed")
validateEgressCmd.Flags().BoolVar(&config.skipAWSInstanceTermination, "skip-termination", false, "(optional) Skip instance termination to allow further debugging")
validateEgressCmd.Flags().StringVar(&config.terminateDebugInstance, "terminate-debug", "", "(optional) Takes the debug instance ID and terminates it")

if err := validateEgressCmd.MarkFlagRequired("subnet-id"); err != nil {
validateEgressCmd.PrintErr(err)
}
Expand Down
40 changes: 25 additions & 15 deletions docs/aws/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,30 +128,40 @@ repeat the verification process for each subnet ID.
```
If the image id is not provided, it is defaulted to an image id from [AWS account olm-artifacts-template.yaml](https://github.com/openshift/aws-account-operator/blob/17be7a41036e252d59ab19cc2ad1dcaf265758a2/hack/olm-registry/olm-artifacts-template.yaml#L75),
for the same region where your subnet is.
3. platform: This parameter dictates for which set of endpoints the verifier should test. If testing a subnet that hosts (or will host) a traditional OSD/ROSA cluster, set this to `aws` (or leave blank). If you're instead testing a subnet hosting a HyperShift Hosted Cluster (*not* a hosted control plane/management cluster) on AWS, set this to `hostedcluster`.
5. Execute:
```shell
# using AWS profile
./osd-network-verifier egress --subnet-id $SUBNET_ID --profile $AWS_PROFILE
# using AWS profile on an OSD/ROSA cluster
./osd-network-verifier egress --platform aws --subnet-id $SUBNET_ID --profile $AWS_PROFILE
# using AWS secret
# using AWS secret on a HyperShift hosted cluster
AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
./osd-network-verifier egress --subnet-id $SUBNET_ID
./osd-network-verifier egress --platform hostedcluster --subnet-id $SUBNET_ID
```
Additional optional flags for overriding defaults:
```shell
--cloud-tags stringToString (optional) comma-seperated list of tags to assign to cloud resources e.g. --cloud-tags key1=value1,key2=value2 (default [osd-network-verifier=owned,red-hat-managed=true,Name=osd-network-verifier])
--debug (optional) if true, enable additional debug-level logging
--image-id string (optional) cloud image for the compute instance
--instance-type string (optional) compute instance type (default "t3.micro")
--kms-key-id string (optional) ID of KMS key used to encrypt root volumes of compute instances. Defaults to cloud account default key
--region string (optional) compute instance region. If absent, environment var AWS_REGION will be used, if set (default "us-east-2")
--profile string (optional) AWS profile. If present, any credentials passed with CLI will be ignored.
--subnet-id string source subnet ID
--timeout duration (optional) timeout for individual egress verification requests (default 2s). If timeout is less than 2s, it would likely cause false negatives test results.
```
```shell
--cacert string (optional) path to cacert file to be used upon https requests being made by verifier
--cloud-tags stringToString (optional) comma-seperated list of tags to assign to cloud resources e.g. --cloud-tags key1=value1,key2=value2 (default [])
--debug (optional) if true, enable additional debug-level logging
--http-proxy string (optional) http-proxy to be used upon http requests being made by verifier, format: http://user:[email protected]:8978
--https-proxy string (optional) https-proxy to be used upon https requests being made by verifier, format: https://user:[email protected]:8978
--image-id string (optional) cloud image for the compute instance
--instance-type string (optional) compute instance type
--kms-key-id string (optional) ID of KMS key used to encrypt root volumes of compute instances. Defaults to cloud account default key
--no-tls (optional) if true, skip client-side SSL certificate validation
--platform string (optional) infra platform type, which determines which endpoints to test. Either 'aws', 'gcp', or 'hostedcluster' (hypershift) (default "aws")
--profile string (optional) AWS profile. If present, any credentials passed with CLI will be ignored
--region string (optional) compute instance region. If absent, environment var AWS_REGION = us-east-2 and GCP_REGION = us-east1 will be used
--security-group-id string security group ID to attach to the created EC2 instance
--skip-termination (optional) Skip instance termination to allow further debugging
--subnet-id string source subnet ID
--terminate-debug string (optional) Takes the debug instance ID and terminates it
--timeout duration (optional) timeout for individual egress verification requests (default 2s)
--vpc-name string (optional unless --platform='gcp') VPC name where GCP cluster is installed
```
Get cli help:
Expand Down
2 changes: 2 additions & 0 deletions examples/aws/verify_egress.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"os"
"time"

"github.com/openshift/osd-network-verifier/pkg/helpers"
"github.com/openshift/osd-network-verifier/pkg/proxy"
"github.com/openshift/osd-network-verifier/pkg/verifier"
awsverifier "github.com/openshift/osd-network-verifier/pkg/verifier/aws"
Expand Down Expand Up @@ -61,6 +62,7 @@ func extendValidateEgress() {
KmsKeyID: "kmskeyID",
SecurityGroupId: "SecurityGroupId",
},
PlatformType: helpers.PlatformAWS,
}

// Call egress function with either gcp or aws client
Expand Down
4 changes: 2 additions & 2 deletions pkg/helpers/config/userdata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ write_files:
echo "Using IMAGE : $IMAGE" >> /var/log/userdata-output
if [[ "${CACERT}" != "" ]]; then
echo "${CACERT}" | base64 --decode > /proxy.pem
sudo docker run -v /proxy.pem:/proxy.pem:Z -e "HTTP_PROXY=${HTTP_PROXY}" -e "HTTPS_PROXY=${HTTPS_PROXY}" --env "AWS_REGION=${AWS_REGION}" -e "START_VERIFIER=${VALIDATOR_START_VERIFIER}" -e "END_VERIFIER=${VALIDATOR_END_VERIFIER}" ${IMAGE} --timeout=${TIMEOUT} --cacert=/proxy.pem --no-tls=${NOTLS} >> /var/log/userdata-output || echo "Failed to successfully run the docker container"
sudo docker run -v /proxy.pem:/proxy.pem:Z -e "HTTP_PROXY=${HTTP_PROXY}" -e "HTTPS_PROXY=${HTTPS_PROXY}" --env "AWS_REGION=${AWS_REGION}" -e "START_VERIFIER=${VALIDATOR_START_VERIFIER}" -e "END_VERIFIER=${VALIDATOR_END_VERIFIER}" ${IMAGE} --timeout=${TIMEOUT} --config=${CONFIG_PATH} --cacert=/proxy.pem --no-tls=${NOTLS} >> /var/log/userdata-output || echo "Failed to successfully run the docker container"
else
sudo docker run --env "AWS_REGION=${AWS_REGION}" -e "HTTP_PROXY=${HTTP_PROXY}" -e "HTTPS_PROXY=${HTTPS_PROXY}" -e "START_VERIFIER=${VALIDATOR_START_VERIFIER}" -e "END_VERIFIER=${VALIDATOR_END_VERIFIER}" ${IMAGE} --timeout=${TIMEOUT} >> /var/log/userdata-output || echo "Failed to successfully run the docker container"
sudo docker run --env "AWS_REGION=${AWS_REGION}" -e "HTTP_PROXY=${HTTP_PROXY}" -e "HTTPS_PROXY=${HTTPS_PROXY}" -e "START_VERIFIER=${VALIDATOR_START_VERIFIER}" -e "END_VERIFIER=${VALIDATOR_END_VERIFIER}" ${IMAGE} --timeout=${TIMEOUT} --config=${CONFIG_PATH} >> /var/log/userdata-output || echo "Failed to successfully run the docker container"
fi
echo "${USERDATA_END}" >> /var/log/userdata-output
runcmd:
Expand Down
7 changes: 7 additions & 0 deletions pkg/helpers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,10 @@ func PollImmediate(interval time.Duration, timeout time.Duration, condition func

return errors.New("timed out waiting for the condition")
}

// Platform type identifiers naming the platform underlying the
// cluster-under-test. These are plain string constants (not a distinct Go
// type), so any string can be supplied where a platform type is expected;
// callers are responsible for rejecting unknown values.
//
// Each value doubles as the extensionless name of an egress list file: the AWS
// verifier interpolates it into the config path it hands to the validator, so
// a new list "abc.yaml" should be registered here as PlatformABC = "abc".
const (
// PlatformAWS selects the endpoint list for a traditional OSD/ROSA cluster on AWS.
PlatformAWS string = "aws"
// PlatformGCP selects the GCP verification workflow.
PlatformGCP string = "gcp"
// PlatformHostedCluster selects the endpoint list for a HyperShift hosted
// cluster on AWS (not a hosted control plane/management cluster).
PlatformHostedCluster string = "hostedcluster"
)
13 changes: 13 additions & 0 deletions pkg/verifier/aws/entry_point.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,14 @@ import (
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2Types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
handledErrors "github.com/openshift/osd-network-verifier/pkg/errors"
"github.com/openshift/osd-network-verifier/pkg/helpers"
"github.com/openshift/osd-network-verifier/pkg/output"
"github.com/openshift/osd-network-verifier/pkg/verifier"
)

// CONFIG_PATH_FSTRING is a fmt format string for the absolute path of the
// egress config file. Its single %s verb is filled with a platform type value
// (e.g. "aws"), yielding a path such as /app/build/config/aws.yaml.
// NOTE(review): the resulting path is passed to the validator via --config in
// the userdata template, so it is presumably resolved inside the validator
// container image rather than on the host — confirm against the golden AMI.
const CONFIG_PATH_FSTRING string = "/app/build/config/%s.yaml"

// ValidateEgress performs validation process for egress
// Basic workflow is:
// - prepare for ec2 instance creation
Expand All @@ -33,6 +37,14 @@ func (a *AwsVerifier) ValidateEgress(vei verifier.ValidateEgressInput) *output.O
return a.Output.AddError(fmt.Errorf("instance type %s is invalid: %s", vei.InstanceType, err))
}

// Select config file based on platform type
configPath := fmt.Sprintf(CONFIG_PATH_FSTRING, vei.PlatformType)
if vei.PlatformType == "" {
// Default to AWS
configPath = fmt.Sprintf(CONFIG_PATH_FSTRING, helpers.PlatformAWS)
}

// Terminate a debug instance leftover from a previous run
if vei.TerminateDebugInstance != "" {
if err := a.AwsClient.TerminateEC2Instance(vei.Ctx, vei.TerminateDebugInstance); err != nil {
a.Output.AddError(err)
Expand All @@ -56,6 +68,7 @@ func (a *AwsVerifier) ValidateEgress(vei verifier.ValidateEgressInput) *output.O
"NOTLS": strconv.FormatBool(vei.Proxy.NoTls),
"IMAGE": "$IMAGE",
"VALIDATOR_REFERENCE": "$VALIDATOR_REFERENCE",
"CONFIG_PATH": configPath,
"DELAY": "5",
}

Expand Down
18 changes: 9 additions & 9 deletions pkg/verifier/package_verifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ type verifierService interface {
}

type ValidateEgressInput struct {
Timeout time.Duration
Ctx context.Context
SubnetID, CloudImageID, InstanceType string
Proxy proxy.ProxyConfig
Tags map[string]string
AWS AwsEgressConfig
GCP GcpEgressConfig
SkipInstanceTermination bool
TerminateDebugInstance string
Timeout time.Duration
Ctx context.Context
SubnetID, CloudImageID, InstanceType, PlatformType string
Proxy proxy.ProxyConfig
Tags map[string]string
AWS AwsEgressConfig
GCP GcpEgressConfig
SkipInstanceTermination bool
TerminateDebugInstance string
}
type AwsEgressConfig struct {
KmsKeyID, SecurityGroupId string
Expand Down

0 comments on commit c4170ae

Please sign in to comment.