diff --git a/.cirrus.yml b/.cirrus.yml index 7d14c7fc..fd1a778b 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -9,7 +9,7 @@ env: # No need to go crazy, but grab enough to cover most PRs CIRRUS_CLONE_DEPTH: 50 # Version of packer to use when building images - PACKER_VERSION: &PACKER_VERSION "1.8.0" + PACKER_VERSION: &PACKER_VERSION "1.8.3" # Unique suffix label to use for all images produced by _this_ run (build) IMG_SFX: "${CIRRUS_BUILD_ID}" @@ -60,8 +60,7 @@ image_builder_task: # Google Application Credentials (JSON) with access to create VM images GAC_JSON: ENCRYPTED[7fba7fb26ab568ae39f799ab58a476123206576b0135b3d1019117c6d682391370c801e149f29324ff4b50133012aed9] AWS_SHARED_CREDENTIALS_FILE: notused - - script: "ci/make_image_builder.sh" + script: "ci/make.sh image_builder" manifest_artifacts: path: image_builder/manifest.json type: application/json @@ -170,9 +169,9 @@ base_images_task: env: PACKER_BUILDS: "ubuntu" env: - GAC_JSON: ENCRYPTED[7fba7fb26ab568ae39f799ab58a476123206576b0135b3d1019117c6d682391370c801e149f29324ff4b50133012aed9] - AWS_INI: ENCRYPTED[4cd69097cd29a9899e51acf3bbacceeb83cb5c907d272ca1e2a8ccd515b03f2368a0680870c0d120fc32bc578bb0a930] - script: "ci/make_base_images.sh" + GAC_JSON: &gac_json ENCRYPTED[7fba7fb26ab568ae39f799ab58a476123206576b0135b3d1019117c6d682391370c801e149f29324ff4b50133012aed9] + AWS_INI: &aws_ini ENCRYPTED[4cd69097cd29a9899e51acf3bbacceeb83cb5c907d272ca1e2a8ccd515b03f2368a0680870c0d120fc32bc578bb0a930] + script: "ci/make.sh base_images" manifest_artifacts: path: base_images/manifest.json type: application/json @@ -226,9 +225,9 @@ cache_images_task: env: PACKER_BUILDS: "ubuntu" env: - GAC_JSON: ENCRYPTED[7fba7fb26ab568ae39f799ab58a476123206576b0135b3d1019117c6d682391370c801e149f29324ff4b50133012aed9] - AWS_INI: ENCRYPTED[4cd69097cd29a9899e51acf3bbacceeb83cb5c907d272ca1e2a8ccd515b03f2368a0680870c0d120fc32bc578bb0a930] - script: "ci/make_cache_images.sh" + GAC_JSON: *gac_json + AWS_INI: *aws_ini + script: "ci/make.sh cache_images" manifest_artifacts: path: cache_images/manifest.json type: application/json diff --git a/.github/actions/bin/create_image_table.py b/.github/actions/bin/create_image_table.py index ca7e3ef7..b67f9463 100755 --- a/.github/actions/bin/create_image_table.py +++ b/.github/actions/bin/create_image_table.py @@ -9,6 +9,19 @@ import json import os + +def stage_sort(item): + """Return sorting-key for build-image-json item""" + if item["stage"] == "import": + return str("0010"+item["name"]) + elif item["stage"] == "base": + return str("0020"+item["name"]) + elif item["stage"] == "cache": + return str("0030"+item["name"]) + else: + return str("0100"+item["name"]) + + if "GITHUB_ENV" not in os.environ: raise KeyError("Error: $GITHUB_ENV is undefined.") @@ -31,7 +44,7 @@ url='https://cirrus-ci.com/task' lines=[] -data.sort(key=lambda item: str(item["stage"]+item["name"])) +data.sort(key=stage_sort) for item in data: lines.append('|*{0}*|[{1}]({2})|`{3}`|\n'.format(item['stage'], item['name'], '{0}/{1}'.format(url, item['task']), diff --git a/Makefile b/Makefile index 26b767ea..589df712 100644 --- a/Makefile +++ b/Makefile @@ -17,19 +17,27 @@ if_ci_else = $(if $(findstring true,$(CI)),$(1),$(2)) export CENTOS_STREAM_RELEASE = 8 -export FEDORA_RELEASE = 36 -export FEDORA_IMAGE_URL = https://dl.fedoraproject.org/pub/fedora/linux/releases/36/Cloud/x86_64/images/Fedora-Cloud-Base-36-1.5.x86_64.qcow2 -export FEDORA_CSUM_URL = https://dl.fedoraproject.org/pub/fedora/linux/releases/36/Cloud/x86_64/images/Fedora-Cloud-36-1.5-x86_64-CHECKSUM 
-export FEDORA_AMI = ami-08b7bda26f4071b80 -export FEDORA_ARM64_AMI = ami-01925eb0821988986 +# QCOW2 Image URLs and CHECKSUM files +# Ref: https://dl.fedoraproject.org/pub/fedora/linux/ -export PRIOR_FEDORA_RELEASE = 35 -export PRIOR_FEDORA_IMAGE_URL = https://dl.fedoraproject.org/pub/fedora/linux/releases/35/Cloud/x86_64/images/Fedora-Cloud-Base-35-1.2.x86_64.qcow2 -export PRIOR_FEDORA_CSUM_URL = https://dl.fedoraproject.org/pub/fedora/linux/releases/35/Cloud/x86_64/images/Fedora-Cloud-35-1.2-x86_64-CHECKSUM +export FEDORA_RELEASE = 37 +export FEDORA_IMAGE_URL = https://dl.fedoraproject.org/pub/fedora/linux/development/37/Cloud/x86_64/images/Fedora-Cloud-Base-37-20220912.n.0.x86_64.qcow2 +export FEDORA_CSUM_URL = https://dl.fedoraproject.org/pub/fedora/linux/development/37/Cloud/x86_64/images/Fedora-Cloud-37-x86_64-20220912.n.0-CHECKSUM +export FEDORA_ARM64_IMAGE_URL = https://dl.fedoraproject.org/pub/fedora/linux/development/37/Cloud/aarch64/images/Fedora-Cloud-Base-37-20220912.n.0.aarch64.qcow2 +export FEDORA_ARM64_CSUM_URL = https://dl.fedoraproject.org/pub/fedora/linux/development/37/Cloud/aarch64/images/Fedora-Cloud-37-aarch64-20220912.n.0-CHECKSUM + +export PRIOR_FEDORA_RELEASE = 36 +export PRIOR_FEDORA_IMAGE_URL = https://dl.fedoraproject.org/pub/fedora/linux/releases/36/Cloud/x86_64/images/Fedora-Cloud-Base-36-1.5.x86_64.qcow2 +export PRIOR_FEDORA_CSUM_URL = https://dl.fedoraproject.org/pub/fedora/linux/releases/36/Cloud/x86_64/images/Fedora-Cloud-36-1.5-x86_64-CHECKSUM + +# See import_images/README.md +export FEDORA_IMPORT_IMG_SFX = 1662988741 export UBUNTU_RELEASE = 22.04 export UBUNTU_BASE_FAMILY = ubuntu-2204-lts +IMPORT_FORMAT = vhdx + ##### Important Paths and variables ##### # Most targets require possession of service-account credentials (JSON file) @@ -95,6 +103,9 @@ IMG_SFX ?= export CHECKPOINT_DISABLE = 1 # Disable hashicorp phone-home export PACKER_CACHE_DIR = $(call err_if_empty,_TEMPDIR) +# AWS CLI default, in case caller needs to override +export AWS := aws --output json --region us-east-1 + ##### Targets ##### # N/B: The double-# after targets is gawk'd out as the target description @@ -226,15 +237,119 @@ image_builder_debug: $(_TEMPDIR)/image_builder_debug.tar ## Build and enter cont $(_TEMPDIR)/image_builder_debug.tar: $(_TEMPDIR)/.cache/centos $(wildcard image_builder/*) $(call podman_build,$@,image_builder_debug,image_builder,centos) +# Avoid re-downloading unnecessarily +# Ref: https://www.gnu.org/software/make/manual/html_node/Special-Targets.html#Special-Targets +.PRECIOUS: $(_TEMPDIR)/fedora-aws-$(IMG_SFX).$(IMPORT_FORMAT) +$(_TEMPDIR)/fedora-aws-$(IMG_SFX).$(IMPORT_FORMAT): $(_TEMPDIR) + bash import_images/handle_image.sh \ + $@ \ + $(FEDORA_IMAGE_URL) \ + $(FEDORA_CSUM_URL) + +$(_TEMPDIR)/fedora-aws-arm64-$(IMG_SFX).$(IMPORT_FORMAT): $(_TEMPDIR) + bash import_images/handle_image.sh \ + $@ \ + $(FEDORA_ARM64_IMAGE_URL) \ + $(FEDORA_ARM64_CSUM_URL) + +$(_TEMPDIR)/%.md5: $(_TEMPDIR)/%.$(IMPORT_FORMAT) + openssl md5 -binary $< | base64 > $@.tmp + mv $@.tmp $@ + +# MD5 metadata value checked by AWS after upload + 5 retries. +# Cache disabled to avoid sync. issues w/ vmimport service if +# image re-uploaded. 
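+# For illustration only (key name here is an example): the value stored in
+# %.md5 is the base64 of the raw MD5 digest, which is the form S3 expects
+# in the Content-MD5 header.  A hand-check of a finished upload could be:
+#   aws s3api head-object --bucket packer-image-import \
+#       --key fedora-aws-1662988741.vhdx
+# (for single-part uploads, the returned ETag is the hex MD5 digest)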
+# TODO: Use sha256 from ..._CSUM_URL file instead of recalculating
+# https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
+# Avoid re-uploading unnecessarily
+.SECONDARY: $(_TEMPDIR)/%.uploaded
+$(_TEMPDIR)/%.uploaded: $(_TEMPDIR)/%.$(IMPORT_FORMAT) $(_TEMPDIR)/%.md5
+    -$(AWS) s3 rm --quiet s3://packer-image-import/$*.$(IMPORT_FORMAT)
+    $(AWS) s3api put-object \
+        --content-md5 "$(file < $(_TEMPDIR)/$*.md5)" \
+        --content-encoding binary/octet-stream \
+        --cache-control no-cache \
+        --bucket packer-image-import \
+        --key $*.$(IMPORT_FORMAT) \
+        --body $(_TEMPDIR)/$*.$(IMPORT_FORMAT) > $@.tmp
+    mv $@.tmp $@
+
+# For whatever reason, the 'Format' value must be all upper-case.
+# Avoid creating unnecessary/duplicate import tasks
+.SECONDARY: $(_TEMPDIR)/%.import_task_id
+$(_TEMPDIR)/%.import_task_id: $(_TEMPDIR)/%.uploaded
+    $(AWS) ec2 import-snapshot \
+        --disk-container Format=$(shell tr '[:lower:]' '[:upper:]'<<<"$(IMPORT_FORMAT)"),UserBucket="{S3Bucket=packer-image-import,S3Key=$*.$(IMPORT_FORMAT)}" > $@.tmp.json
+    @cat $@.tmp.json
+    jq -r -e .ImportTaskId $@.tmp.json > $@.tmp
+    mv $@.tmp $@
+
+# Avoid importing multiple snapshots for the same image
+.PRECIOUS: $(_TEMPDIR)/%.snapshot_id
+$(_TEMPDIR)/%.snapshot_id: $(_TEMPDIR)/%.import_task_id
+    bash import_images/wait_import_task.sh "$<" > $@.tmp
+    mv $@.tmp $@
+
+define _register_sed
+    sed -r \
+        -e 's/@@@NAME@@@/$(1)/' \
+        -e 's/@@@IMG_SFX@@@/$(IMG_SFX)/' \
+        -e 's/@@@ARCH@@@/$(2)/' \
+        -e 's/@@@SNAPSHOT_ID@@@/$(3)/' \
+        import_images/register.json.in \
+        > $(4)
endef

+$(_TEMPDIR)/fedora-aws-$(IMG_SFX).register.json: $(_TEMPDIR)/fedora-aws-$(IMG_SFX).snapshot_id import_images/register.json.in
+    $(call _register_sed,fedora-aws,x86_64,$(file <$<),$@)
+
+$(_TEMPDIR)/fedora-aws-arm64-$(IMG_SFX).register.json: $(_TEMPDIR)/fedora-aws-arm64-$(IMG_SFX).snapshot_id import_images/register.json.in
+    $(call _register_sed,fedora-aws-arm64,arm64,$(file <$<),$@)
+
+# Avoid multiple registrations for the same image
+.PRECIOUS: $(_TEMPDIR)/%.ami.id
+$(_TEMPDIR)/%.ami.id: $(_TEMPDIR)/%.register.json
+    $(AWS) ec2 register-image --cli-input-json "$$(<$<)" > $@.tmp.json
+    cat $@.tmp.json
+    jq -r -e .ImageId $@.tmp.json > $@.tmp
+    mv $@.tmp $@
+
+$(_TEMPDIR)/%.ami.name: $(_TEMPDIR)/%.register.json
+    jq -r -e .Name $< > $@.tmp
+    mv $@.tmp $@
+
+$(_TEMPDIR)/%.ami.json: $(_TEMPDIR)/%.ami.id $(_TEMPDIR)/%.ami.name
+    $(AWS) ec2 create-tags \
+        --resources "$$(<$(_TEMPDIR)/$*.ami.id)" \
+        --tags \
+        Key=Name,Value=$$(<$(_TEMPDIR)/$*.ami.name) \
+        Key=automation,Value=true
+    $(AWS) --output table ec2 describe-images --image-ids "$$(<$(_TEMPDIR)/$*.ami.id)" \
+        | tee $@
+
+.PHONY: import_images
+import_images: $(_TEMPDIR)/fedora-aws-$(IMG_SFX).ami.json $(_TEMPDIR)/fedora-aws-arm64-$(IMG_SFX).ami.json import_images/manifest.json.in ## Import generic Fedora cloud images into AWS EC2.
+    sed -r \
+        -e 's/@@@IMG_SFX@@@/$(IMG_SFX)/' \
+        -e 's/@@@CIRRUS_TASK_ID@@@/$(CIRRUS_TASK_ID)/' \
+        import_images/manifest.json.in \
+        > import_images/manifest.json
+    @echo "Image import(s) successful."
+    @echo "############################################################"
+    @echo "Please update Makefile value:"
+    @echo ""
+    @echo "    FEDORA_IMPORT_IMG_SFX = $(IMG_SFX)"
+    @echo "############################################################"
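+
+# The suffix recorded above becomes part of the imported AMI names: e.g.
+# with FEDORA_IMPORT_IMG_SFX = 1662988741, the AMIs are named
+# 'fedora-aws-i1662988741' and 'fedora-aws-arm64-i1662988741' (see
+# import_images/register.json.in).  base_images/cloud.yml's
+# source_ami_filter then looks the base images up by exactly those names.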
+
 .PHONY: base_images
 # This needs to run in a virt/nested-virt capable environment
-base_images: base_images/manifest.json ## Create, prepare, and import base-level images into GCE. Optionally, set PACKER_BUILDS= to select builder(s).
+base_images: base_images/manifest.json ## Create, prepare, and import base-level images into GCE.

 base_images/manifest.json: base_images/cloud.json $(wildcard base_images/*.sh) cidata $(_TEMPDIR)/cidata.ssh $(PACKER_INSTALL_DIR)/packer
     $(call packer_build,base_images/cloud.json)

 .PHONY: cache_images
-cache_images: cache_images/manifest.json ## Create, prepare, and import top-level images into GCE. Optionally, set PACKER_BUILDS= to select builder(s).
+cache_images: cache_images/manifest.json ## Create, prepare, and import top-level images into GCE.

 cache_images/manifest.json: cache_images/cloud.json $(wildcard cache_images/*.sh) $(PACKER_INSTALL_DIR)/packer
     $(call packer_build,cache_images/cloud.json)

@@ -273,12 +388,14 @@ $(_TEMPDIR)/skopeo_cidev.tar: $(wildcard skopeo_base/*) $(_TEMPDIR)/.cache/fedor
     rm -f $@
     podman save --quiet -o $@ skopeo_cidev:$(IMG_SFX)

+# TODO: Temporarily force F36 due to:
+# https://github.com/aio-libs/aiohttp/issues/6600
 .PHONY: ccia
 ccia: $(_TEMPDIR)/ccia.tar ## Build the Cirrus-CI Artifacts container image
 $(_TEMPDIR)/ccia.tar: ccia/Containerfile
     podman build -t ccia:$(call err_if_empty,IMG_SFX) \
         --security-opt seccomp=unconfined \
-        --build-arg=BASE_TAG=$(FEDORA_RELEASE) \
+        --build-arg=BASE_TAG=36 \
         ccia
     rm -f $@
     podman save --quiet -o $@ ccia:$(IMG_SFX)

@@ -325,5 +442,5 @@ $(_TEMPDIR)/get_ci_vm.tar: lib.sh get_ci_vm/Containerfile get_ci_vm/entrypoint.s
 clean: ## Remove all generated files referenced in this Makefile
     -rm -rf $(_TEMPDIR)
     -rm -f image_builder/*.json
-    -rm -f base_images/{*.json,cidata*,*-data}
+    -rm -f *_images/{*.json,cidata*,*-data}
     -rm -f ci_debug.tar
diff --git a/base_images/cloud.yml b/base_images/cloud.yml
index 5ae641a6..5e5074f8 100644
--- a/base_images/cloud.yml
+++ b/base_images/cloud.yml
@@ -21,13 +21,13 @@ variables:  # Empty value means it must be passed in on command-line
     FEDORA_RELEASE: "{{env `FEDORA_RELEASE`}}"
     FEDORA_IMAGE_URL: "{{env `FEDORA_IMAGE_URL`}}"
     FEDORA_CSUM_URL: "{{env `FEDORA_CSUM_URL`}}"
-    FEDORA_AMI: "{{env `FEDORA_AMI`}}"
-    FEDORA_ARM64_AMI: "{{env `FEDORA_ARM64_AMI`}}"

     PRIOR_FEDORA_RELEASE: "{{env `PRIOR_FEDORA_RELEASE`}}"
     PRIOR_FEDORA_IMAGE_URL: "{{env `PRIOR_FEDORA_IMAGE_URL`}}"
     PRIOR_FEDORA_CSUM_URL: "{{env `PRIOR_FEDORA_CSUM_URL`}}"

+    FEDORA_IMPORT_IMG_SFX: "{{env `FEDORA_IMPORT_IMG_SFX`}}"
+
     UBUNTU_RELEASE: "{{env `UBUNTU_RELEASE`}}"
     UBUNTU_BASE_FAMILY: "{{env `UBUNTU_BASE_FAMILY`}}"

@@ -108,7 +108,23 @@ builders:
     - &fedora-aws
       name: 'fedora-aws'
       type: 'amazon-ebs'
-      source_ami: '{{user `FEDORA_AMI`}}'
+      source_ami_filter:  # Will fail if >1 or no AMI found
+          owners:
+              # Docs are wrong: specifying the Account ID is required to make
+              # AMIs private.  The Account ID is hard-coded here out of
+              # expediency, since passing in more packer args from the
+              # command-line (in Makefile) is non-trivial.
+              - &accountid '449134212816'
+          # It's necessary to 'search' for the base-image by these criteria.  If
+          # more than one image is found, Packer will fail the build (and display
+          # the conflicting AMI IDs).
+          filters: &ami_filters
+              architecture: 'x86_64'
+              image-type: 'machine'
+              is-public: 'false'
+              name: '{{build_name}}-i{{user `FEDORA_IMPORT_IMG_SFX`}}'
+              root-device-type: 'ebs'
+              state: 'available'
+              virtualization-type: 'hvm'
       instance_type: 'm5zn.metal'
       # In case of packer problem or ungraceful exit, don't wait for shutdown.
 # This doesn't always work properly, sometimes leaving EC2 instances in
@@ -120,7 +136,7 @@ builders:
       # Required for network access, must be the 'default' group used by Cirrus-CI
       security_group_id: "sg-042c75677872ef81c"
       # Prefix IMG_SFX with "b" so this is never confused with a cache_image
-      ami_name: 'fedora-aws-b{{user `IMG_SFX`}}'
+      ami_name: &ami_name '{{build_name}}-b{{user `IMG_SFX`}}'
       ami_description: 'Built in https://cirrus-ci.com/task/{{user `CIRRUS_TASK_ID`}}'
       ebs_optimized: true
       launch_block_device_mappings:
@@ -133,19 +149,17 @@ builders:
       tags: &awstags
           <<: *imgcpylabels
          # EC2 expects "Name" to be capitalized
-          Name: 'fedora-aws-b{{user `IMG_SFX`}}'
-          src: '{{user `FEDORA_AMI`}}'
+          Name: *ami_name
+          src: '{{.SourceAMI}}'
          automation: 'true'
          release: 'fedora-{{user `FEDORA_RELEASE`}}'
      run_tags: *awstags
      run_volume_tags: *awstags
      snapshot_tags: *awstags
-      # Docs are wrong, specifying the Account ID required to make AMIs private.
      # This is necessary for security - The CI service accounts are not permitted
-      # to use AMI's from any other account, including public ones. The Account
-      # ID is hard-coded here out of expediency, since passing in more packer args
-      # from the command-line (in Makefile) is non-trivial.
-      ami_users: ["449134212816"]
+      # to use AMIs from any other account, including public ones.
+      ami_users:
+          - *accountid
      ssh_username: 'fedora'
      ssh_clear_authorized_keys: true
      # N/B: Required Packer >= 1.8.0
@@ -154,12 +168,15 @@ builders:

     - <<: *fedora-aws
       name: 'fedora-aws-arm64'
-      source_ami: '{{user `FEDORA_ARM64_AMI`}}'
+      source_ami_filter:
+          owners:
+              - *accountid
+          filters:
+              <<: *ami_filters
+              architecture: 'arm64'
       instance_type: 't4g.medium'  # arm64 type
-      ami_name: 'fedora-aws-arm64-b{{user `IMG_SFX`}}'  # must be unique
       tags: &awsarm64tags
           <<: *awstags
-          src: '{{user `FEDORA_ARM64_AMI`}}'
           arch: 'arm64'
       run_tags: *awsarm64tags
       run_volume_tags: *awsarm64tags
@@ -213,6 +230,7 @@ post-processors:
             only: ['prior-fedora']
             image_name: "prior-fedora-b{{user `IMG_SFX`}}"
             image_family: '{{build_name}}-base'
+            image_description: '{{user `PRIOR_FEDORA_IMAGE_URL`}}'
             image_labels:
                 <<: *imgcpylabels
                 src: 'fedoraproject'
diff --git a/cache_images/fedora_packaging.sh b/cache_images/fedora_packaging.sh
index 361b3680..c2c7a209 100644
--- a/cache_images/fedora_packaging.sh
+++ b/cache_images/fedora_packaging.sh
@@ -189,13 +189,18 @@ DOWNLOAD_PACKAGES=(\
 echo "Installing general build/test dependencies"
 bigto $SUDO dnf install -y $EXARG "${INSTALL_PACKAGES[@]}"

-if [[ ${#DOWNLOAD_PACKAGES[@]} -gt 0 ]]; then
-    echo "Downloading packages for optional installation at runtime, as needed."
-    $SUDO mkdir -p "$PACKAGE_DOWNLOAD_DIR"
-    cd "$PACKAGE_DOWNLOAD_DIR"
-    lilto ooe.sh $SUDO dnf install -y 'dnf-command(download)'
-    lilto $SUDO dnf download -y --resolve "${DOWNLOAD_PACKAGES[@]}"
-fi
+echo "Downloading packages for optional installation at runtime, as needed."
+$SUDO mkdir -p "$PACKAGE_DOWNLOAD_DIR"
+cd "$PACKAGE_DOWNLOAD_DIR"
+lilto ooe.sh $SUDO dnf install -y 'dnf-command(download)'
+lilto $SUDO dnf download -y --resolve "${DOWNLOAD_PACKAGES[@]}"
+# Also cache the current/latest version of minikube
+# for use in some specialized testing.
+# Ref: https://minikube.sigs.k8s.io/docs/start/
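+# A sketch of the expected consumption (assumed usage, not executed here):
+# a test harness on the booted image can later install the RPM offline, e.g.
+#   sudo dnf install -y $PACKAGE_DOWNLOAD_DIR/minikube-latest.x86_64.rpm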
+$SUDO curl --fail --silent --location -O \
+    https://storage.googleapis.com/minikube/releases/latest/minikube-latest.x86_64.rpm
+cd -
+
 # It was observed in F33, dnf install doesn't always get you the latest/greatest
 lilto $SUDO dnf update -y
diff --git a/ci/make_base_images.sh b/ci/make.sh
similarity index 63%
rename from ci/make_base_images.sh
rename to ci/make.sh
index 582c2c4e..daeeb5cc 100755
--- a/ci/make_base_images.sh
+++ b/ci/make.sh
@@ -1,9 +1,12 @@
 #!/bin/bash

+set -eo pipefail
+
 # This script is intended to be used by Cirrus-CI, from the VM
-# built by the 'image_builder' makefile target in this repo. Use
-# of this script in any other context/environment is unlikely to
-# function as intended.
+# built by the 'image_builder' Makefile target in this repo.
+# Its purpose is simply to verify & configure the runtime
+# environment from data provided by CI, and then call make
+# with the first argument passed to this script.

 SCRIPT_FILEPATH=$(realpath "${BASH_SOURCE[0]}")
 SCRIPT_DIRPATH=$(dirname "$SCRIPT_FILEPATH")
@@ -17,6 +20,8 @@ if [[ -z "$CI" ]] || [[ "$CI" != "true" ]] || [[ "$CIRRUS_CI" != "$CI" ]]; then
     die "Unexpected \$CI=$CI and/or \$CIRRUS_CI=$CIRRUS_CI"
 elif [[ -z "$IMG_SFX" ]] || [[ -z "$PACKER_BUILDS" ]]; then
     die "Required non-empty values for \$IMG_SFX=$IMG_SFX and \$PACKER_BUILDS=$PACKER_BUILDS"
+elif [[ -z "$1" ]]; then
+    die "Build stage name is required as the first argument"
 fi

 if skip_on_pr_label; then
@@ -24,10 +29,14 @@ if skip_on_pr_label; then
 fi

 set_gac_filepath
-set_aws_filepath

-set -exo pipefail
+# Not all builds need this.
+if [[ -n "$AWS_INI" ]]; then
+    set_aws_filepath
+fi
+
+set -x
 cd "$REPO_DIRPATH"
 export IMG_SFX=$IMG_SFX
 export PACKER_BUILDS=$PACKER_BUILDS
-make base_images
+make ${1}
diff --git a/ci/make_cache_images.sh b/ci/make_cache_images.sh
deleted file mode 100755
index 45bbbc79..00000000
--- a/ci/make_cache_images.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-
-set -eo pipefail
-
-# This script is intended to be used by Cirrus-CI, from the container
-# built by the ContainerFile in this directory.  Use of this script
-# in any other context/environment is unlikely to function as intended.
-
-SCRIPT_FILEPATH=$(realpath "${BASH_SOURCE[0]}")
-SCRIPT_DIRPATH=$(dirname "$SCRIPT_FILEPATH")
-REPO_DIRPATH=$(realpath "$SCRIPT_DIRPATH/../")
-
-# shellcheck source=./lib.sh
-source "$REPO_DIRPATH/lib.sh"
-
-# shellcheck disable=SC2154
-if [[ -z "$CI" ]] || [[ "$CI" != "true" ]] || [[ "$CIRRUS_CI" != "$CI" ]]; then
-    die "Unexpected \$CI=$CI and/or \$CIRRUS_CI=$CIRRUS_CI"
-elif [[ -z "$IMG_SFX" ]] || [[ -z "$PACKER_BUILDS" ]]; then
-    die "Required non-empty values for \$IMG_SFX=$IMG_SFX and \$PACKER_BUILDS=$PACKER_BUILDS"
-fi
-
-if skip_on_pr_label; then
-    exit 0  # skip build
-fi
-
-set_gac_filepath
-set_aws_filepath
-
-set -x
-cd "$REPO_DIRPATH"
-export IMG_SFX=$IMG_SFX
-export PACKER_BUILDS=$PACKER_BUILDS
-make cache_images
diff --git a/ci/make_image_builder.sh b/ci/make_image_builder.sh
deleted file mode 100755
index 4d3438c5..00000000
--- a/ci/make_image_builder.sh
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-
-# This script is intended to be used by Cirrus-CI, from the container
-# built by the ContainerFile in this directory.  Use of this script
-# in any other context/environment is unlikely to function as intended.
-
-SCRIPT_FILEPATH=$(realpath "${BASH_SOURCE[0]}")
-SCRIPT_DIRPATH=$(dirname "$SCRIPT_FILEPATH")
-REPO_DIRPATH=$(realpath "$SCRIPT_DIRPATH/../")
-
-# shellcheck source=./lib.sh
-source "$REPO_DIRPATH/lib.sh"
-
-# shellcheck disable=SC2154
-if [[ -z "$CI" ]] || [[ "$CI" != "true" ]] || [[ "$CIRRUS_CI" != "$CI" ]]; then
-    die "Unexpected \$CI=$CI and/or \$CIRRUS_CI=$CIRRUS_CI"
-elif [[ -z "$IMG_SFX" ]]; then
-    die "Required non-empty values for \$IMG_SFX=$IMG_SFX"
-fi
-
-if skip_on_pr_label; then
-    exit 0  # skip build
-fi
-
-set_gac_filepath
-
-set -exo pipefail
-cd "$REPO_DIRPATH"
-export IMG_SFX=$IMG_SFX
-make image_builder
diff --git a/image_builder/install_packages.txt b/image_builder/install_packages.txt
index dc25cb52..cb0377a4 100644
--- a/image_builder/install_packages.txt
+++ b/image_builder/install_packages.txt
@@ -1,3 +1,4 @@
+awscli
 buildah
 bash-completion
 curl
@@ -13,6 +14,7 @@ libvirt-client
 libvirt-daemon
 make
 openssh
+openssl
 podman
 python3
 python3-pyyaml
diff --git a/import_images/README.md b/import_images/README.md
new file mode 100644
index 00000000..e2236908
--- /dev/null
+++ b/import_images/README.md
@@ -0,0 +1,93 @@
+# Semi-manual image imports
+
+## Overview
+
+[Due to a bug in
+packer](https://github.com/hashicorp/packer-plugin-amazon/issues/264) and
+the sheer complexity of EC2 image imports, this process is impractical to
+automate fully; it nearly always requires human supervision:
+
+* There are multiple failure-points, some of which are not well reported to
+  the user by the tools here or by AWS itself.
+* The upload of the image to S3 can be unreliable, silently corrupting
+  image data.
+* The import-process is managed by a hosted AWS service which can be slow
+  and is occasionally unreliable.
+* Failure often results in one or more leftover/incomplete resources
+  (S3 objects, EC2 snapshots, and AMIs).
+
+## Requirements
+
+* You're generally familiar with the (manual)
+  [EC2 snapshot import process](https://docs.aws.amazon.com/vm-import/latest/userguide/vmimport-import-snapshot.html).
+* You are in possession of an AWS EC2 account, with the [IAM policy
+  `vmimport`](https://docs.aws.amazon.com/vm-import/latest/userguide/required-permissions.html#vmimport-role) attached.
+* Both "Access Key" and "Secret Access Key" values set in [a credentials
+  file](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html).
+* Podman is installed and functional.
+* At least 10 GB of free space under `/tmp`; more if there are failures or
+  multiple runs.
+* *Network bandwidth sufficient for downloading and uploading many GBs of
+  data, potentially multiple times.*
+
+## Process
+
+Unless there is a problem with the current contents of the
+imported images, this process does not need to be run; simply
+follow the normal PR-based build workflow as usual.
+
+***Note:*** Most of the steps below happen within a container environment.
+Any exceptions are noted in the individual steps below with *[HOST]*.
+
+1. *[HOST]* Edit the `Makefile`, update release numbers and/or URLs
+   under the section
+   `##### Important image release and source details #####`
+1. *[HOST]* Run
+   ```bash
+   $ make image_builder_debug \
+       IMG_SFX=$(date +%s) \
+       GAC_FILEPATH=/dev/null \
+       AWS_SHARED_CREDENTIALS_FILE=/path/to/.aws/credentials
+   ```
+1. Run `make import_images`
+1. The following steps should all occur successfully for each imported image:
+   1. Image is downloaded.
+   1. Image checksum is downloaded.
+   1. Image is verified against the checksum.
+   1. Image is converted to `VHDX` format.
+   1. The `VHDX` image is uploaded to the `packer-image-import` S3 bucket.
+   1. AWS `import-snapshot` process is started.
+   1. Progress of snapshot import is monitored until completion or failure.
+   1. The imported snapshot is converted into an AMI.
+   1. Essential tags are added to the AMI.
+   1. Full details about the AMI are printed.
+1. Assuming all image imports were successful, a success message will be
+   printed by `make` with instructions for updating the `Makefile`.
+1. *[HOST]* Update the `Makefile` as instructed, commit the
+   changes and push to a PR.  The automated image building process
+   takes over and runs as usual.
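+
+For orientation, the sketch below condenses what the `Makefile` rules behind
+`make import_images` do for each image (shown for the `fedora-aws` x86_64
+case; paths and the suffix value are illustrative, the `Makefile` is
+authoritative):
+
+```bash
+# Download, verify, and convert the image (import_images/handle_image.sh)
+bash import_images/handle_image.sh /tmp/fedora-aws-$IMG_SFX.vhdx \
+    "$FEDORA_IMAGE_URL" "$FEDORA_CSUM_URL"
+# Upload to S3 (the real rule also sends a Content-MD5 header)
+aws s3api put-object --bucket packer-image-import \
+    --key fedora-aws-$IMG_SFX.vhdx --body /tmp/fedora-aws-$IMG_SFX.vhdx
+# Ask EC2 to import the uploaded object as a snapshot
+aws ec2 import-snapshot --disk-container \
+    Format=VHDX,UserBucket="{S3Bucket=packer-image-import,S3Key=fedora-aws-$IMG_SFX.vhdx}" \
+    | jq -r .ImportTaskId > /tmp/fedora-aws-$IMG_SFX.import_task_id
+# Poll the import task; on success the snapshot ID is printed to stdout
+bash import_images/wait_import_task.sh /tmp/fedora-aws-$IMG_SFX.import_task_id
+```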
+
+## Failure responses
+
+This list is not exhaustive, and only represents common/likely failures.
+Normally there is no need to exit the build container.
+
+* If image download fails, double-check the URL values, run `make clean`,
+  and retry.
+* If checksum validation fails, double-check the URL values.  If changes
+  were made, run `make clean`.  Retry `make import_images`.
+* If the S3 upload fails, double-check the URL values.  If changes were
+  needed, run `make clean`.  Retry `make import_images`.
+* If snapshot import fails with a `Disk validation failed` error,
+  retry `make import_images`.
+* If snapshot import fails with any other error, find any leftover
+  snapshots in EC2 and delete them.  Retry `make import_images`.
+* If AMI registration fails, remove any conflicting AMIs and snapshots.
+  Retry `make import_images`.
+* If import was successful but AMI tagging failed, manually add the
+  required tags to the AMI: `automation=false` and `Name=<name>-i${IMG_SFX}`,
+  where `<name>` is `fedora-aws` or `fedora-aws-arm64`.
diff --git a/import_images/handle_image.sh b/import_images/handle_image.sh
new file mode 100644
index 00000000..a075b802
--- /dev/null
+++ b/import_images/handle_image.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+# This script is intended to be run from this repo's Makefile; use in
+# any other environment may behave badly.  Its purpose is to download
+# a VM image and a checksum file, verify the image against the
+# checksum, and, if it matches, convert the downloaded image into the
+# format indicated by the first argument's `.extension`.
+#
+# The first argument is the file path and name for the output image,
+# the second argument is the image download URL (ending in a filename).
+# The third argument is the download URL for a checksum file containing
+# the details necessary to verify the filename included in the image
+# download URL.
+
+set -eo pipefail
+
+SCRIPT_FILEPATH=$(realpath "${BASH_SOURCE[0]}")
+SCRIPT_DIRPATH=$(dirname "$SCRIPT_FILEPATH")
+REPO_DIRPATH=$(realpath "$SCRIPT_DIRPATH/../")
+
+# shellcheck source=./lib.sh
+source "$REPO_DIRPATH/lib.sh"
+
+[[ "$#" -eq 3 ]] || \
+    die "Expected to be called with three arguments, not: $#"
+
+# The caller must provide the desired output filename, since this
+# script neither parses a filename out of the URL nor reports the
+# resulting name back to the caller.
+dest_dirpath=$(dirname "$1")
+dest_filename=$(basename "$1")
+dest_format=$(cut -d. -f2 <<<"$dest_filename")
+src_url="$2"
+src_filename=$(basename "$src_url")
+cs_url="$3"
+
+req_env_vars dest_dirpath dest_filename dest_format src_url src_filename cs_url
+
+mkdir -p "$dest_dirpath"
+cd "$dest_dirpath"
+[[ -r "$src_filename" ]] || \
+    curl --fail --location -O "$src_url"
+echo "Downloading & verifying checksums in $cs_url"
+curl --fail --location "$cs_url" -o - | \
+    sha256sum --ignore-missing --check -
+echo "Converting '$src_filename' to ($dest_format format) '$dest_filename'"
+qemu-img convert "$src_filename" -O "$dest_format" "${dest_filename}"
diff --git a/import_images/manifest.json.in b/import_images/manifest.json.in
new file mode 100644
index 00000000..f3595103
--- /dev/null
+++ b/import_images/manifest.json.in
@@ -0,0 +1,31 @@
+{
+    "builds": [
+        {
+            "name": "fedora-aws",
+            "builder_type": "hamsterwheel",
+            "build_time": 0,
+            "files": null,
+            "artifact_id": "",
+            "packer_run_uuid": null,
+            "custom_data": {
+                "IMG_SFX": "fedora-aws-i@@@IMG_SFX@@@",
+                "STAGE": "import",
+                "TASK": "@@@CIRRUS_TASK_ID@@@"
+            }
+        },
+        {
+            "name": "fedora-aws-arm64",
+            "builder_type": "hamsterwheel",
+            "build_time": 0,
+            "files": null,
+            "artifact_id": "",
+            "packer_run_uuid": null,
+            "custom_data": {
+                "IMG_SFX": "fedora-aws-arm64-i@@@IMG_SFX@@@",
+                "STAGE": "import",
+                "TASK": "@@@CIRRUS_TASK_ID@@@"
+            }
+        }
+    ],
+    "last_run_uuid": "00000000-0000-0000-0000-000000000000"
+}
diff --git a/import_images/register.json.in b/import_images/register.json.in
new file mode 100644
index 00000000..6a3bc431
--- /dev/null
+++ b/import_images/register.json.in
@@ -0,0 +1,18 @@
+{
+    "Name": "@@@NAME@@@-i@@@IMG_SFX@@@",
+    "VirtualizationType": "hvm",
+    "Architecture": "@@@ARCH@@@",
+    "EnaSupport": true,
+    "RootDeviceName": "/dev/sda1",
+    "BlockDeviceMappings": [
+        {
+            "DeviceName": "/dev/sda1",
+            "Ebs": {
+                "DeleteOnTermination": true,
+                "SnapshotId": "@@@SNAPSHOT_ID@@@",
+                "VolumeSize": 10,
+                "VolumeType": "gp2"
+            }
+        }
+    ]
+}
diff --git a/import_images/wait_import_task.sh b/import_images/wait_import_task.sh
new file mode 100644
index 00000000..7a857609
--- /dev/null
+++ b/import_images/wait_import_task.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# This script is intended to be called by the main Makefile
+# to wait for and confirm successful import and conversion
+# of an uploaded image object from S3 into EC2.  It expects
+# the path to a file containing the import task ID as the
+# first argument.
+#
+# If the import is successful, the snapshot ID is written
+# to stdout.  Otherwise, all output goes to stderr, and
+# the script exits non-zero on failure or timeout.  On
+# failure, the file containing the import task ID will
+# be removed.
+
+set -eo pipefail
+
+AWS="${AWS:-aws --output json --region us-east-1}"
+
+# The import/conversion process can take a LONG time; runs of more than
+# 10 minutes have been observed.  Normally it takes 2-5 minutes.
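+# For reference, the polled JSON resembles the (heavily trimmed, assumed)
+# shape below; the jq queries later in this script read StatusMessage,
+# Status, and SnapshotId out of it:
+#   {"ImportSnapshotTasks": [{"SnapshotTaskDetail":
+#       {"Status": "active", "StatusMessage": "converting", "SnapshotId": null}}]}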
+SLEEP_SECONDS=10 +TIMEOUT_SECONDS=720 + +TASK_ID_FILE="$1" + +tmpfile=$(mktemp -p '' tmp.$(basename ${BASH_SOURCE[0]}).XXXX) + +die() { echo "ERROR: ${1:-No error message provided}" > /dev/stderr; exit 1; } + +msg() { echo "${1:-No error message provided}" > /dev/stderr; } + +unset snapshot_id +handle_exit() { + set +e + rm -f "$tmpfile" &> /dev/null + if [[ -n "$snapshot_id" ]]; then + msg "Success ($task_id): $snapshot_id" + echo -n "$snapshot_id" > /dev/stdout + return 0 + fi + rm -f "$TASK_ID_FILE" + die "Timeout or other error reported while waiting for snapshot import" +} +trap handle_exit EXIT + +[[ -n "$AWS_SHARED_CREDENTIALS_FILE" ]] || \ + die "\$AWS_SHARED_CREDENTIALS_FILE must not be unset/empty." + +[[ -r "$1" ]] || \ + die "Can't read task id from file '$TASK_ID_FILE'" + +task_id=$(<$TASK_ID_FILE) + +msg "Waiting up to $TIMEOUT_SECONDS seconds for '$task_id' import. Checking progress every $SLEEP_SECONDS seconds." +for (( i=$TIMEOUT_SECONDS ; i ; i=i-$SLEEP_SECONDS )); do \ + + # Sleep first, to give AWS time to start meaningful work. + sleep ${SLEEP_SECONDS}s + + $AWS ec2 describe-import-snapshot-tasks \ + --import-task-ids $task_id > $tmpfile + + if ! st_msg=$(jq -r -e '.ImportSnapshotTasks[0].SnapshotTaskDetail.StatusMessage?' $tmpfile) && \ + [[ -n $st_msg ]] && \ + [[ ! "$st_msg" =~ null ]] + then + die "Unexpected result: $st_msg" + elif egrep -iq '(error)|(fail)' <<<"$st_msg"; then + die "$task_id: $st_msg" + fi + + msg "$task_id: $st_msg (${i}s remaining)" + + # Why AWS you use StatusMessage && Status? Bad names! WHY!?!?!?! + if status=$(jq -r -e '.ImportSnapshotTasks[0].SnapshotTaskDetail.Status?' $tmpfile) && \ + [[ "$status" == "completed" ]] && \ + snapshot_id=$(jq -r -e '.ImportSnapshotTasks[0].SnapshotTaskDetail.SnapshotId?' $tmpfile) + then + msg "Import complete to: $snapshot_id" + break + else + unset snapshot_id + fi +done
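+
+# Example invocation, matching how the Makefile's %.snapshot_id rule wires
+# this script up (file names illustrative):
+#   bash import_images/wait_import_task.sh \
+#       /tmp/fedora-aws-$IMG_SFX.import_task_id \
+#       > /tmp/fedora-aws-$IMG_SFX.snapshot_id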