def execute_image(args):
    '''Execute the analysis of a container image given the parsed arguments.

    The image type is read from args.type. A missing type is treated as
    "docker" so that invocations which only pass an image string keep
    working. Any other unrecognised type is reported on stderr and the
    process exits with status 1 instead of silently doing nothing.

    args.image must be in image:tag or image@digest_type:digest format for
    docker images and in oci://image-location:tag format for OCI images.
    An invalid image string also ends the process with exit status 1.'''
    # the --type option has no default, so fall back to docker
    image_type = getattr(args, 'type', None) or 'docker'
    if image_type not in ('docker', 'oci'):
        sys.stderr.write('Error running Tern\n'
                         'Unknown image type "{}". Possible values are '
                         'docker or oci\n'.format(image_type))
        sys.exit(1)
    if image_type == 'docker':
        # Check if the image is of image:tag
        # or image@digest_type:digest format
        if not check_image_string(args.image):
            sys.stderr.write('Error running Tern\n'
                             'Please provide docker image '
                             'string in image:tag or '
                             'image@digest_type:digest format\n')
            sys.exit(1)
        # a tar archive has to be passed with the raw image option instead
        if general.check_tar(args.image):
            logger.error("%s", errors.incorrect_raw_option)
        else:
            docker_run.execute_docker_image(args)
            logger.debug('Report completed.')
    else:
        # Check if the image is of oci://image-location:tag
        if not check_oci_image_string(args.image):
            sys.stderr.write('Error running Tern\n'
                             'Please provide oci image '
                             'oci://image-location:tag format\n')
            sys.exit(1)
        oci_run.execute_oci_image(args)
        logger.debug('Report completed.')
- run.execute_dockerfile(args) + docker_run.execute_dockerfile(args) elif args.dockerfile: - run.execute_dockerfile(args) - elif args.docker_image: - # Check if the image is of image:tag - # or image@digest_type:digest format - if not check_image_string(args.docker_image): - sys.stderr.write('Error running Tern\n' - 'Please provide docker image ' - 'string in image:tag or ' - 'image@digest_type:digest format\n') - sys.exit(1) - if general.check_tar(args.docker_image): - logger.error("%s", errors.incorrect_raw_option) - else: - run.execute_docker_image(args) - logger.debug('Report completed.') + docker_run.execute_dockerfile(args) + elif args.image: + execute_image(args) if args.name == 'report': if args.raw_image: if not general.check_tar(args.raw_image): logger.error("%s", errors.invalid_raw_image.format( image=args.raw_image)) else: - run.execute_docker_image(args) + docker_run.execute_docker_image(args) logger.debug('Report completed.') logger.debug('Finished') @@ -153,12 +168,18 @@ def main(): parser_report.add_argument('-d', '--dockerfile', type=check_file_existence, help="Dockerfile used to build the Docker" " image") - parser_report.add_argument('-i', '--docker-image', - help="Docker image that exists locally -" + parser_report.add_argument('-i', '--image', + help="Image that exists locally -" + "either can be a docker image with format" " image:tag" + " or an OCI image with format" + " oci://:" " The option can be used to pull docker" " images by digest as well -" " @:") + parser_report.add_argument('-t', '--type', + help="type of image -" + " possible values could be an oci or docker") parser_report.add_argument('-w', '--raw-image', metavar='FILE', help="Raw container image that exists locally " "in the form of a tar archive.") diff --git a/tern/analyze/__init__.py b/tern/analyze/__init__.py index a048eda0..896c3024 100644 --- a/tern/analyze/__init__.py +++ b/tern/analyze/__init__.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- # -# Copyright (c) 2019 VMware, 
Inc. All Rights Reserved. +# Copyright (c) 2020 VMware, Inc. All Rights Reserved. # SPDX-License-Identifier: BSD-2-Clause diff --git a/tern/analyze/docker/run.py b/tern/analyze/docker/run.py index 95093e04..351e06e2 100644 --- a/tern/analyze/docker/run.py +++ b/tern/analyze/docker/run.py @@ -72,13 +72,13 @@ def analyze(image_obj, args, dfile_lock=False, dfobj=None): def execute_docker_image(args): '''Execution path if given a Docker image''' logger.debug('Setting up...') - image_string = args.docker_image + image_string = args.image if not args.raw_image: # don't check docker daemon for raw images container.check_docker_setup() else: image_string = args.raw_image - report.setup(image_tag_string=image_string) + report.setup(image_tag_string=image_string, image_type=args.type) # attempt to get built image metadata full_image = report.load_full_image(image_string) if full_image.origins.is_empty(): diff --git a/tern/analyze/oci/__init__.py b/tern/analyze/oci/__init__.py new file mode 100644 index 00000000..896c3024 --- /dev/null +++ b/tern/analyze/oci/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2020 VMware, Inc. All Rights Reserved. +# SPDX-License-Identifier: BSD-2-Clause diff --git a/tern/analyze/oci/analyze.py b/tern/analyze/oci/analyze.py new file mode 100644 index 00000000..f73ea2d8 --- /dev/null +++ b/tern/analyze/oci/analyze.py @@ -0,0 +1,155 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019-2020 VMware, Inc. All Rights Reserved. 
def analyze_oci_image(image_obj, redo=False):
    '''Collect the packages of every layer of an OCIImage object.

    The first layer is analyzed to find the shell, the remaining layers
    are analyzed in order and the image is saved to the cache at the end.
    redo is handed to the cache lookups done for each layer.'''
    # packages found so far across all layers
    found_packages = []
    prepare_for_analysis(image_obj)
    base_shell = analyze_first_layer(image_obj, found_packages, redo)
    analyze_subsequent_layers(image_obj, base_shell, found_packages, redo)
    common.save_to_cache(image_obj)


def prepare_for_analysis(image_obj):
    '''Add a notice origin for each layer and set up the mount points'''
    image_setup(image_obj)
    rootfs.set_up()


def abort_analysis():
    '''Recover the filesystem state and exit with status 1. Used when the
    analysis is interrupted by an external event'''
    rootfs.recover()
    sys.exit(1)


def analyze_first_layer(image_obj, master_list, redo):
    '''Analyze the first layer of the image, append its packages to
    master_list and return the shell found in that layer. Warnings are
    recorded on the layer when no shell or no package manager is found'''
    first_layer = image_obj.layers[0]
    # notice origin for the first layer
    origin = 'Layer: ' + first_layer.fs_hash[:10]
    # look for the shell in the first layer
    shell = common.get_shell(first_layer)
    if not shell:
        logger.warning(errors.no_shell)
        first_layer.origins.add_notice_to_origins(
            origin, Notice(errors.no_shell, 'warning'))
    # look for the package manager binary in the first layer
    binary = common.get_base_bin(first_layer)
    if not binary:
        logger.warning(errors.no_package_manager)
        first_layer.origins.add_notice_to_origins(
            origin, Notice(errors.no_package_manager, 'warning'))
    # only analyze the layer when the cache has nothing for it
    cached = common.load_from_cache(first_layer, redo)
    if not cached:
        # set a possible OS
        common.get_os_style(first_layer, binary)
        # packages can only be extracted with both a shell and a binary
        if shell and binary:
            execute_base_layer(first_layer, binary, shell)
    # the master list starts with all the packages of the first layer
    master_list.extend(first_layer.packages)
    return shell


def execute_base_layer(base_layer, binary, shell):
    '''Mount the base layer, retrieve its packages and always unmount
    afterwards'''
    try:
        mount_point = rootfs.mount_base_layer(base_layer.tar_file)
        rootfs.prep_rootfs(mount_point)
        common.add_base_packages(base_layer, binary, shell)
    except KeyboardInterrupt:
        logger.critical(errors.keyboard_interrupt)
        abort_analysis()
    finally:
        # unmount proc, sys and dev and then the root filesystem
        rootfs.undo_mount()
        rootfs.unmount_rootfs()


def analyze_subsequent_layers(image_obj, shell, master_list, redo):  # noqa: R0912,R0913
    '''Analyze every layer after the first one and update master_list with
    the packages of each of them'''
    # pylint:disable=too-many-nested-blocks
    for layer_index in range(1, len(image_obj.layers)):
        layer = image_obj.layers[layer_index]
        # without a shell so far, check if the current layer has one
        if not shell:
            shell = common.get_shell(layer)
        if not common.load_from_cache(layer, redo):
            # commands that created the layer, taken from the history
            # stored on the layer
            command_list = helpers.get_commands_from_history(layer)
            if command_list:
                # mount diff layers from 0 till the current layer
                mount_point = mount_overlay_fs(image_obj, layer_index)
                # mount dev, sys and proc after mounting diff layers
                rootfs.prep_rootfs(mount_point)
            # look up the command library for each command
            for command in command_list:
                pkg_listing = command_lib.get_package_listing(command.name)
                try:
                    if isinstance(pkg_listing, str):
                        common.add_base_packages(layer, pkg_listing, shell)
                    else:
                        common.add_snippet_packages(
                            layer, command, pkg_listing, shell)
                except KeyboardInterrupt:
                    logger.critical(errors.keyboard_interrupt)
                    abort_analysis()
            if command_list:
                rootfs.undo_mount()
                rootfs.unmount_rootfs()
        # update the master list
        common.update_master_list(master_list, layer)


def image_setup(image_obj):
    '''Add a notice origin named after the first 10 characters of the
    filesystem hash to each layer of the image'''
    for image_layer in image_obj.layers:
        image_layer.origins.add_notice_origin(
            'Layer: ' + image_layer.fs_hash[:10])


def mount_overlay_fs(image_obj, top_layer):
    '''Mount the layers of the image from the first one up to and
    including the layer at index top_layer and return the mount target'''
    tar_files = [image_obj.layers[position].tar_file
                 for position in range(top_layer + 1)]
    return rootfs.mount_diff_layers(tar_files)
directives = ['FROM',
              'ARG',
              'ADD',
              'RUN',
              'ENV',
              'COPY',
              'ENTRYPOINT',
              'WORKDIR',
              'VOLUME',
              'EXPOSE',
              'CMD']


def _read_json(path):
    '''Return the parsed content of the JSON file at path. The file is
    closed as soon as it is read'''
    with open(path) as json_file:
        return json.load(json_file)


def _blob_path(image_string, digest):
    '''Return the path of the blob with the given digest inside the OCI
    image layout at image_string. The digest has the form
    algorithm:encoded and the blob lives in blobs/algorithm/encoded'''
    algorithm, separator, encoded = digest.partition(":")
    if not separator or not encoded:
        raise ValueError("Invalid OCI digest: {}".format(digest))
    return os.path.join(image_string, "blobs", algorithm, encoded)


def get_oci_image_index(image_string):
    ''' Returns OCI image index path '''
    return os.path.join(image_string, "index.json")


def get_oci_image_manifest(image_string):
    ''' Returns OCI image manifest path. Only the first manifest listed
    in the image index is considered '''
    index_json = _read_json(get_oci_image_index(image_string))
    manifest_digest = index_json.get("manifests")[0].get("digest")
    return _blob_path(image_string, manifest_digest)


def get_oci_image_config(image_string, manifest):
    ''' Returns OCI image config path. manifest is the path to the image
    manifest file '''
    config_digest = _read_json(manifest).get("config").get("digest")
    return _blob_path(image_string, config_digest)


def get_oci_image_layers(image_string, manifest):
    ''' Returns the list of OCI image layer paths in the order they are
    listed in the manifest. manifest is the path to the image manifest
    file '''
    layer_data = _read_json(manifest).get("layers")
    return [_blob_path(image_string, layer.get("digest"))
            for layer in layer_data]


def created_to_instruction(created_by):
    '''The 'created_by' key in a OCI image config gives the shell
    command that was executed unless it is a #(nop) instruction which is
    for the other directives. Convert this line into an instruction.
    A line that does not start with a known directive and does not contain
    'RUN' gets a 'RUN ' prefix.
    '''
    instruction = created_by.replace('/bin/sh -c ', '').strip()
    instruction = instruction.replace('#(nop) ', '').strip()
    first = instruction.split(' ').pop(0)
    if first and first not in directives and \
            'RUN' not in instruction:
        instruction = 'RUN ' + instruction
    return instruction


def get_commands_from_history(image_layer):
    '''Given the image layer object, get the list of command objects that
    created the layer. Notices about the instruction are added to the
    layer's origins. An empty list is returned for ADD and COPY
    instructions'''
    # set up notice origin for the layer
    origin_layer = 'Layer: ' + image_layer.fs_hash[:10]
    if image_layer.created_by:
        instruction = created_to_instruction(image_layer.created_by)
        image_layer.origins.add_notice_to_origins(origin_layer, Notice(
            formats.oci_image_line.format(oci_image_instruction=instruction),
            'info'))
    else:
        instruction = ''
        image_layer.origins.add_notice_to_origins(origin_layer, Notice(
            formats.no_created_by, 'warning'))
    # everything after the directive is the command line; this is an empty
    # string when the instruction has no arguments
    directive, _, command_line = instruction.partition(' ')
    # Image layers are created with the directives RUN, ADD and COPY
    # For ADD and COPY instructions, there is no information about the
    # packages added
    # only the directive is checked so that a RUN command which happens to
    # contain the text ADD or COPY is still analyzed
    if directive in ('ADD', 'COPY'):
        image_layer.origins.add_notice_to_origins(origin_layer, Notice(
            errors.unknown_content.format(files=command_line), 'warning'))
        # return an empty list as we cannot find any commands
        return []
    # for RUN instructions we can return a list of commands
    command_list, msg = common.filter_install_commands(command_line)
    if msg:
        image_layer.origins.add_notice_to_origins(origin_layer, Notice(
            msg, 'warning'))
    return command_list
def analyze(image_obj, args):
    '''
    Run the analysis on the image object. The extended method is used
    when args.extend is set, otherwise the default OCI analysis is used
    '''
    if not args.extend:
        analyze_oci_image(image_obj, args.redo)
    else:
        run_extension(image_obj, args.extend, args.redo)


def execute_oci_image(args):
    '''Execution path if given a OCI image. args.image is expected in
    oci://image-location:tag format'''
    logger.debug('Setting up...')
    # keep what lies between the "://" separator and the first ":"
    location = args.image.split("://")[1].split(":")[0]
    report.setup(image_tag_string=location, image_type=args.type)
    # attempt to get built image metadata
    full_image = report.load_oci_image(location)
    # origins with no notices mean that the image was loaded successfully
    if not full_image.origins.is_empty():
        # we cannot load the full image
        logger.warning('Cannot retrieve full image metadata')
    else:
        # Add an image origin here
        image_origin = formats.oci_image.format(imagetag=location)
        full_image.origins.add_notice_origin(image_origin)
        # analyze image
        analyze(full_image, args)
        # generate report
        report.report_out(args, full_image)
    if not args.keep_wd:
        report.clean_image_tars(full_image)
    logger.debug('Teardown...')
    report.teardown()
    if not args.keep_wd:
        report.clean_working_dir()
class OCIImage(Image):
    '''An image stored in the OCI image format. The image metadata is read
    from the files found in the working directory.

    repotag: the string the image object was created with
    repotags: the reference name read from the image index annotations,
        None until load_image is called or when there is no annotation
    history: the 'history' list of the image config, None until load_image
        is called or when the config has no history'''

    def __init__(self, repotag=None, image_id=None):
        super().__init__(image_id)
        self.__repotag = repotag
        # set by load_image; initialized here so that the properties can be
        # read before the image is loaded
        self.__repotags = None
        self.__history = None

    @property
    def repotag(self):
        return self.__repotag

    @property
    def repotags(self):
        return self.__repotags

    @property
    def history(self):
        return self.__history

    def to_dict(self, template=None):
        # this should take care of 'origins' and 'layers'
        di_dict = super().to_dict(template)
        return di_dict

    def get_image_option(self):
        '''Return the image ID if there is one, otherwise the repotag.
        Raises NameError when the object has neither'''
        if self.image_id is not None:
            return self.image_id
        if self.repotag is not None:
            return self.repotag
        raise NameError("Image object initialized with no repotag or ID")

    def get_image_manifest(self):
        '''Return the parsed manifest file from the working directory'''
        temp_path = rootfs.get_working_dir()
        with general.pushd(temp_path):
            with open(manifest_file) as f:
                json_obj = json.load(f)
        return json_obj

    def get_image_layers(self, manifest):
        '''Return the encoded part of the digest of each layer listed in
        the manifest'''
        return [layer.get("digest").split(":")[1]
                for layer in manifest.get('layers')]

    def get_image_config_file(self, manifest):
        '''Return the encoded part of the config digest in the manifest'''
        return manifest.get('config').get("digest").split(":")[1]

    def get_image_id(self, manifest):
        '''Return the image ID, which is the encoded part of the config
        digest'''
        config_file = self.get_image_config_file(manifest)
        return config_file

    def get_image_repotags(self):
        '''Return the org.opencontainers.image.ref.name annotation of the
        first manifest in index.json or None if it is not annotated'''
        temp_path = rootfs.get_working_dir()
        with general.pushd(temp_path):
            with open("index.json") as f:
                json_obj = json.load(f)
        # the annotations are optional so fall back to an empty dictionary
        annotations = json_obj.get("manifests")[0].get("annotations") or {}
        return annotations.get("org.opencontainers.image.ref.name")

    def get_layer_sha(self, layer_path):
        '''Return the directory part of the layer path'''
        return os.path.dirname(layer_path)

    def get_image_config(self, manifest):
        '''Return the parsed config file from the working directory'''
        config_file = self.get_image_config_file(manifest)
        temp_path = rootfs.get_working_dir()
        with general.pushd(temp_path):
            with open(config_file) as f:
                json_obj = json.load(f)
        return json_obj

    def get_image_history(self, config):
        '''Return the 'history' of the config or None if there is none'''
        if 'history' in config.keys():
            return config['history']
        return None

    def get_diff_ids(self, config):
        '''Return the diff IDs in the config without the checksum type'''
        return [item.split(':').pop()
                for item in config['rootfs']['diff_ids']]

    def get_diff_checksum_type(self, config):
        '''Return the checksum type of the first diff ID in the config'''
        return config['rootfs']['diff_ids'][0].split(':')[0]

    def set_layer_created_by(self):
        '''Set the created_by property of each layer from the history'''
        # the history is ordered according to the order of the layers
        # so the first non-empty history corresponds with the first layer
        index = 0
        # there is nothing to do when the config has no history
        for item in self.__history or []:
            if 'empty_layer' in item.keys():
                continue
            if index >= len(self._layers):
                # more history items than layers; nothing left to update
                break
            self._layers[index].created_by = item.get('created_by', '')
            index = index + 1

    def load_image(self):
        '''Load the manifest, image ID, repotags, config, history and
        layers of the image. Errors raised while reading the metadata,
        such as IOError for a missing file, are passed on to the caller'''
        self._manifest = self.get_image_manifest()
        self._image_id = self.get_image_id(self._manifest)
        self.__repotags = self.get_image_repotags()
        self._config = self.get_image_config(self._manifest)
        self.__history = self.get_image_history(self._config)
        layer_paths = self.get_image_layers(self._manifest)
        layer_diffs = self.get_diff_ids(self._config)
        checksum_type = self.get_diff_checksum_type(self._config)
        # pair each diff ID with its layer path until either runs out
        for diff_id, layer_path in zip(layer_diffs, layer_paths):
            layer = ImageLayer(diff_id, layer_path)
            layer.set_checksum(checksum_type, layer.diff_id)
            layer.gen_fs_hash()
            self._layers.append(layer)
        self.set_layer_created_by()
def oci_image_setup(image_tag_string):
    '''Copy the index, manifest, config and layers of the OCI image layout
    at image_tag_string into the temp folder. The manifest is copied under
    the name manifest.json. The copies are done with rootfs.root_command'''
    path = os.path.join(general.get_top_dir(), constants.temp_folder)
    if not os.path.exists(path):
        os.mkdir(path)
    oci_image_index = helpers.get_oci_image_index(image_tag_string)
    oci_image_manifest = helpers.get_oci_image_manifest(image_tag_string)
    oci_image_config = helpers.get_oci_image_config(
        image_tag_string, oci_image_manifest)
    oci_image_layers = helpers.get_oci_image_layers(
        image_tag_string, oci_image_manifest)
    # every file goes to the same folder, the layers included
    destination = os.path.join(rootfs.mount_dir, constants.temp_folder)
    commands = [
        ["cp", oci_image_index, destination],
        ["cp", oci_image_manifest,
         os.path.join(destination, "manifest.json")],
        ["cp", oci_image_config, destination],
    ]
    for oci_image_layer in oci_image_layers:
        commands.append(["cp", oci_image_layer, destination])

    for command in commands:
        rootfs.root_command(command)


def setup(image_tag_string=None, dfobj=None, image_type=None):
    '''Any initial setup. The OCI setup is used when image_type is "oci",
    the docker setup is used for any other value'''
    # NOTE(review): dfobj used to be the first parameter of this function,
    # callers should pass the arguments by keyword
    # generate random names for image, container, and tag
    general.initialize_names()
    # load the cache
    cache.load()
    if image_type == "oci":
        return oci_image_setup(image_tag_string)
    return docker_image_setup(dfobj, image_tag_string)


def load_oci_image(oci_image):
    '''Create an OCIImage object from the image string, load it and return
    the object. If the image cannot be loaded, the error is logged and
    added as a notice to the origins of the returned object'''
    test_image = OCIImage(oci_image)
    failure_origin = formats.image_load_failure.format(
        testimage=test_image.repotag)
    try:
        test_image.load_image()
    except (NameError,
            subprocess.CalledProcessError,
            IOError,
            ValueError,
            EOFError) as error:
        logger.warning('Error in loading image: %s', str(error))
        test_image.origins.add_notice_to_origins(
            failure_origin, Notice(str(error), 'error'))
    return test_image
def check_oci_image_string(image_str: str):
    '''Return True if the image string starts with text in the
    oci://image-location:tag format and False otherwise.'''
    oci_pattern = r'oci://.+:.+'
    return re.match(oci_pattern, image_str) is not None