From e3bdc51241ef92547db4f98879b569a985638617 Mon Sep 17 00:00:00 2001 From: Mukul Taneja Date: Fri, 16 Jul 2021 01:18:19 +0530 Subject: [PATCH 1/3] Using OCI images instead of Docker Images Start analyzing OCI images using tern Work towards: #948 Signed-off-by: Mukul Taneja --- tern/analyze/default/container/image.py | 7 ++ tern/analyze/oci/__init__.py | 4 + tern/analyze/oci/parse.py | 50 +++++++++ tern/classes/oci_image.py | 135 ++++++++++++++++++++++++ 4 files changed, 196 insertions(+) create mode 100644 tern/analyze/oci/__init__.py create mode 100644 tern/analyze/oci/parse.py create mode 100644 tern/classes/oci_image.py diff --git a/tern/analyze/default/container/image.py b/tern/analyze/default/container/image.py index 098aacad..d91b37e4 100644 --- a/tern/analyze/default/container/image.py +++ b/tern/analyze/default/container/image.py @@ -23,9 +23,16 @@ logger = logging.getLogger(constants.logger_name) +def download_container_image(image_tag_string): + '''Download the docker image and convert it into + oci format''' + pass + + def load_full_image(image_tag_string, load_until_layer=0): '''Create image object from image name and tag and return the object. Loads only as many layers as needed.''' + download_container_image(image_tag_string) test_image = DockerImage(image_tag_string) failure_origin = formats.image_load_failure.format( testimage=test_image.repotag) diff --git a/tern/analyze/oci/__init__.py b/tern/analyze/oci/__init__.py new file mode 100644 index 00000000..605cea42 --- /dev/null +++ b/tern/analyze/oci/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 VMware, Inc. All Rights Reserved. +# SPDX-License-Identifier: BSD-2-Clause diff --git a/tern/analyze/oci/parse.py b/tern/analyze/oci/parse.py new file mode 100644 index 00000000..341c4983 --- /dev/null +++ b/tern/analyze/oci/parse.py @@ -0,0 +1,50 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019-2021 VMware, Inc. All Rights Reserved. +# SPDX-License-Identifier: BSD-2-Clause + +""" +OCI specific functions - used when trying to retrieve packages when +given a OCI Image +""" + +import os +# import re +import json +# from tern.report import errors +# from tern.analyze import common +# from tern.report import formats +# from tern.classes.notice import Notice + + +def get_oci_image_index(image_string): + ''' Returns OCI image index path ''' + return os.path.join(image_string, "index.json") + + +def get_oci_image_manifest(image_string): + ''' Returns OCI image manifest path ''' + blob_path = os.path.join(image_string, "blobs/sha256") + index_json = json.load(open(get_oci_image_index(image_string))) + manifest_digest = index_json.get( + "manifests")[0].get("digest").split(":")[1] + return os.path.join(blob_path, manifest_digest) + + +def get_oci_image_config(image_string, manifest): + ''' Returns OCI image config path ''' + blob_path = os.path.join(image_string, "blobs/sha256") + config_digest = json.load(open(manifest)).get( + "config").get("digest").split(":")[1] + return os.path.join(blob_path, config_digest) + + +def get_oci_image_layers(image_string, manifest): + ''' Returns OCI image layers ''' + blob_path = os.path.join(image_string, "blobs/sha256") + layers = list() + layer_data = json.load(open(manifest)).get("layers") + for layer in layer_data: + layer_path = os.path.join(blob_path, layer.get("digest").split(":")[1]) + layers.append(layer_path) + return layers diff --git a/tern/classes/oci_image.py b/tern/classes/oci_image.py new file mode 100644 index 00000000..257304f5 --- /dev/null +++ b/tern/classes/oci_image.py @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2017-2020 VMware, Inc. All Rights Reserved. +# SPDX-License-Identifier: BSD-2-Clause + +import os +import json +import subprocess # nosec +from tern.utils import rootfs +from tern.utils import general +from tern.classes.image import Image +from tern.utils.constants import manifest_file +from tern.classes.image_layer import ImageLayer + + +class OCIImage(Image): + def __init__(self, repotag=None, image_id=None): + super().__init__(image_id) + self.__repotag = repotag + + @property + def repotag(self): + return self.__repotag + + @property + def repotags(self): + return self.__repotags + + @property + def history(self): + return self.__history + + def to_dict(self, template=None): + # this should take care of 'origins' and 'layers' + di_dict = super().to_dict(template) + return di_dict + + def get_image_option(self): + if self.repotag is not None and self.image_id is not None: + return self.image_id + if self.repotag is not None: + return self.repotag + if self.image_id is not None: + return self.image_id + raise NameError("Image object initialized with no repotag or ID") + + def get_image_manifest(self): + temp_path = rootfs.get_working_dir() + with general.pushd(temp_path): + with open(manifest_file) as f: + json_obj = json.loads(f.read()) + return json_obj + + def get_image_layers(self, manifest): + layers = [] + for layer in manifest.get('layers'): + layers.append(layer.get("digest").split(":")[1]) + return layers + + def get_image_config_file(self, manifest): + return manifest.get('config').get("digest").split(":")[1] + + def get_image_id(self, manifest): + config_file = self.get_image_config_file(manifest) + return config_file + + def get_image_repotags(self): + temp_path = rootfs.get_working_dir() + annotations = None + with general.pushd(temp_path): + with open("index.json") as f: + json_obj = json.loads(f.read()) + annotations = json_obj.get("manifests")[0].get("annotations") + return annotations.get("org.opencontainers.image.ref.name") + + def get_layer_sha(self, layer_path): + return os.path.dirname(layer_path) + + def get_image_config(self, manifest): + config_file = self.get_image_config_file(manifest) + temp_path = rootfs.get_working_dir() + with general.pushd(temp_path): + with open(config_file) as f: + json_obj = json.loads(f.read()) + return json_obj + + def get_image_history(self, config): + if 'history' in config.keys(): + return config['history'] + return None + + def get_diff_ids(self, config): + diff_ids = [] + for item in config['rootfs']['diff_ids']: + diff_ids.append(item.split(':').pop()) + return diff_ids + + def get_diff_checksum_type(self, config): + return config['rootfs']['diff_ids'][0].split(':')[0] + + def set_layer_created_by(self): + # the history is ordered according to the order of the layers + # so the first non-empty history corresponds with the first layer + index = 0 + for item in self.__history: + if 'empty_layer' not in item.keys(): + if 'created_by' in item.keys(): + self._layers[index].created_by = item['created_by'] + else: + self._layers[index].created_by = '' + index = index + 1 + + def load_image(self): + try: + self._manifest = self.get_image_manifest() + self._image_id = self.get_image_id(self._manifest) + self.__repotags = self.get_image_repotags() + self._config = self.get_image_config(self._manifest) + self.__history = self.get_image_history(self._config) + layer_paths = self.get_image_layers(self._manifest) + layer_diffs = self.get_diff_ids(self._config) + checksum_type = self.get_diff_checksum_type(self._config) + while layer_diffs and layer_paths: + layer = ImageLayer(layer_diffs.pop(0), layer_paths.pop(0)) + layer.set_checksum(checksum_type, layer.diff_id) + layer.gen_fs_hash() + self._layers.append(layer) + self.set_layer_created_by() + except NameError: # pylint: disable=try-except-raise + raise + except subprocess.CalledProcessError: # pylint: disable=try-except-raise + raise + except IOError: # pylint: disable=try-except-raise + raise +{"mode":"full","isActive":false} From 65705b64cad838a29ad93b13c225387988837383 Mon Sep 17 00:00:00 2001 From: mtaneja Date: Thu, 26 Aug 2021 02:04:02 +0530 Subject: [PATCH 2/3] Using OCI images instead of Docker Images Adding 'download_container_image` method Work towards: #948 Signed-off-by: mtaneja --- tern/analyze/default/container/image.py | 21 ++++++++++++++++----- tern/analyze/default/container/run.py | 17 ++--------------- tern/classes/oci_image.py | 3 +-- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/tern/analyze/default/container/image.py b/tern/analyze/default/container/image.py index d91b37e4..b3994010 100644 --- a/tern/analyze/default/container/image.py +++ b/tern/analyze/default/container/image.py @@ -10,7 +10,7 @@ import docker import logging import subprocess # nosec - +from tern.utils import general, rootfs from tern.classes.notice import Notice from tern.classes.docker_image import DockerImage from tern.utils import constants @@ -24,15 +24,26 @@ def download_container_image(image_tag_string): - '''Download the docker image and convert it into - oci format''' - pass + '''Download the docker image and convert it into oci format''' + try: + # extract the docker image + docker_image = image_tag_string + if 'docker://' in image_tag_string: + docker_image = image_tag_string.split('docker://')[1] + image_attr = general.parse_image_string(docker_image) + oci_image = 'oci://{0}/{1}'.format(rootfs.working_dir, image_attr.get('name')) + docker_image = 'docker://{0}'.format(docker_image) + process = subprocess.Popen(['skopeo', 'copy', docker_image, oci_image]) + process.wait() + return oci_image + except Exception: + logger.critical("Cannot extract Docker image") + raise def load_full_image(image_tag_string, load_until_layer=0): '''Create image object from image name and tag and return the object. Loads only as many layers as needed.''' - download_container_image(image_tag_string) test_image = DockerImage(image_tag_string) failure_origin = formats.image_load_failure.format( testimage=test_image.repotag) diff --git a/tern/analyze/default/container/run.py b/tern/analyze/default/container/run.py index 0346881c..1f40a22d 100644 --- a/tern/analyze/default/container/run.py +++ b/tern/analyze/default/container/run.py @@ -8,13 +8,11 @@ """ import logging - from tern.utils import constants from tern.utils import rootfs from tern.report import report from tern.report import formats from tern import prep -from tern.load import docker_api from tern.analyze import common from tern.analyze.default.container import image as cimage @@ -28,19 +26,8 @@ def extract_image(args): as an image tarball. Extract the image into a working directory accordingly Return an image name and tag and an image digest if it exists""" if args.docker_image: - # extract the docker image - image_attrs = docker_api.dump_docker_image(args.docker_image) - if image_attrs: - # repo name and digest is preferred, but if that doesn't exist - # the repo name and tag will do. If neither exist use repo Id. - if image_attrs['Id']: - image_string = image_attrs['Id'] - if image_attrs['RepoTags']: - image_string = image_attrs['RepoTags'][0] - if image_attrs['RepoDigests']: - image_string = image_attrs['RepoDigests'][0] - return image_string - logger.critical("Cannot extract Docker image") + return cimage.download_container_image(args.docker_image) + if args.raw_image: # for now we assume that the raw image tarball is always # the product of "docker save", hence it will be in diff --git a/tern/classes/oci_image.py b/tern/classes/oci_image.py index 257304f5..408347c3 100644 --- a/tern/classes/oci_image.py +++ b/tern/classes/oci_image.py @@ -110,7 +110,7 @@ def set_layer_created_by(self): self._layers[index].created_by = '' index = index + 1 - def load_image(self): + def load_image(self, load_until_layer=0): try: self._manifest = self.get_image_manifest() self._image_id = self.get_image_id(self._manifest) @@ -132,4 +132,3 @@ def load_image(self): raise except IOError: # pylint: disable=try-except-raise raise -{"mode":"full","isActive":false} From 70847159a36b5dbac49b17d362438e38691d160e Mon Sep 17 00:00:00 2001 From: Mukul Taneja Date: Wed, 1 Sep 2021 01:58:24 +0530 Subject: [PATCH 3/3] Using OCI images instead of Docker Images Updating OCI Image class with required methods to load image. Work towards: #948 Signed-off-by: Mukul Taneja --- tern/analyze/default/container/image.py | 15 ++-- tern/analyze/oci/parse.py | 50 ----------- tern/classes/oci_image.py | 109 ++++++++++++++---------- 3 files changed, 71 insertions(+), 103 deletions(-) delete mode 100644 tern/analyze/oci/parse.py diff --git a/tern/analyze/default/container/image.py b/tern/analyze/default/container/image.py index b3994010..2f8ec257 100644 --- a/tern/analyze/default/container/image.py +++ b/tern/analyze/default/container/image.py @@ -10,9 +10,10 @@ import docker import logging import subprocess # nosec -from tern.utils import general, rootfs +from tern.utils import rootfs +from tern.utils import general from tern.classes.notice import Notice -from tern.classes.docker_image import DockerImage +from tern.classes.oci_image import OCIImage from tern.utils import constants from tern.analyze import passthrough from tern.analyze.default.container import single_layer @@ -31,10 +32,12 @@ def download_container_image(image_tag_string): if 'docker://' in image_tag_string: docker_image = image_tag_string.split('docker://')[1] image_attr = general.parse_image_string(docker_image) - oci_image = 'oci://{0}/{1}'.format(rootfs.working_dir, image_attr.get('name')) + oci_image = 'oci://{0}/{1}'.format( + rootfs.working_dir, image_attr.get('name')) docker_image = 'docker://{0}'.format(docker_image) - process = subprocess.Popen(['skopeo', 'copy', docker_image, oci_image]) - process.wait() + # cmd = 'skopeo copy {0} {1}'.format(docker_image, oci_image) + rootfs.shell_command(False, ['skopeo', 'copy'], docker_image, oci_image) + # subprocess.check_output(cmd, shell=False) return oci_image except Exception: logger.critical("Cannot extract Docker image") @@ -44,7 +47,7 @@ def download_container_image(image_tag_string): def load_full_image(image_tag_string, load_until_layer=0): '''Create image object from image name and tag and return the object. Loads only as many layers as needed.''' - test_image = DockerImage(image_tag_string) + test_image = OCIImage(image_tag_string) failure_origin = formats.image_load_failure.format( testimage=test_image.repotag) try: diff --git a/tern/analyze/oci/parse.py b/tern/analyze/oci/parse.py deleted file mode 100644 index 341c4983..00000000 --- a/tern/analyze/oci/parse.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019-2021 VMware, Inc. All Rights Reserved. -# SPDX-License-Identifier: BSD-2-Clause - -""" -OCI specific functions - used when trying to retrieve packages when -given a OCI Image -""" - -import os -# import re -import json -# from tern.report import errors -# from tern.analyze import common -# from tern.report import formats -# from tern.classes.notice import Notice - - -def get_oci_image_index(image_string): - ''' Returns OCI image index path ''' - return os.path.join(image_string, "index.json") - - -def get_oci_image_manifest(image_string): - ''' Returns OCI image manifest path ''' - blob_path = os.path.join(image_string, "blobs/sha256") - index_json = json.load(open(get_oci_image_index(image_string))) - manifest_digest = index_json.get( - "manifests")[0].get("digest").split(":")[1] - return os.path.join(blob_path, manifest_digest) - - -def get_oci_image_config(image_string, manifest): - ''' Returns OCI image config path ''' - blob_path = os.path.join(image_string, "blobs/sha256") - config_digest = json.load(open(manifest)).get( - "config").get("digest").split(":")[1] - return os.path.join(blob_path, config_digest) - - -def get_oci_image_layers(image_string, manifest): - ''' Returns OCI image layers ''' - blob_path = os.path.join(image_string, "blobs/sha256") - layers = list() - layer_data = json.load(open(manifest)).get("layers") - for layer in layer_data: - layer_path = os.path.join(blob_path, layer.get("digest").split(":")[1]) - layers.append(layer_path) - return layers diff --git a/tern/classes/oci_image.py b/tern/classes/oci_image.py index 408347c3..f3062965 100644 --- a/tern/classes/oci_image.py +++ b/tern/classes/oci_image.py @@ -1,22 +1,29 @@ # -*- coding: utf-8 -*- # -# Copyright (c) 2017-2020 VMware, Inc. All Rights Reserved. +# Copyright (c) 2017-2021 VMware, Inc. All Rights Reserved. # SPDX-License-Identifier: BSD-2-Clause import os import json import subprocess # nosec -from tern.utils import rootfs -from tern.utils import general from tern.classes.image import Image -from tern.utils.constants import manifest_file from tern.classes.image_layer import ImageLayer class OCIImage(Image): - def __init__(self, repotag=None, image_id=None): - super().__init__(image_id) - self.__repotag = repotag + '''A representation of an image created by OCI + See image.py for super class's attributes + OCI Image specific attributes: + repotags: the list of repotags associated with this image + history: a list of commands used to create the filesystem layers + to_dict: return a dict representation of the object + ''' + def __init__(self, repotag=None): + '''Initialize using repotag''' + super().__init__(repotag) + self.__repotag = repotag.split('oci://')[1] + if self.repotag is None: + raise NameError("Image object initialized with no repotag") @property def repotag(self): @@ -31,57 +38,57 @@ def history(self): return self.__history def to_dict(self, template=None): - # this should take care of 'origins' and 'layers' + '''Return a dictionary representation of the OCI image''' di_dict = super().to_dict(template) return di_dict - def get_image_option(self): - if self.repotag is not None and self.image_id is not None: - return self.image_id - if self.repotag is not None: - return self.repotag - if self.image_id is not None: - return self.image_id - raise NameError("Image object initialized with no repotag or ID") - def get_image_manifest(self): - temp_path = rootfs.get_working_dir() - with general.pushd(temp_path): - with open(manifest_file) as f: - json_obj = json.loads(f.read()) - return json_obj + ''' Returns OCI image manifest path ''' + blob_path = os.path.join(self.repotag, "blobs/sha256") + manifest_digest = '' + with open(os.path.join(self.repotag, "index.json"), encoding="UTF-8") as f: + index_json = json.load(f) + manifest_digest = index_json.get("manifests") + if not manifest_digest: + raise Exception("manifest is missing") + manifest_digest = manifest_digest[0].get("digest").split(":")[1] + + return os.path.join(blob_path, manifest_digest) def get_image_layers(self, manifest): + ''' Returns OCI image layers ''' + blob_path = os.path.join(self.repotag, "blobs/sha256") layers = [] - for layer in manifest.get('layers'): - layers.append(layer.get("digest").split(":")[1]) + with open(manifest, encoding="UTF-8") as f: + layer_data = json.load(f).get("layers") + for layer in layer_data: + layer_path = os.path.join( + blob_path, layer.get("digest").split(":")[1]) + layers.append(layer_path) + return layers def get_image_config_file(self, manifest): - return manifest.get('config').get("digest").split(":")[1] + ''' Returns OCI image config path ''' + blob_path = os.path.join(self.repotag, "blobs/sha256") + config_digest = '' + with open(manifest, encoding="UTF-8") as f: + config = json.load(f).get("config") + config_digest = config.get("digest").split(":")[1] - def get_image_id(self, manifest): - config_file = self.get_image_config_file(manifest) - return config_file + return os.path.join(blob_path, config_digest) def get_image_repotags(self): - temp_path = rootfs.get_working_dir() - annotations = None - with general.pushd(temp_path): - with open("index.json") as f: - json_obj = json.loads(f.read()) - annotations = json_obj.get("manifests")[0].get("annotations") - return annotations.get("org.opencontainers.image.ref.name") + return '' def get_layer_sha(self, layer_path): return os.path.dirname(layer_path) def get_image_config(self, manifest): config_file = self.get_image_config_file(manifest) - temp_path = rootfs.get_working_dir() - with general.pushd(temp_path): - with open(config_file) as f: - json_obj = json.loads(f.read()) + json_obj = '' + with open(config_file, encoding="UTF-8") as f: + json_obj = json.loads(f.read()) return json_obj def get_image_history(self, config): @@ -104,13 +111,20 @@ def set_layer_created_by(self): index = 0 for item in self.__history: if 'empty_layer' not in item.keys(): + self._layers[index].created_by = '' if 'created_by' in item.keys(): self._layers[index].created_by = item['created_by'] - else: - self._layers[index].created_by = '' index = index + 1 + if index is self.load_until_layer: + break def load_image(self, load_until_layer=0): + """Load metadata from an extracted OCI image. This assumes the + image has already been downloaded and extracted into the working + directory""" + if load_until_layer > 0: + self._load_until_layer = load_until_layer + # else defaults to 0 - handles negative load_until_layer try: self._manifest = self.get_image_manifest() self._image_id = self.get_image_id(self._manifest) @@ -126,9 +140,10 @@ def load_image(self, load_until_layer=0): layer.gen_fs_hash() self._layers.append(layer) self.set_layer_created_by() - except NameError: # pylint: disable=try-except-raise - raise - except subprocess.CalledProcessError: # pylint: disable=try-except-raise - raise - except IOError: # pylint: disable=try-except-raise - raise + except NameError as e: + raise NameError(e) from e + except subprocess.CalledProcessError as e: + raise subprocess.CalledProcessError( + e.returncode, cmd=e.cmd, output=e.output, stderr=e.stderr) + except IOError as e: + raise IOError(e) from e