Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using OCI images instead of Docker Images #1006

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions tern/analyze/default/container/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@
import docker
import logging
import subprocess # nosec

from tern.utils import rootfs
from tern.utils import general
from tern.classes.notice import Notice
from tern.classes.docker_image import DockerImage
from tern.classes.oci_image import OCIImage
from tern.utils import constants
from tern.analyze import passthrough
from tern.analyze.default.container import single_layer
Expand All @@ -23,10 +24,30 @@
logger = logging.getLogger(constants.logger_name)


def download_container_image(image_tag_string):
'''Download the docker image and convert it into oci format'''
try:
# extract the docker image
docker_image = image_tag_string
if 'docker://' in image_tag_string:
docker_image = image_tag_string.split('docker://')[1]
image_attr = general.parse_image_string(docker_image)
oci_image = 'oci://{0}/{1}'.format(
rootfs.working_dir, image_attr.get('name'))
docker_image = 'docker://{0}'.format(docker_image)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just found out that if we use the `docker://` transport rather than `docker-daemon:`, we encounter a TLS error on the registry side. I also found that crane is a much more lightweight tool for pulling container images, and it conforms to OCI's on-disk layout spec. @rnjudge Do you want to merge this PR and then let @mukultaneja update it to use crane?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the effect of the TLS error on the registry side?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can't download the image, because the certificate has expired or something like that. Usually, you need to set an option to disable TLS checking on the client side.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The disadvantage of switching to crane is that you have to download the binary from GitHub rather than install it via the package manager. But I feel that installation is easy, and the source code is included with the binary distribution.

# cmd = 'skopeo copy {0} {1}'.format(docker_image, oci_image)
rootfs.shell_command(False, ['skopeo', 'copy'], docker_image, oci_image)
# subprocess.check_output(cmd, shell=False)
return oci_image
except Exception:
logger.critical("Cannot extract Docker image")
raise


def load_full_image(image_tag_string, load_until_layer=0):
'''Create image object from image name and tag and return the object.
Loads only as many layers as needed.'''
test_image = DockerImage(image_tag_string)
test_image = OCIImage(image_tag_string)
failure_origin = formats.image_load_failure.format(
testimage=test_image.repotag)
try:
Expand Down
17 changes: 2 additions & 15 deletions tern/analyze/default/container/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,11 @@
"""

import logging

from tern.utils import constants
from tern.utils import rootfs
from tern.report import report
from tern.report import formats
from tern import prep
from tern.load import docker_api
from tern.analyze import common
from tern.analyze.default.container import image as cimage

Expand All @@ -28,19 +26,8 @@ def extract_image(args):
as an image tarball. Extract the image into a working directory accordingly
Return an image name and tag and an image digest if it exists"""
if args.docker_image:
# extract the docker image
image_attrs = docker_api.dump_docker_image(args.docker_image)
if image_attrs:
# repo name and digest is preferred, but if that doesn't exist
# the repo name and tag will do. If neither exist use repo Id.
if image_attrs['Id']:
image_string = image_attrs['Id']
if image_attrs['RepoTags']:
image_string = image_attrs['RepoTags'][0]
if image_attrs['RepoDigests']:
image_string = image_attrs['RepoDigests'][0]
return image_string
logger.critical("Cannot extract Docker image")
return cimage.download_container_image(args.docker_image)

if args.raw_image:
# for now we assume that the raw image tarball is always
# the product of "docker save", hence it will be in
Expand Down
4 changes: 4 additions & 0 deletions tern/analyze/oci/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause
149 changes: 149 additions & 0 deletions tern/classes/oci_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017-2021 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

import os
import json
import subprocess # nosec
from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer


class OCIImage(Image):
    '''A representation of an image stored on disk as an OCI image layout
    See image.py for super class's attributes
    OCI Image specific attributes:
        repotag: path to the image layout directory (the 'oci://' prefix
        of the string given to the constructor is stripped)
        repotags: the repotags associated with this image
        history: a list of commands used to create the filesystem layers
    methods:
        to_dict: return a dict representation of the object
        load_image: read the manifest, config and layers from disk
    '''
    def __init__(self, repotag=None):
        '''Initialize using repotag.
        repotag is expected to look like 'oci://<path to image layout>'.
        A string without the 'oci://' prefix is used as the path as-is.
        Raises NameError if repotag is None'''
        super().__init__(repotag)
        # check for None before touching the string, otherwise the split
        # below fails with an AttributeError instead of the NameError
        if repotag is None:
            raise NameError("Image object initialized with no repotag")
        parts = repotag.split('oci://')
        self.__repotag = parts[1] if len(parts) > 1 else repotag
        # give the properties a value before load_image() is called
        self.__repotags = ''
        self.__history = None

    @property
    def repotag(self):
        return self.__repotag

    @property
    def repotags(self):
        return self.__repotags

    @property
    def history(self):
        return self.__history

    def to_dict(self, template=None):
        '''Return a dictionary representation of the OCI image'''
        return super().to_dict(template)

    def _oci_blob_path(self, digest):
        '''Given a digest string of the form '<algorithm>:<encoded>' return
        the path <repotag>/blobs/<algorithm>/<encoded>. Using the algorithm
        from the digest rather than a fixed 'sha256' keeps the path right
        for digests that use another algorithm'''
        algorithm, encoded = digest.split(":", 1)
        return os.path.join(self.repotag, "blobs", algorithm, encoded)

    def get_image_manifest(self):
        '''Return the path to the image manifest blob. The manifest is the
        first entry under "manifests" in the layout's index.json.
        Raises Exception if index.json lists no manifests'''
        index_path = os.path.join(self.repotag, "index.json")
        with open(index_path, encoding="UTF-8") as f:
            manifests = json.load(f).get("manifests")
        if not manifests:
            raise Exception("manifest is missing")
        return self._oci_blob_path(manifests[0].get("digest"))

    def get_image_layers(self, manifest):
        '''Given the path to the manifest, return the list of layer blob
        paths in the order they appear under "layers"'''
        with open(manifest, encoding="UTF-8") as f:
            layer_data = json.load(f).get("layers")
        return [self._oci_blob_path(layer.get("digest"))
                for layer in layer_data]

    def get_image_config_file(self, manifest):
        '''Given the path to the manifest, return the path to the image
        config blob named by the manifest's "config" entry'''
        with open(manifest, encoding="UTF-8") as f:
            config = json.load(f).get("config")
        return self._oci_blob_path(config.get("digest"))

    def get_image_repotags(self):
        '''Placeholder: always returns an empty string'''
        return ''

    def get_layer_sha(self, layer_path):
        '''Return the directory part of the layer path'''
        # NOTE(review): get_image_layers builds paths that end in the
        # digest, so this returns the blobs directory rather than the
        # layer's digest. Presumably basename was intended - confirm
        # against callers before changing.
        return os.path.dirname(layer_path)

    def get_image_config(self, manifest):
        '''Given the path to the manifest, return the parsed image config'''
        config_file = self.get_image_config_file(manifest)
        with open(config_file, encoding="UTF-8") as f:
            return json.load(f)

    def get_image_history(self, config):
        '''Return the config's "history" list or None if there isn't one'''
        return config.get('history')

    def get_diff_ids(self, config):
        '''Return the rootfs diff ids with the checksum type stripped'''
        return [item.split(':')[-1] for item in config['rootfs']['diff_ids']]

    def get_diff_checksum_type(self, config):
        '''Return the checksum type of the first rootfs diff id'''
        return config['rootfs']['diff_ids'][0].split(':')[0]

    def set_layer_created_by(self):
        '''Set created_by for each loaded layer using the image history'''
        # the history is ordered according to the order of the layers
        # so the first non-empty history corresponds with the first layer
        index = 0
        # the config may carry no history at all, in which case
        # get_image_history returned None and there is nothing to set
        for item in self.__history or []:
            if 'empty_layer' in item:
                continue
            self._layers[index].created_by = item.get('created_by', '')
            index = index + 1
            # compare by value: identity comparison of ints only works
            # by accident for small numbers
            if index == self.load_until_layer:
                break

    def load_image(self, load_until_layer=0):
        """Load metadata from an extracted OCI image. This assumes the
        image has already been downloaded and extracted into the working
        directory"""
        if load_until_layer > 0:
            self._load_until_layer = load_until_layer
        # else defaults to 0 - handles negative load_until_layer
        try:
            self._manifest = self.get_image_manifest()
            # NOTE(review): get_image_id is not defined in this class;
            # presumably it is inherited from Image - TODO confirm
            self._image_id = self.get_image_id(self._manifest)
            self.__repotags = self.get_image_repotags()
            self._config = self.get_image_config(self._manifest)
            self.__history = self.get_image_history(self._config)
            layer_paths = self.get_image_layers(self._manifest)
            layer_diffs = self.get_diff_ids(self._config)
            checksum_type = self.get_diff_checksum_type(self._config)
            # pair each diff id with its layer blob; stops at the shorter
            # of the two lists
            for diff_id, layer_path in zip(layer_diffs, layer_paths):
                layer = ImageLayer(diff_id, layer_path)
                layer.set_checksum(checksum_type, layer.diff_id)
                layer.gen_fs_hash()
                self._layers.append(layer)
            self.set_layer_created_by()
        except NameError as e:
            raise NameError(e) from e
        except subprocess.CalledProcessError as e:
            raise subprocess.CalledProcessError(
                e.returncode, cmd=e.cmd, output=e.output,
                stderr=e.stderr) from e
        except IOError as e:
            raise IOError(e) from e