Skip to content

Commit

Permalink
analyze: dockerfile: Parse with dockerfile_parse
Browse files Browse the repository at this point in the history
This is work towards tern-tools#522

We add initial functionality for parsing dockerfiles using
dockerfile_parse. We also add some tests for the functions therein.
A key feature of using dockerfile_parse is that we can now do
variable expansion, i.e. replace references to variables set by ENV
instructions with their values throughout the Dockerfile content.
This allows for more accurate analysis of packages that may be
installed by scripts that don't use a system package manager.

In order to test the functions, we also added some example
Dockerfiles to test against. They vary in complexity.

We added the new test to the ci test suite and the dockerfile-parse
module to requirements.txt.

Signed-off-by: Nisha K <[email protected]>
  • Loading branch information
Nisha K committed Jan 22, 2020
1 parent 4cbe9ab commit 97e21f0
Show file tree
Hide file tree
Showing 6 changed files with 235 additions and 5 deletions.
6 changes: 3 additions & 3 deletions ci/test_files_touched.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2019 VMware, Inc. All Rights Reserved.
# Copyright (c) 2019-2020 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

from git import Repo
Expand Down Expand Up @@ -72,6 +72,7 @@
'tern -l report -i centos:7'],
# tern/analyze/docker
re.compile('tern/analyze/docker'): [
'python tests/test_analyze_docker_dockerfile.py',
'tern -l report -i golang:alpine',
'tern -l report -d samples/alpine_python/Dockerfile'],
# tern/report
Expand Down Expand Up @@ -106,8 +107,7 @@
re.compile('tests/test_class_package.py'):
['python tests/test_class_package.py'],
re.compile('tests/test_class_template.py'):
['python tests/test_class_template.py']
}
['python tests/test_class_template.py']}

alltests = []
for change in changes:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

PyYAML>=5.2
docker~=4.1
dockerfile-parse~=0.0
requests~=2.22
stevedore>=1.31
pbr>=5.4
18 changes: 18 additions & 0 deletions samples/example_dockerfiles/buildpack_deps_jesse_curl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
FROM debian:jessie

RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
curl \
netbase \
wget \
&& rm -rf /var/lib/apt/lists/*

RUN set -ex; \
if ! command -v gpg > /dev/null; then \
apt-get update; \
apt-get install -y --no-install-recommends \
gnupg \
dirmngr \
; \
rm -rf /var/lib/apt/lists/*; \
fi
50 changes: 50 additions & 0 deletions samples/example_dockerfiles/golang_1.13_stretch
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
FROM buildpack-deps:stretch-scm

# gcc for cgo
RUN apt-get update && apt-get install -y --no-install-recommends \
g++ \
gcc \
libc6-dev \
make \
pkg-config \
&& rm -rf /var/lib/apt/lists/*

ENV GOLANG_VERSION 1.13.6

RUN set -eux; \
\
# this "case" statement is generated via "update.sh"
dpkgArch="$(dpkg --print-architecture)"; \
case "${dpkgArch##*-}" in \
amd64) goRelArch='linux-amd64'; goRelSha256='a1bc06deb070155c4f67c579f896a45eeda5a8fa54f35ba233304074c4abbbbd' ;; \
armhf) goRelArch='linux-armv6l'; goRelSha256='37a1a83e363dcf146a67fa839d170fd1afb13009585fdd493d0a3370fbe6f785' ;; \
arm64) goRelArch='linux-arm64'; goRelSha256='0a18125c4ed80f9c3045cf92384670907c4796b43ed63c4307210fe93e5bbca5' ;; \
i386) goRelArch='linux-386'; goRelSha256='27feb013106da784f09e560720aa41ab395c67f7eed4c4a0fce04bc6e3d01c7d' ;; \
ppc64el) goRelArch='linux-ppc64le'; goRelSha256='26a977a8af5dc50a562f0a57b58dded5fa3bacfe77722cf8a84ea54ca54728dd' ;; \
s390x) goRelArch='linux-s390x'; goRelSha256='5cd9900a1fa0f0cac657930b648381cad9b8c5e2bbc77caf86a6fb5cedad0017' ;; \
*) goRelArch='src'; goRelSha256='aae5be954bdc40bcf8006eb77e8d8a5dde412722bc8effcdaf9772620d06420c'; \
echo >&2; echo >&2 "warning: current architecture ($dpkgArch) does not have a corresponding Go binary release; will be building from source"; echo >&2 ;; \
esac; \
\
url="https://golang.org/dl/go${GOLANG_VERSION}.${goRelArch}.tar.gz"; \
wget -O go.tgz "$url"; \
echo "${goRelSha256} *go.tgz" | sha256sum -c -; \
tar -C /usr/local -xzf go.tgz; \
rm go.tgz; \
\
if [ "$goRelArch" = 'src' ]; then \
echo >&2; \
echo >&2 'error: UNIMPLEMENTED'; \
echo >&2 'TODO install golang-any from jessie-backports for GOROOT_BOOTSTRAP (and uninstall after build)'; \
echo >&2; \
exit 1; \
fi; \
\
export PATH="/usr/local/go/bin:$PATH"; \
go version

ENV GOPATH /go
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH

RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH"
WORKDIR $GOPATH
74 changes: 72 additions & 2 deletions tern/analyze/docker/dockerfile.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2017-2019 VMware, Inc. All Rights Reserved.
# Copyright (c) 2017-2020 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

"""
Dockerfile parser and information retrieval
Dockerfile information retrieval and modification
"""

from dockerfile_parse import DockerfileParser
import re

from tern.utils.general import clean_command
Expand Down Expand Up @@ -35,6 +36,75 @@
tag_separator = ':'


class Dockerfile:
    '''Plain holder for the information the parser extracts from a
    Dockerfile. A new instance starts out empty; get_dockerfile_obj fills
    it in.'''
    def __init__(self):
        # parsed instruction entries, taken from the parser's structure
        self.structure = None
        # environment variables reported by the parser
        self.envs = None
        # environment variables handed in from earlier build stages
        self.prev_env = None
        # path of the Dockerfile this object was built from
        self.filepath = ""
        # parent images reported by the parser
        self.parent_images = []

    def is_none(self):
        """Return True when nothing has been stored in the object yet.

        Only structure, envs, prev_env and filepath are inspected;
        parent_images does not take part in the check."""
        inspected = (self.structure, self.envs, self.prev_env, self.filepath)
        return not any(inspected)


def get_dockerfile_obj(dockerfile_name, prev_env=None):
    '''Parse a Dockerfile and return a populated Dockerfile object.
    dockerfile_name: the path to the Dockerfile, file name included
    prev_env: environment variables that may have been set in earlier
    stages of a multistage docker build, as a python dictionary of the
    form {'ENV': 'value',...}. It is handed to the parser as its parent
    environment and also stored unchanged on the returned object'''
    result = Dockerfile()
    result.filepath = dockerfile_name
    result.prev_env = prev_env
    with open(dockerfile_name) as df_handle:
        df_parser = DockerfileParser(parent_env=prev_env, fileobj=df_handle)
        # read every parser attribute while the file object is still open
        result.structure = df_parser.structure
        result.envs = df_parser.envs
        result.parent_images = df_parser.parent_images
    return result


def replace_env(key_value_dict, df_structure_dict):
    '''Replace references to environment variables in one entry of a
    dockerfile structure with the corresponding values. The entry is
    modified in place and nothing is returned.
    key_value_dict: a dictionary of key-value pairs like envs in the dockerfile
    object
    df_structure_dict: a dictionary from the dockerfile object's structure;
    its 'content' and 'value' strings are rewritten
    Both the $KEY and the ${KEY} form are replaced. A $KEY reference is only
    replaced when the name ends there, i.e. it is not followed by a letter,
    digit or underscore, so that a key like GO does not corrupt a reference
    to a longer variable like $GOPATH'''
    for key, val in key_value_dict.items():
        name = re.escape(key)
        # matches ${KEY}, or $KEY not followed by another name character
        reference = re.compile(
            r'\$(?:\{' + name + r'\}|' + name + r'(?![A-Za-z0-9_]))')

        def substitute(_match, replacement=val):
            # a callable keeps backslashes in the value literal
            return replacement

        for field in ('content', 'value'):
            df_structure_dict[field] = reference.sub(
                substitute, df_structure_dict[field])


def expand_vars(dfobj):
    '''Substitute known environment variable values into every entry of
    the Dockerfile object's structure. The object's own envs are applied
    first, then the envs carried over from previous stages.
    dfobj: the Dockerfile object created using get_dockerfile_obj'''
    # keep only the mappings that actually hold something, in order
    known_envs = [env for env in (dfobj.envs, dfobj.prev_env) if env]
    if not known_envs:
        return
    for entry in dfobj.structure:
        for env in known_envs:
            replace_env(env, entry)


def get_command_list(dockerfile_name):
'''Given a Dockerfile, return a list of Docker commands'''
with open(dockerfile_name) as f:
Expand Down
91 changes: 91 additions & 0 deletions tests/test_analyze_docker_dockerfile.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 VMware, Inc. All Rights Reserved.
# SPDX-License-Identifier: BSD-2-Clause

import unittest

from tern.analyze.docker import dockerfile


class TestAnalyzeDockerDockerfile(unittest.TestCase):
    '''Tests for the Dockerfile wrapper object and the parsing and
    environment replacement helpers in tern.analyze.docker.dockerfile.
    The sample Dockerfile paths are relative, so they are resolved
    against the current working directory.'''

    def setUp(self):
        # sample Dockerfile without any ENV instructions
        self.buildpack = ('samples/example_dockerfiles/'
                          'buildpack_deps_jesse_curl')
        # sample Dockerfile that sets and uses ENV variables
        self.golang = 'samples/example_dockerfiles/golang_1.13_stretch'

    def tearDown(self):
        del self.buildpack
        del self.golang

    def testDockerfileObject(self):
        # a freshly created wrapper holds nothing
        dfobj = dockerfile.Dockerfile()
        self.assertTrue(dfobj.is_none())

    def testDockerfileParserWithoutEnv(self):
        # parse a file with no ENV instructions and no previous environment
        dfobj = dockerfile.get_dockerfile_obj(self.buildpack)
        self.assertFalse(dfobj.is_none())
        self.assertEqual(dfobj.parent_images, ['debian:jessie'])
        # expected parser output; 'content' keeps the line continuations
        # and newlines while 'value' has them removed. The startline and
        # endline values are tied to the exact text of the sample file
        structure = [{'instruction': 'FROM',
                      'startline': 0,
                      'endline': 0,
                      'content': 'FROM debian:jessie\n',
                      'value': 'debian:jessie'},
                     {'instruction': 'RUN',
                      'startline': 2,
                      'endline': 7,
                      'content': ('RUN apt-get update && apt-get install -y --'
                                  'no-install-recommends \\\n\t\tca-certific'
                                  'ates \\\n\t\tcurl \\\n\t\tnetbase \\\n\t\tw'
                                  'get \\\n\t&& rm -rf /var/lib/apt/lists/*'
                                  '\n'),
                      'value': ('apt-get update && apt-get install -y --no-in'
                                'stall-recommends \t\tca-certificates \t\tcur'
                                'l \t\tnetbase \t\twget \t&& rm -rf /var/lib/'
                                'apt/lists/*')},
                     {'instruction': 'RUN',
                      'startline': 9,
                      'endline': 17,
                      'content': ('RUN set -ex; \\\n\tif ! command -v gpg > /'
                                  'dev/null; then \\\n\t\tapt-get update; \\'
                                  '\n\t\tapt-get install -y --no-install-reco'
                                  'mmends \\\n\t\t\tgnupg \\\n\t\t\tdirmngr \\'
                                  '\n\t\t; \\\n\t\trm -rf /var/lib/apt/lists/'
                                  '*; \\\n\tfi\n'),
                      'value': ('set -ex; \tif ! command -v gpg > /dev/null; t'
                                'hen \t\tapt-get update; \t\tapt-get install -'
                                'y --no-install-recommends \t\t\tgnupg \t\t\td'
                                'irmngr \t\t; \t\trm -rf /var/lib/apt/lists/*'
                                '; \tfi')}]
        self.assertEqual(dfobj.structure, structure)
        self.assertFalse(dfobj.envs)

    def testDockerfileParserWithEnv(self):
        # the previous environment is stored unchanged on the object
        dfobj = dockerfile.get_dockerfile_obj(self.buildpack,
                                              {'buildno': '123abc'})
        self.assertFalse(dfobj.is_none())
        self.assertEqual(dfobj.prev_env, {'buildno': '123abc'})

    def testReplaceEnv(self):
        dfobj = dockerfile.get_dockerfile_obj(self.golang)
        # NOTE(review): PATH ends with ':' presumably because $PATH is not
        # defined within the sample file and expands to nothing - confirm
        envs = {'GOLANG_VERSION': '1.13.6',
                'GOPATH': '/go',
                'PATH': '/go/bin:/usr/local/go/bin:'}
        self.assertEqual(dfobj.envs, envs)
        # entry 9 is expected to be the WORKDIR instruction using $GOPATH
        struct = dfobj.structure[9]
        dockerfile.replace_env(dfobj.envs, struct)
        self.assertEqual(struct['content'], 'WORKDIR /go\n')
        self.assertEqual(struct['value'], '/go')
        # only ${GOLANG_VERSION} is known; ${goRelArch} must stay as is
        replace_content = ('\n\turl="https://golang.org/dl/go1.13.6.'
                           '${goRelArch}.tar.gz"; ')
        replace_value = (' \t\turl="https://golang.org/dl/go1.13.6'
                         '.${goRelArch}.tar.gz"')
        # entry 5 is expected to be the RUN instruction downloading Go. The
        # url assignment is located by splitting on '\' and ';', so the
        # indices below depend on the exact text of the sample file
        struct = dfobj.structure[5]
        dockerfile.replace_env(dfobj.envs, struct)
        self.assertEqual(struct['content'].split('\\')[14], replace_content)
        self.assertEqual(struct['value'].split(';')[28], replace_value)


if __name__ == '__main__':
unittest.main()

0 comments on commit 97e21f0

Please sign in to comment.