Skip to content

Commit

Permalink
Replace manual dfile parser with dockerfile-parse
Browse files Browse the repository at this point in the history
This is a large commit that removes unnecessary manual parsing from
tern/analyze/docker/dockerfile.py so that other parts of the code can
utilize the built-in parsing abilities from the DockerfileParse module.
While many of the changes from this commit remove no longer necessary
functions, a few helper functions were also added.

Functions Removed:
- get_command_list()
- get_directive()
- get_directive_list()
- get_base_instructions()
- get_base_image_tag()

Functions Added:
- update_parent_images()
  If the FROM line in a Dockerfile contains a variable previously
  defined by the ARG command, the function expand_arg will take
  care of the replacement and update the dockerfile object structure.
  When this happens, we also need to update the parent_images list
  property of the Dockerfile object in case one of the ARG variables
  is part of the FROM line.

- get_command_list()
  Returns a list of commands from the dockerfile object structure
  provided. Useful when it's unrealistic to loop through the entire
  dictionary looking for a certain command.

Resolves tern-tools#522

Signed-off-by: Rose Judge <[email protected]>
  • Loading branch information
rnjudge committed Mar 27, 2020
1 parent c7f5094 commit d3c1829
Showing 1 changed file with 24 additions and 114 deletions.
138 changes: 24 additions & 114 deletions tern/analyze/docker/dockerfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,20 @@ def expand_arg(dfobj):
if arg_dict:
for obj in dfobj.structure:
replace_env(arg_dict, obj)
# Update dfobj parent image just in case ARG value was used in FROM line
update_parent_images(dfobj)


def update_parent_images(dfobj):
    '''Given a Dockerfile object, rebuild its parent_images list from the
    FROM entries in dfobj.structure. This is useful after ARG values have
    been replaced in expand_arg(), which can change the FROM line(s) of a
    Dockerfile.

    Each entry of dfobj.structure is read as a dictionary with the keys
    'instruction' and 'value'. For every FROM entry, the text before the
    optional "AS <name>" stage alias is kept. The alias keyword is matched
    case-insensitively and may be separated by any run of whitespace
    (spaces or tabs); surrounding whitespace is stripped from the value.

    The result is assigned to dfobj.parent_images; nothing is returned.'''
    # FROM <image> [AS <name>]: cut at the first whitespace-delimited AS
    # keyword only, so nothing after the alias can influence the result
    stage_alias = re.compile(r'\s+as(?:\s+|$)', flags=re.IGNORECASE)
    dfobj.parent_images = [
        stage_alias.split(cmd_dict['value'].strip(), maxsplit=1)[0]
        for cmd_dict in dfobj.structure
        if cmd_dict['instruction'] == 'FROM'
    ]


def parse_from_image(dfobj):
Expand Down Expand Up @@ -204,117 +218,15 @@ def package_in_dockerfile(command_dict, pkg_name):
return False


def get_command_list(dockerfile_name):
'''Given the path to a Dockerfile, return a list of Docker commands.

The file is read in full and split on newline characters. Lines are then
scanned in order while two pieces of state are tracked:
  command      - the text of the command currently being assembled
  command_cont - whether the line just examined matched line_indent
A line starts a new command when no command is in progress and its first
space-delimited token is listed in directives. A command is appended to
the result once the current line does not match line_indent.

NOTE(review): comments, line_indent and directives are module-level names
defined outside this view; their exact patterns should be confirmed
before relying on the details above.'''
# the with-statement closes the file once it has been read
with open(dockerfile_name) as f:
contents = f.read()
dockerfile_lines = contents.split('\n')
command_list = []
command = ''
command_cont = False
for line in dockerfile_lines:
# check if this line is a continuation of the previous line
# it should not be a comment
# NOTE(review): the line is appended when comments.match() returns a
# match; confirm that this agrees with "should not be a comment"
if command_cont:
if comments.match(line) is not None:
command = command + line
# check if this line has an indentation
# comments don't count
# the flag is recomputed from the current line on every iteration
command_cont = bool(line_indent.match(line))

# check if there is a command or not
# only the text before the first space is compared against directives
if command == '':
directive = line.split(' ', 1)[0]
if directive in directives:
command = line
# check if there is continuation or not and if the command is still
# non-empty
# a finished command is stored and the accumulator is reset
if not command_cont and command != '':
command_list.append(command)
command = ''

return command_list


def get_directive(line):
    '''Given a line from a Dockerfile, return a tuple of the Docker
    directive (eg: FROM, ADD, COPY, RUN) and the object it acts upon.

    The line is divided at the first space only, so any further spaces
    stay in the second element. A line that contains no space (a bare
    directive) yields an empty string as the second element instead of
    raising an IndexError.'''
    directive, _, action = line.partition(' ')
    return (directive, action)


def get_directive_list(command_list):
    '''Turn a list of Docker commands extracted from a Dockerfile into a
    list of tuples. Each command is first passed through
    general.clean_command() and then through get_directive(), which
    pairs the Docker directive (FROM, ADD, COPY etc) with the object to
    be acted upon. The order of the commands is preserved.'''
    return [get_directive(general.clean_command(entry))
            for entry in command_list]


def get_base_instructions(instructions):
    '''Given a list of docker build instructions get a list of instructions
    related to the base instructions
    Possible docker instructions related to the base image:
        FROM <base image>
        FROM <image:tag>
        ARG <key value pair>
        FROM <key>
    Dockerfile rules say that the only instruction that can precede FROM is
    ARG

    Each instruction is indexed as a sequence whose first item is the
    directive name. The return value is a list holding every instruction
    up to and including the first FROM. An empty list is returned when
    the input is empty, when the first instruction is neither FROM nor
    ARG, or when no FROM instruction is present at all.'''
    # only FROM or ARG may open the base section
    if not instructions or instructions[0][0] not in ('FROM', 'ARG'):
        return []
    base_instructions = []
    for instruction in instructions:
        base_instructions.append(instruction)
        # the first FROM closes the base section
        if instruction[0] == 'FROM':
            return base_instructions
    # ran out of instructions without a FROM: no base can be determined
    return []


def get_base_image_tag(base_instructions):
    '''Given the base docker instructions, return the base image and tag
    as a tuple
    This involves finding the ARG key value pair and then replacing it
    if it occurs in the image part
    NOTE: Dockerfile rules say that if no --build-arg is passed during
    docker build and ARG has no default, the build will fail. We assume
    for now that we will not be passing build arguments in which case
    if there is no default ARG, we will raise an exception indicating that
    since the build arguments are determined by the user we will not
    be able to determine what the user wanted

    Each instruction is indexed as (directive, value). Every instruction
    that is not FROM is treated as an ARG of the form key=default; only
    the first '=' separates the key from the default, so a default that
    itself contains '=' is kept whole.

    Raises ValueError when an ARG value has no '=' in it.

    NOTE(review): bash_var and tag_separator are module-level names that
    are not visible here; presumably they describe the shell variable
    syntax and the image/tag delimiter - confirm against the module.'''
    # get all the ARG key-value pairs
    build_args = {}
    from_instruction = ''
    for instruction in base_instructions:
        if instruction[0] == 'FROM':
            from_instruction = instruction[1]
        else:
            # split on the first '=' only so the default stays intact
            key, separator, default = instruction[1].partition('=')
            # raise error if there is no default value
            if not separator:
                raise ValueError('No ARG default value.'
                                 ' Unable to determine base image')
            build_args[key] = default
    # replace any variables in FROM with value
    # text matching bash_var is removed first, then each ARG key that is
    # left in the string is substituted by its default
    from_instruction = re.sub(bash_var, '', from_instruction)
    for key, value in build_args.items():
        from_instruction = from_instruction.replace(key, value)
    # check if the base image has a tag
    image_tag_list = from_instruction.split(tag_separator)
    if len(image_tag_list) == 1:
        image_tag_list.append('')
    return tuple(image_tag_list)
def get_command_list(dfobj_structure):
    '''Collect the commands held in a dockerfile object structure.

    The structure is a list of dictionaries. For every entry whose
    'instruction' is not 'COMMENT', the 'content' string is taken with
    trailing whitespace removed. The resulting list keeps the order of
    the structure, which is handy when looping through the dictionaries
    themselves is not practical.'''
    return [entry['content'].rstrip()
            for entry in dfobj_structure
            if entry['instruction'] != 'COMMENT']


def find_git_info(line, dockerfile_path):
Expand Down Expand Up @@ -361,10 +273,8 @@ def expand_add_command(dfobj):
def create_locked_dockerfile(dfobj):
'''Given a dockerfile object, the information in a new Dockerfile object
Copy the dfobj info to the destination output Dockerfile location'''
# packages in RUN lines, ENV, and ARG values are already expanded
expand_from_images(dfobj)
# packages in run lines are already expanded
expand_vars(dfobj)
expand_arg(dfobj)
expand_add_command(dfobj)
# create the output file
dfile = ''
Expand Down

0 comments on commit d3c1829

Please sign in to comment.