Skip to content

Commit

Permalink
Phase name check (#893)
Browse files Browse the repository at this point in the history
* Introducing phase name check for disallowed chars and removing check from runner

* Moving unique container name check to schema checker

* Duplicate phase check refactored; Added tests

* Refactored tests

* Schema Checker now also tests for empty values

* Fixed tests for macOS different tmp
  • Loading branch information
ArneTR authored Sep 27, 2024
1 parent c510ed4 commit 65f956e
Show file tree
Hide file tree
Showing 10 changed files with 312 additions and 75 deletions.
102 changes: 64 additions & 38 deletions lib/schema_checker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os
import string
import re
from schema import Schema, SchemaError, Optional, Or, Use
from schema import Schema, SchemaError, Optional, Or, Use, And, Regex
#
# networks documentation is different than what i see in the wild!
# name: str
Expand All @@ -26,11 +26,18 @@ def is_valid_string(self, value):
valid_chars = set(string.ascii_letters + string.digits + '_' + '-')
if not set(value).issubset(valid_chars):
raise SchemaError(f"{value} does not use valid characters! (a-zA-Z0-9_-)")
return value

def contains_no_invalid_chars(self, value):
bad_values = re.findall(r'(\.\.|\$|\'|"|`|!)', value)
if bad_values:
raise SchemaError(f"{value} includes disallowed values: {bad_values}")
return value

def not_empty(self, value):
if value.strip() == '':
raise SchemaError('Value cannot be empty')
return value

## If we ever want smarter validation for ports, here's a list of examples that we need to support:
# - 3000
Expand Down Expand Up @@ -67,104 +74,123 @@ def valid_service_types(self, value):
raise SchemaError(f"{value} is not 'container'")
return value

def validate_networks_no_invalid_chars(self, networks):
if isinstance(networks, list):
for item in networks:
def validate_networks_no_invalid_chars(self, value):
if isinstance(value, list):
for item in value:
if item is not None:
self.contains_no_invalid_chars(item)
elif isinstance(networks, dict):
for key, value in networks.items():
elif isinstance(value, dict):
for key, item in value.items():
self.contains_no_invalid_chars(key)
if value is not None:
self.contains_no_invalid_chars(value)
if item is not None:
self.contains_no_invalid_chars(item)
else:
raise SchemaError("'networks' should be a list or a dictionary")

return value

def check_usage_scenario(self, usage_scenario):
# Anything with Optional() is not needed, but if it exists must conform to the definition specified
usage_scenario_schema = Schema({
"name": str,
"author": str,
"description": str,
"author": And(str, Use(self.not_empty)),
"description": And(str, Use(self.not_empty)),

Optional("networks"): Or(list, dict),

Optional("services"): {
Use(self.contains_no_invalid_chars): {
Optional("type"): Use(self.valid_service_types),
Optional("image"): str,
Optional("build"): Or(Or({str:str},list),str),
Optional("image"): And(str, Use(self.not_empty)),
Optional("build"): Or(Or({And(str, Use(self.not_empty)):And(str, Use(self.not_empty))},list),And(str, Use(self.not_empty))),
Optional("networks"): self.single_or_list(Use(self.contains_no_invalid_chars)),
Optional("environment"): self.single_or_list(Or(dict,str)),
Optional("ports"): self.single_or_list(Or(str, int)),
Optional("depends_on"): Or([str],dict),
Optional("environment"): self.single_or_list(Or(dict,And(str, Use(self.not_empty)))),
Optional("ports"): self.single_or_list(Or(And(str, Use(self.not_empty)), int)),
Optional("depends_on"): Or([And(str, Use(self.not_empty))],dict),
Optional('container_name'): And(str, Use(self.not_empty)),
Optional("healthcheck"): {
Optional('test'): Or(list, str),
Optional('interval'): str,
Optional('timeout'): str,
Optional('test'): Or(list, And(str, Use(self.not_empty))),
Optional('interval'): And(str, Use(self.not_empty)),
Optional('timeout'): And(str, Use(self.not_empty)),
Optional('retries'): int,
Optional('start_period'): str,
Optional('start_interval'): str,
Optional('start_period'): And(str, Use(self.not_empty)),
Optional('start_interval'): And(str, Use(self.not_empty)),
Optional('disable'): bool,
},
Optional("setup-commands"): [str],
Optional("setup-commands"): [And(str, Use(self.not_empty))],
Optional("volumes"): self.single_or_list(str),
Optional("folder-destination"):str,
Optional("command"): str,
Optional("folder-destination"):And(str, Use(self.not_empty)),
Optional("command"): And(str, Use(self.not_empty)),
Optional("log-stdout"): bool,
Optional("log-stderr"): bool,
Optional("read-notes-stdout"): bool,
Optional("read-sci-stdout"): bool,
}
},

"flow": [{
"name": str,
"container": Use(self.contains_no_invalid_chars),
"flow": [{
"name": And(str, Use(self.not_empty), Regex(r'^[\.\s0-9a-zA-Z_\(\)-]+$')),
"container": And(str, Use(self.not_empty), Use(self.contains_no_invalid_chars)),
"commands": [{
"type":"console",
"command": str,
"command": And(str, Use(self.not_empty)),
Optional("detach"): bool,
Optional("note"): str,
Optional("note"): And(str, Use(self.not_empty)),
Optional("read-notes-stdout"): bool,
Optional("read-sci-stdout"): bool,
Optional("ignore-errors"): bool,
Optional("shell"): str,
Optional("shell"): And(str, Use(self.not_empty)),
Optional("log-stdout"): bool,
Optional("log-stderr"): bool,
}],

}],

Optional("compose-file"): Use(self.validate_compose_include)
}, ignore_extra_keys=True)


# First we check the general structure. Otherwise we later cannot even iterate over it
try:
usage_scenario_schema.validate(usage_scenario)
except SchemaError as e: # This block filters out the too long error message that include the parsing structure
if len(e.autos) > 2:
raise SchemaError(e.autos[2:]) from e
raise SchemaError(e.autos) from e


# This check is necessary to do in a seperate pass. If tried to bake into the schema object above,
# it will not know how to handle the value passed when it could be either a dict or list
if 'networks' in usage_scenario:
self.validate_networks_no_invalid_chars(usage_scenario['networks'])

for service_name in usage_scenario.get('services'):
service = usage_scenario['services'][service_name]
known_container_names = []
for service_name, service in usage_scenario.get('services').items():
if 'container_name' in service:
container_name = service['container_name']
else:
container_name = service_name

if container_name in known_container_names:
raise SchemaError(f"Container name '{container_name}' was already used. Please choose unique container names.")
known_container_names.append(container_name)

if 'image' not in service and 'build' not in service:
raise SchemaError(f"The 'image' key for service '{service_name}' is required when 'build' key is not present.")
if 'cmd' in service:
raise SchemaError(f"The 'cmd' key for service '{service_name}' is not supported anymore. Please migrate to 'command'")

# Ensure that flow names are unique
flow_names = [flow['name'] for flow in usage_scenario['flow']]
if len(flow_names) != len(set(flow_names)):
raise SchemaError("The 'name' field in 'flow' must be unique.")

known_flow_names = []
for flow in usage_scenario['flow']:
if flow['name'] in known_flow_names:
raise SchemaError(f"The 'name' field in 'flow' must be unique. '{flow['name']}' was already used.")
known_flow_names.append(flow['name'])

for command in flow['commands']:
if 'read-sci-stdout' in command and 'log-stdout' not in command:
raise SchemaError(f"You have specified `read-sci-stdout` in flow {flow['name']} but not set `log-stdout` to True.")


usage_scenario_schema.validate(usage_scenario)


# if __name__ == '__main__':
# import yaml
Expand Down
13 changes: 2 additions & 11 deletions runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,19 +702,13 @@ def setup_services(self):
# Check if there are service dependencies defined with 'depends_on'.
# If so, change the order of the services accordingly.
services_ordered = self.order_services(services)
known_container_names = []
for service_name, service in services_ordered.items():

if 'container_name' in service:
container_name = service['container_name']
else:
container_name = service_name

if container_name in known_container_names:
raise RuntimeError(f"Container name '{container_name}' was already assigned. Please choose unique container names.")

known_container_names.append(container_name)

print(TerminalColors.HEADER, '\nSetting up container for service:', service_name, TerminalColors.ENDC)
print('Container name:', container_name)

Expand Down Expand Up @@ -1078,9 +1072,6 @@ def start_phase(self, phase, transition = True):
phase_time = int(time.time_ns() / 1_000)
self.__notes_helper.add_note({'note': f"Starting phase {phase}", 'detail_name': '[NOTES]', 'timestamp': phase_time})

if phase in self.__phases:
raise RuntimeError(f"'{phase}' as phase name has already used. Please set unique name for phases.")

self.__phases[phase] = {'start': phase_time, 'name': phase}

def end_phase(self, phase):
Expand Down Expand Up @@ -1121,7 +1112,7 @@ def run_flows(self):
print(TerminalColors.HEADER, '\nRunning flow: ', flow['name'], TerminalColors.ENDC)

try:
self.start_phase(flow['name'].replace('[', '').replace(']',''), transition=False)
self.start_phase(flow['name'], transition=False)

for cmd_obj in flow['commands']:
self.check_total_runtime_exceeded()
Expand Down Expand Up @@ -1200,7 +1191,7 @@ def run_flows(self):
if self._debugger.active:
self._debugger.pause('Waiting to start next command in flow')

self.end_phase(flow['name'].replace('[', '').replace(']',''))
self.end_phase(flow['name'])
self.__ps_to_kill += ps_to_kill_tmp
self.__ps_to_read += ps_to_read_tmp # will otherwise be discarded, bc they confuse execption handling
self.check_process_returncodes()
Expand Down
33 changes: 33 additions & 0 deletions tests/data/usage_scenarios/duplicate_phase_name.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
name: Container name invalid test
author: Arne Tarara
description: Container name invalid test

services:
highload-api-cont:
image: alpine
container_name: number-1


flow:
- name: This phase is ok
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
- name: This phase is also ok
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
- name: This phase is ok
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
18 changes: 18 additions & 0 deletions tests/data/usage_scenarios/empty_container_name.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
name: Container name invalid test
author: Arne Tarara
description: Container name invalid test

services:
highload-api-:cont:
image: alpine
container_name:

flow:
- name: Small-Stress
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
24 changes: 24 additions & 0 deletions tests/data/usage_scenarios/empty_phase_name.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
name: Container name invalid test
author: Arne Tarara
description: Container name invalid test

services:
highload-api-cont:
image: alpine
container_name: number-1


flow:
- name: This phase is ok
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
- name: ""
container: highload-api-cont
commands:
- type: console
command: echo "asd"
26 changes: 26 additions & 0 deletions tests/data/usage_scenarios/invalid_phase_name.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
name: Container name invalid test
author: Arne Tarara
description: Container name invalid test

services:
highload-api-cont:
image: alpine
container_name: number-1


flow:
- name: This phase is ok
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
- name: This phase is / not ok!
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
26 changes: 26 additions & 0 deletions tests/data/usage_scenarios/invalid_phase_name_runtime.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
---
name: Container name invalid test
author: Arne Tarara
description: Container name invalid test

services:
highload-api-cont:
image: alpine
container_name: number-1


flow:
- name: This phase is ok
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
- name: "[RUNTIME]"
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
18 changes: 18 additions & 0 deletions tests/data/usage_scenarios/none_container_name.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
name: Container name invalid test
author: Arne Tarara
description: Container name invalid test

services:
highload-api-:cont:
image: alpine
container_name:

flow:
- name: Small-Stress
container: highload-api-cont
commands:
- type: console
command: echo "asd"
shell: bash
note: Starting a little stress
2 changes: 1 addition & 1 deletion tests/lib/test_schema_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_schema_checker_invalid_missing_description():

expected_exception = "Missing key: 'description'"
assert expected_exception in str(error.value), \
Tests.assertion_info(f"Exception: {expected_exception}", str(error.value))
Tests.assertion_info(expected_exception, str(error.value))


def test_schema_checker_invalid_image_req_when_no_build():
Expand Down
Loading

0 comments on commit 65f956e

Please sign in to comment.