From 696e69874a3bcc758e9c0c495b52095771573af3 Mon Sep 17 00:00:00 2001
From: Chris Markiewicz <effigies@gmail.com>
Date: Wed, 8 May 2024 15:54:42 -0400
Subject: [PATCH 1/2] fix(datalad): Decompose subprocess.run() for logging

---
 .../datalad_service/tasks/validator.py        | 51 ++++++++++++-------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/services/datalad/datalad_service/tasks/validator.py b/services/datalad/datalad_service/tasks/validator.py
index 797d55d3a4..811ee0e017 100644
--- a/services/datalad/datalad_service/tasks/validator.py
+++ b/services/datalad/datalad_service/tasks/validator.py
@@ -35,6 +35,25 @@ def setup_validator():
         subprocess.run(['yarn'])
 
 
+def run_and_decode(args, timeout, esLogger):
+    """Run a subprocess and return the JSON output."""
+    process = gevent.subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    try:
+        process.wait(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        process.kill()
+
+    # Retrieve what we can from the process
+    stdout, stderr = process.communicate()
+
+    # If we have a whole JSON document from stdout, then return it
+    # Otherwise, log the error and return None
+    try:
+        return json.loads(escape_ansi(stdout.decode('utf-8')))
+    except json.decoder.JSONDecodeError as err:
+        esLogger.log(stdout, stderr, err)
+
+
 def validate_dataset_sync(dataset_path, ref, esLogger):
     """
     Synchronous dataset validation.
@@ -42,32 +61,26 @@ def validate_dataset_sync(dataset_path, ref, esLogger):
     Runs the bids-validator process and installs node dependencies if needed.
     """
     setup_validator()
-    try:
-        process = gevent.subprocess.run(
-            ['./node_modules/.bin/bids-validator', '--json', '--ignoreSubjectConsistency', dataset_path], stdout=subprocess.PIPE, timeout=300)
-        return json.loads(process.stdout)
-    except subprocess.TimeoutExpired as err:
-        esLogger.log(process.stdout, process.stderr, err)
-    except json.decoder.JSONDecodeError as err:
-        esLogger.log(process.stdout, process.stderr, err)
+    return run_and_decode(
+        ['./node_modules/.bin/bids-validator', '--json', '--ignoreSubjectConsistency', dataset_path],
+        timeout=300,
+        esLogger=esLogger,
+    )
 
 
 def validate_dataset_deno_sync(dataset_path, ref, esLogger):
     """
     Synchronous dataset validation.
 
-    Runs the bids-validator process and installs node dependencies if needed.
+    Runs the deno bids-validator process.
     """
-    try:
-        process = gevent.subprocess.run(
-            ['deno', 'run', '-A',
-             f'https://deno.land/x/bids_validator@{DENO_VALIDATOR_VERSION}/bids-validator.ts',
-             '--json', dataset_path], stdout=subprocess.PIPE, timeout=300)
-        return json.loads(escape_ansi(process.stdout.decode('utf-8')))
-    except subprocess.TimeoutExpired as err:
-        esLogger.log(process.stdout, process.stderr, err)
-    except json.decoder.JSONDecodeError as err:
-        esLogger.log(process.stdout, process.stderr, err)
+    return run_and_decode(
+        ['deno', 'run', '-A',
+         f'https://deno.land/x/bids_validator@{DENO_VALIDATOR_VERSION}/bids-validator.ts',
+         '--json', dataset_path],
+        timeout=300,
+        esLogger=esLogger,
+    )
 
 
 def summary_mutation(dataset_id, ref, validator_output, validator_metadata):

From ce8ee3015aeb01d29697d5e22f5eba1e01a5e962 Mon Sep 17 00:00:00 2001
From: Chris Markiewicz <effigies@gmail.com>
Date: Wed, 8 May 2024 16:50:07 -0400
Subject: [PATCH 2/2] fix(test): Mock Popen() instead of run()

---
 services/datalad/tests/test_validator.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/services/datalad/tests/test_validator.py b/services/datalad/tests/test_validator.py
index 0b7c5431ac..9bcdb58d82 100644
--- a/services/datalad/tests/test_validator.py
+++ b/services/datalad/tests/test_validator.py
@@ -21,8 +21,12 @@ def test_validator_error(new_dataset):
 @pytest.fixture
 def mock_validator_crash(monkeypatch):
     def return_bad_json(*args, **kwargs):
-        return SimpleNamespace(stdout='{invalidJson', stderr='')
-    monkeypatch.setattr(subprocess, 'run', return_bad_json)
+        return SimpleNamespace(
+            wait=lambda timeout=None: None,
+            kill=lambda: None,
+            communicate=lambda: (b'{invalidJson', b'')
+        )
+    monkeypatch.setattr(subprocess, 'Popen', return_bad_json)
 
 
 def test_validator_bad_json(new_dataset, mock_validator_crash):