From 696e69874a3bcc758e9c0c495b52095771573af3 Mon Sep 17 00:00:00 2001
From: Chris Markiewicz <effigies@gmail.com>
Date: Wed, 8 May 2024 15:54:42 -0400
Subject: [PATCH 1/2] fix(datalad): Decompose subprocess.run() for logging

---
 .../datalad_service/tasks/validator.py        | 51 ++++++++++++-------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/services/datalad/datalad_service/tasks/validator.py b/services/datalad/datalad_service/tasks/validator.py
index 797d55d3a4..811ee0e017 100644
--- a/services/datalad/datalad_service/tasks/validator.py
+++ b/services/datalad/datalad_service/tasks/validator.py
@@ -35,6 +35,25 @@ def setup_validator():
         subprocess.run(['yarn'])
 
 
+def run_and_decode(args, timeout, esLogger):
+    """Run a subprocess and return the JSON output."""
+    process = gevent.subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    try:
+        process.wait(timeout=timeout)
+    except subprocess.TimeoutExpired:
+        process.kill()
+
+    # Retrieve what we can from the process
+    stdout, stderr = process.communicate()
+
+    # If we have a whole JSON document from stdout, then return it
+    # Otherwise, log the error and return None
+    try:
+        return json.loads(escape_ansi(stdout.decode('utf-8')))
+    except json.decoder.JSONDecodeError as err:
+        esLogger.log(stdout, stderr, err)
+
+
 def validate_dataset_sync(dataset_path, ref, esLogger):
     """
     Synchronous dataset validation.
@@ -42,32 +61,26 @@ def validate_dataset_sync(dataset_path, ref, esLogger):
     Runs the bids-validator process and installs node dependencies if needed.
     """
     setup_validator()
-    try:
-        process = gevent.subprocess.run(
-            ['./node_modules/.bin/bids-validator', '--json', '--ignoreSubjectConsistency', dataset_path], stdout=subprocess.PIPE, timeout=300)
-        return json.loads(process.stdout)
-    except subprocess.TimeoutExpired as err:
-        esLogger.log(process.stdout, process.stderr, err)
-    except json.decoder.JSONDecodeError as err:
-        esLogger.log(process.stdout, process.stderr, err)
+    return run_and_decode(
+        ['./node_modules/.bin/bids-validator', '--json', '--ignoreSubjectConsistency', dataset_path],
+        timeout=300,
+        esLogger=esLogger,
+    )
 
 
 def validate_dataset_deno_sync(dataset_path, ref, esLogger):
     """
     Synchronous dataset validation.
 
-    Runs the bids-validator process and installs node dependencies if needed.
+    Runs the deno bids-validator process.
     """
-    try:
-        process = gevent.subprocess.run(
-            ['deno', 'run', '-A',
-            f'https://deno.land/x/bids_validator@{DENO_VALIDATOR_VERSION}/bids-validator.ts',
-            '--json', dataset_path], stdout=subprocess.PIPE, timeout=300)
-        return json.loads(escape_ansi(process.stdout.decode('utf-8')))
-    except subprocess.TimeoutExpired as err:
-        esLogger.log(process.stdout, process.stderr, err)
-    except json.decoder.JSONDecodeError as err:
-        esLogger.log(process.stdout, process.stderr, err)
+    return run_and_decode(
+        ['deno', 'run', '-A',
+         f'https://deno.land/x/bids_validator@{DENO_VALIDATOR_VERSION}/bids-validator.ts',
+         '--json', dataset_path],
+        timeout=300,
+        esLogger=esLogger,
+    )
 
 
 def summary_mutation(dataset_id, ref, validator_output, validator_metadata):

From ce8ee3015aeb01d29697d5e22f5eba1e01a5e962 Mon Sep 17 00:00:00 2001
From: Chris Markiewicz <effigies@gmail.com>
Date: Wed, 8 May 2024 16:50:07 -0400
Subject: [PATCH 2/2] fix(test): Mock Popen() instead of run()

---
 services/datalad/tests/test_validator.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/services/datalad/tests/test_validator.py b/services/datalad/tests/test_validator.py
index 0b7c5431ac..9bcdb58d82 100644
--- a/services/datalad/tests/test_validator.py
+++ b/services/datalad/tests/test_validator.py
@@ -21,8 +21,12 @@ def test_validator_error(new_dataset):
 @pytest.fixture
 def mock_validator_crash(monkeypatch):
     def return_bad_json(*args, **kwargs):
-        return SimpleNamespace(stdout='{invalidJson', stderr='')
-    monkeypatch.setattr(subprocess, 'run', return_bad_json)
+        return SimpleNamespace(
+            wait=lambda timeout=None: None,
+            kill=lambda: None,
+            communicate=lambda: (b'{invalidJson', b'')
+        )
+    monkeypatch.setattr(subprocess, 'Popen', return_bad_json)
 
 
 def test_validator_bad_json(new_dataset, mock_validator_crash):