apply new, more generic analysis format (#503)
Konstanty Cieśliński authored Apr 28, 2021
1 parent e1a1fa9 commit 4a9e713
Showing 3 changed files with 44 additions and 20 deletions.
7 changes: 5 additions & 2 deletions drakcore/drakcore/process.py
@@ -64,7 +64,7 @@ def wrapper(self: Karton, *args, **kwargs):
 class AnalysisProcessor(Karton):
     version = DRAKCORE_VERSION
     identity = "karton.drakrun.processor"
-    filters = [{"type": "analysis", "kind": "drakrun"}]
+    filters = [{"type": "analysis-raw", "kind": "drakrun-internal"}]
 
     def __init__(self, config, enabled_plugins):
         super().__init__(config)
@@ -102,9 +102,12 @@ def process(self):
 
         task = Task({
             "type": "analysis",
-            "kind": "drakrun-processed",
+            "kind": "drakrun",
         })
 
+        # Add metadata about the dumps inside dumps.zip
+        task.add_payload("dumps_metadata", self.current_task.get_payload("dumps_metadata"))
+
         for (name, resource) in task_resources.items():
             task.add_payload(name, resource)
         self.send_task(task)
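For context, the dumps_metadata payload forwarded above is the list built by crop_dumps in drakrun/main.py (see below): one entry per memory dump stored in dumps.zip. A minimal sketch of its shape (the file names and base addresses are illustrative, not taken from a real analysis):

    dumps_metadata = [
        {"filename": "dumps/405000_688f58c58d798ecb", "base_address": "0x405000"},
        {"filename": "dumps/7ff0000_1a2b3c4d5e6f7a8b", "base_address": "0x7ff0000"},
    ]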
38 changes: 30 additions & 8 deletions drakrun/drakrun/main.py
@@ -111,8 +111,8 @@ class DrakrunKarton(Karton):
         }
     ]
     DEFAULT_HEADERS = {
-        "type": "analysis",
-        "kind": "drakrun",
+        "type": "analysis-raw",
+        "kind": "drakrun-internal",
     }
 
     # Filters and headers used for testing sample analysis
@@ -128,7 +128,7 @@ class DrakrunKarton(Karton):
     ]
     DEFAULT_TEST_HEADERS = {
         "type": "analysis-test",
-        "kind": "drakrun",
+        "kind": "drakrun-internal",
     }
 
     def __init__(self, config: Config, instance_id: int):
@@ -287,12 +287,19 @@ def crop_dumps(self, dirpath, target_zip):
         max_total_size = 300 * 1024 * 1024  # 300 MB
         current_size = 0
 
+        dumps_metadata = []
         for _, path, size in sorted(entries):
             current_size += size
 
             if current_size <= max_total_size:
                 # Store files under dumps/
-                zipf.write(path, os.path.join("dumps", os.path.basename(path)))
+                file_basename = os.path.basename(path)
+                if re.fullmatch(r"[a-f0-9]{4,16}_[a-f0-9]{16}", file_basename):
+                    # If the file is a memory dump, record metadata that can
+                    # later be attached as a payload when creating an `analysis` task.
+                    dump_base = self._get_base_from_drakrun_dump(file_basename)
+                    dumps_metadata.append({"filename": os.path.join("dumps", file_basename), "base_address": dump_base})
+                zipf.write(path, os.path.join("dumps", file_basename))
                 os.unlink(path)
 
         # No dumps, force empty directory
@@ -301,6 +308,15 @@
 
         if current_size > max_total_size:
             self.log.error('Some dumps were deleted, because the configured size threshold was exceeded.')
+        return dumps_metadata
+
+    def _get_base_from_drakrun_dump(self, dump_name):
+        """
+        Drakrun dumps come in the form <base>_<hash>, e.g. 405000_688f58c58d798ecb,
+        which can be read as a dump from address 0x405000 with a content hash
+        of 688f58c58d798ecb.
+        """
+        return hex(int(dump_name.split("_")[0], 16))
 
     def update_vnc_info(self):
         """
@@ -351,7 +367,11 @@ def build_profile_payload(self) -> Dict[str, LocalResource]:
 
         return Resource.from_directory(name="profiles", directory_path=tmp_dir)
 
-    def send_analysis(self, sample, outdir, metadata, quality):
+    def send_raw_analysis(self, sample, outdir, metadata, dumps_metadata, quality):
+        """
+        Offload drakrun-prod by sending raw analysis output to be processed by
+        drakrun.processor.
+        """
         payload = {"analysis_uid": self.analysis_uid}
         payload.update(metadata)
 
@@ -364,6 +384,7 @@ def send_analysis(self, sample, outdir, metadata, quality):
 
         task = Task(headers, payload=payload)
         task.add_payload('sample', sample)
+        task.add_payload('dumps_metadata', dumps_metadata)
 
         if self.test_run:
             task.add_payload('testcase', self.current_task.payload['testcase'])
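Putting the pieces together, the task emitted by send_raw_analysis now looks roughly like this. The sketch below only spells out the dict shapes; the uid is a placeholder, and the header values come from DEFAULT_HEADERS above.

    # Shape of the raw-analysis task emitted by send_raw_analysis (sketch):
    headers = {"type": "analysis-raw", "kind": "drakrun-internal"}  # DEFAULT_HEADERS
    payload = {
        "analysis_uid": "0123456789abcdef",  # placeholder; real value is self.analysis_uid
        # plus the metadata dict merged in via payload.update(metadata)
    }
    # Extra payloads attached afterwards: the "sample" resource and the
    # "dumps_metadata" list built by crop_dumps.

karton.drakrun.processor picks this task up via its analysis-raw filter and re-emits it as a plain {"type": "analysis", "kind": "drakrun"} task for downstream consumers.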
@@ -644,8 +665,9 @@ def process(self, task: Task):
 
         self.log.info("Analysis done. Collecting artifacts...")
 
-        # Make sure dumps have a reasonable size
-        self.crop_dumps(os.path.join(outdir, 'dumps'), os.path.join(outdir, 'dumps.zip'))
+        # Make sure dumps have a reasonable size.
+        # Calculate dumps_metadata, as it's required by the `analysis` task format.
+        dumps_metadata = self.crop_dumps(os.path.join(outdir, 'dumps'), os.path.join(outdir, 'dumps.zip'))
 
         # Compress IPT traces; they're quite large but compress well
         self.compress_ipt(os.path.join(outdir, 'ipt'), os.path.join(outdir, 'ipt.zip'))
@@ -656,7 +678,7 @@ def process(self, task: Task):
             f.write(json.dumps(metadata))
 
         quality = task.headers.get("quality", "high")
-        self.send_analysis(sample, outdir, metadata, quality)
+        self.send_raw_analysis(sample, outdir, metadata, dumps_metadata, quality)
 
 
 def validate_xen_commandline():
19 changes: 9 additions & 10 deletions drakrun/drakrun/regression.py
@@ -65,33 +65,32 @@ class RegressionTester(Karton):
     filters = [
         {
             "type": "analysis-test",
-            "kind": "drakrun",
+            "kind": "drakrun-internal",
         },
     ]
 
     def __init__(self, config: Config):
         super().__init__(config)
 
-    def analyze_dumps(self, sample, dump_dir):
+    def analyze_dumps(self, sample, dump_dir, dumps_metadata):
         manager = ExtractManager(ExtractorModules(self.config.config['draktestd']['modules']))
-        dumps = Path(dump_dir) / "dumps"
         family = None
-        for f in dumps.glob("*.metadata"):
-            with open(f, "rb") as metafile:
-                metadata = json.load(metafile)
-            va = int(metadata["DumpAddress"], 16)
-            name = dumps / metadata["DataFileName"]
+        for dump_metadata in dumps_metadata:
+            dump_path = os.path.join(dump_dir, dump_metadata["filename"])
+            va = int(dump_metadata["base_address"], 16)
+
             with changedLogLevel(logging.getLogger(), logging.ERROR):
-                res = manager.push_file(name, base=va)
+                res = manager.push_file(dump_path, base=va)
                 family = family or res
         return family
 
     def process(self, task: Task):
         dumps = task.get_resource("dumps.zip")
+        dumps_metadata = task.get_payload("dumps_metadata")
         sample = task.get_resource("sample")
 
         with dumps.extract_temporary() as temp:
-            family = self.analyze_dumps(sample, temp)
+            family = self.analyze_dumps(sample, temp, dumps_metadata)
 
         testcase = TestCase.from_json(task.payload["testcase"])
         expected_family = testcase.ripped
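The substantive change here: base addresses were previously recovered from per-dump *.metadata JSON files (DumpAddress / DataFileName) written alongside the dumps; now they travel with the Karton task itself. A self-contained sketch of the new lookup, with an illustrative extraction path and a single made-up entry:

    import os

    dump_dir = "/tmp/extracted-dumps"  # illustrative: where dumps.zip was extracted
    dumps_metadata = [
        {"filename": "dumps/405000_688f58c58d798ecb", "base_address": "0x405000"},
    ]

    for dump_metadata in dumps_metadata:
        dump_path = os.path.join(dump_dir, dump_metadata["filename"])
        va = int(dump_metadata["base_address"], 16)
        print(dump_path, hex(va))  # each dump gets pushed to ExtractManager at this base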
