apply new, more generic analysis format (#503)
Konstanty Cieśliński authored Apr 28, 2021
1 parent e1a1fa9 commit 4a9e713
Showing 3 changed files with 44 additions and 20 deletions.
7 changes: 5 additions & 2 deletions drakcore/drakcore/process.py
@@ -64,7 +64,7 @@ def wrapper(self: Karton, *args, **kwargs):
 class AnalysisProcessor(Karton):
     version = DRAKCORE_VERSION
     identity = "karton.drakrun.processor"
-    filters = [{"type": "analysis", "kind": "drakrun"}]
+    filters = [{"type": "analysis-raw", "kind": "drakrun-internal"}]
 
     def __init__(self, config, enabled_plugins):
         super().__init__(config)
@@ -102,9 +102,12 @@ def process(self):
 
         task = Task({
             "type": "analysis",
-            "kind": "drakrun-processed",
+            "kind": "drakrun",
         })
 
+        # Add metadata about the dumps inside dumps.zip
+        task.add_payload("dumps_metadata", self.current_task.get_payload("dumps_metadata"))
+
         for (name, resource) in task_resources.items():
             task.add_payload(name, resource)
         self.send_task(task)
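For context, the dumps_metadata payload forwarded above is the list built by crop_dumps in drakrun/main.py (see below): one entry per memory dump stored in dumps.zip. A minimal sketch of its shape (the file names and base addresses are illustrative, not taken from a real analysis):

    dumps_metadata = [
        {"filename": "dumps/405000_688f58c58d798ecb", "base_address": "0x405000"},
        {"filename": "dumps/7ff0000_1a2b3c4d5e6f7a8b", "base_address": "0x7ff0000"},
    ]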
38 changes: 30 additions & 8 deletions drakrun/drakrun/main.py
@@ -111,8 +111,8 @@ class DrakrunKarton(Karton):
         }
     ]
     DEFAULT_HEADERS = {
-        "type": "analysis",
-        "kind": "drakrun",
+        "type": "analysis-raw",
+        "kind": "drakrun-internal",
     }
 
     # Filters and headers used for testing sample analysis
@@ -128,7 +128,7 @@ class DrakrunKarton(Karton):
     ]
     DEFAULT_TEST_HEADERS = {
         "type": "analysis-test",
-        "kind": "drakrun",
+        "kind": "drakrun-internal",
     }
 
     def __init__(self, config: Config, instance_id: int):
@@ -287,12 +287,19 @@ def crop_dumps(self, dirpath, target_zip):
         max_total_size = 300 * 1024 * 1024  # 300 MB
         current_size = 0
 
+        dumps_metadata = []
         for _, path, size in sorted(entries):
             current_size += size
 
             if current_size <= max_total_size:
                 # Store files under dumps/
-                zipf.write(path, os.path.join("dumps", os.path.basename(path)))
+                file_basename = os.path.basename(path)
+                if re.fullmatch(r"[a-f0-9]{4,16}_[a-f0-9]{16}", file_basename):
+                    # If the file is a memory dump, record metadata that can
+                    # later be attached as a payload when creating an `analysis` task.
+                    dump_base = self._get_base_from_drakrun_dump(file_basename)
+                    dumps_metadata.append({"filename": os.path.join("dumps", file_basename), "base_address": dump_base})
+                zipf.write(path, os.path.join("dumps", file_basename))
                 os.unlink(path)
 
         # No dumps, force empty directory
@@ -301,6 +308,15 @@
 
         if current_size > max_total_size:
             self.log.error('Some dumps were deleted, because the configured size threshold was exceeded.')
+        return dumps_metadata
+
+    def _get_base_from_drakrun_dump(self, dump_name):
+        """
+        Drakrun dumps come in the form <base>_<hash>, e.g. 405000_688f58c58d798ecb,
+        which can be read as a dump from address 0x405000 with a content hash
+        of 688f58c58d798ecb.
+        """
+        return hex(int(dump_name.split("_")[0], 16))
 
     def update_vnc_info(self):
         """
@@ -351,7 +367,11 @@ def build_profile_payload(self) -> Dict[str, LocalResource]:
 
         return Resource.from_directory(name="profiles", directory_path=tmp_dir)
 
-    def send_analysis(self, sample, outdir, metadata, quality):
+    def send_raw_analysis(self, sample, outdir, metadata, dumps_metadata, quality):
+        """
+        Offload drakrun-prod by sending raw analysis output to be processed by
+        drakrun.processor.
+        """
         payload = {"analysis_uid": self.analysis_uid}
         payload.update(metadata)
 
@@ -364,6 +384,7 @@ def send_analysis(self, sample, outdir, metadata, quality):
 
         task = Task(headers, payload=payload)
         task.add_payload('sample', sample)
+        task.add_payload('dumps_metadata', dumps_metadata)
 
         if self.test_run:
             task.add_payload('testcase', self.current_task.payload['testcase'])
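Putting the pieces together, the task emitted by send_raw_analysis now looks roughly like this. The sketch below only spells out the dict shapes; the uid is a placeholder, and the header values come from DEFAULT_HEADERS above.

    # Shape of the raw-analysis task emitted by send_raw_analysis (sketch):
    headers = {"type": "analysis-raw", "kind": "drakrun-internal"}  # DEFAULT_HEADERS
    payload = {
        "analysis_uid": "0123456789abcdef",  # placeholder; real value is self.analysis_uid
        # plus the metadata dict merged in via payload.update(metadata)
    }
    # Extra payloads attached afterwards: the "sample" resource and the
    # "dumps_metadata" list built by crop_dumps.

karton.drakrun.processor picks this task up via its analysis-raw filter and re-emits it as a plain {"type": "analysis", "kind": "drakrun"} task for downstream consumers.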
@@ -644,8 +665,9 @@ def process(self, task: Task):
 
         self.log.info("Analysis done. Collecting artifacts...")
 
-        # Make sure dumps have a reasonable size
-        self.crop_dumps(os.path.join(outdir, 'dumps'), os.path.join(outdir, 'dumps.zip'))
+        # Make sure dumps have a reasonable size.
+        # Calculate dumps_metadata, as it's required by the `analysis` task format.
+        dumps_metadata = self.crop_dumps(os.path.join(outdir, 'dumps'), os.path.join(outdir, 'dumps.zip'))
 
         # Compress IPT traces; they're quite large but compress well
         self.compress_ipt(os.path.join(outdir, 'ipt'), os.path.join(outdir, 'ipt.zip'))
@@ -656,7 +678,7 @@ def process(self, task: Task):
             f.write(json.dumps(metadata))
 
         quality = task.headers.get("quality", "high")
-        self.send_analysis(sample, outdir, metadata, quality)
+        self.send_raw_analysis(sample, outdir, metadata, dumps_metadata, quality)
 
 
 def validate_xen_commandline():
19 changes: 9 additions & 10 deletions drakrun/drakrun/regression.py
@@ -65,33 +65,32 @@ class RegressionTester(Karton):
     filters = [
         {
             "type": "analysis-test",
-            "kind": "drakrun",
+            "kind": "drakrun-internal",
         },
     ]
 
     def __init__(self, config: Config):
         super().__init__(config)
 
-    def analyze_dumps(self, sample, dump_dir):
+    def analyze_dumps(self, sample, dump_dir, dumps_metadata):
         manager = ExtractManager(ExtractorModules(self.config.config['draktestd']['modules']))
-        dumps = Path(dump_dir) / "dumps"
         family = None
-        for f in dumps.glob("*.metadata"):
-            with open(f, "rb") as metafile:
-                metadata = json.load(metafile)
-            va = int(metadata["DumpAddress"], 16)
-            name = dumps / metadata["DataFileName"]
+        for dump_metadata in dumps_metadata:
+            dump_path = os.path.join(dump_dir, dump_metadata["filename"])
+            va = int(dump_metadata["base_address"], 16)
+
             with changedLogLevel(logging.getLogger(), logging.ERROR):
-                res = manager.push_file(name, base=va)
+                res = manager.push_file(dump_path, base=va)
                 family = family or res
         return family
 
     def process(self, task: Task):
         dumps = task.get_resource("dumps.zip")
+        dumps_metadata = task.get_payload("dumps_metadata")
         sample = task.get_resource("sample")
 
         with dumps.extract_temporary() as temp:
-            family = self.analyze_dumps(sample, temp)
+            family = self.analyze_dumps(sample, temp, dumps_metadata)
 
         testcase = TestCase.from_json(task.payload["testcase"])
         expected_family = testcase.ripped
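The substantive change here: base addresses were previously recovered from per-dump *.metadata JSON files (DumpAddress / DataFileName) written alongside the dumps; now they travel with the Karton task itself. A self-contained sketch of the new lookup, with an illustrative extraction path and a single made-up entry:

    import os

    dump_dir = "/tmp/extracted-dumps"  # illustrative: where dumps.zip was extracted
    dumps_metadata = [
        {"filename": "dumps/405000_688f58c58d798ecb", "base_address": "0x405000"},
    ]

    for dump_metadata in dumps_metadata:
        dump_path = os.path.join(dump_dir, dump_metadata["filename"])
        va = int(dump_metadata["base_address"], 16)
        print(dump_path, hex(va))  # each dump gets pushed to ExtractManager at this base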
