-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(analyzers): Implement base64asciianalyzer (#172)
Thank you for your contribution(s) - especially for the tests, since, from my experience, these take most of the time to set up. All tests are green, the analyzer matches only what it should, codacy is happy with the code quality, so I am happy to merge this PR.

fixes #166

___

* feat(analyzers): implement base64asciianalyzer
* test(analyzers) add test to base64analyzers
* fix: use proper name `__init__`
* feat: add option to return decoded b64 from analyzer

  The b64asciianalyzer can now return either the original text or the decoded result.
* fix: add check for valid ascii characters

  By using the decode method on the byte string and catching exceptions, we can figure out if the result is actual ascii or other data.
* update base64asciianalyzer tests for decode flag functionality
* fix(test): correct order of equality comparisons

  The first parameter is always the expected result. The second one is the sample one.
* fix(test): check for newline character

  The original test did not actually check for newline/linefeed character but for the literal string '\n'.

Co-authored-by: Rico <[email protected]>
- Loading branch information
1 parent
6345639
commit b535781
Showing
5 changed files
with
216 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
# -*- coding: utf-8 -*- | ||
from .base64analyzer import Base64Analyzer | ||
from base64 import b64decode | ||
import binascii | ||
|
||
|
||
class Base64AsciiAnalyzer(Base64Analyzer):
    """Analyzer to match base64 strings which decode to valid ASCII.

    Candidates are first filtered by ``Base64Analyzer.verify``; each surviving
    string is then base64-decoded and kept only if the decoded bytes are pure
    ASCII.
    """
    name = 'Base64AsciiAnalyzer'

    def __init__(self, actions, min_len=1, decode=False):
        """Create the analyzer.

        :param actions: Passed through unchanged to ``Base64Analyzer.__init__``
        :param min_len: Passed through unchanged to ``Base64Analyzer.__init__``
        :param decode: If True, ``verify`` returns the decoded ASCII text of
                       each match; if False (default) it returns the original
                       base64 string
        """
        super().__init__(actions, min_len)
        self.decode = decode

    def verify(self, results):
        """Keep only the base64 strings that decode to valid ASCII.

        :param results: Candidate matches, handed to the parent class's
                        ``verify`` for the initial base64 filtering
        :return: List with one entry per accepted candidate, in the order the
                 parent returned them: the decoded text when ``self.decode``
                 is truthy, otherwise the original base64 string
        """
        # Let the parent class narrow the candidates down first
        validated_strings = super().verify(results)

        base64_ascii_strings = []

        for validated_string in validated_strings:
            try:
                decoded_string = b64decode(validated_string)
            except (binascii.Error, ValueError):
                # binascii.Error: malformed base64 (e.g. bad padding).
                # ValueError: b64decode rejects str input that contains
                # non-ASCII characters. Either way this is no usable base64,
                # so skip the candidate instead of aborting the whole run.
                continue

            # Decoding the bytes as ASCII doubles as the "is it ASCII?" check
            try:
                b64_ascii_string = decoded_string.decode('ascii')
            except UnicodeDecodeError:
                continue

            base64_ascii_strings.append(b64_ascii_string if self.decode else validated_string)

        return base64_ascii_strings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,161 @@ | ||
# -*- coding: utf-8 -*- | ||
import unittest | ||
from unittest import mock | ||
|
||
from pastepwn.analyzers.base64asciianalyzer import Base64AsciiAnalyzer | ||
|
||
|
||
class TestBase64AsciiAnalyzer(unittest.TestCase):
    """Tests for Base64AsciiAnalyzer: matching, min_len handling and the decode flag."""

    # Shared fixtures: plain ASCII texts (UTF-8, LF) and their base64 encodings.
    # 32 characters of plain text
    PLAIN_32 = "2fwZ_CTjDKxu48FLCLZcGdB!sEj5XRQh"
    B64_32 = "MmZ3Wl9DVGpES3h1NDhGTENMWmNHZEIhc0VqNVhSUWg="

    # 64 characters of plain text
    PLAIN_64 = "Mv=ZH?NJrrBSdhus*KVg%4dG6*C&ub?sSeq!VrzCb_-QcY^KWfxKy8AJ3=^5?b6N"
    B64_64 = "TXY9Wkg/TkpyckJTZGh1cypLVmclNGRHNipDJnViP3NTZXEhVnJ6Q2JfLVFjWV5LV2Z4S3k4QUozPV41P2I2Tg=="

    # 256 characters of plain text
    PLAIN_256 = "etFk!?m@A_vvdMT39Mgcynx_AFz6HY!4R8U3n_7JA?-rF=F3ehWat%4rKfhsuCc98G" \
                "=t8jMY7hgJDZ2c!y!$!XQATbk6fQD2pa+EdQ_rfP^&_DKJ34dFPcuGjDBTqdxZ&=3U%" \
                "@dm&?JW#+k@mB%a3TFn%GAzukL+-%TUTq?fAbAKr@y%LPK+KEmxeh+rg7?s3aR2v5A%tbn&" \
                "_7zNMckCPRd&s8$wW5Bec@aRMCs@4rn?cRx?a&y-Z%kn&h8aLu*R"
    B64_256 = "ZXRGayE/bUBBX3Z2ZE1UMzlNZ2N5bnhfQUZ6NkhZITRSOFUzbl83SkE/LXJGPUYzZWhXYXQlNHJLZmhzdUNjO" \
              "ThHPXQ4ak1ZN2hnSkRaMmMheSEkIVhRQVRiazZmUUQycGErRWRRX3JmUF4mX0RLSjM0ZEZQY3VHakRCVHFkeF" \
              "omPTNVJUBkbSY/SlcjK2tAbUIlYTNURm4lR0F6dWtMKy0lVFVUcT9mQWJBS3JAeSVMUEsrS0VteGVoK3JnNz9" \
              "zM2FSMnY1QSV0Ym4mXzd6Tk1ja0NQUmQmczgkd1c1QmVjQGFSTUNzQDRybj9jUng/YSZ5LVola24maDhhTHUqUg=="

    def setUp(self):
        # Default analyzer (no actions, min_len=1, decode=False) and a mock paste
        # whose ``body`` each test fills in.
        self.analyzer = Base64AsciiAnalyzer(None)
        self.paste = mock.Mock()

    def test_match_positive(self):
        """Test if positives are recognized"""
        # base64 encoded string: "Hello World" (UTF-8, LF)
        self.paste.body = "SGVsbG8gV29ybGQ="
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded string: "Hello", a literal backslash, "nWorld"
        # (the two characters '\' and 'n', NOT a linefeed)
        self.paste.body = "SGVsbG9cbldvcmxk"
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded string: "Hello", an actual linefeed, "World"
        self.paste.body = "SGVsbG8KV29ybGQ="
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded 32 character string
        self.paste.body = self.B64_32
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded 64 character string
        self.paste.body = self.B64_64
        self.assertTrue(self.analyzer.match(self.paste))

        # base64 encoded 256 character string
        self.paste.body = self.B64_256
        self.assertTrue(self.analyzer.match(self.paste))

    def test_intext(self):
        """Test if matches inside text are recognized"""
        self.paste.body = "I wan to tel you tha {} is very important".format(self.B64_64)
        match = self.analyzer.match(self.paste)
        self.assertTrue(match)
        self.assertEqual(self.B64_64, match[0])

    def test_multiple(self):
        """Test if multiple matches are recognized"""
        # The filler words are deliberately kept to at most 3 characters each;
        # per the original author, longer words would match as well.
        self.paste.body = "I wan to tel you tha {} is ver imp.\nBut not onl tha, it's als {} and muc mor!".format(
            self.B64_64, self.B64_32)
        match = self.analyzer.match(self.paste)
        self.assertTrue(match)
        self.assertEqual(self.B64_64, match[0])
        self.assertEqual(self.B64_32, match[1])

    def test_multiple_min_len(self):
        """Test if we can match multiple base64 strings in a longer text with min_len"""
        analyzer = Base64AsciiAnalyzer(None, min_len=8)
        self.paste.body = "I wanted to tell you that {} is very important.\n" \
                          "But not only that, it's also {} and much more!".format(self.B64_64, self.B64_32)
        match = analyzer.match(self.paste)
        self.assertTrue(match)
        self.assertEqual(self.B64_64, match[0])
        self.assertEqual(self.B64_32, match[1])

    def test_min_len(self):
        """Test if the min_len parameter works as expected"""
        # A 4 character base64 string matches when min_len is exactly 4 ...
        self.paste.body = "dGVz"
        analyzer = Base64AsciiAnalyzer(None, min_len=4)
        match = analyzer.match(self.paste)
        self.assertTrue(match)

        # ... but no longer matches once min_len exceeds its length
        self.paste.body = "dGVz"
        analyzer = Base64AsciiAnalyzer(None, min_len=5)
        match = analyzer.match(self.paste)
        self.assertFalse(match)

        # A longer base64 string still matches with min_len=5
        self.paste.body = "dGVzdFRoaXNTdHJpbmc="
        match = analyzer.match(self.paste)
        self.assertTrue(match)

    def test_match_negative(self):
        """Test if negatives are not recognized"""
        # test that an empty body does not match
        self.paste.body = ""
        self.assertFalse(self.analyzer.match(self.paste))

        # test that a missing body (None) does not match
        self.paste.body = None
        self.assertFalse(self.analyzer.match(self.paste))

        # invalid base64 string (% symbol inserted which is not valid base64)
        self.paste.body = "SGVsbG8gV%29ybGQ="
        self.assertFalse(self.analyzer.match(self.paste))

        # not a base64 string
        self.paste.body = "====="
        self.assertFalse(self.analyzer.match(self.paste))

        # base32 encoded string
        self.paste.body = "JBSWY3DPEBLW64TMMQ======"
        self.assertFalse(self.analyzer.match(self.paste))

        # long string (129 characters) that is not base64
        self.paste.body = "sFm2XgxTt6fuErnWw9JZkae76sL7XDqyNvf2Wkatt9gkzVDxXTf6dCr3Yh6fT82fFzvNWG49P3KSR7XXngHJ5D9ba" \
                          "Dj448rhbNTJrKhRn7TPkYRubZLhmbCrg6bavDa9a"
        self.assertFalse(self.analyzer.match(self.paste))

    def test_invalid_decodes(self):
        """Test to make sure we don't match base64 strings which don't decode to ASCII"""
        # base64 encoded string containing one non-ascii character: "This string contains a non-ascii character: ¤" (UTF-8)
        self.paste.body = "VGhpcyBzdHJpbmcgY29udGFpbnMgYSBub24tYXNjaWkgY2hhcmFjdGVyOiDCpA=="
        self.assertFalse(self.analyzer.match(self.paste))

        # base64 encoded string containing only non-ascii characters: "ΗÈλλθ ωÖΓλÐ" (UTF-8)
        self.paste.body = "zpfDiM67zrvOuCDPicOWzpPOu8OQ"
        self.assertFalse(self.analyzer.match(self.paste))

        # base64 encoded string containing one non-ascii character: "º" (UTF-8)
        self.paste.body = "wro="
        self.assertFalse(self.analyzer.match(self.paste))

    def test_ascii_decode(self):
        """Test if ascii decode flag works"""
        analyzer = Base64AsciiAnalyzer(None, decode=True)

        # base64 encoded string: "Hello World" (UTF-8, LF)
        self.paste.body = "SGVsbG8gV29ybGQ="
        self.assertEqual("Hello World", analyzer.match(self.paste)[0])

        # base64 encoded string: "Hello", an actual linefeed, "World"
        self.paste.body = "SGVsbG8KV29ybGQ="
        self.assertEqual("Hello\nWorld", analyzer.match(self.paste)[0])

        # base64 encoded 32 character string
        self.paste.body = self.B64_32
        self.assertEqual(self.PLAIN_32, analyzer.match(self.paste)[0])

        # base64 encoded 64 character string
        self.paste.body = self.B64_64
        self.assertEqual(self.PLAIN_64, analyzer.match(self.paste)[0])

        # base64 encoded 256 character string
        self.paste.body = self.B64_256
        self.assertEqual(self.PLAIN_256, analyzer.match(self.paste)[0])
|
||
|
||
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()