From 69c47cf5037b4b8dd18491799624fe363504afc8 Mon Sep 17 00:00:00 2001 From: Nils Kuhnert Date: Sat, 13 Jan 2018 15:37:23 +0100 Subject: [PATCH 1/4] Quickfix for #169: filter input from artifacts, only allow letters for tld part of domains --- contrib/cortexutils/analyzer.py | 3 +-- contrib/cortexutils/extractor.py | 23 ++++++++++++++-- contrib/tests/test_suite_analyzer.py | 2 +- contrib/tests/test_suite_extractor.py | 7 +++++ contrib/tests/test_suite_integration.py | 35 +++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 contrib/tests/test_suite_integration.py diff --git a/contrib/cortexutils/analyzer.py b/contrib/cortexutils/analyzer.py index 7d9a4afd1..e4ac2355c 100644 --- a/contrib/cortexutils/analyzer.py +++ b/contrib/cortexutils/analyzer.py @@ -1,6 +1,5 @@ #!/usr/bin/env python # encoding: utf-8 - import os import sys import codecs @@ -154,7 +153,7 @@ def summary(self, raw): def artifacts(self, raw): # Use the regex extractor, if auto_extract setting is not False if self.auto_extract: - extractor = Extractor() + extractor = Extractor(ignore=self.get_data()) return extractor.check_iterable(raw) # Return empty list diff --git a/contrib/cortexutils/extractor.py b/contrib/cortexutils/extractor.py index f77c1df13..9e7c737ae 100644 --- a/contrib/cortexutils/extractor.py +++ b/contrib/cortexutils/extractor.py @@ -1,5 +1,7 @@ #!/usr/bin/env python from builtins import str as unicode + +import io import re @@ -11,9 +13,13 @@ class Extractor: Currently, this is not a fulltext search, so the the ioc's must be isolated strings, to get found. This can be iterated for ioc's. + + :param ignore: List of strings or a single string to ignore when matching artifacts to type + :type ignore: list, str """ - def __init__(self): + def __init__(self, ignore=None): + self.ignore = ignore self.regex = self.__init_regex() @staticmethod @@ -63,9 +69,10 @@ def __init_regex(): }) # domain + tldpattern = '(' regex.append({ 'type': 'domain', - 'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.\w+$') + 'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.[a-zA-Z]+$'.format(tldpattern)) }) # hash @@ -108,6 +115,16 @@ def __init_regex(): return regex + @staticmethod + def __get_tlds(): + """Get a list of tlds from the contributed mozille tld list""" + tlds = [] + with io.open('contrib/tlds.txt') as tldfile: + for line in tldfile: + if line != '' and not line.beginswith('//'): + tlds.append(line) + return tlds + def __checktype(self, value): """Checks if the given value is a known datatype @@ -116,6 +133,8 @@ def __checktype(self, value): :return: Data type of value, if known, else empty string :rtype: str """ + if self.ignore and value in self.ignore: + return '' if isinstance(value, (str, unicode)): for r in self.regex: diff --git a/contrib/tests/test_suite_analyzer.py b/contrib/tests/test_suite_analyzer.py index b171afb36..9192a2d8c 100644 --- a/contrib/tests/test_suite_analyzer.py +++ b/contrib/tests/test_suite_analyzer.py @@ -132,7 +132,7 @@ def setUp(self): load_test_fixture('fixtures/test-report-response.json') self.analyzer = Analyzer() - def test_error_response(self): + def test_report_response(self): # Run the analyzer report method self.analyzer.report({'report_id':'12345'}) diff --git a/contrib/tests/test_suite_extractor.py b/contrib/tests/test_suite_extractor.py index 2b764b9b4..3533b32d6 100644 --- a/contrib/tests/test_suite_extractor.py +++ b/contrib/tests/test_suite_extractor.py @@ -147,3 +147,10 @@ def test_iterable(self): l_expected, 'Check_iterable: wrong list returned.' ) + + def test_float(self): + self.assertEqual( + self.extractor.check_string(value='0.001234'), + '', + 'Check_float: float was recognized, but should not.' + ) diff --git a/contrib/tests/test_suite_integration.py b/contrib/tests/test_suite_integration.py new file mode 100644 index 000000000..04bec6821 --- /dev/null +++ b/contrib/tests/test_suite_integration.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# coding: utf-8 +import json +import unittest +import sys + +from cortexutils.analyzer import Analyzer + +# Different lib when using python3 or 2 +if sys.version_info >= (3, 0): + from io import StringIO +else: + from StringIO import StringIO + +class AnalyzerExtractorOutputTest(unittest.TestCase): + def setUp(self): + sys.stdin = StringIO(json.dumps({ + "data": "8.8.8.8", + "dataType": "ip" + })) + sys.stdout = StringIO() + self.analyzer = Analyzer() + + def test_output(self): + # Run the report method + self.analyzer.report({'result': '1.2.3.4'}) + + # Grab the output + output = self.analyzer.fpoutput.getvalue().strip() + json_output = json.loads(output) + + # Checks + self.assertNotIn(self.analyzer.get_data(), output) + self.assertEqual(json_output['artifacts'][0]['value'], '1.2.3.4') + self.assertEqual(json_output['artifacts'][0]['type'], 'ip') From 9a7613ae2bfa0200db555da74a0bf9be11ed3227 Mon Sep 17 00:00:00 2001 From: Nils Kuhnert Date: Sat, 13 Jan 2018 15:44:38 +0100 Subject: [PATCH 2/4] Forgot to clean-up. :) --- contrib/cortexutils/extractor.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/contrib/cortexutils/extractor.py b/contrib/cortexutils/extractor.py index 9e7c737ae..e89a457fd 100644 --- a/contrib/cortexutils/extractor.py +++ b/contrib/cortexutils/extractor.py @@ -1,7 +1,6 @@ #!/usr/bin/env python from builtins import str as unicode -import io import re @@ -69,10 +68,9 @@ def __init_regex(): }) # domain - tldpattern = '(' regex.append({ 'type': 'domain', - 'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.[a-zA-Z]+$'.format(tldpattern)) + 'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.[a-zA-Z]+$') }) # hash @@ -115,16 +113,6 @@ def __init_regex(): return regex - @staticmethod - def __get_tlds(): - """Get a list of tlds from the contributed mozille tld list""" - tlds = [] - with io.open('contrib/tlds.txt') as tldfile: - for line in tldfile: - if line != '' and not line.beginswith('//'): - tlds.append(line) - return tlds - def __checktype(self, value): """Checks if the given value is a known datatype From 666dd4ffca152d93ddd7756cc410c96c482658ea Mon Sep 17 00:00:00 2001 From: Nils Kuhnert Date: Sun, 14 Jan 2018 16:47:26 +0100 Subject: [PATCH 3/4] Quickfix for #169: added same regex change for fqdn detection --- contrib/cortexutils/extractor.py | 2 +- contrib/tests/test_suite_extractor.py | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/contrib/cortexutils/extractor.py b/contrib/cortexutils/extractor.py index e89a457fd..808a4e79e 100644 --- a/contrib/cortexutils/extractor.py +++ b/contrib/cortexutils/extractor.py @@ -108,7 +108,7 @@ def __init_regex(): # fqdn regex.append({ 'type': 'fqdn', - 'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-\.]+\.[\w\-]+\.\w+$') + 'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-\.]+\.[\w\-]+\.[a-zA-Z]+$') }) return regex diff --git a/contrib/tests/test_suite_extractor.py b/contrib/tests/test_suite_extractor.py index 3533b32d6..782e38e85 100644 --- a/contrib/tests/test_suite_extractor.py +++ b/contrib/tests/test_suite_extractor.py @@ -148,9 +148,16 @@ def test_iterable(self): 'Check_iterable: wrong list returned.' ) - def test_float(self): + def test_float_domain(self): self.assertEqual( self.extractor.check_string(value='0.001234'), '', - 'Check_float: float was recognized, but should not.' + 'Check_float: float was recognized as domain, but should not.' + ) + + def test_float_fqdn(self): + self.assertEqual( + self.extractor.check_string(value='0.1234.5678'), + '', + 'Check_float_fqdn: float was recognized as fqdn but should not.' ) From ef61f776861c0344a3f431e1b44a108a57ce2bae Mon Sep 17 00:00:00 2001 From: Nils Kuhnert Date: Wed, 28 Feb 2018 11:12:03 +0100 Subject: [PATCH 4/4] Bump version --- contrib/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/setup.py b/contrib/setup.py index 157d4f4f4..cea67c1a3 100644 --- a/contrib/setup.py +++ b/contrib/setup.py @@ -2,7 +2,7 @@ setup( name='cortexutils', - version='1.2.0', + version='1.2.1', description='A Python library for including utility classes for Cortex analyzers', long_description=open('README').read(), author='TheHive-Project',