From 69c47cf5037b4b8dd18491799624fe363504afc8 Mon Sep 17 00:00:00 2001
From: Nils Kuhnert <Nils.Kuhnert@posteo.de>
Date: Sat, 13 Jan 2018 15:37:23 +0100
Subject: [PATCH 1/4] Quickfix for #169: filter input from artifacts, only
 allow letters for tld part of domains

---
 contrib/cortexutils/analyzer.py         |  3 +--
 contrib/cortexutils/extractor.py        | 23 ++++++++++++++--
 contrib/tests/test_suite_analyzer.py    |  2 +-
 contrib/tests/test_suite_extractor.py   |  7 +++++
 contrib/tests/test_suite_integration.py | 35 +++++++++++++++++++++++++
 5 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 contrib/tests/test_suite_integration.py

diff --git a/contrib/cortexutils/analyzer.py b/contrib/cortexutils/analyzer.py
index 7d9a4afd1..e4ac2355c 100644
--- a/contrib/cortexutils/analyzer.py
+++ b/contrib/cortexutils/analyzer.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # encoding: utf-8
-
 import os
 import sys
 import codecs
@@ -154,7 +153,7 @@ def summary(self, raw):
     def artifacts(self, raw):
         # Use the regex extractor, if auto_extract setting is not False
         if self.auto_extract:
-            extractor = Extractor()
+            extractor = Extractor(ignore=self.get_data())
             return extractor.check_iterable(raw)
 
         # Return empty list
diff --git a/contrib/cortexutils/extractor.py b/contrib/cortexutils/extractor.py
index f77c1df13..9e7c737ae 100644
--- a/contrib/cortexutils/extractor.py
+++ b/contrib/cortexutils/extractor.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 from builtins import str as unicode
+
+import io
 import re
 
 
@@ -11,9 +13,13 @@ class Extractor:
 
     Currently, this is not a fulltext search, so the the ioc's must be isolated strings, to get found.
     This can be iterated for ioc's.
+
+    :param ignore: List of strings or a single string to ignore when matching artifacts to type
+    :type ignore: list, str
     """
 
-    def __init__(self):
+    def __init__(self, ignore=None):
+        self.ignore = ignore
         self.regex = self.__init_regex()
 
     @staticmethod
@@ -63,9 +69,10 @@ def __init_regex():
         })
 
         # domain
+        tldpattern = '('
         regex.append({
             'type': 'domain',
-            'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.\w+$')
+            'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.[a-zA-Z]+$'.format(tldpattern))
         })
 
         # hash
@@ -108,6 +115,16 @@ def __init_regex():
 
         return regex
 
+    @staticmethod
+    def __get_tlds():
+        """Get a list of tlds from the contributed mozille tld list"""
+        tlds = []
+        with io.open('contrib/tlds.txt') as tldfile:
+            for line in tldfile:
+                if line != '' and not line.beginswith('//'):
+                    tlds.append(line)
+        return tlds
+
     def __checktype(self, value):
         """Checks if the given value is a known datatype
 
@@ -116,6 +133,8 @@ def __checktype(self, value):
         :return: Data type of value, if known, else empty string
         :rtype: str
         """
+        if self.ignore and value in self.ignore:
+            return ''
 
         if isinstance(value, (str, unicode)):
             for r in self.regex:
diff --git a/contrib/tests/test_suite_analyzer.py b/contrib/tests/test_suite_analyzer.py
index b171afb36..9192a2d8c 100644
--- a/contrib/tests/test_suite_analyzer.py
+++ b/contrib/tests/test_suite_analyzer.py
@@ -132,7 +132,7 @@ def setUp(self):
         load_test_fixture('fixtures/test-report-response.json')
         self.analyzer = Analyzer()
 
-    def test_error_response(self):
+    def test_report_response(self):
         # Run the analyzer report method
         self.analyzer.report({'report_id':'12345'})
 
diff --git a/contrib/tests/test_suite_extractor.py b/contrib/tests/test_suite_extractor.py
index 2b764b9b4..3533b32d6 100644
--- a/contrib/tests/test_suite_extractor.py
+++ b/contrib/tests/test_suite_extractor.py
@@ -147,3 +147,10 @@ def test_iterable(self):
             l_expected,
             'Check_iterable: wrong list returned.'
         )
+    
+    def test_float(self):
+        self.assertEqual(
+            self.extractor.check_string(value='0.001234'),
+            '',
+            'Check_float: float was recognized, but should not.'
+        )
diff --git a/contrib/tests/test_suite_integration.py b/contrib/tests/test_suite_integration.py
new file mode 100644
index 000000000..04bec6821
--- /dev/null
+++ b/contrib/tests/test_suite_integration.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+# coding: utf-8
+import json
+import unittest
+import sys
+
+from cortexutils.analyzer import Analyzer
+
+# StringIO is imported from a different module on Python 2 and Python 3
+if sys.version_info >= (3, 0):
+    from io import StringIO
+else:
+    from StringIO import StringIO
+
+class AnalyzerExtractorOutputTest(unittest.TestCase):
+    def setUp(self):
+        sys.stdin = StringIO(json.dumps({
+            "data": "8.8.8.8",
+            "dataType": "ip"
+        }))
+        sys.stdout = StringIO()
+        self.analyzer = Analyzer()
+
+    def test_output(self):
+        # Report a result holding an IP that differs from the input observable (8.8.8.8)
+        self.analyzer.report({'result': '1.2.3.4'})
+
+        # Read back the analyzer's output stream (fpoutput) and parse it as JSON
+        output = self.analyzer.fpoutput.getvalue().strip()
+        json_output = json.loads(output)
+
+        # The input value must not appear in the output; the reported IP is the first artifact
+        self.assertNotIn(self.analyzer.get_data(), output)
+        self.assertEqual(json_output['artifacts'][0]['value'], '1.2.3.4')
+        self.assertEqual(json_output['artifacts'][0]['type'], 'ip')

From 9a7613ae2bfa0200db555da74a0bf9be11ed3227 Mon Sep 17 00:00:00 2001
From: Nils Kuhnert <Nils.Kuhnert@posteo.de>
Date: Sat, 13 Jan 2018 15:44:38 +0100
Subject: [PATCH 2/4] Clean up: remove unused TLD list helper and io import

---
 contrib/cortexutils/extractor.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/contrib/cortexutils/extractor.py b/contrib/cortexutils/extractor.py
index 9e7c737ae..e89a457fd 100644
--- a/contrib/cortexutils/extractor.py
+++ b/contrib/cortexutils/extractor.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 from builtins import str as unicode
 
-import io
 import re
 
 
@@ -69,10 +68,9 @@ def __init_regex():
         })
 
         # domain
-        tldpattern = '('
         regex.append({
             'type': 'domain',
-            'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.[a-zA-Z]+$'.format(tldpattern))
+            'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-]+\.[a-zA-Z]+$')
         })
 
         # hash
@@ -115,16 +113,6 @@ def __init_regex():
 
         return regex
 
-    @staticmethod
-    def __get_tlds():
-        """Get a list of tlds from the contributed mozille tld list"""
-        tlds = []
-        with io.open('contrib/tlds.txt') as tldfile:
-            for line in tldfile:
-                if line != '' and not line.beginswith('//'):
-                    tlds.append(line)
-        return tlds
-
     def __checktype(self, value):
         """Checks if the given value is a known datatype
 

From 666dd4ffca152d93ddd7756cc410c96c482658ea Mon Sep 17 00:00:00 2001
From: Nils Kuhnert <Nils.Kuhnert@posteo.de>
Date: Sun, 14 Jan 2018 16:47:26 +0100
Subject: [PATCH 3/4] Quickfix for #169: added same regex change for fqdn
 detection

---
 contrib/cortexutils/extractor.py      |  2 +-
 contrib/tests/test_suite_extractor.py | 11 +++++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/contrib/cortexutils/extractor.py b/contrib/cortexutils/extractor.py
index e89a457fd..808a4e79e 100644
--- a/contrib/cortexutils/extractor.py
+++ b/contrib/cortexutils/extractor.py
@@ -108,7 +108,7 @@ def __init_regex():
         # fqdn
         regex.append({
             'type': 'fqdn',
-            'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-\.]+\.[\w\-]+\.\w+$')
+            'regex': re.compile(r'^(?!http\:\/\/|https\:\/\/)^[\w\-\.]+\.[\w\-]+\.[a-zA-Z]+$')
         })
 
         return regex
diff --git a/contrib/tests/test_suite_extractor.py b/contrib/tests/test_suite_extractor.py
index 3533b32d6..782e38e85 100644
--- a/contrib/tests/test_suite_extractor.py
+++ b/contrib/tests/test_suite_extractor.py
@@ -148,9 +148,16 @@ def test_iterable(self):
             'Check_iterable: wrong list returned.'
         )
     
-    def test_float(self):
+    def test_float_domain(self):
         self.assertEqual(
             self.extractor.check_string(value='0.001234'),
             '',
-            'Check_float: float was recognized, but should not.'
+            'Check_float: float was recognized as domain, but should not.'
+        )
+
+    def test_float_fqdn(self):
+        self.assertEqual(
+            self.extractor.check_string(value='0.1234.5678'),
+            '',
+            'Check_float_fqdn: float was recognized as fqdn but should not.'
         )

From ef61f776861c0344a3f431e1b44a108a57ce2bae Mon Sep 17 00:00:00 2001
From: Nils Kuhnert <Nils.Kuhnert@posteo.de>
Date: Wed, 28 Feb 2018 11:12:03 +0100
Subject: [PATCH 4/4] Bump cortexutils version to 1.2.1

---
 contrib/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/setup.py b/contrib/setup.py
index 157d4f4f4..cea67c1a3 100644
--- a/contrib/setup.py
+++ b/contrib/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='cortexutils',
-    version='1.2.0',
+    version='1.2.1',
     description='A Python library for including utility classes for Cortex analyzers',
     long_description=open('README').read(),
     author='TheHive-Project',