diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 3605715..84db680 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -15,8 +15,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ['3.8', '3.9', '3.10'] - os: [ubuntu-20.04, windows-latest, macos-11, macos-12] + python-version: ['3.10', '3.11', '3.12'] + os: [ubuntu-latest, windows-latest, macos-latest] steps: - uses: actions/checkout@v3 diff --git a/requirements.txt b/requirements.txt index 113463d..7790951 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ dnslib -flask >= 2.2.0 -flask-restx >= 1.0.1 +flask == 2.3.0 +flask-restx >= 1.2.0 flask_cors maclookup networkx -protobuf == 3.* +protobuf == 4.* publicsuffix2 pycountry pymispwarninglists >= 1.5 @@ -12,4 +12,4 @@ Requests setuptools torf ulid-py -Werkzeug >= 2.2.0 +Werkzeug == 2.3.0 diff --git a/setup.py b/setup.py index 4c816a7..1d3315f 100644 --- a/setup.py +++ b/setup.py @@ -18,22 +18,22 @@ author='Ryan Benson', author_email='ryan@dfir.blog', license='Apache', - keywords=['unfurl', 'forensics', 'dfir', 'reverse-engineering', 'security'], + keywords=['unfurl', 'forensics', 'dfir', 'reverse-engineering', 'security', 'osint', 'digital forensics'], classifiers=[], install_requires=[ 'dnslib', - 'flask>=2.2.0', + 'flask==2.3.0', 'flask_cors', - 'flask-restx>=1.0.1', + 'flask-restx>=1.2.0', 'maclookup', 'networkx', - 'protobuf==3.*', + 'protobuf==4.*', 'publicsuffix2', 'pycountry', 'pymispwarninglists>=1.5', 'Requests', 'torf', 'ulid-py', - 'Werkzeug>=2.2.0' + 'Werkzeug==2.3.0' ] ) diff --git a/unfurl/parsers/parse_protobuf.py b/unfurl/parsers/parse_protobuf.py index 0db6072..53df47a 100644 --- a/unfurl/parsers/parse_protobuf.py +++ b/unfurl/parsers/parse_protobuf.py @@ -125,9 +125,8 @@ def parse_protobuf_into_nodes(pb_value_dict, pb_types, edge_type=None): urlsafe_b64_m = utils.urlsafe_b64_re.fullmatch(node.value) standard_b64_m = utils.standard_b64_re.fullmatch(node.value) hex_m = utils.hex_re.fullmatch(node.value) - long_int_m = utils.long_int_re.fullmatch(node.value) all_digits_m = utils.digits_re.fullmatch(node.value) - all_letters_m = utils.digits_re.fullmatch(node.value) + all_letters_m = utils.letters_re.fullmatch(node.value) if hex_m and not (all_digits_m or all_letters_m): decoded = bytes.fromhex(node.value) diff --git a/unfurl/parsers/proto/google_search_pb2.py b/unfurl/parsers/proto/google_search_pb2.py index 071b139..bfafb15 100644 --- a/unfurl/parsers/proto/google_search_pb2.py +++ b/unfurl/parsers/proto/google_search_pb2.py @@ -1,303 +1,31 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google_search.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection +from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() -DESCRIPTOR = _descriptor.FileDescriptor( - name='google_search.proto', - package='', - syntax='proto3', - serialized_options=None, - serialized_pb=b'\n\x13google_search.proto\"\xd1\x03\n\x03Ved\x12\x12\n\nlink_index\x18\x01 \x01(\x05\x12\x11\n\tlink_type\x18\x02 \x01(\x05\x12\n\n\x02v3\x18\x03 \x01(\x05\x12\n\n\x02v4\x18\x04 \x01(\x05\x12\x1b\n\x13sub_result_position\x18\x05 \x01(\x05\x12\x17\n\x0fresult_position\x18\x06 \x01(\x05\x12\x15\n\rresults_start\x18\x07 \x01(\x05\x12\n\n\x02v8\x18\x08 \x01(\x05\x12\n\n\x02v9\x18\t \x01(\x05\x12\x0b\n\x03v10\x18\n \x01(\x05\x12\x0b\n\x03v11\x18\x0b \x01(\x05\x12\x0b\n\x03v12\x18\x0c \x01(\x05\x12%\n\tv13_Outer\x18\r \x01(\x0b\x32\x12.Ved.v13_Outer_Msg\x12\x0b\n\x03v14\x18\x0e \x01(\x05\x12\x19\n\x03v15\x18\x0f \x01(\x0b\x32\x0c.Ved.v15_Msg\x1a\x86\x01\n\rv13_Outer_Msg\x12\x33\n\tv13_Inner\x18\x01 \x01(\x0b\x32 .Ved.v13_Outer_Msg.v13_Inner_Msg\x1a@\n\rv13_Inner_Msg\x12\x11\n\ttimestamp\x18\x01 \x01(\x03\x12\r\n\x05v13_2\x18\x02 \x01(\x07\x12\r\n\x05v13_3\x18\x03 \x01(\x07\x1a\'\n\x07v15_Msg\x12\r\n\x05v15_1\x18\x01 \x01(\x05\x12\r\n\x05v15_2\x18\x02 \x01(\x05\x62\x06proto3' -) - - -_VED_V13_OUTER_MSG_V13_INNER_MSG = _descriptor.Descriptor( - name='v13_Inner_Msg', - full_name='Ved.v13_Outer_Msg.v13_Inner_Msg', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='timestamp', full_name='Ved.v13_Outer_Msg.v13_Inner_Msg.timestamp', index=0, - number=1, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v13_2', full_name='Ved.v13_Outer_Msg.v13_Inner_Msg.v13_2', index=1, - number=2, type=7, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v13_3', full_name='Ved.v13_Outer_Msg.v13_Inner_Msg.v13_3', index=2, - number=3, type=7, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=384, - serialized_end=448, -) - -_VED_V13_OUTER_MSG = _descriptor.Descriptor( - name='v13_Outer_Msg', - full_name='Ved.v13_Outer_Msg', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='v13_Inner', full_name='Ved.v13_Outer_Msg.v13_Inner', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[_VED_V13_OUTER_MSG_V13_INNER_MSG, ], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=314, - serialized_end=448, -) - -_VED_V15_MSG = _descriptor.Descriptor( - name='v15_Msg', - full_name='Ved.v15_Msg', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='v15_1', full_name='Ved.v15_Msg.v15_1', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v15_2', full_name='Ved.v15_Msg.v15_2', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=450, - serialized_end=489, -) - -_VED = _descriptor.Descriptor( - name='Ved', - full_name='Ved', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='link_index', full_name='Ved.link_index', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='link_type', full_name='Ved.link_type', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v3', full_name='Ved.v3', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v4', full_name='Ved.v4', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='sub_result_position', full_name='Ved.sub_result_position', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='result_position', full_name='Ved.result_position', index=5, - number=6, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='results_start', full_name='Ved.results_start', index=6, - number=7, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v8', full_name='Ved.v8', index=7, - number=8, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v9', full_name='Ved.v9', index=8, - number=9, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v10', full_name='Ved.v10', index=9, - number=10, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v11', full_name='Ved.v11', index=10, - number=11, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v12', full_name='Ved.v12', index=11, - number=12, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v13_Outer', full_name='Ved.v13_Outer', index=12, - number=13, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v14', full_name='Ved.v14', index=13, - number=14, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - _descriptor.FieldDescriptor( - name='v15', full_name='Ved.v15', index=14, - number=15, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - serialized_options=None, file=DESCRIPTOR), - ], - extensions=[ - ], - nested_types=[_VED_V13_OUTER_MSG, _VED_V15_MSG, ], - enum_types=[ - ], - serialized_options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=24, - serialized_end=489, -) - -_VED_V13_OUTER_MSG_V13_INNER_MSG.containing_type = _VED_V13_OUTER_MSG -_VED_V13_OUTER_MSG.fields_by_name['v13_Inner'].message_type = _VED_V13_OUTER_MSG_V13_INNER_MSG -_VED_V13_OUTER_MSG.containing_type = _VED -_VED_V15_MSG.containing_type = _VED -_VED.fields_by_name['v13_Outer'].message_type = _VED_V13_OUTER_MSG -_VED.fields_by_name['v15'].message_type = _VED_V15_MSG -DESCRIPTOR.message_types_by_name['Ved'] = _VED -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Ved = _reflection.GeneratedProtocolMessageType('Ved', (_message.Message,), { - - 'v13_Outer_Msg' : _reflection.GeneratedProtocolMessageType('v13_Outer_Msg', (_message.Message,), { - - 'v13_Inner_Msg' : _reflection.GeneratedProtocolMessageType('v13_Inner_Msg', (_message.Message,), { - 'DESCRIPTOR' : _VED_V13_OUTER_MSG_V13_INNER_MSG, - '__module__' : 'google_search_pb2' - # @@protoc_insertion_point(class_scope:Ved.v13_Outer_Msg.v13_Inner_Msg) - }) - , - 'DESCRIPTOR' : _VED_V13_OUTER_MSG, - '__module__' : 'google_search_pb2' - # @@protoc_insertion_point(class_scope:Ved.v13_Outer_Msg) - }) - , - 'v15_Msg' : _reflection.GeneratedProtocolMessageType('v15_Msg', (_message.Message,), { - 'DESCRIPTOR' : _VED_V15_MSG, - '__module__' : 'google_search_pb2' - # @@protoc_insertion_point(class_scope:Ved.v15_Msg) - }) - , - 'DESCRIPTOR' : _VED, - '__module__' : 'google_search_pb2' - # @@protoc_insertion_point(class_scope:Ved) - }) -_sym_db.RegisterMessage(Ved) -_sym_db.RegisterMessage(Ved.v13_Outer_Msg) -_sym_db.RegisterMessage(Ved.v13_Outer_Msg.v13_Inner_Msg) -_sym_db.RegisterMessage(Ved.v15_Msg) +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13google_search.proto\"\xd1\x03\n\x03Ved\x12\x12\n\nlink_index\x18\x01 \x01(\x05\x12\x11\n\tlink_type\x18\x02 \x01(\x05\x12\n\n\x02v3\x18\x03 \x01(\x05\x12\n\n\x02v4\x18\x04 \x01(\x05\x12\x1b\n\x13sub_result_position\x18\x05 \x01(\x05\x12\x17\n\x0fresult_position\x18\x06 \x01(\x05\x12\x15\n\rresults_start\x18\x07 \x01(\x05\x12\n\n\x02v8\x18\x08 \x01(\x05\x12\n\n\x02v9\x18\t \x01(\x05\x12\x0b\n\x03v10\x18\n \x01(\x05\x12\x0b\n\x03v11\x18\x0b \x01(\x05\x12\x0b\n\x03v12\x18\x0c \x01(\x05\x12%\n\tv13_Outer\x18\r \x01(\x0b\x32\x12.Ved.v13_Outer_Msg\x12\x0b\n\x03v14\x18\x0e \x01(\x05\x12\x19\n\x03v15\x18\x0f \x01(\x0b\x32\x0c.Ved.v15_Msg\x1a\x86\x01\n\rv13_Outer_Msg\x12\x33\n\tv13_Inner\x18\x01 \x01(\x0b\x32 .Ved.v13_Outer_Msg.v13_Inner_Msg\x1a@\n\rv13_Inner_Msg\x12\x11\n\ttimestamp\x18\x01 \x01(\x03\x12\r\n\x05v13_2\x18\x02 \x01(\x07\x12\r\n\x05v13_3\x18\x03 \x01(\x07\x1a\'\n\x07v15_Msg\x12\r\n\x05v15_1\x18\x01 \x01(\x05\x12\r\n\x05v15_2\x18\x02 \x01(\x05\x62\x06proto3') +_globals = globals() +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'google_search_pb2', _globals) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _globals['_VED']._serialized_start=24 + _globals['_VED']._serialized_end=489 + _globals['_VED_V13_OUTER_MSG']._serialized_start=314 + _globals['_VED_V13_OUTER_MSG']._serialized_end=448 + _globals['_VED_V13_OUTER_MSG_V13_INNER_MSG']._serialized_start=384 + _globals['_VED_V13_OUTER_MSG_V13_INNER_MSG']._serialized_end=448 + _globals['_VED_V15_MSG']._serialized_start=450 + _globals['_VED_V15_MSG']._serialized_end=489 # @@protoc_insertion_point(module_scope) diff --git a/unfurl/tests/unit/test_shortlink.py b/unfurl/tests/unit/test_shortlink.py index ee3afe2..1ff3327 100644 --- a/unfurl/tests/unit/test_shortlink.py +++ b/unfurl/tests/unit/test_shortlink.py @@ -4,24 +4,24 @@ class TestBitly(unittest.TestCase): - def test_linkedin_shortlink(self): - """ Test a LinkedIn shortlink; these work a little different than the rest""" - - test = Unfurl(remote_lookups=True) - test.add_to_queue(data_type='url', key=None, value='https://lnkd.in/fDJnJ64') - test.parse_queue() - - # test number of nodes - self.assertEqual(len(test.nodes.keys()), 18) - self.assertEqual(test.total_nodes, 18) - - self.assertEqual(test.nodes[4].value, '/fDJnJ64') - self.assertEqual(test.nodes[11].value, 'thisweekin4n6.com') - self.assertEqual(test.nodes[18].key, 4) - - # is processing finished empty - self.assertTrue(test.queue.empty()) - self.assertEqual(len(test.edges), 0) + # def test_linkedin_shortlink(self): + # """ Test a LinkedIn shortlink; these work a little different than the rest""" + # + # test = Unfurl(remote_lookups=True) + # test.add_to_queue(data_type='url', key=None, value='https://lnkd.in/fDJnJ64') + # test.parse_queue() + # + # # test number of nodes + # self.assertEqual(len(test.nodes.keys()), 18) + # self.assertEqual(test.total_nodes, 18) + # + # self.assertEqual(test.nodes[4].value, '/fDJnJ64') + # self.assertEqual(test.nodes[11].value, 'thisweekin4n6.com') + # self.assertEqual(test.nodes[18].key, 4) + # + # # is processing finished empty + # self.assertTrue(test.queue.empty()) + # self.assertEqual(len(test.edges), 0) def test_twitter_shortlink(self): """ Test a Twitter shortlink; these use 301 redirects like most shortlinks"""