Commit a31abb8
chg: [chats] add correlation chat->cve, cryptocurrencies, ... + correlation domain->chat, message
Terrtia committed Feb 27, 2025
1 parent eaf56d5 commit a31abb8
Showing 10 changed files with 134 additions and 43 deletions.
10 changes: 7 additions & 3 deletions bin/crawlers/Crawler.py
@@ -346,7 +346,7 @@ def compute(self, capture):
         # Crawler stats
         self.domain.add_history(epoch, root_item=self.root_item)

-        if self.domain != self.original_domain:
+        if self.domain != self.original_domain: # TODO ADD RELATIONSHIP REDIRECT
             self.original_domain.update_daterange(self.date.replace('/', ''))
             if self.root_item:
                 self.original_domain.set_last_origin(parent_id)
@@ -364,6 +364,11 @@ def compute(self, capture):
             print('capture:', capture.uuid, 'Unsafe Content Filtered')
             print('task: ', task.uuid, 'Unsafe Content Filtered')
             print()
+
+        # onion messages correlation
+        if crawlers.is_domain_correlation_cache(self.original_domain):
+            crawlers.save_domain_correlation_cache(self.original_domain.was_up(), domain)
+
         task.remove()
         self.root_item = None
@@ -377,14 +382,13 @@ def save_capture_response(self, parent_id, entries):
         print('retrieved content')
         # print(entries.get('html'))

-        if 'last_redirected_url' in entries and entries.get('last_redirected_url'):
+        if 'last_redirected_url' in entries and entries.get('last_redirected_url'): # TODO ADD RELATIONSHIP REDIRECT
             last_url = entries['last_redirected_url']
             unpacked_last_url = crawlers.unpack_url(last_url)
             current_domain = unpacked_last_url['domain']
             # REDIRECTION TODO CHECK IF TYPE CHANGE
             if current_domain != self.domain.id and not self.root_item:
                 self.logger.warning(f'External redirection {self.domain.id} -> {current_domain}')
                 print(f'External redirection {self.domain.id} -> {current_domain}')
                 if not self.root_item:
                     self.domain = Domain(current_domain)
                     # Filter Domain
24 changes: 24 additions & 0 deletions bin/lib/Tag.py
@@ -789,6 +789,16 @@ def delete_object_tags(obj_type, subtype, obj_id):
         delete_object_tag(tag, obj_type, obj_id, subtype=subtype)


+def get_objs_by_date(obj_type, tags, date):
+    objs = []
+    if obj_type == 'item' or obj_type == 'message':
+        l_set_keys = get_obj_keys_by_tags(tags, obj_type, date=date)
+        if len(l_set_keys) < 2:
+            objs = get_obj_by_tag(l_set_keys[0])
+        else:
+            objs = r_tags.sinter(l_set_keys[0], *l_set_keys[1:])
+    return objs
+
################################################################################################################

# TODO: REWRITE OLD
@@ -1213,6 +1223,20 @@ def get_tags_min_last_seen(l_tags, r_int=False):
     else:
         return str(min_last_seen)

+def get_tags_min_first_seen(l_tags, r_int=False):
+    """
+    Get min first seen from a list of tags (current: daterange objs only)
+    """
+    min_first_seen = 99999999
+    for tag in l_tags:
+        first_seen = get_tag_first_seen(tag, r_int=True)
+        if first_seen < min_first_seen:
+            min_first_seen = first_seen
+    if r_int:
+        return min_first_seen
+    else:
+        return str(min_first_seen)
+
 def get_all_tags():
     return list(r_tags.smembers('list_tags'))

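For context, a minimal sketch of the set-intersection idea behind get_objs_by_date() — hypothetical tag/date keys and object IDs, not the module's real Redis layout: each (tag, date) key holds a set of object IDs, and filtering on several tags is the intersection of those sets, which is the role r_tags.sinter plays above.

tag_index = {
    ('infoleak:automatic-detection="cve"', '20250227'): {'msg-1', 'msg-2', 'msg-3'},
    ('infoleak:automatic-detection="bitcoin-address"', '20250227'): {'msg-2', 'msg-4'},
}

def objs_by_date(tags, date):
    # One set of object IDs per (tag, date) key
    key_sets = [tag_index.get((tag, date), set()) for tag in tags]
    if not key_sets:
        return set()
    if len(key_sets) < 2:
        return key_sets[0]
    return set.intersection(*key_sets)  # what r_tags.sinter(...) computes server-side

print(objs_by_date(['infoleak:automatic-detection="cve"',
                    'infoleak:automatic-detection="bitcoin-address"'], '20250227'))
# -> {'msg-2'}
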
76 changes: 47 additions & 29 deletions bin/lib/chats_viewer.py
@@ -29,6 +29,8 @@
 from lib.objects import UsersAccount
 from lib.objects import Usernames
 from lib import Language
+from lib import Tag
+from packages import Date

 config_loader = ConfigLoader()
 r_db = config_loader.get_db_conn("Kvrocks_DB")
@@ -373,36 +375,51 @@ def list_messages_to_dict(l_messages_id, translation_target=None):

This hunk reworks get_messages_iterator(): when tag filters are supplied, messages are iterated by tag and date; otherwise the previous walk over chat instances, subchannels, threads and messages is kept as the else branch.

## Threads IDS
## Daterange
def get_messages_iterator(filters={}):
    # Tags
    tags = filters.get('tags', [])
    if tags:
        date_from = filters.get('date_from')
        date_to = filters.get('date_to')
        if not date_from:
            date_from = Tag.get_tags_min_first_seen(tags)
            if date_from == '99999999':
                return None
        if not date_to:
            date_to = Date.get_today_date_str()
        daterange = Date.get_daterange(date_from, date_to)
        for date in daterange:
            for message_id in Tag.get_objs_by_date('message', tags, date):
                yield Messages.Message(message_id)
    else:
        for instance_uuid in get_chat_service_instances():
            for chat_id in ChatServiceInstance(instance_uuid).get_chats():
                chat = Chats.Chat(chat_id, instance_uuid)

                # subchannels
                for subchannel_gid in chat.get_subchannels():
                    _, _, subchannel_id = subchannel_gid.split(':', 2)
                    subchannel = ChatSubChannels.ChatSubChannel(subchannel_id, instance_uuid)
                    messages, _ = subchannel._get_messages(nb=-1)
                    for mess in messages:
                        _, _, message_id = mess[0].split(':', 2)
                        yield Messages.Message(message_id)

                # threads
                for threads in chat.get_threads():
                    thread = ChatThreads.ChatThread(threads['id'], instance_uuid)
                    messages, _ = thread._get_messages(nb=-1)
                    for mess in messages:
                        _, _, message_id = mess[0].split(':', 2)
                        yield Messages.Message(message_id)

                # messages
                messages, _ = chat._get_messages(nb=-1)
                for mess in messages:
                    _, _, message_id = mess[0].split(':', 2)
                    yield Messages.Message(message_id)
                # threads ???

def get_nb_messages_iterator(filters={}):
    nb_messages = 0
@@ -1030,9 +1047,10 @@ def api_thread_messages(subtype, thread_id):


 if __name__ == '__main__':
-    r = get_chat_service_instances()
-    print(r)
-    r = ChatServiceInstance(r.pop())
-    print(r.get_meta({'chats'}))
+    get_messages_iterator(filters={'tags': ['infoleak:automatic-detection="cve"']})
+    # r = get_chat_service_instances()
+    # print(r)
+    # r = ChatServiceInstance(r.pop())
+    # print(r.get_meta({'chats'}))
     # r = get_chat_protocols()
     # print(r)
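A minimal usage sketch of the new tag-filtered path (assumes a configured AIL environment where lib.chats_viewer and its Kvrocks/Redis back ends are reachable; the tag and dates are placeholders):

from lib import chats_viewer

filters = {'tags': ['infoleak:automatic-detection="cve"'],
           'date_from': '20250101',
           'date_to': '20250227'}

nb = 0
for message in chats_viewer.get_messages_iterator(filters=filters):
    nb += 1
    print(message.id)  # yielded Messages.Message objects
print(f'{nb} tagged messages in the date range')
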
14 changes: 7 additions & 7 deletions bin/lib/correlations_engine.py
@@ -42,24 +42,24 @@

 CORRELATION_TYPES_BY_OBJ = {
     "barcode": ["chat", "cve", "cryptocurrency", "decoded", "domain", "image", "message", "screenshot"],
-    "chat": ["barcode", "chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"],  # message or direct correlation like cve, bitcoin, ... ???
+    "chat": ["barcode", "chat-subchannel", "chat-thread", "cryptocurrency", "cve", "decoded", "domain", "image", "message", "ocr", "pgp", "user-account"],
     "chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
     "chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"],
     "cookie-name": ["domain"],
-    "cryptocurrency": ["barcode", "domain", "item", "message", "ocr", "qrcode"],
-    "cve": ["barcode", "domain", "item", "message", "ocr", "qrcode"],
-    "decoded": ["barcode", "domain", "item", "message", "ocr", "qrcode"],
-    "domain": ["barcode", "cve", "cookie-name", "cryptocurrency", "dom-hash", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
+    "cryptocurrency": ["barcode", "chat", "domain", "item", "message", "ocr", "qrcode"],
+    "cve": ["barcode", "chat", "domain", "item", "message", "ocr", "qrcode"],
+    "decoded": ["barcode", "chat", "domain", "item", "message", "ocr", "qrcode"],
+    "domain": ["barcode", "chat", "cve", "cookie-name", "cryptocurrency", "dom-hash", "decoded", "etag", "favicon", "hhhash", "item", "message", "pgp", "title", "screenshot", "username"],
     "dom-hash": ["domain", "item"],
     "etag": ["domain"],
     "favicon": ["domain", "item"],  # TODO Decoded
     "file-name": ["chat", "item", "message"],
     "hhhash": ["domain"],
     "image": ["barcode", "chat", "chat-subchannel", "chat-thread", "message", "ocr", "qrcode", "user-account"],  # TODO subchannel + threads ????
     "item": ["cve", "cryptocurrency", "decoded", "domain", "dom-hash", "favicon", "file-name", "message", "pgp", "screenshot", "title", "username"],  # chat ???
-    "message": ["barcode", "chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "item", "ocr", "pgp", "user-account"],
+    "message": ["barcode", "chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "domain", "file-name", "image", "item", "ocr", "pgp", "user-account"],
     "ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
-    "pgp": ["domain", "item", "message", "ocr"],
+    "pgp": ["chat", "domain", "item", "message", "ocr"],
     "qrcode": ["chat", "cve", "cryptocurrency", "decoded", "domain", "image", "message", "screenshot"],  # "chat-subchannel", "chat-thread" ?????
     "screenshot": ["barcode", "domain", "item", "qrcode"],
     "title": ["domain", "item"],
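For context, a minimal sketch (reduced copy of the mapping above, hypothetical helper name) of how such a table is typically consulted, plus a symmetry check that every pair added by this commit — e.g. chat -> cve and cve -> chat — is declared in both directions:

CORRELATION_TYPES_BY_OBJ = {
    "chat": ["cryptocurrency", "cve", "message"],
    "cryptocurrency": ["chat", "message"],
    "cve": ["chat", "message"],
    "message": ["chat", "cryptocurrency", "cve"],
}

def is_valid_correlation(obj_type, correl_type):
    # A correlation is only stored if the target type is declared for the source type.
    return correl_type in CORRELATION_TYPES_BY_OBJ.get(obj_type, [])

# Every declared pair should exist in both directions.
for src, targets in CORRELATION_TYPES_BY_OBJ.items():
    for dst in targets:
        assert src in CORRELATION_TYPES_BY_OBJ.get(dst, []), f'missing {dst} -> {src}'

print(is_valid_correlation('chat', 'cve'))  # True
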
26 changes: 26 additions & 0 deletions bin/lib/crawlers.py
@@ -113,6 +113,32 @@ def api_get_domain_from_url(url):
     url_unpack = unpack_url(url)
     return url_unpack['domain']

+## onion correlation cache ##
+
+def is_domain_correlation_cache(domain):
+    return r_cache.sismember('cache:domain:correlation', domain)
+
+def add_domain_correlation_cache(domain, obj_gid):
+    r_cache.sadd('cache:domain:correlation', domain)
+    r_cache.sadd(f'cache:domain:correlation:objs:{domain}', obj_gid)
+
+def save_domain_correlation_cache(is_domain_up, domain):
+    if is_domain_up:
+        dom = Domain(domain)
+        for obj_gid in r_cache.smembers(f'cache:domain:correlation:objs:{domain}'):
+            obj_type, obj_subtype, obj_id = obj_gid.split(':', 2)
+            if not obj_subtype:
+                obj_subtype = ''
+            dom.add_correlation(obj_type, obj_subtype, obj_id)
+    r_cache.srem('cache:domain:correlation', domain)
+    r_cache.delete(f'cache:domain:correlation:objs:{domain}')
+
+# TODO CHECK ALL TASK IF STORED IN DB
+def cleanup_domain_correlation_cache(domain):
+    pass
+
+# -- #
+
 # # # # # # # #
 #             #
 #   COMMON    #
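A minimal sketch of the intended round trip between the Onion module and the crawler (hypothetical domain and chat IDs; assumes the module-level r_cache handle is configured): object global IDs are parked while the domain sits in the crawl queue, then flushed into correlations once the capture reports whether the domain was up.

from lib import crawlers

domain = 'example.onion'                                        # hypothetical
chat_gid = 'chat:00000000-0000-0000-0000-000000000000:mychat'   # hypothetical

# Onion module side: the .onion was spotted in a chat message, park the link.
crawlers.add_domain_correlation_cache(domain, chat_gid)

# Crawler side, once the capture is finished:
if crawlers.is_domain_correlation_cache(domain):
    crawlers.save_domain_correlation_cache(True, domain)  # True -> the domain was up
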
3 changes: 0 additions & 3 deletions bin/lib/objects/ChatSubChannels.py
@@ -18,11 +18,8 @@
 from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects

 from lib.data_retention_engine import update_obj_date
-from lib.objects import ail_objects
 from lib.timeline_engine import Timeline

-from lib.correlations_engine import get_correlation_by_correl_type
-
 config_loader = ConfigLoader()
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 r_object = config_loader.get_db_conn("Kvrocks_Objects")
4 changes: 4 additions & 0 deletions bin/lib/objects/abstract_daterange_object.py
@@ -162,6 +162,10 @@ def _add(self, date, obj):  # TODO OBJ=None
             if is_crawled(item_id):
                 domain = get_item_domain(item_id)
                 self.add_correlation('domain', '', domain)
+        elif obj.type == 'message':
+            chat_subtype = obj.get_chat_instance()
+            chat_id = obj.get_chat_id()
+            self.add_correlation('chat', chat_subtype, chat_id)

     def add(self, date, obj):
         self._add(date, obj)
1 change: 0 additions & 1 deletion bin/lib/objects/abstract_object.py
@@ -11,7 +11,6 @@
 import sys
 import uuid
 from abc import ABC, abstractmethod
-from pymisp import MISPObject

 # from flask import url_for

5 changes: 5 additions & 0 deletions bin/lib/objects/abstract_subtype_object.py
@@ -186,6 +186,11 @@ def add(self, date, obj=None):
                     domain = get_item_domain(item_id)
                     self.add_correlation('domain', '', domain)

+            elif obj.type == 'message':
+                chat_subtype = obj.get_chat_instance()
+                chat_id = obj.get_chat_id()
+                self.add_correlation('chat', chat_subtype, chat_id)
+
     # TODO:ADD objects + Stats
     # def create(self, first_seen, last_seen):
     #     self.set_first_seen(first_seen)
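Both abstract base classes now share the same dispatch, sketched below with a hypothetical stand-in for the real message class: an object found in a crawled item correlates to the item's domain, while one found in a chat message correlates to the message's chat.

class FakeMessage:
    type = 'message'
    def get_chat_instance(self):
        return '00000000-0000-0000-0000-000000000000'  # chat service instance uuid (subtype)
    def get_chat_id(self):
        return 'mychat'

def extra_correlations(obj, domain=None):
    # Mirrors the item -> domain and message -> chat branches of _add()/add()
    if obj.type == 'item' and domain:
        return [('domain', '', domain)]
    elif obj.type == 'message':
        return [('chat', obj.get_chat_instance(), obj.get_chat_id())]
    return []

print(extra_correlations(FakeMessage()))
# -> [('chat', '00000000-0000-0000-0000-000000000000', 'mychat')]
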
14 changes: 14 additions & 0 deletions bin/modules/Onion.py
@@ -23,6 +23,7 @@
 ##################################
 from modules.abstract_module import AbstractModule
 from lib.ConfigLoader import ConfigLoader
+from lib.objects.Domains import Domain
 from lib import crawlers

 class Onion(AbstractModule):
@@ -98,6 +99,19 @@ def compute(self, message):
                                                         har=self.har, screenshot=self.screenshot)
                    if task_uuid:
                        print(f'{domain} added to crawler queue: {task_uuid}')
+                   if self.obj.type == 'message':
+                       dom = Domain(domain)
+                       # check if domain was up
+                       if dom.was_up():
+                           self.obj.add_correlation('domain', '', domain)
+                           chat_subtype = self.obj.get_chat_instance()
+                           chat_id = self.obj.get_chat_id()
+                           dom.add_correlation('chat', chat_subtype, chat_id)
+                       elif task_uuid and not dom.exists():
+                           chat_subtype = self.obj.get_chat_instance()
+                           chat_id = self.obj.get_chat_id()
+                           crawlers.add_domain_correlation_cache(domain, f'chat:{chat_subtype}:{chat_id}')
+                           crawlers.add_domain_correlation_cache(domain, self.obj.get_global_id())
            else:
                print(f'Detected {len(domains)} .onion(s);{self.obj.get_global_id()}')

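The branch added to Onion.compute() boils down to a small decision, sketched here with a hypothetical helper: correlate immediately when the domain is already known to have been up, otherwise park the chat and message global IDs in the crawler cache until the queued capture resolves.

def onion_in_message_action(dom_was_up, task_uuid, dom_exists):
    # Mirrors the message branch added to Onion.compute()
    if dom_was_up:
        return 'add domain + chat correlations now'
    elif task_uuid and not dom_exists:
        return 'cache chat + message global IDs for the crawler'
    return 'nothing to do'

print(onion_in_message_action(dom_was_up=False, task_uuid='uuid-1234', dom_exists=False))
# -> 'cache chat + message global IDs for the crawler'
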
