+
-
+
${h5}
${task.description}
- ${pb}
- ${b}
+
+
+
+ ${pb}
+
`;
if (task.type == "Retrieve") {
taskRetrievalContainer.appendChild(card);
diff --git a/infohound/tasks.py b/infohound/tasks.py
index dede042..ab0773a 100644
--- a/infohound/tasks.py
+++ b/infohound/tasks.py
@@ -1,9 +1,12 @@
from infohound.tool.retriever_modules import domains,subdomains,urls,files,emails,people,dorks
-from infohound.tool.analysis_modules import domain_analysis,email_analysis,files_analysis,usernames_analysis
+from infohound.tool.analysis_modules import domain_analysis,email_analysis,files_analysis,usernames_analysis,people_analisys
from celery import shared_task
import trio
import importlib
+# ------------------------------------- #
+# ------------- RETRIEVAL ------------- #
+# ------------------------------------- #
@shared_task(bind=True, name="get_whois_info")
def getWhoisInfoTask(self, domain):
@@ -50,9 +53,14 @@ def executeDorksTask(self, domain):
def findEmailsFromDorksTask(self, domain):
emails.findEmailsFromDorks(domain)
+@shared_task(bind=True, name="find_people_from_google")
+def findPeopleFromGoogleTask(self, domain):
+ people.findPeopleFromGoogle(domain)
-
-# -------------ANALYSIS-------------
+# ------------------------------------- #
+# ------------- ANALYSIS -------------- #
+# ------------------------------------- #
+
@shared_task(bind=True, name="subdomain_take_over_analysis")
def subdomainTakeOverAnalysisTask(self, domain):
domain_analysis.subdomainTakeOverAnalysis(domain)
@@ -89,6 +97,10 @@ def findRegisteredSitesTask(self, domain):
def checkBreachTask(self, domain):
email_analysis.checkBreach(domain)
+@shared_task(bind=True, name="summarize_profile")
+def summarize_profile(self, domain):
+ people_analisys.summarize_profile(domain)
+
# --------------CUSTOM--------------
@shared_task(bind=True, name="custom_task")
diff --git a/infohound/tool/ai_assistant/ollama.py b/infohound/tool/ai_assistant/ollama.py
new file mode 100644
index 0000000..5994b01
--- /dev/null
+++ b/infohound/tool/ai_assistant/ollama.py
@@ -0,0 +1,24 @@
+from ollama import Client
+from infohound_project.settings import OLLAMA_URL,OLLAMA_MODEL
+
+def check_or_pull_model(client):
+    models = client.list()
+    present = False
+    for model in models["models"]:
+        if OLLAMA_MODEL == model["name"].split(":")[0]:
+            present = True
+    if not present:
+        client.pull(OLLAMA_MODEL)
+
+def ollama_flexible_prompt(in_prompt):
+    client = Client(host=OLLAMA_URL)
+    check_or_pull_model(client)
+    desc = None
+    try:
+        res = client.generate(model=OLLAMA_MODEL, prompt=in_prompt)
+    except Exception as e:
+        print(f"Could not call Ollama instance: {e}")
+        return desc
+    if "response" in res:
+        desc = res["response"].strip()
+    return desc
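
A minimal usage sketch for ollama_flexible_prompt() above (assuming the Django settings are importable and an Ollama instance is reachable at OLLAMA_URL; the prompt text is illustrative only):

from infohound.tool.ai_assistant import ollama

# Returns the stripped "response" text, or None if the Ollama call fails.
summary = ollama.ollama_flexible_prompt("Summarize in one sentence what OSINT is.")
print(summary if summary is not None else "No summary returned (Ollama unreachable or empty response)")
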
diff --git a/infohound/tool/analysis_modules/people_analisys.py b/infohound/tool/analysis_modules/people_analisys.py
new file mode 100644
index 0000000..6dc5789
--- /dev/null
+++ b/infohound/tool/analysis_modules/people_analisys.py
@@ -0,0 +1,17 @@
+import time
+from infohound.models import People
+from infohound.tool.ai_assistant import ollama
+
+def summarize_profile(domain_id):
+    queryset = People.objects.filter(domain_id=domain_id, ocupation_summary__contains="This profile doesn't have a description yet")
+
+    for entry in queryset.iterator():
+        try:
+            summarize_prompt = "Summarize the occupation of the person in just 150 words given the following data: "
+            raw_data = entry.raw_metadata
+            print("Executing AI-Powered Profile Analysis of: " + entry.name)
+            entry.ocupation_summary = ollama.ollama_flexible_prompt(summarize_prompt + raw_data)
+            print("Summary: " + entry.ocupation_summary)
+            entry.save()
+        except Exception as e:
+            print(f"Unexpected error: {str(e)}")
diff --git a/infohound/tool/data_sources/google_data.py b/infohound/tool/data_sources/google_data.py
index 92f68f4..54981f7 100644
--- a/infohound/tool/data_sources/google_data.py
+++ b/infohound/tool/data_sources/google_data.py
@@ -1,6 +1,7 @@
import requests
import json
import html
+import time
import urllib.parse
import infohound.tool.infohound_utils as infohound_utils
from bs4 import BeautifulSoup
@@ -50,6 +51,48 @@ def getUrls(query):
#- files
#- url
+def discoverPeople(query):
+    start = 1
+    total_results = 0
+    total_gathered = 0
+    limit = False
+    results = True
+    people = []
+
+    print("Testing query: " + query)
+
+    while results and start < 100 and not limit:
+        payload = {"key": API_KEY, "cx": ID, "start": start, "q": query}
+        res = requests.get("https://www.googleapis.com/customsearch/v1", params=payload)
+        data = json.loads(res.text)
+        if "error" in data:
+            print(data["error"]["status"])
+            limit = True
+        else:
+            if start == 1:
+                total_results = data["searchInformation"]["totalResults"]
+            if "items" in data:
+                for item in data["items"]:
+                    try:
+                        url = item["link"]
+                        first_name = item["pagemap"]["metatags"][0]["profile:first_name"]
+                        last_name = item["pagemap"]["metatags"][0]["profile:last_name"]
+                        url_img = item["pagemap"]["cse_image"][0]["src"]
+                        name = f"{first_name} {last_name}"
+                        people.append((name, url, json.dumps(item), url_img))
+                        print("Added: " + name)
+                        total_gathered = total_gathered + 1
+                    except KeyError as e:
+                        print(f"Error: The key '{e.args[0]}' is not present in the results.")
+                    except Exception as e:
+                        print(f"Unexpected error: {str(e)}")
+            else:
+                results = False
+        start = start + 10
+        time.sleep(1)
+
+    print("Found " + str(total_results) + " results and added " + str(total_gathered) + " people")
+    return people
def discoverEmails(domain):
emails = []
@@ -101,7 +144,7 @@ def discoverSocialMedia(domain,email):
scope = email.split("@")[1]
url = f"https://www.google.com/search?q='{username}' {scope}"
- cookies = {"CONSENT": "YES+srp.gws"}
+ cookies = {"CONSENT": "YES+","SOCS":"CAISHAgCEhJnd3NfMjAyNDAxMzEtMF9SQzQaAmVzIAEaBgiAkIuuBg"}
try:
user_agent = infohound_utils.getUserAgents()
@@ -179,4 +222,4 @@ def discoverSocialMediaByDorks(domain,email):
return data
-
\ No newline at end of file
+
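
For reference, discoverPeople() above expects each Google Custom Search item to expose LinkedIn profile metatags; a sketch of the item shape the parser assumes (key names taken from the code, values purely illustrative):

sample_item = {
    "link": "https://www.linkedin.com/in/jane-doe-123456",
    "pagemap": {
        "metatags": [{"profile:first_name": "Jane", "profile:last_name": "Doe"}],
        "cse_image": [{"src": "https://example.com/profile-photo.jpg"}],
    },
}
# Items missing any of these keys are skipped by discoverPeople()'s KeyError handler.
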
diff --git a/infohound/tool/infohound_utils.py b/infohound/tool/infohound_utils.py
index 7f90968..300fe8f 100644
--- a/infohound/tool/infohound_utils.py
+++ b/infohound/tool/infohound_utils.py
@@ -33,8 +33,6 @@ def extractSocialInfo(text):
if t is not None:
data.append(t.group(0))
-
-
# Twitter
regex = r"(http(s)?:\/\/)?([\w]+\.)?twitter\.com\/[^&\/?\"\%]*"
t = re.search(regex, text)
diff --git a/infohound/tool/retriever_modules/people.py b/infohound/tool/retriever_modules/people.py
index 4a7dabc..d0148f2 100644
--- a/infohound/tool/retriever_modules/people.py
+++ b/infohound/tool/retriever_modules/people.py
@@ -5,78 +5,57 @@
from infohound.tool.data_sources import google_data, bing_data
from infohound.models import Domain,People,Emails,Usernames
-def findSocialProfilesByEmail(domain_id):
- queryset = Emails.objects.filter(people_id__isnull=True, domain_id=domain_id)
- domain = Domain.objects.get(id=domain_id).domain
- for entry in queryset.iterator():
- usernames_data = []
- email = entry.email
- print("Testing: " + email)
-
- # TO-DO: check if Bing works
- #for l in bing_data.discoverSocialMedia(domain,email):
- # if l not in data:
- # data.append(l)
-
- results = google_data.discoverSocialMediaByDorks(domain,email)
- if results["links"] != []:
- for link in results["links"]:
- try:
- username = link.split("/")[-1]
- if "linkedin" in link:
- username = re.split('-\\d+', username)[0]
- u, created = Usernames.objects.get_or_create(username=username, source="Google", domain_id=domain_id)
- if created:
- usernames_data.append(username)
- except IntegrityError as e:
- pass
-
- try:
- p, created = People.objects.get_or_create(name=results["name"], social_profiles=results["links"], source="Google", domain_id=domain_id)
- except IntegrityError as e:
- pass
- try:
- u, created = Usernames.objects.get_or_create(username=email.split("@")[0], source="Google", domain_id=domain_id)
- usernames_data.append(email.split("@")[0])
- except IntegrityError as e:
- pass
-
- Emails.objects.filter(email=email, domain_id=domain_id).update(people=p)
- Usernames.objects.filter(username__in=usernames_data, domain_id=domain_id).update(people=p)
+def findPeopleFromGoogle(domain_id):
+ domain = Domain.objects.get(id=domain_id).domain
+ company = domain.split(".")[0]
+ query = f'intitle:"{company}" site:"linkedin.com"'
+ results = google_data.discoverPeople(query)
+ for result in results:
+ try:
+ p, created = People.objects.get_or_create(name=result[0], social_profiles=[result[1]], raw_metadata=result[2], url_img=result[3], source="Google", domain_id=domain_id)
+ if result[1]:
+ username = result[1].split('/')[4]
+ username = re.split('-\\d+', username)[0]
+ u, created = Usernames.objects.get_or_create(people=p, username=username, source="Google", domain_id=domain_id)
+ except IntegrityError as e:
+ pass
-
-
-# LEGACY FUNCTION
-"""
def findSocialProfilesByEmail(domain_id):
- queryset = Emails.objects.filter(people_id__isnull=True, domain_id=domain_id)
- domain = Domain.objects.get(id=domain_id).domain
- for entry in queryset.iterator():
- usernames_data = []
- email = entry.email
- print("Testing: " + email)
-
- # TO-DO: check if Bing works
- #for l in bing_data.discoverSocialMedia(domain,email):
- # if l not in data:
- # data.append(l)
-
- results = google_data.discoverSocialMedia(domain,email)
- if results["links"] != []:
- for link in results["links"]:
- username = Usernames(username=link.split("/")[-1], source="Google", domain_id=domain_id)
- usernames_data.append(username)
- try:
- p, created = People.objects.get_or_create(name=results["name"], social_profiles=results["links"], source="Google", domain_id=domain_id)
- except IntegrityError as e:
- pass
-
- Emails.objects.filter(email=email).update(people=p)
- try:
- for cred in usernames_data:
- cred.people = p
- Usernames.objects.bulk_create(usernames_data)
- except IntegrityError as e:
- pass
-"""
+ queryset = Emails.objects.filter(people_id__isnull=True, domain_id=domain_id)
+ domain = Domain.objects.get(id=domain_id).domain
+ for entry in queryset.iterator():
+ usernames_data = []
+ email = entry.email
+ print("Testing: " + email)
+
+ # TO-DO: check if Bing works
+ #for l in bing_data.discoverSocialMedia(domain,email):
+ # if l not in data:
+ # data.append(l)
+
+ results = google_data.discoverSocialMediaByDorks(domain,email)
+ if results["links"] != []:
+ for link in results["links"]:
+ try:
+ username = link.split("/")[-1]
+ if "linkedin" in link:
+ username = link.split("/")[4]
+ username = re.split('-\\d+', username)[0]
+ u, created = Usernames.objects.get_or_create(username=username, source="Google", domain_id=domain_id)
+ if created:
+ usernames_data.append(username)
+ except IntegrityError as e:
+ pass
+ try:
+ p, created = People.objects.get_or_create(name=results["name"], social_profiles=results["links"], source="Google", domain_id=domain_id)
+ try:
+ u, created = Usernames.objects.get_or_create(username=email.split("@")[0], source="Google", domain_id=domain_id)
+ usernames_data.append(email.split("@")[0])
+
+ Emails.objects.filter(email=email, domain_id=domain_id).update(people=p)
+ Usernames.objects.filter(username__in=usernames_data, domain_id=domain_id).update(people=p)
+ except IntegrityError as e:
+ pass
+ except IntegrityError as e:
+ pass
diff --git a/infohound/utils.py b/infohound/utils.py
index 8c6281d..35575d8 100644
--- a/infohound/utils.py
+++ b/infohound/utils.py
@@ -10,17 +10,19 @@
def load_tasks(domain_id):
tasks = [
- {"name_id":"getWhoisInfoTask","name":"Get Whois Info", "description":"Get revelant information from Whois register.", "type":"Retrieve"},
+ # RETRIEVAL
+ {"name_id":"getWhoisInfoTask","name":"Get Whois Information", "description":"Get relevant information from Whois register.", "type":"Retrieve"},
{"name_id":"getDNSRecordsTask","name":"Get DNS Records", "description":"This task queries the DNS.", "type":"Retrieve"},
{"name_id":"getSubdomainsTask","name":"Get Subdomains", "description":"This task uses Alienvault OTX API, CRT.sh and HackerTarget as data sources to discover cached subdomains.", "type":"Retrieve"},
- {"name_id":"getSubdomainsFromURLSTask","name":"Get Subdomains From URLs", "description":"Once some tasks have been performed, the URLs table will have a lot of entries. This task will check all the URLS in order to find new subdomains.", "type":"Retrieve"},
+ {"name_id":"getSubdomainsFromURLSTask","name":"Get Subdomains from URLs", "description":"Once some tasks have been performed, the URLs table will have a lot of entries. This task will check all the URLS in order to find new subdomains.", "type":"Retrieve"},
{"name_id":"getURLsTask","name":"Get URLs", "description":"It searches all URLs cached by Wayback Machine and saves them into the database. This will later help to discover other data entities like files or subdomains.", "type":"Retrieve"},
- {"name_id":"getFilesFromURLsTask","name":"Get Files from URLs", "description":"It loops through the URLs database table in order to find files and store them to the Files database table to analyse them later. The files that will be retrieved are: doc, docx, ppt, pptx, pps, ppsx, xls, xlsx, odt, ods, odg, odp, sxw, sxc, sxi, pdf, wpd, svg, indd, rdp, ica, zip, rar", "type":"Retrieve"},
- {"name_id":"findEmailsTask","name":"Find Email", "description":"It lookes for emails using queries to Google and Bing.", "type":"Retrieve"},
- {"name_id":"findSocialProfilesByEmailTask","name":"Find people from emails", "description":"Once some emails have been found it can be useful to discover the person behind them. Also, it finds usernames from that people.", "type":"Retrieve"},
- {"name_id":"findEmailsFromURLsTask","name":"Find Emails From Urls", "description":"Sometimes, the discoverd URLs can contain sentive information. This tasks retrive all the emails from URL paths.", "type":"Retrieve"},
- {"name_id":"executeDorksTask","name":"Execute dorks", "description":"It will execute the dorks defined in the dorks folder. Remember to grup the dorks by categories (filename) so you can later understand the objectives of the dorks.", "type":"Retrieve"},
+ {"name_id":"getFilesFromURLsTask","name":"Get Files from URLs", "description":"It loops through the URLs database table in order to find files and store them to the Files database. The files that will be retrieved are: doc, docx, ppt, pptx, pps, ppsx, xls, xlsx, odt, ods, odg, odp, sxw, sxc, sxi, pdf, wpd, svg, indd, rdp, ica, zip, rar", "type":"Retrieve"},
+ {"name_id":"findEmailsTask","name":"Find Emails", "description":"It looks for emails using queries to Google and Bing.", "type":"Retrieve"},
+ {"name_id":"findSocialProfilesByEmailTask","name":"Find People from Emails", "description":"Once some emails have been found it can be useful to discover the person behind them. Also, it finds usernames from that people.", "type":"Retrieve"},
+ {"name_id":"findEmailsFromURLsTask","name":"Find Emails From URLs", "description":"Sometimes, the discoverd URLs can contain sentive information. This tasks retrive all the emails from URL paths.", "type":"Retrieve"},
+ {"name_id":"executeDorksTask","name":"Execute Dorks", "description":"It will execute the dorks defined in the dorks folder. Remember to grup the dorks by categories (filename) so you can later understand the objectives of the dorks.", "type":"Retrieve"},
{"name_id":"findEmailsFromDorksTask","name":"Find Emails From Dorks", "description":"By default, InfoHound has some dorks defined in order to discover emails. This task will look for them in the results obtained by the execution of the dorks.", "type":"Retrieve"},
+ {"name_id":"findPeopleFromGoogleTask","name":"Find People From Google", "description":"Uses the Google JSON API to find people who work in the company asociated to the domain.", "type":"Retrieve"},
# ANALYSIS
{"name_id":"subdomainTakeOverAnalysisTask","name":"Check Subdomains Take-Over", "description":"It performes some checks to determine if a subdomain can be taken over.", "type":"Analysis"},
{"name_id":"canBeSpoofedTask","name":"Check If Domain Can Be Spoofed", "description":"It checks if a domain, from the emails InfoHound has discovered, can be spoofed. This could be used by attackers to impersonate a person and send emails as hime/her.", "type":"Analysis"},
@@ -30,7 +32,8 @@ def load_tasks(domain_id):
{"name_id":"getEmailsFromMetadataTask","name":"Get Emails From Metadata", "description":"As some metadata can contain emails, this will retrive all o them and save it to the database.", "type":"Analysis"},
{"name_id":"getEmailsFromFilesContentTask","name":"Get Emails From Files Content", "description":"Usually emails can be included in corporate files so this task will retrive all the emails from the downloaded files content.", "type":"Analysis"},
{"name_id":"findRegisteredSitesTask","name":"Find Registered Services using emails", "description":"It is possible to find services or social networks where an emaill has been used to create an account. This task will check if an email InfoHound has discovered has an account in: Twitter, Adobe, Facebook, Imgur, Mewe, Parler, Rumble, Snapchat, Wordpress and/or Duolingo", "type":"Analysis"},
- {"name_id":"checkBreachTask","name":"Check Breach", "description":"This task checks Firefox Monitor service to see if an email has been found in a data breach. Although it is a free service, it has a limitation of 10 queries per day. If Leak-Lookup API key is set, it also checks it.", "type":"Analysis"}]
+ {"name_id":"checkBreachTask","name":"Check Breach", "description":"This task checks Firefox Monitor service to see if an email has been found in a data breach. Although it is a free service, it has a limitation of 10 queries per day. If Leak-Lookup API key is set, it also checks it.", "type":"Analysis"},
+ {"name_id":"summarize_profile","name":"AI-Powered Profile Analisys", "description":"You can use the profile analysis task to employ an AI-powered tool that examines the metadata and creates a description for you.", "type":"Analysis"}]
for task in tasks:
try:
Tasks.objects.get_or_create(tid=task["name_id"], name=task["name"], description=task["description"], task_type=task["type"], custom=False, domain_id=domain_id)
diff --git a/infohound/views.py b/infohound/views.py
index 5631208..a51bcbb 100644
--- a/infohound/views.py
+++ b/infohound/views.py
@@ -16,6 +16,7 @@
from django.db import IntegrityError
from django.utils.safestring import mark_safe
+
def index(request):
return render(request, 'index.html')
@@ -79,6 +80,8 @@ def people_all(request):
p["name"] = entry.name
p["phones"] = len(entry.phones)
p["accounts"] = 0
+ p["ocupation_summary"] = entry.ocupation_summary
+ p["url_img"] = entry.url_img
user = Usernames.objects.filter(people=entry, domain_id=domain_id)
emails = Emails.objects.filter(people=entry, domain_id=domain_id)
for em in user.iterator():
@@ -192,7 +195,6 @@ def get_emails_stats(request, domain_id):
def get_available_tasks(request):
infohound.utils.load_tasks(request.GET['domain_id'])
infohound.utils.load_custom_tasks(request.GET['domain_id'])
-
tasks = []
queryset = Tasks.objects.filter(domain_id=request.GET['domain_id']).order_by('id')
for entry in queryset.iterator():
@@ -203,10 +205,8 @@ def get_available_tasks(request):
data["description"] = entry.description
data["type"] = entry.task_type
data["custom"] = entry.custom
- if entry.last_execution:
- data["last_execution"] = entry.last_execution.strftime("%d/%m/%y %H:%M")
- if entry.celery_id:
- data["state"] = AsyncResult(entry.celery_id).state
+ data["last_execution"] = entry.last_execution.strftime("%d/%m/%y %H:%M") if (entry.last_execution) else None
+ data["state"] = AsyncResult(entry.celery_id).state if (entry.celery_id) else None
tasks.append(data)
return JsonResponse(tasks, safe=False)
@@ -416,4 +416,4 @@ def export_all_to_CSV(request, domain_id):
return JsonResponse(data, status=200)
-
\ No newline at end of file
+
diff --git a/infohound_diagram.jpg b/infohound_diagram.jpg
deleted file mode 100644
index e16f4fc..0000000
Binary files a/infohound_diagram.jpg and /dev/null differ
diff --git a/infohound_project/settings.py b/infohound_project/settings.py
index 63a4eca..89c315d 100644
--- a/infohound_project/settings.py
+++ b/infohound_project/settings.py
@@ -134,3 +134,7 @@
#------------- CELERY -------------
CELERY_BROKER_URL = 'redis://redis:6379'
CELERY_RESULT_BACKEND = 'redis://redis:6379'
+
+#------------- OLLAMA -------------
+OLLAMA_URL = 'http://ollama:11434'
+OLLAMA_MODEL = 'llama2'
diff --git a/new_infohound_diagram.jpg b/new_infohound_diagram.jpg
new file mode 100644
index 0000000..e1cfcae
Binary files /dev/null and b/new_infohound_diagram.jpg differ
diff --git a/requirements.txt b/requirements.txt
index ad4a4b8..64d747f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,3 +21,4 @@ svgwrite==1.4.3
textract==1.6.5
trio==0.22.0
networkx
+ollama