From a73e45e93acdc92e72320454205206495338a83e Mon Sep 17 00:00:00 2001 From: dhondta Date: Mon, 28 Oct 2024 23:24:55 +0100 Subject: [PATCH] Added malware feed client --- README.md | 20 ++-- docs/pages/index.md | 14 +-- src/malsearch/VERSION.txt | 2 +- src/malsearch/__init__.py | 144 ++++++++++++++++----------- src/malsearch/__main__.py | 10 +- src/malsearch/clients/__init__.py | 3 +- src/malsearch/clients/maldatabase.py | 17 ++++ src/malsearch/clients/virustotal.py | 2 +- 8 files changed, 136 insertions(+), 76 deletions(-) create mode 100644 src/malsearch/clients/maldatabase.py diff --git a/README.md b/README.md index 7bf73a8..a0ccaf1 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,21 @@ [![PyPi](https://img.shields.io/pypi/v/malsearch.svg)](https://pypi.python.org/pypi/malsearch/) [![Read The Docs](https://readthedocs.org/projects/python-malsearch/badge/?version=latest)](https://python-malsearch.readthedocs.io/en/latest/?badge=latest) -[![Build Status](https://github.com/dhondta/python-malsearch/actions/workflows/python-package.yml/badge.svg)](https://github.com/dhondta/python-malsearch/actions/workflows/python-package.yml) -[![Coverage Status](https://raw.githubusercontent.com/dhondta/python-malsearch/main/docs/coverage.svg)](#) +[![Build Status](https://github.com/packing-box/python-malsearch/actions/workflows/python-package.yml/badge.svg)](https://github.com/packing-box/python-malsearch/actions/workflows/python-package.yml) +[![Coverage Status](https://raw.githubusercontent.com/packing-box/python-malsearch/main/docs/coverage.svg)](#) [![Python Versions](https://img.shields.io/pypi/pyversions/malsearch.svg)](https://pypi.python.org/pypi/malsearch/) -[![Known Vulnerabilities](https://snyk.io/test/github/dhondta/python-malsearch/badge.svg?targetFile=requirements.txt)](https://snyk.io/test/github/dhondta/python-malsearch?targetFile=requirements.txt) +[![Known Vulnerabilities](https://snyk.io/test/github/packing-box/python-malsearch/badge.svg?targetFile=requirements.txt)](https://snyk.io/test/github/packing-box/python-malsearch?targetFile=requirements.txt) [![License](https://img.shields.io/pypi/l/malsearch.svg)](https://pypi.python.org/pypi/malsearch/) -This library communicates with API's of multiple malware databases to collect malware samples. +This library communicates with API's of the following malware databases to collect malware samples: + +- [Maldatabase](https://maldatabase.com/api-doc.html) +- [Malpedia](https://malpedia.caad.fkie.fraunhofer.de/usage/api) +- [MalShare](https://malshare.com/doc.php) +- [Malware Bazaar](https://bazaar.abuse.ch/api) +- [Triage](https://tria.ge/docs) +- [VirusShare](https://virusshare.com/apiv2_reference) +- [VirusTotal](https://docs.virustotal.com/reference/overview) ```sh pip install malsearch @@ -23,8 +31,8 @@ TODO ## :clap: Supporters -[![Stargazers repo roster for @dhondta/python-malsearch](https://reporoster.com/stars/dark/dhondta/python-malsearch)](https://github.com/dhondta/python-malsearch/stargazers) +[![Stargazers repo roster for @packing-box/python-malsearch](https://reporoster.com/stars/dark/packing-box/python-malsearch)](https://github.com/packing-box/python-malsearch/stargazers) -[![Forkers repo roster for @dhondta/python-malsearch](https://reporoster.com/forks/dark/dhondta/python-malsearch)](https://github.com/dhondta/python-malsearch/network/members) +[![Forkers repo roster for @packing-box/python-malsearch](https://reporoster.com/forks/dark/packing-box/python-malsearch)](https://github.com/packing-box/python-malsearch/network/members)

Back to top

diff --git a/docs/pages/index.md b/docs/pages/index.md index b735b02..ea2b52c 100644 --- a/docs/pages/index.md +++ b/docs/pages/index.md @@ -3,11 +3,12 @@ MalSearch is a library that allows to collect malware samples from multiple malware databases using their API's. It relies on: - [Maldatabase](https://maldatabase.com/api-doc.html) -- [MalShare](https://www.malshare.com) +- [Malpedia](https://malpedia.caad.fkie.fraunhofer.de/usage/api) +- [MalShare](https://malshare.com/doc.php) - [Malware Bazaar](https://bazaar.abuse.ch/api) -- [Triage]() -- [VirusShare]() -- [VirusTotal](https://docs.virustotal.com/reference/getting-started) +- [Triage](https://tria.ge/docs) +- [VirusShare](https://virusshare.com/apiv2_reference) +- [VirusTotal](https://docs.virustotal.com/reference/overview) ## Setup @@ -18,8 +19,3 @@ This library is available on [PyPi](https://pypi.python.org/pypi/malsearch/) and pip install malsearch ``` -or - -```sh -pip3 install malsearch -``` diff --git a/src/malsearch/VERSION.txt b/src/malsearch/VERSION.txt index 7ecf123..c43514f 100644 --- a/src/malsearch/VERSION.txt +++ b/src/malsearch/VERSION.txt @@ -1 +1 @@ -0.1.0 +0.2.0 diff --git a/src/malsearch/__init__.py b/src/malsearch/__init__.py index e479ebf..73d0f64 100644 --- a/src/malsearch/__init__.py +++ b/src/malsearch/__init__.py @@ -1,11 +1,63 @@ # -*- coding: UTF-8 -*- +import logging +from os import cpu_count + from .clients import * from .clients import __all__ as _clients -__all__ = ["download_sample", "download_samples"] + _clients +__all__ = ["download_sample", "download_samples", "get_samples_feed"] + _clients _CLIENTS_MAP = {n.lower(): globals()[n] for n in _clients} +_MAX_WORKERS = 3 * cpu_count() + +logger = logging.getLogger("malsearch") + + +def _check_conf(method): + def _wrapper(f): + from functools import wraps + @wraps(f) + def _subwrapper(*args, config=None, **kwargs): + if config is None: + logger.error("no configuration file provided") + logger.info(f"you can create one at {config} manually (INI format with section 'API keys')") + else: + if isinstance(config, str): + config = _valid_conf(config) + clients = [] + for n in config['API keys']: + if not hasattr(_CLIENTS_MAP[n], method): + continue + if n in (kwargs.get('skip') or []): + logger.debug(f"{n} skipped") + continue + if config.has_section("Disabled"): + t = config['Disabled'].get(n) + if t is not None: + try: + if dt.datetime.strptime(t, "%d/%m/%Y %H:%M:%S") < dt.datetime.now(): + from contextlib import nullcontext + with kwargs.get('lock') or nullcontext(): + config['Disabled'].pop(n) + with open(config.path, 'w') as f: + config.write(f) + else: + logger.warning(f"{n} is disabled until {t}") + continue + except ValueError: + logger.warning(f"{n} is disabled") + continue + cls = _CLIENTS_MAP[n] + if cls.__base__.__name__ == "API": + kwargs['api_key'] = config['API keys'].get(n) + clients.append(cls(config=config, **kwargs)) + if len(clients) == 0: + logger.warning("no download client available/enabled") + logger.debug(f"clients: {', '.join(c.name for c in clients)}") + return f(*args, clients=clients, config=config, **kwargs) + return _subwrapper + return _wrapper def _valid_conf(path): @@ -23,68 +75,48 @@ def _valid_conf(path): return conf +@_check_conf("get_file_by_hash") def download_sample(hash, config=None, **kwargs): - import logging - logger = logging.getLogger("malsearch") - if config is None: - logger.error("no configuration file provided") - logger.info(f"you can create one at {config} manually (INI format with section 'API keys')") - else: - import datetime as dt - from os.path import exists, join - p = join(kwargs.get('output_dir', "."), hash) - if exists(p) and not kwargs.get('overwrite'): - logger.info(f"'{p}' already exists") - return - if isinstance(config, str): - config = _valid_conf(config) - clients = [] - for n in config['API keys']: - if n in (kwargs.get('skip') or []): - logger.debug(f"{n} skipped") - continue - if config.has_section("Disabled"): - t = config['Disabled'].get(n) - if t is not None: - try: - if dt.datetime.strptime(t, "%d/%m/%Y %H:%M:%S") < dt.datetime.now(): - from contextlib import nullcontext - with kwargs.get('lock') or nullcontext(): - config['Disabled'].pop(n) - with open(config.path, 'w') as f: - config.write(f) - else: - logger.warning(f"{n} is disabled until {t}") - continue - except ValueError: - logger.warning(f"{n} is disabled") - continue - clients.append(n) - if len(clients) == 0: - logger.warning("no download client available/enabled") - logger.debug(f"clients: {', '.join(clients)}") - for n in clients: - logger.debug(f"trying {n}...") - cls = _CLIENTS_MAP[n] - if cls.__base__.__name__ == "API": - kwargs['api_key'] = config['API keys'].get(n) - client = cls(config=config, **kwargs) - try: - client.get_file_by_hash(hash) - if hasattr(client, "content") and client.content is not None and len(client.content) > 0: - logger.debug("found sample !") - return - except ValueError as e: - logger.debug(e) - except Exception as e: - logger.exception(e) + import datetime as dt + from os.path import exists, join + p = join(kwargs.get('output_dir', "."), hash) + if exists(p) and not kwargs.get('overwrite'): + logger.info(f"'{p}' already exists") + return + for client in clients: + logger.debug(f"trying {client.name}...") + try: + client.get_file_by_hash(hash) + if hasattr(client, "content") and client.content is not None and len(client.content) > 0: + logger.debug("found sample !") + return + except AttributeError: + continue # not a client for downloading samples (e.g. Maldatabase) + except ValueError as e: + logger.debug(e) + except Exception as e: + logger.exception(e) logger.warning(f"could not find the sample with hash {hash}") -def download_samples(*hashes, max_workers=5, **kwargs): +def download_samples(*hashes, max_workers=_MAX_WORKERS, **kwargs): from concurrent.futures import ThreadPoolExecutor as Pool from threading import Lock kwargs['lock'] = Lock() with Pool(max_workers=max_workers) as executor: for h in hashes: executor.submit(download_sample, h.lower(), **kwargs) + + +@_check_conf("get_malware_feed") +def get_samples_feed(config=None, **kwargs): + count = 0 + for client in clients: + logger.debug(f"trying {client.name}...") + try: + for h in client.get_malware_feed(): + yield h + count += 1 + except Exception as e: + logger.exception(e) + logger.info(f"got {count} hashes") diff --git a/src/malsearch/__main__.py b/src/malsearch/__main__.py index 6b54561..09f3ce2 100644 --- a/src/malsearch/__main__.py +++ b/src/malsearch/__main__.py @@ -32,11 +32,14 @@ def _setup(parser): def main(): from os import makedirs - from .__init__ import _valid_conf, download_samples + from .__init__ import _valid_conf, download_samples, get_samples_feed from .clients.__common__ import _valid_hash - parser = _parser("MalSearch", "This tool is aimed to search for malware samples across some public databases", []) + parser = _parser("MalSearch", "This tool is aimed to search for malware samples across some public databases", + ["2037f9b7dd268eef7d2e950b27c6cf80e3ba692d262c785ab67b04dc71c99bf9", + "-f hashes.txt -o samples --disable-cache"]) parser.add_argument("sample_hash", type=_valid_hash, nargs="*", help="input hash") parser.add_argument("-f", "--from-file", help="get hashes from the target file (newline-separated list)") + parser.add_argument("-m", "--from-malware-feed", action="store_true", help="get hashes from malware feeds") opt = parser.add_argument_group("optional arguments") opt.add_argument("-c", "--config", default="~/.malsearch.conf", type=_valid_conf, help="INI configuration file") opt.add_argument("-o", "--output-dir", default=".", help="output directory for downloaded samples") @@ -52,6 +55,9 @@ def main(): with open(args.from_file) as f: for h in f.readlines(): args.sample_hash.append(_valid_hash(h.strip())) + if args.from_malware_feed: + for h in get_samples_feed(): + args.sample_hash.append(_valid_hash(h.strip())) makedirs(args.output_dir, exist_ok=True) if len(args.sample_hash) > 0: download_samples(*args.sample_hash, **vars(args)) diff --git a/src/malsearch/clients/__init__.py b/src/malsearch/clients/__init__.py index e6cfdd4..deb4b6d 100644 --- a/src/malsearch/clients/__init__.py +++ b/src/malsearch/clients/__init__.py @@ -1,4 +1,5 @@ # -*- coding: UTF-8 -*- +from .maldatabase import Maldatabase from .malpedia import Malpedia from .malshare import MalShare from .malwarebazaar import MalwareBazaar @@ -7,5 +8,5 @@ from .virustotal import VirusTotal -__all__ = ["Malpedia", "MalShare", "MalwareBazaar", "Triage", "VirusShare", "VirusTotal"] +__all__ = ["Maldatabase", "Malpedia", "MalShare", "MalwareBazaar", "Triage", "VirusShare", "VirusTotal"] diff --git a/src/malsearch/clients/maldatabase.py b/src/malsearch/clients/maldatabase.py new file mode 100644 index 0000000..e9834c6 --- /dev/null +++ b/src/malsearch/clients/maldatabase.py @@ -0,0 +1,17 @@ +# -*- coding: UTF-8 -*- +from .__common__ import API + + +__all__ = ["Maldatabase"] + + +class Maldatabase(API): + doc = "https://maldatabase.com/api-doc.html" + url = "https://api.maldatabase.com/download" + _api_key_header = "Authorization" + + def get_malware_feed(self, hashtype="sha256"): + # available output hash types: md5, sha1, sha256 + self._get("", headers={'Accept-Encoding': "gzip, deflate"}) + for data in self.json: + yield data[hashtype] diff --git a/src/malsearch/clients/virustotal.py b/src/malsearch/clients/virustotal.py index e80d4e4..dbc57f1 100644 --- a/src/malsearch/clients/virustotal.py +++ b/src/malsearch/clients/virustotal.py @@ -9,7 +9,7 @@ class VirusTotal(API): doc = "https://docs.virustotal.com/reference/overview" url = "https://www.virustotal.com/api/v3" _api_key_header = "X-Apikey" - + @hashtype("md5", "sha1", "sha256") def get_file_by_hash(self, hash): if self._unpacked: