diff --git a/MobileRevelator/python/Library/htmlreport-templates/data/avatar.png b/MobileRevelator/python/Library/htmlreport-templates/data/avatar.png new file mode 100644 index 0000000..14aafae Binary files /dev/null and b/MobileRevelator/python/Library/htmlreport-templates/data/avatar.png differ diff --git a/MobileRevelator/python/Library/htmlreport-templates/data/avatar_thumb.png b/MobileRevelator/python/Library/htmlreport-templates/data/avatar_thumb.png new file mode 100644 index 0000000..e61749e Binary files /dev/null and b/MobileRevelator/python/Library/htmlreport-templates/data/avatar_thumb.png differ diff --git a/MobileRevelator/python/Library/htmlreport-templates/data/bubble.css b/MobileRevelator/python/Library/htmlreport-templates/data/bubble.css new file mode 100644 index 0000000..3100d40 --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport-templates/data/bubble.css @@ -0,0 +1,8 @@ +.status { position: relative; background-color: rgb(255, 179, 79); border-radius: .4em; min-height: 3em; padding: 1em} +.status:after { content: ''; position: absolute; top: 0; left: 50%; width: 0; height: 0; border: 10px solid transparent; border-bottom-color: rgb(255, 179, 79); border-top: 0; margin-left: -10px; margin-top: -10px; } + +.outgoing { position: relative; float:right; background-color: rgb(199, 255, 154); border-radius: .4em; min-width: 66%; padding: 1em} +.outgoing:after { content: ''; position: absolute; right: 0; top: 50%; width: 0; height: 0; border: 10px solid transparent; border-left-color: rgb(199, 255, 154); border-right: 0; margin-top: -10px; margin-right: -10px; } + +.incoming { position: relative; float:left; background-color: rgb(217, 217, 255); border-radius: .4em; min-width: 66%; padding: 1em} +.incoming:after { content: ''; position: absolute; left: 0; top: 50%; width: 0; height: 0; border: 10px solid transparent; border-right-color: rgb(217, 217, 255); border-left: 0; margin-top: -10px; margin-left: -10px; } diff --git a/MobileRevelator/python/Library/htmlreport-templates/data/style.css b/MobileRevelator/python/Library/htmlreport-templates/data/style.css new file mode 100644 index 0000000..6ce7181 --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport-templates/data/style.css @@ -0,0 +1,105 @@ +body { + font-size: small +} + +table, th, td, tr { + border-collapse: collapse; +} + +.content td { + border: 1px solid #69899F; + padding: 0.25em; +} + +.content th { + border:2px solid rgb(0, 0, 0); + padding: 0.5em; +} + +.chat { + max-width: 800px; +} + +.status { + background-color: rgb(255, 179, 79); +} + +.outgoing { + background-color: rgb(199, 255, 154) +} + +.incoming { + background-color: rgb(255, 255, 255) +} + +.clearfix { + clear: both; +} + +.content_headline { + padding-bottom: 1em; +} + +.content_message { + font-size: medium; +} + +.content_timestamp { + text-align: right +} + +.note { + color: white; + background-color: darkred; + margin: 3px; + border: 1px solid #FF0000 +} + +.avatar_thumb_mini { + max-width:36px; + height:auto; + padding-right: 0.25em +} + +.avatar_thumb_micro { + max-width:16px; + height:auto; + padding-right: 0.25em +} + +.avatar_thumb { + max-width:96px; + height:auto; +} + +.contact_name { + font-weight: bold +} + +.contact_platform_id { + font-family: monospace +} + +.contact_status { + font-size: x-small +} + +.timestamp { + font-family: monospace +} + +.clickable { + border: 2px solid blue; + padding: 2px; + overflow: auto; + display: inline-block; +} + +.emoji { + font-size: xx-large; +} + +.STICKER { + max-width:96px; + height:auto; +} \ No newline at end of file diff --git a/MobileRevelator/python/Library/htmlreport-templates/index.html b/MobileRevelator/python/Library/htmlreport-templates/index.html new file mode 100644 index 0000000..69d197b --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport-templates/index.html @@ -0,0 +1,47 @@ + + + + + Chatlist Report for: {owner[platform_id]} + + + +

Chatlist Report

+

Account Information:

+ Avatar of {owner[platform_id]}
+ ID: {owner[platform_id]}
+ Name: {owner[name]}
+ + + + + + + + + + + + + + + + + + + +
ContactMessagesAttachmentsUsersTimestampsSource
+
+ Avatar of {platform_id}
+ Name: {contact_id[name]}
+ ID: {platform_id}
+ Status: {contact_id[status]} +
+
{count_msg}
list view
bubble view
{count_attach}{count_users} + {timestamps} + {source}
+ + Report generated by {generator[product]}, {generator[version]} at {generator[timestamp_report]} + + + diff --git a/MobileRevelator/python/Library/htmlreport-templates/memberlist-all.html b/MobileRevelator/python/Library/htmlreport-templates/memberlist-all.html new file mode 100644 index 0000000..5a4676b --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport-templates/memberlist-all.html @@ -0,0 +1,39 @@ + + + + + Member List + + + +

Memberlist for {chat[subject]}

+ The list contains all users, which where seen in chat by the owner. This means the list may contains users, which have left the chat at some point. +

Metadata

+ +

Memberlist

+ + + + + + + + + + + + + + + + +
AvatarNameIDStatusSource
Avatar{name}{platform_id}{status}{source} - ID: {source_id}
+ + Report generated by {generator[product]}, {generator[version]} at {generator[timestamp_report]} + + + diff --git a/MobileRevelator/python/Library/htmlreport-templates/memberlist.html b/MobileRevelator/python/Library/htmlreport-templates/memberlist.html new file mode 100644 index 0000000..c6a39f5 --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport-templates/memberlist.html @@ -0,0 +1,38 @@ + + + + + Memberlist Report + + + +

Memberlist at {meta[timestamp]}

+

Metadata

+ +

Memberlist

+ + + + + + + + + + + + + + + + +
AvatarNameIDStatusSource
Avatar{name}{platform_id}{status}{source} - ID: {source_id}
+ + Report generated by {generator[product]}, {generator[version]} at {generator[timestamp_report]} + + + diff --git a/MobileRevelator/python/Library/htmlreport-templates/messages-bubble.html b/MobileRevelator/python/Library/htmlreport-templates/messages-bubble.html new file mode 100644 index 0000000..6faa6a4 --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport-templates/messages-bubble.html @@ -0,0 +1,27 @@ + + + + + Message Report for: {contact[platform_id]} + + + + +

Messages Bubble Report

+
+ +
+
Avatar of {sender_id[platform_id]} {sender_id[platform_id]} - {sender_id[name]}
+
{content_message[data]}
+ +
+
+
+ +
+ Report generated by {generator[product]}, {generator[version]} at {generator[timestamp_report]}

+ * notes are red highlited
+ ** Timestamps are in UTC format.
+ + + diff --git a/MobileRevelator/python/Library/htmlreport-templates/messages.html b/MobileRevelator/python/Library/htmlreport-templates/messages.html new file mode 100644 index 0000000..fb887ec --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport-templates/messages.html @@ -0,0 +1,57 @@ + + + + + Message Report for: {contact[platform_id]} + + + +

Messages Report

+

Chat Partner

+ Avatar of {contact[platform_id]}
+ ID: {contact[platform_id]}
+ Name: {contact[name]}
+

Chat Session Details

+ +

Messages

+ + + + + + + + + + + + + + +
SenderContentTimestamps **Details
+
+
Avatar of {sender_id[platform_id]} + {sender_id[name]}
{sender_id[platform_id]} +
+
{content_message[data]}{timestamps} + Source: {source} -> ID: {source_id}
+ Content: {content_message[media_type]} + {details} +
+ Report generated by {generator[product]}, {generator[version]} at {generator[timestamp_report]}

+ * notes are red highlited
+ ** Timestamps are in UTC format.
+ *** The list contains all users, which where seen in chat by the owner. This means the list may contains users, which have left the chat at some point. + + + diff --git a/MobileRevelator/python/Library/htmlreport.py b/MobileRevelator/python/Library/htmlreport.py new file mode 100644 index 0000000..86db70f --- /dev/null +++ b/MobileRevelator/python/Library/htmlreport.py @@ -0,0 +1,382 @@ + +import datetime, time, re +from datetime import timezone +from pathlib import PurePath, Path + +class HTMLReport: + TEMPLATE_DIR = "htmlreport-templates" + TEXT_EN = { + 'unknown_value': 'N/A' + } + DEFAULT_TEXT = TEXT_EN + + def __init__(self, output_directory): + # create project directory + try: + self.project_directory = Path(output_directory) + self.project_directory.mkdir(parents=True, exist_ok=True) + except: + # this is a serious error + raise + + def parse_template(self, template_name): + #open and parse template + template_file = Path(__file__).parent / HTMLReport.TEMPLATE_DIR / (template_name + ".html") + if template_file.exists(): + template_source = template_file.read_text() + return self.parse_template_content(template_source) + + def parse_template_content(self, template, template_var=None, next_item=None): + if not template_var: + template_var = [] + + if not next_item: + next_item ="content" + + try: + tag_re = re.compile("|$") + tag = tag_re.search(template).group(1) + if not tag: + template_var.append([next_item, template]) + except: + pass + else: + try: + block_start = template.index("".format(tag)) + block_end = template.index("".format(tag)) + snipet = template[block_start+len(tag)+9:block_end] + rest = template[:block_start+len(tag)+9] + "{content}" + template[block_end:] + template_var.append([next_item, rest]) + template_var = self.parse_template_content(snipet, template_var, tag) + except: + self.info_output("{0} entry not found in template.".format(tag), 1) + + return template_var + + def timestamp_converter(self, input_timestamp, timezone=None): + #TODO: implement timezone correction + output_timestamp = HTMLReport.DEFAULT_TEXT['unknown_value'] + try: + timestamp = int(input_timestamp) / 1000 + output_timestamp = str( + datetime.datetime + .fromtimestamp(timestamp) + .replace(microsecond=0) + .replace(tzinfo=datetime.timezone.utc) + .strftime('%Y-%m-%d %H:%M:%S %z') + ) + except (TypeError, ValueError) as error: + self.info_output('Timestamp {0} not parsable. {1}'.format(input_timestamp, error), 1) + return output_timestamp + + def info_output(self, message, level=2, end="\n"): + levels = ["Error", "Warning", "Info"] + if level < 3: + print(" " + levels[level] + (7-len(levels[level]))*" "+ ": " + str(message), end=end) + else: + print("**** " + str(message) + " ****", end=end) + + +class HTMLReportMessaging(HTMLReport): + # default text english + TEXT_EN = { + 'unknown_value': 'N/A', + 'chats': 'Chats for Account: {0}', + 'thumb_only': 'Only thumbnail available.', + 'no_media': 'Original media file missing.', + 'caption': 'Caption', + 'size': 'Size', + 'duration': 'Duration', + 'seconds': 'seconds', + 'latitude': 'Latitude', + 'longitude': 'Longitude', + 'url': 'URL', + 'name': 'Title' + } + DEFAULT_TEXT = TEXT_EN + + DEFAULT_FILES = { + 'file_avatar_thumb': Path(__file__).parent / HTMLReport.TEMPLATE_DIR / "data" / "avatar_thumb.png", + 'file_avatar': Path(__file__).parent / HTMLReport.TEMPLATE_DIR / "data" / "avatar.png" + } + + def __init__(self, output_directory, messaging): + self.info_output("Generating HTML-Report files ...",3) + super().__init__(output_directory) + + self.messaging = messaging + + #create directories & copy css + self.output_dir_avatar = self.project_directory / "avatar" + self.output_dir_avatar.mkdir(parents=True, exist_ok=True) + self.output_dir_media = self.project_directory / "media" + self.output_dir_media.mkdir(parents=True, exist_ok=True) + self.output_dir_thumb = self.project_directory / "thumbnail" + self.output_dir_thumb.mkdir(parents=True, exist_ok=True) + self.output_dir_data = self.project_directory / "data" + self.output_dir_data.mkdir(parents=True, exist_ok=True) + file_input_css = Path(__file__).parent / HTMLReport.TEMPLATE_DIR / "data" / "style.css" + file_output_css = self.output_dir_data / "style.css" + file_output_css.write_text(file_input_css.read_text(), encoding='utf-8') + file_input_css = Path(__file__).parent / HTMLReport.TEMPLATE_DIR / "data" / "bubble.css" + file_output_css = self.output_dir_data / "bubble.css" + file_output_css.write_text(file_input_css.read_text(), encoding='utf-8') + + self.chats = [] + self.owner = messaging.owner + self.meta = {} + insert_pos = 0 + + messages_template = self.parse_template("messages") + messages_bubble_template = self.parse_template("messages-bubble") + memberlist_template = self.parse_template("memberlist") + memberlist_all_template = self.parse_template("memberlist-all") + + # generate reports + for platform_id in messaging: + self.chat = messaging.get_chat(platform_id) + self.messages = self.chat.messages + # reload messages for bubble view + self.chat_bubble = messaging.get_chat(platform_id) + self.messages_bubble = self.chat_bubble.messages + + # if we have a memberlist extract some details + if self.chat.memberlist: + members_alltimes = self.chat.memberlist.get_allmembers() + members_current = self.chat.memberlist.get_state() + self.members = { + "list_alltimes": members_alltimes, + "count_alltimes": len(members_alltimes) if members_alltimes else 0, + "list_current": members_current, + "count_current": len(members_current) if members_current else 0 + } + self.info_output('Writing memberlist ...') + + # write list of all members + self.member = [] + if members_alltimes: + for member in members_alltimes: + self.member.append(messaging.get_contact(member)) + output_file = self.project_directory / (str(platform_id) + "-all-memberlist.html") + output_file.write_text(self.process_template(memberlist_all_template), encoding='utf-8') + + # write memberlist for every change + for e in self.chat.memberlist: + timestamp, memberlist = e + self.meta = {"timestamp": timestamp} + self.member = [] + for member in memberlist: + self.member.append(messaging.get_contact(member)) + output_file = self.project_directory / (str(platform_id) + "-" + str( + datetime.datetime + .fromtimestamp(timestamp/1000) + .replace(tzinfo=datetime.timezone.utc) + .strftime('%Y%m%d-%H%M%S-%f') + ) + "-memberlist.html") + output_file.write_text(self.process_template(memberlist_template), encoding='utf-8') + self.info_output(' {0}'.format(timestamp)) + else: + self.members = { + "list_alltimes": None, + "count_alltimes": 2, + "list_current": None, + "count_current": 2 + } + + # generate message pages only if we have messages + if self.chat.count_msg: + # insert chats with messages on top + self.chats.insert(insert_pos, self.chat) + insert_pos = insert_pos + 1 + output_file = self.project_directory / (str(platform_id) + "-messages.html") + output_file.write_text(self.process_template(messages_template), encoding='utf-8') + output_file = self.project_directory / (str(platform_id) + "-messages-bubble.html") + output_file.write_text(self.process_template(messages_bubble_template), encoding='utf-8') + else: + self.chats.append(self.chat) + + + #else: + # self.memberlist = None + + # finally write chatlist overview + output_file = self.project_directory / ("index.html") + output_file.write_text(self.process_template(self.parse_template("index")), encoding='utf-8') + + self.info_output("... finished.",3) + + def process_template(self, template_blocks): + content = "{content}" + block = "" + d = dict() + d["generator"] = {"product":self.messaging.PRODUCT, "version":self.messaging.VERSION, "timestamp_report":datetime.datetime.now()} + d["chat"] = self.parse_content_data(self.chat.__dict__) + d["meta"] = self.parse_content_data(self.meta) + d["contact"] = self.parse_content_data(self.chat.contact.__dict__) + d["messages"] = self.chat.messages + d["messages_bubble"] = self.chat_bubble.messages + d["owner"] = self.parse_content_data(self.owner.__dict__) + d["members"] = self.parse_content_data(self.members) + for template_block in template_blocks: + data = dict() + template_block_name = template_block[0] + template_block_content = template_block[1] + # simply replace content + if template_block_name == "content": + template_block_content = template_block_content.format( + **d, content="{content}") + content = content.format(content=template_block_content) + # iterate over + else: + if template_block_name: + data = self.__dict__[template_block_name] + for d in data: + d = data=self.parse_content_data(d.__dict__) + block = block + template_block_content.format( + **d, content="{content}") + content = content.format(content=block) + return content + + + def parse_content_data(self, data): + """ + Parse and convert data from database entrys into content. + + This function will convert data depending on the name of the field. + The following start of fieldnames are implemented: + + timestamp_ -> timestamps will be transformed into readable date/times + file_ -> copy file to output directory + + Other fieldnames will be checked if the are empty. If this is the case + they are set to some default value. + + :param data: dict with non processed data + :param content: dict with data fields for template + :return: dict with data fields for the template + """ + content = {} + + # loop over data and process them + for k, e in data.items(): + + # dont touch platform_id and lists + if k == "platform_id": + content.update({k:e}) + + elif k == "contact_id" or k == "sender_id" or k == "receiver_id": + try: + contact = self.messaging.get_contact(e) + contact = self.parse_content_data(contact.__dict__) + content.update({k:contact}) + except Exception as error: + self.info_output(error, 1) + + # convert timestamp fields + elif k.startswith("timestamp"): + # if we hava a timestamps field skip buildig it + if not "timestamps" in data: + timestamps = "" + for kk, ee in data.items(): + if kk.startswith("timestamp"): + if ee: + # add new line if already timestamps in var + timestamps = timestamps + "
" if timestamps else "" + timestamps = timestamps + '' + kk[10:].capitalize() + ': ' + self.timestamp_converter(ee) + '' + content.update({"timestamps":timestamps}) + if e: + content.update({k:self.timestamp_converter(data.get(k, 0))}) + else: + content.update({k:HTMLReport.DEFAULT_TEXT['unknown_value']}) + + # build message content + elif k == "content_message": + content["details"] = "" + # parse all fields + e = self.parse_content_data(e.__dict__) + + # build link to media file & show thumb if available + if e["file_media"] and e["file_thumbnail"]: + e["data"] = ''.format(e["file_media"], e["file_thumbnail"], e["media_type"]) + elif e["file_thumbnail"]: + e["data"] = ''.format(e["file_thumbnail"], HTMLReportMessaging.DEFAULT_TEXT['thumb_only'], e["media_type"]) + elif e["file_media"]: + if e["media_type"] == "STICKER": + e["data"] = ''.format(e["file_media"], e["media_type"]) + elif e["name"]: + e["data"] = '{}'.format(e["file_media"], e["name"]) + else: + e["data"] = '{}'.format(e["file_media"], e["media_type"]) + else: + pass + + # add additional fields and details + if e["caption"]: + if e["data"] == None: + e["data"] = "" + e["data"] = e["data"] + '
{0}'.format(e["caption"]) + if e["latitude"] and e["longitude"]: + e["data"] = e["data"] + '
Show on OpenStreetMap'.format(e["latitude"], e["longitude"]) + content["details"] = content.get("details", "") + '
{0}: {1}'.format(HTMLReportMessaging.DEFAULT_TEXT['latitude'], e["latitude"]) + content["details"] = content.get("details", "") + '
{0}: {1}'.format(HTMLReportMessaging.DEFAULT_TEXT['longitude'], e["longitude"]) + if e["url"]: + content["details"] = content.get("details", "") + '
{0}: {1}'.format(HTMLReportMessaging.DEFAULT_TEXT['url'], e["url"]) + if e["name"]: + content["details"] = content.get("details", "") + '
{0}: {1}'.format(HTMLReportMessaging.DEFAULT_TEXT['name'], e["name"]) + if e["size"]: + content["details"] = content.get("details", "") + '
{0}: {1} bytes'.format(HTMLReportMessaging.DEFAULT_TEXT['size'], e["size"]) + if e["duration"]: + content["details"] = content.get("details", "") + '
{0}: {1} {2}'.format(HTMLReportMessaging.DEFAULT_TEXT['duration'], e["duration"], HTMLReportMessaging.DEFAULT_TEXT['seconds']) + + # finally check for data, else show error + if e["data"]: + # let's show single emojis bigger + if len(e["data"]) == 1 and len(bytes(e["data"], 'utf-8')) >2: + e["data"] = '{}'.format(e["data"]) + else: + e["data"] = '{}'.format(HTMLReportMessaging.DEFAULT_TEXT['unknown_value']) + + content.update({k:e}) + + # copy files and set path to relative + elif k.startswith("file_"): + if k.startswith("file_avatar"): + input_file = e + if input_file == None: + input_file = HTMLReportMessaging.DEFAULT_FILES[k] + output_file = self.output_dir_avatar / input_file.name + if not output_file.exists(): + output_file.write_bytes(input_file.read_bytes()) + content.update({k:output_file.relative_to(self.project_directory)}) + else: + if e: + if k == "file_thumbnail": + output_file = self.output_dir_thumb / e.name + else: + output_file = self.output_dir_media / e.name + + if not output_file.exists(): + output_file.write_bytes(e.read_bytes()) + content.update({k:output_file.relative_to(self.project_directory)}) + else: + content.update({k:None}) + + elif k == "list_contacts": + if e: + for contact in e: + contact = contact.__dict__ + content.update({k:contact}) + else: + content.update({k:HTMLReport.DEFAULT_TEXT['unknown_value']}) + pass + # convert lists into
  • elements + elif k.startswith("list_"): + if e: + nl = "\n" + content.update({k:""}) + pass + else: + content.update({k:HTMLReport.DEFAULT_TEXT['unknown_value']}) + else: + content.update({k:data.get(k, HTMLReport.DEFAULT_TEXT['unknown_value'])}) + return content diff --git a/MobileRevelator/python/Library/javaobj.py b/MobileRevelator/python/Library/javaobj.py index ad2d1e1..e6bf590 100644 --- a/MobileRevelator/python/Library/javaobj.py +++ b/MobileRevelator/python/Library/javaobj.py @@ -41,11 +41,11 @@ import sys try: - # Python 2 - from StringIO import StringIO as BytesIO -except ImportError: # Python 3+ from io import BytesIO +except ImportError: + # Python 2 + from StringIO import StringIO as BytesIO try: import ftfy.bad_codecs @@ -75,7 +75,7 @@ def log_debug(message, ident=0): :param message: Message to log :param ident: Number of indentation spaces """ - _log.debug(" " * (ident * 2) + str(message)) + #_log.debug(" " * (ident * 2) + str(message)) def log_error(message, ident=0): @@ -85,7 +85,7 @@ def log_error(message, ident=0): :param message: Message to log :param ident: Number of indentation spaces """ - _log.error(" " * (ident * 2) + str(message)) + #_log.error(" " * (ident * 2) + str(message)) # ------------------------------------------------------------------------------ diff --git a/MobileRevelator/python/fs_whatsapp.py b/MobileRevelator/python/fs_whatsapp.py index aad7f2e..49a3cb2 100644 --- a/MobileRevelator/python/fs_whatsapp.py +++ b/MobileRevelator/python/fs_whatsapp.py @@ -76,7 +76,7 @@ def findwhatsapp(): def main(): error="" - script=ctx.gui_getpythonscriptpath()+"/WhatsApp/whatsapp_xtract.py" + script=ctx.gui_getpythonscriptpath()+"/whatsapp.py" ctx.gui_setMainLabel("Getting Python Path") python=GetPythonPath("python.exe") #Windows if (python is None): diff --git a/MobileRevelator/python/whatsapp.py b/MobileRevelator/python/whatsapp.py new file mode 100644 index 0000000..3deb176 --- /dev/null +++ b/MobileRevelator/python/whatsapp.py @@ -0,0 +1,1118 @@ +#!/usr/bin/python3 + +import sys, re, os, string, sqlite3, glob, base64, subprocess, io, shutil, gzip, zlib, collections +from pathlib import PurePath, Path +from xml.dom import minidom +from whatsapp_decrypt import decryptwhatsapp +from Library import javaobj +from Library.htmlreport import HTMLReportMessaging + +class WhatsApp: + PRODUCT = "WAPAF - WhatsApp Parser for Android Forensics" + VERSION = "v0.5 Alpha (2019-05-24)" + + """ + This class opens and parses an android whatsapp database and related files. + + WAPAF - WhatsApp Parser for Android Forensics + (c) 2019 by Björn Knorr + + Released under MIT licence + + Decryption of msgstore.db is supported via whatsapp_decrypt.py. + + The class will find and use media und other files. It's possible + to run the class by suppling just the msgstore.db. Decryption is in + this case supported if the "key" file lies next to the msgstore.db. + However you will not get any media files linked to chats etc. Embedded + thumbnails from msgstore.db will be extracted. + + It's recommended to supply the class with a full dump of the + "/data/com.whatsapp" and "/media/.../WhatsApp" (SD card) directory, + wich looks exacly like this: + + report + │ + ├── com.whatsapp <- folder from /data + │   ├── databases + │   │   ├── msgstore.db <- main database + │   │   └── wa.db  <- contact database + │   ├── files + │   │   ├── Avatars <- Avatar images + │   │   │   └── ... + │   │   └── key <- decryption key + │   └── shared_prefs + │   └── com.whatsapp_preferences.xml <- prefs file + └── WhatsApp <- folder from SD card or /media/0 + ├── Databases <- msgstore.db backup(s) + │   └── msgstore.db.crypt12 + └── Media <- sent & received files + └── ... + + If you parse some modded WhatsApp version like YoWhatsApp2, you must + rename the folders and file names like described above. + + Now the class can be called: + + whatsapp = WhatsApp("//com.whatsapp/databases/msgstore.db") + + or + + whatsapp = WhatsApp("//WhatsApp/Databases/msgstore.db.crypt12") + """ + + # default pathes and file names in app directory + path_root = None + path_data_root = Path("com.whatsapp") + path_data_databases = path_data_root / "databases" + path_data_files = path_data_root / "files" + path_data_cache = path_data_root / "cache" + path_data_sharedprefs = path_data_root / "shared_prefs" + + path_data_avatars = path_data_files / "Avatars" + path_data_avatars_big = path_data_cache / "Profile Pictures" + + # default pathes and file names in whatsapp directory + path_wa_root = Path("WhatsApp") + path_wa_databases = path_wa_root / "Databases" + path_wa_media = path_wa_root / "Media" + + # default file and dirnames + filename_preferences = "com.whatsapp_preferences.xml" + filename_key = "key" + filename_wadb = "wa.db" + dirname_thumbnail= "thumbnails" + + # default extensions for users and groups + JID_EXT_S = "@s.whatsapp.net" + JID_EXT_G = "@g.us" + + # mapping status id to content + MESSAGES_STATUS_MSG = 6 + + # mapping media_type id to content + MESSAGES_MEDIA_TYPE = collections.OrderedDict() + MESSAGES_MEDIA_TYPE.update({ + "TEXT": (0, ""), + "IMAGE": (1, ""), + "VOICE": (2, ""), + "VIDEO": (3, ""), + "CONTACT": (4, ""), + "GEO": (5, ""), + "ID6": (6, "-not implemented- media_type:6"), + "ID7": (7, "-not implemented- media_type:7"), + "CALL_MISSED": (8, "📱 Call"), + "ID9": (9, "-not implemented- media_type:9"), + "CALL_MISSED_SYSTEM": (10, ""), + "ID11": (11, "-not implemented- media_type:11"), + "ID12": (12, "-not implemented- media_type:12"), + "GIF": (13, ""), + "ID14": (14, "-not implemented- media_type:14"), + "DELETED": (15, "Message deleted by user."), + "ID16": (16, "-not implemented- media_type:16"), + "ID17": (17, "-not implemented- media_type:17"), + "ID18": (18, "-not implemented- media_type:18"), + "ID19": (19, "-not implemented- media_type:19"), + "STICKER": (20, "") + }) + + MT_ID2MSG = list(MESSAGES_MEDIA_TYPE.values()) + MT_ID2SC = list(MESSAGES_MEDIA_TYPE.keys()) + MT_SC2ID = MESSAGES_MEDIA_TYPE + + # mapping media_size id to messages + MESSAGES_EN = collections.OrderedDict() + MESSAGES_EN.update({ + "ID0": (0,"-not implemented- media_size:0 {}"), + "ID1": (1,"-not implemented- media_size:1 {}"), + "ID2": (2,"-not implemented- media_size:2 {}"), + "ID3": (3,"-not implemented- media_size:3 {}"), + "GROUP_JOIN": (4,"User {} joins the group."), + "GROUP_LEFT": (5,"User {} has left the group."), + "GROUP_PIC": (6,"User {} has changed the group picture."), + "ID7": (7,"-not implemented- media_size:7 {}"), + "ID8": (8,"-not implemented- media_size:8 {}"), + "ID9": (9,"-not implemented- media_size:9 {}"), + "GROUP_?": (10,"-not implemented- media_size:10 {}"), + "GROUP_CREATE": (11,"Chat-Group was created or changed from User {}"), + "GROUP_ADD": (12,"User {} joins the group. Initiated by {}"), + "ID13": (13,"-not implemented- media_size:13 {}"), + "GROUP_REMOVE": (14,"User {} was removed from group. Initiated by {}"), + "ID15": (15,"-not implemented- media_size:15"), + "ID16": (16,"-not implemented- media_size:16 {}"), + "ID17": (17,"-not implemented- media_size:17 {}"), + "ID18": (18,"-not implemented- media_size:18 {}"), + "CHAT_NEW": (19,"🔒 Messages you send to this chat and calls are now secured with end-to-end encryption. Tap for more info."), #19 + "GROUP_JOIN2": (20,"User {} joined group via invitaion link."), + "ID21": (21, "-not implemented- media_size:21 {}"), + "ID22": (22, "-not implemented- media_size:22 {}") + }) + + # default is english + MS_ID2MSG = list(MESSAGES_EN.values()) + MS_SC2ID = MESSAGES_EN + MS_ID2SC = list(MESSAGES_EN.keys()) + + def __init__(self, file_msgstore_db): + """ + Check and open whatsapp database and other files. + + Opens, decrypts and checks msgstore.db. The returned object will + have following attribues: + + - owner: a Contact object with the details of the owner + - contacts: a list of the contacts + - contact_objects: a dict of Contact objects + + :param file_msgstore_db: path to msgstore.db file + """ + self.info_output("WhatsApp Class {0}".format(WhatsApp.VERSION), 3) + self.info_output("Initiasisation ...", 3) + + # default values + self.owner_jid = "N/A" + self.owner_name = "Owner" + self.current = 0 + + # set all fileplaces to current directory of msgstore.db by default + self.file_msgstore_db = Path(file_msgstore_db) + self.file_wa_db = self.file_msgstore_db.parent / WhatsApp.filename_wadb + self.file_prefs = self.file_msgstore_db.parent / WhatsApp.filename_preferences + self.file_key = self.file_msgstore_db.parent / WhatsApp.filename_key + + # search for complete directory structure + try: + tmp_parrent = self.file_msgstore_db.parent.parent + # is msgstore.db in com.whatsapp/databases/ or in WhatsApp/Databases/ ? + if tmp_parrent.name == WhatsApp.path_data_root.name or tmp_parrent.name == WhatsApp.path_wa_root.name: + WhatsApp.path_root = tmp_parrent.parent + self.file_prefs = WhatsApp.path_root / WhatsApp.path_data_sharedprefs / WhatsApp.filename_preferences + self.file_key = WhatsApp.path_root / WhatsApp.path_data_files / WhatsApp.filename_key + self.file_wa_db = WhatsApp.path_root / WhatsApp.path_data_databases / WhatsApp.filename_wadb + else: + WhatsApp.path_root = self.file_msgstore_db.parent + raise RuntimeError('Database not in a sub directory path called "{0}"!'.format(WhatsApp.path_data_databases)) + except Exception as error: + self.info_output('Searching all needed files in current directory. {0}'.format(error), 1) + + # we need at least a valid database + try: + self.check_sqlite(self.file_msgstore_db) + except sqlite3.DatabaseError as error: + # maybe it's just encrypted, let's try to decrypt it *fingers crossed* + self.info_output('Can\'t open database file "{0}" Maybe it\'s encryted. {1}'.format(self.file_msgstore_db ,str(error)), 1) + try: + # write decrypted file to same location as the encrypted file + decryptedfile = self.file_msgstore_db.parent / str(self.file_msgstore_db.stem + ".decrypted.db") + decodedfile = decryptwhatsapp(self.file_msgstore_db, decryptedfile, self.file_key) + self.check_sqlite(decodedfile) + except: + raise + else: + self.file_msgstore_db = decodedfile + self.info_output("Decrypted file: '{0}'".format(self.file_msgstore_db)) + except: + raise + + # open db connection + self.db_msgstore = sqlite3.connect(os.fspath(self.file_msgstore_db)) + self.db_msgstore.row_factory = sqlite3.Row + self.db_msgstore.text_factory = lambda x: str(x.decode('utf_8_sig')) + + # check if we have a wa.db and open db + try: + self.check_sqlite(self.file_wa_db) + self.db_wa = sqlite3.connect(os.fspath(self.file_wa_db)) + self.db_wa.row_factory = sqlite3.Row + self.db_wa.text_factory = lambda x: str(x.decode('utf_8_sig')) + except: + self.info_output('No wa.db file in "{0}". This means we have no contact names and status.'.format(self.file_wa_db), 1) + self.file_wa_db = None + self.db_wa = None + + # get owner information from prefs + try: + self.prefs = self.parse_prefs(self.file_prefs) + except Exception as error: + self.info_output("No preference file. This means we have owner name or information. {0}".format(error), 1) + self.prefs = None + # fallback: try to get jid from msgstore.db + tmp_jid = self.find_own_id(self.file_msgstore_db) + if tmp_jid: + self.owner_jid = tmp_jid + else: + tmp_jid = self.prefs.get("registration_jid") + if not tmp_jid: + try: + tmp_jid = self.prefs.get("cc") + self.prefs.get("ph") + self.owner_jid = tmp_jid + WhatsApp.JID_EXT_S + except: + tmp_jid = self.find_own_id(self.file_msgstore_db) + + if not tmp_jid: + tmp_jid = "unkown" + else: + self.owner_jid = tmp_jid + WhatsApp.JID_EXT_S + self.owner_name = self.prefs.get("push_name", "") + " (Owner)" + + # preinit owner, and lists of contacts + self.init_contacts() + self.owner = self.Contact(platform_id=self.owner_jid, name=self.owner_name, owner=True) + self.contact_objects.append(self.owner) + + # we are done + self.info_output("... finished.", 3) + + def __iter__(self): + return self + + def __next__(self): + try: + item = self.contacts[self.current] + except IndexError: + self.current = 0 + raise StopIteration() + self.current += 1 + return item + + def check_sqlite(self, database_file): + """Check SQLite database for integrity.""" + + # prevent creation of new SQLite dabases by "opening" + if not database_file.exists(): + raise FileNotFoundError('[Error:] There is no file "{0}"'.format(database_file)) + + try: + db = sqlite3.connect(os.fspath(database_file)) + db_c = db.cursor() + # Get the integrity status. 'ok' means we're good + integrity_status = db_c.execute("PRAGMA integrity_check;").fetchone()[0] + db_c.close() + except: + raise + else: + if integrity_status != "ok": + raise RuntimeError('File "{0}" has integrity issues.'.format(database_file)) + + def find_own_id(self, file_msgstore_db=None): + jid = None + + # let's use the db file from our object + if not file_msgstore_db: + file_msgstore_db = self.file_msgstore_db + + try: + msgstore = sqlite3.connect(os.fspath(file_msgstore_db)) + msgstore.row_factory = sqlite3.Row + msgstore.text_factory = lambda x: str(x.decode('utf_8_sig')) + msg_cursor = msgstore.cursor() + + # own id is stored in remote_resource if owner joins group chat + msg_cursor.execute( + "SELECT remote_resource as jid FROM messages WHERE " + "key_from_me=1 AND status={0} AND media_size={1}".format( + WhatsApp.MESSAGES_STATUS_MSG, + WhatsApp.MS_SC2ID["GROUP_JOIN"][0])) + msg = msg_cursor.fetchone() + if msg: + jid = msg["jid"] + if jid: + return jid + self.info_output( + 'Found owner_id in "messages" table: "{0}"'.format(jid)) + + except Exception as error: + self.info_output( + 'Failure at fetching own user id from "messages" table in ' + '"{0}": {1}'.format(file_msgstore_db, str(error))) + + try: + # table messages stores own id in thumb_image if you where added to a group chat + msg_cursor.execute( + "SELECT thumb_image FROM messages WHERE key_from_me=1 " + "AND status={0} AND media_size={1}" + " AND media_duration=1".format( + WhatsApp.MESSAGES_STATUS_MSG, + WhatsApp.MS_SC2ID["GROUP_ADD"][0])) + + msg = msg_cursor.fetchone() + if msg: + jid = self.jid_from_thumb_image(msg["thumb_image"]) + self.info_output('Found owner_id in "messages" table: "{0}"'.format(jid)) + return jid + except Exception as error: + self.info_output('Failure at fetching own user id from "messages" table in "{0}": {1}'.format(file_msgstore_db, str(error))) + return None + + def parse_prefs(self, path_to_prefs_file): + """Parses xml preference files (whatsapp_preferences.xml)""" + preferences = {} + xmldoc = minidom.parse(os.fspath(path_to_prefs_file)) + cNodes = xmldoc.childNodes + for node in cNodes: + eList = node.getElementsByTagName("*") + for counter, e in enumerate(eList): + if e.hasAttribute("name"): + preferences[e.getAttribute("name")] = " ".join(t.nodeValue for t in e.childNodes if t.nodeType == t.TEXT_NODE) + self.info_output('Getting preferences from "{0}"'.format(path_to_prefs_file)) + self.info_output(' Found {0} preference(s).'.format(counter+1)) + return preferences + + def init_contacts(self): + """ + Parse wa.db (if available) or as fallback msgstore.db for a contact list + + :return: list of WhatsApp IDs + """ + + # default values + self.contact_objects = [] + contacts_set = set() + + # connect to wa.db if available + if self.db_wa: + try: + wa_cursor = self.db_wa.cursor() + self.info_output('Getting contacts from "{0}".'.format(self.file_wa_db)) + + # table wa_contacts stores all contacts + wa_cursor.execute("SELECT * FROM wa_contacts WHERE is_whatsapp_user=1 " + "ORDER BY wa_name") + wa = wa_cursor.fetchall() + source = str(self.file_wa_db.name) + ' - Table "wa_contacts"' + + # loop over contacts + for counter, w in enumerate(wa): + # skip if key_remote_jid is -1 + if w["jid"] == "-1": + continue + + # build Contact + cleartext_name = "" + if w["wa_name"]: + if w["display_name"]: + cleartext_name = w["display_name"] + " (" + w["wa_name"] +")" + else: + cleartext_name = w["wa_name"] + else: + if w["display_name"]: + cleartext_name = w["display_name"] + + contacts_set.add(w["jid"]) + self.contact_objects.append(self.Contact(platform_id=w["jid"], name=cleartext_name, status=w["status"], source=source , source_id=w["_id"])) + + self.info_output(" Found {0} contact(s).".format(counter+1)) + + except Exception as error: + self.info_output('Failure at fetching data from "wa_contacts" table in "{0}": '.format(self.file_wa_db) + str(error)) + + else: + #fallback: use msgstore as contact source + try: + contacts = [] + msg_cursor = self.db_msgstore.cursor() + self.info_output('Getting contacts from "{0}".'.format(self.file_msgstore_db)) + + # get jid from messages table + msg_cursor.execute("SELECT key_remote_jid as jid FROM messages GROUP BY key_remote_jid") + msg = msg_cursor.fetchall() + for c in msg: + contacts.append(c["jid"]) + except Exception as error: + self.info_output('Failure at fetching contacts from "messages" table in "{0}": '.format(self.file_msgstore_db) + str(error)) + # we have no contacts - this a serious error + raise + + try: + # table "jid" stores some more contacts - let's add them + msg_cursor.execute("SELECT raw_string as jid FROM jid") + msg = msg_cursor.fetchall() + for c in msg: + contacts.append(c["jid"]) + + # table group_participants stores contacts from groups - let's add them too + msg_cursor.execute("SELECT jid FROM group_participants GROUP by jid") + msg = msg_cursor.fetchall() + for c in msg: + contacts.append(c["jid"]) + except Exception as error: + self.info_output('Failure at fetching additional contacts from "messages" table in "{0}": '.format(self.file_msgstore_db) + str(error)) + + for counter, jid in enumerate(contacts): + # skip if key_remote_jid is -1 + if jid == "-1" or jid == "0@s.whatsapp.net": + continue + + # build Contact + contacts_set.add(jid) + self.contact_objects.append(self.Contact(platform_id=jid, source=str(self.file_msgstore_db.name))) + + self.info_output(" Found {0} contact(s).".format(counter+1)) + self.contacts = list(contacts_set) + + def get_contact(self, platform_id): + """ + Get details for a given platform_id as {dict}. If platform_id is unknown + you will get the requestet id back. This is by intention, so you can call this + function from reports etc. without worring. + + :param self: + :param platform_id: + :returns: found or empty Contact object + """ + for c in self.contact_objects: + if c.platform_id == platform_id: + return c + return self.Contact(platform_id) + + def get_chat(self, platform_id): + """ + Parse msgstore.db for a chat of platform_id. + + :param platform_id: WhatsApp ID + :return: a Chat object + """ + + # connect to msgstore.db + try: + msg_cursor = self.db_msgstore.cursor() + msg_cursor.execute( + "SELECT count(_id) as count_msg, " + "min(timestamp) as timestamp_start, " + "max(timestamp) as timestamp_end " + "FROM messages WHERE key_remote_jid='{0}'".format(platform_id)) + data = [dict(row) for row in msg_cursor.fetchall()][0] + self.info_output( + 'Get chat for {0}.'.format( + platform_id)) + except Exception as error: + self.info_output( + 'Failure at fetching data from "messages" table in "{0}": ' + .format(self.file_msgstore_db) + str(error)) + # this is a serious error + raise + + try: + # get attachment count + msg_cursor.execute( + "SELECT count(_id) as count_attach FROM messages WHERE " + "key_remote_jid='{0}' AND media_mime_type NOT NULL" + .format(platform_id)) + m2 = msg_cursor.fetchone() + data.update(m2) + + except Exception as error: + self.info_output( + 'Failure at fetching some details from "messages" table ' + 'in "{0}": '.format(file_msgstore_db) + str(error)) + + try: + # get id of last read message, subject & timstamp (groups) + msg_cursor.execute( + "SELECT last_read_message_table_id as last_read_id, " + "last_message_table_id as last_id, subject, creation " + "as timestamp_creation " + "FROM chat_list WHERE key_remote_jid='{0}'" + .format(platform_id)) + m2 = msg_cursor.fetchone() + if m2: + data.update(m2) + + except Exception as error: + self.info_output( + 'Failure at fetching some details from "chat_list" table ' + 'in "{0}": '.format(self.file_msgstore_db) + str(error)) + + # generate member_count and adminlist for groupchats + if WhatsApp.JID_EXT_G in platform_id: + list_admins = set() + + # table group_participants stores contacts & admins of groups + try: + msg_cursor.execute( + "SELECT jid, admin FROM group_participants WHERE " + "gjid='{0}'".format(platform_id)) + m = msg_cursor.fetchall() + for count, c in enumerate(m): + # TODO: check for admins if no 2 is correct + if c["admin"] == 2: + list_admins.add(c["jid"]) + # +1 for count starting from 0, +1 for owner of device + data.update(count_users = count + 2) + except Exception as error: + self.info_output( + 'Failure at fetching some details from ' + '"group_participants" table in "{0}": ' + .format(self.file_msgstore_db) + str(error)) + data.update(list_admins = list_admins) + data.update(memberlist = self.get_members(platform_id)) + else: + data.update(memberlist = None) + + # add extra data + data.update(source = str(self.file_msgstore_db.name) + + ' - tables "messages", "chat_list and "group_participants"') + data.update(platform_id = platform_id) + data.update(contact = self.get_contact(platform_id)) + + # add message generator + if data["count_msg"]: + data.update(messages = self.get_messages(platform_id)) + else: + data.update(messages = None) + + + self.info_output(" Found {0} message(s).".format(data["count_msg"])) + return self.Chat(**data) + + def get_messages(self, platform_id): + """ + Parses msgstore.db and returns a list of Message objects for a givne + platform_id. + + :param platform_id: WhatsApp ID + :return: generator object which gives back the messages + """ + + # connect to msgstore.db + try: + msg_cursor = self.db_msgstore.cursor() + msg_cursor.execute( + "SELECT * FROM messages " + "WHERE key_remote_jid='{0}' " + "ORDER BY timestamp ASC".format(platform_id)) + for counter, m in enumerate(msg_cursor): + self.info_output(" getting message: {0}".format(counter) ,2, end="\r") + yield self.Message( + source = self.file_msgstore_db.name, + source_id=m["_id"], + owner_jid=self.owner.platform_id, + key_from_me=m["key_from_me"], + key_remote_jid=m["key_remote_jid"], + media_wa_type=m["media_wa_type"], + data=m["data"], + thumb_image=m["thumb_image"], + remote_resource= m["remote_resource"], + timestamp=m["timestamp"], + received_timestamp=m["received_timestamp"], + send_timestamp=m["send_timestamp"], + status=m["status"], + media_size=m["media_size"], + media_name=m["media_name"], + media_caption=m["media_caption"], + media_hash=m["media_hash"], + media_url=m["media_url"], + media_duration=m["media_duration"], + latitude=m["latitude"], + longitude=m["longitude"], + media_enc_hash=m["media_enc_hash"], + thumbnail=m["key_id"] + ) + except Exception as error: + self.info_output( + 'Failure at fetching messages from "messages" table in "{0}":' + ' '.format(self.file_msgstore_db) + str(error)) + # this is a serious error + raise + + def get_members(self, platform_id): + """ + Parse msgstore.db for a memberlist in a chat of platform_id. + + :param platform_id: WhatsApp ID + :param file_msgstore_db: whatsapp database file + :return: a MessageList object + + notice: + - affected user id is in field thumb_image + - the action (user removed, user added, ... ) is in the field media_size + + obviously someone at whatsapp smoked pot... + """ + + # connect to msgstore.db + try: + msg_cursor = self.db_msgstore.cursor() + + except Exception as error: + self.info_output( + 'Failure at fetching data from "messages" table in "{0}": ' + .format(self.file_msgstore_db) + str(error)) + # this is a serious error + return None + + # generate memberlists + group_contacts = set() + try: + msg_cursor.execute( + "SELECT jid FROM group_participants WHERE " + "gjid='{0}'".format(platform_id)) + m = msg_cursor.fetchall() + for c in m: + group_contacts.add(c["jid"]) + + msg_cursor.execute( + "SELECT max(timestamp) as timestamp_end " + "FROM messages WHERE key_remote_jid='{0}'".format(platform_id)) + m = msg_cursor.fetchone() + timestamp_end = m["timestamp_end"] + + except Exception as error: + self.info_output( + 'Failure at fetching some memberlists from ' + '"group_participants" table in "{0}": ' + .format(self.file_msgstore_db) + str(error)) + return None + + # populate MemberList + memberlist = self.MemberList(platform_id) + try: + # initialise list with latest state from group_participants + #memberlist.add(timestamp_end, group_contacts) + + # fetch user changes from messages table + msg_cursor.execute( + "SELECT _id, media_size, timestamp, media_size, thumb_image, " + "remote_resource FROM messages WHERE status='{0}' " + "AND key_remote_jid='{1}' " + "ORDER BY timestamp DESC".format( + WhatsApp.MESSAGES_STATUS_MSG, platform_id)) + + # loop over messages in reverse order to find member changes backwards + for m2 in msg_cursor: + ms = m2["media_size"] + # add user on remove (we are coming timeline reverse order upwards) + if ms == WhatsApp.MS_SC2ID["GROUP_REMOVE"][0] \ + or ms == WhatsApp.MS_SC2ID["GROUP_LEFT"][0]: + if ms == WhatsApp.MS_SC2ID["GROUP_REMOVE"][0]: + user = self.jid_from_thumb_image(m2["thumb_image"]) + else: + user = m2["remote_resource"] + if user in group_contacts: + self.info_output( + 'Tried to add user "{0}" which is already' + ' in the chat "{1}". ID {2} in message table.'.format( + error, platform_id, m2["_id"])) + memberlist.add(m2["timestamp"], group_contacts) + group_contacts.add(user) + + #remove user + if ms == WhatsApp.MS_SC2ID["GROUP_ADD"][0] \ + or ms == WhatsApp.MS_SC2ID["GROUP_JOIN"][0] \ + or ms == WhatsApp.MS_SC2ID["GROUP_JOIN2"][0]: + try: + user = self.jid_from_thumb_image(m2["thumb_image"]) + if user: + memberlist.add(m2["timestamp"], group_contacts) + group_contacts.remove(user) + except KeyError: + self.info_output( + 'Tried to remove user "{0}" which is not' + ' in the chat "{1}". ID {2} in message table.'.format( + user, platform_id, m2["_id"])) + memberlist.add_backwards(m2["timestamp"], user) + + except Exception as error: + self.info_output('Creation of member list for "{0}" failed.' + ' List may be incomplete. {1}'.format(platform_id, error)) + + return memberlist + + + def dump_tumbnails(self): + self.info_output("Dumping thumbnails from database to filesystem ...", 3) + output_dir = WhatsApp.path_root / self.dirname_thumbnail + if output_dir.exists(): + self.info_output('Output directory "{0}" exists. Skipping thumbnail dump.'.format(output_dir)) + self.info_output("... finished.", 3) + return + + output_dir.mkdir(parents=True, exist_ok=True) + + # connect to msgstore.db + try: + msg_cursor = self.db_msgstore.cursor() + msg_cursor.execute( + "SELECT thumbnail, key_id FROM message_thumbnails") + except Exception as error: + self.info_output( + 'Failure at fetching thumbnails from "messages_thumbnails" table in "{0}": {1}' + ' '.format(self.file_msgstore_db, str(error))) + + for count, msg in enumerate(msg_cursor): + try: + output_file = output_dir / str(msg["key_id"]) + output_file.write_bytes(msg["thumbnail"]) + self.info_output('Thumbnail {0}: "{1}" '.format(count,msg["key_id"]), 2, "\r") + except Exception as error: + self.info_output( + 'Failure at writing thumbnail "{0}": {1}' + ' '.format(msg["key_id"], str(error))) + self.info_output("", 2) + self.info_output("... finished.", 3) + + @classmethod + def jid_from_thumb_image(cls, blob): + """ + Get jid from an Java Array Object + + """ + if blob: + try: + data = javaobj.loads(blob)[0].annotations[1] + except: + data = "" + return data + + def info_output(self, message, level=2, end="\n"): + levels = ["Error", "Warning", "Info"] + if level < 3: + print(" " + levels[level] + (7-len(levels[level]))*" "+ ": " + str(message), end=end) + else: + print("**** " + str(message) + " ****", end=end) + + + class Contact: + """Contact object to store contact information""" + + def __init__(self, platform_id, name=None, status=None, source=None, source_id=None, owner=None): + """ + Create a new Contact object. + + :param platform_id: WhatsApp-ID + :param name=None: name of contact + :param status=None: user status + :param source=None: source of data (database, etc.) + :param source_id=None: source id of data (id of entry in database) + """ + self.platform_id = platform_id + self.name = name + self.status = status + self.source = source + self.source_id = source_id + self.file_avatar_thumb = self.find_avatar(platform_id, "thumb") + self.file_avatar = self.find_avatar(platform_id) + + # find owners avatars + if owner: + self.file_avatar_thumb = self.find_avatar("me", "thumb") + file_avatar = WhatsApp.path_root / WhatsApp.path_data_files / "me.jpg" + if file_avatar.exists() and file_avatar.stat().st_size: + self.file_avatar = file_avatar + else: + self.file_avatar = None + + def find_avatar(self, platform_id, size=None): + """ + Scans kown places for avatar images. + + :param platform_id: WhatsApp-ID + :param size=None: size none means full image, "thumb" means thumbnail + :return: path to image file or None + """ + if size == "thumb": + test_pic = WhatsApp.path_root / WhatsApp.path_data_avatars / (str(platform_id) + ".j") + else: + id_no, *_ = str(platform_id).split('@') + test_pic = WhatsApp.path_root / WhatsApp.path_data_avatars_big / (str(id_no) + ".jpg") + + if test_pic.exists() and test_pic.stat().st_size: + path_avatar = test_pic + return path_avatar + else: + return None + + + class Chat: + """Chat object stores information for a chat session.""" + + def __init__(self, platform_id, count_msg=None, last_read_id=None, + last_id=None, timestamp_start=None, memberlist=None, + timestamp_end=None, count_attach=None, count_users=None, + list_admins=None, subject=None, messages=None, + timestamp_creation=None, source=None, contact=None): + + """ + Create a new Chat object. + + :param platform_id: WhatsApp ID + :param msg_count=None: number of messages in this chat + :param subject=None: group name (groups) + :param last_read_message_id=None: id of last read message + :param timestamp_creation: timestamp of chat creation (groups) + :param timestamp_start: timestamp of first message in chat + :param timestamp_end: timestamp of last message in chat + :param attach_count=None: number of attachments in this chat + :param user_count=None: number of users in this chat + :param admin_list: [list] of admin platform_ids (groups) + :param source: source of data (database, etc.) + """ + + self.platform_id = platform_id + self.contact_id = platform_id + if WhatsApp.JID_EXT_G in platform_id: + self.chat_type = "group" + else: + self.chat_type = "dialog" + self.count_msg = count_msg + self.subject = subject + self.last_read_id = last_read_id + self.timestamp_creation = timestamp_creation + self.timestamp_start = timestamp_start + self.timestamp_end = timestamp_end + self.count_attach = count_attach + if count_users == None: + self.count_users = 2 + else: + self.count_users = count_users + self.list_admins = list_admins + self.source = source + self.messages = messages + self.memberlist = memberlist + self.contact = contact + + + class Message: + """ + Stores a single message entry from msgstore.db -> messages + """ + def __init__(self, source_id, owner_jid, key_from_me, key_remote_jid, + data, thumb_image, remote_resource, timestamp, received_timestamp, + send_timestamp, status, media_wa_type, media_size, media_name, + media_caption, media_hash, media_duration, latitude, + longitude, media_enc_hash, thumbnail, source, media_url): + + self.platform_id = key_remote_jid + self.source_id = source_id + self.source = source + self.message_type = None + + # generate status message + if status == WhatsApp.MESSAGES_STATUS_MSG: + self.message_type = "status" + + # group messages + try: + sc = WhatsApp.MS_ID2SC[int(media_size)] + except IndexError: + sc = "ID" + str(media_size) + WhatsApp.info_output("Error unkown media_size field found, value: {0}", media_size) + + if sc.startswith("GROUP"): + # set media type to text + self.sender_id = key_remote_jid + self.receiver_id = owner_jid + + try: + if thumb_image: + affected_jid = WhatsApp.jid_from_thumb_image(thumb_image) + data = WhatsApp.MS_ID2MSG[media_size][1].format( + affected_jid, remote_resource) + else: + data = WhatsApp.MS_ID2MSG[media_size][1].format(remote_resource) + except: + data = "" + + media_size = None + + # calls + elif WhatsApp.MT_ID2SC[int(media_wa_type)].startswith("CALL"): + data = WhatsApp.MT_ID2MSG[int(media_wa_type)][1] + else: + try: + data = WhatsApp.MS_ID2MSG[int(media_size)][1] + except IndexError: + data = None + + # set sender and receiver + if key_from_me == 0: + if WhatsApp.JID_EXT_G in key_remote_jid: + self.sender_id = remote_resource + else: + self.sender_id = key_remote_jid + self.receiver_id = owner_jid + self.message_type = "incoming" if not self.message_type else self.message_type + else: + self.sender_id = owner_jid + self.receiver_id = key_remote_jid + self.message_type = "outgoing" if not self.message_type else self.message_type + + # timestamps + self.timestamp_message = timestamp + if received_timestamp >0: + self.timestamp_received = received_timestamp + if send_timestamp >0: + self.timestamp_sent = send_timestamp + + # create MessageContent + self.content_message = WhatsApp.MessageContent( + media_wa_type=media_wa_type, data=data, media_size=media_size, + media_name=media_name, media_caption=media_caption, + media_hash=media_hash, media_duration=media_duration, + latitude=latitude, longitude=longitude, media_url=media_url, + media_enc_hash=media_enc_hash, thumbnail=thumbnail, + thumb_image=thumb_image) + + + class MessageContent: + def __init__(self, media_wa_type, data, media_size, media_name, + media_caption, media_hash, media_duration, latitude, + longitude, media_enc_hash, thumbnail, thumb_image, + media_url): + + self.media_type = WhatsApp.MT_ID2SC[int(media_wa_type)] + self.size = media_size + self.caption = media_caption + self.name = media_name + self.url = media_url + self.file_thumbnail = self.find_thumbnail(thumbnail) + self.file_media = self.find_file(thumb_image) + self.data = data + self.duration = media_duration + self.latitude = latitude + self.longitude = longitude + + def find_file(self, thumb_image): + try: + if thumb_image: + media_file_name = javaobj.loads(thumb_image)[0].file.path + file_path = WhatsApp.path_root / WhatsApp.path_wa_root / media_file_name + if file_path.exists() and file_path.stat().st_size: + return file_path + else: + return None + except: + return None + + def find_thumbnail(self, thumbnail): + if thumbnail: + file_path = WhatsApp.path_root / WhatsApp.dirname_thumbnail / thumbnail + if file_path.exists() and file_path.stat().st_size: + return file_path + else: + return None + + + class MemberList: + """ + Stores a list of whatsapp ids of a group chat for a given timestamp + """ + + def __init__(self, platform_id): + self.timestamp_list = [] + self.contact_list = [] + self.platform_id = platform_id + self.current = 0 + + def __iter__(self): + return self + + def __next__(self): + try: + item = ( self.timestamp_list[self.current], self.contact_list[self.current]) + except IndexError: + self.current = 0 + raise StopIteration() + self.current += 1 + return item + + def add(self, timestamp, contactlist): + """ + Add list of Contacts for a timestamp + + :param timestamp: timestamp of list + :param contactlist: list of Contact objects + """ + if timestamp and contactlist: + self.timestamp_list.append(timestamp) + self.contact_list.append(contactlist.copy()) + + def add_backwards(self, timestamp, platform_id): + for counter, ts in enumerate(self.timestamp_list): + if ts >= timestamp: + cl = list(self.contact_list[counter].copy()) + cl.append(platform_id) + self.contact_list[counter] = set(cl) + + def get_state(self, timestamp=None): + """ + Find the last state of MemberList for a given timestamp) + + :param timestamp: timestamp (if empty get latest state) + :return: a list of jids + """ + + # get latest state + if not timestamp: + if self.contact_list: + return sorted(self.contact_list[0]) + else: + return None + + current_timelist = None + for counter, entry in enumerate(self.timestamp_list): + if counter == 0 and timestamp > entry: + # timestamp is higher as the highest entry + WhatsApp.info_output(self, + "Requested timestamp is higher then the highest in list.", 2) + return None + if timestamp == entry: + return sorted(self.contact_list[counter]) + elif timestamp < entry: + current_timelist = self.contact_list[counter] + continue + else: + return current_timelist + WhatsApp.info_output(self, + "Requested timestamp is lower then the lowest in list.", 2) + return None + + def get_allmembers(self): + """ + Generates a sorted list with all jids which where ever in group + + :return: a set with all jids + """ + if self.contact_list: + return sorted(set.union(*self.contact_list)) + else: + return None + +# main function +def main(argv): + """ + Function for running whatsapp.py class as standalone + + :param argv: params supplied by command line + """ + + from argparse import ArgumentParser + + # argument parser for options + parser = ArgumentParser(description= + 'Converts WhatsApp database and files into a HTML report.') + parser.add_argument(dest='infile', help="input 'msgstore.db' or " + "'msgstore.db.cryptXX' file to scan",nargs='?', + default='./report/com.whatsapp/databases/msgstore.db') + parser.add_argument('-o', '--outdir', dest='outdir', + help="optionally choose name of output directory",default='.') + options = parser.parse_args() + + # create WhatsApp object + whatsapp = WhatsApp(options.infile) + + # we need to dump the thumbnails + whatsapp.dump_tumbnails() + + + print('\nWhatsApp Account: {0} ({1})'.format(whatsapp.owner.name, + whatsapp.owner.platform_id)) + + output_dir = Path(options.outdir) / "wa-{0}".format( + whatsapp.file_msgstore_db.name) + + HTMLReportMessaging(output_dir, whatsapp) + +# run +if __name__ == '__main__': + main(sys.argv[1:]) \ No newline at end of file