From 5c4edc8b081f2862b644eec8359970c7f92c55ca Mon Sep 17 00:00:00 2001 From: Volker Date: Sun, 3 Nov 2024 16:11:31 -0600 Subject: [PATCH 1/2] Add --since flag to only get recent messages --- slackviewer/main.py | 47 +++++++++++++++----------- slackviewer/message.py | 10 +++++- slackviewer/reader.py | 77 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 110 insertions(+), 24 deletions(-) diff --git a/slackviewer/main.py b/slackviewer/main.py index e29f911..a73f320 100644 --- a/slackviewer/main.py +++ b/slackviewer/main.py @@ -9,8 +9,9 @@ from slackviewer.reader import Reader from slackviewer.freezer import CustomFreezer from slackviewer.utils.click import envvar, flag_ennvar - -def configure_app(app, archive, channels, no_sidebar, no_external_references, debug): + + +def configure_app(app, archive, channels, no_sidebar, no_external_references, debug, since): app.debug = debug app.no_sidebar = no_sidebar app.no_external_references = no_external_references @@ -19,7 +20,7 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de app.config["PROPAGATE_EXCEPTIONS"] = True path = extract_archive(archive) - reader = Reader(path) + reader = Reader(path, debug, since) top = flask._app_ctx_stack top.path = path @@ -30,6 +31,12 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de top.mpims = reader.compile_mpim_messages() top.mpim_users = reader.compile_mpim_users() + # remove any empty channels & groups. 
DM's are needed for now + # since the application loads the first + top.channels = {k: v for k, v in top.channels.items() if v} + top.groups = {k: v for k, v in top.groups.items() if v} + + @click.command() @click.option('-p', '--port', default=envvar('SEV_PORT', '5000'), type=click.INT, help="Host port to serve your content on") @@ -57,32 +64,34 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de @click.option('--debug', is_flag=True, default=flag_ennvar("FLASK_DEBUG")) @click.option("-o", "--output-dir", default="html_output", type=click.Path(), help="Output directory for static HTML files.") -@click.option("--html-only", is_flag=True, default=False, +@click.option("--html-only", is_flag=True, default=False, help="If you want static HTML only, set this.") +@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]), + help="Only show messages since this date.") def main( - port, - archive, - ip, - no_browser, - channels, - no_sidebar, - no_external_references, - test, - debug, - output_dir, - html_only - ): + port, + archive, + ip, + no_browser, + channels, + no_sidebar, + no_external_references, + test, + debug, + output_dir, + html_only, + since, +): if not archive: raise ValueError("Empty path provided for archive") - configure_app(app, archive, channels, no_sidebar, no_external_references, debug) + configure_app(app, archive, channels, no_sidebar, no_external_references, debug, since) if html_only: - # We need relative URLs, otherwise channel refs do not work app.config["FREEZER_RELATIVE_URLS"] = True - + # Custom subclass of Freezer allows overwriting the output directory freezer = CustomFreezer(app) freezer.cf_output_dir = output_dir diff --git a/slackviewer/message.py b/slackviewer/message.py index ba6acf7..f4060e8 100644 --- a/slackviewer/message.py +++ b/slackviewer/message.py @@ -4,6 +4,7 @@ import logging import emoji + class Message(object): _DEFAULT_USER_ICON_SIZE = 72 @@ -12,6 +13,13 @@ def 
__init__(self, formatter, message): self._formatter = formatter self._message = message + def __repr__(self): + message = self._message.get("text") + if message and len(message) > 20: + message = message[:20] + "..." + + return f"" + ############## # Properties # ############## @@ -116,7 +124,7 @@ def subtype(self): return self._message.get("subtype") -class LinkAttachment(object): +class LinkAttachment(): """ Wrapper class for entries in either the "files" or "attachments" arrays. """ diff --git a/slackviewer/reader.py b/slackviewer/reader.py index 8b7f6e4..cd8fae5 100644 --- a/slackviewer/reader.py +++ b/slackviewer/reader.py @@ -4,6 +4,8 @@ import io import json import os +import datetime +import sys from slackviewer.formatter import SlackFormatter from slackviewer.message import Message @@ -15,8 +17,10 @@ class Reader(object): Reader object will read all of the archives' data from the json files """ - def __init__(self, PATH): + def __init__(self, PATH, debug, since): self._PATH = PATH + self._debug = debug + self._since = since # TODO: Make sure this works with io.open(os.path.join(self._PATH, "users.json"), encoding="utf8") as f: self.__USER_DATA = {u["id"]: User(u) for u in json.load(f)} @@ -178,11 +182,11 @@ def _create_messages(self, names, data, isDms=False): for day in sorted(day_files): with io.open(os.path.join(self._PATH, day), encoding="utf8") as f: # loads all messages - day_messages = json.load(f) + day_messages = json.load(f) # sorts the messages in the json file - day_messages.sort(key=Reader._extract_time) - + day_messages.sort(key=Reader._extract_time) + messages.extend([Message(formatter, d) for d in day_messages]) chats[name] = messages @@ -261,6 +265,10 @@ def _build_threads(self, channel_data): if isinstance(item, Message): data_with_sorted_threads.append(item) channel_data[channel_name] = data_with_sorted_threads.copy() + + if self._since: + channel_data = self._message_filter_timeframe(channel_data.copy()) + return channel_data def 
_read_from_json(self, file): @@ -279,3 +287,64 @@ def _read_from_json(self, file): return {u["id"]: u for u in json.load(f)} except IOError: return {} + + def _message_filter_timeframe(self, channel_data): + """ + It might be more efficient to filter the messages in the thread sorting + loop. Yet, this is a more straightforward approach, especially factoring + in the thread/non-thread message ids etc. + + Messages & threads need to be provided in a sorted form + """ + for channel in channel_data.keys(): + messages_in_thread = [] + last_thread_message_in_timeframe = False + delete_messages = [] + + for location, message in enumerate(channel_data[channel]): + is_msg_in_timeframe = self._message_in_timeframe(message) + msg_text = message._message.get('text') + + # Message can be empty + if not msg_text: + is_thread_msg = False + else: + is_thread_msg = msg_text.startswith("**Thread Reply:**") + + # new main message + if not is_thread_msg: + if not last_thread_message_in_timeframe: + delete_messages.extend(messages_in_thread) + messages_in_thread = [location] + # Thread message + else: + if last_thread_message_in_timeframe and not is_msg_in_timeframe: + print("ERROR: This should never happen. sorting is broken...") + sys.exit(1) + + messages_in_thread.append(location) + + last_thread_message_in_timeframe = is_msg_in_timeframe + + # Last thread/message... 
+ if not last_thread_message_in_timeframe: + delete_messages.extend(messages_in_thread) + + # Remove all messages that are not in the timeframe + for loc in sorted(delete_messages, reverse=True): + del channel_data[channel][loc] + + return channel_data + + + def _message_in_timeframe(self, msg): + """ + Returns true if no since date is set or the message timestamp is later than since + """ + if not self._since: + return True + + ts = msg._message.get('ts') + ts_obj = datetime.datetime.fromtimestamp(float(ts)) + + return self._since < ts_obj From 404ee38e93ec06b8aa4cad16767623baafc48920 Mon Sep 17 00:00:00 2001 From: Volker Date: Sun, 3 Nov 2024 16:33:49 -0600 Subject: [PATCH 2/2] Support --since flag for the CLI --- slackviewer/archive.py | 9 ++++++--- slackviewer/cli.py | 12 +++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/slackviewer/archive.py b/slackviewer/archive.py index 0cf0f73..5b2da65 100644 --- a/slackviewer/archive.py +++ b/slackviewer/archive.py @@ -124,9 +124,12 @@ def get_export_info(archive_name): extracted_path = extract_archive(archive_name) base_filename = basename(archive_name) (noext_filename, _) = splitext(base_filename) - # Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018" - # If that's not the format, we will just fall back to the extension-free filename. - (workspace_name, _) = noext_filename.split(" Slack export ", 1) + workspace_name = base_filename + # In case the archive is a zip file + if not os.path.isdir(extracted_path): + # Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018" + # If that's not the format, we will just fall back to the extension-free filename. 
+ (workspace_name, _) = noext_filename.split(" Slack export ", 1) return { "readable_path": extracted_path, "basename": base_filename, diff --git a/slackviewer/cli.py b/slackviewer/cli.py index 951fc61..47d99fe 100644 --- a/slackviewer/cli.py +++ b/slackviewer/cli.py @@ -33,12 +33,16 @@ def clean(wet): @cli.command(help="Generates a single-file printable export for an archive file or directory") +@click.option('--debug', is_flag=True, default=flag_ennvar("FLASK_DEBUG")) +@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]), + help="Only show messages since this date.") @click.argument('archive_dir') -def export(archive_dir): + +def export(archive_dir, debug, since): css = pkgutil.get_data('slackviewer', 'static/viewer.css').decode('utf-8') tmpl = Environment(loader=PackageLoader('slackviewer')).get_template("export_single.html") export_file_info = get_export_info(archive_dir) - r = Reader(export_file_info["readable_path"]) + r = Reader(export_file_info["readable_path"], debug, since) channel_list = sorted( [{"channel_name": k, "messages": v} for (k, v) in r.compile_channels().items()], key=lambda d: d["channel_name"] @@ -51,5 +55,7 @@ def export(archive_dir): source_file=export_file_info["basename"], channels=channel_list ) - with open(export_file_info['stripped_name'] + '.html', 'w') as outfile: + with open(export_file_info['stripped_name'] + '.html', 'wb') as outfile: outfile.write(html.encode('utf-8')) + + print("Exported to {}.html".format(export_file_info['stripped_name']))