Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add --since flag to only get recent messages #202

Merged
merged 2 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions slackviewer/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,12 @@ def get_export_info(archive_name):
extracted_path = extract_archive(archive_name)
base_filename = basename(archive_name)
(noext_filename, _) = splitext(base_filename)
# Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018"
# If that's not the format, we will just fall back to the extension-free filename.
(workspace_name, _) = noext_filename.split(" Slack export ", 1)
workspace_name = base_filename
# In case the archive is a zip file
if not os.path.isdir(extracted_path):
# Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018"
# If that's not the format, we will just fall back to the extension-free filename.
(workspace_name, _) = noext_filename.split(" Slack export ", 1)
return {
"readable_path": extracted_path,
"basename": base_filename,
Expand Down
12 changes: 9 additions & 3 deletions slackviewer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,16 @@ def clean(wet):


@cli.command(help="Generates a single-file printable export for an archive file or directory")
@click.option('--debug', is_flag=True, default=flag_ennvar("FLASK_DEBUG"))
@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]),
help="Only show messages since this date.")
@click.argument('archive_dir')
def export(archive_dir):

def export(archive_dir, debug, since):
css = pkgutil.get_data('slackviewer', 'static/viewer.css').decode('utf-8')
tmpl = Environment(loader=PackageLoader('slackviewer')).get_template("export_single.html")
export_file_info = get_export_info(archive_dir)
r = Reader(export_file_info["readable_path"])
r = Reader(export_file_info["readable_path"], debug, since)
channel_list = sorted(
[{"channel_name": k, "messages": v} for (k, v) in r.compile_channels().items()],
key=lambda d: d["channel_name"]
Expand All @@ -51,5 +55,7 @@ def export(archive_dir):
source_file=export_file_info["basename"],
channels=channel_list
)
with open(export_file_info['stripped_name'] + '.html', 'w') as outfile:
with open(export_file_info['stripped_name'] + '.html', 'wb') as outfile:
outfile.write(html.encode('utf-8'))

print("Exported to {}.html".format(export_file_info['stripped_name']))
47 changes: 28 additions & 19 deletions slackviewer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
from slackviewer.reader import Reader
from slackviewer.freezer import CustomFreezer
from slackviewer.utils.click import envvar, flag_ennvar

def configure_app(app, archive, channels, no_sidebar, no_external_references, debug):


def configure_app(app, archive, channels, no_sidebar, no_external_references, debug, since):
app.debug = debug
app.no_sidebar = no_sidebar
app.no_external_references = no_external_references
Expand All @@ -19,7 +20,7 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de
app.config["PROPAGATE_EXCEPTIONS"] = True

path = extract_archive(archive)
reader = Reader(path)
reader = Reader(path, debug, since)

top = flask._app_ctx_stack
top.path = path
Expand All @@ -30,6 +31,12 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de
top.mpims = reader.compile_mpim_messages()
top.mpim_users = reader.compile_mpim_users()

# remove any empty channels & groups. DM's are needed for now
# since the application loads the first
top.channels = {k: v for k, v in top.channels.items() if v}
top.groups = {k: v for k, v in top.groups.items() if v}


@click.command()
@click.option('-p', '--port', default=envvar('SEV_PORT', '5000'),
type=click.INT, help="Host port to serve your content on")
Expand Down Expand Up @@ -57,32 +64,34 @@ def configure_app(app, archive, channels, no_sidebar, no_external_references, de
@click.option('--debug', is_flag=True, default=flag_ennvar("FLASK_DEBUG"))
@click.option("-o", "--output-dir", default="html_output", type=click.Path(),
help="Output directory for static HTML files.")
@click.option("--html-only", is_flag=True, default=False,
@click.option("--html-only", is_flag=True, default=False,
help="If you want static HTML only, set this.")
@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]),
help="Only show messages since this date.")

def main(
port,
archive,
ip,
no_browser,
channels,
no_sidebar,
no_external_references,
test,
debug,
output_dir,
html_only
):
port,
archive,
ip,
no_browser,
channels,
no_sidebar,
no_external_references,
test,
debug,
output_dir,
html_only,
since,
):
if not archive:
raise ValueError("Empty path provided for archive")

configure_app(app, archive, channels, no_sidebar, no_external_references, debug)
configure_app(app, archive, channels, no_sidebar, no_external_references, debug, since)

if html_only:

# We need relative URLs, otherwise channel refs do not work
app.config["FREEZER_RELATIVE_URLS"] = True

# Custom subclass of Freezer allows overwriting the output directory
freezer = CustomFreezer(app)
freezer.cf_output_dir = output_dir
Expand Down
10 changes: 9 additions & 1 deletion slackviewer/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import emoji


class Message(object):

_DEFAULT_USER_ICON_SIZE = 72
Expand All @@ -12,6 +13,13 @@ def __init__(self, formatter, message):
self._formatter = formatter
self._message = message

def __repr__(self):
    """Short debug representation: user name, time and a text preview.

    Texts longer than 20 characters are cut to their first 20 characters
    followed by "..."; a missing text is shown as ``None``.
    """
    preview = self._message.get("text")
    too_long = bool(preview) and len(preview) > 20
    if too_long:
        preview = preview[:20] + "..."

    return f"<Message({self.username}@{self.time}: {preview})>"

##############
# Properties #
##############
Expand Down Expand Up @@ -116,7 +124,7 @@ def subtype(self):
return self._message.get("subtype")


class LinkAttachment(object):
class LinkAttachment():
"""
Wrapper class for entries in either the "files" or "attachments" arrays.
"""
Expand Down
77 changes: 73 additions & 4 deletions slackviewer/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import io
import json
import os
import datetime
import sys

from slackviewer.formatter import SlackFormatter
from slackviewer.message import Message
Expand All @@ -15,8 +17,10 @@ class Reader(object):
Reader object will read all of the archives' data from the json files
"""

def __init__(self, PATH):
def __init__(self, PATH, debug, since):
self._PATH = PATH
self._debug = debug
self._since = since
# TODO: Make sure this works
with io.open(os.path.join(self._PATH, "users.json"), encoding="utf8") as f:
self.__USER_DATA = {u["id"]: User(u) for u in json.load(f)}
Expand Down Expand Up @@ -178,11 +182,11 @@ def _create_messages(self, names, data, isDms=False):
for day in sorted(day_files):
with io.open(os.path.join(self._PATH, day), encoding="utf8") as f:
# loads all messages
day_messages = json.load(f)
day_messages = json.load(f)

# sorts the messages in the json file
day_messages.sort(key=Reader._extract_time)
day_messages.sort(key=Reader._extract_time)

messages.extend([Message(formatter, d) for d in day_messages])

chats[name] = messages
Expand Down Expand Up @@ -261,6 +265,10 @@ def _build_threads(self, channel_data):
if isinstance(item, Message):
data_with_sorted_threads.append(item)
channel_data[channel_name] = data_with_sorted_threads.copy()

if self._since:
channel_data = self._message_filter_timeframe(channel_data.copy())

return channel_data

def _read_from_json(self, file):
Expand All @@ -279,3 +287,64 @@ def _read_from_json(self, file):
return {u["id"]: u for u in json.load(f)}
except IOError:
return {}

def _message_filter_timeframe(self, channel_data):
    """
    Drop every thread whose newest message is outside the timeframe.

    A thread is a main message together with the replies that directly
    follow it; a reply is a message whose text starts with
    "**Thread Reply:**" (messages without text count as main messages).
    A thread is kept as a whole when its last message passes
    `self._message_in_timeframe`, and removed as a whole otherwise.

    It might be more efficient to filter the messages in the thread sorting
    loop. Yet, this is a more straightforward approach, especially factoring
    in the thread/non-thread message ids etc.

    Messages & threads need to be provided in a sorted form. If a reply
    outside the timeframe directly follows a message inside it, the input
    cannot be sorted: an error is written to stderr and the process exits
    with status 1.

    The per-channel message lists are filtered in place; the same
    `channel_data` mapping is returned.
    """
    for messages in channel_data.values():
        kept = []
        current_thread = []
        previous_in_timeframe = False

        for message in messages:
            in_timeframe = self._message_in_timeframe(message)

            # Message text can be empty or missing; such messages are never replies
            text = message._message.get('text')
            is_reply = bool(text) and text.startswith("**Thread Reply:**")

            if is_reply:
                if previous_in_timeframe and not in_timeframe:
                    print("ERROR: This should never happen. sorting is broken...",
                          file=sys.stderr)
                    sys.exit(1)
                current_thread.append(message)
            else:
                # A new main message closes the previous thread; keep that
                # thread only if its last message was inside the timeframe.
                if previous_in_timeframe:
                    kept.extend(current_thread)
                current_thread = [message]

            previous_in_timeframe = in_timeframe

        # Last thread/message of the channel
        if previous_in_timeframe:
            kept.extend(current_thread)

        # Slice assignment keeps the original list object, so every holder
        # of that list sees the filtered result (same as deleting in place).
        messages[:] = kept

    return channel_data


def _message_in_timeframe(self, msg):
    """
    Returns True if the message was sent at or after `self._since`.

    When no `since` filter is set, every message is in the timeframe.
    The message's "ts" value is converted with
    `datetime.datetime.fromtimestamp`, i.e. to a naive local-time datetime,
    and compared with `self._since`.
    # NOTE(review): assumes `self._since` is a naive datetime as well
    # (comparing naive with aware datetimes raises TypeError) - confirm.
    """
    if not self._since:
        return True

    sent_at = datetime.datetime.fromtimestamp(float(msg._message.get('ts')))

    # Inclusive bound: a message stamped exactly at `since` is "since" it
    return self._since <= sent_at
Loading