Skip to content

Commit

Permalink
feat: add a European WhatsApp date format and support ngrok domains (v…
Browse files Browse the repository at this point in the history
  • Loading branch information
tnunamak authored Feb 13, 2024
1 parent 831d261 commit 49a6100
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ To launch Selfie, ensure that [python](https://www.python.org), [poetry](https:/

This starts a local web server and should launch the UI in your browser at http://localhost:8181. API documentation is available at http://localhost:8181/docs. Now that the server is running, you can use the API to import your data and connect to your LLM.

> Note: You can host selfie at a publicly-accessible URL with [ngrok](https://ngrok.com). Add your ngrok token in `selfie/.env` and run `poetry run python -m selfie --share`.
> Note: You can host selfie at a publicly-accessible URL with [ngrok](https://ngrok.com). Add your ngrok token (and, optionally, an ngrok domain) to `selfie/.env` and run `poetry run python -m selfie --share`.
### Step 1: Gather Messaging Data

Expand Down
3 changes: 2 additions & 1 deletion selfie/.env.example
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
PORT=8181
NGROK_AUTHTOKEN=<YOUR_NGROK_AUTHTOKEN> # Only needed if running using "share" flag
NGROK_AUTHTOKEN=<YOUR_NGROK_AUTHTOKEN> # Only needed if running using "share" flag.
NGROK_DOMAIN=<YOUR_NGROK_DOMAIN> # Optional static domain. You can create one at https://dashboard.ngrok.com/cloud-edge/domains.
3 changes: 2 additions & 1 deletion selfie/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,13 @@ def get_configured_app(shareable=False):
logging.getLogger("selfie").setLevel(level=logging.DEBUG)

ngrok_auth_token = os.environ.get('NGROK_AUTHTOKEN', None)
ngrok_domain = os.environ.get('NGROK_DOMAIN', None)

if shareable and args.share:
if ngrok_auth_token is None:
raise ValueError("NGROK_AUTHTOKEN environment variable is required to share the API. Visit https://dashboard.ngrok.com to get your token.")

listener = ngrok.forward(args.port, authtoken_from_env=True)
listener = ngrok.forward(args.port, authtoken_from_env=True, domain=ngrok_domain)
logger.info(f"Application is available at {listener.url()}")
# Update config directly as modifying args won't affect the env vars
os.environ['SELFIE_HOST'] = listener.url()
Expand Down
9 changes: 8 additions & 1 deletion selfie/parsers/chat/whatsapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,19 @@ class WhatsAppParser(TextBasedChatParser):
"regex": r"(?:\[)?(?P<timestamp>\d{1,2}-\d{1,2}-\d{2}, \d{1,2}:\d{2}:\d{2}\s(AM|PM))(?:\])? (?P<from>.+?): (?P<value>.+)",
"timestamp_format": "%Y-%m-%d, %I:%M:%S %p",
},
# Deprecated or unofficial formats
{
"regex": r"\[(?P<timestamp>\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2}:\d{2}\s(AM|PM))\] (?P<from>.+?): (?P<value>.+)",
"timestamp_format": "%m/%d/%y, %I:%M:%S %p",
},
# Deprecated or unofficial format (?)
{
"regex": r"(?P<timestamp>\d{1,2}/\d{1,2}/\d{2}, \d{1,2}:\d{2}(AM|PM)) - (?P<from>.+?): (?P<value>.+)",
"timestamp_format": "%m/%d/%y, %I:%M%p",
},
{
"regex": r"\[(?P<timestamp>\d{2}\.\d{2}\.\d{2}, \d{2}:\d{2}:\d{2})\] (?P<from>.+?): (?P<value>.+)",
"timestamp_format": "%d.%m.%y, %H:%M:%S",
},
]

DROP_LINES_LIKE = [
Expand All @@ -72,6 +76,9 @@ class WhatsAppParser(TextBasedChatParser):
{
"regex": r"(?:\[)?(?P<timestamp>\d{1,2}-\d{1,2}-\d{2}, \d{1,2}:\d{2}:\d{2}\s(AM|PM))(?:\])? (?P<value>.+)",
},
{
"regex": r"(?:\[)?(?P<timestamp>\d{2}\.\d{2}\.\d{2}, \d{2}:\d{2}:\d{2})(?:\])? (?P<value>.+)",
}
]

def _preprocess_hook(self, document: str) -> str:
Expand Down

0 comments on commit 49a6100

Please sign in to comment.