Skip to content

Commit

Permalink
extract urls correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
shivanker committed Jan 31, 2025
1 parent 30650c0 commit 290d937
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions session.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from multiprocessing import process
import re
import os
import time
from typing import Any
Expand Down Expand Up @@ -54,11 +55,15 @@ def check_mimetype(url) -> str:


def extract(text):
    """Return the text content behind the URL given in *text*.

    Surrounding whitespace is stripped first. If the input contains link
    markup of the form ``[<url|label>]``, only the ``url`` part is used;
    otherwise the stripped input is treated as the URL as-is.

    URLs for which ``is_youtube_video`` is truthy are passed to
    ``yt_transcript``; everything else is passed to ``scrape_text``. A falsy
    result from either helper is replaced by a failure message naming the
    URL, so the caller always has something to display.
    """
    url = text.strip()
    # Pull the bare URL out of "[<url|label>]" markup when present.
    # NOTE(review): presumably chat-client link formatting; confirm that the
    # surrounding square brackets really are part of the incoming text.
    match = re.search(r"\[<([^|>]+)\|[^>]+>\]", url)
    if match:
        url = match.group(1)
    if is_youtube_video(url):
        logger.debug(f"Fetching youtube transcript for [{url}].")
        return yt_transcript(url) or f"Failed to extract transcript for {url}."
    logger.debug(f"Reading text from [{url}].")
    return scrape_text(url) or f"Failed to scrape text from {url}."


class ChatSession:
Expand Down Expand Up @@ -287,7 +292,7 @@ def process_command(self, text, say=lambda text: None):
say(text="Streaming mode disabled.")
elif cmd.startswith("\\extract "):
if say:
say(text=(extract(cmd[8:].strip()) or "None"))
say(text=(extract(cmd[8:]) or "None"))
elif cmd == "\\help":
say(
f"""
Expand Down

0 comments on commit 290d937

Please sign in to comment.