Skip to content

Commit

Permalink
more logging to debug yt
Browse files (browse the repository at this point in the history)
  • Loading branch information
shivanker committed Feb 3, 2025
1 parent 021e336 commit 9b3f0a5
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 4 deletions.
9 changes: 6 additions & 3 deletions session.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ def extract(text):
if match:
url = match.group(1)
if is_youtube_video(url):
logger.info(f"Fetching youtube transcript for {url}. Original text {text}")
logger.warning(
f"SHIV1 Fetching youtube transcript for {url}. Original text {text}"
)
return yt_transcript(url) or f"Failed to extract transcript for {url}."
logger.info(f"Reading text from [{url}]. Original text {text}")
return scrape_text(url) or f"Failed to scrape text from {url}."
Expand Down Expand Up @@ -338,8 +340,9 @@ def process_direct_message(self, text, logger):
messages = (
# [ChatMessage.from_system(self.system_instr)] + messages
# if not self.model.value.startswith("o1")
# else
[ChatMessage.from_user(self.system_instr)] + messages
# else
[ChatMessage.from_user(self.system_instr)]
+ messages
)
messages = [msg.to_openai_format() for msg in messages]
logger.debug(messages)
Expand Down
13 changes: 12 additions & 1 deletion ytsubs.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,32 @@ def extract_video_id(url):

def yt_transcript(url: str) -> Union[str, None]:
"""Function to fetch the transcript of a YouTube video, given the URL."""
# NOTE(review): this block is shown as a rendered diff with indentation
# stripped, so nesting is inferred from statement order only.
# The "SHIV1" prefix tags the temporary debug logging added by this commit.
logger.warning(f"SHIV1 Fetching youtube transcript for {url}")
# Look the URL up in s3_cache under CACHE_NAMESPACE; a truthy hit is
# returned as-is without contacting the transcript API.
cached_transcript = s3_cache.get_cache(CACHE_NAMESPACE, url)
if cached_transcript:
logger.warning(f"SHIV1 Found cached transcript for {url}")
return cached_transcript
try:
logger.warning(f"SHIV1 Extracting video id for {url}")
video_id = extract_video_id(url)
if video_id:
logger.warning(f"SHIV1 Found video id {video_id} for {url}")
transcript = YouTubeTranscriptApi.get_transcript(video_id)
if transcript:
# NOTE(review): transcript is iterated segment-by-segment below, so
# [:50] presumably logs up to 50 segment mappings rather than the first
# 50 characters — confirm this is the intended amount of log output.
logger.warning(
f"SHIV1 Found transcript for {url}, beginning with {transcript[:50]}"
)
# Flatten the segments into one space-separated string, each segment
# prefixed with its start value formatted to two decimals in brackets.
transcript = " ".join(
f"[{segment['start']:.2f}] {segment['text']}"
for segment in transcript
)
# Store the flattened text under the same namespace/URL key read above.
s3_cache.set_cache(CACHE_NAMESPACE, url, transcript)
return transcript
else:
logger.warning(f"SHIV1 No transcript found for {url}")
else:
logger.warning(f"SHIV1 No video id found for {url}")
except Exception as e:
# NOTE(review): the next line appears to be the pre-commit message that this
# commit deletes (the diff header reports one deletion); the line after it
# is its replacement. Only one of the two exists in the committed file.
logger.error(f"Failed to extract transcript for [{url}].")
# NOTE(review): str(e) is concatenated directly after the trailing period,
# with no separating space.
logger.error(f"SHIV1 Failed to extract transcript for {url}." + str(e))
# NOTE(review): with indentation lost it cannot be confirmed here which
# branch each of the two returns below belongs to; presumably None is
# returned from the except handler and "<empty>" when no video id or no
# transcript was found — verify against ytsubs.py.
return None
return "<empty>"

0 comments on commit 9b3f0a5

Please sign in to comment.