Add more logging to debug YouTube transcript fetching

shivanker · Feb 3, 2025 · 9b3f0a5
1 parent 021e336
commit 9b3f0a5
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 4 deletions.
diff --git a/session.py b/session.py
@@ -60,7 +60,9 @@ def extract(text):
     if match:
         url = match.group(1)
     if is_youtube_video(url):
-        logger.info(f"Fetching youtube transcript for {url}. Original text {text}")
+        logger.warning(
+            f"SHIV1 Fetching youtube transcript for {url}. Original text {text}"
+        )
         return yt_transcript(url) or f"Failed to extract transcript for {url}."
     logger.info(f"Reading text from [{url}]. Original text {text}")
     return scrape_text(url) or f"Failed to scrape text from {url}."
@@ -338,8 +340,9 @@ def process_direct_message(self, text, logger):
         messages = (
             # [ChatMessage.from_system(self.system_instr)] + messages
             # if not self.model.value.startswith("o1")
-            # else 
-            [ChatMessage.from_user(self.system_instr)] + messages
+            # else
+            [ChatMessage.from_user(self.system_instr)]
+            + messages
         )
         messages = [msg.to_openai_format() for msg in messages]
         logger.debug(messages)

diff --git a/ytsubs.py b/ytsubs.py
@@ -45,21 +45,32 @@ def extract_video_id(url):
 
 def yt_transcript(url: str) -> Union[str, None]:
     """Function to fetch the transcript of a YouTube video, given the URL."""
+    logger.warning(f"SHIV1 Fetching youtube transcript for {url}")
     cached_transcript = s3_cache.get_cache(CACHE_NAMESPACE, url)
     if cached_transcript:
+        logger.warning(f"SHIV1 Found cached transcript for {url}")
         return cached_transcript
     try:
+        logger.warning(f"SHIV1 Extracting video id for {url}")
         video_id = extract_video_id(url)
         if video_id:
+            logger.warning(f"SHIV1 Found video id {video_id} for {url}")
             transcript = YouTubeTranscriptApi.get_transcript(video_id)
             if transcript:
+                logger.warning(
+                    f"SHIV1 Found transcript for {url}, beginning with {transcript[:50]}"
+                )
                 transcript = " ".join(
                     f"[{segment['start']:.2f}] {segment['text']}"
                     for segment in transcript
                 )
                 s3_cache.set_cache(CACHE_NAMESPACE, url, transcript)
                 return transcript
+            else:
+                logger.warning(f"SHIV1 No transcript found for {url}")
+        else:
+            logger.warning(f"SHIV1 No video id found for {url}")
     except Exception as e:
-        logger.error(f"Failed to extract transcript for [{url}].")
+        logger.error(f"SHIV1 Failed to extract transcript for {url}." + str(e))
         return None
     return "<empty>"