shp7724 · shp7724 · Oct 30, 2021 · Oct 30, 2021 · Mar 8, 2022 · Mar 8, 2022
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 venv
 .secrets
 downloads
-.idea
+.idea
+__pycache__
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
diff --git a/main.py b/main.py
@@ -1,13 +1,18 @@
-import requests
 import os
+import re
+import shutil
 import sys
+import time
+from pathlib import Path
+from threading import Thread
 from typing import List, Tuple
-from utils import SecretsManager
+
+import requests
+import subprocess
 from bs4 import BeautifulSoup
+
 from models import *
-import re
-import shutil
-from pathlib import Path
+from utils import SecretsManager
 
 
 class ETLDownloader:
@@ -26,8 +31,8 @@ def _get_soup(html):
     def _get_tmp_dir(self):
         return os.path.join(self.DOWNLOAD_PATH, self.selected_course.title, "tmp")
 
-    def _get_video_dir(self, video: Video, safe_filename=False):
-        filename = video.title + ".ts"
+    def _get_video_dir(self, video: Video, safe_filename=False, ext: str = "ts"):
+        filename = f"{video.title}.{ext}"
         if safe_filename:
             filename = filename.replace("/", "-")
         return os.path.join(self.DOWNLOAD_PATH, self.selected_course.title, filename)
@@ -94,27 +99,48 @@ def _parse_stream_endpoint(self, url: str) -> Tuple[str, str]:
         media_id = m2.group(1)
         return endpoint, media_id
 
-    def download_vod(self, video: Video):
+    def get_last_index(self, endpoint: str, media_id: str) -> int:
         index = 0
-        endpoint, media_id = self._parse_stream_endpoint(video.player_url)
-        video.media_id = media_id
-        directory = self._get_tmp_dir()
-        print(f"\t[*] {video.title} 다운로드 중.", end="")
         while True:
             chunk_url = f"{endpoint}/media_{media_id}_{index}.ts"
-            res = self.s.get(chunk_url)
+            res = self.s.head(chunk_url)
             if res.status_code != 200:
-                print("")
-                break
-            Path(directory).mkdir(parents=True, exist_ok=True)
-            with open(
-                os.path.join(directory, f"{index}_{media_id}.ts"),
-                "wb",
-            ) as f:
-                f.write(res.content)
+                return index
             index += 1
-            print(".", end="")
-        video.num_files = index
+
+    def download_proc(self, endpoint: str, media_id: str, index: int, directory: str):
+        """Download one stream chunk into ``directory`` and count it as done.
+
+        Fetches ``{endpoint}/media_{media_id}_{index}.ts`` through the session
+        ``self.s`` and writes the response body to ``{index}_{media_id}.ts``
+        inside ``directory``. Used as a thread target by ``download_vod``.
+
+        Args:
+            endpoint: Base URL under which the chunk files live.
+            media_id: Media identifier embedded in the chunk file names.
+            index: Chunk number (callers pass an ``int`` taken from ``range``).
+            directory: Directory that receives the chunk file; it must exist.
+        """
+        chunk_url = f"{endpoint}/media_{media_id}_{index}.ts"
+        try:
+            # NOTE(review): the response status is not checked, so an error body
+            # would be written to disk as if it were chunk data.
+            res = self.s.get(chunk_url)
+            with open(
+                os.path.join(directory, f"{index}_{media_id}.ts"),
+                "wb",
+            ) as f:
+                f.write(res.content)
+            print(".", end="", flush=True)
+        finally:
+            # Count the chunk even when the request or the write raised, so a
+            # caller polling ``done_num`` is never left waiting forever.
+            # NOTE(review): this increment is not synchronised between threads.
+            self.done_num += 1
+
+    def download_vod(self, video: Video):
+        """Download all stream chunks of ``video`` into the temporary directory.
+
+        Resolves the stream endpoint from ``video.player_url``, counts the
+        available chunks, then fetches each chunk on its own thread via
+        ``download_proc`` and waits for all of them to finish.
+
+        Side effects: sets ``video.media_id`` and ``video.num_files``, creates
+        the temporary directory if needed, and resets ``self.done_num``.
+
+        Args:
+            video: The video to download; ``player_url`` and ``title`` are read.
+        """
+        endpoint, media_id = self._parse_stream_endpoint(video.player_url)
+
+        # get_last_index returns the first index whose HEAD request is not 200,
+        # which is already the number of chunks (indices start at 0). Adding 1
+        # here would request one chunk that does not exist.
+        video.media_id = media_id
+        video.num_files = self.get_last_index(endpoint, media_id)
+
+        directory = self._get_tmp_dir()
+        Path(directory).mkdir(parents=True, exist_ok=True)
+        print(f"\t[*] {video.title} 다운로드 중.", end="", flush=True)
+
+        # download_proc increments this counter, so it must exist before the
+        # first worker starts.
+        self.done_num = 0
+        workers = []
+        for index in range(video.num_files):
+            worker = Thread(
+                target=self.download_proc, args=(endpoint, media_id, index, directory)
+            )
+            worker.start()
+            workers.append(worker)
+            # Short pause between thread starts (presumably to avoid hitting
+            # the server with every request at once).
+            time.sleep(0.05)
+
+        # Join instead of polling done_num: join returns as soon as each worker
+        # ends, including when it ended with an exception, so this cannot hang.
+        for worker in workers:
+            worker.join()
+
+        print(flush=True)
 
     def concat_files(self, video: Video):
         directory = self._get_tmp_dir()
@@ -133,9 +159,37 @@ def _delete_tmp_folder(self):
         except FileNotFoundError:
             return
 
+    def convert_to_mp4(self, video):
+        """Convert the video's ``.ts`` file to ``.mp4`` by running ffmpeg.
+
+        Input and output paths both come from ``_get_video_dir`` with
+        ``safe_filename=True``. The source ``.ts`` file is removed only after
+        ffmpeg has exited successfully.
+
+        Args:
+            video: The video to convert; its ``title`` is used for the paths
+                and the progress messages.
+
+        Raises:
+            subprocess.CalledProcessError: If ffmpeg exits with a non-zero
+                status. The ``.ts`` file is left in place in that case.
+        """
+        print(f"\t[*] {video.title} 변환 시작")
+        infile = self._get_video_dir(video, safe_filename=True)
+        outfile = self._get_video_dir(video, safe_filename=True, ext="mp4")
+        # check=True: without it a failed conversion would still fall through
+        # to os.remove() and destroy the only copy of the download.
+        subprocess.run(["ffmpeg", "-i", infile, outfile], check=True)
+        os.remove(infile)
+        print(f"\t[*] {video.title} 변환 완료")
+
     def download_all_videos(self):
         self._delete_tmp_folder()
         videos = self.get_course_vods()
+
+        def download_video(video: Video):
+            if Path(self._get_video_dir(video, safe_filename=True, ext="mp4")).exists():
+                print(f"\t[*] {video.title}.mp4 파일이 이미 존재하므로 건너뜁니다.")
+                return
+            if Path(self._get_video_dir(video, safe_filename=True, ext="ts")).exists():
+                print(f"\t[*] {video.title}.ts 파일이 이미 존재하므로 mp4로 변환 후 건너뜁니다.")
+                self.convert_to_mp4(video)
+                return
+            self.download_vod(video)
+            self.concat_files(video)
+            self.convert_to_mp4(video)
+
+        for video in videos:
+            if "Lecture 11" in video.title or "digital" in video.title:
+                download_video(video)
+            # Thread(target=download_video, args=(video,)).start()
+            # time.sleep(2)
+
+        return
         for video in videos:
             if Path(self._get_video_dir(video, safe_filename=True)).exists():
                 print(f"\t[*] {video.title}.ts 파일이 이미 존재하므로 건너뜁니다.")