Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

병렬화 및 Mp4 변환 #1

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
venv
.secrets
downloads
.idea
.idea
__pycache__
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python: Current File",
"type": "python",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}
100 changes: 77 additions & 23 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import requests
import os
import re
import shutil
import sys
import time
from pathlib import Path
from threading import Thread
from typing import List, Tuple
from utils import SecretsManager

import requests
import subprocess
from bs4 import BeautifulSoup

from models import *
import re
import shutil
from pathlib import Path
from utils import SecretsManager


class ETLDownloader:
Expand All @@ -26,8 +31,8 @@ def _get_soup(html):
def _get_tmp_dir(self):
    """Return the path of the ``tmp`` directory inside the selected course's download folder."""
    course_root = os.path.join(self.DOWNLOAD_PATH, self.selected_course.title)
    return os.path.join(course_root, "tmp")

def _get_video_dir(self, video: Video, safe_filename=False):
filename = video.title + ".ts"
def _get_video_dir(self, video: Video, safe_filename=False, ext: str = "ts"):
# File name is "<video title>.<ext>"; ext defaults to "ts".
filename = f"{video.title}.{ext}"
if safe_filename:
# Replace "/" in the file name with "-" so it is not read as a path separator.
filename = filename.replace("/", "-")
# Result: DOWNLOAD_PATH / <selected course title> / <file name>.
return os.path.join(self.DOWNLOAD_PATH, self.selected_course.title, filename)
Expand Down Expand Up @@ -94,27 +99,48 @@ def _parse_stream_endpoint(self, url: str) -> Tuple[str, str]:
media_id = m2.group(1)
return endpoint, media_id

def download_vod(self, video: Video):
def get_last_index(self, endpoint: str, media_id: str) -> int:
# Chunk indices are probed upward starting from 0.
index = 0
endpoint, media_id = self._parse_stream_endpoint(video.player_url)
video.media_id = media_id
directory = self._get_tmp_dir()
print(f"\t[*] {video.title} 다운로드 중.", end="")
while True:
chunk_url = f"{endpoint}/media_{media_id}_{index}.ts"
res = self.s.get(chunk_url)
# HEAD request: only the status code is inspected for the probe.
res = self.s.head(chunk_url)
if res.status_code != 200:
print("")
break
Path(directory).mkdir(parents=True, exist_ok=True)
with open(
os.path.join(directory, f"{index}_{media_id}.ts"),
"wb",
) as f:
f.write(res.content)
# NOTE(review): presumably returned from the non-200 branch, i.e. this is the
# first index that did not answer 200 (a chunk count, not the last valid
# index) - confirm against callers that add 1 to the result.
return index
index += 1
print(".", end="")
video.num_files = index

def download_proc(self, endpoint: str, media_id: str, index: int, directory: str):
    """Download one ``.ts`` chunk into ``directory`` and mark it as finished.

    Intended to run as a thread target. ``self.done_num`` is incremented
    exactly once per call, whether the download succeeded or raised, so a
    caller that polls the counter cannot wait forever on a failed worker.

    Args:
        endpoint: Base URL of the stream.
        media_id: Media identifier used in the chunk URL and the file name.
        index: Zero-based chunk number.
        directory: Existing directory the chunk file is written to.
    """
    import threading  # local import keeps this block self-contained

    try:
        chunk_url = f"{endpoint}/media_{media_id}_{index}.ts"
        # NOTE(review): the status code is not checked here, so an error
        # response body would be written as chunk data - confirm intent.
        res = self.s.get(chunk_url)
        with open(
            os.path.join(directory, f"{index}_{media_id}.ts"),
            "wb",
        ) as f:
            f.write(res.content)
        print(".", end="", flush=True)
    finally:
        # dict.setdefault hands every worker the same lock object, created
        # lazily on first use; the lock makes the read-modify-write of the
        # shared counter safe across threads.
        lock = self.__dict__.setdefault("_done_num_lock", threading.Lock())
        with lock:
            self.done_num += 1

def download_vod(self, video: "Video"):
    """Download all chunks of ``video`` into the temporary directory in parallel.

    Resolves the stream endpoint from ``video.player_url``, asks
    ``get_last_index`` for the chunk range, stores ``media_id`` and
    ``num_files`` on ``video``, then starts one ``download_proc`` thread per
    chunk and blocks until every thread has finished.
    """
    endpoint, media_id = self._parse_stream_endpoint(video.player_url)
    last_index = self.get_last_index(endpoint, media_id)

    # NOTE(review): if get_last_index returns the first *missing* index rather
    # than the last existing one, "+ 1" requests one chunk too many - confirm.
    video.media_id, video.num_files = media_id, last_index + 1

    directory = self._get_tmp_dir()
    Path(directory).mkdir(parents=True, exist_ok=True)
    print(f"\t[*] {video.title} 다운로드 중.", end="", flush=True)

    # download_proc increments this counter; reset it for the new video.
    self.done_num = 0
    workers = []
    for index in range(video.num_files):
        worker = Thread(
            target=self.download_proc, args=(endpoint, media_id, index, directory)
        )
        worker.start()
        workers.append(worker)
        # Stagger thread start-up slightly.
        time.sleep(0.05)

    # Join the threads instead of polling done_num: a worker that dies with an
    # exception would never bump the counter and the poll would spin forever.
    for worker in workers:
        worker.join()

    print(flush=True)

def concat_files(self, video: Video):
directory = self._get_tmp_dir()
Expand All @@ -133,9 +159,37 @@ def _delete_tmp_folder(self):
except FileNotFoundError:
return

def convert_to_mp4(self, video):
    """Convert the ``.ts`` file of ``video`` to ``.mp4`` by running ffmpeg.

    The ``.ts`` source is deleted only when ffmpeg exits with status 0. On a
    non-zero exit the source is kept, an ``.mp4`` that this call created is
    removed again, and a failure message is printed.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found
            (raised by ``subprocess.run``).
    """
    print(f"\t[*] {video.title} 변환 시작")
    infile = self._get_video_dir(video, safe_filename=True)
    outfile = self._get_video_dir(video, safe_filename=True, ext="mp4")
    # Remember whether the output pre-existed so a failed run never deletes
    # an .mp4 that this call did not create.
    outfile_preexisted = os.path.exists(outfile)
    result = subprocess.run(["ffmpeg", "-i", infile, outfile])
    if result.returncode != 0:
        # Keep the source so the download is not lost, and drop the partial
        # output so an "mp4 already exists" check does not treat it as done.
        if not outfile_preexisted:
            try:
                os.remove(outfile)
            except FileNotFoundError:
                pass
        print(f"\t[!] {video.title} 변환 실패 (ffmpeg 종료 코드 {result.returncode})")
        return
    os.remove(infile)
    print(f"\t[*] {video.title} 변환 완료")

def download_all_videos(self):
self._delete_tmp_folder()
videos = self.get_course_vods()

def download_video(video: Video):
if Path(self._get_video_dir(video, safe_filename=True, ext="mp4")).exists():
print(f"\t[*] {video.title}.mp4 파일이 이미 존재하므로 건너뜁니다.")
return
if Path(self._get_video_dir(video, safe_filename=True, ext="ts")).exists():
print(f"\t[*] {video.title}.ts 파일이 이미 존재하므로 mp4로 변환 후 건너뜁니다.")
self.convert_to_mp4(video)
return
self.download_vod(video)
self.concat_files(video)
self.convert_to_mp4(video)

for video in videos:
if "Lecture 11" in video.title or "digital" in video.title:
download_video(video)
# Thread(target=download_video, args=(video,)).start()
# time.sleep(2)

return
for video in videos:
if Path(self._get_video_dir(video, safe_filename=True)).exists():
print(f"\t[*] {video.title}.ts 파일이 이미 존재하므로 건너뜁니다.")
Expand Down