-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
169 changed files
with
7,684 additions
and
1,624 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
|
||
# Fetches every video search result for a channel from the YouTube Data API,
# following nextPageToken until the API stops returning one.
# The items are only returned to the caller; this function writes no file.
#
# Parameters:
#   channelId - ID of the channel whose videos are listed.
#   apiKey    - YouTube Data API key sent as the `key` query parameter.
#
# Reads $maxResults from the calling scope as the requested page size.
# search.list documents maxResults as 0-50, so a missing, non-numeric or
# out-of-range value falls back to 50 instead of being sent as-is.
#
# Returns the `items` entries of every page, in the order the API returned them.
# Request failures are not caught here and propagate to the caller.
function Get-YoutubePublicChannelVideos {
    param (
        [string]$channelId,
        [string]$apiKey
    )

    Write-Host "Getting Video List for $channelId"

    # Resolve a valid page size from the ambient $maxResults (may be unset).
    $pageSize = 50
    $requestedSize = 0
    if ([int]::TryParse([string]$maxResults, [ref]$requestedSize) -and $requestedSize -ge 1 -and $requestedSize -le 50) {
        $pageSize = $requestedSize
    }

    $nextPageToken = $null
    $page = 1
    # A list avoids re-allocating the whole array on every page.
    $allVideosData = New-Object System.Collections.Generic.List[object]

    do {
        # YouTube API endpoint to get videos from a channel
        $searchApiUrl = "https://www.googleapis.com/youtube/v3/search?key=$apiKey&part=snippet&channelId=$channelId&type=video&maxResults=$pageSize"
        if ($nextPageToken) {
            # Only send pageToken once the API has handed one out.
            $searchApiUrl += "&pageToken=$nextPageToken"
        }

        # Fetch one page of the video list
        $searchResponse = Invoke-RestMethod -Uri $searchApiUrl -Method Get

        # A page without items must not add a $null entry to the result.
        $pageItems = @()
        if ($null -ne $searchResponse.items) {
            $pageItems = @($searchResponse.items)
        }

        Write-Host " Parsing Page $page with $($pageItems.Count) videos and etag: $($searchResponse.etag)"
        foreach ($item in $pageItems) {
            $allVideosData.Add($item)
        }

        # Get the nextPageToken to continue fetching more videos
        $nextPageToken = $searchResponse.nextPageToken
        $page++
    } while ($nextPageToken)

    Write-Host " Found $($allVideosData.Count) videos"
    return $allVideosData.ToArray()
}
|
||
# Reports whether a file's last write is at least the given number of hours old.
# A path that does not exist counts as old, so the result is $true.
#
# Parameters:
#   filePath - path of the file to inspect.
#   hours    - age threshold in hours (inclusive).
function Test-FileAge {
    param (
        [Parameter(Mandatory = $true)]
        [string]$filePath,
        [Parameter(Mandatory = $true)]
        [int]$hours
    )

    if (Test-Path -Path $filePath) {
        # Age is measured from the file's LastWriteTime to now.
        $age = (Get-Date) - (Get-Item -Path $filePath).LastWriteTime
        return ($age.TotalHours -ge $hours)
    }

    # Nothing on disk yet: treat it as stale.
    return $true
}
|
||
# Fetches the snippet and contentDetails of one video from the YouTube Data API.
# Nothing is written to disk here; persisting the result is left to the caller.
#
# Parameters:
#   videoId - ID of the video to look up.
#
# The API key is read from $apiKey in the calling scope; when that is empty the
# YOUTUBE_API_KEY environment variable is used instead.
#
# Returns the first entry of the response's `items`, or $null when no key is
# available, the API returns no items, or the request fails.
function Get-YoutubeVideoData {
    param (
        [Parameter(Mandatory = $true)]
        [string]$videoId
    )

    # Resolve the API key: ambient variable first, environment as fallback.
    $key = $apiKey
    if (-not $key) {
        $key = $env:YOUTUBE_API_KEY
    }
    if (-not $key) {
        Write-Host "API Key is missing. Please set the API Key." -ForegroundColor Red
        return $null
    }

    # Ensure videoId is valid
    if (-not $videoId) {
        Write-Host "Invalid videoId provided." -ForegroundColor Red
        return $null
    }

    Write-Host "Working on Data for: $videoId" -ForegroundColor Green
    # Escape the id so an unexpected character cannot alter the query string.
    $escapedVideoId = [uri]::EscapeDataString($videoId)
    $videoDetailsUrl = "https://www.googleapis.com/youtube/v3/videos?key=$key&id=$escapedVideoId&part=snippet,contentDetails"

    try {
        $videoDetails = Invoke-RestMethod -Uri $videoDetailsUrl -Method Get -ErrorAction Stop
        if ($null -eq $videoDetails -or $null -eq $videoDetails.items -or $videoDetails.items.Count -eq 0) {
            Write-Host "No data found for video: $videoId" -ForegroundColor Yellow
            return $null
        }

        $videoData = $videoDetails.items[0]

        Write-Host "Data found for video: $videoId" -ForegroundColor Green
        return $videoData
    }
    catch {
        # Report the failure and signal it to the caller with $null.
        Write-Host "Error fetching data for video: $videoId" -ForegroundColor Red
        Write-Host $_.Exception.Message -ForegroundColor Red
        return $null
    }
}
|
||
# Lists the caption tracks of one video through the YouTube Data API and
# returns a reduced description of each track.
#
# Parameters:
#   videoId     - ID of the video whose caption tracks are listed.
#   accessToken - optional token; when supplied it is sent as an
#                 "Authorization: Bearer" header.
#                 NOTE(review): the captions.list reference describes the
#                 endpoint as requiring authorization - pass a token if
#                 key-only requests are rejected.
#
# The API key is read from $apiKey in the calling scope; when that is empty the
# YOUTUBE_API_KEY environment variable is used instead.
#
# Returns one hashtable per caption track (captionId, language, trackKind,
# isDraft, status, lastUpdated); nothing when the video has no tracks; $null
# when neither a key nor a token is available or the request fails.
function Get-YouTubeCaptionsData {
    param (
        [Parameter(Mandatory = $true)]
        [string]$videoId,
        [string]$accessToken
    )

    # Resolve the API key: ambient variable first, environment as fallback.
    $key = $apiKey
    if (-not $key) {
        $key = $env:YOUTUBE_API_KEY
    }
    if (-not $key -and -not $accessToken) {
        Write-Host "API Key is missing. Please set the API Key." -ForegroundColor Red
        return $null
    }

    # Ensure videoId is valid
    if (-not $videoId) {
        Write-Host "Invalid videoId provided." -ForegroundColor Red
        return $null
    }

    Write-Host "Getting caption data for: $videoId" -ForegroundColor Green
    # Escape the id so an unexpected character cannot alter the query string.
    $escapedVideoId = [uri]::EscapeDataString($videoId)
    $captionsUrl = "https://www.googleapis.com/youtube/v3/captions?videoId=$escapedVideoId&part=snippet"
    if ($key) {
        $captionsUrl += "&key=$key"
    }

    $requestArgs = @{
        Uri         = $captionsUrl
        Method      = 'Get'
        ErrorAction = 'Stop'
    }
    if ($accessToken) {
        $requestArgs['Headers'] = @{ "Authorization" = "Bearer $accessToken" }
    }

    try {
        # Get captions for the video
        $captionsResponse = Invoke-RestMethod @requestArgs
        $captionsData = @()

        if ($null -ne $captionsResponse -and $null -ne $captionsResponse.items -and $captionsResponse.items.Count -gt 0) {
            # Collect the loop output in one go instead of growing an array with +=.
            $captionsData = @(
                foreach ($caption in $captionsResponse.items) {
                    @{
                        "captionId"   = $caption.id
                        "language"    = $caption.snippet.language
                        "trackKind"   = $caption.snippet.trackKind
                        "isDraft"     = $caption.snippet.isDraft
                        "status"      = $caption.snippet.status
                        "lastUpdated" = $caption.snippet.lastUpdated
                    }
                }
            )
        }
        else {
            Write-Host "No captions found for video: $videoId" -ForegroundColor Yellow
        }

        return $captionsData
    }
    catch {
        # Report the failure and signal it to the caller with $null.
        Write-Host "Error fetching captions for video: $videoId" -ForegroundColor Red
        Write-Host $_.Exception.Message -ForegroundColor Red
        return $null
    }
}
|
||
|
||
# Print a confirmation once the helper functions above have been defined.
Write-Host "YoutubeAPI.ps1 loaded" -ForegroundColor Green |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,109 +1,156 @@ | ||
Write-Host "Running v2" | ||
# Helpers | ||
. ./.powershell/_includes/YoutubeAPI.ps1 | ||
|
||
# Define variables | ||
Write-Host "Running v3" | ||
|
||
$apiKey = $env:YOUTUBE_API_KEY | ||
$channelId = "UCkYqhFNmhCzkefHsHS652hw" | ||
$outputDir = "site\content\resources\videos\youtube" | ||
$dataDirectory = ".\site\data" | ||
$refreshData = $false | ||
|
||
$maxResults = 800 | ||
|
||
# Create the output directory if it doesn't exist | ||
if (-not (Test-Path $outputDir)) { | ||
New-Item -Path $outputDir -ItemType Directory | ||
} | ||
|
||
# Function to fetch video list from YouTube API and save a single youtube.json file | ||
function Fetch-YoutubeVideoList { | ||
param () | ||
|
||
$nextPageToken = $null | ||
$page = 1; | ||
$allVideosData = @() | ||
# Lists the caption tracks of a video through the captions endpoint,
# sending the given token as a bearer Authorization header.
#
# Parameters:
#   videoId     - ID of the video whose caption tracks are requested.
#   accessToken - token placed in the Authorization header.
#
# Returns the `items` property of the API response.
function Get-YouTubeCaptions {
    param (
        [Parameter(Mandatory = $true)]
        [string]$videoId,
        [string]$accessToken
    )

    # Gather the request settings in one table and splat them.
    $request = @{
        Uri     = "https://www.googleapis.com/youtube/v3/captions?part=id,snippet&videoId=$videoId"
        Headers = @{ "Authorization" = "Bearer $accessToken" }
        Method  = 'Get'
    }

    $result = Invoke-RestMethod @request
    return $result.items
}
|
||
do { | ||
# YouTube API endpoint to get videos from a channel, including nextPageToken | ||
$searchApiUrl = "https://www.googleapis.com/youtube/v3/search?key=$apiKey&part=snippet&channelId=$channelId&type=video&maxResults=$maxResults&pageToken=$nextPageToken" | ||
# Function to download a caption file with a check if $captionContent is empty | ||
function Get-YouTubeCaption { | ||
param ( | ||
[Parameter(Mandatory = $true)] | ||
[string]$captionId, | ||
[Parameter(Mandatory = $true)] | ||
[string]$accessToken | ||
) | ||
|
||
# Fetch video list | ||
$searchResponse = Invoke-RestMethod -Uri $searchApiUrl -Method Get | ||
# Specify the format as SRT by adding 'tfmt=srt' to the URL | ||
$downloadUrl = "https://www.googleapis.com/youtube/v3/captions/$captionId/?tfmt=srt" | ||
$headers = @{"Authorization" = "Bearer $accessToken" } | ||
|
||
$allVideosData += $searchResponse.items | ||
# Use Invoke-WebRequest for binary or non-JSON/XML responses | ||
$response = Invoke-WebRequest -Uri $downloadUrl -Headers $headers -Method Get | ||
|
||
# Get the nextPageToken to continue fetching more videos | ||
$nextPageToken = $searchResponse.nextPageToken | ||
$page++ | ||
} while ($nextPageToken) | ||
return $response.Content | ||
} | ||
|
||
# Define variables | ||
$channelId = "UCkYqhFNmhCzkefHsHS652hw" | ||
$outputDir = "site\content\resources\videos\youtube" | ||
$dataDirectory = ".\site\data" | ||
$refreshData = $false | ||
$captionsDownloadLimit = 0 | ||
$videoUpdateLimit = 10 | ||
$captionsManafestUpdateLimit = 10 | ||
|
||
# 0. Get Youtube Video List | ||
$dataFilePath = Join-Path $dataDirectory "youtube.json" | ||
if (Test-FileAge -filePath $dataFilePath -hours 3) { | ||
$allVideosData = Get-YoutubePublicChannelVideos -channelId $channelId -apiKey $env:YOUTUBE_API_KEY # Call this to fetch video list and save to youtube.json | ||
# Save all video data to a single youtube.json file | ||
$dataFilePath = Join-Path $dataDirectory "youtube.json" | ||
|
||
$allVideosData | ConvertTo-Json -Depth 10 | Set-Content -Path $dataFilePath | ||
|
||
Write-Host "All video data saved to youtube.json." | ||
Write-Host "$dataFilePath saved with $($allVideosData.Count) videos." -ForegroundColor Green | ||
} | ||
else { | ||
Write-Host "$dataFilePath is up to date." -ForegroundColor Yellow | ||
} | ||
|
||
# Function to update data.json for a single video | ||
function Update-YoutubeDataFile { | ||
param ( | ||
[string]$videoId | ||
) | ||
|
||
$videoUpdateCount = 0 | ||
$captionsManafestUpdateCount = 0 | ||
$captionsDownloadCount = 0 | ||
foreach ($video in $allVideosData) { | ||
|
||
Write-Host "Processing $($video.id.videoId)" -ForegroundColor Green | ||
|
||
$videoId = $video.id.videoId | ||
# Create the directory named after the video ID | ||
$videoDir = Join-Path $outputDir $videoId | ||
if (-not (Test-Path $videoDir)) { | ||
New-Item -Path $videoDir -ItemType Directory | ||
} | ||
|
||
# File path for data.json | ||
$jsonFilePath = Join-Path $videoDir "data.json" | ||
if ($videoId -eq "xo4jMxupIM0") { | ||
Write-Host "Updating data.json for video: $videoId" | ||
# 1. Get Youtube Video Data | ||
$jsonFilePathVideos = Join-Path $videoDir "data.json" | ||
if ($refreshData -or -not (Test-Path $jsonFilePathVideos)) { | ||
if ($videoUpdateCount -lt $videoUpdateLimit) { | ||
# Call the function to update the data for a single video | ||
$videoData = Get-YoutubeVideoData -videoId $videoId | ||
# Save updated video data to data.json | ||
if ($videoData) { | ||
$videoData | ConvertTo-Json -Depth 10 | Set-Content -Path $jsonFilePathVideos | ||
Write-Host " Updated data.json for video: $videoId" | ||
$videoUpdateCount++; | ||
} | ||
} | ||
else { | ||
Write-Host " Reached video update limit of $videoUpdateLimit. skipping." | ||
} | ||
} | ||
# Only update if $refreshData is true or data.json doesn't exist | ||
if ($refreshData -or -not (Test-Path $jsonFilePath)) { | ||
# Fetch full video details from YouTube API | ||
$videoDetailsUrl = "https://www.googleapis.com/youtube/v3/videos?key=$apiKey&id=$videoId&part=snippet,contentDetails" | ||
$videoDetails = Invoke-RestMethod -Uri $videoDetailsUrl -Method Get | ||
$videoData = $videoDetails.items[0] | ||
|
||
if ($videoData) { | ||
|
||
# 2. Get Youtube Captions List | ||
$jsonFilePathCaptions = Join-Path $videoDir "data.captions.json" | ||
if ($refreshData -or -not (Test-Path $jsonFilePathCaptions)) { | ||
if ($captionsManafestUpdateCount -lt $captionsManafestUpdateLimit) { | ||
# Call the function to update the data for a single video | ||
$captionListData = Get-YouTubeCaptionsData -videoId $videoId | ||
# Save updated video data to data.json | ||
$videoData | ConvertTo-Json -Depth 10 | Set-Content -Path $jsonFilePath | ||
Write-Host "Updated data.json for video: $videoId" | ||
if ($captionListData) { | ||
$captionListData | ConvertTo-Json -Depth 10 | Set-Content -Path $jsonFilePathCaptions | ||
Write-Host " Updated data.captions.json for video: $videoId" | ||
$captionsManafestUpdateCount++; | ||
} | ||
} | ||
else { | ||
Write-Host "No data found for video: $videoId" | ||
Write-Host " Reached capations manafest update limit of $captionsManafestUpdateLimit. skipping." | ||
} | ||
|
||
} | ||
|
||
# 3. Download Captions | ||
if (Test-Path $jsonFilePathCaptions) { | ||
$captionsData = Get-Content -Path $jsonFilePathCaptions | ConvertFrom-Json | ||
foreach ($caption in $captionsData) { | ||
$captionId = $caption.captionId | ||
$language = $caption.language | ||
$captionsFileName = "data.captions.$language.srt" | ||
$captionFilePath = Join-Path $videoDir $captionsFileName | ||
if (-not (Test-Path $captionFilePath)) { | ||
if ($captionsDownloadCount -lt $captionsDownloadLimit) { | ||
$captionData = Get-YouTubeCaption -captionId $captionId -accessToken $env:GOOGLE_ACCESS_TOKEN | ||
$captionData | Set-Content -Path $captionFilePath | ||
Write-Host " Updated $captionsFileName for video: $videoId" | ||
$captionsDownloadCount++ | ||
} | ||
else { | ||
Write-Host " Reached capations download limit of $captionsDownloadLimit. skipping." | ||
} | ||
|
||
} | ||
} | ||
|
||
} | ||
else { | ||
Write-Host "Data for video $videoId is already up to date." | ||
Write-Host " No caption list data manafest. skipping." | ||
} | ||
} | ||
|
||
# Function to iterate through youtube.json and update data.json for each video | ||
function Update-YoutubeDataFilesFromJson { | ||
param () | ||
|
||
$dataFilePath = Join-Path $dataDirectory "youtube.json" | ||
if (-not (Test-Path $dataFilePath)) { | ||
Write-Host "youtube.json file not found. Please run Fetch-YoutubeVideoList first." | ||
return | ||
} | ||
|
||
# Load video list from youtube.json | ||
$allVideosData = Get-Content -Path $dataFilePath | ConvertFrom-Json | ||
|
||
foreach ($video in $allVideosData) { | ||
$videoId = $video.id.videoId | ||
|
||
# Call the function to update the data for a single video | ||
Update-YoutubeDataFile -videoId $videoId | ||
} | ||
# Update-YoutubeDataFilesFromJson # Call this to update data.json files from youtube.json | ||
|
||
Write-Host "All video data files updated from youtube.json." | ||
} | ||
# # Set a limit for the number of transcripts to download | ||
|
||
#Fetch-YoutubeVideoList # Call this to fetch video list and save to youtube.json | ||
Update-YoutubeDataFilesFromJson # Call this to update data.json files from youtube.json | ||
# Download-AllYouTubeCaptions -accessToken $env:GOOGLE_ACCESS_TOKEN |
File renamed without changes.
Oops, something went wrong.