Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix long playlists (100+ videos) #1911

Merged
merged 11 commits into from
Mar 23, 2021
Merged
16 changes: 5 additions & 11 deletions spec/helpers_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,13 @@ describe "Helper" do
end
end

describe "#produce_playlist_url" do
it "correctly produces url for requesting index `x` of a playlist" do
produce_playlist_url("UUCla9fZca4I7KagBtgRGnOw", 0).should eq("/browse_ajax?continuation=4qmFsgIqEhpWTFVVQ2xhOWZaY2E0STdLYWdCdGdSR25PdxoMZWdaUVZEcERRVUU9&gl=US&hl=en")
describe "#produce_playlist_continuation" do
it "correctly produces ctoken for requesting index `x` of a playlist" do
produce_playlist_continuation("UUCla9fZca4I7KagBtgRGnOw", 100).should eq("4qmFsgJNEhpWTFVVQ2xhOWZaY2E0STdLYWdCdGdSR25PdxoUQ0FGNkJsQlVPa05IVVElM0QlM0SaAhhVVUNsYTlmWmNhNEk3S2FnQnRnUkduT3c%3D")

produce_playlist_url("UCCla9fZca4I7KagBtgRGnOw", 0).should eq("/browse_ajax?continuation=4qmFsgIqEhpWTFVVQ2xhOWZaY2E0STdLYWdCdGdSR25PdxoMZWdaUVZEcERRVUU9&gl=US&hl=en")
produce_playlist_continuation("UCCla9fZca4I7KagBtgRGnOw", 200).should eq("4qmFsgJLEhpWTFVVQ2xhOWZaY2E0STdLYWdCdGdSR25PdxoSQ0FKNkIxQlVPa05OWjBJJTNEmgIYVVVDbGE5ZlpjYTRJN0thZ0J0Z1JHbk93")

produce_playlist_url("PLt5AfwLFPxWLNVixpe1w3fi6lE2OTq0ET", 0).should eq("/browse_ajax?continuation=4qmFsgI0EiRWTFBMdDVBZndMRlB4V0xOVml4cGUxdzNmaTZsRTJPVHEwRVQaDGVnWlFWRHBEUVVFPQ%3D%3D&gl=US&hl=en")

produce_playlist_url("PLt5AfwLFPxWLNVixpe1w3fi6lE2OTq0ET", 10000).should eq("/browse_ajax?continuation=4qmFsgI0EiRWTFBMdDVBZndMRlB4V0xOVml4cGUxdzNmaTZsRTJPVHEwRVQaDGVnZFFWRHBEU2tKUA%3D%3D&gl=US&hl=en")

produce_playlist_url("PL55713C70BA91BD6E", 0).should eq("/browse_ajax?continuation=4qmFsgIkEhRWTFBMNTU3MTNDNzBCQTkxQkQ2RRoMZWdaUVZEcERRVUU9&gl=US&hl=en")

produce_playlist_url("PL55713C70BA91BD6E", 10000).should eq("/browse_ajax?continuation=4qmFsgIkEhRWTFBMNTU3MTNDNzBCQTkxQkQ2RRoMZWdkUVZEcERTa0pQ&gl=US&hl=en")
produce_playlist_continuation("PL55713C70BA91BD6E", 100).should eq("4qmFsgJBEhRWTFBMNTU3MTNDNzBCQTkxQkQ2RRoUQ0FGNkJsQlVPa05IVVElM0QlM0SaAhJQTDU1NzEzQzcwQkE5MUJENkU%3D")
end
end

Expand Down
47 changes: 16 additions & 31 deletions src/invidious/channels.cr
Original file line number Diff line number Diff line change
Expand Up @@ -229,18 +229,18 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
page = 1

LOGGER.trace("fetch_channel: #{ucid} : Downloading channel videos page")
response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
response_body = get_channel_videos_response(ucid, page, auto_generated: auto_generated)

videos = [] of SearchVideo
begin
initial_data = JSON.parse(response.body)
initial_data = JSON.parse(response_body)
raise InfoException.new("Could not extract channel JSON") if !initial_data

LOGGER.trace("fetch_channel: #{ucid} : Extracting videos from channel videos page initial_data")
videos = extract_videos(initial_data.as_h, author, ucid)
rescue ex
if response.body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
response.body.includes?("https://www.google.com/sorry/index")
if response_body.includes?("To continue with your YouTube experience, please fill out the form below.") ||
response_body.includes?("https://www.google.com/sorry/index")
raise InfoException.new("Could not extract channel info. Instance is likely blocked.")
end
raise ex
Expand Down Expand Up @@ -304,8 +304,8 @@ def fetch_channel(ucid, db, pull_all_videos = true, locale = nil)
ids = [] of String

loop do
response = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
initial_data = JSON.parse(response.body)
response_body = get_channel_videos_response(ucid, page, auto_generated: auto_generated)
initial_data = JSON.parse(response_body)
raise InfoException.new("Could not extract channel JSON") if !initial_data
videos = extract_videos(initial_data.as_h, author, ucid)

Expand Down Expand Up @@ -447,6 +447,7 @@ def produce_channel_videos_continuation(ucid, page = 1, auto_generated = nil, so
return continuation
end

# Used in bypass_captcha_job.cr
def produce_channel_videos_url(ucid, page = 1, auto_generated = nil, sort_by = "newest", v2 = false)
continuation = produce_channel_videos_continuation(ucid, page, auto_generated, sort_by, v2)
return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
Expand Down Expand Up @@ -937,35 +938,19 @@ def get_about_info(ucid, locale)
})
end

def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest", youtubei_browse = true)
if youtubei_browse
continuation = produce_channel_videos_continuation(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
data = {
"context": {
"client": {
"clientName": "WEB",
"clientVersion": "2.20201021.03.00",
},
},
"continuation": continuation,
}.to_json
return YT_POOL.client &.post(
"/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8",
headers: HTTP::Headers{"content-type" => "application/json"},
body: data
)
else
url = produce_channel_videos_url(ucid, page, auto_generated: auto_generated, sort_by: sort_by, v2: true)
return YT_POOL.client &.get(url)
end
def get_channel_videos_response(ucid, page = 1, auto_generated = nil, sort_by = "newest")
continuation = produce_channel_videos_continuation(ucid, page,
auto_generated: auto_generated, sort_by: sort_by, v2: true)

return request_youtube_api_browse(continuation)
end

def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
videos = [] of SearchVideo

2.times do |i|
response = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = JSON.parse(response.body)
response_json = get_channel_videos_response(ucid, page * 2 + (i - 1), auto_generated: auto_generated, sort_by: sort_by)
initial_data = JSON.parse(response_json)
break if !initial_data
videos.concat extract_videos(initial_data.as_h, author, ucid)
end
Expand All @@ -974,8 +959,8 @@ def get_60_videos(ucid, author, page, auto_generated, sort_by = "newest")
end

def get_latest_videos(ucid)
response = get_channel_videos_response(ucid)
initial_data = JSON.parse(response.body)
response_json = get_channel_videos_response(ucid)
initial_data = JSON.parse(response_json)
return [] of SearchVideo if !initial_data
author = initial_data["metadata"]?.try &.["channelMetadataRenderer"]?.try &.["title"]?.try &.as_s
items = extract_videos(initial_data.as_h, author, ucid)
Expand Down
31 changes: 31 additions & 0 deletions src/invidious/helpers/youtube_api.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#
# This file contains youtube API wrappers
#

# Hard-coded constants required by the API
# API key, interpolated as the `key` query parameter of the
# "/youtubei/v1/browse" request URL.
HARDCODED_API_KEY = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"
# Client version string, sent as `clientVersion` in the request's
# "context.client" JSON object.
HARDCODED_CLIENT_VERS = "2.20210318.08.00"

# Sends a POST request to the "/youtubei/v1/browse" endpoint for the
# given continuation token and returns the body of the response.
#
# The JSON payload carries a "WEB" client context (language "en",
# region "US", version HARDCODED_CLIENT_VERS) next to the continuation.
# No status check is performed here; callers receive whatever body the
# server sent back.
def request_youtube_api_browse(continuation)
  # Client description placed under "context.client" in the payload
  client_context = {
    "hl":            "en",
    "gl":            "US",
    "clientName":    "WEB",
    "clientVersion": HARDCODED_CLIENT_VERS,
  }

  # Full request payload, serialized once up front
  payload = {
    "context":      {"client": client_context},
    "continuation": continuation,
  }.to_json

  endpoint = "/youtubei/v1/browse?key=#{HARDCODED_API_KEY}"
  headers = HTTP::Headers{"content-type" => "application/json"}

  # Borrow a client from the pool, issue the POST, hand back the body
  YT_POOL.client(&.post(endpoint, headers: headers, body: payload)).body
end
95 changes: 58 additions & 37 deletions src/invidious/playlists.cr
Original file line number Diff line number Diff line change
Expand Up @@ -307,23 +307,32 @@ def subscribe_playlist(db, user, playlist)
return playlist
end

def produce_playlist_url(id, index)
def produce_playlist_continuation(id, index)
if id.starts_with? "UC"
id = "UU" + id.lchop("UC")
end
plid = "VL" + id

# Emulate a "request counter" increment, to make perfectly valid
# ctokens, even if at the time of writing, it's ignored by youtube.
request_count = (index / 100).to_i64 || 1_i64

data = {"1:varint" => index.to_i64}
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i, padding: false) }

data_wrapper = {"1:varint" => request_count, "15:string" => "PT:#{data}"}
SamantazFox marked this conversation as resolved.
Show resolved Hide resolved
.try { |i| Protodec::Any.cast_json(i) }
.try { |i| Protodec::Any.from_json(i) }
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }

object = {
"80226972:embedded" => {
"2:string" => plid,
"3:base64" => {
"15:string" => "PT:#{data}",
},
"2:string" => plid,
"3:string" => data_wrapper,
"35:string" => id,
SamantazFox marked this conversation as resolved.
Show resolved Hide resolved
},
}

Expand All @@ -332,7 +341,7 @@ def produce_playlist_url(id, index)
.try { |i| Base64.urlsafe_encode(i) }
.try { |i| URI.encode_www_form(i) }

return "/browse_ajax?continuation=#{continuation}&gl=US&hl=en"
return continuation
end

def get_playlist(db, plid, locale, refresh = true, force_refresh = false)
Expand Down Expand Up @@ -427,47 +436,59 @@ def fetch_playlist(plid, locale)
end

def get_playlist_videos(db, playlist, offset, locale = nil, continuation = nil)
if playlist.is_a? InvidiousPlaylist
db.query_all("SELECT * FROM playlist_videos WHERE plid = $1 ORDER BY array_position($2, index) LIMIT 100 OFFSET $3", playlist.id, playlist.index, offset, as: PlaylistVideo)
else
fetch_playlist_videos(playlist.id, playlist.video_count, offset, locale, continuation)
end
end

def fetch_playlist_videos(plid, video_count, offset = 0, locale = nil, continuation = nil)
if continuation
response = YT_POOL.client &.get("/watch?v=#{continuation}&list=#{plid}&gl=US&hl=en")
initial_data = extract_initial_data(response.body)
offset = initial_data["currentVideoEndpoint"]?.try &.["watchEndpoint"]?.try &.["index"]?.try &.as_i64 || offset
end

if video_count > 100
url = produce_playlist_url(plid, offset)

response = YT_POOL.client &.get(url)
initial_data = JSON.parse(response.body).as_a.find(&.as_h.["response"]?).try &.as_h
elsif offset > 100
# Show empty playlist if requested page is out of range
if offset >= playlist.video_count
return [] of PlaylistVideo
else # Extract first page of videos
response = YT_POOL.client &.get("/playlist?list=#{plid}&gl=US&hl=en")
initial_data = extract_initial_data(response.body)
end

return [] of PlaylistVideo if !initial_data
videos = extract_playlist_videos(initial_data)
if playlist.is_a? InvidiousPlaylist
db.query_all("SELECT * FROM playlist_videos WHERE plid = $1 ORDER BY array_position($2, index) LIMIT 100 OFFSET $3",
playlist.id, playlist.index, offset, as: PlaylistVideo)
else
if offset >= 100
# Normalize offset to match YouTube's behavior (chunks of 100 videos per request)
offset = (offset / 100).to_i64 * 100_i64

ctoken = produce_playlist_continuation(playlist.id, offset)
initial_data = JSON.parse(request_youtube_api_browse(ctoken)).as_h
else
response = YT_POOL.client &.get("/playlist?list=#{playlist.id}&gl=US&hl=en")
initial_data = extract_initial_data(response.body)
end

until videos.empty? || videos[0].index == offset
videos.shift
if initial_data
return extract_playlist_videos(initial_data)
else
return [] of PlaylistVideo
end
end

return videos
end

def extract_playlist_videos(initial_data : Hash(String, JSON::Any))
videos = [] of PlaylistVideo

(initial_data["contents"]?.try &.["twoColumnBrowseResultsRenderer"]["tabs"].as_a.select(&.["tabRenderer"]["selected"]?.try &.as_bool)[0]["tabRenderer"]["content"]["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"][0]["playlistVideoListRenderer"]["contents"].as_a ||
initial_data["response"]?.try &.["continuationContents"]["playlistVideoListContinuation"]["contents"].as_a).try &.each do |item|
if initial_data["contents"]?
tabs = initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
tabs_renderer = tabs.as_a.select(&.["tabRenderer"]["selected"]?.try &.as_bool)[0]["tabRenderer"]

# Watch out for the two key spellings: "contents" (with "s") and "content" (without)
if tabs_renderer["contents"]? || tabs_renderer["content"]?
# Initial playlist data
tabs_contents = tabs_renderer.["contents"]? || tabs_renderer.["content"]

list_renderer = tabs_contents.["sectionListRenderer"]["contents"][0]
item_renderer = list_renderer.["itemSectionRenderer"]["contents"][0]
contents = item_renderer.["playlistVideoListRenderer"]["contents"].as_a
else
# Continuation data
contents = initial_data["onResponseReceivedActions"][0]?
.try &.["appendContinuationItemsAction"]["continuationItems"].as_a
end
else
contents = initial_data["response"]?.try &.["continuationContents"]["playlistVideoListContinuation"]["contents"].as_a
end

contents.try &.each do |item|
if i = item["playlistVideoRenderer"]?
video_id = i["navigationEndpoint"]["watchEndpoint"]["videoId"].as_s
plid = i["navigationEndpoint"]["watchEndpoint"]["playlistId"].as_s
Expand Down
9 changes: 8 additions & 1 deletion src/invidious/routes/playlists.cr
Original file line number Diff line number Diff line change
Expand Up @@ -433,14 +433,21 @@ class Invidious::Routes::Playlists < Invidious::Routes::BaseRoute
return error_template(500, ex)
end

page_count = (playlist.video_count / 100).to_i
page_count = 1 if page_count == 0

if page > page_count
return env.redirect "/playlist?list=#{plid}&page=#{page_count}"
end

if playlist.privacy == PlaylistPrivacy::Private && playlist.author != user.try &.email
return error_template(403, "This playlist is private.")
end

begin
videos = get_playlist_videos(PG_DB, playlist, offset: (page - 1) * 100, locale: locale)
rescue ex
videos = [] of PlaylistVideo
return error_template(500, "Error encountered while retrieving playlist videos.<br>#{ex.message}")
end

if playlist.author == user.try &.email
Expand Down
2 changes: 1 addition & 1 deletion src/invidious/views/playlist.ecr
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
</div>
<div class="pure-u-1 pure-u-lg-3-5"></div>
<div class="pure-u-1 pure-u-lg-1-5" style="text-align:right">
<% if videos.size == 100 %>
<% if page_count != 1 && page < page_count %>
<a href="/playlist?list=<%= playlist.id %>&page=<%= page + 1 %>">
<%= translate(locale, "Next page") %>
</a>
Expand Down