Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YouTube] Fix buffering by decoding n parameter of stream urls #683

Merged
merged 13 commits into from
Jul 28, 2021
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package org.schabi.newpipe.extractor.services.youtube;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.Parser;

import javax.annotation.Nonnull;

/**
* YouTube restricts streaming their media in multiple ways by requiring clients to apply a cipher function
* on parameters of requests.
* The cipher function is sent alongside as a JavaScript function.
* <p>
* This class handles fetching the JavaScript file in order to allow other classes to extract the needed functions.
*/
public class YoutubeJavascriptExtractor {

private static final String HTTPS = "https:";
private static String cachedJavascriptCode;

/**
* Extracts the JavaScript file. The result is cached, so subsequent calls use the result of previous calls.
*
* @param videoId Does not influence the result, but a valid video id can prevent tracking
* @return The whole javascript file as a string.
* @throws ParsingException If the extraction failed.
*/
@Nonnull
public static String extractJavascriptCode(String videoId) throws ParsingException {
if (cachedJavascriptCode == null) {
final YoutubeJavascriptExtractor extractor = new YoutubeJavascriptExtractor();
TobiGr marked this conversation as resolved.
Show resolved Hide resolved
String playerJsUrl = extractor.cleanJavascriptUrl(extractor.extractJavascriptUrl(videoId));
XiangRongLin marked this conversation as resolved.
Show resolved Hide resolved
cachedJavascriptCode = extractor.downloadJavascriptCode(playerJsUrl);
}

return cachedJavascriptCode;
}

/**
* Same as {@link YoutubeJavascriptExtractor#extractJavascriptCode(String)} but with a constant value for videoId.
* Possible because the videoId has no influence on the result.
*
* For tracking avoidance purposes it may make sense to pass in valid video ids.
*/
@Nonnull
public static String extractJavascriptCode() throws ParsingException {
return extractJavascriptCode("d4IGg5dqeO8");
}

private String extractJavascriptUrl(String videoId) throws ParsingException {
try {
final String embedUrl = "https://www.youtube.com/embed/" + videoId;
final String embedPageContent = NewPipe.getDownloader()
.get(embedUrl, Localization.DEFAULT).responseBody();

try {
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
return Parser.matchGroup1(assetsPattern, embedPageContent)
.replace("\\", "").replace("\"", "");
} catch (final Parser.RegexException ex) {
// playerJsUrl is still available in the file, just somewhere else TODO
// it is ok not to find it, see how that's handled in getDeobfuscationCode()
final Document doc = Jsoup.parse(embedPageContent);
final Elements elems = doc.select("script").attr("name", "player_ias/base");
for (final Element elem : elems) {
if (elem.attr("src").contains("base.js")) {
return elem.attr("src");
}
}
}

} catch (final Exception i) {
throw new ParsingException("Embedded info did not provide YouTube player js url");
}
throw new ParsingException("Embedded info did not provide YouTube player js url");
}

private String cleanJavascriptUrl(String playerJsUrl) {
if (playerJsUrl.startsWith("//")) {
return HTTPS + playerJsUrl;
} else if (playerJsUrl.startsWith("/")) {
// sometimes https://www.youtube.com part has to be added manually
return HTTPS + "//www.youtube.com" + playerJsUrl;
} else {
return playerJsUrl;
}
}

private String downloadJavascriptCode(String playerJsUrl) throws ParsingException {
try {
return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody();
} catch (Exception e) {
throw new ParsingException("Could not get player js code from url: " + playerJsUrl);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package org.schabi.newpipe.extractor.services.youtube;

import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.Javascript;
import org.schabi.newpipe.extractor.utils.Parser;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* <p>
* YouTube's media is protected with a cipher, which modifies the "n" query parameter of its video playback urls.
* This class handles extracting that "n" query parameter, applying the cipher on it and returning the resulting url
* which is not throttled.
* </p>
*
* <p>
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
* </p>
* becomes
* <p>
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
* </p>
*/
public class YoutubeThrottlingDecrypter {

    /** Matches the "n" query parameter; group 1 is its value. */
    private static final Pattern N_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)");

    /** Matches the call site of the decode function in the player script; group 1 is its name. */
    private static final Pattern FUNCTION_NAME_PATTERN = Pattern.compile(
            "b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");

    private final String functionName;
    private final String function;

    /**
     * <p>
     * Use this if you care about the off chance that YouTube tracks with which videoId the cipher
     * is requested.
     * </p>
     * Otherwise use the no-arg constructor which uses a constant value.
     *
     * @param videoId id passed on to {@link YoutubeJavascriptExtractor#extractJavascriptCode(String)}
     * @throws ParsingException if the player script could not be obtained or the decode function
     *                          could not be located in it
     */
    public YoutubeThrottlingDecrypter(final String videoId) throws ParsingException {
        final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode(videoId);

        functionName = parseDecodeFunctionName(playerJsCode);
        function = parseDecodeFunction(playerJsCode, functionName);
    }

    /**
     * Creates a decrypter from the player script obtained via
     * {@link YoutubeJavascriptExtractor#extractJavascriptCode()}.
     *
     * @throws ParsingException if the player script could not be obtained or the decode function
     *                          could not be located in it
     */
    public YoutubeThrottlingDecrypter() throws ParsingException {
        final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode();

        functionName = parseDecodeFunctionName(playerJsCode);
        function = parseDecodeFunction(playerJsCode, functionName);
    }

    /**
     * Finds the name of the function the player script applies to the "n" parameter.
     */
    private static String parseDecodeFunctionName(final String playerJsCode)
            throws Parser.RegexException {
        return Parser.matchGroup1(FUNCTION_NAME_PATTERN, playerJsCode);
    }

    /**
     * Cuts the definition {@code <name>=function(...){...};} out of the player script and
     * rewrites it as a plain {@code function <name>(...){...};} declaration.
     */
    private static String parseDecodeFunction(final String playerJsCode,
                                              final String functionName)
            throws Parser.RegexException {
        // quote the name so that it is always matched literally inside the regex
        final Pattern functionPattern = Pattern.compile(
                Pattern.quote(functionName) + "=function(.*?;)\n", Pattern.DOTALL);
        return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
    }

    /**
     * Replaces the value of the "n" query parameter of the given url with its decoded form.
     * <p>
     * Only the first "n" parameter is rewritten and only the characters of its value are
     * touched; other parts of the url that happen to contain the same text stay as they are.
     *
     * @param url a playback url
     * @return the url with the decoded "n" value, or the url unchanged if it has no "n" parameter
     * @throws Parser.RegexException kept in the signature for existing callers
     */
    public String apply(final String url) throws Parser.RegexException {
        final Matcher nParam = N_PARAM_PATTERN.matcher(url);
        if (!nParam.find()) {
            return url;
        }

        final String newNParam = decryptNParam(nParam.group(1));
        // splice by position instead of String.replace(), which would also rewrite any other
        // occurrence of the old value in the url
        return url.substring(0, nParam.start(1)) + newNParam + url.substring(nParam.end(1));
    }

    /**
     * Runs the extracted decode function on the given parameter value.
     */
    private String decryptNParam(final String nParam) {
        final Javascript javascript = new Javascript();
        return javascript.run(function, functionName, nParam);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
import org.schabi.newpipe.extractor.services.youtube.ItagItem;
import org.schabi.newpipe.extractor.services.youtube.YoutubeJavascriptExtractor;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecrypter;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.*;
import org.schabi.newpipe.extractor.utils.JsonUtils;
Expand Down Expand Up @@ -79,13 +81,10 @@ public static class DeobfuscateException extends ParsingException {

@Nullable
private static String cachedDeobfuscationCode = null;
@Nullable
private String playerJsUrl = null;

private JsonArray initialAjaxJson;
private JsonObject initialData;
@Nonnull
private final Map<String, String> videoInfoPage = new HashMap<>();
private JsonArray initialAjaxJson;
private JsonObject initialData;
private JsonObject playerResponse;
private JsonObject videoPrimaryInfoRenderer;
private JsonObject videoSecondaryInfoRenderer;
Expand Down Expand Up @@ -525,11 +524,15 @@ public List<AudioStream> getAudioStreams() throws ExtractionException {
public List<VideoStream> getVideoStreams() throws ExtractionException {
assertPageFetched();
final List<VideoStream> videoStreams = new ArrayList<>();
YoutubeThrottlingDecrypter throttlingDecrypter = new YoutubeThrottlingDecrypter(getId());
XiangRongLin marked this conversation as resolved.
Show resolved Hide resolved

try {
for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
final ItagItem itag = entry.getValue();
final VideoStream videoStream = new VideoStream(entry.getKey(), false, itag);
String url = entry.getKey();
url = throttlingDecrypter.apply(url);

final VideoStream videoStream = new VideoStream(url, false, itag);
if (!Stream.containSimilarStream(videoStream, videoStreams)) {
videoStreams.add(videoStream);
}
Expand Down Expand Up @@ -797,38 +800,6 @@ private void fetchVideoInfoPage() throws ParsingException, ReCaptchaException, I
}
}

/**
 * Fetches the embed page of this video, remembers the player script url found in it in
 * {@code playerJsUrl} and returns the page's "sts" value.
 *
 * @return the digits of the "sts" entry, or an empty string if anything went wrong
 */
@Nonnull
private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
    try {
        final String url = "https://www.youtube.com/embed/" + getId();
        final String page = NewPipe.getDownloader()
                .get(url, getExtractorLocalization())
                .responseBody();

        try {
            final String quotedJsUrl =
                    Parser.matchGroup1("\"assets\":.+?\"js\":\\s*(\"[^\"]+\")", page);
            // strip the JSON escaping and the surrounding quotes
            playerJsUrl = quotedJsUrl.replace("\\", "").replace("\"", "");
        } catch (final Parser.RegexException notInAssets) {
            // Not in the "assets" fragment: look through the <script> tags instead.
            // Leaving playerJsUrl unset is fine, getDeobfuscationCode() deals with that.
            // NOTE(review): Elements.attr(key, value) sets the attribute on the selected
            // elements rather than filtering by it - confirm whether a filter was intended.
            final Elements scripts =
                    Jsoup.parse(page).select("script").attr("name", "player_ias/base");
            for (final Element script : scripts) {
                final String src = script.attr("src");
                if (src.contains("base.js")) {
                    playerJsUrl = src;
                    break;
                }
            }
        }

        // embed sts
        return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", page);
    } catch (final Exception ignored) {
        // no sts available; reply with none, as it then does not seem to be necessary
        return "";
    }
}

private String getDeobfuscationFuncName(final String playerCode) throws DeobfuscateException {
Parser.RegexException exception = null;
for (final String regex : REGEXES) {
Expand All @@ -843,11 +814,10 @@ private String getDeobfuscationFuncName(final String playerCode) throws Deobfusc
throw new DeobfuscateException("Could not find deobfuscate function with any of the given patterns.", exception);
}

private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
private String loadDeobfuscationCode()
throws DeobfuscateException {
try {
final String playerCode = NewPipe.getDownloader()
.get(playerJsUrl, getExtractorLocalization()).responseBody();
final String playerCode = YoutubeJavascriptExtractor.extractJavascriptCode(getId());
final String deobfuscationFunctionName = getDeobfuscationFuncName(playerCode);

final String functionPattern = "("
Expand All @@ -866,8 +836,6 @@ private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
"function " + DEOBFUSCATION_FUNC_NAME + "(a){return " + deobfuscationFunctionName + "(a);}";

return helperObject + deobfuscateFunction + callerFunction;
} catch (final IOException ioe) {
throw new DeobfuscateException("Could not load deobfuscate function", ioe);
} catch (final Exception e) {
throw new DeobfuscateException("Could not parse deobfuscate function ", e);
}
Expand All @@ -876,24 +844,7 @@ private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
@Nonnull
private String getDeobfuscationCode() throws ParsingException {
if (cachedDeobfuscationCode == null) {
if (playerJsUrl == null) {
// the currentPlayerJsUrl was not found in any page fetched so far and there is
// nothing cached, so try fetching embedded info
getEmbeddedInfoStsAndStorePlayerJsUrl();
if (playerJsUrl == null) {
throw new ParsingException(
"Embedded info did not provide YouTube player js url");
}
}

if (playerJsUrl.startsWith("//")) {
playerJsUrl = HTTPS + playerJsUrl;
} else if (playerJsUrl.startsWith("/")) {
// sometimes https://www.youtube.com part has to be added manually
playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl;
}

cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
cachedDeobfuscationCode = loadDeobfuscationCode();
}
return cachedDeobfuscationCode;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package org.schabi.newpipe.extractor.utils;

import org.mozilla.javascript.Context;
import org.mozilla.javascript.Function;
import org.mozilla.javascript.ScriptableObject;

public class Javascript {

public String run(String function, String functionName, String... parameters) {
XiangRongLin marked this conversation as resolved.
Show resolved Hide resolved
try {
Context context = Context.enter();
context.setOptimizationLevel(-1);
ScriptableObject scope = context.initSafeStandardObjects();

context.evaluateString(scope, function, functionName, 1, null);
Function jsFunction = (Function) scope.get(functionName, scope);
Object result = jsFunction.call(context, scope, scope, parameters);
return result.toString();
} finally {
Context.exit();
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package org.schabi.newpipe.extractor.services.youtube;

import org.junit.Before;
import org.junit.Test;
import org.schabi.newpipe.downloader.DownloaderTestImpl;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;

import java.io.IOException;

import static org.hamcrest.CoreMatchers.allOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.MatcherAssert.assertThat;

public class YoutubeJavascriptExtractorTest {

@Before
public void setup() throws IOException {
NewPipe.init(DownloaderTestImpl.getInstance());
}

@Test
public void testExtractJavascript__success() throws ParsingException {
String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode("d4IGg5dqeO8");
assertPlayerJsCode(playerJsCode);

playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode();
assertPlayerJsCode(playerJsCode);
}

@Test
public void testExtractJavascript__invalidVideoId__success() throws ParsingException {
String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode("not_a_video_id");
assertPlayerJsCode(playerJsCode);

playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode("11-chars123");
assertPlayerJsCode(playerJsCode);

}

private void assertPlayerJsCode(String playerJsCode) {
assertThat(playerJsCode, allOf(
containsString(" Copyright The Closure Library Authors.\n"
+ " SPDX-License-Identifier: Apache-2.0"),
TobiGr marked this conversation as resolved.
Show resolved Hide resolved
containsString("var _yt_player")));
}
}
Loading