Skip to content

Commit

Permalink
pull request #683 from XiangRongLin/yt_throttling
Browse files Browse the repository at this point in the history
[YouTube] Fix buffering by decoding n parameter of stream urls
  • Loading branch information
TobiGr committed Jul 28, 2021
1 parent 6fd93cd commit 027dc65
Show file tree
Hide file tree
Showing 6 changed files with 375 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package org.schabi.newpipe.extractor.services.youtube;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.NewPipe;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.Parser;

import javax.annotation.Nonnull;

/**
* YouTube restricts streaming their media in multiple ways by requiring clients to apply a cipher
* function on parameters of requests.
* The cipher function is sent alongside as a JavaScript function.
* <p>
* This class handles fetching the JavaScript file in order to allow other classes to extract the
* needed functions.
*/
public final class YoutubeJavaScriptExtractor {

    private static final String HTTPS = "https:";
    private static final String PLAYER_JS_URL_NOT_FOUND =
            "Embedded info did not provide YouTube player js url";

    /**
     * Player JavaScript downloaded by the first successful call; {@code null} until then.
     * Access is not synchronized in this class.
     */
    private static String cachedJavaScriptCode;

    private YoutubeJavaScriptExtractor() {
        // static utility class, not meant to be instantiated
    }

    /**
     * Extracts the JavaScript file. The result is cached, so subsequent calls use the result of
     * previous calls.
     *
     * @param videoId Does not influence the result, but a valid video id may help in the chance
     *                that YouTube tracks it.
     * @return The whole JavaScript file as a string.
     * @throws ParsingException If the extraction failed.
     */
    @Nonnull
    public static String extractJavaScriptCode(final String videoId) throws ParsingException {
        if (cachedJavaScriptCode == null) {
            final String playerJsUrl = cleanJavaScriptUrl(extractJavaScriptUrl(videoId));
            cachedJavaScriptCode = downloadJavaScriptCode(playerJsUrl);
        }

        return cachedJavaScriptCode;
    }

    /**
     * Same as {@link YoutubeJavaScriptExtractor#extractJavaScriptCode(String)} but with a constant
     * value for videoId.
     * Possible because the videoId has no influence on the result.
     * <p>
     * In the off chance that YouTube tracks with which video id the request is made, it may make
     * sense to pass in video ids.
     *
     * @return The whole JavaScript file as a string.
     * @throws ParsingException If the extraction failed.
     */
    @Nonnull
    public static String extractJavaScriptCode() throws ParsingException {
        return extractJavaScriptCode("d4IGg5dqeO8");
    }

    /**
     * Downloads the embed page of the given video and looks up the url of the player JavaScript.
     * <p>
     * The url is first searched in the {@code "assets"} JSON of the page; if that pattern does not
     * match, the {@code src} attributes of the page's {@code <script>} elements are scanned for
     * one containing {@code base.js}.
     *
     * @param videoId the id used to build the embed page url
     * @return the player JavaScript url exactly as found in the page (it may be relative or
     *         protocol-relative, see {@link #cleanJavaScriptUrl(String)})
     * @throws ParsingException if the page could not be fetched or no url was found; the
     *                          underlying exception, if any, is attached as cause
     */
    private static String extractJavaScriptUrl(final String videoId) throws ParsingException {
        try {
            final String embedUrl = "https://www.youtube.com/embed/" + videoId;
            final String embedPageContent = NewPipe.getDownloader()
                    .get(embedUrl, Localization.DEFAULT).responseBody();

            try {
                final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
                // strip the JSON escaping backslashes and the surrounding quotes
                return Parser.matchGroup1(assetsPattern, embedPageContent)
                        .replace("\\", "").replace("\"", "");
            } catch (final Parser.RegexException ignored) {
                // The "assets" JSON is missing; fall back to the <script> tags of the page.
                // Note: Elements.attr(key, value) is a setter, not a filter, so it is
                // deliberately not used here; every script element is checked instead.
                final Document doc = Jsoup.parse(embedPageContent);
                final Elements scripts = doc.select("script");
                for (final Element script : scripts) {
                    final String src = script.attr("src");
                    if (src.contains("base.js")) {
                        return src;
                    }
                }
            }
        } catch (final Exception e) {
            // keep the original failure as cause so that it is not lost for debugging
            throw new ParsingException(PLAYER_JS_URL_NOT_FOUND, e);
        }
        throw new ParsingException(PLAYER_JS_URL_NOT_FOUND);
    }

    /**
     * Turns a protocol-relative ({@code //host/path}) or host-relative ({@code /path}) player
     * url into an absolute https url. Any other url is returned unchanged.
     *
     * @param playerJsUrl the url as found in the embed page
     * @return the absolute url
     */
    @Nonnull
    private static String cleanJavaScriptUrl(@Nonnull final String playerJsUrl) {
        if (playerJsUrl.startsWith("//")) {
            return HTTPS + playerJsUrl;
        } else if (playerJsUrl.startsWith("/")) {
            // sometimes https://www.youtube.com part has to be added manually
            return HTTPS + "//www.youtube.com" + playerJsUrl;
        } else {
            return playerJsUrl;
        }
    }

    /**
     * Downloads the player JavaScript from the given url.
     *
     * @param playerJsUrl absolute url of the player JavaScript file
     * @return the response body of the request
     * @throws ParsingException if the download failed; the underlying exception is attached as
     *                          cause
     */
    @Nonnull
    private static String downloadJavaScriptCode(final String playerJsUrl)
            throws ParsingException {
        try {
            return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody();
        } catch (final Exception e) {
            throw new ParsingException(
                    "Could not get player js code from url: " + playerJsUrl, e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
package org.schabi.newpipe.extractor.services.youtube;

import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.utils.JavaScript;
import org.schabi.newpipe.extractor.utils.Parser;

import javax.annotation.Nonnull;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* <p>
* YouTube's media is protected with a cipher,
* which modifies the "n" query parameter of its video playback urls.
* This class handles extracting that "n" query parameter,
* applying the cipher on it and returning the resulting url which is not throttled.
* </p>
*
* <p>
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
* </p>
* becomes
* <p>
* https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
* </p>
* <br>
* <p>
* Decoding the "n" parameter is time intensive. For this reason, the results are cached.
* The cache can be cleared using {@link #clearCache()}
* </p>
*
*/
public class YoutubeThrottlingDecrypter {

    private static final String N_PARAM_REGEX = "[&?]n=([^&]+)";
    // Compiled once instead of on every call; group 1 is the value of the "n" parameter.
    private static final Pattern N_PARAM_PATTERN = Pattern.compile(N_PARAM_REGEX);
    // Matches the call site of the decode function in the player code; group 1 is its name.
    private static final Pattern DECODE_FUNCTION_NAME_PATTERN = Pattern.compile(
            "b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");

    /**
     * Cache of already decrypted "n" parameters (original value to decrypted value), shared by
     * all instances. It is a plain {@link HashMap}; this class does not synchronize access.
     */
    private static final Map<String, String> nParams = new HashMap<>();

    private final String functionName;
    private final String function;

    /**
     * <p>
     * Use this if you care about the off chance that YouTube tracks with which videoId the cipher
     * is requested.
     * </p>
     * Otherwise use the no-arg constructor which uses a constant value.
     *
     * @param videoId the video id passed on to the player JavaScript extraction
     * @throws ParsingException if the player JavaScript could not be fetched or the decode
     *                          function could not be found in it
     */
    public YoutubeThrottlingDecrypter(final String videoId) throws ParsingException {
        final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode(videoId);

        functionName = parseDecodeFunctionName(playerJsCode);
        function = parseDecodeFunction(playerJsCode, functionName);
    }

    /**
     * Creates a decrypter using the constant video id of
     * {@link YoutubeJavaScriptExtractor#extractJavaScriptCode()}.
     *
     * @throws ParsingException if the player JavaScript could not be fetched or the decode
     *                          function could not be found in it
     */
    public YoutubeThrottlingDecrypter() throws ParsingException {
        final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode();

        functionName = parseDecodeFunctionName(playerJsCode);
        function = parseDecodeFunction(playerJsCode, functionName);
    }

    /**
     * Finds the name of the function the player code applies to the "n" parameter.
     *
     * @param playerJsCode the whole player JavaScript
     * @return the name of the decode function
     * @throws Parser.RegexException if the call site pattern does not match
     */
    private String parseDecodeFunctionName(final String playerJsCode)
            throws Parser.RegexException {
        return Parser.matchGroup1(DECODE_FUNCTION_NAME_PATTERN, playerJsCode);
    }

    /**
     * Extracts the source of the decode function and rewrites it from the
     * {@code name=function(...){...};} form into a {@code function name(...){...};} declaration.
     *
     * @param playerJsCode the whole player JavaScript
     * @param functionName the name returned by {@link #parseDecodeFunctionName(String)}
     * @return the function declaration as JavaScript source
     * @throws Parser.RegexException if the function definition could not be found
     */
    @Nonnull
    private String parseDecodeFunction(final String playerJsCode, final String functionName)
            throws Parser.RegexException {
        // The definition is taken up to the first ";" that is directly followed by a newline.
        final Pattern functionPattern = Pattern.compile(functionName + "=function(.*?;)\n",
                Pattern.DOTALL);
        return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
    }

    /**
     * Replaces the value of the "n" query parameter of the given url with its decrypted
     * counterpart.
     * <p>
     * Only the value of the first "n" parameter is replaced; other parts of the url that happen
     * to contain the same character sequence are left untouched.
     *
     * @param url the stream url
     * @return the url with the decrypted "n" parameter, or the unchanged url if it does not
     *         contain an "n" parameter
     * @throws Parser.RegexException kept in the signature for compatibility with existing callers
     */
    public String apply(final String url) throws Parser.RegexException {
        final Matcher matcher = N_PARAM_PATTERN.matcher(url);
        if (!matcher.find()) {
            return url;
        }

        final String oldNParam = matcher.group(1);
        final String newNParam = decryptNParam(oldNParam);
        return replaceNParam(url, matcher.start(1), matcher.end(1), newNParam);
    }

    /**
     * Runs the decode function on the given "n" parameter, consulting the shared cache first
     * and storing the result in it afterwards.
     *
     * @param nParam the original value of the "n" parameter
     * @return the decrypted value
     */
    private String decryptNParam(final String nParam) {
        if (nParams.containsKey(nParam)) {
            return nParams.get(nParam);
        }
        final String decryptedNParam = JavaScript.run(function, functionName, nParam);
        nParams.put(nParam, decryptedNParam);
        return decryptedNParam;
    }

    /**
     * Replaces the characters of {@code url} in the range {@code [start, end)} by
     * {@code newValue}.
     * <p>
     * Working on the matched range rather than on the old value avoids touching other
     * occurrences of the same text in the url.
     *
     * @param url      the original url
     * @param start    index of the first character of the old value
     * @param end      index after the last character of the old value
     * @param newValue the value to insert
     * @return the url with the range replaced
     */
    @Nonnull
    private String replaceNParam(@Nonnull final String url,
                                 final int start,
                                 final int end,
                                 final String newValue) {
        return url.substring(0, start) + newValue + url.substring(end);
    }

    /**
     * @return the number of the cached "n" query parameters.
     */
    public static int getCacheSize() {
        return nParams.size();
    }

    /**
     * Clears all stored "n" query parameters.
     */
    public static void clearCache() {
        nParams.clear();
    }
}
Loading

0 comments on commit 027dc65

Please sign in to comment.