pull request #683 from XiangRongLin/yt_throttling

[YouTube] Fix buffering by decoding n parameter of stream urls
TeamNewPipe · Jul 28, 2021 · 027dc65 · 027dc65
1 parent 6fd93cd
commit 027dc65
Show file tree

Hide file tree

Showing 6 changed files with 375 additions and 67 deletions.
diff --git a/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavaScriptExtractor.java b/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavaScriptExtractor.java
@@ -0,0 +1,112 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.localization.Localization;
+import org.schabi.newpipe.extractor.utils.Parser;
+
+import javax.annotation.Nonnull;
+
+/**
+ * YouTube restricts streaming their media in multiple ways by requiring clients to apply a cipher
+ * function on parameters of requests.
+ * The cipher function is sent alongside as a JavaScript function.
+ * <p>
+ * This class handles fetching the JavaScript file in order to allow other classes to extract the
+ * needed functions.
+ * <p>
+ * The downloaded file is kept in a static field, so it is fetched at most once per successful
+ * extraction. The cache is a plain field without synchronization.
+ */
+public class YoutubeJavaScriptExtractor {
+
+    private static final String HTTPS = "https:";
+    private static final String JS_URL_NOT_FOUND =
+            "Embedded info did not provide YouTube player js url";
+    private static String cachedJavaScriptCode;
+
+    private YoutubeJavaScriptExtractor() {
+    }
+
+    /**
+     * Extracts the JavaScript file. The result is cached, so subsequent calls use the result of
+     * previous calls.
+     *
+     * @param videoId Does not influence the result, but a valid video id may help in the chance
+     *                that YouTube tracks it.
+     * @return The whole JavaScript file as a string.
+     * @throws ParsingException If the extraction failed.
+     */
+    @Nonnull
+    public static String extractJavaScriptCode(final String videoId) throws ParsingException {
+        if (cachedJavaScriptCode == null) {
+            final String playerJsUrl = cleanJavaScriptUrl(extractJavaScriptUrl(videoId));
+            cachedJavaScriptCode = downloadJavaScriptCode(playerJsUrl);
+        }
+
+        return cachedJavaScriptCode;
+    }
+
+    /**
+     * Same as {@link YoutubeJavaScriptExtractor#extractJavaScriptCode(String)} but with a constant
+     * value for videoId.
+     * Possible because the videoId has no influence on the result.
+     * <p>
+     * In the off chance that YouTube tracks with which video id the request is made, it may make
+     * sense to pass in video ids.
+     *
+     * @return The whole JavaScript file as a string.
+     * @throws ParsingException If the extraction failed.
+     */
+    @Nonnull
+    public static String extractJavaScriptCode() throws ParsingException {
+        return extractJavaScriptCode("d4IGg5dqeO8");
+    }
+
+    /**
+     * Finds the url of the player JavaScript file in the embed page of the given video.
+     * <p>
+     * The url is first looked up in the {@code "assets"} JSON of the page. If it is not found
+     * there, the {@code src} attributes of the page's script elements are searched for
+     * {@code base.js}.
+     *
+     * @param videoId the id used to build the embed page url
+     * @return the url as written in the page, not yet passed through
+     *         {@link #cleanJavaScriptUrl(String)}
+     * @throws ParsingException if the page could not be fetched or no player js url was found
+     */
+    private static String extractJavaScriptUrl(final String videoId) throws ParsingException {
+        try {
+            final String embedUrl = "https://www.youtube.com/embed/" + videoId;
+            final String embedPageContent = NewPipe.getDownloader()
+                    .get(embedUrl, Localization.DEFAULT).responseBody();
+
+            try {
+                final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
+                // the captured group still contains the JSON quotes and escaped slashes
+                return Parser.matchGroup1(assetsPattern, embedPageContent)
+                        .replace("\\", "").replace("\"", "");
+            } catch (final Parser.RegexException ignored) {
+                // The url is not in the "assets" JSON, so fall back to the script elements.
+                // Elements#attr(String, String) would set an attribute rather than filter by it,
+                // therefore every script element is inspected and matched on its src.
+                final Document doc = Jsoup.parse(embedPageContent);
+                final Elements scripts = doc.select("script");
+                for (final Element script : scripts) {
+                    final String src = script.attr("src");
+                    if (src.contains("base.js")) {
+                        return src;
+                    }
+                }
+            }
+
+        } catch (final Exception e) {
+            // keep the cause so that download and parsing failures can be told apart
+            throw new ParsingException(JS_URL_NOT_FOUND, e);
+        }
+        throw new ParsingException(JS_URL_NOT_FOUND);
+    }
+
+    /**
+     * Turns a protocol-relative ({@code //host/path}) or site-relative ({@code /path}) player url
+     * into an absolute https url. Any other value is returned unchanged.
+     *
+     * @param playerJsUrl the url as found in the embed page
+     * @return the url with scheme and, where it was missing, the www.youtube.com host prepended
+     */
+    @Nonnull
+    private static String cleanJavaScriptUrl(@Nonnull final String playerJsUrl) {
+        if (playerJsUrl.startsWith("//")) {
+            return HTTPS + playerJsUrl;
+        } else if (playerJsUrl.startsWith("/")) {
+            // sometimes https://www.youtube.com part has to be added manually
+            return HTTPS + "//www.youtube.com" + playerJsUrl;
+        } else {
+            return playerJsUrl;
+        }
+    }
+
+    /**
+     * Downloads the player JavaScript file.
+     *
+     * @param playerJsUrl absolute url of the file
+     * @return the response body of the request
+     * @throws ParsingException if the request failed; the original exception is kept as cause
+     */
+    @Nonnull
+    private static String downloadJavaScriptCode(final String playerJsUrl)
+            throws ParsingException {
+        try {
+            return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody();
+        } catch (final Exception e) {
+            throw new ParsingException(
+                    "Could not get player js code from url: " + playerJsUrl, e);
+        }
+    }
+}
diff --git a/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java b/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java
@@ -0,0 +1,126 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.utils.JavaScript;
+import org.schabi.newpipe.extractor.utils.Parser;
+
+import javax.annotation.Nonnull;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * <p>
+ * YouTube's media is protected with a cipher,
+ * which modifies the "n" query parameter of its video playback urls.
+ * This class handles extracting that "n" query parameter,
+ * applying the cipher on it and returning the resulting url which is not throttled.
+ * </p>
+ *
+ * <p>
+ * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
+ * </p>
+ * becomes
+ * <p>
+ * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
+ * </p>
+ * <br>
+ * <p>
+ * Decoding the "n" parameter is time intensive. For this reason, the results are cached.
+ * The cache is static, i.e. shared by all instances, and is a plain {@link HashMap} without
+ * synchronization.
+ * The cache can be cleared using {@link #clearCache()}
+ * </p>
+ *
+ */
+public class YoutubeThrottlingDecrypter {
+
+    /** Matches the "n" query parameter; group 1 is its value. */
+    private static final Pattern N_PARAM_PATTERN = Pattern.compile("[&?]n=([^&]+)");
+    /** Matches the call site of the decode function in the player code; group 1 is its name. */
+    private static final Pattern DECODE_FUNCTION_NAME_PATTERN = Pattern.compile(
+            "b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
+    private static final Map<String, String> nParams = new HashMap<>();
+
+    private final String functionName;
+    private final String function;
+
+    /**
+     * <p>
+     * Use this if you care about the off chance that YouTube tracks with which videoId the cipher
+     * is requested.
+     * </p>
+     * Otherwise use the no-arg constructor which uses a constant value.
+     *
+     * @param videoId the video id passed on to {@link YoutubeJavaScriptExtractor}
+     * @throws ParsingException if the player code could not be fetched or the decode function
+     *                          could not be found in it
+     */
+    public YoutubeThrottlingDecrypter(final String videoId) throws ParsingException {
+        final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode(videoId);
+
+        functionName = parseDecodeFunctionName(playerJsCode);
+        function = parseDecodeFunction(playerJsCode, functionName);
+    }
+
+    /**
+     * Creates a decrypter using the constant video id of
+     * {@link YoutubeJavaScriptExtractor#extractJavaScriptCode()}.
+     *
+     * @throws ParsingException if the player code could not be fetched or the decode function
+     *                          could not be found in it
+     */
+    public YoutubeThrottlingDecrypter() throws ParsingException {
+        final String playerJsCode = YoutubeJavaScriptExtractor.extractJavaScriptCode();
+
+        functionName = parseDecodeFunctionName(playerJsCode);
+        function = parseDecodeFunction(playerJsCode, functionName);
+    }
+
+    /**
+     * Finds the name of the function the player applies to the "n" parameter.
+     *
+     * @param playerJsCode the whole player JavaScript file
+     * @return the name of the decode function
+     * @throws Parser.RegexException if the call site is not found
+     */
+    private String parseDecodeFunctionName(final String playerJsCode)
+            throws Parser.RegexException {
+        return Parser.matchGroup1(DECODE_FUNCTION_NAME_PATTERN, playerJsCode);
+    }
+
+    /**
+     * Cuts the definition of the decode function out of the player code.
+     *
+     * @param playerJsCode the whole player JavaScript file
+     * @param functionName the name returned by {@link #parseDecodeFunctionName(String)}
+     * @return the definition rewritten as a {@code function name(...) {...};} declaration
+     * @throws Parser.RegexException if no definition with that name is found
+     */
+    @Nonnull
+    private String parseDecodeFunction(final String playerJsCode, final String functionName)
+            throws Parser.RegexException {
+        // The name is quoted so that it is always matched literally.
+        // The definition is taken up to the first ";" that is followed by a line break.
+        final Pattern functionPattern = Pattern.compile(
+                Pattern.quote(functionName) + "=function(.*?;)\n", Pattern.DOTALL);
+        return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
+    }
+
+    /**
+     * Replaces the value of the "n" query parameter of the url with its decoded value.
+     * <p>
+     * Only the parameter value itself is replaced; other parts of the url that happen to contain
+     * the same text are left untouched.
+     *
+     * @param url a playback url
+     * @return the url with the decoded "n" parameter, or the unchanged url if it has no "n"
+     *         parameter or decoding produced no value
+     * @throws Parser.RegexException kept in the signature for callers that already handle it
+     */
+    public String apply(final String url) throws Parser.RegexException {
+        final Matcher nParamMatcher = N_PARAM_PATTERN.matcher(url);
+        if (!nParamMatcher.find()) {
+            return url;
+        }
+
+        final String newNParam = decryptNParam(nParamMatcher.group(1));
+        if (newNParam == null) {
+            return url;
+        }
+        // splice by position instead of String#replace, which would touch every occurrence
+        return url.substring(0, nParamMatcher.start(1))
+                + newNParam
+                + url.substring(nParamMatcher.end(1));
+    }
+
+    /**
+     * Runs the decode function on the given value, consulting the static cache first.
+     *
+     * @param nParam the value of the "n" parameter as found in the url
+     * @return the decoded value
+     */
+    private String decryptNParam(final String nParam) {
+        if (nParams.containsKey(nParam)) {
+            return nParams.get(nParam);
+        }
+        final String decryptedNParam = JavaScript.run(function, functionName, nParam);
+        nParams.put(nParam, decryptedNParam);
+        return decryptedNParam;
+    }
+
+    /**
+     * @return the number of the cached "n" query parameters.
+     */
+    public static int getCacheSize() {
+        return nParams.size();
+    }
+
+    /**
+     * Clears all stored "n" query parameters.
+     */
+    public static void clearCache() {
+        nParams.clear();
+    }
+}