TeamNewPipe · TobiGr · Jul 28, 2021 · Jul 9, 2021 · Jul 12, 2021 · Jul 15, 2021
diff --git a/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractor.java b/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractor.java
@@ -0,0 +1,101 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.localization.Localization;
+import org.schabi.newpipe.extractor.utils.Parser;
+
+import javax.annotation.Nonnull;
+
+/**
+ * Youtube restricts streaming their media in multiple ways by requiring clients to apply a cipher function
+ * on parameters of requests.
+ * The cipher function is sent alongside as a JavaScript function.
+ * <p>
+ * This class handles fetching the JavaScript file in order to allow other classes to extract the needed functions.
+ */
+public class YoutubeJavascriptExtractor {
+
+    private static final String HTTPS = "https:";
+    private static final String MISSING_PLAYER_JS_URL = "Embedded info did not provide YouTube player js url";
+
+    // Plain static cache shared by all callers; it is read and written without any synchronization.
+    private static String cachedJavascriptCode;
+
+    /**
+     * Extracts the JavaScript file. The result is cached, so subsequent calls use the result of previous calls
+     * and the {@code videoId} of later calls is ignored.
+     *
+     * @param videoId Does not influence the result, but a valid video id can prevent tracking
+     * @return The whole javascript file as a string.
+     * @throws ParsingException If the player url could not be found or the file could not be downloaded.
+     */
+    @Nonnull
+    public static String extractJavascriptCode(final String videoId) throws ParsingException {
+        if (cachedJavascriptCode == null) {
+            final String playerJsUrl = cleanJavascriptUrl(extractJavascriptUrl(videoId));
+            cachedJavascriptCode = downloadJavascriptCode(playerJsUrl);
+        }
+
+        return cachedJavascriptCode;
+    }
+
+    /**
+     * Same as {@link YoutubeJavascriptExtractor#extractJavascriptCode(String)} but with a constant value for videoId.
+     * Possible because the videoId has no influence on the result.
+     * <p>
+     * To avoid tracking, it may make sense to pass in a valid video id instead.
+     *
+     * @return The whole javascript file as a string.
+     * @throws ParsingException If the extraction failed.
+     */
+    @Nonnull
+    public static String extractJavascriptCode() throws ParsingException {
+        return extractJavascriptCode("d4IGg5dqeO8");
+    }
+
+    /**
+     * Downloads the embed page of the given video and looks up the url of the player JavaScript file in it.
+     *
+     * @param videoId the id appended to the embed page url
+     * @return the player url exactly as found in the page; it may be protocol-relative or host-relative
+     * @throws ParsingException if the page could not be fetched or does not contain a player url
+     */
+    private static String extractJavascriptUrl(final String videoId) throws ParsingException {
+        try {
+            final String embedUrl = "https://www.youtube.com/embed/" + videoId;
+            final String embedPageContent = NewPipe.getDownloader()
+                    .get(embedUrl, Localization.DEFAULT).responseBody();
+
+            try {
+                // First choice: the "js" entry of the "assets" object embedded in the page.
+                final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
+                return Parser.matchGroup1(assetsPattern, embedPageContent)
+                        .replace("\\", "").replace("\"", "");
+            } catch (final Parser.RegexException ex) {
+                // Fallback: the url is still in the page, as the src of a <script> tag pointing at base.js.
+                // Elements.attr(key, value) is a setter, so it must not be used to filter here; the src
+                // check below is the only criterion that ever selected the element.
+                final Document doc = Jsoup.parse(embedPageContent);
+                final Elements scripts = doc.select("script");
+                for (final Element script : scripts) {
+                    final String src = script.attr("src");
+                    if (src.contains("base.js")) {
+                        return src;
+                    }
+                }
+            }
+
+        } catch (final Exception e) {
+            // Keep the original failure (network error, parser error, ...) as the cause.
+            throw new ParsingException(MISSING_PLAYER_JS_URL, e);
+        }
+        // The page was fetched but neither lookup strategy found a player url.
+        throw new ParsingException(MISSING_PLAYER_JS_URL);
+    }
+
+    /**
+     * Turns a protocol-relative ({@code //host/path}) or host-relative ({@code /path}) player url into an
+     * absolute https url. Any other url is returned unchanged.
+     */
+    private static String cleanJavascriptUrl(final String playerJsUrl) {
+        if (playerJsUrl.startsWith("//")) {
+            return HTTPS + playerJsUrl;
+        } else if (playerJsUrl.startsWith("/")) {
+            // sometimes https://www.youtube.com part has to be added manually
+            return HTTPS + "//www.youtube.com" + playerJsUrl;
+        } else {
+            return playerJsUrl;
+        }
+    }
+
+    /**
+     * Downloads the player JavaScript file.
+     *
+     * @param playerJsUrl absolute url of the player JavaScript file
+     * @return the response body of the download
+     * @throws ParsingException if the download failed; the underlying exception is attached as the cause
+     */
+    private static String downloadJavascriptCode(final String playerJsUrl) throws ParsingException {
+        try {
+            return NewPipe.getDownloader().get(playerJsUrl, Localization.DEFAULT).responseBody();
+        } catch (final Exception e) {
+            throw new ParsingException("Could not get player js code from url: " + playerJsUrl, e);
+        }
+    }
+}
diff --git a/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java b/...c/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeThrottlingDecrypter.java
@@ -0,0 +1,88 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+import org.schabi.newpipe.extractor.utils.Javascript;
+import org.schabi.newpipe.extractor.utils.Parser;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * <p>
+ * YouTube's media is protected with a cipher, which modifies the "n" query parameter of its video playback urls.
+ * This class handles extracting that "n" query parameter, applying the cipher on it and returning the resulting url
+ * which is not throttled.
+ * </p>
+ *
+ * <p>
+ * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=VVF2xyZLVRZZxHXZ&other=other
+ * </p>
+ * becomes
+ * <p>
+ * https://r5---sn-4g5ednsz.googlevideo.com/videoplayback?n=iHywZkMipkszqA&other=other
+ * </p>
+ */
+public class YoutubeThrottlingDecrypter {
+
+    private static final String N_PARAM_REGEX = "[&?]n=([^&]+)";
+    // Compiled once instead of on every call.
+    private static final Pattern N_PARAM_PATTERN = Pattern.compile(N_PARAM_REGEX);
+    private static final Pattern DECODE_FUNCTION_NAME_PATTERN = Pattern.compile(
+            "b=a\\.get\\(\"n\"\\)\\)&&\\(b=(\\w+)\\(b\\),a\\.set\\(\"n\",b\\)");
+
+    private final String functionName;
+    private final String function;
+
+    /**
+     * <p>
+     * Use this if you care about the off chance that YouTube tracks with which videoId the cipher is requested.
+     * </p>
+     * Otherwise use the no-arg constructor which uses a constant value.
+     *
+     * @param videoId the video id used when requesting the player JavaScript
+     * @throws ParsingException if the player JavaScript could not be obtained or the decode function
+     *                          could not be located in it
+     */
+    public YoutubeThrottlingDecrypter(final String videoId) throws ParsingException {
+        final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode(videoId);
+
+        functionName = parseDecodeFunctionName(playerJsCode);
+        function = parseDecodeFunction(playerJsCode, functionName);
+    }
+
+    /**
+     * Creates a decrypter using the constant video id of
+     * {@link YoutubeJavascriptExtractor#extractJavascriptCode()}.
+     *
+     * @throws ParsingException if the player JavaScript could not be obtained or the decode function
+     *                          could not be located in it
+     */
+    public YoutubeThrottlingDecrypter() throws ParsingException {
+        final String playerJsCode = YoutubeJavascriptExtractor.extractJavascriptCode();
+
+        functionName = parseDecodeFunctionName(playerJsCode);
+        function = parseDecodeFunction(playerJsCode, functionName);
+    }
+
+    /**
+     * Finds the name of the function the player applies to the "n" parameter.
+     */
+    private String parseDecodeFunctionName(final String playerJsCode) throws Parser.RegexException {
+        return Parser.matchGroup1(DECODE_FUNCTION_NAME_PATTERN, playerJsCode);
+    }
+
+    /**
+     * Extracts the source of the decode function and rewrites {@code name=function(...)...} into a
+     * {@code function name(...)...} declaration.
+     */
+    private String parseDecodeFunction(final String playerJsCode, final String functionName)
+            throws Parser.RegexException {
+        // The name is quoted so it is always matched literally, whatever characters it contains.
+        final Pattern functionPattern = Pattern.compile(
+                Pattern.quote(functionName) + "=function(.*?;)\n", Pattern.DOTALL);
+        return "function " + functionName + Parser.matchGroup1(functionPattern, playerJsCode);
+    }
+
+    /**
+     * Replaces the value of the first "n" query parameter of the url with its decrypted counterpart.
+     * Only the matched parameter value is rewritten; identical text elsewhere in the url is left untouched.
+     *
+     * @param url a playback url
+     * @return the url with the decrypted "n" value, or the unchanged url if it has no "n" parameter
+     * @throws Parser.RegexException kept in the signature for callers that already handle it
+     */
+    public String apply(final String url) throws Parser.RegexException {
+        final Matcher nParamMatcher = N_PARAM_PATTERN.matcher(url);
+        if (!nParamMatcher.find()) {
+            return url;
+        }
+
+        final String newNParam = decryptNParam(nParamMatcher.group(1));
+        // Splice by position: a plain String.replace(old, new) would also rewrite any other
+        // occurrence of the old value in the url.
+        return url.substring(0, nParamMatcher.start(1)) + newNParam + url.substring(nParamMatcher.end(1));
+    }
+
+    /**
+     * Runs the extracted decode function on the given "n" value.
+     */
+    private String decryptNParam(final String nParam) {
+        final Javascript javascript = new Javascript();
+        return javascript.run(function, functionName, nParam);
+    }
+}
diff --git a/...java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java b/...java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java
@@ -24,7 +24,9 @@
 import org.schabi.newpipe.extractor.localization.TimeAgoParser;
 import org.schabi.newpipe.extractor.localization.TimeAgoPatternsManager;
 import org.schabi.newpipe.extractor.services.youtube.ItagItem;
+import org.schabi.newpipe.extractor.services.youtube.YoutubeJavascriptExtractor;
 import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
+import org.schabi.newpipe.extractor.services.youtube.YoutubeThrottlingDecrypter;
 import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeChannelLinkHandlerFactory;
 import org.schabi.newpipe.extractor.stream.*;
 import org.schabi.newpipe.extractor.utils.JsonUtils;
@@ -79,13 +81,10 @@ public static class DeobfuscateException extends ParsingException {
 
     @Nullable
     private static String cachedDeobfuscationCode = null;
-    @Nullable
-    private String playerJsUrl = null;
-
-    private JsonArray initialAjaxJson;
-    private JsonObject initialData;
     @Nonnull
     private final Map<String, String> videoInfoPage = new HashMap<>();
+    private JsonArray initialAjaxJson;
+    private JsonObject initialData;
     private JsonObject playerResponse;
     private JsonObject videoPrimaryInfoRenderer;
     private JsonObject videoSecondaryInfoRenderer;
@@ -525,11 +524,15 @@ public List<AudioStream> getAudioStreams() throws ExtractionException {
     public List<VideoStream> getVideoStreams() throws ExtractionException {
         assertPageFetched();
         final List<VideoStream> videoStreams = new ArrayList<>();
+        YoutubeThrottlingDecrypter throttlingDecrypter = new YoutubeThrottlingDecrypter(getId());
 
         try {
             for (final Map.Entry<String, ItagItem> entry : getItags(FORMATS, ItagItem.ItagType.VIDEO).entrySet()) {
                 final ItagItem itag = entry.getValue();
-                final VideoStream videoStream = new VideoStream(entry.getKey(), false, itag);
+                String url = entry.getKey();
+                url = throttlingDecrypter.apply(url);
+
+                final VideoStream videoStream = new VideoStream(url, false, itag);
                 if (!Stream.containSimilarStream(videoStream, videoStreams)) {
                     videoStreams.add(videoStream);
                 }
@@ -797,38 +800,6 @@ private void fetchVideoInfoPage() throws ParsingException, ReCaptchaException, I
         }
     }
 
-    @Nonnull
-    private String getEmbeddedInfoStsAndStorePlayerJsUrl() {
-        try {
-            final String embedUrl = "https://www.youtube.com/embed/" + getId();
-            final String embedPageContent = NewPipe.getDownloader()
-                    .get(embedUrl, getExtractorLocalization()).responseBody();
-
-            try {
-                final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
-                playerJsUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
-                        .replace("\\", "").replace("\"", "");
-            } catch (final Parser.RegexException ex) {
-                // playerJsUrl is still available in the file, just somewhere else TODO
-                // it is ok not to find it, see how that's handled in getDeobfuscationCode()
-                final Document doc = Jsoup.parse(embedPageContent);
-                final Elements elems = doc.select("script").attr("name", "player_ias/base");
-                for (final Element elem : elems) {
-                    if (elem.attr("src").contains("base.js")) {
-                        playerJsUrl = elem.attr("src");
-                        break;
-                    }
-                }
-            }
-
-            // Get embed sts
-            return Parser.matchGroup1("\"sts\"\\s*:\\s*(\\d+)", embedPageContent);
-        } catch (final Exception i) {
-            // if it fails we simply reply with no sts as then it does not seem to be necessary
-            return "";
-        }
-    }
-
     private String getDeobfuscationFuncName(final String playerCode) throws DeobfuscateException {
         Parser.RegexException exception = null;
         for (final String regex : REGEXES) {
@@ -843,11 +814,10 @@ private String getDeobfuscationFuncName(final String playerCode) throws Deobfusc
         throw new DeobfuscateException("Could not find deobfuscate function with any of the given patterns.", exception);
     }
 
-    private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
+    private String loadDeobfuscationCode()
             throws DeobfuscateException {
         try {
-            final String playerCode = NewPipe.getDownloader()
-                    .get(playerJsUrl, getExtractorLocalization()).responseBody();
+            final String playerCode = YoutubeJavascriptExtractor.extractJavascriptCode(getId());
             final String deobfuscationFunctionName = getDeobfuscationFuncName(playerCode);
 
             final String functionPattern = "("
@@ -866,8 +836,6 @@ private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
                     "function " + DEOBFUSCATION_FUNC_NAME + "(a){return " + deobfuscationFunctionName + "(a);}";
 
             return helperObject + deobfuscateFunction + callerFunction;
-        } catch (final IOException ioe) {
-            throw new DeobfuscateException("Could not load deobfuscate function", ioe);
         } catch (final Exception e) {
             throw new DeobfuscateException("Could not parse deobfuscate function ", e);
         }
@@ -876,24 +844,7 @@ private String loadDeobfuscationCode(@Nonnull final String playerJsUrl)
     @Nonnull
     private String getDeobfuscationCode() throws ParsingException {
         if (cachedDeobfuscationCode == null) {
-            if (playerJsUrl == null) {
-                // the currentPlayerJsUrl was not found in any page fetched so far and there is
-                // nothing cached, so try fetching embedded info
-                getEmbeddedInfoStsAndStorePlayerJsUrl();
-                if (playerJsUrl == null) {
-                    throw new ParsingException(
-                            "Embedded info did not provide YouTube player js url");
-                }
-            }
-
-            if (playerJsUrl.startsWith("//")) {
-                playerJsUrl = HTTPS + playerJsUrl;
-            } else if (playerJsUrl.startsWith("/")) {
-                // sometimes https://www.youtube.com part has to be added manually
-                playerJsUrl = HTTPS + "//www.youtube.com" + playerJsUrl;
-            }
-
-            cachedDeobfuscationCode = loadDeobfuscationCode(playerJsUrl);
+            cachedDeobfuscationCode = loadDeobfuscationCode();
         }
         return cachedDeobfuscationCode;
     }

diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Javascript.java b/extractor/src/main/java/org/schabi/newpipe/extractor/utils/Javascript.java
@@ -0,0 +1,24 @@
+package org.schabi.newpipe.extractor.utils;
+
+import org.mozilla.javascript.Context;
+import org.mozilla.javascript.Function;
+import org.mozilla.javascript.ScriptableObject;
+
+public class Javascript {
+
+    /**
+     * Evaluates the given JavaScript source and calls one of the functions it defines.
+     *
+     * @param function     JavaScript source that defines a function named {@code functionName}
+     * @param functionName name of the function to call; also passed to the engine as the source name
+     * @param parameters   arguments passed to the function
+     * @return the {@code toString()} of the value returned by the function
+     */
+    public String run(final String function, final String functionName, final String... parameters) {
+        // Enter before the try block: if enter() itself failed, the finally block would call
+        // Context.exit() without a matching enter() and hide the original error.
+        final Context context = Context.enter();
+        try {
+            // -1 selects Rhino's interpreted mode (no bytecode generation).
+            context.setOptimizationLevel(-1);
+            final ScriptableObject scope = context.initSafeStandardObjects();
+
+            context.evaluateString(scope, function, functionName, 1, null);
+            final Function jsFunction = (Function) scope.get(functionName, scope);
+            final Object result = jsFunction.call(context, scope, scope, parameters);
+            return result.toString();
+        } finally {
+            Context.exit();
+        }
+    }
+
+}
diff --git a/...st/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractorTest.java b/...st/java/org/schabi/newpipe/extractor/services/youtube/YoutubeJavascriptExtractorTest.java
@@ -0,0 +1,47 @@
+package org.schabi.newpipe.extractor.services.youtube;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.schabi.newpipe.downloader.DownloaderTestImpl;
+import org.schabi.newpipe.extractor.NewPipe;
+import org.schabi.newpipe.extractor.exceptions.ParsingException;
+
+import java.io.IOException;
+
+import static org.hamcrest.CoreMatchers.allOf;
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.MatcherAssert.assertThat;
+
+public class YoutubeJavascriptExtractorTest {
+
+    /**
+     * Installs the test downloader before every test.
+     */
+    @Before
+    public void setup() throws IOException {
+        NewPipe.init(DownloaderTestImpl.getInstance());
+    }
+
+    /**
+     * The player code is returned both for an explicit video id and for the no-arg overload.
+     */
+    @Test
+    public void testExtractJavascript__success() throws ParsingException {
+        final String fromExplicitId = YoutubeJavascriptExtractor.extractJavascriptCode("d4IGg5dqeO8");
+        assertPlayerJsCode(fromExplicitId);
+
+        final String fromDefaultId = YoutubeJavascriptExtractor.extractJavascriptCode();
+        assertPlayerJsCode(fromDefaultId);
+    }
+
+    /**
+     * Ids that do not belong to a video still yield the player code.
+     */
+    @Test
+    public void testExtractJavascript__invalidVideoId__success() throws ParsingException {
+        final String fromMalformedId = YoutubeJavascriptExtractor.extractJavascriptCode("not_a_video_id");
+        assertPlayerJsCode(fromMalformedId);
+
+        final String fromUnknownId = YoutubeJavascriptExtractor.extractJavascriptCode("11-chars123");
+        assertPlayerJsCode(fromUnknownId);
+    }
+
+    /**
+     * Checks that the given text contains the two markers expected in the player file:
+     * the Closure Library license header and the player namespace declaration.
+     */
+    private void assertPlayerJsCode(String playerJsCode) {
+        final String licenseHeader = " Copyright The Closure Library Authors.\n"
+                + " SPDX-License-Identifier: Apache-2.0";
+        final String playerNamespace = "var _yt_player";
+
+        assertThat(playerJsCode, allOf(containsString(licenseHeader), containsString(playerNamespace)));
+    }
+}