Skip to content

Commit

Permalink
Merge pull request #573 from B0pol/comments-performance
Browse files Browse the repository at this point in the history
[youtube] improve comments extraction performance
  • Loading branch information
TobiGr authored Mar 5, 2021
2 parents ec1127d + ff5273b commit a3c6fce
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -824,4 +824,14 @@ public static boolean isVerified(final JsonArray badges) {

return false;
}

/**
 * Replaces the hex escape sequences for double quote, curly braces and square brackets
 * in the given text with the characters they stand for.
 * <p>
 * The following literal sequences are substituted, in this order:
 * {@code \x22} becomes {@code "}, {@code \x7b} becomes <code>{</code>,
 * {@code \x7d} becomes <code>}</code>, {@code \x5b} becomes {@code [} and
 * {@code \x5d} becomes {@code ]}. Matching is case-sensitive (for instance
 * {@code \x7B} is left untouched) and every other escape sequence is kept as is.
 *
 * @param doc the text to unescape; a {@code null} value causes a {@link NullPointerException}
 * @return a copy of {@code doc} with the five sequences listed above replaced
 */
public static String unescapeDocument(final String doc) {
    // The sequences are fixed literals, so the literal String.replace is used rather than
    // the regex-based replaceAll: the search strings need no regex escaping and the
    // replacement strings are never interpreted for '$' or '\' group references.
    return doc
            .replace("\\x22", "\"")
            .replace("\\x7b", "{")
            .replace("\\x7d", "}")
            .replace("\\x5b", "[")
            .replace("\\x5d", "]");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.exceptions.ReCaptchaException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Parser;

Expand Down Expand Up @@ -46,11 +47,9 @@ public YoutubeCommentsExtractor(StreamingService service, ListLinkHandler uiHand

@Override
public InfoItemsPage<CommentsInfoItem> getInitialPage() throws IOException, ExtractionException {
final String commentsTokenInside;
if (responseBody.contains("commentSectionRenderer")) {
String commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
if (!commentsTokenInside.contains("continuation\":\"")) {
commentsTokenInside = findValue(responseBody, "commentSectionRenderer", "}");
} else {
commentsTokenInside = findValue(responseBody, "sectionListRenderer", "}");
}
final String commentsToken = findValue(commentsTokenInside, "continuation\":\"", "\"");
return getPage(getNextPage(commentsToken));
Expand Down Expand Up @@ -133,7 +132,7 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
final Map<String, List<String>> requestHeaders = new HashMap<>();
requestHeaders.put("User-Agent", singletonList(USER_AGENT));
final Response response = downloader.get(getUrl(), requestHeaders, getExtractorLocalization());
responseBody = response.responseBody();
responseBody = YoutubeParsingHelper.unescapeDocument(response.responseBody());
ytClientVersion = findValue(responseBody, "INNERTUBE_CONTEXT_CLIENT_VERSION\":\"", "\"");
ytClientName = Parser.matchGroup1(YT_CLIENT_NAME_PATTERN, responseBody);
}
Expand Down Expand Up @@ -163,16 +162,9 @@ private String getDataString(Map<String, String> params) throws UnsupportedEncod
return result.toString();
}

private String findValue(String doc, String start, String end) {
final String unescaped = doc
.replaceAll("\\\\x22", "\"")
.replaceAll("\\\\x7b", "{")
.replaceAll("\\\\x7d", "}")
.replaceAll("\\\\x5b", "[")
.replaceAll("\\\\x5d", "]");

final int beginIndex = unescaped.indexOf(start) + start.length();
final int endIndex = unescaped.indexOf(end, beginIndex);
return unescaped.substring(beginIndex, endIndex);
/**
 * Extracts the text located between the first occurrence of {@code start} in {@code doc}
 * and the first occurrence of {@code end} that follows it.
 * <p>
 * No check is made that {@code start} is actually present: when it is missing,
 * {@code indexOf} yields {@code -1} and the extraction begins at index
 * {@code start.length() - 1} instead. When {@code end} cannot be found after the
 * computed start position, {@code substring} throws a
 * {@link StringIndexOutOfBoundsException}.
 *
 * @param doc   the text to search in
 * @param start the marker directly preceding the wanted value
 * @param end   the marker directly following the wanted value
 * @return the part of {@code doc} between the two markers, markers excluded
 */
private String findValue(final String doc, final String start, final String end) {
    // Position of the first character after the start marker.
    final int valueStart = doc.indexOf(start) + start.length();
    return doc.substring(valueStart, doc.indexOf(end, valueStart));
}
}

0 comments on commit a3c6fce

Please sign in to comment.