Skip to content

Commit

Permalink
Merge pull request #794 from FireMasterK/comments-count
Browse files Browse the repository at this point in the history
[YouTube] Add support to extract total comment count
  • Loading branch information
Stypox authored Jan 11, 2023
2 parents 6ccc43e + 22a47da commit c1040bc
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 57 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ public boolean isCommentsDisabled() throws ExtractionException {
return false;
}

/**
* @return the total number of comments
*/
public int getCommentsCount() throws ExtractionException {
return -1;
}

@Nonnull
@Override
public String getName() throws ParsingException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ public static CommentsInfo getInfo(final CommentsExtractor commentsExtractor)
ExtractorHelper.getItemsPageOrLogError(commentsInfo, commentsExtractor);
commentsInfo.setCommentsDisabled(commentsExtractor.isCommentsDisabled());
commentsInfo.setRelatedItems(initialCommentsPage.getItems());
try {
commentsInfo.setCommentsCount(commentsExtractor.getCommentsCount());
} catch (final Exception e) {
commentsInfo.addError(e);
}
commentsInfo.setNextPage(initialCommentsPage.getNextPage());

return commentsInfo;
Expand Down Expand Up @@ -76,6 +81,7 @@ public static InfoItemsPage<CommentsInfoItem> getMoreItems(

private transient CommentsExtractor commentsExtractor;
private boolean commentsDisabled = false;
private int commentsCount;

public CommentsExtractor getCommentsExtractor() {
return commentsExtractor;
Expand All @@ -86,7 +92,6 @@ public void setCommentsExtractor(final CommentsExtractor commentsExtractor) {
}

/**
* @apiNote Warning: This method is experimental and may get removed in a future release.
* @return {@code true} if the comments are disabled otherwise {@code false} (default)
* @see CommentsExtractor#isCommentsDisabled()
*/
Expand All @@ -95,10 +100,27 @@ public boolean isCommentsDisabled() {
}

/**
* @apiNote Warning: This method is experimental and may get removed in a future release.
* @param commentsDisabled {@code true} if the comments are disabled otherwise {@code false}
*/
public void setCommentsDisabled(final boolean commentsDisabled) {
this.commentsDisabled = commentsDisabled;
}

/**
* Returns the total number of comments.
*
* @return the total number of comments
*/
public int getCommentsCount() {
return commentsCount;
}

/**
* Sets the total number of comments.
*
* @param commentsCount the commentsCount to set.
*/
public void setCommentsCount(final int commentsCount) {
this.commentsCount = commentsCount;
}
}
Original file line number Diff line number Diff line change
@@ -1,18 +1,8 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
Expand All @@ -24,26 +14,31 @@
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
import org.schabi.newpipe.extractor.localization.Localization;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonWriter;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

public class YoutubeCommentsExtractor extends CommentsExtractor {

private JsonObject nextResponse;
/**
* Whether comments are disabled on video.
*/
private boolean commentsDisabled;

/**
* Caching mechanism and holder of the commentsDisabled value.
* <br/>
* Initial value = empty -> unknown if comments are disabled or not<br/>
* Some method calls {@link #findInitialCommentsToken()}
* -> value is set<br/>
* If the method or another one that is depending on disabled comments
* is now called again, the method execution can avoid unnecessary calls
* The second ajax <b>/next</b> response.
*/
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
private Optional<Boolean> optCommentsDisabled = Optional.empty();
private JsonObject ajaxJson;

public YoutubeCommentsExtractor(
final StreamingService service,
Expand All @@ -56,32 +51,25 @@ public YoutubeCommentsExtractor(
public InfoItemsPage<CommentsInfoItem> getInitialPage()
throws IOException, ExtractionException {

// Check if findInitialCommentsToken was already called and optCommentsDisabled initialized
if (optCommentsDisabled.orElse(false)) {
return getInfoItemsPageForDisabledComments();
}

// Get the token
final String commentsToken = findInitialCommentsToken();
// Check if the comments have been disabled
if (optCommentsDisabled.get()) {
if (commentsDisabled) {
return getInfoItemsPageForDisabledComments();
}

return getPage(getNextPage(commentsToken));
return extractComments(ajaxJson);
}

/**
* Finds the initial comments token and initializes commentsDisabled.
* <br/>
* Also sets {@link #optCommentsDisabled}.
* Also sets {@link #commentsDisabled}.
*
* @return the continuation token or null if none was found
*/
@Nullable
private String findInitialCommentsToken() throws ExtractionException {
private String findInitialCommentsToken(final JsonObject nextResponse)
throws ExtractionException {
final String token = JsonUtils.getArray(nextResponse,
"contents.twoColumnWatchNextResults.results.results.contents")
"contents.twoColumnWatchNextResults.results.results.contents")
.stream()
// Only use JsonObjects
.filter(JsonObject.class::isInstance)
Expand Down Expand Up @@ -112,7 +100,7 @@ private String findInitialCommentsToken() throws ExtractionException {
.orElse(null);

// The comments are disabled if we couldn't get a token
optCommentsDisabled = Optional.of(token == null);
commentsDisabled = token == null;

return token;
}
Expand All @@ -123,9 +111,9 @@ private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {
}

@Nullable
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
private Page getNextPage(@Nonnull final JsonObject jsonObject) throws ExtractionException {
final JsonArray onResponseReceivedEndpoints =
ajaxJson.getArray("onResponseReceivedEndpoints");
jsonObject.getArray("onResponseReceivedEndpoints");

// Prevent ArrayIndexOutOfBoundsException
if (onResponseReceivedEndpoints.isEmpty()) {
Expand Down Expand Up @@ -173,30 +161,39 @@ private Page getNextPage(final String continuation) throws ParsingException {
@Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
throws IOException, ExtractionException {
if (optCommentsDisabled.orElse(false)) {

if (commentsDisabled) {
return getInfoItemsPageForDisabledComments();
}

if (page == null || isNullOrEmpty(page.getId())) {
throw new IllegalArgumentException("Page doesn't have the continuation.");
}

final Localization localization = getExtractorLocalization();
// @formatter:off
final byte[] body = JsonWriter.string(
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("continuation", page.getId())
.done())
.getBytes(StandardCharsets.UTF_8);
// @formatter:on

final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);
final var jsonObject = getJsonPostResponse("next", body, localization);

return extractComments(jsonObject);
}

private InfoItemsPage<CommentsInfoItem> extractComments(final JsonObject jsonObject)
throws ExtractionException {
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
getServiceId());
collectCommentsFrom(collector, ajaxJson);
return new InfoItemsPage<>(collector, getNextPage(ajaxJson));
collectCommentsFrom(collector);
return new InfoItemsPage<>(collector, getNextPage(jsonObject));
}

private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject ajaxJson) throws ParsingException {
private void collectCommentsFrom(final CommentsInfoItemsCollector collector)
throws ParsingException {

final JsonArray onResponseReceivedEndpoints =
ajaxJson.getArray("onResponseReceivedEndpoints");
Expand Down Expand Up @@ -254,24 +251,59 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
public void onFetchPage(@Nonnull final Downloader downloader)
throws IOException, ExtractionException {
final Localization localization = getExtractorLocalization();
// @formatter:off
final byte[] body = JsonWriter.string(
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("videoId", getId())
.done())
.getBytes(StandardCharsets.UTF_8);
// @formatter:on

final String initialToken =
findInitialCommentsToken(getJsonPostResponse("next", body, localization));

if (initialToken == null) {
return;
}

// @formatter:off
final byte[] ajaxBody = JsonWriter.string(
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("continuation", initialToken)
.done())
.getBytes(StandardCharsets.UTF_8);
// @formatter:on

nextResponse = getJsonPostResponse("next", body, localization);
ajaxJson = getJsonPostResponse("next", ajaxBody, localization);
}


@Override
public boolean isCommentsDisabled() throws ExtractionException {
// Check if commentsDisabled has to be initialized
if (!optCommentsDisabled.isPresent()) {
// Initialize commentsDisabled
this.findInitialCommentsToken();
public boolean isCommentsDisabled() {
return commentsDisabled;
}

@Override
public int getCommentsCount() throws ExtractionException {
assertPageFetched();

if (commentsDisabled) {
return -1;
}

return optCommentsDisabled.get();
final JsonObject countText = ajaxJson
.getArray("onResponseReceivedEndpoints").getObject(0)
.getObject("reloadContinuationItemsCommand")
.getArray("continuationItems").getObject(0)
.getObject("commentsHeaderRenderer")
.getObject("countText");

try {
return Integer.parseInt(
Utils.removeNonDigitCharacters(getTextFromObject(countText))
);
} catch (final Exception e) {
throw new ExtractionException("Unable to get comments count", e);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ private boolean getCommentsFromCommentsInfoHelper(String url) throws IOException
@Test
public void testGetCommentsAllData() throws IOException, ExtractionException {
InfoItemsPage<CommentsInfoItem> comments = extractor.getInitialPage();
assertTrue(extractor.getCommentsCount() > 5); // at least 5 comments

DefaultTests.defaultTestListOfItems(YouTube, comments.getItems(), comments.getErrors());
for (CommentsInfoItem c : comments.getItems()) {
Expand Down Expand Up @@ -344,6 +345,11 @@ public void testGetCommentsReplyCount() throws IOException, ExtractionException
assertNotEquals(UNKNOWN_REPLY_COUNT, firstComment.getReplyCount(), "Could not get the reply count of the first comment");
assertGreater(300, firstComment.getReplyCount());
}

@Test
public void testCommentsCount() throws IOException, ExtractionException {
assertTrue(extractor.getCommentsCount() > 18800);
}
}

public static class FormattingTest {
Expand Down

0 comments on commit c1040bc

Please sign in to comment.