Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SoundCloud] Add support for comment replies #993

Draft
wants to merge 6 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ public void reset() {
* Add an error
* @param error the error
*/
protected void addError(final Exception error) {
public void addError(final Exception error) {
errors.add(error);
}

Expand Down
36 changes: 34 additions & 2 deletions extractor/src/main/java/org/schabi/newpipe/extractor/Page.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
package org.schabi.newpipe.extractor;

import javax.annotation.Nullable;
import java.io.Serializable;
import java.util.List;
import java.util.Map;

import javax.annotation.Nullable;

import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

/**
* The {@link Page} class is used for storing information on future requests
* for retrieving content.
* <br>
* A page has an {@link #id}, an {@link #url}, as well as information on possible {@link #cookies}.
* In case the data behind the URL has already been retrieved,
* it can be accessed by using {@link #getBody()} or {@link #getContent()}.
*/
public class Page implements Serializable {
private final String url;
private final String id;
private final List<String> ids;
private final Map<String, String> cookies;
private Serializable content;

@Nullable
private final byte[] body;
Expand Down Expand Up @@ -78,4 +86,28 @@ public static boolean isValid(final Page page) {
public byte[] getBody() {
return body;
}

public boolean hasContent() {
return content != null;
}

/**
* Get the page's content if it has been set, returns {@code null} otherwise.
* @return the page's content
*/
@Nullable
public Serializable getContent() {
return content;
}

/**
* Set the page's content.
* The page's content can either be retrieved manually by requesting the resource
* behind the page's URL (see {@link #url} and {@link #getUrl()})
* or storing it in a {@link Page}s instance in case the content has already been downloaded.
* @param content the page's content
*/
public void setContent(@Nullable final Serializable content) {
this.content = content;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -323,4 +323,11 @@ public static String getAvatarUrl(final JsonObject object) {
public static String getUploaderName(final JsonObject object) {
return object.getObject("user").getString("username", "");
}

public static boolean isReplyTo(@Nonnull final JsonObject originalComment,
@Nonnull final JsonObject otherComment) {
return originalComment.getInt("timestamp") == otherComment.getInt("timestamp");

}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.schabi.newpipe.extractor.services.soundcloud.extractors;

import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;
import com.grack.nanojson.JsonParser;
Expand All @@ -16,14 +18,29 @@
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;

import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.utils.cache.SoundCloudCommentsCache;
import org.schabi.newpipe.extractor.utils.cache.SoundCloudCommentsCache.CachedCommentInfo;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.annotation.Nonnull;

import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
import javax.annotation.Nullable;

public class SoundcloudCommentsExtractor extends CommentsExtractor {
public static final String COLLECTION = "collection";
public static final String NEXT_HREF = "next_href";

/**
* The last comment which was a top level comment.
* Next pages might start with replies to the last top level comment
* and therefore the {@link SoundcloudCommentsInfoItemExtractor#replyCount}
* of the last top level comment cannot be determined certainly.
*/
private static final SoundCloudCommentsCache LAST_TOP_LEVEL_COMMENTS =
new SoundCloudCommentsCache(10);

public SoundcloudCommentsExtractor(final StreamingService service,
final ListLinkHandler uiHandler) {
super(service, uiHandler);
Expand All @@ -46,44 +63,205 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage() throws ExtractionExcepti
final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
getServiceId());

collectStreamsFrom(collector, json.getArray("collection"));
collectCommentsFrom(collector, json, null);

return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
return new InfoItemsPage<>(collector, new Page(json.getString(NEXT_HREF)));
}

@Override
public InfoItemsPage<CommentsInfoItem> getPage(final Page page) throws ExtractionException,
IOException {
public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
throws ExtractionException, IOException {

if (page == null || isNullOrEmpty(page.getUrl())) {
throw new IllegalArgumentException("Page doesn't contain an URL");
}

final Downloader downloader = NewPipe.getDownloader();
final Response response = downloader.get(page.getUrl());

final JsonObject json;
try {
json = JsonParser.object().from(response.responseBody());
} catch (final JsonParserException e) {
throw new ParsingException("Could not parse json", e);
}

final CommentsInfoItemsCollector collector = new CommentsInfoItemsCollector(
getServiceId());

collectStreamsFrom(collector, json.getArray("collection"));
// Replies typically do not have a next page, but that's not always the case.
final boolean hasNextPage;
if (page.hasContent()) {
// This page contains the whole previously fetched comments.
// We need to get the comments which are replies to the comment with the page's id.
json = (JsonObject) page.getContent();
try {
final int commentId = Integer.parseInt(page.getId());
hasNextPage = collectRepliesFrom(collector, json, commentId, page.getUrl());
} catch (final NumberFormatException e) {
throw new ParsingException("Got invalid comment id", e);
}
} else {

final Downloader downloader = NewPipe.getDownloader();
final Response response = downloader.get(page.getUrl());

return new InfoItemsPage<>(collector, new Page(json.getString("next_href")));
try {
json = JsonParser.object().from(response.responseBody());
hasNextPage = json.has(NEXT_HREF);
} catch (final JsonParserException e) {
throw new ParsingException("Could not parse json", e);
}

final CachedCommentInfo topLevelCommentElement = LAST_TOP_LEVEL_COMMENTS.get(getUrl());
if (topLevelCommentElement == null) {
if (LAST_TOP_LEVEL_COMMENTS.isEmpty()) {
collector.addError(new RuntimeException(
"Could not get last top level comment. It has been removed from cache."
+ " Increase the cache size to not loose any comments"));
}
collectCommentsFrom(collector, json, null);
} else {
collectCommentsFrom(collector, json, topLevelCommentElement);
}
}

if (hasNextPage) {
return new InfoItemsPage<>(collector, new Page(json.getString(NEXT_HREF)));
} else {
return new InfoItemsPage<>(collector, null);
}
}

@Override
public void onFetchPage(@Nonnull final Downloader downloader) { }
public void onFetchPage(@Nonnull final Downloader downloader) {
}

private void collectStreamsFrom(final CommentsInfoItemsCollector collector,
final JsonArray entries) throws ParsingException {
/**
* Collect top level comments from a SoundCloud API response.
*
* @param collector the collector which collects the the top level comments
* @param json the JsonObject of the API response
* @param lastTopLevelComment the last top level comment from the previous page or {@code null}
* if this method is run for the initial page.
* @throws ParsingException
*/
private void collectCommentsFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json,
@Nullable final CachedCommentInfo lastTopLevelComment)
throws ParsingException {
final List<SoundcloudCommentsInfoItemExtractor> extractors = new ArrayList<>();
final String url = getUrl();
for (final Object comment : entries) {
collector.commit(new SoundcloudCommentsInfoItemExtractor((JsonObject) comment, url));

JsonObject currentTopLevelComment = null;
int currentTopLevelCommentIndex = 0;
boolean isLastCommentReply = true;
boolean isFirstCommentReply = false;
boolean addedLastTopLevelComment = lastTopLevelComment == null;
// Check whether the first comment in the list is a reply to the last top level comment
// from the previous page if there was a previous page.
if (lastTopLevelComment != null) {
final JsonObject firstComment = json.getArray(COLLECTION).getObject(0);
if (SoundcloudParsingHelper.isReplyTo(lastTopLevelComment.comment, firstComment)) {
currentTopLevelComment = lastTopLevelComment.comment;
isFirstCommentReply = true;
merge(json, lastTopLevelComment.json, lastTopLevelComment.index);
} else {
extractors.add(new SoundcloudCommentsInfoItemExtractor(
lastTopLevelComment.json,
lastTopLevelComment.index,
lastTopLevelComment.comment, url, null));
addedLastTopLevelComment = true;
}
}

final JsonArray entries = json.getArray(COLLECTION);
for (int i = 0; i < entries.size(); i++) {
final JsonObject entry = entries.getObject(i);
// Extract all top level comments
// The first comment is a top level co
// if it is not a reply to the last top level comment
//
if ((i == 0 && !isFirstCommentReply)
|| (
i != 0 && !SoundcloudParsingHelper.isReplyTo(entries.getObject(i - 1), entry)
&& !SoundcloudParsingHelper.isReplyTo(currentTopLevelComment, entry))) {
currentTopLevelComment = entry;
currentTopLevelCommentIndex = i;
if (!addedLastTopLevelComment) {
// There is a new top level comment. This also means that we can now determine
// the reply count and get all replies for the top level comment.
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, 0, lastTopLevelComment.comment, url, null));
addedLastTopLevelComment = true;
}
if (i == entries.size() - 1) {
isLastCommentReply = false;
LAST_TOP_LEVEL_COMMENTS.put(getUrl(), currentTopLevelComment, json, i);

// Do not collect the last comment if it is a top level comment
// because it might have replies.
// That is information we cannot get from the comment itself
// (thanks SoundCloud...) but needs to be obtained from the next comment.
// The comment will therefore be collected
// when collecting the items from the next page.
break;
}
extractors.add(new SoundcloudCommentsInfoItemExtractor(
json, i, entry, url, null));
}
}
if (isLastCommentReply) {
// Do not collect the last top level comment if it has replies and the retrieved
// comment list ends with a reply. We do not know whether the next page starts
// with more replies to the last top level comment.
LAST_TOP_LEVEL_COMMENTS.put(
getUrl(),
extractors.remove(extractors.size() - 1).item,
json, currentTopLevelCommentIndex);
}
extractors.stream().forEach(collector::commit);

}

/**
* Collect replies to a top level comment from a SoundCloud API response.
*
* @param collector the collector which collects the the replies
* @param json the SoundCloud API response
* @param id the comment's id for which the replies are collected
* @param url the corresponding page's URL
* @return {code true} if there might be more replies to the comment;
* {@code false} if there are definitely no more replies
*/
private boolean collectRepliesFrom(@Nonnull final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject json,
final int id,
@Nonnull final String url) {
JsonObject originalComment = null;
final JsonArray entries = json.getArray(COLLECTION);
boolean moreReplies = false;
for (int i = 0; i < entries.size(); i++) {
final JsonObject comment = entries.getObject(i);
if (comment.getInt("id") == id) {
originalComment = comment;
continue;
}
if (originalComment != null
&& SoundcloudParsingHelper.isReplyTo(originalComment, comment)) {
collector.commit(new SoundcloudCommentsInfoItemExtractor(
json, i, entries.getObject(i), url, originalComment));
// There might be more replies to the originalComment
// if the original comment is at the end of the list.
if (i == entries.size() - 1 && json.has(NEXT_HREF)) {
moreReplies = true;
}
}
}
return moreReplies;
}

private void merge(@Nonnull final JsonObject target, @Nonnull final JsonObject subject,
final int index) {
final JsonArray targetArray = target.getArray(COLLECTION);
final JsonArray subjectArray = subject.getArray(COLLECTION);
final JsonArray newArray = new JsonArray(
targetArray.size() + subjectArray.size() - index - 1);
for (int i = index; i < subjectArray.size(); i++) {
newArray.add(subjectArray.getObject(i));
}
newArray.addAll(targetArray);
target.put(COLLECTION, newArray);
}

}
Loading