Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Description object for comments text. #987

Merged
merged 1 commit into from
Nov 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
import org.schabi.newpipe.extractor.InfoItem;
import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.stream.Description;

import javax.annotation.Nullable;

public class CommentsInfoItem extends InfoItem {

private String commentId;
private String commentText;
private Description commentText;
private String uploaderName;
private String uploaderAvatarUrl;
private String uploaderUrl;
Expand Down Expand Up @@ -43,11 +44,11 @@ public void setCommentId(final String commentId) {
this.commentId = commentId;
}

public String getCommentText() {
public Description getCommentText() {
return commentText;
}

public void setCommentText(final String commentText) {
public void setCommentText(final Description commentText) {
this.commentText = commentText;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.stream.StreamExtractor;

import javax.annotation.Nullable;
Expand Down Expand Up @@ -41,8 +42,8 @@ default String getTextualLikeCount() throws ParsingException {
/**
* The text of the comment
*/
default String getCommentText() throws ParsingException {
return "";
default Description getCommentText() throws ParsingException {
return Description.EMPTY_DESCRIPTION;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.jsoup.nodes.Element;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.stream.Description;

import java.util.Objects;

Expand All @@ -18,7 +19,7 @@ public BandcampCommentsInfoItemExtractor(final Element writing, final String url

@Override
public String getName() throws ParsingException {
return getCommentText();
return getCommentText().getContent();
}

@Override
Expand All @@ -32,12 +33,14 @@ public String getThumbnailUrl() throws ParsingException {
}

@Override
public String getCommentText() throws ParsingException {
return writing.getElementsByClass("text").stream()
public Description getCommentText() throws ParsingException {
final var text = writing.getElementsByClass("text").stream()
.filter(Objects::nonNull)
.map(Element::ownText)
.findFirst()
.orElseThrow(() -> new ParsingException("Could not get comment text"));

return new Description(text, Description.PLAIN_TEXT);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;

import java.util.Objects;
Expand Down Expand Up @@ -59,13 +60,15 @@ public DateWrapper getUploadDate() throws ParsingException {
}

@Override
public String getCommentText() throws ParsingException {
public Description getCommentText() throws ParsingException {
final String htmlText = JsonUtils.getString(item, "text");
FireMasterK marked this conversation as resolved.
Show resolved Hide resolved
try {
final Document doc = Jsoup.parse(htmlText);
return doc.body().text();
final var text = doc.body().text();
return new Description(text, Description.PLAIN_TEXT);
} catch (final Exception e) {
return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
final var text = htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", "");
return new Description(text, Description.PLAIN_TEXT);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper;
import org.schabi.newpipe.extractor.stream.Description;

import javax.annotation.Nullable;
import java.util.Objects;
Expand All @@ -24,8 +25,8 @@ public String getCommentId() {
}

@Override
public String getCommentText() {
return json.getString("body");
public Description getCommentText() {
return new Description(json.getString("body"), Description.PLAIN_TEXT);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;

import com.grack.nanojson.JsonArray;
import com.grack.nanojson.JsonObject;

import org.schabi.newpipe.extractor.Page;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.localization.TimeAgoParser;
import org.schabi.newpipe.extractor.stream.Description;
import org.schabi.newpipe.extractor.utils.JsonUtils;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nullable;

import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;

public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor {

private final JsonObject json;
Expand Down Expand Up @@ -176,18 +176,20 @@ public String getTextualLikeCount() throws ParsingException {
}

@Override
public String getCommentText() throws ParsingException {
public Description getCommentText() throws ParsingException {
try {
final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText");
if (contentText.isEmpty()) {
// completely empty comments as described in
// https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584
return "";
return Description.EMPTY_DESCRIPTION;
}
final String commentText = getTextFromObject(contentText, true);
// YouTube adds U+FEFF in some comments.
// eg. https://www.youtube.com/watch?v=Nj4F63E59io<feff>
return Utils.removeUTF8BOM(commentText);
final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText);

return new Description(commentTextBomRemoved, Description.HTML);
} catch (final Exception e) {
throw new ParsingException("Could not get comment text", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
for (CommentsInfoItem c : comments.getItems()) {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
Expand All @@ -91,7 +91,7 @@ private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String

private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) {
if (c.getCommentText().getContent().contains(comment)) {
return true;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getUploaderName()));
assertFalse(Utils.isBlank(c.getUploaderAvatarUrl()));
assertFalse(Utils.isBlank(c.getCommentId()));
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
assertFalse(Utils.isBlank(c.getName()));
assertFalse(Utils.isBlank(c.getTextualUploadDate()));
assertNotNull(c.getUploadDate());
Expand All @@ -111,7 +111,7 @@ private boolean findInComments(InfoItemsPage<CommentsInfoItem> comments, String

private boolean findInComments(List<CommentsInfoItem> comments, String comment) {
for (CommentsInfoItem c : comments) {
if (c.getCommentText().contains(comment)) {
if (c.getCommentText().getContent().contains(comment)) {
return true;
}
}
Expand Down Expand Up @@ -152,9 +152,9 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text
assertTrue(Utils.isBlank(c.getCommentText()));
assertTrue(Utils.isBlank(c.getCommentText().getContent()));
} else {
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
}
}
}
Expand Down Expand Up @@ -193,7 +193,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
if (c.isHeartedByUploader()) {
heartedByUploader = true;
}
Expand Down Expand Up @@ -233,7 +233,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException {
assertFalse(Utils.isBlank(c.getThumbnailUrl()));
assertFalse(Utils.isBlank(c.getUrl()));
assertTrue(c.getLikeCount() >= 0);
assertFalse(Utils.isBlank(c.getCommentText()));
assertFalse(Utils.isBlank(c.getCommentText().getContent()));
}

assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned");
Expand Down Expand Up @@ -328,7 +328,7 @@ public void testGetCommentsFirstReplies() throws IOException, ExtractionExceptio

InfoItemsPage<CommentsInfoItem> replies = extractor.getPage(firstComment.getReplies());

assertEquals("First", replies.getItems().get(0).getCommentText(),
assertEquals("First", replies.getItems().get(0).getCommentText().getContent(),
"First reply comment did not match");
}

Expand Down