From b566084cace5b4fd1c628eba280fa8da40b74ef3 Mon Sep 17 00:00:00 2001 From: Kavin <20838718+FireMasterK@users.noreply.github.com> Date: Mon, 28 Nov 2022 00:22:10 +0000 Subject: [PATCH] Use Description object for comments text. --- .../extractor/comments/CommentsInfoItem.java | 7 ++++--- .../comments/CommentsInfoItemExtractor.java | 5 +++-- .../BandcampCommentsInfoItemExtractor.java | 9 ++++++--- .../PeertubeCommentsInfoItemExtractor.java | 9 ++++++--- .../SoundcloudCommentsInfoItemExtractor.java | 5 +++-- .../YoutubeCommentsInfoItemExtractor.java | 16 +++++++++------- .../bandcamp/BandcampCommentsExtractorTest.java | 2 +- .../peertube/PeertubeCommentsExtractorTest.java | 4 ++-- .../youtube/YoutubeCommentsExtractorTest.java | 14 +++++++------- 9 files changed, 41 insertions(+), 30 deletions(-) diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java index 2c1e0dac1b..0752e9b745 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItem.java @@ -3,13 +3,14 @@ import org.schabi.newpipe.extractor.InfoItem; import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.localization.DateWrapper; +import org.schabi.newpipe.extractor.stream.Description; import javax.annotation.Nullable; public class CommentsInfoItem extends InfoItem { private String commentId; - private String commentText; + private Description commentText; private String uploaderName; private String uploaderAvatarUrl; private String uploaderUrl; @@ -43,11 +44,11 @@ public void setCommentId(final String commentId) { this.commentId = commentId; } - public String getCommentText() { + public Description getCommentText() { return commentText; } - public void setCommentText(final String commentText) { + public void setCommentText(final Description commentText) { this.commentText = commentText; } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java index 128235fc1c..695478764f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/comments/CommentsInfoItemExtractor.java @@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.services.youtube.extractors.YoutubeCommentsInfoItemExtractor; +import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.stream.StreamExtractor; import javax.annotation.Nullable; @@ -41,8 +42,8 @@ default String getTextualLikeCount() throws ParsingException { /** * The text of the comment */ - default String getCommentText() throws ParsingException { - return ""; + default Description getCommentText() throws ParsingException { + return Description.EMPTY_DESCRIPTION; } /** diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java index 661f624e95..d931738c9f 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/bandcamp/extractors/BandcampCommentsInfoItemExtractor.java @@ -3,6 +3,7 @@ import org.jsoup.nodes.Element; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; +import org.schabi.newpipe.extractor.stream.Description; import java.util.Objects; @@ -18,7 +19,7 @@ public BandcampCommentsInfoItemExtractor(final Element writing, final String url @Override public String getName() throws ParsingException { - return getCommentText(); + return getCommentText().getContent(); } @Override @@ -32,12 +33,14 @@ public String getThumbnailUrl() throws ParsingException { } @Override - public String getCommentText() throws ParsingException { - return writing.getElementsByClass("text").stream() + public Description getCommentText() throws ParsingException { + final var text = writing.getElementsByClass("text").stream() .filter(Objects::nonNull) .map(Element::ownText) .findFirst() .orElseThrow(() -> new ParsingException("Could not get comment text")); + + return new Description(text, Description.PLAIN_TEXT); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java index 1bdc3bed63..8ceb913175 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/peertube/extractors/PeertubeCommentsInfoItemExtractor.java @@ -9,6 +9,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.services.peertube.PeertubeParsingHelper; +import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.utils.JsonUtils; import java.util.Objects; @@ -59,13 +60,15 @@ public DateWrapper getUploadDate() throws ParsingException { } @Override - public String getCommentText() throws ParsingException { + public Description getCommentText() throws ParsingException { final String htmlText = JsonUtils.getString(item, "text"); try { final Document doc = Jsoup.parse(htmlText); - return doc.body().text(); + final var text = doc.body().text(); + return new Description(text, Description.PLAIN_TEXT); } catch (final Exception e) { - return htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", ""); + final var text = htmlText.replaceAll("(?s)<[^>]*>(\\s*<[^>]*>)*", ""); + return new Description(text, Description.PLAIN_TEXT); } } diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java index 94497f7dfa..ec3f353e62 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/extractors/SoundcloudCommentsInfoItemExtractor.java @@ -5,6 +5,7 @@ import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.services.soundcloud.SoundcloudParsingHelper; +import org.schabi.newpipe.extractor.stream.Description; import javax.annotation.Nullable; import java.util.Objects; @@ -24,8 +25,8 @@ public String getCommentId() { } @Override - public String getCommentText() { - return json.getString("body"); + public Description getCommentText() { + return new Description(json.getString("body"), Description.PLAIN_TEXT); } @Override diff --git a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java index 81a3fdd917..95209a65fa 100644 --- a/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java +++ b/extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeCommentsInfoItemExtractor.java @@ -1,21 +1,21 @@ package org.schabi.newpipe.extractor.services.youtube.extractors; -import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT; -import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; - import com.grack.nanojson.JsonArray; import com.grack.nanojson.JsonObject; - import org.schabi.newpipe.extractor.Page; import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor; import org.schabi.newpipe.extractor.exceptions.ParsingException; import org.schabi.newpipe.extractor.localization.DateWrapper; import org.schabi.newpipe.extractor.localization.TimeAgoParser; +import org.schabi.newpipe.extractor.stream.Description; import org.schabi.newpipe.extractor.utils.JsonUtils; import org.schabi.newpipe.extractor.utils.Utils; import javax.annotation.Nullable; +import static org.schabi.newpipe.extractor.comments.CommentsInfoItem.UNKNOWN_REPLY_COUNT; +import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject; + public class YoutubeCommentsInfoItemExtractor implements CommentsInfoItemExtractor { private final JsonObject json; @@ -176,18 +176,20 @@ public String getTextualLikeCount() throws ParsingException { } @Override - public String getCommentText() throws ParsingException { + public Description getCommentText() throws ParsingException { try { final JsonObject contentText = JsonUtils.getObject(getCommentRenderer(), "contentText"); if (contentText.isEmpty()) { // completely empty comments as described in // https://github.com/TeamNewPipe/NewPipeExtractor/issues/380#issuecomment-668808584 - return ""; + return Description.EMPTY_DESCRIPTION; } final String commentText = getTextFromObject(contentText, true); // YouTube adds U+FEFF in some comments. // eg. https://www.youtube.com/watch?v=Nj4F63E59io - return Utils.removeUTF8BOM(commentText); + final String commentTextBomRemoved = Utils.removeUTF8BOM(commentText); + + return new Description(commentTextBomRemoved, Description.HTML); } catch (final Exception e) { throw new ParsingException("Could not get comment text", e); } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java index 4a8fd97198..3d40d934f7 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/bandcamp/BandcampCommentsExtractorTest.java @@ -42,7 +42,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException { for (CommentsInfoItem c : comments.getItems()) { assertFalse(Utils.isBlank(c.getUploaderName())); assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); - assertFalse(Utils.isBlank(c.getCommentText())); + assertFalse(Utils.isBlank(c.getCommentText().getContent())); assertFalse(Utils.isBlank(c.getName())); assertFalse(Utils.isBlank(c.getThumbnailUrl())); assertFalse(Utils.isBlank(c.getUrl())); diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java index 2bcd4f5944..72382813aa 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/peertube/PeertubeCommentsExtractorTest.java @@ -75,7 +75,7 @@ void testGetCommentsAllData() throws IOException, ExtractionException { assertFalse(Utils.isBlank(c.getUploaderName())); assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); assertFalse(Utils.isBlank(c.getCommentId())); - assertFalse(Utils.isBlank(c.getCommentText())); + assertFalse(Utils.isBlank(c.getCommentText().getContent())); assertFalse(Utils.isBlank(c.getName())); assertFalse(Utils.isBlank(c.getTextualUploadDate())); assertFalse(Utils.isBlank(c.getThumbnailUrl())); @@ -91,7 +91,7 @@ private boolean findInComments(InfoItemsPage comments, String private boolean findInComments(List comments, String comment) { for (CommentsInfoItem c : comments) { - if (c.getCommentText().contains(comment)) { + if (c.getCommentText().getContent().contains(comment)) { return true; } } diff --git a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java index 24c34b1d65..4eccc944ba 100644 --- a/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java +++ b/extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeCommentsExtractorTest.java @@ -95,7 +95,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException { assertFalse(Utils.isBlank(c.getUploaderName())); assertFalse(Utils.isBlank(c.getUploaderAvatarUrl())); assertFalse(Utils.isBlank(c.getCommentId())); - assertFalse(Utils.isBlank(c.getCommentText())); + assertFalse(Utils.isBlank(c.getCommentText().getContent())); assertFalse(Utils.isBlank(c.getName())); assertFalse(Utils.isBlank(c.getTextualUploadDate())); assertNotNull(c.getUploadDate()); @@ -111,7 +111,7 @@ private boolean findInComments(InfoItemsPage comments, String private boolean findInComments(List comments, String comment) { for (CommentsInfoItem c : comments) { - if (c.getCommentText().contains(comment)) { + if (c.getCommentText().getContent().contains(comment)) { return true; } } @@ -152,9 +152,9 @@ public void testGetCommentsAllData() throws IOException, ExtractionException { assertFalse(Utils.isBlank(c.getUrl())); assertTrue(c.getLikeCount() >= 0); if (c.getCommentId().equals("Ugga_h1-EXdHB3gCoAEC")) { // comment without text - assertTrue(Utils.isBlank(c.getCommentText())); + assertTrue(Utils.isBlank(c.getCommentText().getContent())); } else { - assertFalse(Utils.isBlank(c.getCommentText())); + assertFalse(Utils.isBlank(c.getCommentText().getContent())); } } } @@ -193,7 +193,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException { assertFalse(Utils.isBlank(c.getThumbnailUrl())); assertFalse(Utils.isBlank(c.getUrl())); assertTrue(c.getLikeCount() >= 0); - assertFalse(Utils.isBlank(c.getCommentText())); + assertFalse(Utils.isBlank(c.getCommentText().getContent())); if (c.isHeartedByUploader()) { heartedByUploader = true; } @@ -233,7 +233,7 @@ public void testGetCommentsAllData() throws IOException, ExtractionException { assertFalse(Utils.isBlank(c.getThumbnailUrl())); assertFalse(Utils.isBlank(c.getUrl())); assertTrue(c.getLikeCount() >= 0); - assertFalse(Utils.isBlank(c.getCommentText())); + assertFalse(Utils.isBlank(c.getCommentText().getContent())); } assertTrue(comments.getItems().get(0).isPinned(), "First comment isn't pinned"); @@ -328,7 +328,7 @@ public void testGetCommentsFirstReplies() throws IOException, ExtractionExceptio InfoItemsPage replies = extractor.getPage(firstComment.getReplies()); - assertEquals("First", replies.getItems().get(0).getCommentText(), + assertEquals("First", replies.getItems().get(0).getCommentText().getContent(), "First reply comment did not match"); }