Skip to content

Commit

Permalink
Base Implementation: Parse the upload date of StreamInfoItems
Browse files Browse the repository at this point in the history
In the format '2 days ago' (in English) on a YouTube channel page.
(Parser extensible to other pages.)
  • Loading branch information
wojcik-online authored and mauriciocolli committed Nov 3, 2019
1 parent 514ed7b commit 180836c
Show file tree
Hide file tree
Showing 16 changed files with 316 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import org.schabi.newpipe.extractor.playlist.PlaylistExtractor;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.stream.StreamExtractor;
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
import org.schabi.newpipe.extractor.utils.Localization;

Expand Down Expand Up @@ -222,15 +223,15 @@ public SuggestionExtractor getSuggestionExtractor() {
/**
 * Creates a channel extractor for the given link handler, using the localization
 * returned by {@code NewPipe.getPreferredLocalization()}.
 *
 * @param linkHandler handler passed on to the localization-aware overload
 * @return whatever the localization-aware overload returns
 * @throws ExtractionException propagated from the localization-aware overload
 */
public ChannelExtractor getChannelExtractor(ListLinkHandler linkHandler) throws ExtractionException {
    final Localization preferredLocalization = NewPipe.getPreferredLocalization();
    return getChannelExtractor(linkHandler, preferredLocalization);
}

/**
 * Creates a playlist extractor for the given link handler, using the localization
 * returned by {@code NewPipe.getPreferredLocalization()}.
 *
 * @param linkHandler handler passed on to the localization-aware overload
 * @return whatever the localization-aware overload returns
 * @throws ExtractionException propagated from the localization-aware overload
 */
public PlaylistExtractor getPlaylistExtractor(ListLinkHandler linkHandler) throws ExtractionException {
    final Localization preferredLocalization = NewPipe.getPreferredLocalization();
    return getPlaylistExtractor(linkHandler, preferredLocalization);
}

/**
 * Creates a stream extractor for the given link handler, using the localization
 * returned by {@code NewPipe.getPreferredLocalization()}.
 *
 * @param linkHandler handler passed on to the localization-aware overload
 * @return whatever the localization-aware overload returns
 * @throws ExtractionException propagated from the localization-aware overload
 */
public StreamExtractor getStreamExtractor(LinkHandler linkHandler) throws ExtractionException {
    final Localization preferredLocalization = NewPipe.getPreferredLocalization();
    return getStreamExtractor(linkHandler, preferredLocalization);
}

/**
 * Creates a comments extractor for the given link handler, using the localization
 * returned by {@code NewPipe.getPreferredLocalization()}.
 *
 * @param urlIdHandler handler passed on to the localization-aware overload
 * @return whatever the localization-aware overload returns
 * @throws ExtractionException propagated from the localization-aware overload
 */
public CommentsExtractor getCommentsExtractor(ListLinkHandler urlIdHandler) throws ExtractionException {
    final Localization preferredLocalization = NewPipe.getPreferredLocalization();
    return getCommentsExtractor(urlIdHandler, preferredLocalization);
}
Expand Down Expand Up @@ -287,7 +288,7 @@ public PlaylistExtractor getPlaylistExtractor(String url) throws ExtractionExcep
/**
 * Creates a stream extractor from a URL: the URL is first turned into a link handler by
 * the factory from {@code getStreamLHFactory()}, then handed to the localization-aware
 * overload together with {@code NewPipe.getPreferredLocalization()}.
 *
 * @param url the URL given to the stream link handler factory
 * @return whatever the localization-aware overload returns
 * @throws ExtractionException propagated from the factory or the overload
 */
public StreamExtractor getStreamExtractor(String url) throws ExtractionException {
    final LinkHandler streamLinkHandler = getStreamLHFactory().fromUrl(url);
    return getStreamExtractor(streamLinkHandler, NewPipe.getPreferredLocalization());
}

public CommentsExtractor getCommentsExtractor(String url) throws ExtractionException {
ListLinkHandlerFactory llhf = getCommentsLHFactory();
if(null == llhf) {
Expand All @@ -296,6 +297,9 @@ public CommentsExtractor getCommentsExtractor(String url) throws ExtractionExcep
return getCommentsExtractor(llhf.fromUrl(url), NewPipe.getPreferredLocalization());
}

/**
 * Builds a {@link TimeAgoParser} configured with {@link TimeAgoParser#DEFAULT_AGO_PHRASES}.
 *
 * @return a newly constructed parser; every call creates another instance
 */
public TimeAgoParser getTimeAgoParser() {
    final TimeAgoParser defaultParser = new TimeAgoParser(TimeAgoParser.DEFAULT_AGO_PHRASES);
    return defaultParser;
}

/**
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,23 +79,22 @@ static boolean checkIfHardcodedClientIdIsValid(Downloader dl) throws IOException
return dl.head(apiUrl).getResponseCode() == 200;
}

// Tries two fixed timestamp patterns in turn; a ParsingException is raised only when
// the second attempt fails as well.
// NOTE(review): the quoted 'Z' in the first pattern and the trailing "+0000" in the second
// are matched as literal text, so no time zone is read from the input and the formatter's
// default time zone is applied. If these timestamps are meant to be UTC (TODO confirm),
// the formatters should have their time zone set explicitly.
public static String toDateString(String time) throws ParsingException {
static Date parseDate(String time) throws ParsingException {
try {
Date date;
// There are two date formats: one for 'api.soundc...' and the other for 'api-v2.soundc...'.
return new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
} catch (ParseException e1) {
try {
date = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(time);
} catch (Exception e) {
date = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
return new SimpleDateFormat("yyyy/MM/dd HH:mm:ss +0000").parse(time);
} catch (ParseException e2) {
// The message comes from the first failure, the cause from the second one.
throw new ParsingException(e1.getMessage(), e2);
}

SimpleDateFormat newDateFormat = new SimpleDateFormat("yyyy-MM-dd");
return newDateFormat.format(date);
} catch (ParseException e) {
throw new ParsingException(e.getMessage(), e);
}
}

/**
 * Parses the given timestamp with {@code parseDate} and renders the result with the
 * pattern {@code yyyy-MM-dd}.
 *
 * @param time the timestamp text handed to {@code parseDate}
 * @return the parsed date formatted as {@code yyyy-MM-dd}
 * @throws ParsingException propagated from {@code parseDate}
 */
static String toTextualDate(String time) throws ParsingException {
    final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
    final Date parsed = parseDate(time);
    return dayFormat.format(parsed);
}

/**
* Call the endpoint "/resolve" of the api.<p>
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public String getName() {
/**
 * Reads the track's "created_at" string and returns it converted to a textual date
 * by {@link SoundcloudParsingHelper}.
 */
@Nonnull
@Override
public String getUploadDate() throws ParsingException {
return SoundcloudParsingHelper.toDateString(track.getString("created_at"));
return SoundcloudParsingHelper.toTextualDate(track.getString("created_at"));
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;

import java.util.Calendar;

import static org.schabi.newpipe.extractor.utils.Utils.replaceHttpWithHttps;

public class SoundcloudStreamInfoItemExtractor implements StreamInfoItemExtractor {
Expand Down Expand Up @@ -41,8 +43,19 @@ public String getUploaderUrl() {
}

/**
 * Returns the item's "created_at" string converted to a textual date by
 * {@link SoundcloudParsingHelper}.
 */
@Override
public String getUploadDate() throws ParsingException {
return SoundcloudParsingHelper.toDateString(itemObject.getString("created_at"));
public String getTextualUploadDate() throws ParsingException {
return SoundcloudParsingHelper.toTextualDate(getCreatedAt());
}

/**
 * Returns the item's "created_at" value as a {@link Calendar}.
 *
 * @return a calendar obtained from {@link Calendar#getInstance()} whose time is set to
 *         the date parsed by {@code SoundcloudParsingHelper.parseDate}
 * @throws ParsingException propagated from {@code SoundcloudParsingHelper.parseDate}
 */
@Override
public Calendar getUploadDate() throws ParsingException {
    final Calendar createdAtCalendar = Calendar.getInstance();
    createdAtCalendar.setTime(
            SoundcloudParsingHelper.parseDate(getCreatedAt()));
    return createdAtCalendar;
}

/**
 * @return the string stored under the "created_at" key of {@code itemObject}
 */
private String getCreatedAt() {
    final String createdAt = itemObject.getString("created_at");
    return createdAt;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
import org.schabi.newpipe.extractor.utils.DonationLinkHelper;
import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.utils.Parser;
Expand Down Expand Up @@ -53,6 +54,8 @@ public class YoutubeChannelExtractor extends ChannelExtractor {
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";

/**
 * Parser for textual upload dates, fetched from {@code getService()} when this extractor
 * is created and passed to the stream item extractors built in {@code collectStreamsFrom}.
 */
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();

private Document doc;

public YoutubeChannelExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
Expand Down Expand Up @@ -230,7 +233,7 @@ private void collectStreamsFrom(StreamInfoItemsCollector collector, Element elem
final String uploaderUrl = getUrl();
for (final Element li : element.children()) {
if (li.select("div[class=\"feed-item-dismissable\"]").first() != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
@Override
public String getUrl() throws ParsingException {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.utils.Utils;

Expand All @@ -28,6 +29,8 @@
@SuppressWarnings("WeakerAccess")
public class YoutubePlaylistExtractor extends PlaylistExtractor {

/**
 * Parser for textual upload dates, fetched from {@code getService()} when this extractor
 * is created and passed to the stream item extractors built in {@code collectStreamsFrom}.
 */
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();

private Document doc;

public YoutubePlaylistExtractor(StreamingService service, ListLinkHandler linkHandler, Localization localization) {
Expand Down Expand Up @@ -192,7 +195,7 @@ private void collectStreamsFrom(@Nonnull StreamInfoItemsCollector collector, @Nu
continue;
}

collector.commit(new YoutubeStreamInfoItemExtractor(li) {
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
public Element uploaderLink;

@Override
Expand Down Expand Up @@ -258,7 +261,7 @@ public String getUploaderUrl() throws ParsingException {
}

@Override
public String getUploadDate() throws ParsingException {
public String getTextualUploadDate() throws ParsingException {
return "";
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
import org.schabi.newpipe.extractor.search.InfoItemsSearchCollector;
import org.schabi.newpipe.extractor.search.SearchExtractor;
import org.schabi.newpipe.extractor.linkhandler.SearchQueryHandler;
Expand Down Expand Up @@ -129,7 +130,7 @@ private InfoItemsSearchCollector collectItems(Document doc) throws NothingFoundE

// video item type
} else if ((el = item.select("div[class*=\"yt-lockup-video\"]").first()) != null) {
collector.commit(new YoutubeStreamInfoItemExtractor(el));
collector.commit(new YoutubeStreamInfoItemExtractor(el, getService().getTimeAgoParser()));
} else if ((el = item.select("div[class*=\"yt-lockup-channel\"]").first()) != null) {
collector.commit(new YoutubeChannelInfoItemExtractor(el));
} else if ((el = item.select("div[class*=\"yt-lockup-playlist\"]").first()) != null &&
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ public class SubtitlesException extends ContentNotAvailableException {

/*//////////////////////////////////////////////////////////////////////////*/

/**
 * Parser for textual upload dates, fetched from {@code getService()} when this extractor
 * is created and passed to the item extractors built in {@code extractVideoPreviewInfo}.
 */
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();

private Document doc;
@Nullable
private JsonObject playerArgs;
Expand Down Expand Up @@ -932,7 +934,7 @@ private Map<String, ItagItem> getItags(String streamingDataKey, ItagItem.ItagTyp
* This is encapsulated in a StreamInfoItem object, which is a subset of the fields in a full StreamInfo.
*/
private StreamInfoItemExtractor extractVideoPreviewInfo(final Element li) {
return new YoutubeStreamInfoItemExtractor(li) {
return new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {

@Override
public String getUrl() throws ParsingException {
Expand All @@ -959,7 +961,7 @@ public String getUploaderUrl() throws ParsingException {
}

@Override
public String getUploadDate() throws ParsingException {
public String getTextualUploadDate() throws ParsingException {
return "";
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeParsingHelper;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.stream.TimeAgoParser;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nullable;
import java.util.Calendar;

/*
* Copyright (C) Christian Schabesberger 2016 <[email protected]>
* YoutubeStreamInfoItemExtractor.java is part of NewPipe.
Expand All @@ -28,9 +33,18 @@
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {

// Page element this extractor reads its data from.
private final Element item;
// Parser for textual upload dates; may be null, as the constructor accepts a @Nullable value.
private final TimeAgoParser timeAgoParser;

// Text remembered by getTextualUploadDate() after its first successful lookup; null until then.
private String cachedUploadDate;

public YoutubeStreamInfoItemExtractor(Element item) {
/**
* Creates an extractor of StreamInfoItems from a YouTube page.
* @param item The page element the item data is read from.
* @param timeAgoParser A parser of the textual dates, or {@code null}; without a parser
*                      {@link #getUploadDate()} returns {@code null}.
*/
public YoutubeStreamInfoItemExtractor(Element item, @Nullable TimeAgoParser timeAgoParser) {
this.item = item;
this.timeAgoParser = timeAgoParser;
}

@Override
Expand Down Expand Up @@ -126,20 +140,35 @@ public String getUploaderUrl() throws ParsingException {
}

/**
 * Returns the text of the first {@code li} inside the item's {@code yt-lockup-meta} div,
 * or an empty string when that div or its list entries are missing.
 *
 * @throws ParsingException wrapping any exception raised during the lookup
 */
@Override
public String getUploadDate() throws ParsingException {
public String getTextualUploadDate() throws ParsingException {
// Reuse the text found by an earlier call.
if (cachedUploadDate != null) {
return cachedUploadDate;
}

try {
Element meta = item.select("div[class=\"yt-lockup-meta\"]").first();
// The empty-string fallbacks below are not cached, so they are recomputed on every call.
if (meta == null) return "";

Element li = meta.select("li").first();
if(li == null) return "";
final Elements li = meta.select("li");
if (li.isEmpty()) return "";

return meta.select("li").first().text();
// Remember the text so later calls can skip the lookup.
return cachedUploadDate = li.first().text();
} catch (Exception e) {
throw new ParsingException("Could not get upload date", e);
}
}

/**
 * Converts the textual upload date into a {@link Calendar} using the configured parser.
 *
 * @return the value produced by the parser, or {@code null} when no parser was supplied
 *         or the textual upload date is {@code null} or empty
 * @throws ParsingException propagated from {@link #getTextualUploadDate()} or the parser
 */
@Override
public Calendar getUploadDate() throws ParsingException {
    // Read the text first, exactly as before, so lookup failures surface even without a parser.
    final String uploadText = getTextualUploadDate();

    if (timeAgoParser == null) {
        return null;
    }
    if (uploadText == null || uploadText.isEmpty()) {
        return null;
    }
    return timeAgoParser.parse(uploadText);
}

@Override
public long getViewCount() throws ParsingException {
String input;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,15 @@
import org.schabi.newpipe.extractor.stream.StreamInfoItem;
import org.schabi.newpipe.extractor.stream.StreamInfoItemsCollector;
import org.schabi.newpipe.extractor.utils.Localization;
import org.schabi.newpipe.extractor.stream.TimeAgoParser;

import javax.annotation.Nonnull;
import java.io.IOException;

public class YoutubeTrendingExtractor extends KioskExtractor<StreamInfoItem> {

/**
 * Parser for textual upload dates, fetched from {@code getService()} when this extractor
 * is created and passed to the stream item extractors built in {@code getInitialPage}.
 */
private final TimeAgoParser timeAgoParser = getService().getTimeAgoParser();

private Document doc;

public YoutubeTrendingExtractor(StreamingService service,
Expand Down Expand Up @@ -93,7 +96,7 @@ public InfoItemsPage<StreamInfoItem> getInitialPage() throws ParsingException {
for(Element ul : uls) {
for(final Element li : ul.children()) {
final Element el = li.select("div[class*=\"yt-lockup-dismissable\"]").first();
collector.commit(new YoutubeStreamInfoItemExtractor(li) {
collector.commit(new YoutubeStreamInfoItemExtractor(li, timeAgoParser) {
@Override
public String getUrl() throws ParsingException {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,17 @@

import org.schabi.newpipe.extractor.InfoItem;

import java.util.Calendar;

/**
* Info object for previews of unopened videos, eg search results, related videos
*/
public class StreamInfoItem extends InfoItem {
private final StreamType streamType;

private String uploaderName;
private String uploadDate;
private String textualUploadDate;
private Calendar uploadDate;
private long viewCount = -1;
private long duration = -1;

Expand All @@ -52,14 +55,6 @@ public void setUploaderName(String uploader_name) {
this.uploaderName = uploader_name;
}

public String getUploadDate() {
return uploadDate;
}

public void setUploadDate(String upload_date) {
this.uploadDate = upload_date;
}

/**
 * @return the stored view count; the field starts out as {@code -1}
 */
public long getViewCount() {
    return this.viewCount;
}
Expand All @@ -84,12 +79,36 @@ public void setUploaderUrl(String uploaderUrl) {
this.uploaderUrl = uploaderUrl;
}

/**
 * Gives the upload date as text, in the original form returned by the streaming service.
 *
 * @return the textual upload date held by this item
 * @see #getUploadDate()
 */
public String getTextualUploadDate() {
    return this.textualUploadDate;
}

/**
 * Stores the textual upload date of this item.
 *
 * @param upload_date the textual upload date to keep
 */
public void setTextualUploadDate(String upload_date) {
    textualUploadDate = upload_date;
}

/**
 * Gives the upload date as a {@link Calendar}; the value may be an approximation.
 *
 * @return the (approximated) date and time this item was uploaded, or {@code null}
 * @see #getTextualUploadDate()
 */
public Calendar getUploadDate() {
    return this.uploadDate;
}

/**
 * Stores the upload date of this item.
 *
 * @param uploadDate the value later returned by {@link #getUploadDate()}
 */
public void setUploadDate(Calendar uploadDate) {
this.uploadDate = uploadDate;
}

@Override
public String toString() {
return "StreamInfoItem{" +
"streamType=" + streamType +
", uploaderName='" + uploaderName + '\'' +
", uploadDate='" + uploadDate + '\'' +
", textualUploadDate='" + textualUploadDate + '\'' +
", viewCount=" + viewCount +
", duration=" + duration +
", uploaderUrl='" + uploaderUrl + '\'' +
Expand Down
Loading

0 comments on commit 180836c

Please sign in to comment.