Skip to content

Commit

Permalink
Fix YouTube subscriber count
Browse files Browse the repository at this point in the history
Modify test to fail on too small subscriber count
  • Loading branch information
TobiGr committed Sep 16, 2019
1 parent dbdd9ed commit 06016d1
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
public class YoutubeChannelExtractor extends ChannelExtractor {
/*package-private*/ static final String CHANNEL_URL_BASE = "https://www.youtube.com/channel/";
private static final String CHANNEL_FEED_BASE = "https://www.youtube.com/feeds/videos.xml?channel_id=";
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000";
private static final String CHANNEL_URL_PARAMETERS = "/videos?view=0&flow=list&sort=dd&live_view=10000&gl=US&hl=en";

private Document doc;

Expand Down Expand Up @@ -135,10 +135,11 @@ public String getFeedUrl() throws ParsingException {

@Override
public long getSubscriberCount() throws ParsingException {
final Element el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]").first();
final String el = doc.select("span[class*=\"yt-subscription-button-subscriber-count\"]")
.first().attr("title");
if (el != null) {
try {
return Long.parseLong(Utils.removeNonDigitCharacters(el.text()));
return Utils.mixedNumberWordToLong(el);
} catch (NumberFormatException e) {
throw new ParsingException("Could not get subscriber count", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,35 @@ public static String removeNonDigitCharacters(String toRemove) {
return toRemove.replaceAll("\\D+", "");
}

/**
 * <p>Convert a mixed number word to a long.</p>
 * <p>The first number found in the input is parsed; both {@code .} and {@code ,}
 * are accepted as the decimal separator. If that number is directly followed by
 * one of the suffixes K, M or B (case-insensitive), it is scaled by
 * 10<sup>3</sup>, 10<sup>6</sup> or 10<sup>9</sup> respectively. Any fractional
 * part left after scaling is truncated.</p>
 * <p>Examples:</p>
 * <ul>
 *     <li>123 -&gt; 123</li>
 *     <li>1.23K -&gt; 1230</li>
 *     <li>1,23K -&gt; 1230</li>
 *     <li>1.23M -&gt; 1230000</li>
 *     <li>1.23B -&gt; 1230000000</li>
 * </ul>
 *
 * @param numberWord string to be converted to a long
 * @return the numeric value of {@code numberWord} as a long
 * @throws NumberFormatException if the extracted number cannot be parsed as a double
 * @throws ParsingException      propagated from {@code Parser.matchGroup1}; presumably raised
 *                               when {@code numberWord} contains no number at all
 */
public static long mixedNumberWordToLong(String numberWord) throws NumberFormatException, ParsingException {
    String multiplier = "";
    try {
        multiplier = Parser.matchGroup("[\\d]+([\\.,][\\d]+)?([KMBkmb])+", numberWord, 2);
    } catch (ParsingException ignored) {
        // No multiplier suffix present: the number is taken at face value.
    }
    // The pattern accepts ',' as decimal separator, but Double.parseDouble only
    // understands '.', so normalize before parsing.
    final String number = Parser.matchGroup1("([\\d]+([\\.,][\\d]+)?)", numberWord)
            .replace(',', '.');
    final double count = Double.parseDouble(number);
    switch (multiplier.toUpperCase()) {
        case "K":
            return (long) (count * 1e3);
        case "M":
            return (long) (count * 1e6);
        case "B":
            return (long) (count * 1e9);
        default:
            return (long) (count);
    }
}

/**
* Check if the url matches the pattern.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ public void testFeedUrl() throws Exception {
/**
 * Checks that the extracted subscriber count is non-negative and that it
 * reaches the minimum expected for this channel (four million).
 */
@Test
public void testSubscriberCount() throws Exception {
    // Sanity check: a count can never be negative.
    final boolean nonNegative = extractor.getSubscriberCount() >= 0;
    assertTrue("Wrong subscriber count", nonNegative);

    // Guards against an abbreviated count (e.g. "4M") being read as a tiny number.
    final boolean largeEnough = extractor.getSubscriberCount() >= 4e6;
    assertTrue("Subscriber count too small", largeEnough);
}
}

Expand Down Expand Up @@ -195,6 +196,7 @@ public void testFeedUrl() throws Exception {
/**
 * Checks that the extracted subscriber count is non-negative and that it
 * reaches the minimum expected for this channel (ten million).
 */
@Test
public void testSubscriberCount() throws Exception {
    // Sanity check: a count can never be negative.
    final boolean nonNegative = extractor.getSubscriberCount() >= 0;
    assertTrue("Wrong subscriber count", nonNegative);

    // Guards against an abbreviated count (e.g. "10M") being read as a tiny number.
    final boolean largeEnough = extractor.getSubscriberCount() >= 10e6;
    assertTrue("Subscriber count too small", largeEnough);
}

}
Expand Down

0 comments on commit 06016d1

Please sign in to comment.