From 128d384d29e509ec7942bf07e8a422d47b208d02 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 15 Jun 2017 15:21:08 -0400 Subject: [PATCH] Eroshare can now rip profiles (Fixes #465) (#578) --- .../ripme/ripper/rippers/EroShareRipper.java | 112 ++++++++++++++---- 1 file changed, 92 insertions(+), 20 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java index 10d848fb4..4ef8c1e11 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EroShareRipper.java @@ -9,6 +9,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -17,8 +18,10 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.jsoup.Connection.Method; import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; /** @@ -27,6 +30,7 @@ */ public class EroShareRipper extends AbstractHTMLRipper { + public EroShareRipper (URL url) throws IOException { super(url); } @@ -42,33 +46,73 @@ public String getHost() { } @Override - public void downloadURL(URL url, int index) { + public void downloadURL(URL url, int index){ addURLToDownload(url); } + @Override + public boolean canRip(URL url) { + Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); + Matcher m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return true; + } + + Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Matcher ma = pa.matcher(url.toExternalForm()); + if (ma.matches()) { + return true; + } + return false; + } + + public boolean is_profile(URL url) { + Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Matcher ma = pa.matcher(url.toExternalForm()); + if (ma.matches()) { + return true; + } + return false; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // Find next page + String nextUrl = ""; + Element elem = doc.select("li.next > a").first(); + logger.info(elem); + nextUrl = elem.attr("href"); + if (nextUrl == "") { + throw new IOException("No more pages"); + } + return Http.url("https://eroshare.com" + nextUrl).get(); + } @Override public String getAlbumTitle(URL url) throws MalformedURLException { - try { - // Attempt to use album title as GID - Element titleElement = getFirstPage().select("meta[property=og:title]").first(); - String title = titleElement.attr("content"); - title = title.substring(title.lastIndexOf('/') + 1); - return getHost() + "_" + getGID(url) + "_" + title.trim(); - } catch (IOException e) { - // Fall back to default album naming convention - logger.info("Unable to find title at " + url); + if (is_profile(url) == false) { + try { + // Attempt to use album title as GID + Element titleElement = getFirstPage().select("meta[property=og:title]").first(); + String title = titleElement.attr("content"); + title = title.substring(title.lastIndexOf('/') + 1); + return getHost() + "_" + getGID(url) + "_" + title.trim(); + } catch (IOException e) { + // Fall back to default album naming convention + logger.info("Unable to find title at " + url); + } + return super.getAlbumTitle(url); } - return super.getAlbumTitle(url); + return url.toExternalForm().split("/u/")[1]; } @Override - public List getURLsFromPage(Document doc) { + public List getURLsFromPage(Document doc){ List URLs = new ArrayList(); //Pictures Elements imgs = doc.getElementsByTag("img"); - for (Element img : imgs) { - if (img.hasClass("album-image")) { + for (Element img : imgs){ + if (img.hasClass("album-image")){ String imageURL = img.attr("src"); imageURL = "https:" + imageURL; URLs.add(imageURL); @@ -76,13 +120,33 @@ public List getURLsFromPage(Document doc) { } //Videos Elements vids = doc.getElementsByTag("video"); - for (Element vid : vids) { - if (vid.hasClass("album-video")) { + for (Element vid : vids){ + if (vid.hasClass("album-video")){ Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); URLs.add(videoURL); } } + // Profile videos + Elements links = doc.select("div.item-container > a.item"); + for (Element link : links){ + Document video_page; + try { + video_page = Http.url("https://eroshare.com" + link.attr("href")).get(); + } catch(IOException e) { + logger.warn("Failed to log link in Jsoup"); + video_page = null; + e.printStackTrace(); + } + Elements profile_vids = video_page.getElementsByTag("video"); + for (Element vid : profile_vids){ + if (vid.hasClass("album-video")){ + Elements source = vid.getElementsByTag("source"); + String videoURL = source.first().attr("src"); + URLs.add(videoURL); + } + } + } return URLs; } @@ -105,6 +169,13 @@ public String getGID(URL url) throws MalformedURLException { if (m.matches()) { return m.group(1); } + + Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); + Matcher ma = pa.matcher(url.toExternalForm()); + if (ma.matches()) { + return m.group(1) + "_profile"; + } + throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album"); } @@ -119,8 +190,8 @@ public static List getURLs(URL url) throws IOException{ List URLs = new ArrayList(); //Pictures Elements imgs = doc.getElementsByTag("img"); - for (Element img : imgs) { - if (img.hasClass("album-image")) { + for (Element img : imgs){ + if (img.hasClass("album-image")){ String imageURL = img.attr("src"); imageURL = "https:" + imageURL; URLs.add(new URL(imageURL)); @@ -128,8 +199,8 @@ public static List getURLs(URL url) throws IOException{ } //Videos Elements vids = doc.getElementsByTag("video"); - for (Element vid : vids) { - if (vid.hasClass("album-video")) { + for (Element vid : vids){ + if (vid.hasClass("album-video")){ Elements source = vid.getElementsByTag("source"); String videoURL = source.first().attr("src"); URLs.add(new URL(videoURL)); @@ -139,3 +210,4 @@ public static List getURLs(URL url) throws IOException{ return URLs; } } +