Merge pull request #74 from metaprime/pornhub
Added Pornhub album ripper
Showing 1 changed file with 185 additions and 0 deletions.
src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
@@ -0,0 +1,185 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AlbumRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Utils;

public class PornhubRipper extends AlbumRipper {
    // All sleep times are in milliseconds
    private static final int IMAGE_SLEEP_TIME = 1 * 1000;
    private static final int TIMEOUT = 5 * 1000;

    private static final String DOMAIN = "pornhub.com", HOST = "Pornhub";

    // Thread pool for finding direct image links from "image" pages (html)
    private DownloadThreadPool pornhubThreadPool = new DownloadThreadPool("pornhub");

    // Current HTML document
    private Document albumDoc = null;

    public PornhubRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return HOST;
    }

    public URL sanitizeURL(URL url) throws MalformedURLException {
        return url;
    }

    public String getAlbumTitle(URL url) throws MalformedURLException {
        try {
            // Attempt to use album title as GID
            if (albumDoc == null) {
                logger.info(" Retrieving " + url.toExternalForm());
                sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
                albumDoc = Jsoup.connect(url.toExternalForm())
                                .userAgent(USER_AGENT)
                                .timeout(TIMEOUT)
                                .get();
            }
            Elements elems = albumDoc.select(".photoAlbumTitleV2");
            return HOST + "_" + elems.get(0).text();
        } catch (Exception e) {
            // Fall back to default album naming convention
            logger.warn("Failed to get album title from " + url, e);
        }
        return super.getAlbumTitle(url);
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p;
        Matcher m;

        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+)$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
        }

        throw new MalformedURLException(
                "Expected pornhub.com album format: "
                + "http://www.pornhub.com/album/####"
                + " Got: " + url);
    }

    @Override
    public void rip() throws IOException {
        int index = 0, retries = 3;
        String nextUrl = this.url.toExternalForm();

        if (isStopped()) {
            return;
        }

        if (albumDoc == null) {
            logger.info(" Retrieving album page " + nextUrl);
            sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
            albumDoc = Jsoup.connect(nextUrl)
                            .userAgent(USER_AGENT)
                            .timeout(TIMEOUT)
                            .referrer(this.url.toExternalForm())
                            .get();
        }

        // Find thumbnails
        Elements thumbs = albumDoc.select(".photoBlockBox li");
        if (thumbs.size() == 0) {
            logger.info("albumDoc: " + albumDoc);
            logger.info("No images found at " + nextUrl);
            return;
        }

        // Iterate over images on page
        for (Element thumb : thumbs) {
            if (isStopped()) {
                break;
            }
            index++;
            String imagePageUrl = thumb.select(".photoAlbumListBlock > a").first().attr("href");
            URL imagePage = new URL(url, imagePageUrl);
            PornhubImageThread t = new PornhubImageThread(imagePage, index, this.workingDir);
            pornhubThreadPool.addThread(t);
            try {
                Thread.sleep(IMAGE_SLEEP_TIME);
            } catch (InterruptedException e) {
                logger.warn("Interrupted while waiting to load next image", e);
            }
        }

        waitForThreads();
    }

    public boolean canRip(URL url) {
        return url.getHost().endsWith(DOMAIN);
    }

    /**
     * Helper class to find and download images found on "image" pages
     *
     * Handles case when site has IP-banned the user.
     */
    private class PornhubImageThread extends Thread {
        private URL url;
        private int index;
        private File workingDir;
        private int retries = 3;

        public PornhubImageThread(URL url, int index, File workingDir) {
            super();
            this.url = url;
            this.index = index;
            this.workingDir = workingDir;
        }

        @Override
        public void run() {
            fetchImage();
        }

        private void fetchImage() {
            try {
                Document doc = Jsoup.connect(this.url.toExternalForm())
                                    .userAgent(USER_AGENT)
                                    .timeout(TIMEOUT)
                                    .referrer(this.url.toExternalForm())
                                    .get();

                // Find image
                Elements images = doc.select("#photoImageSection img");
                Element image = images.first();
                String imgsrc = image.attr("src");
                logger.info("Found URL " + imgsrc + " via " + images.get(0));

                // Provide prefix and let the AbstractRipper "guess" the filename
                String prefix = "";
                if (Utils.getConfigBoolean("download.save_order", true)) {
                    prefix = String.format("%03d_", index);
                }

                URL imgurl = new URL(url, imgsrc);
                addURLToDownload(imgurl, prefix);

            } catch (IOException e) {
                logger.error("[!] Exception while loading/parsing " + this.url, e);
            }
        }
    }
}
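For reference, a minimal sketch of how the URL handling in this class might be exercised from a scratch main method. The constructor, canRip(), and getGID() calls come from the code above; the sample album URL, its numeric ID, and the wrapper class are illustrative assumptions, and behavior outside the full RipMe runtime depends on the AlbumRipper base class, which is not part of this diff.

import java.net.URL;

import com.rarchives.ripme.ripper.rippers.PornhubRipper;

public class PornhubRipperSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical album URL; the numeric ID is a placeholder, not a real album.
        URL album = new URL("http://www.pornhub.com/album/1234567");

        PornhubRipper ripper = new PornhubRipper(album);

        // canRip() checks the host suffix against "pornhub.com".
        System.out.println("canRip: " + ripper.canRip(album));   // expected: true

        // getGID() extracts the numeric album ID via the regex in the class above.
        System.out.println("GID:    " + ripper.getGID(album));   // expected: 1234567

        // ripper.rip() would fetch the album page and queue image downloads, but it
        // needs a working directory and UI hooks from the application, so it is
        // omitted from this sketch.
    }
}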