-
Notifications
You must be signed in to change notification settings - Fork 202
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #455 from cyian-1756/chevereto
Added a ripper for Chevereto
- Loading branch information
Showing
1 changed file
with
123 additions
and
0 deletions.
There are no files selected for viewing
123 changes: 123 additions & 0 deletions
123
src/main/java/com/rarchives/ripme/ripper/rippers/CheveretoRipper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package com.rarchives.ripme.ripper.rippers; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
|
||
import com.rarchives.ripme.ripper.AbstractHTMLRipper; | ||
import com.rarchives.ripme.utils.Http; | ||
|
||
public class CheveretoRipper extends AbstractHTMLRipper { | ||
|
||
public CheveretoRipper(URL url) throws IOException { | ||
super(url); | ||
} | ||
|
||
public static List<String> explicit_domains_1 = Arrays.asList("hushpix.com", "tag-fox.com"); | ||
@Override | ||
public String getHost() { | ||
String host = url.toExternalForm().split("/")[2]; | ||
return host; | ||
} | ||
|
||
@Override | ||
public String getDomain() { | ||
String host = url.toExternalForm().split("/")[2]; | ||
return host; | ||
} | ||
|
||
@Override | ||
public boolean canRip(URL url) { | ||
String url_name = url.toExternalForm(); | ||
if (explicit_domains_1.contains(url_name.split("/")[2]) == true) { | ||
Pattern pa = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$"); | ||
Matcher ma = pa.matcher(url.toExternalForm()); | ||
if (ma.matches()) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
@Override | ||
public String getAlbumTitle(URL url) throws MalformedURLException { | ||
try { | ||
// Attempt to use album title as GID | ||
Element titleElement = getFirstPage().select("meta[property=og:title]").first(); | ||
String title = titleElement.attr("content"); | ||
title = title.substring(title.lastIndexOf('/') + 1); | ||
return getHost() + "_" + title.trim(); | ||
} catch (IOException e) { | ||
// Fall back to default album naming convention | ||
logger.info("Unable to find title at " + url); | ||
} | ||
return super.getAlbumTitle(url); | ||
} | ||
|
||
|
||
@Override | ||
public String getGID(URL url) throws MalformedURLException { | ||
Pattern p = Pattern.compile("(?:https?://)?(?:www\\.)?[a-z1-9-]*\\.[a-z1-9]*/album/([a-zA-Z1-9]*)/?$"); | ||
Matcher m = p.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
return m.group(1); | ||
} | ||
throw new MalformedURLException("Expected chevereto URL format: " + | ||
"site.domain/album/albumName or site.domain/username/albums- got " + url + " instead"); | ||
} | ||
|
||
@Override | ||
public Document getFirstPage() throws IOException { | ||
// "url" is an instance field of the superclass | ||
return Http.url(url).get(); | ||
} | ||
|
||
@Override | ||
public Document getNextPage(Document doc) throws IOException { | ||
// Find next page | ||
String nextUrl = ""; | ||
// We use comic-nav-next to the find the next page | ||
Element elem = doc.select("li.pagination-next > a").first(); | ||
if (elem == null) { | ||
throw new IOException("No more pages"); | ||
} | ||
String nextPage = elem.attr("href"); | ||
// Some times this returns a empty string | ||
// This for stops that | ||
if (nextPage == "") { | ||
return null; | ||
} | ||
else { | ||
return Http.url(nextPage).get(); | ||
} | ||
} | ||
|
||
@Override | ||
public List<String> getURLsFromPage(Document doc) { | ||
List<String> result = new ArrayList<String>(); | ||
for (Element el : doc.select("a.image-container > img")) { | ||
String imageSource = el.attr("src"); | ||
// We remove the .md from images so we download the full size image | ||
// not the medium ones | ||
imageSource = imageSource.replace(".md", ""); | ||
result.add(imageSource); | ||
} | ||
return result; | ||
} | ||
|
||
@Override | ||
public void downloadURL(URL url, int index) { | ||
addURLToDownload(url, getPrefix(index)); | ||
} | ||
|
||
|
||
} |