-
Notifications
You must be signed in to change notification settings - Fork 631
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Implement FapwizRipper
- Loading branch information
Showing
2 changed files
with
316 additions
and
0 deletions.
There are no files selected for viewing
154 changes: 154 additions & 0 deletions
154
src/main/java/com/rarchives/ripme/ripper/rippers/FapwizRipper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
package com.rarchives.ripme.ripper.rippers; | ||
|
||
import java.io.IOException; | ||
import java.net.MalformedURLException; | ||
import java.net.URL; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
import org.apache.logging.log4j.LogManager; | ||
import org.apache.logging.log4j.Logger; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.select.Elements; | ||
|
||
import com.rarchives.ripme.ripper.AbstractHTMLRipper; | ||
import com.rarchives.ripme.utils.Http; | ||
|
||
public class FapwizRipper extends AbstractHTMLRipper { | ||
|
||
private static final Logger logger = LogManager.getLogger(FapwizRipper.class); | ||
|
||
private static final Pattern CATEGORY_PATTERN = Pattern.compile("https?://fapwiz.com/category/([a-zA-Z0-9_-]+)/?$"); | ||
|
||
private static final Pattern USER_PATTERN = Pattern.compile("https?://fapwiz.com/([a-zA-Z0-9_-]+)/?$"); | ||
|
||
// Note that the last part of the pattern can contain unicode emoji which | ||
// get encoded as %-encoded UTF-8 bytes in the URL, so we allow % characters. | ||
private static final Pattern POST_PATTERN = Pattern | ||
.compile("https?://fapwiz.com/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_%-]+)/?$"); | ||
|
||
public FapwizRipper(URL url) throws IOException { | ||
super(url); | ||
} | ||
|
||
@Override | ||
public String getHost() { | ||
return "fapwiz"; | ||
} | ||
|
||
@Override | ||
public String getDomain() { | ||
return "fapwiz.com"; | ||
} | ||
|
||
@Override | ||
public String getGID(URL url) throws MalformedURLException { | ||
Matcher m; | ||
|
||
m = CATEGORY_PATTERN.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
return "category_" + m.group(1); | ||
} | ||
|
||
m = USER_PATTERN.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
return "user_" + m.group(1); | ||
} | ||
|
||
m = POST_PATTERN.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
return "post_" + m.group(1) + "_" + m.group(2); | ||
} | ||
|
||
throw new MalformedURLException("Expected fapwiz URL format: " + | ||
"fapwiz.com/USER or fapwiz.com/USER/POST or " + | ||
"fapwiz.com/CATEGORY - got " + url + " instead"); | ||
} | ||
|
||
void processUserOrCategoryPage(Document doc, List<String> results) { | ||
// The category page looks a lot like the structure of a user page, | ||
// so processUserPage is written to be compatible with both. | ||
doc.select(".post-items-holder img").forEach(e -> { | ||
String imgSrc = e.attr("src"); | ||
|
||
// Skip the user profile picture thumbnail insets | ||
if (imgSrc.endsWith("-thumbnail-icon.jpg")) { | ||
return; | ||
} | ||
|
||
// Replace -thumbnail.jpg with .mp4 | ||
String videoSrc = imgSrc.replace("-thumbnail.jpg", ".mp4"); | ||
results.add(videoSrc); | ||
}); | ||
} | ||
|
||
void processCategoryPage(Document doc, List<String> results) { | ||
logger.info("Processing category page: " + url); | ||
processUserOrCategoryPage(doc, results); | ||
} | ||
|
||
void processUserPage(Document doc, List<String> results) { | ||
logger.info("Processing user page: " + url); | ||
processUserOrCategoryPage(doc, results); | ||
} | ||
|
||
void processPostPage(Document doc, List<String> results) { | ||
logger.info("Processing post page: " + url); | ||
doc.select("video source").forEach(video -> { | ||
results.add(video.attr("src")); | ||
}); | ||
} | ||
|
||
@Override | ||
public List<String> getURLsFromPage(Document doc) { | ||
List<String> results = new ArrayList<>(); | ||
Matcher m; | ||
|
||
m = CATEGORY_PATTERN.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
processCategoryPage(doc, results); | ||
} | ||
|
||
m = USER_PATTERN.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
processUserPage(doc, results); | ||
} | ||
|
||
m = POST_PATTERN.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
processPostPage(doc, results); | ||
} | ||
|
||
return results; | ||
} | ||
|
||
private Document getDocument(String url, int retries) throws IOException { | ||
return Http.url(url).userAgent(USER_AGENT).retries(retries).get(); | ||
} | ||
|
||
private Document getDocument(String url) throws IOException { | ||
return getDocument(url, 1); | ||
} | ||
|
||
@Override | ||
public Document getNextPage(Document page) throws IOException { | ||
logger.info("Getting next page for url: " + url); | ||
Elements next = page.select("a.next"); | ||
if (!next.isEmpty()) { | ||
String href = next.attr("href"); | ||
logger.info("Found next page: " + href); | ||
return getDocument(href); | ||
} else { | ||
logger.info("No more pages"); | ||
throw new IOException("No more pages."); | ||
} | ||
} | ||
|
||
@Override | ||
public void downloadURL(URL url, int index) { | ||
sleep(2000); | ||
addURLToDownload(url, getPrefix(index)); | ||
} | ||
} |
162 changes: 162 additions & 0 deletions
162
src/test/java/com/rarchives/ripme/tst/ripper/rippers/FapwizRipperTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
package com.rarchives.ripme.tst.ripper.rippers; | ||
|
||
import java.io.IOException; | ||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
|
||
import org.jsoup.nodes.Document; | ||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.api.Tag; | ||
import org.junit.jupiter.api.Test; | ||
|
||
import com.rarchives.ripme.ripper.AbstractRipper; | ||
import com.rarchives.ripme.ripper.rippers.FapwizRipper; | ||
import com.rarchives.ripme.utils.Http; | ||
import com.rarchives.ripme.utils.Utils; | ||
|
||
public class FapwizRipperTest extends RippersTest { | ||
@Test | ||
@Tag("flaky") // It seems like fetching the document within the test can be flaky. | ||
public void testGetNextPage_NoNextPage() throws IOException, URISyntaxException { | ||
URL url = new URI("https://fapwiz.com/alison-esha/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
|
||
Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get(); | ||
try { | ||
ripper.getNextPage(firstPage); | ||
// If we don't throw, we failed the text because there *was* a next | ||
// page even though there shouldn't be. | ||
Assertions.fail(); | ||
} catch (IOException exception) { | ||
Assertions.assertTrue(true); | ||
} | ||
} | ||
|
||
@Test | ||
@Tag("flaky") // It seems like fetching the document within the test can be flaky. | ||
public void testGetNextPage_HasNextPage() throws IOException, URISyntaxException { | ||
URL url = new URI("https://fapwiz.com/miaipanema/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
|
||
Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get(); | ||
try { | ||
Document doc = ripper.getNextPage(firstPage); | ||
Assertions.assertNotNull(doc); | ||
} catch (IOException exception) { | ||
// We should have found a next page but didn't. | ||
Assertions.fail(); | ||
} | ||
} | ||
|
||
@Test | ||
public void testRipPost() throws IOException, URISyntaxException { | ||
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
testRipper(ripper); | ||
} | ||
|
||
@Test | ||
public void testRipPostWithNumbersInUsername1() throws IOException, URISyntaxException { | ||
URL url = new URI("https://fapwiz.com/desperate_bug_7776/lets-be-friends-that-secretly-fuck-thanks/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
testRipper(ripper); | ||
} | ||
|
||
@Test | ||
public void testRipPostWithEmojiInShortUrl() throws IOException, URISyntaxException { | ||
URL url = new URI("https://fapwiz.com/miaipanema/my-grip-needs-a-name-%f0%9f%a4%ad%f0%9f%91%87%f0%9f%8f%bc/") | ||
.toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
testRipper(ripper); | ||
} | ||
|
||
@Test | ||
public void testRipPostWithEmojiInLongUrlAtEnd() throws IOException, URISyntaxException { | ||
URL url = new URI( | ||
"https://fapwiz.com/bimeat1998/just-imagine-youre-out-with-your-girl-and-your-buddies-and-then-she-makes-this-move-%f0%9f%98%8d/") | ||
.toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
testRipper(ripper); | ||
} | ||
|
||
@Test | ||
public void testRipPostWithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException { | ||
URL url = new URI( | ||
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/") | ||
.toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
testRipper(ripper); | ||
} | ||
|
||
// TODO Test rip user | ||
|
||
// TODO Test rip category | ||
|
||
@Test | ||
public void testPostGetGID1_Simple() throws IOException, URISyntaxException { | ||
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
Assertions.assertEquals("post_petiteasiantravels_riding-at-9-months-pregnant", ripper.getGID(url)); | ||
} | ||
|
||
// Test Post pages GetGID | ||
|
||
@Test | ||
public void testPostGetGID2_WithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException { | ||
URL url = new URI( | ||
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/") | ||
.toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
|
||
// In this case the filesystem safe version of the GID is | ||
// "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-" | ||
// but the GID doesn't truncate and doesn't remove non-filesystem-safe | ||
// characters. | ||
String gid = ripper.getGID(url); | ||
Assertions.assertEquals( | ||
"post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96", | ||
gid); | ||
|
||
// Test directory name on disk (filesystem safe sanitized as the ripper will | ||
// do). | ||
String directoryName = Utils.filesystemSafe(ripper.getHost() + "_" + gid); | ||
Assertions.assertEquals( | ||
"fapwiz_post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-", | ||
directoryName); | ||
} | ||
|
||
// Test User pages GetGID | ||
|
||
@Test | ||
public void testUserGetGID1_Simple() throws IOException, URISyntaxException { | ||
// Test a "simple" username that is all letters. | ||
URL url = new URI("https://fapwiz.com/petiteasiantravels/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
Assertions.assertEquals("user_petiteasiantravels", ripper.getGID(url)); | ||
} | ||
|
||
@Test | ||
public void testUserGetGID2_Numbers() throws IOException, URISyntaxException { | ||
// Test a more complex username that contains numbers. | ||
URL url = new URI("https://fapwiz.com/bimeat1998/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
Assertions.assertEquals("user_bimeat1998", ripper.getGID(url)); | ||
} | ||
|
||
@Test | ||
public void testUserGetGID3_HyphensAndNumbers() throws IOException, URISyntaxException { | ||
// Test a more complex username that contains hyphens and numbers. | ||
URL url = new URI("https://fapwiz.com/used-airport-4076/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
Assertions.assertEquals("user_used-airport-4076", ripper.getGID(url)); | ||
} | ||
|
||
@Test | ||
public void testUserGetGID4_Underscores() throws IOException, URISyntaxException { | ||
// Test a more complex username that contains underscores. | ||
URL url = new URI("https://fapwiz.com/desperate_bug_7776/").toURL(); | ||
FapwizRipper ripper = new FapwizRipper(url); | ||
Assertions.assertEquals("user_desperate_bug_7776", ripper.getGID(url)); | ||
} | ||
} |