Skip to content

Commit

Permalink
Implement Fapwiz Ripper (#2086)
Browse files Browse the repository at this point in the history
* Implement FapwizRipper
  • Loading branch information
metaprime authored Feb 11, 2025
1 parent 647de8f commit e41eb25
Show file tree
Hide file tree
Showing 2 changed files with 316 additions and 0 deletions.
154 changes: 154 additions & 0 deletions src/main/java/com/rarchives/ripme/ripper/rippers/FapwizRipper.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package com.rarchives.ripme.ripper.rippers;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

/**
 * Ripper for fapwiz.com.
 *
 * <p>Three kinds of page are recognised, purely from the shape of the URL:
 * <ul>
 * <li>category pages: {@code fapwiz.com/category/CATEGORY}</li>
 * <li>user pages: {@code fapwiz.com/USER}</li>
 * <li>post pages: {@code fapwiz.com/USER/POST}</li>
 * </ul>
 *
 * <p>A category URL also has the {@code USER/POST} shape (with "category" in
 * the user position), so wherever the page kind is decided the category check
 * must come first and the checks must be mutually exclusive.
 */
public class FapwizRipper extends AbstractHTMLRipper {

    private static final Logger logger = LogManager.getLogger(FapwizRipper.class);

    /** Delay applied before each download is queued, in milliseconds. */
    private static final int DOWNLOAD_DELAY_MS = 2000;

    // The dot in the host name is escaped so that it only matches a literal '.'.
    private static final Pattern CATEGORY_PATTERN = Pattern
            .compile("https?://fapwiz\\.com/category/([a-zA-Z0-9_-]+)/?$");

    private static final Pattern USER_PATTERN = Pattern.compile("https?://fapwiz\\.com/([a-zA-Z0-9_-]+)/?$");

    // Note that the last part of the pattern can contain unicode emoji which
    // get encoded as %-encoded UTF-8 bytes in the URL, so we allow % characters.
    private static final Pattern POST_PATTERN = Pattern
            .compile("https?://fapwiz\\.com/([a-zA-Z0-9_-]+)/([a-zA-Z0-9_%-]+)/?$");

    /**
     * Creates a ripper for the given URL.
     *
     * @param url the category, user or post URL to rip
     * @throws IOException if the superclass constructor rejects the URL
     */
    public FapwizRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    public String getHost() {
        return "fapwiz";
    }

    @Override
    public String getDomain() {
        return "fapwiz.com";
    }

    /**
     * Builds the album identifier for a URL.
     *
     * @param url the URL to identify
     * @return {@code category_NAME}, {@code user_NAME} or {@code post_USER_POST}
     *         depending on which URL shape matched; the post part is returned
     *         as it appears in the URL (still %-encoded)
     * @throws MalformedURLException if the URL matches none of the supported shapes
     */
    @Override
    public String getGID(URL url) throws MalformedURLException {
        String externalForm = url.toExternalForm();
        Matcher m;

        // Category must be tested before post: see the class comment.
        m = CATEGORY_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "category_" + m.group(1);
        }

        m = USER_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "user_" + m.group(1);
        }

        m = POST_PATTERN.matcher(externalForm);
        if (m.matches()) {
            return "post_" + m.group(1) + "_" + m.group(2);
        }

        throw new MalformedURLException("Expected fapwiz URL format: " +
                "fapwiz.com/USER or fapwiz.com/USER/POST or " +
                "fapwiz.com/category/CATEGORY - got " + url + " instead");
    }

    /**
     * Collects video URLs from a listing page by rewriting thumbnail image URLs.
     *
     * <p>The category page looks a lot like the structure of a user page, so
     * this method is written to be compatible with both.
     *
     * @param doc     the listing page
     * @param results list that the discovered URLs are appended to
     */
    void processUserOrCategoryPage(Document doc, List<String> results) {
        doc.select(".post-items-holder img").stream()
                .map(img -> img.attr("src"))
                // An <img> without a src yields "", which is not a usable URL.
                .filter(src -> !src.isEmpty())
                // Skip the user profile picture thumbnail insets
                .filter(src -> !src.endsWith("-thumbnail-icon.jpg"))
                // Replace -thumbnail.jpg with .mp4
                .map(src -> src.replace("-thumbnail.jpg", ".mp4"))
                .forEach(results::add);
    }

    /**
     * Collects the media URLs of a category page.
     *
     * @param doc     the category page
     * @param results list that the discovered URLs are appended to
     */
    void processCategoryPage(Document doc, List<String> results) {
        logger.info("Processing category page: {}", url);
        processUserOrCategoryPage(doc, results);
    }

    /**
     * Collects the media URLs of a user page.
     *
     * @param doc     the user page
     * @param results list that the discovered URLs are appended to
     */
    void processUserPage(Document doc, List<String> results) {
        logger.info("Processing user page: {}", url);
        processUserOrCategoryPage(doc, results);
    }

    /**
     * Collects the {@code src} of every {@code <source>} nested in a
     * {@code <video>} element of a post page.
     *
     * @param doc     the post page
     * @param results list that the discovered URLs are appended to
     */
    void processPostPage(Document doc, List<String> results) {
        logger.info("Processing post page: {}", url);
        doc.select("video source").stream()
                .map(source -> source.attr("src"))
                // A <source> without a src yields "", which is not a usable URL.
                .filter(src -> !src.isEmpty())
                .forEach(results::add);
    }

    /**
     * Extracts the media URLs from one page, choosing the extraction strategy
     * from the shape of the ripper's own URL.
     *
     * @param doc the page to scan
     * @return the URLs found; empty if the ripper URL matches no known shape
     */
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> results = new ArrayList<>();
        String pageUrl = url.toExternalForm();

        // The branches are exclusive and ordered like getGID: a category URL
        // also matches POST_PATTERN and must not be processed a second time
        // as if it were a post page.
        if (CATEGORY_PATTERN.matcher(pageUrl).matches()) {
            processCategoryPage(doc, results);
        } else if (USER_PATTERN.matcher(pageUrl).matches()) {
            processUserPage(doc, results);
        } else if (POST_PATTERN.matcher(pageUrl).matches()) {
            processPostPage(doc, results);
        }

        return results;
    }

    /**
     * Fetches a page with the ripper's user agent.
     *
     * @param url     address of the page
     * @param retries retry count handed to the HTTP helper
     * @return the parsed page
     * @throws IOException if the page cannot be fetched
     */
    private Document getDocument(String url, int retries) throws IOException {
        return Http.url(url).userAgent(USER_AGENT).retries(retries).get();
    }

    /**
     * Fetches a page with the ripper's user agent and a retry count of 1.
     *
     * @param url address of the page
     * @return the parsed page
     * @throws IOException if the page cannot be fetched
     */
    private Document getDocument(String url) throws IOException {
        return getDocument(url, 1);
    }

    /**
     * Follows the {@code a.next} pagination link of a page.
     *
     * @param page the page that was just processed
     * @return the page the pagination link points to
     * @throws IOException if the page has no usable pagination link, or if the
     *                     next page cannot be fetched
     */
    @Override
    public Document getNextPage(Document page) throws IOException {
        logger.info("Getting next page for url: {}", url);
        Elements next = page.select("a.next");

        // Prefer the resolved form so that a relative link still yields a
        // fetchable address; fall back to the raw attribute if it cannot be
        // resolved. Both lookups return "" when there is no matching link.
        String href = next.attr("abs:href");
        if (href.isEmpty()) {
            href = next.attr("href");
        }

        if (href.isEmpty()) {
            logger.info("No more pages");
            throw new IOException("No more pages.");
        }

        logger.info("Found next page: {}", href);
        return getDocument(href);
    }

    /**
     * Queues one URL for download after a fixed delay.
     *
     * @param url   the media URL
     * @param index position of the URL in the album, used to build the file prefix
     */
    @Override
    public void downloadURL(URL url, int index) {
        // NOTE(review): sleep is inherited; presumably it pauses this thread to
        // throttle requests to the site - confirm against the base class.
        sleep(DOWNLOAD_DELAY_MS);
        addURLToDownload(url, getPrefix(index));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package com.rarchives.ripme.tst.ripper.rippers;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;

import org.jsoup.nodes.Document;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;

import com.rarchives.ripme.ripper.AbstractRipper;
import com.rarchives.ripme.ripper.rippers.FapwizRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

public class FapwizRipperTest extends RippersTest {
@Test
@Tag("flaky") // It seems like fetching the document within the test can be flaky.
public void testGetNextPage_NoNextPage() throws IOException, URISyntaxException {
URL url = new URI("https://fapwiz.com/alison-esha/").toURL();
FapwizRipper ripper = new FapwizRipper(url);

Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();
try {
ripper.getNextPage(firstPage);
// If we don't throw, we failed the text because there *was* a next
// page even though there shouldn't be.
Assertions.fail();
} catch (IOException exception) {
Assertions.assertTrue(true);
}
}

@Test
@Tag("flaky") // It seems like fetching the document within the test can be flaky.
public void testGetNextPage_HasNextPage() throws IOException, URISyntaxException {
URL url = new URI("https://fapwiz.com/miaipanema/").toURL();
FapwizRipper ripper = new FapwizRipper(url);

Document firstPage = Http.url(url).userAgent(AbstractRipper.USER_AGENT).retries(1).get();
try {
Document doc = ripper.getNextPage(firstPage);
Assertions.assertNotNull(doc);
} catch (IOException exception) {
// We should have found a next page but didn't.
Assertions.fail();
}
}

@Test
public void testRipPost() throws IOException, URISyntaxException {
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
FapwizRipper ripper = new FapwizRipper(url);
testRipper(ripper);
}

@Test
public void testRipPostWithNumbersInUsername1() throws IOException, URISyntaxException {
URL url = new URI("https://fapwiz.com/desperate_bug_7776/lets-be-friends-that-secretly-fuck-thanks/").toURL();
FapwizRipper ripper = new FapwizRipper(url);
testRipper(ripper);
}

@Test
public void testRipPostWithEmojiInShortUrl() throws IOException, URISyntaxException {
URL url = new URI("https://fapwiz.com/miaipanema/my-grip-needs-a-name-%f0%9f%a4%ad%f0%9f%91%87%f0%9f%8f%bc/")
.toURL();
FapwizRipper ripper = new FapwizRipper(url);
testRipper(ripper);
}

@Test
public void testRipPostWithEmojiInLongUrlAtEnd() throws IOException, URISyntaxException {
URL url = new URI(
"https://fapwiz.com/bimeat1998/just-imagine-youre-out-with-your-girl-and-your-buddies-and-then-she-makes-this-move-%f0%9f%98%8d/")
.toURL();
FapwizRipper ripper = new FapwizRipper(url);
testRipper(ripper);
}

@Test
public void testRipPostWithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
URL url = new URI(
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
.toURL();
FapwizRipper ripper = new FapwizRipper(url);
testRipper(ripper);
}

// TODO Test rip user

// TODO Test rip category

@Test
public void testPostGetGID1_Simple() throws IOException, URISyntaxException {
URL url = new URI("https://fapwiz.com/petiteasiantravels/riding-at-9-months-pregnant/").toURL();
FapwizRipper ripper = new FapwizRipper(url);
Assertions.assertEquals("post_petiteasiantravels_riding-at-9-months-pregnant", ripper.getGID(url));
}

// Test Post pages GetGID

@Test
public void testPostGetGID2_WithEmojiInLongUrlInTheMiddle() throws IOException, URISyntaxException {
URL url = new URI(
"https://fapwiz.com/miaipanema/new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96/")
.toURL();
FapwizRipper ripper = new FapwizRipper(url);

// In this case the filesystem safe version of the GID is
// "post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-"
// but the GID doesn't truncate and doesn't remove non-filesystem-safe
// characters.
String gid = ripper.getGID(url);
Assertions.assertEquals(
"post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-%f0%9f%92%a6-you-know-where-to-get-it-%f0%9f%94%97%f0%9f%92%96",
gid);

// Test directory name on disk (filesystem safe sanitized as the ripper will
// do).
String directoryName = Utils.filesystemSafe(ripper.getHost() + "_" + gid);
Assertions.assertEquals(
"fapwiz_post_miaipanema_new-pov-couch-sex-with-perfect-cumshot-on-my-ass-f09f92a6-you-know-where-to-",
directoryName);
}

// Test User pages GetGID

@Test
public void testUserGetGID1_Simple() throws IOException, URISyntaxException {
// Test a "simple" username that is all letters.
URL url = new URI("https://fapwiz.com/petiteasiantravels/").toURL();
FapwizRipper ripper = new FapwizRipper(url);
Assertions.assertEquals("user_petiteasiantravels", ripper.getGID(url));
}

@Test
public void testUserGetGID2_Numbers() throws IOException, URISyntaxException {
// Test a more complex username that contains numbers.
URL url = new URI("https://fapwiz.com/bimeat1998/").toURL();
FapwizRipper ripper = new FapwizRipper(url);
Assertions.assertEquals("user_bimeat1998", ripper.getGID(url));
}

@Test
public void testUserGetGID3_HyphensAndNumbers() throws IOException, URISyntaxException {
// Test a more complex username that contains hyphens and numbers.
URL url = new URI("https://fapwiz.com/used-airport-4076/").toURL();
FapwizRipper ripper = new FapwizRipper(url);
Assertions.assertEquals("user_used-airport-4076", ripper.getGID(url));
}

@Test
public void testUserGetGID4_Underscores() throws IOException, URISyntaxException {
// Test a more complex username that contains underscores.
URL url = new URI("https://fapwiz.com/desperate_bug_7776/").toURL();
FapwizRipper ripper = new FapwizRipper(url);
Assertions.assertEquals("user_desperate_bug_7776", ripper.getGID(url));
}
}

0 comments on commit e41eb25

Please sign in to comment.