Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eroshareripper now uses mirror for data #29

Merged
merged 3 commits into from
Aug 10, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -51,22 +51,34 @@ public void downloadURL(URL url, int index) {
}
@Override
public boolean canRip(URL url) {
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$");
Pattern p = Pattern.compile("^https?://eroshae.com/([a-zA-Z0-9\\-_]+)/?$");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should probably stick to allowing an optional www (which is what the `[w.]*` was accomplishing, although `(www[.])?` documents the intent better and would likely be more efficient under the regex engine).

Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return true;
}

Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return true;
}

Pattern p_eroshare = Pattern.compile("^https?://eroshare.com/([a-zA-Z0-9\\-_]+)/?$");
Matcher m_eroshare = p_eroshare.matcher(url.toExternalForm());
if (m_eroshare.matches()) {
return true;
}

Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm());
if (m_eroshare_profile.matches()) {
return true;
}
return false;
}

public boolean is_profile(URL url) {
Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return true;
Expand All @@ -79,12 +91,14 @@ public Document getNextPage(Document doc) throws IOException {
// Find next page
String nextUrl = "";
Element elem = doc.select("li.next > a").first();
logger.info(elem);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove the logger line?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume I did it because I was trying to debug and it was clogging up the log/output.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough

if (elem == null) {
throw new IOException("No more pages");
}
nextUrl = elem.attr("href");
if (nextUrl == "") {
throw new IOException("No more pages");
}
return Http.url("https://eroshare.com" + nextUrl).get();
return Http.url("eroshae.com" + nextUrl).get();
}

@Override
Expand Down Expand Up @@ -124,15 +138,15 @@ public List<String> getURLsFromPage(Document doc) {
if (vid.hasClass("album-video")) {
Elements source = vid.getElementsByTag("source");
String videoURL = source.first().attr("src");
URLs.add(videoURL);
URLs.add("https:" + videoURL);
}
}
// Profile videos
Elements links = doc.select("div.item-container > a.item");
for (Element link : links) {
Document video_page;
try {
video_page = Http.url("https://eroshare.com" + link.attr("href")).get();
video_page = Http.url("eroshae.com" + link.attr("href")).get();
} catch (IOException e) {
logger.warn("Failed to log link in Jsoup");
video_page = null;
Expand All @@ -143,7 +157,7 @@ public List<String> getURLsFromPage(Document doc) {
if (vid.hasClass("album-video")) {
Elements source = vid.getElementsByTag("source");
String videoURL = source.first().attr("src");
URLs.add(videoURL);
URLs.add("https:" + videoURL);
}
}
}
Expand All @@ -153,7 +167,8 @@ public List<String> getURLsFromPage(Document doc) {

@Override
public Document getFirstPage() throws IOException {
Response resp = Http.url(this.url)
String urlToDownload = this.url.toExternalForm();
Response resp = Http.url(urlToDownload.replace("eroshare.com", "eroshae.com"))
.ignoreContentType()
.response();

Expand All @@ -164,19 +179,31 @@ public Document getFirstPage() throws IOException {

@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$");
Pattern p = Pattern.compile("^https?://eroshae.com/([a-zA-Z0-9\\-_]+)/?$");
Matcher m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}

Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
Pattern p_eroshare = Pattern.compile("^https?://eroshare.com/([a-zA-Z0-9\\-_]+)/?$");
Matcher m_eroshare = p_eroshare.matcher(url.toExternalForm());
if (m_eroshare.matches()) {
return m_eroshare.group(1);
}

Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm());
if (m_eroshare_profile.matches()) {
return m_eroshare_profile.group(1) + "_profile";
}

Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$");
Matcher ma = pa.matcher(url.toExternalForm());
if (ma.matches()) {
return m.group(1) + "_profile";
}

throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album");
throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or eroshae.com/album");
}

public static List<URL> getURLs(URL url) throws IOException{
Expand All @@ -203,11 +230,10 @@ public static List<URL> getURLs(URL url) throws IOException{
if (vid.hasClass("album-video")) {
Elements source = vid.getElementsByTag("source");
String videoURL = source.first().attr("src");
URLs.add(new URL(videoURL));
URLs.add(new URL("https:" + videoURL));
}
}

return URLs;
}
}