-
Notifications
You must be signed in to change notification settings - Fork 633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Eroshareripper now uses mirror for data #29
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,22 +51,34 @@ public void downloadURL(URL url, int index) { | |
} | ||
@Override | ||
public boolean canRip(URL url) { | ||
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); | ||
Pattern p = Pattern.compile("^https?://eroshae.com/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher m = p.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
return true; | ||
} | ||
|
||
Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher ma = pa.matcher(url.toExternalForm()); | ||
if (ma.matches()) { | ||
return true; | ||
} | ||
|
||
Pattern p_eroshare = Pattern.compile("^https?://eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher m_eroshare = p_eroshare.matcher(url.toExternalForm()); | ||
if (m_eroshare.matches()) { | ||
return true; | ||
} | ||
|
||
Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm()); | ||
if (m_eroshare_profile.matches()) { | ||
return true; | ||
} | ||
return false; | ||
} | ||
|
||
public boolean is_profile(URL url) { | ||
Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher ma = pa.matcher(url.toExternalForm()); | ||
if (ma.matches()) { | ||
return true; | ||
|
@@ -79,12 +91,14 @@ public Document getNextPage(Document doc) throws IOException { | |
// Find next page | ||
String nextUrl = ""; | ||
Element elem = doc.select("li.next > a").first(); | ||
logger.info(elem); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Why remove the `logger.info(elem);` call? [inline code lost in extraction; presumably the logging line directly above]
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I assume I did it because I was trying to debug and it was clogging up the log/output.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Fair enough. |
||
if (elem == null) { | ||
throw new IOException("No more pages"); | ||
} | ||
nextUrl = elem.attr("href"); | ||
if (nextUrl == "") { | ||
throw new IOException("No more pages"); | ||
} | ||
return Http.url("https://eroshare.com" + nextUrl).get(); | ||
return Http.url("eroshae.com" + nextUrl).get(); | ||
} | ||
|
||
@Override | ||
|
@@ -124,15 +138,15 @@ public List<String> getURLsFromPage(Document doc) { | |
if (vid.hasClass("album-video")) { | ||
Elements source = vid.getElementsByTag("source"); | ||
String videoURL = source.first().attr("src"); | ||
URLs.add(videoURL); | ||
URLs.add("https:" + videoURL); | ||
} | ||
} | ||
// Profile videos | ||
Elements links = doc.select("div.item-container > a.item"); | ||
for (Element link : links) { | ||
Document video_page; | ||
try { | ||
video_page = Http.url("https://eroshare.com" + link.attr("href")).get(); | ||
video_page = Http.url("eroshae.com" + link.attr("href")).get(); | ||
} catch (IOException e) { | ||
logger.warn("Failed to log link in Jsoup"); | ||
video_page = null; | ||
|
@@ -143,7 +157,7 @@ public List<String> getURLsFromPage(Document doc) { | |
if (vid.hasClass("album-video")) { | ||
Elements source = vid.getElementsByTag("source"); | ||
String videoURL = source.first().attr("src"); | ||
URLs.add(videoURL); | ||
URLs.add("https:" + videoURL); | ||
} | ||
} | ||
} | ||
|
@@ -153,7 +167,8 @@ public List<String> getURLsFromPage(Document doc) { | |
|
||
@Override | ||
public Document getFirstPage() throws IOException { | ||
Response resp = Http.url(this.url) | ||
String urlToDownload = this.url.toExternalForm(); | ||
Response resp = Http.url(urlToDownload.replace("eroshare.com", "eroshae.com")) | ||
.ignoreContentType() | ||
.response(); | ||
|
||
|
@@ -164,19 +179,31 @@ public Document getFirstPage() throws IOException { | |
|
||
@Override | ||
public String getGID(URL url) throws MalformedURLException { | ||
Pattern p = Pattern.compile("^https?://[w.]*eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); | ||
Pattern p = Pattern.compile("^https?://eroshae.com/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher m = p.matcher(url.toExternalForm()); | ||
if (m.matches()) { | ||
return m.group(1); | ||
} | ||
|
||
Pattern pa = Pattern.compile("^https?://[w.]*eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Pattern p_eroshare = Pattern.compile("^https?://eroshare.com/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher m_eroshare = p_eroshare.matcher(url.toExternalForm()); | ||
if (m_eroshare.matches()) { | ||
return m_eroshare.group(1); | ||
} | ||
|
||
Pattern p_eroshare_profile = Pattern.compile("^https?://eroshare.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher m_eroshare_profile = p_eroshare_profile.matcher(url.toExternalForm()); | ||
if (m_eroshare_profile.matches()) { | ||
return m_eroshare_profile.group(1) + "_profile"; | ||
} | ||
|
||
Pattern pa = Pattern.compile("^https?://eroshae.com/u/([a-zA-Z0-9\\-_]+)/?$"); | ||
Matcher ma = pa.matcher(url.toExternalForm()); | ||
if (ma.matches()) { | ||
return m.group(1) + "_profile"; | ||
} | ||
|
||
throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album"); | ||
throw new MalformedURLException("eroshare album not found in " + url + ", expected https://eroshare.com/album or eroshae.com/album"); | ||
} | ||
|
||
public static List<URL> getURLs(URL url) throws IOException{ | ||
|
@@ -203,11 +230,10 @@ public static List<URL> getURLs(URL url) throws IOException{ | |
if (vid.hasClass("album-video")) { | ||
Elements source = vid.getElementsByTag("source"); | ||
String videoURL = source.first().attr("src"); | ||
URLs.add(new URL(videoURL)); | ||
URLs.add(new URL("https:" + videoURL)); | ||
} | ||
} | ||
|
||
return URLs; | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should probably stick to allowing an optional `www.` prefix (which is what the `[w.]*` was accomplishing, although `(www[.])?` documents the intent better and would likely be more efficient under the regex engine).