Skip to content

Commit

Permalink
Merge pull request #2070 from NormanPriv/master
Browse files Browse the repository at this point in the history
Add scene and performer scrapers for ModelMediaAsia
  • Loading branch information
feederbox826 authored Oct 27, 2024
2 parents b21a7dc + 66d073b commit a361bd6
Showing 1 changed file with 90 additions and 0 deletions.
90 changes: 90 additions & 0 deletions scrapers/ModelMediaAsia.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
name: "ModelMediaAsia"
sceneByURL:
- action: scrapeXPath
url:
- modelmediaasia.com/en-US/videos/
- modelmediaasia.com/zh-CN/videos/
scraper: sceneScraper

sceneByFragment:
action: scrapeXPath
queryURL: https://modelmediaasia.com/videos/{filename}
queryURLReplace:
# Assume beginning part contains the code
filename:
# Get file that matches code
- regex: ^([\w\d-]+)
with: $1
- regex: .*\.[^\.]+$ # if no id is found in the filename
with: # clear the filename so that it doesn't leak
scraper: sceneScraper

performerByURL:
- action: scrapeXPath
url:
- modelmediaasia.com/en-US/models/
- modelmediaasia.com/zh-CN/models/
scraper: performerScraper

xPathScrapers:
sceneScraper:
common:
$detailspart: //div[@class="details-part"]
scene:
Title: $detailspart//h2
Date:
# Some text may be in front of the date
selector: $detailspart//span[contains(@class, "trending-year")]
postProcess:
- replace:
- regex: '.*(\d{4}/\d{1,2}/\d{1,2})'
with: $1
- parseDate: 2006/01/02
Details:
selector: //div[@id="description-01"]/div/p/text()
Performers:
Name: $detailspart//div[@class="content-details trending-info"][2]//h6/text()
# workaround for including URL
# URL arrays don't work https://github.com/stashapp/stash/issues/5294
# Details:
# selector: //*[@id="__nuxt"]/main/div/div[2]/div/div/div/div[3]/div/ul/li/a/@href
# postProcess:
# - replace:
# - regex: ^
# with: https://modelmediaasia.com
Studio:
Name:
fixed: "Model Media"
Tags:
Name: //ul[contains(@class, "iq-blogtag")]/li/a/text()
Image: //div[@class="iq-main-slider site-video"]//div[contains(@class,"object-cover")]/img/@src
Code: //div[@class="details-part"]//a[contains(@class,"text-capitalize")]/text()
performerScraper:
common:
$infobox: //div[@id="__nuxt"]/main//div[@class="flex flex-col gap-3 my-5"]
performer:
Name: //div[@id="__nuxt"]/main//div[contains(@class,"text-white")]/div/h4[1]/text()
Gender:
fixed: Female
Ethnicity:
fixed: Asian
Height:
selector: $infobox/p[1]/text()
postProcess:
- replace:
- regex: '(\d+)\D+cm'
with: $1
Weight:
selector: $infobox/p[2]/text()
postProcess:
- replace:
- regex: '(\d+)\D+kg'
with: $1
Measurements:
selector: $infobox/p[3]/text()
postProcess:
- replace:
- regex: '\s*(\d+\w*)\D+(\d+)\D+(\d+)'
with: $1-$2-$3
Image: //img[@class="w-full aspect-[3/4]"]/@src
# Last Updated October 26, 2024

0 comments on commit a361bd6

Please sign in to comment.