Skip to content

Commit

Permalink
Add VRP Films scraper (#190)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrebey authored and cld9x committed Nov 9, 2019
1 parent 2656a1e commit e7185d1
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 11 deletions.
31 changes: 20 additions & 11 deletions pkg/models/model_tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ func (t *Tag) Save() error {
}

func ConvertTag(t string) string {
t = strings.ToLower(t)
t = strings.TrimSpace(strings.ToLower(t))

if funk.Contains([]string{"180", "60fps", "60 fps", "5k", "5k+", "big dick", "big cocks",
"axaxqxrrysrwqua", "girl-boy", "virtual reality",
"virtual reality porn", "vr porn", "180 vr porn", "xxxsex vr",
"xxx vr porn", "VRconk", "sex onbed",
},t) {
"axaxqxrrysrwqua", "girl-boy", "virtual reality", "sex", "new",
"virtual reality porn", "vr porn", "180 vr porn", "xxxsex vr",
"xxx vr porn", "VRconk", "sex onbed", "pornstars", "vr", "vrp",
"bg", "coming soon", "vr 1080p porn",
}, t) {
return ""
}

Expand Down Expand Up @@ -60,7 +61,7 @@ func ConvertTag(t string) string {
return "threesome fmm"
}

if funk.Contains([]string{"big boobs"}, t) {
if funk.Contains([]string{"big boobs", "big tits porn"}, t) {
return "big tits"
}

Expand Down Expand Up @@ -124,7 +125,7 @@ func ConvertTag(t string) string {
return "latina"
}

if funk.Contains([]string{"lesbian love", "lesbians"}, t) {
if funk.Contains([]string{"lesbian love", "lesbians", "girlgirl", "girl-on-girl"}, t) {
return "lesbian"
}

Expand Down Expand Up @@ -152,7 +153,7 @@ func ConvertTag(t string) string {
return "squirting"
}

if funk.Contains([]string{"teens"}, t) {
if funk.Contains([]string{"teens", "18"}, t) {
return "teen"
}

Expand Down Expand Up @@ -200,11 +201,11 @@ func ConvertTag(t string) string {
return "no tattoos"
}

if funk.Contains([]string{"tattoo", "tatoos"}, t) {
if funk.Contains([]string{"tattoo", "tatoos", "tattoo(s)"}, t) {
return "tattoos"
}

if funk.Contains([]string{"piercing", "pirced pussy"}, t) {
if funk.Contains([]string{"piercing", "pirced pussy", "pierced navel"}, t) {
return "piercings"
}

Expand Down Expand Up @@ -272,13 +273,21 @@ func ConvertTag(t string) string {
return "dp"
}

if funk.Contains([]string{"pov fucking"}, t) {
if funk.Contains([]string{"pov fucking", "pov vr"}, t) {
return "pov"
}

if funk.Contains([]string{"xxx parody", "xxx parody vr porn"}, t) {
return "parody"
}

if funk.Contains([]string{"fingering"}, t) {
return "masturbation"
}

if funk.Contains([]string{"solo models"}, t) {
return "solo"
}

return t
}
154 changes: 154 additions & 0 deletions pkg/scrape/vrpfilms.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package scrape

import (
"fmt"
"strconv"
"strings"
"sync"

"github.com/gocolly/colly"
"github.com/mozillazg/go-slugify"
"github.com/nleeper/goment"
"github.com/thoas/go-funk"
"github.com/xbapps/xbvr/pkg/models"
)

// vrpFilmsReleaseDate approximates a release date from a WordPress upload URL.
//
// The site exposes no release date, so the year and month embedded in the
// cover image path are used instead, e.g.
//
//	https://vrpfilms.com/wp-content/uploads/2019/10/No-Boys-Just-Toys-Banner-1600x800.jpg
//
// yields "2019-10-01". It returns "" when the URL has too few "/"-separated
// segments or when the extracted segments cannot be parsed as a date, instead
// of panicking on a missing or unexpected cover URL.
func vrpFilmsReleaseDate(coverURL string) string {
	parts := strings.Split(coverURL, "/")
	// Segments 5 and 6 are the year and month in the URL layout shown above.
	if len(parts) < 7 {
		return ""
	}

	date, err := goment.New(fmt.Sprintf("%s-%s-01", parts[5], parts[6]), "YYYY-MM-DD")
	if err != nil || date == nil {
		return ""
	}
	return date.Format("YYYY-MM-DD")
}

// vrpFilmsListField extracts a comma-separated list from a "Label: a, b, c"
// line. Only the text between the first and second ":" is considered. Each
// entry is trimmed and empty entries are dropped.
func vrpFilmsListField(text string) []string {
	parts := strings.Split(text, ":")
	if len(parts) < 2 {
		return nil
	}

	var items []string
	for _, item := range strings.Split(parts[1], ",") {
		if item = strings.TrimSpace(item); item != "" {
			items = append(items, item)
		}
	}
	return items
}

// VRPFilms scrapes scenes from vrpfilms.com and sends each one to out.
//
// It starts at the /vrp-movies listing, follows pagination links, and visits
// every linked page that is not already in knownScenes (links containing
// "/join" are ignored). Each visited page is turned into a
// models.ScrapedScene. wg.Done is called when the function returns. When
// updateSite is true, updateSiteLastUpdate is called for "vrpfilms" after the
// crawl. The returned error is always nil.
func VRPFilms(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene) error {
	defer wg.Done()
	logScrapeStart("vrpfilms", "VRP Films")

	siteCollector := colly.NewCollector(
		colly.AllowedDomains("vrpfilms.com", "www.vrpfilms.com"),
		colly.CacheDir(siteCacheDir),
		colly.UserAgent(userAgent),
	)

	sceneCollector := colly.NewCollector(
		colly.AllowedDomains("vrpfilms.com", "www.vrpfilms.com"),
		colly.CacheDir(sceneCacheDir),
		colly.UserAgent(userAgent),
	)

	siteCollector.OnRequest(func(r *colly.Request) {
		log.Println("visiting", r.URL.String())
	})

	sceneCollector.OnRequest(func(r *colly.Request) {
		log.Println("visiting", r.URL.String())
	})

	sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
		sc := models.ScrapedScene{}
		sc.SceneType = "VR"
		sc.Studio = "VRP Films"
		sc.Site = "VRP Films"
		// Drop any query string so the homepage URL is stable.
		sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]

		// Scene ID - taken from the download link. It's the closest thing
		// the site has to a scene id.
		sc.SiteID = e.ChildAttr(`a.member-download`, "data-main-product-id")
		sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID

		sc.Title = strings.TrimSpace(e.ChildText(`span.breadcrumb_last`))
		coverURL := e.ChildAttr(`meta[property="og:image"]`, "content")
		sc.Covers = append(sc.Covers, coverURL)

		// No release date anywhere, but we can approximate based on the
		// WordPress upload date of the cover image. Left empty when the
		// cover URL does not carry a usable date.
		sc.Released = vrpFilmsReleaseDate(coverURL)

		sc.Gallery = e.ChildAttrs(`.movies-gallery a`, "href")

		var unfilteredTags []string
		e.ForEach(`.detail p`, func(_ int, p *colly.HTMLElement) {
			if strings.Contains(p.Text, "Featuring:") {
				// Featuring: Amber Jayne, Selvaggia
				sc.Cast = append(sc.Cast, vrpFilmsListField(p.Text)...)
			}

			if strings.Contains(p.Text, "Length:") {
				// Length: 35 Minutes
				length := strings.TrimSpace(strings.Split(p.Text, ":")[1])
				// A non-numeric length leaves Duration at its zero value.
				if minutes, err := strconv.Atoi(strings.Split(length, " ")[0]); err == nil {
					sc.Duration = minutes
				}
			}

			if strings.Contains(p.Text, "Tags:") {
				unfilteredTags = append(unfilteredTags, vrpFilmsListField(p.Text)...)
			}
		})

		// It pains me to have to do this: the site's tag list mixes in
		// performer names, scene titles and other noise.
		garbageTags := []string{"pussy", "polly pons", "little cindy",
			"bass ass handy women", "hot",
			"estate agent sex pov", "real estate sex vr",
			"sandy's superstar escorts", "wet and wild",
		}

		// Lowercase the cast and title once rather than once per tag.
		lowerCast := make([]string, 0, len(sc.Cast))
		for _, name := range sc.Cast {
			lowerCast = append(lowerCast, strings.ToLower(name))
		}
		lowerTitle := strings.ToLower(sc.Title)

		sc.Tags = funk.FilterString(unfilteredTags, func(tag string) bool {
			lt := strings.ToLower(tag)
			switch {
			case funk.ContainsString(garbageTags, lt):
				return false
			case funk.ContainsString(lowerCast, lt):
				// Performer names are not tags.
				return false
			case lt == lowerTitle:
				// Neither is the scene title.
				return false
			}
			return true
		})

		out <- sc
	})

	siteCollector.OnHTML(`article a`, func(e *colly.HTMLElement) {
		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))

		if !funk.ContainsString(knownScenes, sceneURL) && !strings.Contains(sceneURL, "/join") {
			// Visit errors are not propagated; a page that cannot be
			// visited is simply skipped.
			sceneCollector.Visit(sceneURL)
		}
	})

	siteCollector.OnHTML(`a.page-numbers`, func(e *colly.HTMLElement) {
		pageURL := e.Request.AbsoluteURL(e.Attr("href"))
		if !strings.Contains(pageURL, "/join") {
			// Visit errors are not propagated; a listing page that cannot
			// be visited is simply skipped.
			siteCollector.Visit(pageURL)
		}
	})

	// Surface a failure of the entry-point request instead of dropping it
	// silently; the scrape still finishes normally.
	if err := siteCollector.Visit("https://vrpfilms.com/vrp-movies"); err != nil {
		log.Println("vrpfilms: initial listing visit failed:", err)
	}

	if updateSite {
		updateSiteLastUpdate("vrpfilms")
	}
	logScrapeFinished("vrpfilms", "VRP Films")
	return nil
}

// init registers the VRP Films scraper under the "vrpfilms" identifier.
func init() {
	const (
		scraperID   = "vrpfilms"
		scraperName = "VRP Films"
	)
	registerScraper(scraperID, scraperName, VRPFilms)
}

0 comments on commit e7185d1

Please sign in to comment.