Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

complete #142

Merged
merged 1 commit into from
Apr 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/araddon/dateparse v0.0.0-20200409225146-d820a6159ab1
github.com/avast/retry-go v3.0.0+incompatible
github.com/blevesearch/bleve v1.0.14
github.com/bregydoc/gtranslate v0.0.0-20200913051839-1bd07f6c1fc5
github.com/creasty/defaults v1.5.1
github.com/darwayne/go-timecode v1.1.0
github.com/djherbis/times v1.2.0
Expand Down Expand Up @@ -77,6 +78,7 @@ require (
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110
golang.org/x/oauth2 v0.0.0-20210220000619-9bb904979d93
golang.org/x/sys v0.0.0-20210228012217-479acdf4ea46
golang.org/x/text v0.3.4
gopkg.in/cheggaaa/pb.v1 v1.0.28
gopkg.in/gormigrate.v1 v1.6.0
gopkg.in/resty.v1 v1.12.0
Expand Down
4 changes: 2 additions & 2 deletions pkg/models/model_tag.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func ConvertTag(t string) string {
return "blowjob"
}

if funk.Contains([]string{"boobs job", "titty fucking", "titjob"}, t) {
if funk.Contains([]string{"boobs job", "titty fucking", "tittyfuck", "titjob"}, t) {
return "titty fuck"
}

Expand Down Expand Up @@ -297,7 +297,7 @@ func ConvertTag(t string) string {
return "parody"
}

if funk.Contains([]string{"fingering"}, t) {
if funk.Contains([]string{"fingering", "masterbation"}, t) {
return "masturbation"
}

Expand Down
130 changes: 130 additions & 0 deletions pkg/scrape/caribbeancom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
package scrape

import (
"strconv"
"strings"
"sync"

"github.com/bregydoc/gtranslate"
"github.com/gocolly/colly"
"github.com/mozillazg/go-slugify"
"github.com/thoas/go-funk"
"github.com/tidwall/gjson"
"github.com/xbapps/xbvr/pkg/models"
"golang.org/x/text/language"
)

// cariJSONPayload returns the slice of an inline script that runs from its
// first "{" up to, but not including, its last three bytes. It returns ""
// instead of panicking when the script has no "{" or is too short to slice.
func cariJSONPayload(script string) string {
	start := strings.Index(script, "{")
	end := len(script) - 3
	if start < 0 || end <= start {
		return ""
	}
	return script[start:end]
}

// cariSceneFilename builds the expected video filename (for example
// "011421-001-carib-2160p.mp4") from the fifth "/"-separated segment of the
// cover URL. It returns "" when the URL does not have that segment.
func cariSceneFilename(coverURL string) string {
	parts := strings.Split(coverURL, "/")
	if len(parts) <= 4 || parts[4] == "" {
		return ""
	}
	return parts[4] + "-carib-2160p.mp4"
}

// cariDurationMinutes converts a duration attribute such as "T00H52M18S" into
// whole minutes, counting the hour component as 60 minutes each. Components
// that are missing or not numeric contribute 0; seconds are ignored.
func cariDurationMinutes(content string) int {
	rest := strings.TrimSpace(content)
	// Accept both the "T..." form and a full ISO 8601 style "PT..." prefix.
	rest = strings.TrimPrefix(rest, "P")
	rest = strings.TrimPrefix(rest, "T")

	minutes := 0
	if i := strings.Index(rest, "H"); i >= 0 {
		if hours, err := strconv.Atoi(rest[:i]); err == nil {
			minutes += hours * 60
		}
		rest = rest[i+1:]
	}
	if i := strings.Index(rest, "M"); i >= 0 {
		if mins, err := strconv.Atoi(rest[:i]); err == nil {
			minutes += mins
		}
	}
	return minutes
}

// CariVR scrapes the CaribbeanCom VR listing page and sends one
// models.ScrapedScene on out for every listed scene whose URL is not already
// in knownScenes.
//
// Scene metadata is read from the English scene page; the synopsis is read
// from the matching Japanese page and machine-translated to English. When
// updateSite is true the site's last-update marker is refreshed after the
// crawl. wg.Done is called when the function returns. The returned error is
// always nil.
func CariVR(wg *sync.WaitGroup, updateSite bool, knownScenes []string, out chan<- models.ScrapedScene) error {
	defer wg.Done()
	scraperID := "caribbeancomvr"
	siteID := "CaribbeanCom VR"
	logScrapeStart(scraperID, siteID)

	sceneCollector := createCollector("en.caribbeancom.com", "www.caribbeancom.com")
	siteCollector := createCollector("en.caribbeancom.com", "www.caribbeancom.com")
	sceneCollectorJap := cloneCollector(sceneCollector)

	sceneCollector.OnHTML(`html`, func(e *colly.HTMLElement) {
		sc := models.ScrapedScene{}
		sc.SceneType = "VR"
		sc.Studio = "Caribbeancom"
		sc.Site = siteID
		sc.HomepageURL = strings.Split(e.Request.URL.String(), "?")[0]

		// Scene ID - get from JavaScript
		e.ForEach(`script`, func(id int, e *colly.HTMLElement) {
			if !strings.Contains(e.Text, "movie_seq") {
				return
			}
			movSeq := gjson.Get(cariJSONPayload(e.Text), "movie_seq").String()
			if movSeq == "" {
				return
			}
			sc.SiteID = movSeq
			sc.SceneID = slugify.Slugify(sc.Site) + "-" + sc.SiteID
		})

		// Title
		e.ForEach(`h1[itemprop=name]`, func(id int, e *colly.HTMLElement) {
			sc.Title = strings.TrimSpace(strings.Replace(e.Text, "[VR] ", "", 1))
		})

		// Cover - derived from the scene URL rather than read from the page
		coverURL := strings.Replace(strings.Replace(sc.HomepageURL, "eng/", "", 1), "index.html", "images/poster_en.jpg", 1)
		if len(coverURL) > 0 {
			sc.Covers = append(sc.Covers, coverURL)
		}

		// Filename 011421-001-carib-2160p.mp4
		if filename := cariSceneFilename(coverURL); filename != "" {
			sc.Filenames = append(sc.Filenames, filename)
		}

		// Gallery - entries whose data-is_sample attribute is "0" are skipped
		e.ForEach(`div.movie-gallery a.fancy-gallery`, func(id int, e *colly.HTMLElement) {
			if e.Attr(`data-is_sample`) == "0" {
				return
			}
			sc.Gallery = append(sc.Gallery, e.Request.AbsoluteURL(e.Attr("href")))
		})

		// Cast & Tags
		e.ForEach(`div.movie-info a.spec__tag`, func(id int, e *colly.HTMLElement) {
			switch e.Attr(`itemprop`) {
			case "actor":
				sc.Cast = append(sc.Cast, strings.TrimSpace(e.Text))
			case "genre", "url":
				sc.Tags = append(sc.Tags, strings.TrimSpace(e.Text))
			}
		})

		// Release Date & Duration
		e.ForEach(`div.movie-info span`, func(id int, e *colly.HTMLElement) {
			switch e.Attr(`itemprop`) {
			case "uploadDate":
				sc.Released = strings.TrimSpace(strings.Replace(e.Text, "/", "-", -1))
			case "duration":
				sc.Duration = cariDurationMinutes(e.Attr(`content`))
			}
		})

		// Hand the partially filled scene to the Japanese-page collector,
		// which adds the synopsis and emits it.
		sceneURLJap := strings.Replace(strings.Replace(sc.HomepageURL, "eng/", "", 1), "en.", "www.", 1)
		ctx := colly.NewContext()
		ctx.Put("scene", sc)

		// NOTE(review): the request error is still ignored here, so a scene
		// whose Japanese page cannot be fetched is not emitted at all -
		// confirm that this is the intended behaviour.
		sceneCollectorJap.Request("GET", sceneURLJap, nil, ctx, nil)
	})

	// Synopsis - Pull from Japanese site & translate
	sceneCollectorJap.OnHTML(`html`, func(e *colly.HTMLElement) {
		sc := e.Request.Ctx.GetAny("scene").(models.ScrapedScene)
		e.ForEach(`p[itemprop=description]`, func(id int, e *colly.HTMLElement) {
			original := strings.TrimSpace(e.Text)
			translated, err := gtranslate.Translate(original, language.Japanese, language.English)
			if err != nil {
				// Translation is best effort: keep the untranslated text
				// rather than the value returned alongside an error.
				sc.Synopsis = original
				return
			}
			sc.Synopsis = translated
		})

		out <- sc
	})

	siteCollector.OnHTML(`div.media-thum a`, func(e *colly.HTMLElement) {
		sceneURL := e.Request.AbsoluteURL(e.Attr("href"))
		// If scene exists in database, there's no need to scrape
		if !funk.ContainsString(knownScenes, sceneURL) {
			sceneCollector.Visit(sceneURL)
		}
	})

	siteCollector.Visit("https://en.caribbeancom.com/eng/listpages/vr1.htm")

	if updateSite {
		updateSiteLastUpdate(scraperID)
	}
	logScrapeFinished(scraperID, siteID)
	return nil
}

// init registers the CaribbeanCom VR scraper under its ID, display name and
// logo URL, with CariVR as the scrape function.
func init() {
	const (
		scraperID   = "caribbeancomvr"
		displayName = "CaribbeanCom VR"
		// NOTE(review): the logo filename mentions "baimudan" - confirm this
		// is the intended image for this site.
		logoURL = "https://mcdn.vrporn.com/files/20191217194900/baimudan-vr-porn-studio-logo-vrporn.com-virtual-reality-porn.jpg"
	)
	registerScraper(scraperID, displayName, logoURL, CariVR)
}