Add Image Scraping (#5562)
Co-authored-by: keenbed <[email protected]>
Co-authored-by: WithoutPants <[email protected]>
3 people authored Feb 24, 2025
1 parent b6ace42 commit e97f647
Showing 27 changed files with 1,063 additions and 11 deletions.
8 changes: 8 additions & 0 deletions graphql/schema/schema.graphql
@@ -174,6 +174,12 @@ type Query {
input: ScrapeSingleGroupInput!
): [ScrapedGroup!]!

"Scrape for a single image"
scrapeSingleImage(
source: ScraperSourceInput!
input: ScrapeSingleImageInput!
): [ScrapedImage!]!

"Scrapes content based on a URL"
scrapeURL(url: String!, ty: ScrapeContentType!): ScrapedContent

@@ -183,6 +189,8 @@ type Query {
scrapeSceneURL(url: String!): ScrapedScene
"Scrapes a complete gallery record based on a URL"
scrapeGalleryURL(url: String!): ScrapedGallery
"Scrapes a complete image record based on a URL"
scrapeImageURL(url: String!): ScrapedImage
"Scrapes a complete movie record based on a URL"
scrapeMovieURL(url: String!): ScrapedMovie
@deprecated(reason: "Use scrapeGroupURL instead")
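
As an illustration of the new URL-based entry point, a client query against scrapeImageURL might look like the sketch below; the URL is a placeholder, and the scalar fields come from the ScrapedImage type added in graphql/schema/types/scraper.graphql further down.

query {
  scrapeImageURL(url: "https://example.com/image/123") {
    title
    code
    details
    photographer
    urls
    date
  }
}
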
33 changes: 33 additions & 0 deletions graphql/schema/types/scraper.graphql
@@ -10,6 +10,7 @@ enum ScrapeType {
"Type of the content a scraper generates"
enum ScrapeContentType {
GALLERY
IMAGE
MOVIE
GROUP
PERFORMER
@@ -22,6 +23,7 @@ union ScrapedContent =
| ScrapedTag
| ScrapedScene
| ScrapedGallery
| ScrapedImage
| ScrapedMovie
| ScrapedGroup
| ScrapedPerformer
@@ -41,6 +43,8 @@ type Scraper {
scene: ScraperSpec
"Details for gallery scraper"
gallery: ScraperSpec
"Details for image scraper"
image: ScraperSpec
"Details for movie scraper"
movie: ScraperSpec @deprecated(reason: "use group")
"Details for group scraper"
@@ -128,6 +132,26 @@ input ScrapedGalleryInput {
# no studio, tags or performers
}

type ScrapedImage {
title: String
code: String
details: String
photographer: String
urls: [String!]
date: String
studio: ScrapedStudio
tags: [ScrapedTag!]
performers: [ScrapedPerformer!]
}

input ScrapedImageInput {
title: String
code: String
details: String
urls: [String!]
date: String
}

input ScraperSourceInput {
"Index of the configured stash-box instance to use. Should be unset if scraper_id is set"
stash_box_index: Int @deprecated(reason: "use stash_box_endpoint")
@@ -190,6 +214,15 @@ input ScrapeSingleGalleryInput {
gallery_input: ScrapedGalleryInput
}

input ScrapeSingleImageInput {
"Instructs to query by string"
query: String
"Instructs to query by image id"
image_id: ID
"Instructs to query by image fragment"
image_input: ScrapedImageInput
}

input ScrapeSingleMovieInput {
"Instructs to query by string"
query: String
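
Tying the new input types together, a hedged sketch of a scrapeSingleImage call that queries by image id follows; the scraper id and image id are placeholders, and object fields such as studio, tags, and performers can be selected in the same way.

query {
  scrapeSingleImage(
    source: { scraper_id: "example-image-scraper" }
    input: { image_id: "42" }
  ) {
    title
    photographer
    urls
    date
  }
}
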
6 changes: 3 additions & 3 deletions internal/api/resolver_mutation_image.go
@@ -28,7 +28,7 @@ func (r *mutationResolver) getImage(ctx context.Context, id int) (ret *models.Im
return ret, nil
}

func (r *mutationResolver) ImageUpdate(ctx context.Context, input ImageUpdateInput) (ret *models.Image, err error) {
func (r *mutationResolver) ImageUpdate(ctx context.Context, input models.ImageUpdateInput) (ret *models.Image, err error) {
translator := changesetTranslator{
inputMap: getUpdateInputMap(ctx),
}
@@ -46,7 +46,7 @@ func (r *mutationResolver) ImageUpdate(ctx context.Context, input ImageUpdateInp
return r.getImage(ctx, ret.ID)
}

func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*ImageUpdateInput) (ret []*models.Image, err error) {
func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*models.ImageUpdateInput) (ret []*models.Image, err error) {
inputMaps := getUpdateInputMaps(ctx)

// Start the transaction and save the image
@@ -89,7 +89,7 @@ func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*ImageUpdat
return newRet, nil
}

func (r *mutationResolver) imageUpdate(ctx context.Context, input ImageUpdateInput, translator changesetTranslator) (*models.Image, error) {
func (r *mutationResolver) imageUpdate(ctx context.Context, input models.ImageUpdateInput, translator changesetTranslator) (*models.Image, error) {
imageID, err := strconv.Atoi(input.ID)
if err != nil {
return nil, fmt.Errorf("converting id: %w", err)
42 changes: 42 additions & 0 deletions internal/api/resolver_query_scraper.go
@@ -197,6 +197,15 @@ func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*scra
return ret, nil
}

func (r *queryResolver) ScrapeImageURL(ctx context.Context, url string) (*scraper.ScrapedImage, error) {
content, err := r.scraperCache().ScrapeURL(ctx, url, scraper.ScrapeContentTypeImage)
if err != nil {
return nil, err
}

return marshalScrapedImage(content)
}

func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
content, err := r.scraperCache().ScrapeURL(ctx, url, scraper.ScrapeContentTypeMovie)
if err != nil {
@@ -491,6 +500,39 @@ func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.
return ret, nil
}

func (r *queryResolver) ScrapeSingleImage(ctx context.Context, source scraper.Source, input ScrapeSingleImageInput) ([]*scraper.ScrapedImage, error) {
if source.StashBoxIndex != nil {
return nil, ErrNotSupported
}

if source.ScraperID == nil {
return nil, fmt.Errorf("%w: scraper_id must be set", ErrInput)
}

var c scraper.ScrapedContent

switch {
case input.ImageID != nil:
imageID, err := strconv.Atoi(*input.ImageID)
if err != nil {
return nil, fmt.Errorf("%w: image id is not an integer: '%s'", ErrInput, *input.ImageID)
}
c, err = r.scraperCache().ScrapeID(ctx, *source.ScraperID, imageID, scraper.ScrapeContentTypeImage)
if err != nil {
return nil, err
}
return marshalScrapedImages([]scraper.ScrapedContent{c})
case input.ImageInput != nil:
c, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Image: input.ImageInput})
if err != nil {
return nil, err
}
return marshalScrapedImages([]scraper.ScrapedContent{c})
default:
return nil, ErrNotImplemented
}
}
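
The resolver above also accepts a fragment via image_input; a minimal sketch of that path (all values are placeholders, using only fields defined on ScrapedImageInput) might be:

query {
  scrapeSingleImage(
    source: { scraper_id: "example-image-scraper" }
    input: { image_input: { title: "Sunset", urls: ["https://example.com/image/123"] } }
  ) {
    title
    urls
    date
  }
}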

func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source scraper.Source, input ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) {
return nil, ErrNotSupported
}
31 changes: 31 additions & 0 deletions internal/api/scraped_content.go
@@ -76,6 +76,27 @@ func marshalScrapedGalleries(content []scraper.ScrapedContent) ([]*scraper.Scrap
return ret, nil
}

func marshalScrapedImages(content []scraper.ScrapedContent) ([]*scraper.ScrapedImage, error) {
var ret []*scraper.ScrapedImage
for _, c := range content {
if c == nil {
// graphql schema requires images to be non-nil
continue
}

switch g := c.(type) {
case *scraper.ScrapedImage:
ret = append(ret, g)
case scraper.ScrapedImage:
ret = append(ret, &g)
default:
return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedImage", models.ErrConversion)
}
}

return ret, nil
}

// marshalScrapedMovies converts ScrapedContent into ScrapedMovie. If conversion
// fails, an error is returned.
func marshalScrapedMovies(content []scraper.ScrapedContent) ([]*models.ScrapedMovie, error) {
@@ -129,6 +150,16 @@ func marshalScrapedGallery(content scraper.ScrapedContent) (*scraper.ScrapedGall
return g[0], nil
}

// marshalScrapedImage will marshal a single scraped image
func marshalScrapedImage(content scraper.ScrapedContent) (*scraper.ScrapedImage, error) {
g, err := marshalScrapedImages([]scraper.ScrapedContent{content})
if err != nil {
return nil, err
}

return g[0], nil
}

// marshalScrapedMovie will marshal a single scraped movie
func marshalScrapedMovie(content scraper.ScrapedContent) (*models.ScrapedMovie, error) {
m, err := marshalScrapedMovies([]scraper.ScrapedContent{content})
22 changes: 22 additions & 0 deletions pkg/models/image.go
@@ -63,6 +63,28 @@ type ImageFilterType struct {
UpdatedAt *TimestampCriterionInput `json:"updated_at"`
}

type ImageUpdateInput struct {
ClientMutationID *string `json:"clientMutationId"`
ID string `json:"id"`
Title *string `json:"title"`
Code *string `json:"code"`
Urls []string `json:"urls"`
Date *string `json:"date"`
Details *string `json:"details"`
Photographer *string `json:"photographer"`
Rating100 *int `json:"rating100"`
Organized *bool `json:"organized"`
SceneIds []string `json:"scene_ids"`
StudioID *string `json:"studio_id"`
TagIds []string `json:"tag_ids"`
PerformerIds []string `json:"performer_ids"`
GalleryIds []string `json:"gallery_ids"`
PrimaryFileID *string `json:"primary_file_id"`

// deprecated
URL *string `json:"url"`
}

type ImageDestroyInput struct {
ID string `json:"id"`
DeleteFile *bool `json:"delete_file"`
1 change: 1 addition & 0 deletions pkg/scraper/action.go
@@ -31,6 +31,7 @@ type scraperActionImpl interface {

scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*ScrapedScene, error)
scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*ScrapedGallery, error)
scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error)
}

func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, globalConfig GlobalConfig) scraperActionImpl {
58 changes: 58 additions & 0 deletions pkg/scraper/cache.go
@@ -77,11 +77,18 @@ type GalleryFinder interface {
models.URLLoader
}

type ImageFinder interface {
models.ImageGetter
models.FileLoader
models.URLLoader
}

type Repository struct {
TxnManager models.TxnManager

SceneFinder SceneFinder
GalleryFinder GalleryFinder
ImageFinder ImageFinder
TagFinder TagFinder
PerformerFinder PerformerFinder
GroupFinder match.GroupNamesFinder
@@ -93,6 +100,7 @@ func NewRepository(repo models.Repository) Repository {
TxnManager: repo.TxnManager,
SceneFinder: repo.Scene,
GalleryFinder: repo.Gallery,
ImageFinder: repo.Image,
TagFinder: repo.Tag,
PerformerFinder: repo.Performer,
GroupFinder: repo.Group,
@@ -357,6 +365,28 @@ func (c Cache) ScrapeID(ctx context.Context, scraperID string, id int, ty Scrape
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}

if scraped != nil {
ret = scraped
}

case ScrapeContentTypeImage:
is, ok := s.(imageScraper)
if !ok {
return nil, fmt.Errorf("%w: cannot use scraper %s as a image scraper", ErrNotSupported, scraperID)
}

image, err := c.getImage(ctx, id)
if err != nil {
return nil, fmt.Errorf("scraper %s: unable to load image id %v: %w", scraperID, id, err)
}

// don't assign nil concrete pointer to ret interface, otherwise nil
// detection is harder
scraped, err := is.viaImage(ctx, c.client, image)
if err != nil {
return nil, fmt.Errorf("scraper %s: %w", scraperID, err)
}

if scraped != nil {
ret = scraped
}
@@ -426,3 +456,31 @@ func (c Cache) getGallery(ctx context.Context, galleryID int) (*models.Gallery,
}
return ret, nil
}

func (c Cache) getImage(ctx context.Context, imageID int) (*models.Image, error) {
var ret *models.Image
r := c.repository
if err := r.WithReadTxn(ctx, func(ctx context.Context) error {
qb := r.ImageFinder

var err error
ret, err = qb.Find(ctx, imageID)
if err != nil {
return err
}

if ret == nil {
return fmt.Errorf("image with id %d not found", imageID)
}

err = ret.LoadFiles(ctx, qb)
if err != nil {
return err
}

return ret.LoadURLs(ctx, qb)
}); err != nil {
return nil, err
}
return ret, nil
}