Skip to content

Commit

Permalink
Strip series from title
Browse files Browse the repository at this point in the history
  • Loading branch information
ahobsonsayers committed Dec 28, 2024
1 parent a074168 commit 3382509
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 45 deletions.
94 changes: 59 additions & 35 deletions goodreads/book.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,32 @@ import (

var (
// These are dirty workarounds, but they seem to work
alternativeCoverRegex = regexp.MustCompile(`^<i>.*?[Aa]lternat(iv)?e cover.*?</i>`)
breakTagRegex = regexp.MustCompile(`<br\s*/?>`)
lastBracketRegex = regexp.MustCompile(`^(.*)(\([^\(\)]*\))([^()]*)$`)

// Regex to match last brackets from title. This is the series.
// e.g. Harry Potter and the Chamber of Secrets (Harry Potter, #2)
titleSeriesRegex = regexp.MustCompile(`\([^)]*#\d+(\.\d+)?\)$`)

// Regex to match alternative cover preamble in description.
// e.g. a leading "<i>Alternate cover edition of ...</i>" note
descriptionAlternativeCoverRegex = regexp.MustCompile(`^<i>.*?[Aa]lternat(iv)?e cover.*?</i>`)

breakTagRegex = regexp.MustCompile(`<br\s*/?>`)
)

type BookOverview struct {
Id string `xml:"id"`
Title string `xml:"title"`
Author string `xml:"author>name"`
Id string `xml:"id"`
FullTitle string `xml:"title"`
Author string `xml:"author>name"`
}

func (b *BookOverview) Sanitise() {
// Strip last brackets from title. This is the series.
// e.g. Harry Potter and the Chamber of Secrets (Harry Potter, #2)
b.Title = lastBracketRegex.ReplaceAllString(b.Title, "$1$2")
// Title returns the book's title: the full title with any subtitle and
// series removed.
func (o BookOverview) Title() string {
	title := extractTitle(o.FullTitle)
	return title
}

// Subtitle returns the subtitle portion of the book's full title, with any
// series removed.
func (o BookOverview) Subtitle() string {
	subtitle := extractSubtitle(o.FullTitle)
	return subtitle
}

type Book struct {
Expand Down Expand Up @@ -83,21 +94,14 @@ type Work struct {
RatingDistribution string `xml:"rating_dist"`
}

// Title is the full title with any subtitle removed.
// A subtitle is anything after the first : in the full title
// Title is the full title with any subtitle and series removed.
func (w Work) Title() string {
titleParts := strings.Split(w.FullTitle, ":")
return strings.TrimSpace(titleParts[0])
return extractTitle(w.FullTitle)
}

// Subtitle is the subtle part of the full title.
// A subtitle is anything after the first : in the full title
// Subtitle is the subtitle part of the full title with any series removed.
func (w Work) Subtitle() string {
colonIdx := strings.Index(w.FullTitle, ":")
if colonIdx == -1 {
return ""
}
return strings.TrimSpace(w.FullTitle[colonIdx+1:])
return extractSubtitle(w.FullTitle)
}

func (w Work) AverageRating() float64 {
Expand All @@ -122,33 +126,26 @@ type Edition struct {
Language string `xml:"language_code"`
}

// Title is the full title with any subtitle removed.
// A subtitle is anything after the first : in the full title
// Title is the full title with any subtitle and series removed.
func (e Edition) Title() string {
titleParts := strings.Split(e.FullTitle, ":")
return strings.TrimSpace(titleParts[0])
return extractTitle(e.FullTitle)
}

// Subtitle is the subtle part of the full title.
// A subtitle is anything after the first : in the full title
// Subtitle is the subtitle part of the full title with any series removed.
func (e Edition) Subtitle() string {
colonIdx := strings.Index(e.FullTitle, ":")
if colonIdx == -1 {
return ""
}
return strings.TrimSpace(e.FullTitle[colonIdx+1:])
return extractSubtitle(e.FullTitle)
}

func (e *Edition) Sanitise() {
// Description is html and can contain preamble about alternative covers.
// Break tags need to be specially handled to add new lines as html2text does
// not convert them to new lines properly
e.Description = alternativeCoverRegex.ReplaceAllString(e.Description, "")
e.Description = descriptionAlternativeCoverRegex.ReplaceAllString(e.Description, "")
e.Description = breakTagRegex.ReplaceAllString(e.Description, "\n")
e.Description = html2text.HTML2TextWithOptions(e.Description, html2text.WithUnixLineBreaks())
e.Description = strings.TrimSpace(e.Description)

// Get original cover image by cleaning the ul0
// Get original cover image by cleaning the url
if strings.Contains(e.ImageURL, "nophoto") {
e.ImageURL = ""
} else {
Expand All @@ -175,7 +172,34 @@ func BookIds(books []BookOverview) []string {
func BookTitles(books []BookOverview) []string {
titles := make([]string, 0, len(books))
for _, book := range books {
titles = append(titles, book.Title)
titles = append(titles, book.Title())
}
return titles
}

// extractTitle returns the title part of fullTitle, with any subtitle and
// series removed.
//
// The series is the trailing bracketed group matched by titleSeriesRegex,
// e.g. "(Harry Potter, #2)". The subtitle is everything after the first ":"
// that remains once that trailing series has been stripped.
func extractTitle(fullTitle string) string {
	// Strip the trailing series before looking for the subtitle separator.
	// A series name can itself contain a colon, e.g.
	// "Thrawn (Star Wars: Thrawn, #1)", and splitting first would cut the
	// bracket in half and leave "Thrawn (Star Wars" as the title.
	title := strings.TrimSpace(fullTitle)
	title = titleSeriesRegex.ReplaceAllString(title, "")

	if colonIdx := strings.Index(title, ":"); colonIdx != -1 {
		title = strings.TrimSpace(title[:colonIdx])

		// The series may also sit directly before the separator,
		// e.g. "Title (Series, #1): Subtitle", so strip it from the
		// title part as well.
		title = titleSeriesRegex.ReplaceAllString(title, "")
	}

	return strings.TrimSpace(title)
}

// extractSubtitle returns the subtitle part of fullTitle, with any series
// removed. It returns "" when there is no subtitle.
//
// The series is the trailing bracketed group matched by titleSeriesRegex,
// e.g. "(Harry Potter, #2)". The subtitle is everything after the first ":"
// that remains once that trailing series has been stripped.
func extractSubtitle(fullTitle string) string {
	// Strip the trailing series before looking for the subtitle separator.
	// A series name can itself contain a colon, e.g.
	// "Thrawn (Star Wars: Thrawn, #1)", which must not be mistaken for a
	// title/subtitle split (it would yield the subtitle "Thrawn, #1)").
	remainder := strings.TrimSpace(fullTitle)
	remainder = titleSeriesRegex.ReplaceAllString(remainder, "")

	colonIdx := strings.Index(remainder, ":")
	if colonIdx == -1 {
		return ""
	}

	return strings.TrimSpace(remainder[colonIdx+1:])
}
4 changes: 2 additions & 2 deletions goodreads/goodreads_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (

const (
TheHobbitId = "5907"
TheHobbitTitle = "The Hobbit"
TheHobbitTitle = "The Hobbit, or There and Back Again"
TheHobbitAuthor = "J.R.R. Tolkien"
)

Expand Down Expand Up @@ -44,7 +44,7 @@ func TestSearchTitleAndAuthor(t *testing.T) {
}

func checkTheHobbitBookDetails(t *testing.T, book goodreads.Book) {
require.Equal(t, TheHobbitTitle, book.Work.Title())
require.Equal(t, TheHobbitTitle, book.BestEdition.Title())
require.Equal(t, TheHobbitId, book.BestEdition.Id)
require.Regexp(t, "1546071216l/5907.jpg$", book.BestEdition.ImageURL)
require.Equal(t, "English", book.BestEdition.Language)
Expand Down
9 changes: 1 addition & 8 deletions goodreads/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,5 @@ func (c *Client) searchBooksSinglePage(ctx context.Context, input searchBooksSin
return nil, err
}

// Sanitise the books
books := make([]BookOverview, 0, len(unmarshaller.Books))
for _, book := range unmarshaller.Books {
book.Sanitise()
books = append(books, book)
}

return books, nil
return unmarshaller.Books, nil
}

0 comments on commit 3382509

Please sign in to comment.