diff --git a/goodreads/book.go b/goodreads/book.go index 905c03c..f3b6662 100644 --- a/goodreads/book.go +++ b/goodreads/book.go @@ -14,21 +14,32 @@ import ( var ( // These are dirty workarounds, but they seem to work - alternativeCoverRegex = regexp.MustCompile(`^<i>.*?[Aa]lternat(iv)?e cover.*?</i>`) - breakTagRegex = regexp.MustCompile(`<br\s*/?>`) - lastBracketRegex = regexp.MustCompile(`^(.*)(\([^\(\)]*\))([^()]*)$`) + + // Regex to match last brackets from title. This is the series. + // e.g. Harry Potter and the Chamber of Secrets (Harry Potter, #2) + titleSeriesRegex = regexp.MustCompile(`\([^)]*#\d+(\.\d+)?\)$`) + + // Regex to match alternative cover preamble in description. + // e.g. Harry Potter and the Chamber of Secrets (Harry Potter, #2) + descriptionAlternativeCoverRegex = regexp.MustCompile(`^<i>.*?[Aa]lternat(iv)?e cover.*?</i>`) + + breakTagRegex = regexp.MustCompile(`<br\s*/?>`) ) type BookOverview struct { - Id string `xml:"id"` - Title string `xml:"title"` - Author string `xml:"author>name"` + Id string `xml:"id"` + FullTitle string `xml:"title"` + Author string `xml:"author>name"` } -func (b *BookOverview) Sanitise() { - // Strip last brackets from title. This is the series. - // e.g. Harry Potter and the Chamber of Secrets (Harry Potter, #2) - b.Title = lastBracketRegex.ReplaceAllString(b.Title, "$1$2") +// Title is the full title with any subtitle and series removed. +func (o BookOverview) Title() string { + return extractTitle(o.FullTitle) +} + +// Subtitle is the subtitle part of the full title with any series removed. +func (o BookOverview) Subtitle() string { + return extractSubtitle(o.FullTitle) } type Book struct { @@ -83,21 +94,14 @@ type Work struct { RatingDistribution string `xml:"rating_dist"` } -// Title is the full title with any subtitle removed. -// A subtitle is anything after the first : in the full title +// Title is the full title with any subtitle and series removed. func (w Work) Title() string { - titleParts := strings.Split(w.FullTitle, ":") - return strings.TrimSpace(titleParts[0]) + return extractTitle(w.FullTitle) } -// Subtitle is the subtle part of the full title. -// A subtitle is anything after the first : in the full title +// Subtitle is the subtitle part of the full title with any series removed. func (w Work) Subtitle() string { - colonIdx := strings.Index(w.FullTitle, ":") - if colonIdx == -1 { - return "" - } - return strings.TrimSpace(w.FullTitle[colonIdx+1:]) + return extractSubtitle(w.FullTitle) } func (w Work) AverageRating() float64 { @@ -122,33 +126,26 @@ type Edition struct { Language string `xml:"language_code"` } -// Title is the full title with any subtitle removed. -// A subtitle is anything after the first : in the full title +// Title is the full title with any subtitle and series removed. func (e Edition) Title() string { - titleParts := strings.Split(e.FullTitle, ":") - return strings.TrimSpace(titleParts[0]) + return extractTitle(e.FullTitle) } -// Subtitle is the subtle part of the full title. -// A subtitle is anything after the first : in the full title +// Subtitle is the subtitle part of the full title with any series removed. func (e Edition) Subtitle() string { - colonIdx := strings.Index(e.FullTitle, ":") - if colonIdx == -1 { - return "" - } - return strings.TrimSpace(e.FullTitle[colonIdx+1:]) + return extractSubtitle(e.FullTitle) } func (e *Edition) Sanitise() { // Description is html and can contain preamble about alternative covers. // Break tags need to be specially handled to add new lines as html2text does // not convert them to new lines properly - e.Description = alternativeCoverRegex.ReplaceAllString(e.Description, "") + e.Description = descriptionAlternativeCoverRegex.ReplaceAllString(e.Description, "") e.Description = breakTagRegex.ReplaceAllString(e.Description, "\n") e.Description = html2text.HTML2TextWithOptions(e.Description, html2text.WithUnixLineBreaks()) e.Description = strings.TrimSpace(e.Description) - // Get original cover image by cleaning the ul0 + // Get original cover image by cleaning the url if strings.Contains(e.ImageURL, "nophoto") { e.ImageURL = "" } else { @@ -175,7 +172,34 @@ func BookIds(books []BookOverview) []string { func BookTitles(books []BookOverview) []string { titles := make([]string, 0, len(books)) for _, book := range books { - titles = append(titles, book.Title) + titles = append(titles, book.Title()) } return titles } + +// extractTitle extracts the title from the full title with any subtitle and series removed. +func extractTitle(fullTitle string) string { + titleParts := strings.Split(fullTitle, ":") + + title := titleParts[0] + title = strings.TrimSpace(title) + title = titleSeriesRegex.ReplaceAllString(title, "") + title = strings.TrimSpace(title) + + return title +} + +// extractTitle extracts the subtitle part of the full title with any series removed. +func extractSubtitle(fullTitle string) string { + colonIdx := strings.Index(fullTitle, ":") + if colonIdx == -1 { + return "" + } + + subtitle := fullTitle[colonIdx+1:] + subtitle = strings.TrimSpace(subtitle) + subtitle = titleSeriesRegex.ReplaceAllString(subtitle, "") + subtitle = strings.TrimSpace(subtitle) + + return subtitle +} diff --git a/goodreads/goodreads_test.go b/goodreads/goodreads_test.go index 15f25b8..16a66e7 100644 --- a/goodreads/goodreads_test.go +++ b/goodreads/goodreads_test.go @@ -11,7 +11,7 @@ import ( const ( TheHobbitId = "5907" - TheHobbitTitle = "The Hobbit" + TheHobbitTitle = "The Hobbit, or There and Back Again" TheHobbitAuthor = "J.R.R. Tolkien" ) @@ -44,7 +44,7 @@ func TestSearchTitleAndAuthor(t *testing.T) { } func checkTheHobbitBookDetails(t *testing.T, book goodreads.Book) { - require.Equal(t, TheHobbitTitle, book.Work.Title()) + require.Equal(t, TheHobbitTitle, book.BestEdition.Title()) require.Equal(t, TheHobbitId, book.BestEdition.Id) require.Regexp(t, "1546071216l/5907.jpg$", book.BestEdition.ImageURL) require.Equal(t, "English", book.BestEdition.Language) diff --git a/goodreads/search.go b/goodreads/search.go index c502d27..2c46bcf 100644 --- a/goodreads/search.go +++ b/goodreads/search.go @@ -185,12 +185,5 @@ func (c *Client) searchBooksSinglePage(ctx context.Context, input searchBooksSin return nil, err } - // Sanitise the books - books := make([]BookOverview, 0, len(unmarshaller.Books)) - for _, book := range unmarshaller.Books { - book.Sanitise() - books = append(books, book) - } - - return books, nil + return unmarshaller.Books, nil }