Skip to content

Commit

Permalink
Remove text fragments from urls
Browse files Browse the repository at this point in the history
  • Loading branch information
taylormonacelli committed Aug 4, 2024
1 parent 45035ef commit e4dbde5
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 84 deletions.
3 changes: 2 additions & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ var linkTransforms = []func(io.Reader, io.Writer) error{
links.RemoveWhitespaceFromMarkdownLinks,
links.RemoveTitlesFromMarkdownLinks,
links.RemoveParamsFromGoogleURLs,
links.RemoveYoutubeParamsFromURLs,
links.RemoveParamsFromYouTubeURLs,
links.RemoveTextFragments,
links.RemoveYouTubeCountFromMarkdownLinks,
}

Expand Down
51 changes: 27 additions & 24 deletions core/links/yt.go → core/links/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,13 @@ import (
"fmt"
"io"
"net/url"
"regexp"
"strings"

"mvdan.cc/xurls/v2"
)

// URLProcessor rewrites a parsed URL.
//
// Process is handed a URL and returns the URL that should be used in its
// place; implementations may modify and return the same pointer.
type URLProcessor interface {
	Process(u *url.URL) *url.URL
}

// YouTubeURLProcessor strips tracking query parameters from YouTube URLs.
// It carries no state.
type YouTubeURLProcessor struct{}

// Process deletes the "si" and "app" query parameters from u when
// isYouTubeURL reports true, re-encodes the query in place and returns u.
// Any other URL is returned untouched.
func (p *YouTubeURLProcessor) Process(u *url.URL) *url.URL {
	if !isYouTubeURL(u) {
		return u
	}

	params := u.Query()
	for _, name := range []string{"si", "app"} {
		params.Del(name)
	}
	u.RawQuery = params.Encode()

	return u
}
// textFragmentRegex matches a URL fragment that begins with the ":~:text="
// text directive. It is compiled once at package scope.
var textFragmentRegex = regexp.MustCompile(`^:~:text=`)

func isYouTubeURL(u *url.URL) bool {
youTubeDomains := []string{
Expand All @@ -38,14 +25,32 @@ func isYouTubeURL(u *url.URL) bool {
return false
}

func RemoveYoutubeParamsFromURLs(r io.Reader, w io.Writer) error {
processors := []URLProcessor{
&YouTubeURLProcessor{},
}
return processURLs(r, w, processors...)
// RemoveParamsFromYouTubeURLs copies r to w via processURLs, dropping the
// "si" and "app" query parameters from every URL for which isYouTubeURL
// reports true. All other URLs are left untouched.
//
// The remaining query parameters keep their original order and their original
// percent-encoding: the raw query is filtered pair by pair instead of being
// rebuilt with url.Values.Encode, which sorts by key and re-escapes values and
// would therefore rewrite YouTube links that carry neither parameter.
//
// It returns the error reported by processURLs, if any.
func RemoveParamsFromYouTubeURLs(r io.Reader, w io.Writer) error {
	return processURLs(r, w, func(u *url.URL) *url.URL {
		if u.RawQuery == "" || !isYouTubeURL(u) {
			return u
		}

		pairs := strings.Split(u.RawQuery, "&")
		kept := pairs[:0] // filter in place; reuses the backing array
		for _, pair := range pairs {
			key, _, _ := strings.Cut(pair, "=")
			// Compare the decoded key so an escaped spelling such as "s%69"
			// is removed as well. Undecodable keys are kept verbatim.
			if name, err := url.QueryUnescape(key); err == nil && (name == "si" || name == "app") {
				continue
			}
			kept = append(kept, pair)
		}
		u.RawQuery = strings.Join(kept, "&")

		return u
	})
}

func processURLs(r io.Reader, w io.Writer, processors ...URLProcessor) error {
// RemoveTextFragments copies r to w via processURLs, stripping text-fragment
// directives ("#:~:text=...") from every URL it is handed.
//
// The fragment is split at the ":~:" fragment-directive delimiter. When the
// directive part contains a "text=" directive, the whole directive is dropped
// and only the part before the delimiter is kept, so
//
//	https://example.com/a#:~:text=foo         -> https://example.com/a
//	https://example.com/a#heading:~:text=foo  -> https://example.com/a#heading
//
// Fragments without a text directive are left unchanged.
//
// It returns the error reported by processURLs, if any.
func RemoveTextFragments(r io.Reader, w io.Writer) error {
	return processURLs(r, w, func(u *url.URL) *url.URL {
		anchor, directive, found := strings.Cut(u.Fragment, ":~:")
		if !found {
			return u
		}

		// A fragment directive may hold several "&"-separated directives;
		// strip it as soon as one of them is a text directive.
		for _, d := range strings.Split(directive, "&") {
			if strings.HasPrefix(d, "text=") {
				u.Fragment = anchor
				// Clear the raw form so String() re-derives the escaped
				// fragment from the shortened Fragment.
				u.RawFragment = ""
				break
			}
		}

		return u
	})
}

// isTextFragment reports whether fragment (the part of a URL after "#",
// without the "#" itself) begins with the ":~:text=" text directive.
//
// A plain prefix comparison is used; a regular expression is unnecessary for
// matching a fixed literal at the start of the string.
func isTextFragment(fragment string) bool {
	return strings.HasPrefix(fragment, ":~:text=")
}

func processURLs(r io.Reader, w io.Writer, processor func(*url.URL) *url.URL) error {
buf, err := io.ReadAll(r)
if err != nil {
return fmt.Errorf("processURLs: failed to read input: %w", err)
Expand All @@ -58,9 +63,7 @@ func processURLs(r io.Reader, w io.Writer, processors ...URLProcessor) error {
return match
}

for _, processor := range processors {
u = processor.Process(u)
}
u = processor(u)

return []byte(u.String())
})
Expand Down
109 changes: 109 additions & 0 deletions core/links/url_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package links

import (
"bytes"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
)

// TestRemoveParamsFromYouTubeURLs runs single links and multi-line text
// through RemoveParamsFromYouTubeURLs and diffs the output against the
// expected text for each scenario.
func TestRemoveParamsFromYouTubeURLs(t *testing.T) {
	type scenario struct {
		name     string
		input    string
		expected string
	}

	scenarios := []scenario{
		{
			name:     "YouTube link with si and app parameters",
			input:    "https://youtu.be/JSKJbGi5oNA?si=b2GkFDivckm1k-Mq&app=Desktop",
			expected: "https://youtu.be/JSKJbGi5oNA",
		},
		{
			name:     "YouTube link without si parameter",
			input:    "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
			expected: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
		},
		{
			name:     "Non-YouTube link",
			input:    "https://example.com?param=value",
			expected: "https://example.com?param=value",
		},
		{
			name: "Multiple YouTube links",
			input: `
Check out this video: https://youtu.be/JSKJbGi5oNA?si=b2GkFDivckm1k-Mq
And this one: https://www.youtube.com/watch?v=dQw4w9WgXcQ&si=AnotherParam
`,
			expected: `
Check out this video: https://youtu.be/JSKJbGi5oNA
And this one: https://www.youtube.com/watch?v=dQw4w9WgXcQ
`,
		},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			var got bytes.Buffer
			if err := RemoveParamsFromYouTubeURLs(strings.NewReader(sc.input), &got); err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			if delta := cmp.Diff(sc.expected, got.String()); delta != "" {
				t.Errorf("Unexpected result (-want +got):\n%s", delta)
			}
		})
	}
}

// TestRemoveTextFragments runs links with text fragments, regular fragments
// and no fragment through RemoveTextFragments and diffs the output against
// the expected text for each scenario.
func TestRemoveTextFragments(t *testing.T) {
	type scenario struct {
		name     string
		input    string
		expected string
	}

	scenarios := []scenario{
		{
			name:     "Link with text fragment",
			input:    "https://example.com/article#:~:text=some%20text",
			expected: "https://example.com/article",
		},
		{
			name:     "Link with regular fragment",
			input:    "https://example.com/article#heading-1",
			expected: "https://example.com/article#heading-1",
		},
		{
			name:     "Link without fragment",
			input:    "https://example.com/article",
			expected: "https://example.com/article",
		},
		{
			name: "Multiple links with text fragments",
			input: `
Check out this article: https://example.com/article1#:~:text=some%20text
And this one: https://example.com/article2#another-fragment
`,
			expected: `
Check out this article: https://example.com/article1
And this one: https://example.com/article2#another-fragment
`,
		},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			var got bytes.Buffer
			if err := RemoveTextFragments(strings.NewReader(sc.input), &got); err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			if delta := cmp.Diff(sc.expected, got.String()); delta != "" {
				t.Errorf("Unexpected result (-want +got):\n%s", delta)
			}
		})
	}
}
59 changes: 0 additions & 59 deletions core/links/yt_test.go

This file was deleted.

0 comments on commit e4dbde5

Please sign in to comment.