diff --git a/cmd/root.go b/cmd/root.go index eb6ae99..e06740d 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -25,7 +25,8 @@ var linkTransforms = []func(io.Reader, io.Writer) error{ links.RemoveWhitespaceFromMarkdownLinks, links.RemoveTitlesFromMarkdownLinks, links.RemoveParamsFromGoogleURLs, - links.RemoveYoutubeParamsFromURLs, + links.RemoveParamsFromYouTubeURLs, + links.RemoveTextFragments, links.RemoveYouTubeCountFromMarkdownLinks, } diff --git a/core/links/yt.go b/core/links/url.go similarity index 53% rename from core/links/yt.go rename to core/links/url.go index dc52421..bf8a9f8 100644 --- a/core/links/yt.go +++ b/core/links/url.go @@ -4,26 +4,13 @@ import ( "fmt" "io" "net/url" + "regexp" "strings" "mvdan.cc/xurls/v2" ) -type URLProcessor interface { - Process(url *url.URL) *url.URL -} - -type YouTubeURLProcessor struct{} - -func (p *YouTubeURLProcessor) Process(u *url.URL) *url.URL { - if isYouTubeURL(u) { - q := u.Query() - q.Del("si") - q.Del("app") - u.RawQuery = q.Encode() - } - return u -} +var textFragmentRegex = regexp.MustCompile(`^:~:text=`) func isYouTubeURL(u *url.URL) bool { youTubeDomains := []string{ @@ -38,14 +25,32 @@ func isYouTubeURL(u *url.URL) bool { return false } -func RemoveYoutubeParamsFromURLs(r io.Reader, w io.Writer) error { - processors := []URLProcessor{ - &YouTubeURLProcessor{}, - } - return processURLs(r, w, processors...) +func RemoveParamsFromYouTubeURLs(r io.Reader, w io.Writer) error { + return processURLs(r, w, func(u *url.URL) *url.URL { + if isYouTubeURL(u) { + q := u.Query() + q.Del("si") + q.Del("app") + u.RawQuery = q.Encode() + } + return u + }) } -func processURLs(r io.Reader, w io.Writer, processors ...URLProcessor) error { +func RemoveTextFragments(r io.Reader, w io.Writer) error { + return processURLs(r, w, func(u *url.URL) *url.URL { + if isTextFragment(u.Fragment) { + u.Fragment = "" + } + return u + }) +} + +func isTextFragment(fragment string) bool { + return textFragmentRegex.MatchString(fragment) +} + +func processURLs(r io.Reader, w io.Writer, processor func(*url.URL) *url.URL) error { buf, err := io.ReadAll(r) if err != nil { return fmt.Errorf("processURLs: failed to read input: %w", err) @@ -58,9 +63,7 @@ func processURLs(r io.Reader, w io.Writer, processors ...URLProcessor) error { return match } - for _, processor := range processors { - u = processor.Process(u) - } + u = processor(u) return []byte(u.String()) }) diff --git a/core/links/url_test.go b/core/links/url_test.go new file mode 100644 index 0000000..eb06ba1 --- /dev/null +++ b/core/links/url_test.go @@ -0,0 +1,109 @@ +package links + +import ( + "bytes" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestRemoveParamsFromYouTubeURLs(t *testing.T) { + testCases := []struct { + name string + input string + expected string + }{ + { + name: "YouTube link with si and app parameters", + input: "https://youtu.be/JSKJbGi5oNA?si=b2GkFDivckm1k-Mq&app=Desktop", + expected: "https://youtu.be/JSKJbGi5oNA", + }, + { + name: "YouTube link without si parameter", + input: "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + expected: "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + }, + { + name: "Non-YouTube link", + input: "https://example.com?param=value", + expected: "https://example.com?param=value", + }, + { + name: "Multiple YouTube links", + input: ` +Check out this video: https://youtu.be/JSKJbGi5oNA?si=b2GkFDivckm1k-Mq +And this one: https://www.youtube.com/watch?v=dQw4w9WgXcQ&si=AnotherParam +`, + expected: ` +Check out this video: https://youtu.be/JSKJbGi5oNA +And this one: https://www.youtube.com/watch?v=dQw4w9WgXcQ +`, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + input := strings.NewReader(tc.input) + var output bytes.Buffer + err := RemoveParamsFromYouTubeURLs(input, &output) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + result := output.String() + if diff := cmp.Diff(tc.expected, result); diff != "" { + t.Errorf("Unexpected result (-want +got):\n%s", diff) + } + }) + } +} + +func TestRemoveTextFragments(t *testing.T) { + testCases := []struct { + name string + input string + expected string + }{ + { + name: "Link with text fragment", + input: "https://example.com/article#:~:text=some%20text", + expected: "https://example.com/article", + }, + { + name: "Link with regular fragment", + input: "https://example.com/article#heading-1", + expected: "https://example.com/article#heading-1", + }, + { + name: "Link without fragment", + input: "https://example.com/article", + expected: "https://example.com/article", + }, + { + name: "Multiple links with text fragments", + input: ` +Check out this article: https://example.com/article1#:~:text=some%20text +And this one: https://example.com/article2#another-fragment +`, + expected: ` +Check out this article: https://example.com/article1 +And this one: https://example.com/article2#another-fragment +`, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + input := strings.NewReader(tc.input) + var output bytes.Buffer + err := RemoveTextFragments(input, &output) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + result := output.String() + if diff := cmp.Diff(tc.expected, result); diff != "" { + t.Errorf("Unexpected result (-want +got):\n%s", diff) + } + }) + } +} diff --git a/core/links/yt_test.go b/core/links/yt_test.go deleted file mode 100644 index 66ed5ec..0000000 --- a/core/links/yt_test.go +++ /dev/null @@ -1,59 +0,0 @@ -package links - -import ( - "bytes" - "strings" - "testing" - - "github.com/google/go-cmp/cmp" -) - -func TestRemoveParamsFromYouTubeLinks(t *testing.T) { - testCases := []struct { - name string - input string - expected string - }{ - { - name: "YouTube link with si and app parameters", - input: "https://youtu.be/JSKJbGi5oNA?si=b2GkFDivckm1k-Mq&app=Desktop", - expected: "https://youtu.be/JSKJbGi5oNA", - }, - { - name: "YouTube link without si parameter", - input: "https://www.youtube.com/watch?v=dQw4w9WgXcQ", - expected: "https://www.youtube.com/watch?v=dQw4w9WgXcQ", - }, - { - name: "Non-YouTube link", - input: "https://example.com?param=value", - expected: "https://example.com?param=value", - }, - { - name: "Multiple YouTube links", - input: ` -Check out this video: https://youtu.be/JSKJbGi5oNA?si=b2GkFDivckm1k-Mq -And this one: https://www.youtube.com/watch?v=dQw4w9WgXcQ&si=AnotherParam -`, - expected: ` -Check out this video: https://youtu.be/JSKJbGi5oNA -And this one: https://www.youtube.com/watch?v=dQw4w9WgXcQ -`, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - input := strings.NewReader(tc.input) - var output bytes.Buffer - err := RemoveYoutubeParamsFromURLs(input, &output) - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } - result := output.String() - if diff := cmp.Diff(tc.expected, result); diff != "" { - t.Errorf("Unexpected result (-want +got):\n%s", diff) - } - }) - } -}