Skip to content

Commit

Permalink
Clean substack urls
Browse files Browse the repository at this point in the history
  • Loading branch information
taylormonacelli committed Aug 16, 2024
1 parent b77dd8b commit 67cb0ef
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 0 deletions.
1 change: 1 addition & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ var linkTransforms = []func(io.Reader, io.Writer) error{
links.RemoveTitlesFromMarkdownLinks,
links.RemoveParamsFromGoogleURLs,
links.RemoveParamsFromYouTubeURLs,
links.RemoveParamsFromSubstackURLs,
links.RemoveTextFragments,
links.RemoveYouTubeCountFromMarkdownLinks,
}
Expand Down
13 changes: 13 additions & 0 deletions core/links/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ func isYouTubeURL(u *url.URL) bool {
return false
}

// isSubstackURL reports whether u points at Substack: either the apex domain
// substack.com itself or any subdomain of it (for example
// example.substack.com). The comparison is case-insensitive, and look-alike
// hosts such as notsubstack.com or substack.com.evil.example are rejected.
func isSubstackURL(u *url.URL) bool {
	host := strings.ToLower(u.Hostname())
	// The leading dot in the suffix keeps hosts like "notsubstack.com" out,
	// which means the apex domain has to be matched on its own.
	return host == "substack.com" || strings.HasSuffix(host, ".substack.com")
}

func RemoveParamsFromYouTubeURLs(r io.Reader, w io.Writer) error {
return processURLs(r, w, func(u *url.URL) *url.URL {
if isYouTubeURL(u) {
Expand All @@ -37,6 +41,15 @@ func RemoveParamsFromYouTubeURLs(r io.Reader, w io.Writer) error {
})
}

func RemoveParamsFromSubstackURLs(r io.Reader, w io.Writer) error {
return processURLs(r, w, func(u *url.URL) *url.URL {
if isSubstackURL(u) {
u.RawQuery = ""
}
return u
})
}

func RemoveTextFragments(r io.Reader, w io.Writer) error {
return processURLs(r, w, func(u *url.URL) *url.URL {
if isTextFragment(u.Fragment) {
Expand Down
50 changes: 50 additions & 0 deletions core/links/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,56 @@ And this one: https://www.youtube.com/watch?v=dQw4w9WgXcQ
}
}

// TestRemoveParamsFromSubstackURLs runs RemoveParamsFromSubstackURLs over a
// table of inputs and compares what it writes with the expected text. The
// table covers a Substack link with query parameters, one without, a
// non-Substack link, and a multi-line input holding several Substack links.
func TestRemoveParamsFromSubstackURLs(t *testing.T) {
	cases := []struct {
		name string
		in   string
		want string
	}{
		{
			name: "Substack link with parameters",
			in:   "https://example.substack.com/p/article-title?utm_source=twitter&utm_medium=social",
			want: "https://example.substack.com/p/article-title",
		},
		{
			name: "Substack link without parameters",
			in:   "https://another.substack.com/p/another-article",
			want: "https://another.substack.com/p/another-article",
		},
		{
			name: "Non-Substack link",
			in:   "https://example.com?param=value",
			want: "https://example.com?param=value",
		},
		{
			name: "Multiple Substack links",
			in: `
Check out this article: https://first.substack.com/p/title?utm_source=twitter
And this one: https://second.substack.com/p/another-title?utm_campaign=post
`,
			want: `
Check out this article: https://first.substack.com/p/title
And this one: https://second.substack.com/p/another-title
`,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			var got bytes.Buffer
			if err := RemoveParamsFromSubstackURLs(strings.NewReader(c.in), &got); err != nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			// An empty diff means the output matched exactly.
			diff := cmp.Diff(c.want, got.String())
			if diff == "" {
				return
			}
			t.Errorf("Unexpected result (-want +got):\n%s", diff)
		})
	}
}

func TestRemoveTextFragments(t *testing.T) {
testCases := []struct {
name string
Expand Down

0 comments on commit 67cb0ef

Please sign in to comment.