diff --git a/cmd/dsl/input.go b/cmd/dsl/input.go index e5e5745c..ab619ddd 100644 --- a/cmd/dsl/input.go +++ b/cmd/dsl/input.go @@ -89,7 +89,7 @@ func pollInputSource( // processInput processes given input replacing any Safe Links encoded URL // with the original value. Other input is returned unmodified. func processInput(txt string, resultsChan chan<- string, errChan chan<- error) { - log.Println("Calling safelinks.SafeLinkURLs(txt)") + log.Println("Calling safelinks.SafeLinkURLs") safeLinks, err := safelinks.SafeLinkURLs(txt) // Failing to find a URL in the input is considered OK. Other errors diff --git a/cmd/dslg/buttons.go b/cmd/dslg/buttons.go index 4d7a26c2..edbbd782 100644 --- a/cmd/dslg/buttons.go +++ b/cmd/dslg/buttons.go @@ -14,6 +14,7 @@ import ( "fyne.io/fyne/v2" "fyne.io/fyne/v2/widget" + "github.com/atc0005/safelinks/internal/safelinks" ) func newCopyButton(w fyne.Window, outputField *widget.Label) *widget.Button { @@ -34,7 +35,7 @@ func newDecodeButton(inputField *widget.Entry, copyButton *widget.Button, errOut log.Println("Decoding requested but no input text provided") copyButton.Disable() - errOutField.Text = errOutTryAgain + errOutField.Text = errOutTryAgain + "\n" errOutField.Refresh() return @@ -42,7 +43,7 @@ func newDecodeButton(inputField *widget.Entry, copyButton *widget.Button, errOut log.Println("Decoding provided input text") - result, err := decodeInput(inputField.Text) + result, err := safelinks.DecodeInput(inputField.Text) switch { case err != nil: errOutField.Append(err.Error() + "\n") @@ -104,7 +105,7 @@ func newAboutButton(_ fyne.Window, inputField *widget.Entry, copyButton *widget. inputField.Text = "" inputField.Refresh() - errOutField.Text = "" + errOutField.Text = "..." 
errOutField.Refresh() outputField.Text = "Current version:\n\n" + Version() diff --git a/cmd/dslg/decode.go b/cmd/dslg/decode.go deleted file mode 100644 index c7c5e897..00000000 --- a/cmd/dslg/decode.go +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2022 Adam Chalkley -// -// https://github.com/atc0005/safelinks -// -// Licensed under the MIT License. See LICENSE file in the project root for -// full license information. - -package main - -import ( - "log" - "strings" - - "github.com/atc0005/safelinks/internal/safelinks" -) - -// decodeInput processes given input replacing any Safe Links encoded URL -// with the original decoded value. Other input is returned unmodified. -func decodeInput(txt string) (string, error) { - log.Println("Calling safelinks.SafeLinkURLs(txt)") - - safeLinks, err := safelinks.SafeLinkURLs(txt) - if err != nil { - return "", err - } - - modifiedInput := txt - for _, sl := range safeLinks { - modifiedInput = strings.Replace(modifiedInput, sl.EncodedURL, sl.DecodedURL, 1) - } - - return modifiedInput, nil - -} diff --git a/cmd/eslg/buttons.go b/cmd/eslg/buttons.go index 579ed48a..8689efc5 100644 --- a/cmd/eslg/buttons.go +++ b/cmd/eslg/buttons.go @@ -12,7 +12,10 @@ import ( "runtime" "fyne.io/fyne/v2" + "fyne.io/fyne/v2/theme" "fyne.io/fyne/v2/widget" + + "github.com/atc0005/safelinks/internal/safelinks" ) func newCopyButton(w fyne.Window, outputField *widget.Label) *widget.Button { @@ -27,21 +30,52 @@ func newCopyButton(w fyne.Window, outputField *widget.Label) *widget.Button { return copyButton } -func newEncodeButton(inputField *widget.Entry, copyButton *widget.Button, errOutField *widget.Entry, outputField *widget.Label) *widget.Button { - encodeButton := widget.NewButton("Encode", func() { +func newEncodeButton(randomEncode bool, inputField *widget.Entry, copyButton *widget.Button, errOutField *widget.Entry, outputField *widget.Label) *widget.Button { + buttonLabelText := func() string { + if randomEncode { + return "Encode Randomly" + } 
+ return "Encode All" + }() + + encodeButton := newProcessInputButton( + randomEncode, + buttonLabelText, + safelinks.EncodeInput, + inputField, + copyButton, + errOutField, + outputField, + ) + + return encodeButton +} + +func newProcessInputButton( + // TODO: Refactor this to reduce parameters. + randomEscape bool, + buttonLabelText string, + processFunc func(string, bool) (string, error), + inputField *widget.Entry, + copyButton *widget.Button, + errOutField *widget.Entry, + outputField *widget.Label, +) *widget.Button { + + button := widget.NewButton(buttonLabelText, func() { if inputField.Text == "" { - log.Println("Encoding requested but no input text provided") + log.Printf("%s used but no input text provided", buttonLabelText) copyButton.Disable() - errOutField.Text = errOutTryAgain + errOutField.Text = errOutTryAgain + "\n" errOutField.Refresh() return } - log.Println("Encoding provided input text") + log.Printf("%s input text", buttonLabelText) - result, err := encodeInput(inputField.Text) + result, err := processFunc(inputField.Text, randomEscape) switch { case err != nil: errOutField.Append(err.Error() + "\n") @@ -61,9 +95,35 @@ func newEncodeButton(inputField *widget.Entry, copyButton *widget.Button, errOut } }) - encodeButton.Importance = widget.HighImportance + if randomEscape { + button.Importance = widget.MediumImportance + button.Icon = theme.QuestionIcon() + } else { + button.Importance = widget.HighImportance + } - return encodeButton + return button +} + +func newQueryEscapeButton(randomEscape bool, inputField *widget.Entry, copyButton *widget.Button, errOutField *widget.Entry, outputField *widget.Label) *widget.Button { + buttonLabelText := func() string { + if randomEscape { + return "QueryEscape Randomly" + } + return "QueryEscape All" + }() + + queryEscapeButton := newProcessInputButton( + randomEscape, + buttonLabelText, + safelinks.QueryEscapeInput, + inputField, + copyButton, + errOutField, + outputField, + ) + + return queryEscapeButton 
} func newResetButton(w fyne.Window, inputField *widget.Entry, copyButton *widget.Button, errOutField *widget.Entry, outputField *widget.Label) *widget.Button { @@ -101,7 +161,7 @@ func newAboutButton(_ fyne.Window, inputField *widget.Entry, copyButton *widget. inputField.Text = "" inputField.Refresh() - errOutField.Text = "" + errOutField.Text = "..." errOutField.Refresh() outputField.Text = "Current version:\n\n" + Version() diff --git a/cmd/eslg/encode.go b/cmd/eslg/encode.go deleted file mode 100644 index a6aaad69..00000000 --- a/cmd/eslg/encode.go +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2022 Adam Chalkley -// -// https://github.com/atc0005/safelinks -// -// Licensed under the MIT License. See LICENSE file in the project root for -// full license information. - -package main - -import ( - "log" - "strings" - - "github.com/atc0005/safelinks/internal/safelinks" -) - -// encodeInput processes given input replacing any normal URL with an encoded -// value similar to a real Safe Links value. Other input is returned -// unmodified. 
-func encodeInput(txt string) (string, error) { - log.Println("Calling URLs(input)") - urls, err := safelinks.URLs(txt) - if err != nil { - return "", err - } - - nonSafeLinkURLs := safelinks.FilterURLs(urls, true) - - log.Printf("%d URLs identified as nonSafeLinkURLs", len(nonSafeLinkURLs)) - - if len(nonSafeLinkURLs) == 0 { - return "", safelinks.ErrNoNonSafeLinkURLsFound - } - - log.Printf("nonSafeLinkURLs URLs (%d):", len(nonSafeLinkURLs)) - for i, u := range nonSafeLinkURLs { - log.Printf("(%2.2d) %s", i+1, u.String()) - } - - modifiedInput := txt - log.Println("Replacing original unencoded URLs") - for _, link := range nonSafeLinkURLs { - fauxSafeLinksURL := safelinks.EncodeURLAsFauxSafeLinksURL(link) - modifiedInput = strings.Replace(modifiedInput, link.String(), fauxSafeLinksURL, 1) - } - - return modifiedInput, nil -} diff --git a/cmd/eslg/main.go b/cmd/eslg/main.go index 7ffe5d09..4ddad09c 100644 --- a/cmd/eslg/main.go +++ b/cmd/eslg/main.go @@ -25,10 +25,13 @@ func main() { log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile) log.SetOutput(debugLoggingOut) - // Help this tool stand out from the dslg app. - if err := os.Setenv("FYNE_THEME", "light"); err != nil { - log.Println("Failed to set fyne toolkit theme") - } + // Help this tool stand out from the dslg app unless explicitly requested + // otherwise. + // if os.Getenv("FYNE_THEME") == "" { + // if err := os.Setenv("FYNE_THEME", "light"); err != nil { + // log.Println("Failed to set fyne toolkit theme") + // } + // } // NOTE: This is deprecated and set to be removed in v3.0. 
// fyne.CurrentApp().Settings().SetTheme(theme.LightTheme()) @@ -41,14 +44,20 @@ func main() { output := NewOutputTextLabel() copyButton := newCopyButton(w, output) - encodeButton := newEncodeButton(input, copyButton, errorOutput, output) + encodeAllButton := newEncodeButton(false, input, copyButton, errorOutput, output) + encodeRandomButton := newEncodeButton(true, input, copyButton, errorOutput, output) + queryEscapeAllButton := newQueryEscapeButton(false, input, copyButton, errorOutput, output) + queryEscapeRandomButton := newQueryEscapeButton(true, input, copyButton, errorOutput, output) resetButton := newResetButton(w, input, copyButton, errorOutput, output) aboutButton := newAboutButton(w, input, copyButton, errorOutput, output) exitButton := newExitButton(a) buttonRowContainer := NewButtonRowContainer( - encodeButton, + encodeAllButton, + encodeRandomButton, + queryEscapeAllButton, + queryEscapeRandomButton, copyButton, resetButton, aboutButton, diff --git a/cmd/usl/main.go b/cmd/usl/main.go index c7ef163f..c9d52df0 100644 --- a/cmd/usl/main.go +++ b/cmd/usl/main.go @@ -35,7 +35,7 @@ func main() { os.Exit(1) } - input, err := safelinks.ReadURLsFromFile(f) + input, err := safelinks.ReadFromFile(f) if err != nil { fmt.Printf("Failed to read URLs from %q: %v\n", cfg.Filename, err) os.Exit(1) @@ -44,7 +44,7 @@ func main() { inputURLs = input default: - input, err := safelinks.ProcessInputAsURL(cfg.URL) + input, err := ReadURLsFromInput(cfg.URL) if err != nil { fmt.Printf("Failed to parse input as URL: %v\n", err) os.Exit(1) @@ -53,7 +53,7 @@ func main() { inputURLs = input } - hasErr := safelinks.ProcessInputURLs(inputURLs, os.Stdout, os.Stderr, cfg.Verbose) + hasErr := ProcessInputURLs(inputURLs, os.Stdout, os.Stderr, cfg.Verbose) // Ensure unsuccessful error code if one encountered. 
if hasErr { diff --git a/internal/safelinks/output.go b/cmd/usl/output.go similarity index 98% rename from internal/safelinks/output.go rename to cmd/usl/output.go index 618edbe2..b8f4a6a3 100644 --- a/internal/safelinks/output.go +++ b/cmd/usl/output.go @@ -5,7 +5,7 @@ // Licensed under the MIT License. See LICENSE file in the project root for // full license information. -package safelinks +package main import ( "fmt" diff --git a/cmd/usl/urls.go b/cmd/usl/urls.go new file mode 100644 index 00000000..d75fb587 --- /dev/null +++ b/cmd/usl/urls.go @@ -0,0 +1,117 @@ +// Copyright 2022 Adam Chalkley +// +// https://github.com/atc0005/safelinks +// +// Licensed under the MIT License. See LICENSE file in the project root for +// full license information. + +package main + +import ( + "flag" + "fmt" + "io" + "net/url" + "os" + "strings" + + "github.com/atc0005/safelinks/internal/safelinks" +) + +// ReadURLsFromInput processes a given input string as a URL value. This +// input string represents a single URL given via CLI flag. +// +// If an input string is not provided, this function will attempt to read +// input URLs from stdin. Each input URL is unescaped and quoting removed. +// +// The collection of input URLs is returned or an error if one occurs. +func ReadURLsFromInput(inputURL string) ([]string, error) { + var inputURLs []string + + // https://stackoverflow.com/questions/22744443/check-if-there-is-something-to-read-on-stdin-in-golang + // https://stackoverflow.com/a/26567513/903870 + // stat, _ := os.Stdin.Stat() + // if (stat.Mode() & os.ModeCharDevice) == 0 { + // fmt.Println("data is being piped to stdin") + // } else { + // fmt.Println("stdin is from a terminal") + // } + + stat, _ := os.Stdin.Stat() + + switch { + + // We received one or more URLs via standard input. 
+ case (stat.Mode() & os.ModeCharDevice) == 0: + // fmt.Fprintln(os.Stderr, "Received URL via standard input") + return safelinks.ReadFromFile(os.Stdin) + + // We received a URL via positional argument. We ignore all but the first + // one. + case len(flag.Args()) > 0: + // fmt.Fprintln(os.Stderr, "Received URL via positional argument") + + if strings.TrimSpace(flag.Args()[0]) == "" { + return nil, safelinks.ErrInvalidURL + } + + inputURLs = append(inputURLs, safelinks.CleanURL(flag.Args()[0])) + + // We received a URL via flag. + case inputURL != "": + // fmt.Fprintln(os.Stderr, "Received URL via flag") + + inputURLs = append(inputURLs, safelinks.CleanURL(inputURL)) + + // Input URL not given via positional argument, not given via flag either. + // We prompt the user for a single input value. + default: + // fmt.Fprintln(os.Stderr, "default switch case triggered") + + input, err := safelinks.ReadURLFromUser() + if err != nil { + return nil, fmt.Errorf("error reading URL: %w", err) + } + + if strings.TrimSpace(input) == "" { + return nil, safelinks.ErrInvalidURL + } + + inputURLs = append(inputURLs, safelinks.CleanURL(input)) + } + + return inputURLs, nil +} + +// ProcessInputURLs processes a given collection of input URL strings and +// emits successful decoding results to the specified results output sink. +// Errors are emitted to the specified error output sink if encountered but +// bulk processing continues until all input URLs have been evaluated. +// +// If requested, decoded URLs are emitted in verbose format. +// +// A boolean value is returned indicating whether any errors occurred. 
+func ProcessInputURLs(inputURLs []string, okOut io.Writer, errOut io.Writer, verbose bool) bool { + var errEncountered bool + + for _, inputURL := range inputURLs { + safelink, err := url.Parse(inputURL) + if err != nil { + fmt.Printf("Failed to parse URL: %v\n", err) + + errEncountered = true + continue + } + + if !safelinks.ValidSafeLinkURL(safelink) { + fmt.Fprintf(errOut, "Invalid Safelinks URL %q\n", safelink) + + errEncountered = true + continue + } + + emitOutput(safelink, okOut, verbose) + } + + return errEncountered +} diff --git a/internal/safelinks/errors.go b/internal/safelinks/errors.go index c75b3b49..d197297a 100644 --- a/internal/safelinks/errors.go +++ b/internal/safelinks/errors.go @@ -10,6 +10,9 @@ package safelinks import "errors" var ( + // ErrMissingValue indicates that an expected value was missing. + ErrMissingValue = errors.New("missing expected value") + // ErrInvalidURL indicates that an invalid URL was provided. ErrInvalidURL = errors.New("invalid URL provided") @@ -19,7 +22,7 @@ var ( // ErrNoURLsFound indicates that an attempt to parse an input string for // URLs failed. - ErrNoURLsFound = errors.New("no URLs found in input") + ErrNoURLsFound = errors.New("no URLs matching requirements found in input") // ErrURLNotSafeLinkEncoded indicates that a given URL is not recognized // as using Safe Link encoding. @@ -32,4 +35,12 @@ var ( // ErrNoNonSafeLinkURLsFound indicates that no URLs were found to not // already be encoded as Safe Links. ErrNoNonSafeLinkURLsFound = errors.New("no non-Safe Link URLs found in input") + + // ErrQueryEscapingUnsuccessful indicates that an attempt to query escape + // input was unsuccessful. + ErrQueryEscapingUnsuccessful = errors.New("failed to query escape input") + + // ErrEncodingUnsuccessful indicates that an attempt to encode input was + // unsuccessful. 
+ ErrEncodingUnsuccessful = errors.New("failed to encode input") ) diff --git a/internal/safelinks/init.go b/internal/safelinks/init.go index 00f4faab..2bead806 100644 --- a/internal/safelinks/init.go +++ b/internal/safelinks/init.go @@ -13,6 +13,6 @@ import ( ) func init() { - // Disable logging out by default. + // Disable logging output by default. log.SetOutput(io.Discard) } diff --git a/internal/safelinks/safelinks.go b/internal/safelinks/safelinks.go index f76638e0..225d0084 100644 --- a/internal/safelinks/safelinks.go +++ b/internal/safelinks/safelinks.go @@ -10,7 +10,6 @@ package safelinks import ( "bufio" crand "crypto/rand" - "flag" "fmt" "html" "io" @@ -38,6 +37,12 @@ const ( SafeLinksURLTemplate string = "%s%s/?url=%s&data=%s&sdata=%s&reserved=%s" ) +const ( + // HTTPPlainURLPrefix is the plaintext prefix used for unencrypted + // connections to a HTTP-enabled site/service. + HTTPPlainURLPrefix string = "http://" +) + // FoundURLPattern is an unvalidated URL pattern match found in given input. type FoundURLPattern struct { // Input *string @@ -52,33 +57,76 @@ type SafeLinkURL struct { DecodedURL string } -// ValidURL attempts to validate whether a given input string is a valid URL. -func ValidURL(input string) bool { - if _, err := url.Parse(input); err != nil { +// ParsedURL contains the original matched URL pattern and a parsed URL. +type ParsedURL struct { + // Original is the untrimmed, unmodified original match pattern. + Original string + + // Parsed is the parsed form of the original match pattern *after* it has + // been trimmed of unwanted/invalid characters (e.g., angle brackets, + // period). + Parsed *url.URL +} + +// Trimmed is a copy of the original URL match pattern with unwanted/invalid +// leading/trailing characters removed. +func (pURL ParsedURL) Trimmed() string { + return trimEnclosingURLCharacters(pURL.Original) +} + +// Parsed is a trimmed copy of the original URL match pattern parsed as a +// url.URL value. 
+// func (pURL ParsedURL) Parsed() string { +// return trimEnclosingURLCharacters(pURL.Original) +// } + +// ValidURLPattern attempts to validate whether a given input string is a +// valid URL. +func ValidURLPattern(input string) bool { + u, err := url.Parse(input) + if err != nil { + // url.Parse is *very* liberal; any parse failure is an immediate + // validation failure. return false } - return true + return ValidURL(u) +} + +// ValidURL attempts to validate whether a given url.URL value is a valid / +// usable URL. On its down url.Parse is *very* forgiving so we apply +// additional checks to ensure the url.URL value meets our minimum +// requirements. +func ValidURL(u *url.URL) bool { + switch { + case u.Host == "": + return false + case u.Scheme == "": + return false + default: + return true + } } // ValidSafeLinkURL validates whether a given url.URL is a valid Safe Links // URL. func ValidSafeLinkURL(u *url.URL) bool { if !strings.Contains(u.Host, SafeLinksBaseDomain) { - log.Printf("FAIL: URL %q fails base domain check", u.String()) + log.Printf("URL %q fails base domain check", u.String()) return false } if err := assertValidURLParameter(u); err != nil { - log.Printf("FAIL: URL %q fails %q parameter check", u.String(), "url") + log.Printf("URL %q fails %q parameter check", u.String(), "url") return false } return true } -// ReadURLFromUser attempts to read a given URL pattern from the user via -// stdin prompt. +// ReadURLFromUser attempts to read input from the user via stdin prompt. The +// user is prompted for a URL but validation of that input is left to the +// caller to perform. func ReadURLFromUser() (string, error) { fmt.Print("Enter URL: ") @@ -94,12 +142,12 @@ func ReadURLFromUser() (string, error) { return scanner.Text(), scanner.Err() } -// ReadURLsFromFile attempts to read URL patterns from a given file +// ReadFromFile attempts to read newline separated entries from a given file // (io.Reader). 
// -// The collection of input URLs is returned or an error if one occurs. -func ReadURLsFromFile(r io.Reader) ([]string, error) { - var inputURLs []string +// The collection of entries is returned or an error if one occurs. +func ReadFromFile(r io.Reader) ([]string, error) { + var entries []string // Loop over input "reader" and attempt to collect each item. scanner := bufio.NewScanner((r)) @@ -110,93 +158,29 @@ func ReadURLsFromFile(r io.Reader) ([]string, error) { continue } - inputURLs = append(inputURLs, txt) + entries = append(entries, txt) } if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("error reading URLs: %w", err) + return nil, fmt.Errorf("error reading input: %w", err) } - if len(inputURLs) == 0 { - return nil, ErrInvalidURL - } - - return inputURLs, nil -} - -// ProcessInputAsURL processes a given input string as a URL value. This -// input string represents a single URL given via CLI flag. -// -// If an input string is not provided, this function will attempt to read -// input URLs from stdin. Each input URL is unescaped and quoting removed. -// -// The collection of input URLs is returned or an error if one occurs. -func ProcessInputAsURL(inputURL string) ([]string, error) { - var inputURLs []string - - // https://stackoverflow.com/questions/22744443/check-if-there-is-something-to-read-on-stdin-in-golang - // https://stackoverflow.com/a/26567513/903870 - // stat, _ := os.Stdin.Stat() - // if (stat.Mode() & os.ModeCharDevice) == 0 { - // fmt.Println("data is being piped to stdin") - // } else { - // fmt.Println("stdin is from a terminal") - // } - - stat, _ := os.Stdin.Stat() - - switch { - - // We received one or more URLs via standard input. - case (stat.Mode() & os.ModeCharDevice) == 0: - // fmt.Fprintln(os.Stderr, "Received URL via standard input") - return ReadURLsFromFile(os.Stdin) - - // We received a URL via positional argument. We ignore all but the first - // one. 
- case len(flag.Args()) > 0: - // fmt.Fprintln(os.Stderr, "Received URL via positional argument") - - if strings.TrimSpace(flag.Args()[0]) == "" { - return nil, ErrInvalidURL - } - - inputURLs = append(inputURLs, cleanURL(flag.Args()[0])) - - // We received a URL via flag. - case inputURL != "": - // fmt.Fprintln(os.Stderr, "Received URL via flag") - - inputURLs = append(inputURLs, cleanURL(inputURL)) - - // Input URL not given via positional argument, not given via flag either. - // We prompt the user for a single input value. - default: - // fmt.Fprintln(os.Stderr, "default switch case triggered") - - input, err := ReadURLFromUser() - if err != nil { - return nil, fmt.Errorf("error reading URL: %w", err) - } - - if strings.TrimSpace(input) == "" { - return nil, ErrInvalidURL - } - - inputURLs = append(inputURLs, cleanURL(input)) + if len(entries) == 0 { + return nil, ErrMissingValue } - return inputURLs, nil + return entries, nil } -// cleanURL strips away quoting or escaping of characters in a given URL. -func cleanURL(s string) string { - // Strip off any quoting that may be present. +// CleanURL strips away quoting, escaping of characters or other problematic +// leading or trailing characters in a given URL. +func CleanURL(s string) string { + // Remove potential quoting. s = strings.ReplaceAll(s, `'`, "") s = strings.ReplaceAll(s, `"`, "") - // Strip of potential enclosing angle brackets. - s = strings.Trim(s, `<>`) + // Remove potential invalid leading or trailing characters from URL. + s = trimEnclosingURLCharacters(s) // Replace escaped ampersands with literal ampersands. // inputURL = strings.ReplaceAll(flag.Args()[1], "&", "&") @@ -208,6 +192,23 @@ func cleanURL(s string) string { return s } +func randomBool() bool { + //nolint:gosec,nolintlint // G404: Use of weak random number generator + return mrand.Intn(2) == 0 +} + +// trimEnclosingURLCharacters trims invalid leading or trailing characters +// from given URL. 
+func trimEnclosingURLCharacters(url string) string { + // Remove potential leading/trailing period. + url = strings.Trim(url, `.`) + + // Remove potential enclosing angle brackets. + url = strings.Trim(url, `<>`) + + return url +} + // assertValidURLParameter requires that the given url.URL contains a // non-empty parameter named url. func assertValidURLParameter(u *url.URL) error { @@ -219,88 +220,69 @@ func assertValidURLParameter(u *url.URL) error { return nil } -// ProcessInputURLs processes a given collection of input URL strings and -// emits successful decoding results to the specified results output sink. -// Errors are emitted to the specified error output sink if encountered but -// bulk processing continues until all input URLs have been evaluated. -// -// If requested, decoded URLs are emitted in verbose format. -// -// A boolean value is returned indicating whether any errors occurred. -func ProcessInputURLs(inputURLs []string, okOut io.Writer, errOut io.Writer, verbose bool) bool { - var errEncountered bool - - for _, inputURL := range inputURLs { - safelink, err := url.Parse(inputURL) - if err != nil { - fmt.Printf("Failed to parse URL: %v\n", err) - - errEncountered = true - continue - } - - if err := assertValidURLParameter(safelink); err != nil { - fmt.Fprintf(errOut, "Invalid Safelinks URL %q: %v\n", safelink, err) - - errEncountered = true - continue - } - - emitOutput(safelink, okOut, verbose) - } - - return errEncountered -} - // GetURLPatternsUsingRegex parses the given input and returns a collection of // FoundURLPattern values. // // Since all Safe Links URLs observed in the wild begin with a HTTPS scheme we -// require that all matched URL patterns begin with that protocol scheme. nil -// is returned if no patterns using that scheme are found. +// require that all matched URL patterns begin with that protocol scheme. If +// specified, non-HTTPS URLs are evaluated also. nil is returned if no +// matching patterns are found. 
// // NOTE: Validation is not performed to ensure that matched patterns are valid // URLs. // // Internal logic uses a regular expression to match URL patterns optionally -// beginning with a left angle bracket, then 'https://' and ending with a -// whitespace character or a right angle bracket. Any angle brackets present -// are trimmed from returned matches. -// Internal logic uses a regular expression to match URL patterns optionally -// beginning with a left angle bracket, then 'https://' and ending with a -// whitespace character or a right angle bracket. Any angle brackets present -// are trimmed from returned matches. -func GetURLPatternsUsingRegex(input string) ([]FoundURLPattern, error) { +// beginning with a left angle bracket, then 'https://' (or 'http://' if +// specified) and ending with a whitespace character or a right angle bracket. +// The caller is responsible for trimming angle brackets and other unwanted +// characters. +func GetURLPatternsUsingRegex(input string, evalPlainHTTP bool) ([]FoundURLPattern, error) { urlPatterns := make([]FoundURLPattern, 0, 5) - if !strings.Contains(input, SafeLinksURLRequiredPrefix) { + log.Println("Evaluating plain HTTP URLs:", evalPlainHTTP) + + if !hasAcceptableURLPrefix(input, evalPlainHTTP) { return nil, ErrNoURLsFound } - urlRegex := `?` + var urlRegex string + switch { + case evalPlainHTTP: + // urlRegex = `?` + urlRegex = fmt.Sprintf( + `?`, + SafeLinksURLRequiredPrefix, + HTTPPlainURLPrefix, + ) + log.Printf("urlRegex set to also allow plain HTTP prefixes: %q", urlRegex) + + default: + urlRegex = `?` + log.Printf("urlRegex set to disallow plain HTTP prefixes: %q", urlRegex) + } r := regexp.MustCompile(urlRegex) matches := r.FindAllString(input, -1) - log.Printf("Matches: %d\n", len(matches)) - for _, up := range matches { - log.Println(up) + log.Printf("Matches (%d) untrimmed:\n", len(matches)) + for _, m := range matches { + log.Println(m) } - log.Println("Cleaning URLs of enclosing angle brackets") - for 
i := range matches { - matches[i] = strings.Trim(matches[i], "<>") - } - log.Printf("Matches (%d) trimmed:", len(matches)) - for _, up := range matches { - log.Println(up) - } + // log.Println("Cleaning URLs of invalid leading/trailing characters") + // for i := range matches { + // matches[i] = trimEnclosingURLCharacters(matches[i]) + // } + // log.Printf("Matches (%d) trimmed:", len(matches)) + // for _, m := range matches { + // log.Println(m) + // } - for _, match := range matches { + for _, m := range matches { urlPatterns = append( urlPatterns, FoundURLPattern{ - URLPattern: match, + URLPattern: m, // the caller will handle trimming }, ) } @@ -312,91 +294,68 @@ func GetURLPatternsUsingRegex(input string) ([]FoundURLPattern, error) { // FoundURLPattern values. // // Since all Safe Links URLs observed in the wild begin with a HTTPS scheme we -// require that all matched URL patterns begin with that protocol scheme. nil -// is returned if no patterns using that scheme are found. +// require that all matched URL patterns begin with that protocol scheme. If +// specified, non-HTTPS URLs are evaluated also. nil is returned if no +// matching patterns are found. // // NOTE: Validation has not been performed to ensure that matched patterns are // valid URLs. // // Internal logic uses slice indexing/iteration to match URL patterns -// beginning with 'https://' and ending with a whitespace character or a right -// angle bracket. Any angle brackets present are trimmed from returned -// matches. -func GetURLPatternsUsingIndex(input string) ([]FoundURLPattern, error) { - // urls := make([]url.URL, 0, 5) - urlPatterns := make([]FoundURLPattern, 0, 5) - - if !strings.Contains(input, SafeLinksURLRequiredPrefix) { +// beginning with 'https://' (or optionally 'http://') and ending with a +// whitespace character or a right angle bracket. The caller is responsible +// for trimming angle brackets and other unwanted characters. 
+func GetURLPatternsUsingIndex(input string, evalAllHTTPURLs bool) ([]FoundURLPattern, error) { + if !strings.Contains(input, SafeLinksURLRequiredPrefix) && !evalAllHTTPURLs { return nil, ErrNoURLsFound } - remaining := input - - for { - urlStart := strings.Index(remaining, SafeLinksURLRequiredPrefix) - log.Println("urlStart:", urlStart) - - if urlStart == -1 { - break - } - - next := urlStart + len(SafeLinksURLRequiredPrefix) + 1 - - // Sanity check to keep from indexing past remaining string length. - if next >= len(remaining) { - break - } - - urlEnd := getURLIndexEndPosition(remaining, next) - - urlPatterns = append( - urlPatterns, - FoundURLPattern{ - // recording for later potential debugging - startPosition: urlStart, - endPosition: urlEnd, + matches, err := getURLPatternsUsingIndex(input, SafeLinksURLRequiredPrefix) + if err != nil { + return nil, err + } - URLPattern: remaining[urlStart:urlEnd], - }, - ) + if evalAllHTTPURLs { + log.Println("Evaluating plain HTTP URLs also") - // Abort further processing if we're at the end of our original input - // string. - if urlEnd+1 >= len(input) { - break + additionalMatches, err := getURLPatternsUsingIndex(input, HTTPPlainURLPrefix) + if err != nil { + return nil, err } - // Otherwise, record the next position as the starting point for - // further URL match evaluation. - remaining = remaining[urlEnd+1:] - + matches = append(matches, additionalMatches...) } - log.Printf("Total URL pattern matches: %d", len(urlPatterns)) - for _, up := range urlPatterns { + log.Printf("Total URL pattern matches: %d", len(matches)) + for _, up := range matches { log.Println(up.URLPattern) } - return urlPatterns, nil + return matches, nil } // GetURLPatternsUsingPrefixMatchingOnFields parses the given input and // returns a collection of FoundURLPattern values. // // Since all Safe Links URLs observed in the wild begin with a HTTPS scheme we -// require that all matched URL patterns begin with that protocol scheme. 
nil -// is returned if no patterns using that scheme are found. +// require that all matched URL patterns begin with that protocol scheme. If +// specified, non-HTTPS URLs are evaluated also. nil is returned if no +// matching patterns are found. // // NOTE: Validation has not been performed to ensure that matched patterns are // valid URLs. // // Internal logic uses string splitting on whitespace and prefix matching to -// match URL patterns optionally beginning a left angle bracket, then -// 'https://' and ending with a whitespace character. -func GetURLPatternsUsingPrefixMatchingOnFields(input string) ([]FoundURLPattern, error) { +// match URL patterns optionally beginning with a left angle bracket, then +// 'https://' (or 'http://' if specified) and ending with a whitespace +// character. The caller is responsible for trimming angle brackets and other +// unwanted characters. +func GetURLPatternsUsingPrefixMatchingOnFields(input string, evalPlainHTTP bool) ([]FoundURLPattern, error) { urlPatterns := make([]FoundURLPattern, 0, 5) - if !strings.Contains(input, SafeLinksURLRequiredPrefix) { + log.Println("Evaluating plain HTTP URLs:", evalPlainHTTP) + + if !hasAcceptableURLPrefix(input, evalPlainHTTP) { return nil, ErrNoURLsFound } @@ -410,12 +369,29 @@ func GetURLPatternsUsingPrefixMatchingOnFields(input string) ([]FoundURLPattern, URLPattern: field, }, ) + case evalPlainHTTP && strings.HasPrefix(field, HTTPPlainURLPrefix): + urlPatterns = append( + urlPatterns, + FoundURLPattern{ + URLPattern: field, + }, + ) case strings.HasPrefix(field, "<"+SafeLinksURLRequiredPrefix): urlPatterns = append( urlPatterns, FoundURLPattern{ - URLPattern: strings.Trim(field, "<>"), + // URLPattern: strings.Trim(field, "<>"), + URLPattern: field, // the caller will handle trimming + }, + ) + + case evalPlainHTTP && strings.HasPrefix(field, "<"+HTTPPlainURLPrefix): + urlPatterns = append( + urlPatterns, + FoundURLPattern{ + // URLPattern: strings.Trim(field, "<>"), + URLPattern: 
field, // the caller will handle trimming }, ) } @@ -425,16 +401,27 @@ func GetURLPatternsUsingPrefixMatchingOnFields(input string) ([]FoundURLPattern, return nil, ErrNoURLsFound } + // log.Println("Cleaning URLs of invalid leading/trailing characters") + // for i := range urlPatterns { + // urlPatterns[i].URLPattern = trimEnclosingURLCharacters(urlPatterns[i].URLPattern) + // } + return urlPatterns, nil } -// URLs parses the given input and returns a collection of *url.URL values. +// URLs parses the given input and returns a collection of ParsedURL values. // // Since all Safe Links URLs observed in the wild begin with a HTTPS scheme we -// require that all matched URLs begin with that protocol scheme. nil is -// returned if no valid URLs using that scheme are found. -func URLs(input string) ([]*url.URL, error) { - urls := make([]*url.URL, 0, 5) +// require that all matched URLs begin with that protocol scheme. If +// specified, non-HTTPS URLs are also evaluated. nil is returned if no +// matching patterns are found. +// +// The result is a collection of ParsedURL values containing the original URL +// match pattern and a parsed +func URLs(input string, evalPlainHTTP bool) ([]ParsedURL, error) { + parsedURLs := make([]ParsedURL, 0, 5) + + log.Println("Evaluating plain HTTP URLs:", evalPlainHTTP) // NOTE: Confirmed working with either of: // @@ -446,26 +433,43 @@ func URLs(input string) ([]*url.URL, error) { // URL patterns without a leading space, but GetURLPatternsUsingRegex and // GetURLPatternsUsingIndex do. 
// - // urlPatterns, err := GetURLPatternsUsingPrefixMatchingOnFields(input) - // urlPatterns, err := GetURLPatternsUsingIndex(input) - - log.Println("Calling GetURLPatternsUsingRegex(input)") - urlPatterns, err := GetURLPatternsUsingRegex(input) + // urlPatterns, err := GetURLPatternsUsingPrefixMatchingOnFields(input, evalPlainHTTP) + // urlPatterns, err := GetURLPatternsUsingIndex(input, evalPlainHTTP) + log.Println("Calling GetURLPatternsUsingRegex") + urlPatterns, err := GetURLPatternsUsingRegex(input, evalPlainHTTP) if err != nil { return nil, err } - for _, pattern := range urlPatterns { - u, err := url.Parse(pattern.URLPattern) + log.Printf("Processing %d matched patterns", len(urlPatterns)) + for _, pattern := range urlPatterns { + trimmedPattern := trimEnclosingURLCharacters(pattern.URLPattern) + u, err := url.Parse(trimmedPattern) if err != nil { + // url.Parse is *very* lenient. Any failure at this point is a + // reliable "skip" indication. continue } - urls = append(urls, u) + + if !ValidURL(u) { + continue + } + + pURL := ParsedURL{ + Original: pattern.URLPattern, + Parsed: u, + } + + log.Printf("Original URL match: %q", pURL.Original) + log.Printf("Trimmed URL match: %q", pURL.Trimmed()) + log.Printf("Parsed URL: %+v", pURL.Parsed.String()) + + parsedURLs = append(parsedURLs, pURL) } - return urls, nil + return parsedURLs, nil } // SafeLinkURLsFromURLs evaluates a given collection of URLs and returns any @@ -486,7 +490,37 @@ func SafeLinkURLsFromURLs(urls []*url.URL) ([]SafeLinkURL, error) { SafeLinkURL{ EncodedURL: u.String(), // DecodedURL: originalURL, - DecodedURL: cleanURL(originalURL), + DecodedURL: CleanURL(originalURL), + }, + ) + } + + if len(safeLinkURLs) == 0 { + return nil, ErrNoSafeLinkURLsFound + } + + return safeLinkURLs, nil +} + +// SafeLinkURLsFromParsedURLs evaluates a given collection of parsed URLs and +// returns any that are found to be encoded as Safe Links. Deduplication is +// *not* performed. 
An error is returned if no valid matches are found.
+func SafeLinkURLsFromParsedURLs(parsedURLs []ParsedURL) ([]SafeLinkURL, error) {
+	safeLinkURLs := make([]SafeLinkURL, 0, len(parsedURLs))
+
+	for _, u := range parsedURLs {
+		if !ValidSafeLinkURL(u.Parsed) {
+			continue
+		}
+
+		originalURL := u.Parsed.Query().Get("url")
+
+		safeLinkURLs = append(
+			safeLinkURLs,
+			SafeLinkURL{
+				EncodedURL: u.Parsed.String(),
+				// DecodedURL: originalURL,
+				DecodedURL: CleanURL(originalURL),
 			},
 		)
 	}
@@ -503,14 +537,14 @@ func SafeLinkURLsFromURLs(urls []*url.URL) ([]SafeLinkURL, error) {
 //
 // An error is returned if no valid matches are found.
 func SafeLinkURLs(input string) ([]SafeLinkURL, error) {
-	log.Println("Calling URLs(input)")
-	urls, err := URLs(input)
+	log.Println("Calling URLs")
+	urls, err := URLs(input, false)
 	if err != nil {
 		return nil, err
 	}

-	log.Println("Calling SafeLinkURLsFromURLs(urls)")
-	return SafeLinkURLsFromURLs(urls)
+	log.Println("Calling SafeLinkURLsFromParsedURLs")
+	return SafeLinkURLsFromParsedURLs(urls)
 }

 // FromURLs evaluates a given collection of URLs and returns a collection of
@@ -540,6 +574,78 @@ func getURLIndexEndPosition(input string, startPos int) int {

 	return endPos
 }

+// getURLPatternsUsingIndex performs the bulk of the work for the exported
+// GetURLPatternsUsingIndex function. See that function's doc comments for
+// further details.
+func getURLPatternsUsingIndex(input string, urlPrefix string) ([]FoundURLPattern, error) {
+	urlPatterns := make([]FoundURLPattern, 0, 5)
+
+	remaining := input
+
+	for {
+		urlStart := strings.Index(remaining, urlPrefix)
+		log.Println("urlStart:", urlStart)
+
+		if urlStart == -1 {
+			break
+		}
+
+		next := urlStart + len(urlPrefix) + 1
+
+		// Sanity check to keep from indexing past remaining string length.
+ if next >= len(remaining) { + break + } + + urlEnd := getURLIndexEndPosition(remaining, next) + + urlPatterns = append( + urlPatterns, + FoundURLPattern{ + // recording for later potential debugging + startPosition: urlStart, + endPosition: urlEnd, + + URLPattern: remaining[urlStart:urlEnd], // the caller will handle trimming + }, + ) + + // Abort further processing if we're at the end of our original input + // string. + if urlEnd+1 >= len(input) { + break + } + + // Otherwise, record the next position as the starting point for + // further URL match evaluation. + remaining = remaining[urlEnd+1:] + } + + // log.Println("Cleaning URLs of invalid leading/trailing characters") + // for i := range urlPatterns { + // urlPatterns[i].URLPattern = trimEnclosingURLCharacters(urlPatterns[i].URLPattern) + // } + + return urlPatterns, nil +} + +// hasAcceptableURLPrefix accepts an input string and an indication of whether +// a plain HTTP prefix should be considered OK alongside the existing +// Safe Links URL required prefix. +func hasAcceptableURLPrefix(input string, evalPlainHTTP bool) bool { + hasSafeLinksURL := strings.Contains(input, SafeLinksURLRequiredPrefix) + hasPlainHTTPURL := strings.Contains(input, HTTPPlainURLPrefix) + + switch { + case hasSafeLinksURL: + return true + case hasPlainHTTPURL && evalPlainHTTP: + return true + default: + return false + } +} + // GetRandomSafeLinksFQDN returns a pseudorandom FQDN from a list observed to // be associated with Safe Links URLs. Entries in the list have a naming // pattern of *.safelinks.protection.outlook.com. @@ -576,15 +682,15 @@ func GetRandomSafeLinksFQDN() string { return strings.Join([]string{subdomain, SafeLinksBaseDomain}, ".") } -// EncodeURLAsFauxSafeLinksURL encodes the provided url.URL in a format that -// mimics real Safe Links encoded URLs observed in the wild. This output is -// intended for use with testing encoding/decoding behavior. 
-func EncodeURLAsFauxSafeLinksURL(u *url.URL) string {
+// EncodeParsedURLAsFauxSafeLinksURL encodes the provided ParsedURL in a
+// format that mimics real Safe Links encoded URLs observed in the wild. This
+// output is intended for use with testing encoding/decoding behavior.
+func EncodeParsedURLAsFauxSafeLinksURL(pURL ParsedURL) string {
 	return fmt.Sprintf(
 		SafeLinksURLTemplate,
 		SafeLinksURLRequiredPrefix,
 		GetRandomSafeLinksFQDN(),
-		url.QueryEscape(u.String()),
+		url.QueryEscape(pURL.Trimmed()),
 		"data_placeholder",
 		"sdata_placeholder",
 		"0", // 0 is the only value observed in the wild thus far.
@@ -635,3 +741,164 @@ func FilterURLs(urls []*url.URL, excludeSafeLinkURLs bool) []*url.URL {

 	return remaining
 }
+
+// FilterParsedURLs filters the given collection of parsed URLs, returning
+// only the parsed URLs that remain after filtering.
+//
+// If specified, Safe Link URLs are excluded from the collection returning
+// only URLs that have not been encoded as Safe Links URLs. Otherwise, only
+// URLs that have been encoded as Safe Links URLs are returned.
+//
+// An empty collection is returned if no URLs remain after filtering.
+func FilterParsedURLs(parsedURLs []ParsedURL, excludeSafeLinkURLs bool) []ParsedURL { + remaining := make([]ParsedURL, 0, len(parsedURLs)) + + keepSafeLinksURLs := !excludeSafeLinkURLs + keepNonSafeLinksURLs := excludeSafeLinkURLs + + for _, pURL := range parsedURLs { + if ValidSafeLinkURL(pURL.Parsed) { + log.Printf("URL identified as Safe Links encoded (orig): %q", pURL.Original) + + switch { + case keepSafeLinksURLs: + log.Printf("Retaining Safe Links encoded URL %q as requested", pURL.Original) + remaining = append(remaining, pURL) + default: + log.Printf("Skipping Safe Links encoded URL %q as requested", pURL.Original) + } + + continue + } + + log.Printf("URL not identified as Safe Links encoded: %q", pURL.Original) + + switch { + case keepNonSafeLinksURLs: + log.Printf("Retaining unencoded URL %q as requested", pURL.Original) + remaining = append(remaining, pURL) + + continue + default: + log.Printf("Skipping unencoded URL %q as requested", pURL.Original) + } + } + + return remaining +} + +// DecodeInput processes given input replacing any Safe Links encoded URL +// with the original decoded value. Other input is returned unmodified. +func DecodeInput(txt string) (string, error) { + log.Println("Calling SafeLinkURLs") + + safeLinks, err := SafeLinkURLs(txt) + if err != nil { + return "", err + } + + modifiedInput := txt + + // URLs are "cleaned" of problematic leading and trailing characters as + // part of retrieving them and asserting that they're in the expected + // format of Safe Links URLs. In order to safely match and replace the + // original encoded URL we also have to perform that same URL cleaning + // step. This helps handle edge cases where an original URL match applies + // to more characters than intended. 
+ for _, sl := range safeLinks { + cleanedOriginalURL := trimEnclosingURLCharacters(sl.EncodedURL) + modifiedInput = strings.Replace(modifiedInput, cleanedOriginalURL, sl.DecodedURL, 1) + } + + return modifiedInput, nil +} + +// EncodeInput processes given input replacing any normal URL with an encoded +// value similar to a real Safe Links value. Other input is returned +// unmodified. +func EncodeInput(txt string, randomlyEncode bool) (string, error) { + log.Println("Calling URLs") + urls, err := URLs(txt, true) + if err != nil { + return "", err + } + + nonSafeLinkURLs := FilterParsedURLs(urls, true) + + log.Printf("%d URLs identified as nonSafeLinkURLs", len(nonSafeLinkURLs)) + + if len(nonSafeLinkURLs) == 0 { + return "", ErrNoNonSafeLinkURLsFound + } + + log.Printf("nonSafeLinkURLs URLs (%d):", len(nonSafeLinkURLs)) + for i, u := range nonSafeLinkURLs { + log.Printf("(%2.2d) %s", i+1, u.Original) + } + + modifiedInput := txt + log.Printf("Replacing original unencoded URLs (randomly: %t)", randomlyEncode) + shouldEncode := true + for _, pURL := range nonSafeLinkURLs { + if randomlyEncode { + shouldEncode = randomBool() + } + + if shouldEncode { + cleanedOriginalURL := pURL.Trimmed() + fauxSafeLinksURL := EncodeParsedURLAsFauxSafeLinksURL(pURL) + modifiedInput = strings.Replace(modifiedInput, cleanedOriginalURL, fauxSafeLinksURL, 1) + } + } + + if modifiedInput == txt { + return "", fmt.Errorf("encoded output matches input: %w", ErrEncodingUnsuccessful) + } + + return modifiedInput, nil +} + +// QueryEscapeInput processes given input replacing any normal URL with an +// escaped string so it can be safely placed inside a URL query. Other input +// is returned unmodified. 
+func QueryEscapeInput(txt string, randomlyEscape bool) (string, error) {
+	log.Println("Calling URLs")
+	urls, err := URLs(txt, true)
+	if err != nil {
+		return "", err
+	}
+
+	nonSafeLinkURLs := FilterParsedURLs(urls, true)
+
+	log.Printf("%d URLs identified as nonSafeLinkURLs", len(nonSafeLinkURLs))
+
+	if len(nonSafeLinkURLs) == 0 {
+		return "", ErrNoNonSafeLinkURLsFound
+	}
+
+	log.Printf("nonSafeLinkURLs URLs (%d):", len(nonSafeLinkURLs))
+	for i, u := range nonSafeLinkURLs {
+		log.Printf("(%2.2d) %s", i+1, u.Original)
+	}
+
+	modifiedInput := txt
+	log.Printf("Replacing original unencoded URLs (randomly: %t)", randomlyEscape)
+	shouldQueryEscape := true
+	for _, pURL := range nonSafeLinkURLs {
+		if randomlyEscape {
+			shouldQueryEscape = randomBool()
+		}
+
+		if shouldQueryEscape {
+			cleanedOriginalURL := pURL.Trimmed()
+			queryEscapedURL := url.QueryEscape(cleanedOriginalURL)
+			modifiedInput = strings.Replace(modifiedInput, cleanedOriginalURL, queryEscapedURL, 1)
+		}
+	}
+
+	if modifiedInput == txt {
+		return "", fmt.Errorf("query escaped output matches input: %w", ErrQueryEscapingUnsuccessful)
+	}
+
+	return modifiedInput, nil
+}