Refactor support for decoding text streams #210

Merged
74 changes: 39 additions & 35 deletions cmd/dsl/prototyping.go
@@ -155,41 +155,7 @@ func pollInputSource(
continue
}

log.Println("Calling safelinks.SafeLinkURLs(txt)")
safeLinks, err := safelinks.SafeLinkURLs(txt)

// Failing to find a URL in the input is considered OK. Other errors
// result in aborting the decode attempt.
//
// TODO: This behavior needs further testing.
//
// It's likely that we will wish to continue processing further lines
// and not abort early.
//
switch {
case errors.Is(err, safelinks.ErrNoURLsFound):
resultsChan <- txt
case errors.Is(err, safelinks.ErrNoSafeLinkURLsFound):
resultsChan <- txt
case err != nil:
errChan <- err

return
default:
// TESTING
// fmt.Printf("%d Safe Links:\n", len(safeLinks))
// for _, sl := range safeLinks {
// fmt.Printf("\tOriginal: %s\n\tDecoded: %s\n\n", sl.EncodedURL, sl.DecodedURL)
// }

modifiedInput := txt

for _, sl := range safeLinks {
modifiedInput = strings.Replace(modifiedInput, sl.EncodedURL, sl.DecodedURL, 1)
}

resultsChan <- modifiedInput
}
processInput(txt, resultsChan, errChan)

}

@@ -207,3 +173,41 @@ func pollInputSource(
done <- true
}
}

func processInput(txt string, resultsChan chan<- string, errChan chan<- error) {
log.Println("Calling safelinks.SafeLinkURLs(txt)")
safeLinks, err := safelinks.SafeLinkURLs(txt)

// Failing to find a URL in the input is considered OK. Other errors
// result in aborting the decode attempt.
//
// TODO: This behavior needs further testing.
//
// It's likely that we will wish to continue processing further lines
// and not abort early.
//
switch {
case errors.Is(err, safelinks.ErrNoURLsFound):
resultsChan <- txt
case errors.Is(err, safelinks.ErrNoSafeLinkURLsFound):
resultsChan <- txt
case err != nil:
errChan <- err

return
default:
// TESTING
// fmt.Printf("%d Safe Links:\n", len(safeLinks))
// for _, sl := range safeLinks {
// fmt.Printf("\tOriginal: %s\n\tDecoded: %s\n\n", sl.EncodedURL, sl.DecodedURL)
// }

modifiedInput := txt

for _, sl := range safeLinks {
modifiedInput = strings.Replace(modifiedInput, sl.EncodedURL, sl.DecodedURL, 1)
}

resultsChan <- modifiedInput
}
}
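
For readers skimming the diff, here is a minimal, self-contained sketch of the replacement step that processInput performs once safelinks.SafeLinkURLs returns matches. The local safeLinkURL struct and the example Safe Links-style URL are stand-ins for illustration; only the loop mirrors the code above.

```go
// Illustration only (not code from this PR): the replace loop at the heart
// of processInput, applied to a hypothetical encoded/decoded URL pair.
package main

import (
	"fmt"
	"strings"
)

// safeLinkURL is a stand-in for the SafeLinkURL values returned by
// safelinks.SafeLinkURLs in the diff above.
type safeLinkURL struct {
	EncodedURL string
	DecodedURL string
}

func main() {
	input := "See https://example.safelinks.protection.outlook.com/?url=https%3A%2F%2Fexample.com%2F for details."

	found := []safeLinkURL{
		{
			EncodedURL: "https://example.safelinks.protection.outlook.com/?url=https%3A%2F%2Fexample.com%2F",
			DecodedURL: "https://example.com/",
		},
	}

	// Replace each encoded Safe Link with its decoded form, as processInput does.
	modified := input
	for _, sl := range found {
		modified = strings.Replace(modified, sl.EncodedURL, sl.DecodedURL, 1)
	}

	fmt.Println(modified)
	// Output: See https://example.com/ for details.
}
```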
44 changes: 24 additions & 20 deletions internal/safelinks/prototyping.go
@@ -65,15 +65,6 @@ func GetURLPatternsUsingRegex(input string) ([]FoundURLPattern, error) {
return nil, ErrNoURLsFound
}

// NOTE: This is broken.
//
// r := regexp.MustCompile(`https://.*\s`)
// r := regexp.MustCompile(`(https://.*)(?:\s|\n)?`)
// r := regexp.MustCompile(`https://(?:[^/.\s]+)*(?:/[^/\s]+)*/?`)

// urlRegex := `^(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/|\/|\/\/)?[A-z0-9_-]*?[:]?[A-z0-9_-]*?[@]?[A-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$`
// urlRegex := `(http:\/\/www\.|https:\/\/www\.|http:\/\/|https:\/\/|\/|\/\/)?[A-z0-9_-]*?[:]?[A-z0-9_-]*?[@]?[A-z0-9]+([\-\.]{1}[a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?`

// This works but would match regular http:// prefixes:
//
// https://www.honeybadger.io/blog/a-definitive-guide-to-regular-expressions-in-go/
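
The comment above reaches for a general-purpose URL regex. As a rough, hypothetical sketch of the simpler "match any https:// candidate" approach in Go (not the pattern this package actually uses), something like the following works, at the cost of also matching URLs that are not Safe Links URLs:

```go
// Rough sketch only: a permissive pattern that grabs any "https://"
// candidate up to the next whitespace. Further filtering (e.g. on the
// Safe Links host) would still be required.
package main

import (
	"fmt"
	"regexp"
)

var candidateURL = regexp.MustCompile(`https://\S+`)

func main() {
	input := "plain https://example.com/page text https://go.dev/doc end"
	fmt.Println(candidateURL.FindAllString(input, -1))
	// [https://example.com/page https://go.dev/doc]
}
```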
@@ -135,14 +126,16 @@ func GetURLPatternsUsingIndex(input string) ([]FoundURLPattern, error) {
}

// Assume we found the ending point until proven otherwise.
urlEnd := next
// urlEnd := next

for _, char := range remaining[next:] {
if unicode.IsSpace(char) {
break // we found end of URL pattern
}
urlEnd++
}
// for _, char := range remaining[next:] {
// if unicode.IsSpace(char) {
// break // we found end of URL pattern
// }
// urlEnd++
// }

urlEnd := getURLIndexEndPosition(remaining, next)

urlPatterns = append(
urlPatterns,
@@ -221,7 +214,6 @@ func URLs(input string) ([]*url.URL, error) {
//
// urlPatterns, err := GetURLPatternsUsingIndex(input)
// urlPatterns, err := GetURLPatternsUsingPrefixMatchingOnFields(input)

urlPatterns, err := GetURLPatternsUsingRegex(input)
if err != nil {
return nil, err
@@ -291,6 +283,18 @@ func FromURLs(urls []*url.URL) ([]SafeLinkURL, error) {
return SafeLinkURLsFromURLs(urls)
}

// func ReadInputFromUser() (string, error) {
//
// }
// getURLIndexEndPosition accepts an input string and a starting position and
// iterates until it finds the first space character. This is assumed to be
// the separator used to indicate the end of a URL pattern.
func getURLIndexEndPosition(input string, startPos int) int {
endPos := startPos

for _, char := range input[startPos:] {
if unicode.IsSpace(char) {
break // we found end of URL pattern
}
endPos++
}

return endPos
}
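
A hypothetical usage example for the new helper follows; the function body is copied from the diff above so the snippet runs on its own, and the sample input is made up for illustration.

```go
package main

import (
	"fmt"
	"unicode"
)

// Copied from the diff above for a self-contained example. Note that endPos
// advances once per rune, which equals the byte offset for ASCII input such
// as typical URLs.
func getURLIndexEndPosition(input string, startPos int) int {
	endPos := startPos

	for _, char := range input[startPos:] {
		if unicode.IsSpace(char) {
			break // we found end of URL pattern
		}
		endPos++
	}

	return endPos
}

func main() {
	input := "https://example.com/path trailing text"
	end := getURLIndexEndPosition(input, 0)
	fmt.Println(input[:end]) // https://example.com/path
}
```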