rslice.go

/*
Package rslice provides some common []rune patterns for moving runes around
within a slice, justifying embedded text, etc.

Charles <asciifaceman> Corbett 2023

MIT License
*/
package rslice

import (
	"fmt"
	"unicode"
)

// Whitespace reports whether every rune in slice is whitespace as defined
// by unicode.IsSpace.
//
// An empty (or nil) slice contains no non-whitespace rune, so it also
// yields true.
func Whitespace(slice []rune) bool {
	for i := 0; i < len(slice); i++ {
		if unicode.IsSpace(slice[i]) {
			continue
		}
		// A single non-whitespace rune settles the answer.
		return false
	}
	return true
}

// Words counts the runs of consecutive non-whitespace runes in slice.
// A run is not necessarily a real word; it is simply anything delimited
// by whitespace or by the ends of the slice.
//
// Knowing the number of runs tells you how many stretches of interior
// whitespace exist, which is useful for things like fully justifying
// text across a width.
//
// Whitespace is decided by unicode.IsSpace, so characters such as \n
// and \t separate runs rather than being counted as part of one.
func Words(slice []rune) int {
	total := 0
	// Treat the position before the slice as whitespace so that a run
	// starting at index 0 is counted like any other.
	prevSpace := true

	for _, r := range slice {
		space := unicode.IsSpace(r)
		if prevSpace && !space {
			// whitespace -> non-whitespace transition: a new run begins
			total++
		}
		prevSpace = space
	}

	return total
}

// Valid reports whether slice has at least one rune and is not made up
// entirely of whitespace.
func Valid(slice []rune) bool {
	return len(slice) > 0 && !Whitespace(slice)
}

// ShiftLeft rotates slice one position to the left: every rune moves one
// index down and the first rune wraps around to the end.
//
// The result is always a newly allocated slice of the same length; the
// input and its backing array are never written to, even when the input
// has spare capacity.
//
// If slice is empty or consists only of whitespace (see Valid) it is
// returned as is, without copying.
func ShiftLeft(slice []rune) []rune {
	if !Valid(slice) {
		return slice
	}

	out := make([]rune, len(slice))
	// Everything after the first rune moves to the front...
	n := copy(out, slice[1:])
	// ...and the first rune takes the last position.
	out[n] = slice[0]
	return out
}

// ShiftRight rotates slice one position to the right: every rune moves one
// index up and the last rune wraps around to the front.
//
// The result is always a newly allocated slice of the same length; the
// input and its backing array are never written to, even when the input
// has spare capacity.
//
// If slice is empty or consists only of whitespace (see Valid) it is
// returned as is, without copying.
func ShiftRight(slice []rune) []rune {
	if !Valid(slice) {
		return slice
	}

	last := len(slice) - 1
	out := make([]rune, len(slice))
	// The last rune wraps around to the front...
	out[0] = slice[last]
	// ...followed by everything that preceded it.
	copy(out[1:], slice[:last])
	return out
}

// ShiftWhitespaceLeft moves the run of whitespace that follows the last
// non-whitespace rune to the front of the slice, keeping the order of the
// moved runes. The length of the slice is preserved.
//
// For example "ab\t " becomes "\t ab".
//
// When there is something to move, the result is a newly allocated slice
// and the input (including any spare capacity behind it) is left
// untouched. When the slice is empty, is all whitespace, or has no
// trailing whitespace, the input is returned as is.
func ShiftWhitespaceLeft(slice []rune) []rune {
	// cut ends up as the index just past the last non-whitespace rune.
	cut := len(slice)
	for cut > 0 && unicode.IsSpace(slice[cut-1]) {
		cut--
	}

	// cut == 0          -> empty or all whitespace
	// cut == len(slice) -> no trailing whitespace
	if cut == 0 || cut == len(slice) {
		return slice
	}

	out := make([]rune, 0, len(slice))
	out = append(out, slice[cut:]...) // trailing whitespace goes first
	return append(out, slice[:cut]...)
}

// ShiftWhitespaceRight moves the run of whitespace that precedes the first
// non-whitespace rune to the end of the slice, keeping the order of the
// moved runes. The length of the slice is preserved.
//
// For example " \tab" becomes "ab \t".
//
// When there is something to move, the result is a newly allocated slice
// and the input (including any spare capacity behind it) is left
// untouched. When the slice is empty, is all whitespace, or has no
// leading whitespace, the input is returned as is.
func ShiftWhitespaceRight(slice []rune) []rune {
	// lead ends up as the index of the first non-whitespace rune.
	lead := 0
	for lead < len(slice) && unicode.IsSpace(slice[lead]) {
		lead++
	}

	// lead == 0          -> no leading whitespace (or empty slice)
	// lead == len(slice) -> all whitespace
	if lead == 0 || lead == len(slice) {
		return slice
	}

	out := make([]rune, 0, len(slice))
	out = append(out, slice[lead:]...) // content first
	return append(out, slice[:lead]...)
}

// Newline reports whether r is one of the line-ending characters used on
// Linux, Darwin, or Windows: line feed ('\n') or carriage return ('\r').
func Newline(r rune) bool {
	// Anything that is not a control character cannot be a line ending.
	if !unicode.IsControl(r) {
		return false
	}
	return r == '\n' || r == '\r'
}

// TrimExcessWhitespace collapses every run of consecutive whitespace
// runes (as defined by unicode.IsSpace) down to a single rune. The rune
// that survives is the last one of the run, so "a \t\nb" becomes "a\nb".
//
// The input is never modified. If there is nothing to collapse the input
// slice itself is returned; otherwise the result is a newly allocated
// slice.
func TrimExcessWhitespace(slice []rune) []rune {
	out := make([]rune, 0, len(slice))
	for i, r := range slice {
		// A whitespace rune directly followed by another whitespace rune
		// is excess: the later one stands in for the whole run.
		if unicode.IsSpace(r) && i+1 < len(slice) && unicode.IsSpace(slice[i+1]) {
			continue
		}
		out = append(out, r)
	}

	if len(out) == len(slice) {
		// Nothing was dropped; hand back the caller's slice unchanged.
		return slice
	}
	return out
}

// LeastWhitespaceIndex finds the narrowest stretch of whitespace that sits
// between two words and returns the index of the last whitespace rune of
// that stretch (that is, the index just before the word that follows it).
// When several stretches are equally narrow the left-most one wins.
//
// Whitespace before the first word is never considered, and whitespace
// after the last word is never considered either, because a stretch is
// only evaluated when the word following it begins.
//
// A return of -1 indicates there is no suitable index. An example is
// ` a `, which has no whitespace between two non-whitespace characters.
//
// Control characters (unicode.IsControl, which includes \n, \t and \r)
// are handled before the whitespace test: they neither end a word nor
// add to the width of a stretch. Once one has been seen, the stretch in
// progress is discarded when the next word begins, so the returned index
// is never one that lies next to a control character.
func LeastWhitespaceIndex(slice []rune) int {
	var (
		best      int                // start index of the word after the narrowest gap
		narrowest = len(slice)       // width of the narrowest gap recorded so far
		gap       = narrowest        // width of the gap currently being measured
		inWord    bool               // the previous non-control rune was part of a word
		skip      bool               // a control character was seen since the last word rune
	)

	for pos, ch := range slice {
		switch {
		case unicode.IsControl(ch):
			// Taints the current gap; it is thrown away at the next word.
			skip = true

		case unicode.IsSpace(ch):
			inWord = false
			gap++

		default:
			// gap starts out at len(slice) and only grows until the first
			// word is reached, so leading whitespace can never be recorded.
			if !inWord && !skip && gap < narrowest {
				best, narrowest = pos, gap
			}
			inWord = true
			skip = false
			gap = 0
		}
	}

	// best is still 0 when nothing was recorded, which yields -1.
	return best - 1
}

/*
NormalizeWhitespace gathers the whitespace found on the left and right
edges of the given rune slice and redistributes it, one rune at a time,
over the whitespace that separates the words inside it.

It first moves trailing whitespace to the front (ShiftWhitespaceLeft) and
then feeds the leading whitespace into the interior (Normalize).

The []rune's width is maintained.

The slice is returned unchanged when it holds fewer than two words, which
includes an empty slice and a slice of nothing but whitespace: with less
than two words there is no interior whitespace to widen.

Usage:
```go

	s := []rune("    A string with whitespace to the left and right    ")
	s = rslice.NormalizeWhitespace(s)

	// s should now be "A  string  with  whitespace  to  the  left  and  right"

```
*/
func NormalizeWhitespace(slice []rune) []rune {
	// Words reports 0 for empty and all-whitespace input, so this single
	// test also covers those two cases.
	if Words(slice) < 2 {
		return slice
	}

	return Normalize(ShiftWhitespaceLeft(slice))
}

/*
Normalize takes the whitespace that precedes the first non-whitespace rune
and moves it, one rune per step, into the interior of the slice. Each step
drops the first rune and inserts a single space at the position reported by
LeastWhitespaceIndex, i.e. into the narrowest (left-most on ties) stretch
of whitespace between two words.

Steps repeat until the slice no longer starts with whitespace or until
LeastWhitespaceIndex reports -1 (no interior whitespace to widen), in which
case any remaining leading whitespace is left where it is.

Normalize maintains the []rune's width.

The input is never modified. If nothing needs to move (the slice is empty,
is all whitespace, does not start with whitespace, or has no usable
interior whitespace) the input slice itself is returned; otherwise the
result is a newly allocated slice.

Example
```go

	s := []rune("    A string with whitespace to the left")
	s = rslice.Normalize(s)

	// s should now be "A  string  with  whitespace  to the left"

```
*/
func Normalize(slice []rune) []rune {
	// Blank reference that keeps the file-level "fmt" import in use now
	// that this function prints nothing; an unused import does not compile
	// in Go. Remove this line together with the import.
	_ = fmt.Sprint

	if !Valid(slice) || !unicode.IsSpace(slice[0]) {
		return slice
	}

	d := LeastWhitespaceIndex(slice)
	if d == -1 {
		return slice
	}

	// Work on a private copy so the caller's backing array is never
	// shifted or written past its length.
	out := make([]rune, len(slice))
	copy(out, slice)

	for {
		// Drop out[0] by sliding out[1..d] one index down, then put a
		// space in the slot that opened up at d. Everything after d stays
		// where it is, so the length does not change.
		copy(out, out[1:d+1])
		out[d] = ' '

		if !unicode.IsSpace(out[0]) {
			break
		}
		if d = LeastWhitespaceIndex(out); d == -1 {
			break
		}
	}

	return out
}