Skip to content

Commit

Permalink
remove fuzzy sets and punctuation removal
Browse files Browse the repository at this point in the history
Signed-off-by: Michael Valdron <[email protected]>
  • Loading branch information
michael-valdron committed Mar 26, 2024
1 parent d0ca430 commit e95086a
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 57 deletions.
67 changes: 34 additions & 33 deletions index/server/pkg/util/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,16 +199,10 @@ func trimExtraSpace(s string) string {
return strings.Join(splitStr, " ")
}

// nonAlphanumericRun matches one or more consecutive characters that are not
// ASCII letters, ASCII digits, or spaces. It is compiled once at package
// initialisation instead of on every trimPunc call.
var nonAlphanumericRun = regexp.MustCompile(`[^a-zA-Z0-9 ]+`)

// trimPunc replaces every run of characters other than ASCII letters, ASCII
// digits, and spaces in s with a single space, then strips leading and
// trailing whitespace from the result.
func trimPunc(s string) string {
	return strings.TrimSpace(nonAlphanumericRun.ReplaceAllString(s, " "))
}

// preProcessString pre-processes the given string for fuzzy matching: it
// lowercases s and passes the result through trimExtraSpace.
// NOTE(review): two consecutive return statements appear below, which looks
// like the removed/added pair of a diff. As written only the first is
// reachable, and it additionally runs the value through trimPunc — confirm
// which of the two is intended.
func preProcessString(s string) string {
sLower := strings.ToLower(s)
return trimPunc(trimExtraSpace(sLower))
return trimExtraSpace(sLower)
}

// preProcessStringTokens gives array of string tokens
Expand All @@ -221,30 +215,15 @@ func preProcessStringTokens(s string) []string {
return []string{}
}

// getFuzzySetFromArray builds a set from arr for fuzzy lookups. Each element
// is run through preProcessString; the tokens that preProcessStringTokens
// yields for it are added first, followed by the pre-processed string itself.
func getFuzzySetFromArray(arr []string) *sets.Set[string] {
	entries := make([]string, 0, len(arr))

	for _, raw := range arr {
		normalized := preProcessString(raw)
		entries = append(entries, preProcessStringTokens(normalized)...)
		entries = append(entries, normalized)
	}

	return sets.From(entries)
}

// fuzzyMatch reports whether the pre-processed form of b occurs as a
// substring of the pre-processed form of a.
func fuzzyMatch(a, b string) bool {
	haystack := preProcessString(a)
	needle := preProcessString(b)
	return strings.Contains(haystack, needle)
}

// fuzzyMatchInSet reports whether the pre-processed form of matchVal is a
// member of fuzzySet, a set of already pre-processed values.
func fuzzyMatchInSet(fuzzySet *sets.Set[string], matchVal string) bool {
	normalized := preProcessString(matchVal)
	return fuzzySet.Contains(normalized)
}
// func fuzzyMatchInSet(fuzzySet *sets.Set[string], matchVal string) bool {
// return fuzzySet.Contains(preProcessString(matchVal))
// }

// filterDevfileFieldFuzzy filters devfiles based on fuzzy filtering of string fields
func filterDevfileFieldFuzzy(index []indexSchema.Schema, requestedValue string, options FilterOptions[string]) FilterResult {
Expand Down Expand Up @@ -308,14 +287,25 @@ func filterDevfileArrayFuzzy(index []indexSchema.Schema, requestedValues []strin
// else if filtering out based on empty fields is set, set index schema to be filtered out
// (after version filtering if applicable)
if !indexFieldEmptyHandler(fieldValues, requestedValues, options) {
valuesInIndex := getFuzzySetFromArray(fieldValues)

matchAll := true
for _, requestedValue := range requestedValues {
if !fuzzyMatchInSet(valuesInIndex, requestedValue) {
toFilterOutIndex = true
matchFound := false

for _, fieldValue := range fieldValues {
if fuzzyMatch(fieldValue, requestedValue) {
matchFound = true
break
}
}

if !matchFound {
matchAll = false
break
}
}
if !matchAll {
toFilterOutIndex = true
}
} else if options.FilterOutEmpty {
toFilterOutIndex = true
}
Expand All @@ -330,14 +320,25 @@ func filterDevfileArrayFuzzy(index []indexSchema.Schema, requestedValues []strin
// If version schema field is not empty perform fuzzy filtering
// else if filtering out based on empty fields is set, filter out version schema
if !versionFieldEmptyHandler(fieldValues, requestedValues, options) {
valuesInVersion := getFuzzySetFromArray(fieldValues)

matchAll := true
for _, requestedValue := range requestedValues {
if !fuzzyMatchInSet(valuesInVersion, requestedValue) {
filterOut(&filteredVersions, &versionIndex)
matchFound := false

for _, fieldValue := range fieldValues {
if fuzzyMatch(fieldValue, requestedValue) {
matchFound = true
break
}
}

if !matchFound {
matchAll = false
break
}
}
if !matchAll {
filterOut(&filteredVersions, &versionIndex)
}
} else if options.FilterOutEmpty {
filterOut(&filteredVersions, &versionIndex)
}
Expand Down
24 changes: 0 additions & 24 deletions index/server/pkg/util/filter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1853,30 +1853,6 @@ func TestFuzzyMatch(t *testing.T) {
valueB: "Java Springboot",
want: true,
},
{
name: "Match with period",
valueA: "Java Springboot",
valueB: "Java Springboot.",
want: true,
},
{
name: "Match with question mark",
valueA: "Java Springboot",
valueB: "Java Springboot?",
want: true,
},
{
name: "Match with exclamation mark",
valueA: "Java Springboot",
valueB: "Java Springboot!",
want: true,
},
{
name: "Match using a dash",
valueA: "Java Springboot",
valueB: "java-springboot",
want: true,
},
{
name: "Extra space match",
valueA: "Java Springboot",
Expand Down

0 comments on commit e95086a

Please sign in to comment.