Skip to content

Commit

Permalink
Added a new fuzzy matching algorithm, it should be better than the pr…
Browse files Browse the repository at this point in the history
…evious version; it's similar to what most editors have (the implementation is inspired by the sublime_text fuzzy matcher).
  • Loading branch information
SpartanJ committed Mar 8, 2025
1 parent 0dba4a2 commit dfb0820
Show file tree
Hide file tree
Showing 12 changed files with 305 additions and 59 deletions.
4 changes: 3 additions & 1 deletion include/eepp/core/string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,11 @@ class EE_API String {
*/
static bool icontains( const String& haystack, const String& needle );

static int fuzzyMatch( const std::string& string, const std::string& pattern,
/** Scores @p pattern against @p string with the simple matcher that was previously
 * exposed as fuzzyMatch (the implementation forwards to tFuzzyMatch).
 * @param pattern The text being searched for.
 * @param string The candidate text the pattern is matched against.
 * @param allowUneven Forwarded unchanged to the matcher.
 * NOTE(review): its exact effect is not visible from this declaration; confirm in tFuzzyMatch.
 * @param permissive When false, the matcher subtracts the length of @p string from the
 * resulting score; when true, no length penalty is applied.
 * @return The score computed by the matcher.
 */
static int fuzzyMatchSimple( const std::string& pattern, const std::string& string,
bool allowUneven = false, bool permissive = false );

/** Scores how well @p pattern fuzzy-matches @p string using the fts_fuzzy_match scorer
 * (case-insensitive, every pattern character must appear in order in the string).
 * @param pattern The text being searched for.
 * @param string The candidate text the pattern is matched against.
 * @return The match score (the matcher prefers higher scores), or
 * std::numeric_limits<int>::min() when the pattern does not match. Note that an
 * empty pattern is reported as "no match".
 */
static int fuzzyMatch( const std::string& pattern, const std::string& string );

/** Replace all occurrences of the search string with the replacement string. */
static void replaceAll( std::string& target, const std::string& that, const std::string& with );

Expand Down
13 changes: 11 additions & 2 deletions src/eepp/core/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
// #include <eepp/core/simd.hpp>

#include <thirdparty/fast_float/include/fast_float/fast_float.h>
#define FTS_FUZZY_MATCH_IMPLEMENTATION
#include <thirdparty/fts_fuzzy_match/fts_fuzzy_match.h>
#include <thirdparty/utf8cpp/utf8.h>

#include <algorithm>
Expand Down Expand Up @@ -1133,11 +1135,18 @@ constexpr int tFuzzyMatch( const T* str, const T* ptn, bool allowUneven, bool pe
return score - ( permissive ? 0 : strlen( str ) );
}

int String::fuzzyMatch( const std::string& string, const std::string& pattern, bool allowUneven,
bool permissive ) {
int String::fuzzyMatchSimple( const std::string& pattern, const std::string& string,
							  bool allowUneven, bool permissive ) {
	// tFuzzyMatch expects the candidate text first and the pattern second, which is the
	// reverse of this function's parameter order, so name both explicitly.
	const char* subject = string.c_str();
	const char* needle = pattern.c_str();
	return tFuzzyMatch<char>( subject, needle, allowUneven, permissive );
}

int String::fuzzyMatch( const std::string& pattern, const std::string& string ) {
	// Sentinel handed back to callers when the pattern does not match: the scorer only
	// writes to its output argument on a successful match.
	int bestScore = std::numeric_limits<int>::min();
	// The match positions are not needed here, so use the overload that manages its own
	// 256-entry scratch buffer.
	fts::fuzzy_match( pattern.c_str(), string.c_str(), bestScore );
	return bestScore;
}

std::vector<Uint8> String::stringToUint8( const std::string& str ) {
return std::vector<Uint8>( str.begin(), str.end() );
}
Expand Down
2 changes: 1 addition & 1 deletion src/eepp/ui/doc/languages/html.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ void addHTML() {

},
"",
{ "<html", "<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]%s[Hh][Tt][Mm][Ll]>" }
{ "^<html", "^<![Dd][Oo][Cc][Tt][Yy][Pp][Ee]%s[Hh][Tt][Mm][Ll]>" }

} )
.setAutoCloseXMLTags( true );
Expand Down
2 changes: 1 addition & 1 deletion src/eepp/ui/doc/languages/xml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void addXML() {

},
"",
{ "<%?xml" }
{ "^<%?xml" }

} )
.setAutoCloseXMLTags( true );
Expand Down
221 changes: 221 additions & 0 deletions src/thirdparty/fts_fuzzy_match/fts_fuzzy_match.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
// LICENSE
//
// This software is dual-licensed to the public domain and under the following
// license: you are granted a perpetual, irrevocable license to copy, modify,
// publish, and distribute this file as you see fit.
//
// VERSION
// 0.2.0 (2017-02-18) Scored matches perform exhaustive search for best score
// 0.1.0 (2016-03-28) Initial release
//
// AUTHOR
// Forrest Smith
//
// NOTES
// Compiling
// You MUST add '#define FTS_FUZZY_MATCH_IMPLEMENTATION' before including this header in ONE source file to create implementation.
//
// fuzzy_match_simple(...)
// Returns true if each character in pattern is found sequentially within str
//
// fuzzy_match(...)
// Returns true if pattern is found AND calculates a score.
// Performs exhaustive search via recursion to find all possible matches and match with highest score.
// Score values have no intrinsic meaning. The possible score range is not normalized and varies with pattern.
// Recursion is limited internally (default=10) to prevent degenerate cases (pattern="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
// Uses uint8_t for match indices. Therefore patterns are limited to 256 characters.
// Score system should be tuned for YOUR use case. Words, sentences, file names, or method names all prefer different tuning.


#ifndef FTS_FUZZY_MATCH_H
#define FTS_FUZZY_MATCH_H


#include <cstdint> // uint8_t
#include <ctype.h> // ::tolower, ::toupper
#include <cstring> // memcpy

#include <cstdio>

// Public interface
namespace fts {
// Returns true when every character of `pattern` occurs in `str` in the same order
// (case-insensitive). No score is computed. An empty pattern yields true.
[[maybe_unused]] static bool fuzzy_match_simple(char const * pattern, char const * str);
// Scored match. Returns true when `pattern` matches `str` and writes the best score found
// to `outScore`; on failure `outScore` is left untouched. Uses an internal 256-entry
// buffer for the match positions.
[[maybe_unused]] static bool fuzzy_match(char const * pattern, char const * str, int & outScore);
// Scored match that also reports the matched positions: on success `matches[i]` holds the
// index in `str` (stored as uint8_t) of the i-th pattern character. `maxMatches` is the
// capacity of `matches`; the match fails if the pattern needs more entries than that.
[[maybe_unused]] static bool fuzzy_match(char const * pattern, char const * str, int & outScore, uint8_t * matches, int maxMatches);
}


#ifdef FTS_FUZZY_MATCH_IMPLEMENTATION
namespace fts {

    // Forward declarations and helpers for the "private" implementation
    namespace fuzzy_internal {
        static bool fuzzy_match_recursive(const char * pattern, const char * str, int & outScore, const char * strBegin,
            uint8_t const * srcMatches, uint8_t * newMatches, int maxMatches, int nextMatch,
            int & recursionCount, int recursionLimit);

        // The <ctype.h> classification functions have undefined behavior for negative
        // arguments other than EOF. Plain `char` may be signed, so bytes >= 0x80 (e.g. the
        // bytes of UTF-8 encoded text) must be converted to unsigned char first.
        static inline int lower_char(char c) { return ::tolower(static_cast<unsigned char>(c)); }
        static inline bool is_lower_char(char c) { return ::islower(static_cast<unsigned char>(c)) != 0; }
        static inline bool is_upper_char(char c) { return ::isupper(static_cast<unsigned char>(c)) != 0; }
    }

    // Public interface

    // Returns true when every character of `pattern` appears in `str` in the same order,
    // compared case-insensitively. An empty pattern always matches.
    static bool fuzzy_match_simple(char const * pattern, char const * str) {
        while (*pattern != '\0' && *str != '\0') {
            if (fuzzy_internal::lower_char(*pattern) == fuzzy_internal::lower_char(*str))
                ++pattern;
            ++str;
        }

        // The pattern matched only if all of it was consumed
        return *pattern == '\0';
    }

    // Scored match that also reports the matched positions.
    //   pattern    text being searched for (an empty pattern never matches)
    //   str        candidate text
    //   outScore   receives the best score on success; left untouched on failure
    //   matches    receives, per pattern character, the index of the matched character in
    //              str. Indices are stored as uint8_t, so they wrap for str longer than 255.
    //   maxMatches capacity of `matches`; the match fails if more entries are needed
    // Returns true when the whole pattern was matched.
    static bool fuzzy_match(char const * pattern, char const * str, int & outScore, uint8_t * matches, int maxMatches) {
        int recursionCount = 0;
        int recursionLimit = 10; // total number of recursive calls allowed per query

        return fuzzy_internal::fuzzy_match_recursive(pattern, str, outScore, str, nullptr, matches, maxMatches, 0, recursionCount, recursionLimit);
    }

    // Scored match for callers that do not need the match positions.
    static bool fuzzy_match(char const * pattern, char const * str, int & outScore) {
        uint8_t matches[256];
        return fuzzy_match(pattern, str, outScore, matches, sizeof(matches));
    }

    // Private implementation
    //
    // Walks `str` consuming `pattern` greedily. At every matching character it also tries,
    // recursively, the alternative of skipping that occurrence, and keeps whichever
    // alternative scores best. `srcMatches` holds the positions already chosen by the
    // caller (first `nextMatch` entries); `matches` is this call's own output buffer.
    static bool fuzzy_internal::fuzzy_match_recursive(const char * pattern, const char * str, int & outScore,
        const char * strBegin, uint8_t const * srcMatches, uint8_t * matches, int maxMatches,
        int nextMatch, int & recursionCount, int recursionLimit)
    {
        // Count recursions (shared across the whole search, not a depth counter)
        ++recursionCount;
        if (recursionCount >= recursionLimit)
            return false;

        // Detect end of strings
        if (*pattern == '\0' || *str == '\0')
            return false;

        // Recursion params
        bool recursiveMatch = false;
        uint8_t bestRecursiveMatches[256];
        int bestRecursiveScore = 0;

        // Loop through pattern and str looking for a match
        bool first_match = true;
        while (*pattern != '\0' && *str != '\0') {

            // Found match
            if (lower_char(*pattern) == lower_char(*str)) {

                // Supplied matches buffer was too short
                if (nextMatch >= maxMatches)
                    return false;

                // Bring the caller's already-chosen positions into our own buffer the
                // first time we are about to write to it
                if (first_match && srcMatches) {
                    memcpy(matches, srcMatches, nextMatch);
                    first_match = false;
                }

                // Recursive call that "skips" this match
                uint8_t recursiveMatches[256];
                int recursiveScore = 0;
                if (fuzzy_match_recursive(pattern, str + 1, recursiveScore, strBegin, matches, recursiveMatches, sizeof(recursiveMatches), nextMatch, recursionCount, recursionLimit)) {

                    // Pick best recursive score
                    if (!recursiveMatch || recursiveScore > bestRecursiveScore) {
                        memcpy(bestRecursiveMatches, recursiveMatches, sizeof(bestRecursiveMatches));
                        bestRecursiveScore = recursiveScore;
                    }
                    recursiveMatch = true;
                }

                // Advance
                matches[nextMatch++] = (uint8_t)(str - strBegin);
                ++pattern;
            }
            ++str;
        }

        // Determine if full pattern was matched
        const bool matched = *pattern == '\0';

        // Calculate score
        if (matched) {
            const int sequential_bonus = 15;            // bonus for adjacent matches
            const int separator_bonus = 30;             // bonus if match occurs after a separator
            const int camel_bonus = 30;                 // bonus if match is uppercase and prev is lower
            const int first_letter_bonus = 15;          // bonus if the first letter is matched

            const int leading_letter_penalty = -5;      // penalty applied for every letter in str before the first match
            const int max_leading_letter_penalty = -15; // maximum penalty for leading letters
            const int unmatched_letter_penalty = -1;    // penalty for every letter that doesn't match

            // Iterate str to end
            while (*str != '\0')
                ++str;

            // Initialize score
            outScore = 100;

            // Apply leading letter penalty
            int penalty = leading_letter_penalty * matches[0];
            if (penalty < max_leading_letter_penalty)
                penalty = max_leading_letter_penalty;
            outScore += penalty;

            // Apply unmatched penalty
            int unmatched = (int)(str - strBegin) - nextMatch;
            outScore += unmatched_letter_penalty * unmatched;

            // Apply ordering bonuses
            for (int i = 0; i < nextMatch; ++i) {
                uint8_t currIdx = matches[i];

                if (i > 0) {
                    uint8_t prevIdx = matches[i - 1];

                    // Sequential
                    if (currIdx == (prevIdx + 1))
                        outScore += sequential_bonus;
                }

                // Check for bonuses based on neighbor character value
                if (currIdx > 0) {
                    // Camel case
                    char neighbor = strBegin[currIdx - 1];
                    char curr = strBegin[currIdx];
                    if (is_lower_char(neighbor) && is_upper_char(curr))
                        outScore += camel_bonus;

                    // Separator
                    bool neighborSeparator = neighbor == '_' || neighbor == ' ';
                    if (neighborSeparator)
                        outScore += separator_bonus;
                }
                else {
                    // First letter
                    outScore += first_letter_bonus;
                }
            }
        }

        // Return best result
        if (recursiveMatch && (!matched || bestRecursiveScore > outScore)) {
            // Recursive score is better than "this".
            // Never copy more than the scratch buffer holds: the caller's buffer may be
            // larger than 256 entries, and reading past bestRecursiveMatches is out of bounds.
            const int scratchSize = (int)sizeof(bestRecursiveMatches);
            const int copyCount = maxMatches < scratchSize ? maxMatches : scratchSize;
            memcpy(matches, bestRecursiveMatches, copyCount);
            outScore = bestRecursiveScore;
            return true;
        }
        else if (matched) {
            // "this" score is better than recursive
            return true;
        }
        else {
            // no match
            return false;
        }
    }
} // namespace fts

#endif // FTS_FUZZY_MATCH_IMPLEMENTATION

#endif // FTS_FUZZY_MATCH_H
16 changes: 9 additions & 7 deletions src/tools/ecode/commandpalette.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void CommandPalette::setCurModel( const std::shared_ptr<CommandPaletteModel>& cu

std::shared_ptr<CommandPaletteModel>
CommandPalette::fuzzyMatch( const std::vector<std::vector<std::string>>& cmdPalette,
const std::string& match, const size_t& max ) const {
const std::string& pattern, const size_t& max ) const {
if ( cmdPalette.empty() )
return {};

Expand All @@ -75,9 +75,11 @@ CommandPalette::fuzzyMatch( const std::vector<std::vector<std::string>>& cmdPale
std::vector<std::vector<std::string>> ret;

for ( size_t i = 0; i < cmdPalette.size(); i++ ) {
int matchName = String::fuzzyMatch( cmdPalette[i][0], match );
int matchKeybind = String::fuzzyMatch( cmdPalette[i][2], match );
matchesMap.insert( { std::max( matchName, matchKeybind ), i } );
int matchName = String::fuzzyMatch( pattern, cmdPalette[i][0] );
int matchKeybind = String::fuzzyMatch( pattern, cmdPalette[i][2] );
int matchScore = std::max( matchName, matchKeybind );
if ( matchScore > std::numeric_limits<int>::min() )
matchesMap.insert( { matchScore, i } );
}
for ( auto& res : matchesMap ) {
if ( ret.size() < max )
Expand All @@ -86,15 +88,15 @@ CommandPalette::fuzzyMatch( const std::vector<std::vector<std::string>>& cmdPale
return CommandPaletteModel::create( 3, ret );
}

void CommandPalette::asyncFuzzyMatch( const std::string& match, const size_t& max,
void CommandPalette::asyncFuzzyMatch( const std::string& pattern, const size_t& max,
MatchResultCb res ) const {
if ( !mCurModel )
return;

mPool->run( [this, match, max, res]() {
mPool->run( [this, pattern, max, res]() {
const std::vector<std::vector<std::string>>& cmdPalette =
mCurModel.get() == mBaseModel.get() ? mCommandPalette : mCommandPaletteEditor;
res( fuzzyMatch( cmdPalette, match, max ) );
res( fuzzyMatch( cmdPalette, pattern, max ) );
} );
}

Expand Down
4 changes: 2 additions & 2 deletions src/tools/ecode/commandpalette.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ class CommandPalette {
static std::shared_ptr<CommandPaletteModel>
asModel( const std::vector<std::string>& commandList, const EE::UI::KeyBindings& keybindings );

void asyncFuzzyMatch( const std::string& match, const size_t& max, MatchResultCb res ) const;
void asyncFuzzyMatch( const std::string& pattern, const size_t& max, MatchResultCb res ) const;

std::shared_ptr<CommandPaletteModel>
fuzzyMatch( const std::vector<std::vector<std::string>>& cmdPalette, const std::string& match,
fuzzyMatch( const std::vector<std::vector<std::string>>& cmdPalette, const std::string& pattern,
const size_t& max ) const;

void setCommandPalette( const std::vector<std::string>& commandList,
Expand Down
10 changes: 6 additions & 4 deletions src/tools/ecode/plugins/autocomplete/autocompleteplugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,21 @@ static json getURIAndPositionJSON( UICodeEditor* editor ) {

static AutoCompletePlugin::SymbolsList
fuzzyMatchSymbols( const std::vector<const AutoCompletePlugin::SymbolsList*>& symbolsVec,
const std::string& match, const size_t& max ) {
const std::string& pattern, const size_t& max ) {
AutoCompletePlugin::SymbolsList matches;
matches.reserve( max );
int score = 0;
for ( const auto& symbols : symbolsVec ) {
for ( const auto& symbol : *symbols ) {
if ( symbol.kind == LSPCompletionItemKind::Snippet ||
( score = String::fuzzyMatch( symbol.text, match, false,
symbol.kind != LSPCompletionItemKind::Text ) ) >
0 ) {
( score = String::fuzzyMatch( pattern, symbol.text ) ) >
std::numeric_limits<int>::min() ) {
if ( std::find( matches.begin(), matches.end(), symbol ) == matches.end() ) {
symbol.setScore( score );
matches.push_back( symbol );

if ( matches.size() > max )
break;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/tools/ecode/plugins/lsp/lspclientplugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ PluginRequestHandle LSPClientPlugin::processWorkspaceSymbol( const PluginMessage
if ( !query.empty() ) {
for ( auto& i : info ) {
if ( i.score == 0.f )
i.score = String::fuzzyMatch( i.name, query );
i.score = String::fuzzyMatch( query, i.name );
}
}
mManager->sendResponse( this, PluginMessageType::WorkspaceSymbol,
Expand All @@ -422,7 +422,7 @@ PluginRequestHandle LSPClientPlugin::processWorkspaceSymbol( const PluginMessage
if ( !query.empty() ) {
for ( auto& i : info ) {
if ( i.score == 0.f )
i.score = String::fuzzyMatch( i.name, query );
i.score = String::fuzzyMatch( query, i.name );
}
}
mManager->sendResponse( this, PluginMessageType::WorkspaceSymbol,
Expand Down
Loading

0 comments on commit dfb0820

Please sign in to comment.