From 8927650e0338499d4f3a76a71a0b72cb116a25d9 Mon Sep 17 00:00:00 2001 From: Vlad Voloshyn Date: Sun, 9 Jan 2022 04:34:44 +0300 Subject: [PATCH] =?UTF-8?q?=F0=9F=9A=80=20performance=20optimization=20(PR?= =?UTF-8?q?=20#2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Minor performance optimizations: (PR #2 by @NewEXE) 1. avoid calling count() inside a loop 2. pass the string to be converted to ASCII by reference --- mb_levenshtein.php | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/mb_levenshtein.php b/mb_levenshtein.php index 1ff523d..6aee1d2 100644 --- a/mb_levenshtein.php +++ b/mb_levenshtein.php @@ -60,8 +60,8 @@ function mb_levenshtein_ratio($s1, $s2, $cost_ins = 1, $cost_rep = 1, $cost_del function mb_levenshtein($s1, $s2, $cost_ins = 1, $cost_rep = 1, $cost_del = 1) { $charMap = array(); - $s1 = convert_mb_ascii($s1, $charMap); - $s2 = convert_mb_ascii($s2, $charMap); + convert_mb_ascii($s1, $charMap); + convert_mb_ascii($s2, $charMap); return levenshtein($s1, $s2, $cost_ins, $cost_rep, $cost_del); } @@ -82,25 +82,27 @@ function mb_levenshtein($s1, $s2, $cost_ins = 1, $cost_rep = 1, $cost_del = 1) * @param string $str UTF-8 string to be converted to extended ASCII. * @param array $map Reference of the map. * - * @return string Extended ASCII + * @return void */ -function convert_mb_ascii($str, &$map) +function convert_mb_ascii(&$str, &$map) { // find all utf-8 characters $matches = array(); if (! preg_match_all('/[\xC0-\xF7][\x80-\xBF]+/', $str, $matches)) { - return $str; // plain ascii string + return; // plain ascii string } // update the encoding map with the characters not already met + $mapCount = count($map); foreach ($matches[0] as $mbc) { if (! isset($map[$mbc])) { - $map[$mbc] = chr(128 + count($map)); + $map[$mbc] = chr(128 + $mapCount); + $mapCount++; } } // finally remap non-ascii characters - return strtr($str, $map); + $str = strtr($str, $map); } }