|
4 | 4 | hasUnquotedSubstring,
|
5 | 5 | fastHash,
|
6 | 6 | replaceAll,
|
| 7 | + getUtf8EncodedLength, |
7 | 8 | } from '../../src/utils/string-utils';
|
8 | 9 |
|
9 | 10 | describe('splitByDelimiterWithEscapeCharacter', () => {
|
@@ -135,55 +136,55 @@ describe('fastHash', () => {
|
135 | 136 | });
|
136 | 137 | });
|
137 | 138 |
|
138 |
| -describe('stringUtils.getUtf8EncodedLength', () => { |
| 139 | +describe('getUtf8EncodedLength', () => { |
139 | 140 | it('should return 0 for an empty string', () => {
|
140 |
| - expect(stringUtils.getUtf8EncodedLength('')).toBe(0); |
| 141 | + expect(getUtf8EncodedLength('')).toBe(0); |
141 | 142 | });
|
142 | 143 |
|
143 | 144 | it('should return 1 for a single ASCII character', () => {
|
144 | 145 | // ASCII character
|
145 |
| - expect(stringUtils.getUtf8EncodedLength('A')).toBe(1); |
| 146 | + expect(getUtf8EncodedLength('A')).toBe(1); |
146 | 147 | });
|
147 | 148 |
|
148 | 149 | it('should return the correct byte length for a string with multiple ASCII characters', () => {
|
149 | 150 | // "Hello" consists of 5 ASCII characters
|
150 |
| - expect(stringUtils.getUtf8EncodedLength('Hello')).toBe(5); |
| 151 | + expect(getUtf8EncodedLength('Hello')).toBe(5); |
151 | 152 | });
|
152 | 153 |
|
153 | 154 | it('should return 2 for a 2-byte UTF-8 character (e.g., é)', () => {
|
154 | 155 | // 'é' is a 2-byte character in UTF-8
|
155 |
| - expect(stringUtils.getUtf8EncodedLength('é')).toBe(2); |
| 156 | + expect(getUtf8EncodedLength('é')).toBe(2); |
156 | 157 | });
|
157 | 158 |
|
158 | 159 | it('should return 3 for a 3-byte UTF-8 character (e.g., 中)', () => {
|
159 | 160 | // '中' is a 3-byte character in UTF-8
|
160 |
| - expect(stringUtils.getUtf8EncodedLength('中')).toBe(3); |
| 161 | + expect(getUtf8EncodedLength('中')).toBe(3); |
161 | 162 | });
|
162 | 163 |
|
163 | 164 | it('should return 4 for a 4-byte UTF-8 character (e.g., smiley emoji)', () => {
|
164 | 165 | // '😄' is a 4-byte character in UTF-8 (represented as a surrogate pair in UTF-16)
|
165 |
| - expect(stringUtils.getUtf8EncodedLength('😄')).toBe(4); |
| 166 | + expect(getUtf8EncodedLength('😄')).toBe(4); |
166 | 167 | });
|
167 | 168 |
|
168 | 169 | it('should handle mixed strings with ASCII and multi-byte UTF-8 characters', () => {
|
169 | 170 | // "Hello " = 6 bytes, 'é' = 2 bytes
|
170 |
| - expect(stringUtils.getUtf8EncodedLength('Hello é')).toBe(8); |
| 171 | + expect(getUtf8EncodedLength('Hello é')).toBe(8); |
171 | 172 | });
|
172 | 173 |
|
173 | 174 | it('should handle surrogate pairs correctly', () => {
|
174 | 175 | const complexEmoji = '👨‍👩‍👧‍👦'; // Family emoji
|
175 |
| - expect(stringUtils.getUtf8EncodedLength(complexEmoji)).toBe(25); // Surrogate pairs and ZWJ |
| 176 | + expect(getUtf8EncodedLength(complexEmoji)).toBe(25); // Surrogate pairs and ZWJ |
176 | 177 | });
|
177 | 178 |
|
178 | 179 | it('should return correct byte length for characters in different byte ranges', () => {
|
179 | 180 | // 'A' = 1 byte, 'é' = 2 bytes, '中' = 3 bytes, '😄' = 4 bytes
|
180 |
| - expect(stringUtils.getUtf8EncodedLength('Aé中😄')).toBe(10); |
| 181 | + expect(getUtf8EncodedLength('Aé中😄')).toBe(10); |
181 | 182 | });
|
182 | 183 |
|
183 | 184 | it('should correctly handle special characters like newline, tabs, etc.', () => {
|
184 | 185 | // Newline is a single byte in UTF-8
|
185 |
| - expect(stringUtils.getUtf8EncodedLength('\n')).toBe(1); |
| 186 | + expect(getUtf8EncodedLength('\n')).toBe(1); |
186 | 187 | // Tab is a single byte in UTF-8
|
187 |
| - expect(stringUtils.getUtf8EncodedLength('\t')).toBe(1); |
| 188 | + expect(getUtf8EncodedLength('\t')).toBe(1); |
188 | 189 | });
|
189 | 190 | });
|
0 commit comments