Skip to content

Commit

Permalink
deps: update simdutf to 4.0.9
Browse files Browse the repository at this point in the history
PR-URL: #51655
Reviewed-By: Luigi Pinca <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Ulises Gascón <[email protected]>
  • Loading branch information
nodejs-github-bot authored and marco-ippolito committed May 2, 2024
1 parent 01aaddd commit 18838f2
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 42 deletions.
74 changes: 36 additions & 38 deletions deps/simdutf/simdutf.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2023-12-01 13:59:01 -0500. Do not edit! */
/* auto-generated on 2024-01-29 10:40:15 -0500. Do not edit! */
/* begin file src/simdutf.cpp */
#include "simdutf.h"
/* begin file src/implementation.cpp */
Expand Down Expand Up @@ -1151,7 +1151,7 @@ namespace icelake {
// We should not get warnings while including <x86intrin.h> yet we do
// under some versions of GCC.
// If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnigns.
// it is a GCC issue, we want to ignore these warnings.
SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized)
#endif

Expand Down Expand Up @@ -1568,7 +1568,7 @@ class implementation final : public simdutf::implementation {
// We should not get warnings while including <x86intrin.h> yet we do
// under some versions of GCC.
// If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnigns.
// it is a GCC issue, we want to ignore these warnings.
SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized)
#endif

Expand Down Expand Up @@ -2498,7 +2498,7 @@ class implementation final : public simdutf::implementation {
// We should not get warnings while including <x86intrin.h> yet we do
// under some versions of GCC.
// If the x86intrin.h header has uninitialized values that are problematic,
// it is a GCC issue, we want to ignore these warnigns.
// it is a GCC issue, we want to ignore these warnings.
SIMDUTF_DISABLE_GCC_WARNING(-Wuninitialized)
#endif

Expand Down Expand Up @@ -11655,7 +11655,7 @@ inline result convert_with_errors(const char* buf, size_t len, char16_t* utf16_o
*
* The caller is responsible to ensure that len > 0.
*
* If the error is believed to have occured prior to 'buf', the count value contain in the result
* If the error is believed to have occurred prior to 'buf', the count value contained in the result
* will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3.
*/
template <endianness endian>
Expand Down Expand Up @@ -11934,7 +11934,7 @@ inline result convert_with_errors(const char* buf, size_t len, char32_t* utf32_o
*
* The caller is responsible to ensure that len > 0.
*
* If the error is believed to have occured prior to 'buf', the count value contain in the result
* If the error is believed to have occurred prior to 'buf', the count value contained in the result
* will be SIZE_T - 1, SIZE_T - 2, or SIZE_T - 3.
*/
inline result rewind_and_convert_with_errors(size_t prior_bytes, const char* buf, size_t len, char32_t* utf32_output) {
Expand Down Expand Up @@ -16084,11 +16084,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down Expand Up @@ -16158,11 +16158,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down Expand Up @@ -17930,7 +17930,7 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
__m512i indexofthirdlastbytes = _mm512_add_epi16(mask_ffffffff,
indexofsecondlastbytes); // indices of the second last bytes
__m512i thirdlastbyte = _mm512_maskz_mov_epi8(m34,
clearedbytes); // only those that are the third last byte of a sequece
clearedbytes); // only those that are the third last byte of a sequence
__m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, indexofthirdlastbytes,
thirdlastbyte); // the third last bytes (of three byte sequences, hi
// surrogate)
Expand Down Expand Up @@ -17992,7 +17992,7 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
__m512i indexofthirdlastbytes = _mm512_add_epi16(mask_ffffffff,
indexofsecondlastbytes); // indices of the second last bytes
__m512i thirdlastbyte = _mm512_maskz_mov_epi8(m34,
clearedbytes); // only those that are the third last byte of a sequece
clearedbytes); // only those that are the third last byte of a sequence
__m512i thirdlastbytes = _mm512_maskz_permutexvar_epi8(0x5555555555555555, indexofthirdlastbytes,
thirdlastbyte); // the third last bytes (of three byte sequences, hi
// surrogate)
Expand Down Expand Up @@ -18048,7 +18048,7 @@ simdutf_really_inline bool process_block_utf8_to_utf16(const char *&in, char16_t
}
// Fast path 2: all ASCII or 2 byte
__mmask64 continuation_or_ascii = (tail == SIMDUTF_FULL) ? _knot_mask64(m234) : _kand_mask64(_knot_mask64(m234), b);
// on top of -0xc0 we substract -2 which we get back later of the
// on top of -0xc0 we subtract -2 which we get back later of the
// continuation byte tags
__m512i leading2byte = _mm512_maskz_sub_epi8(m234, input, mask_c2c2c2c2);
__mmask64 leading = tail == (tail == SIMDUTF_FULL) ? _kor_mask64(m1, m234) : _kand_mask64(_kor_mask64(m1, m234), b); // first bytes of each sequence
Expand Down Expand Up @@ -18296,7 +18296,7 @@ __m512i rotate_by_N_epi8(const __m512i input) {
stored at separate 32-bit lanes.

For each lane we also have a character class (`char_class`), given in the form
0x8080800N, where N is 4 higest bits from the leading byte; 0x80 resets
0x8080800N, where N is 4 highest bits from the leading byte; 0x80 resets
corresponding bytes during pshufb.
*/
simdutf_really_inline __m512i expanded_utf8_to_utf32(__m512i char_class, __m512i utf8) {
Expand Down Expand Up @@ -19214,7 +19214,7 @@ simdutf_really_inline size_t process_block_from_utf8_to_latin1(const char *buf,
// _mm512_storeu_si512((__m512i *)latin_output, output); I tried using
// _mm512_storeu_si512 and have the next process_block start from the
// "written_out" point but the compiler shuffles memory in such a way that it
// is signifcantly slower...
// is significantly slower...
// ****************************
_mm512_mask_storeu_epi8((__m512i *)latin_output, store_mask, output);

Expand Down Expand Up @@ -22028,10 +22028,9 @@ simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd
}

simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be > 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be > 0x80
return simd8<bool>(is_third_byte | is_fourth_byte);
}

/* begin file src/haswell/avx2_detect_encodings.cpp */
Expand Down Expand Up @@ -25495,11 +25494,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down Expand Up @@ -25569,11 +25568,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down Expand Up @@ -26887,10 +26886,10 @@ simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd
}

simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80
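// Caller requires a bool. After the subtraction only 111_____ (prev2) / 1111____ (prev3) bytes are >= 0x80, i.e. have their high bit set; the result is wrapped in simd8<bool> directly instead of going through a signed comparison.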
return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
return simd8<bool>(is_third_byte | is_fourth_byte);
}

} // unnamed namespace
Expand Down Expand Up @@ -27867,11 +27866,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down Expand Up @@ -27941,11 +27940,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down Expand Up @@ -28407,10 +28406,9 @@ simdutf_unused simdutf_really_inline simd8<bool> must_be_continuation(const simd
}

simdutf_really_inline simd8<bool> must_be_2_3_continuation(const simd8<uint8_t> prev2, const simd8<uint8_t> prev3) {
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0b11100000u-1); // Only 111_____ will be > 0
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0b11110000u-1); // Only 1111____ will be > 0
// Caller requires a bool (all 1's). All values resulting from the subtraction will be <= 64, so signed comparison is fine.
return simd8<int8_t>(is_third_byte | is_fourth_byte) > int8_t(0);
simd8<uint8_t> is_third_byte = prev2.saturating_sub(0xe0u-0x80); // Only 111_____ will be >= 0x80
simd8<uint8_t> is_fourth_byte = prev3.saturating_sub(0xf0u-0x80); // Only 1111____ will be >= 0x80
return simd8<bool>(is_third_byte | is_fourth_byte);
}

/* begin file src/westmere/internal/loader.cpp */
Expand Down Expand Up @@ -31874,11 +31872,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down Expand Up @@ -31948,11 +31946,11 @@ using namespace simd;
// 8 bytes when calling convert_masked_utf8_to_utf32. If you skip the last 16 bytes,
// and if the data is valid, then it is entirely safe because 16 UTF-8 bytes generate
// much more than 8 bytes. However, you cannot generally assume that you have valid
// UTF-8 input, so we are going to go back from the end counting 4 leading bytes,
// UTF-8 input, so we are going to go back from the end counting 8 leading bytes,
// to give us a good margin.
size_t leading_byte = 0;
size_t margin = size;
for(; margin > 0 && leading_byte < 4; margin--) {
for(; margin > 0 && leading_byte < 8; margin--) {
leading_byte += (int8_t(in[margin-1]) > -65);
}
// If the input is long enough, then we have that margin-1 is the eighth last leading byte.
Expand Down
8 changes: 4 additions & 4 deletions deps/simdutf/simdutf.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2023-12-01 13:59:01 -0500. Do not edit! */
/* auto-generated on 2024-01-29 10:40:15 -0500. Do not edit! */
/* begin file include/simdutf.h */
#ifndef SIMDUTF_H
#define SIMDUTF_H
Expand Down Expand Up @@ -567,7 +567,7 @@ SIMDUTF_DISABLE_UNDESIRED_WARNINGS
#define SIMDUTF_SIMDUTF_VERSION_H

/** The version of simdutf being used (major.minor.revision) */
#define SIMDUTF_VERSION "4.0.8"
#define SIMDUTF_VERSION "4.0.9"

namespace simdutf {
enum {
Expand All @@ -582,7 +582,7 @@ enum {
/**
* The revision (major.minor.REVISION) of simdutf being used.
*/
SIMDUTF_VERSION_REVISION = 8
SIMDUTF_VERSION_REVISION = 9
};
} // namespace simdutf

Expand Down Expand Up @@ -874,7 +874,7 @@ simdutf_really_inline simdutf_warn_unused simdutf::encoding_type autodetect_enco
* E.g., if the input might be UTF-16LE or UTF-8, this function returns
* the value (simdutf::encoding_type::UTF8 | simdutf::encoding_type::UTF16_LE).
*
* Overriden by each implementation.
* Overridden by each implementation.
*
* @param input the string to analyze.
* @param length the length of the string in bytes.
Expand Down

0 comments on commit 18838f2

Please sign in to comment.