-
Notifications
You must be signed in to change notification settings - Fork 4.9k
/
Copy pathpal_casing.c
156 lines (139 loc) · 5.47 KB
/
pal_casing.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
//
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include "pal_icushim_internal.h"
#include "pal_casing.h"
#ifdef __clang__
// Workaround for warnings produced by U16_NEXT and U16_APPEND macro expansions
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wsign-conversion"
#endif
/*
Function:
ChangeCase

Upper cases (bToUpper != 0) or lower cases (bToUpper == 0) the UTF-16 text in
lpSrc, one code point at a time, storing the result in lpDst.
The only mappings applied are the ones ICU's u_toupper / u_tolower return;
nothing is special cased on top of that.

cwSrcLength is the number of UTF-16 code units to read from lpSrc and
cwDstLength is the number of UTF-16 code units that may be written to lpDst.
*/
void GlobalizationNative_ChangeCase(
    const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
{
    // Each loop iteration:
    //   1. decodes the next code point (one or two UTF-16 code units) from
    //      lpSrc, advancing readPos past it,
    //   2. maps it to the requested case,
    //   3. encodes the mapped code point into lpDst, advancing writePos.
    // There is deliberately one copy of the loop per casing direction instead
    // of a single loop that tests bToUpper on every iteration: the compiler was
    // not doing that transformation itself, and the duplicated form measured
    // ~15-20% faster on longer strings.
    UBool appendFailed = false;
    (void)appendFailed; // only read by the asserts below

    int32_t readPos = 0;
    int32_t writePos = 0;

    if (!bToUpper)
    {
        while (readPos < cwSrcLength)
        {
            UChar32 decoded;
            U16_NEXT(lpSrc, readPos, cwSrcLength, decoded);

            const UChar32 mapped = u_tolower(decoded);
            U16_APPEND(lpDst, writePos, cwDstLength, mapped, appendFailed);

            // The mapped code point is expected to take exactly as many code
            // units as the one it replaces, keeping both cursors in lock step.
            assert(!appendFailed && readPos == writePos);
        }
    }
    else
    {
        while (readPos < cwSrcLength)
        {
            UChar32 decoded;
            U16_NEXT(lpSrc, readPos, cwSrcLength, decoded);

            const UChar32 mapped = u_toupper(decoded);
            U16_APPEND(lpDst, writePos, cwDstLength, mapped, appendFailed);

            // See the matching assert in the lower-casing loop.
            assert(!appendFailed && readPos == writePos);
        }
    }
}
/*
Function:
ChangeCaseInvariant

Upper cases (bToUpper != 0) or lower cases (bToUpper == 0) the UTF-16 text in
lpSrc, one code point at a time, storing the result in lpDst.
Two code points are special cased so that invariant casing matches what
Windows does for them (the Turkish i's): U+0131 is left alone when upper
casing and U+0130 is left alone when lower casing. Everything else goes
through ICU's u_toupper / u_tolower.

cwSrcLength is the number of UTF-16 code units to read from lpSrc and
cwDstLength is the number of UTF-16 code units that may be written to lpDst.
*/
void GlobalizationNative_ChangeCaseInvariant(
    const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
{
    // Decode one code point from lpSrc, map it, encode it into lpDst; repeat
    // until the source is exhausted. The loop is written out once per casing
    // direction so that bToUpper is tested only once, outside the loops.
    UBool appendFailed = false;
    (void)appendFailed; // only read by the asserts below

    int32_t readPos = 0;
    int32_t writePos = 0;

    if (!bToUpper)
    {
        while (readPos < cwSrcLength)
        {
            UChar32 decoded;
            U16_NEXT(lpSrc, readPos, cwSrcLength, decoded);

            // On Windows with InvariantCulture, LATIN CAPITAL LETTER I WITH DOT
            // ABOVE (U+0130) lower cases to itself, whereas ICU lower cases it
            // to LATIN SMALL LETTER I (U+0069). Keep it unchanged to match the
            // Windows invariant behavior.
            UChar32 mapped;
            if (decoded == (UChar32)0x0130)
            {
                mapped = (UChar32)0x0130;
            }
            else
            {
                mapped = u_tolower(decoded);
            }

            U16_APPEND(lpDst, writePos, cwDstLength, mapped, appendFailed);

            // The mapped code point is expected to take exactly as many code
            // units as the one it replaces, keeping both cursors in lock step.
            assert(!appendFailed && readPos == writePos);
        }
    }
    else
    {
        while (readPos < cwSrcLength)
        {
            UChar32 decoded;
            U16_NEXT(lpSrc, readPos, cwSrcLength, decoded);

            // On Windows with InvariantCulture, LATIN SMALL LETTER DOTLESS I
            // (U+0131) capitalizes to itself, whereas ICU capitalizes it to
            // LATIN CAPITAL LETTER I (U+0049). Keep it unchanged to match the
            // Windows invariant behavior.
            UChar32 mapped;
            if (decoded == (UChar32)0x0131)
            {
                mapped = (UChar32)0x0131;
            }
            else
            {
                mapped = u_toupper(decoded);
            }

            U16_APPEND(lpDst, writePos, cwDstLength, mapped, appendFailed);

            // See the matching assert in the lower-casing loop.
            assert(!appendFailed && readPos == writePos);
        }
    }
}
/*
Function:
ChangeCaseTurkish

Upper cases (bToUpper != 0) or lower cases (bToUpper == 0) the UTF-16 text in
lpSrc, one code point at a time, storing the result in lpDst, using the
Turkish mappings for the letter i: U+0069 upper cases to U+0130 and U+0049
lower cases to U+0131. Everything else goes through ICU's
u_toupper / u_tolower.

cwSrcLength is the number of UTF-16 code units to read from lpSrc and
cwDstLength is the number of UTF-16 code units that may be written to lpDst.
*/
void GlobalizationNative_ChangeCaseTurkish(
    const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength, int32_t bToUpper)
{
    // Decode one code point from lpSrc, map it, encode it into lpDst; repeat
    // until the source is exhausted. The loop is written out once per casing
    // direction so that bToUpper is tested only once, outside the loops.
    UBool appendFailed = false;
    (void)appendFailed; // only read by the asserts below

    int32_t readPos = 0;
    int32_t writePos = 0;

    if (!bToUpper)
    {
        while (readPos < cwSrcLength)
        {
            UChar32 decoded;
            U16_NEXT(lpSrc, readPos, cwSrcLength, decoded);

            // In Turkish casing, LATIN CAPITAL LETTER I (U+0049) lower cases to
            // LATIN SMALL LETTER DOTLESS I (U+0131).
            UChar32 mapped;
            if (decoded == (UChar32)0x0049)
            {
                mapped = (UChar32)0x0131;
            }
            else
            {
                mapped = u_tolower(decoded);
            }

            U16_APPEND(lpDst, writePos, cwDstLength, mapped, appendFailed);

            // The mapped code point is expected to take exactly as many code
            // units as the one it replaces, keeping both cursors in lock step.
            assert(!appendFailed && readPos == writePos);
        }
    }
    else
    {
        while (readPos < cwSrcLength)
        {
            UChar32 decoded;
            U16_NEXT(lpSrc, readPos, cwSrcLength, decoded);

            // In Turkish casing, LATIN SMALL LETTER I (U+0069) upper cases to
            // LATIN CAPITAL LETTER I WITH DOT ABOVE (U+0130).
            UChar32 mapped;
            if (decoded == (UChar32)0x0069)
            {
                mapped = (UChar32)0x0130;
            }
            else
            {
                mapped = u_toupper(decoded);
            }

            U16_APPEND(lpDst, writePos, cwDstLength, mapped, appendFailed);

            // See the matching assert in the lower-casing loop.
            assert(!appendFailed && readPos == writePos);
        }
    }
}
#ifdef __clang__
#pragma clang diagnostic pop
#endif