18
18
{-# LANGUAGE UnboxedTuples #-}
19
19
{-# LANGUAGE FlexibleContexts #-}
20
20
module Foundation.String.ASCII
21
- ( AsciiString ( .. )
21
+ ( AsciiString
22
22
-- , Buffer
23
23
, create
24
24
, replicate
@@ -27,64 +27,60 @@ module Foundation.String.ASCII
27
27
, toBytes
28
28
, copy
29
29
30
- , validate
31
- , ASCII7_Invalid (.. )
32
30
-- * Legacy utility
33
31
, lines
34
32
, words
35
33
) where
36
34
37
35
import Foundation.Array.Unboxed (UArray )
38
36
import qualified Foundation.Array.Unboxed as Vec
39
- import Foundation.Array.Unboxed.ByteArray (MutableByteArray )
40
37
import qualified Foundation.Array.Unboxed.Mutable as MVec
41
38
import qualified Foundation.Collection as C
42
39
import Foundation.Internal.Base
43
40
import Foundation.Internal.Types
44
41
import Foundation.Number
45
42
import Foundation.Primitive.Monad
46
- import GHC.Prim
47
- import GHC.Types
48
- import GHC.Word
43
+ import Foundation.Foreign
44
+
45
+ import GHC.Int
46
+ import GHC.Types
47
+ import GHC.Prim
49
48
50
49
-- temporary
51
50
import qualified Data.List
52
51
import qualified Prelude
53
52
import Foundation.Class.Bifunctor
54
53
55
- import Foundation.String.ModifiedUTF8 (fromModified )
56
- import GHC.CString (unpackCString #,
57
- unpackCStringUtf8 #)
58
-
59
- import qualified Foundation.String.Encoding.ASCII7 as Encoder
60
- import Foundation.String.Encoding.ASCII7 (ASCII7_Invalid )
54
+ ccharToChar :: CChar -> Char
55
+ ccharToChar (CChar (I8 # i)) = C # (chr# i)
56
+ charToCChar :: Char -> CChar
57
+ charToCChar (C # i) = CChar (I8 # (ord# i))
61
58
62
59
-- | Opaque packed array of characters in the ASCII encoding
63
- newtype AsciiString = AsciiString ( UArray Word8 )
60
+ newtype AsciiString = AsciiString { toBytes :: UArray CChar }
64
61
deriving (Typeable , Monoid , Eq , Ord )
65
62
66
- toBytes :: AsciiString -> UArray Word8
67
- toBytes (AsciiString bs) = bs
68
-
69
- newtype MutableAsciiString st = MutableAsciiString (MutableByteArray st )
63
+ newtype MutableAsciiString st = MutableAsciiString (MVec. MUArray CChar st )
70
64
deriving (Typeable )
71
65
72
66
instance Show AsciiString where
73
- show = show . sToList
67
+ show = fmap ccharToChar . toList
74
68
instance IsString AsciiString where
75
- fromString = sFromList
69
+ fromString = fromList . fmap charToCChar
76
70
instance IsList AsciiString where
77
- type Item AsciiString = Char
71
+ type Item AsciiString = CChar
78
72
fromList = sFromList
79
73
toList = sToList
80
74
81
- type instance C. Element AsciiString = Char
75
+ type instance C. Element AsciiString = CChar
82
76
83
77
instance C. InnerFunctor AsciiString where
84
- imap = charMap
78
+ imap = ccharMap
85
79
instance C. Collection AsciiString where
86
80
null = null
87
81
length = length
82
+ minimum = Data.List. minimum . toList . C. getNonEmpty -- TODO faster implementation
83
+ maximum = Data.List. maximum . toList . C. getNonEmpty -- TODO faster implementation
88
84
instance C. Sequential AsciiString where
89
85
take = take
90
86
drop = drop
@@ -111,56 +107,31 @@ instance C.Zippable AsciiString where
111
107
-- TODO Use a string builder once available
112
108
zipWith f a b = sFromList (C. zipWith f a b)
113
109
114
- -- | Validate a bytearray for ASCIIness
115
- --
116
- -- On success Nothing is returned
117
- -- On Failure the position along with the failure reason
118
- validate :: AsciiString -> Maybe ASCII7_Invalid
119
- validate = Encoder. validate . toBytes
120
- {-# INLINE validate #-}
121
-
122
- next :: AsciiString -> Offset Char -> (# Char , Offset Char # )
123
- next (AsciiString ba) (Offset n) = (# toChar h, Offset (n + 1 ) # )
110
+ next :: AsciiString -> Offset CChar -> (# CChar , Offset CChar # )
111
+ next (AsciiString ba) (Offset n) = (# h, Offset (n + 1 ) # )
124
112
where
125
113
! h = Vec. unsafeIndex ba n
126
114
127
- toChar :: Word8 -> Char
128
- toChar (W8 # w) = C # (chr# (word2Int# w))
129
- {-# INLINE toChar #-}
130
-
131
- toWord8 :: Char -> Word8
132
- toWord8 (C # i) = W8 # (int2Word# (ord# i))
133
- {-# INLINE toWord8 #-}
134
-
135
115
freeze :: PrimMonad prim => MutableAsciiString (PrimState prim ) -> prim AsciiString
136
116
freeze (MutableAsciiString mba) = AsciiString `fmap` C. unsafeFreeze mba
137
117
{-# INLINE freeze #-}
138
118
139
119
------------------------------------------------------------------------
140
120
-- real functions
141
121
142
- sToList :: AsciiString -> [Char ]
122
+ sToList :: AsciiString -> [CChar ]
143
123
sToList s = loop azero
144
124
where
125
+ nbBytes :: Size CChar
145
126
! nbBytes = size s
146
127
! end = azero `offsetPlusE` nbBytes
147
128
loop idx
148
129
| idx == end = []
149
130
| otherwise =
150
131
let (# c , idx' # ) = next s idx in c : loop idx'
151
132
152
-
153
- {-# RULES
154
- "AsciiString sFromList" forall s .
155
- sFromList (unpackCString# s) = AsciiString $ fromModified s
156
- #-}
157
- {-# RULES
158
- "AsciiString sFromList" forall s .
159
- sFromList (unpackCStringUtf8# s) = AsciiString $ fromModified s
160
- #-}
161
-
162
- sFromList :: [Char ] -> AsciiString
163
- sFromList = AsciiString . fromList . fmap toWord8
133
+ sFromList :: [CChar ] -> AsciiString
134
+ sFromList = AsciiString . fromList
164
135
{-# INLINE [0] sFromList #-}
165
136
166
137
null :: AsciiString -> Bool
@@ -206,50 +177,43 @@ revSplitAt n v = (drop idx v, take idx v)
206
177
-- > splitOn (== ':') "abc::def" == ["abc","","def"]
207
178
-- > splitOn (== ':') "::abc::def" == ["","","abc","","def"]
208
179
--
209
- splitOn :: (Char -> Bool ) -> AsciiString -> [AsciiString ]
210
- splitOn predicate = fmap AsciiString . Vec. splitOn f . toBytes
211
- where
212
- f :: Word8 -> Bool
213
- f = predicate . toChar
180
+ splitOn :: (CChar -> Bool ) -> AsciiString -> [AsciiString ]
181
+ splitOn predicate = fmap AsciiString . Vec. splitOn predicate . toBytes
214
182
215
- break :: (Char -> Bool ) -> AsciiString -> (AsciiString , AsciiString )
216
- break predicate = bimap AsciiString AsciiString . Vec. break ( predicate . toChar) . toBytes
183
+ break :: (CChar -> Bool ) -> AsciiString -> (AsciiString , AsciiString )
184
+ break predicate = bimap AsciiString AsciiString . Vec. break predicate . toBytes
217
185
{-# INLINE [0] break #-}
218
186
219
187
{-# RULES "break (== 'c')" [3] forall c . break (== c) = breakElem c #-}
220
188
221
- breakElem :: Char -> AsciiString -> (AsciiString , AsciiString )
189
+ breakElem :: CChar -> AsciiString -> (AsciiString , AsciiString )
222
190
breakElem ! el (AsciiString ba) =
223
- let (# v1,v2 # ) = Vec. splitElem (w8 el) ba in (AsciiString v1, AsciiString v2)
224
- where
225
- w8 (C # ch) = W8 # (int2Word# (ord# ch))
191
+ let (# v1,v2 # ) = Vec. splitElem el ba in (AsciiString v1, AsciiString v2)
226
192
{-# INLINE breakElem #-}
227
193
228
- intersperse :: Char -> AsciiString -> AsciiString
229
- intersperse sep = AsciiString . Vec. intersperse (toWord8 sep) . toBytes
194
+ intersperse :: CChar -> AsciiString -> AsciiString
195
+ intersperse sep = AsciiString . Vec. intersperse sep . toBytes
230
196
231
- span :: (Char -> Bool ) -> AsciiString -> (AsciiString , AsciiString )
197
+ span :: (CChar -> Bool ) -> AsciiString -> (AsciiString , AsciiString )
232
198
span predicate = break (not . predicate)
233
199
234
200
-- | size in bytes
235
- size :: AsciiString -> Size Char
201
+ size :: AsciiString -> Size CChar
236
202
size = Size . C. length . toBytes
237
203
238
204
length :: AsciiString -> Int
239
205
length s = let (Size l) = size s in l
240
206
241
- replicate :: Int -> Char -> AsciiString
242
- replicate n c = AsciiString $ Vec. create n (const w)
243
- where
244
- ! w = toWord8 c
207
+ replicate :: Int -> CChar -> AsciiString
208
+ replicate n c = AsciiString $ Vec. create n (const c)
245
209
246
210
-- | Copy the AsciiString
247
211
copy :: AsciiString -> AsciiString
248
212
copy (AsciiString s) = AsciiString (Vec. copy s)
249
213
250
214
-- | Allocate a MutableAsciiString of a specific size in bytes.
251
215
new :: PrimMonad prim
252
- => Size8 -- ^ in number of bytes, not of elements.
216
+ => Size CChar -- ^ in number of bytes, not of elements.
253
217
-> prim (MutableAsciiString (PrimState prim ))
254
218
new n = MutableAsciiString `fmap` MVec. new n
255
219
@@ -261,30 +225,28 @@ create sz f = do
261
225
then freeze ms
262
226
else C. take filled `fmap` freeze ms
263
227
264
- charMap :: (Char -> Char ) -> AsciiString -> AsciiString
265
- charMap f = AsciiString . Vec. map (toWord8 . f . toChar) . toBytes
228
+ ccharMap :: (CChar -> CChar ) -> AsciiString -> AsciiString
229
+ ccharMap f = AsciiString . Vec. map f . toBytes
266
230
267
- snoc :: AsciiString -> Char -> AsciiString
268
- snoc (AsciiString ba) = AsciiString . Vec. snoc ba . toWord8
231
+ snoc :: AsciiString -> CChar -> AsciiString
232
+ snoc (AsciiString ba) = AsciiString . Vec. snoc ba
269
233
270
- cons :: Char -> AsciiString -> AsciiString
271
- cons c = AsciiString . Vec. cons (toWord8 c) . toBytes
234
+ cons :: CChar -> AsciiString -> AsciiString
235
+ cons c = AsciiString . Vec. cons c . toBytes
272
236
273
- unsnoc :: AsciiString -> Maybe (AsciiString , Char )
274
- unsnoc str = bimap AsciiString toChar <$> Vec. unsnoc (toBytes str)
237
+ unsnoc :: AsciiString -> Maybe (AsciiString , CChar )
238
+ unsnoc str = first AsciiString <$> Vec. unsnoc (toBytes str)
275
239
276
- uncons :: AsciiString -> Maybe (Char , AsciiString )
277
- uncons str = bimap toChar AsciiString <$> Vec. uncons (toBytes str)
240
+ uncons :: AsciiString -> Maybe (CChar , AsciiString )
241
+ uncons str = second AsciiString <$> Vec. uncons (toBytes str)
278
242
279
- find :: (Char -> Bool ) -> AsciiString -> Maybe Char
280
- find predicate ( AsciiString ba) = toChar <$> Vec. find ( predicate . toChar) ba
243
+ find :: (CChar -> Bool ) -> AsciiString -> Maybe CChar
244
+ find predicate = Vec. find predicate . toBytes
281
245
282
- sortBy :: (Char -> Char -> Ordering ) -> AsciiString -> AsciiString
283
- sortBy sortF = AsciiString . Vec. sortBy f . toBytes
284
- where
285
- f a b = sortF (toChar a) (toChar b)
246
+ sortBy :: (CChar -> CChar -> Ordering ) -> AsciiString -> AsciiString
247
+ sortBy sortF = AsciiString . Vec. sortBy sortF . toBytes
286
248
287
- filter :: (Char -> Bool ) -> AsciiString -> AsciiString
249
+ filter :: (CChar -> Bool ) -> AsciiString -> AsciiString
288
250
filter p s = fromList $ Data.List. filter p $ toList s
289
251
290
252
reverse :: AsciiString -> AsciiString
@@ -295,11 +257,11 @@ reverse (AsciiString ba) = AsciiString $ Vec.reverse ba
295
257
-- If the input contains invalid sequences, it will trigger runtime async errors when processing data.
296
258
--
297
259
-- In doubt, use 'fromBytes'
298
- fromBytesUnsafe :: UArray Word8 -> AsciiString
260
+ fromBytesUnsafe :: UArray CChar -> AsciiString
299
261
fromBytesUnsafe = AsciiString
300
262
301
263
lines :: AsciiString -> [AsciiString ]
302
- lines = fmap fromList . Prelude. lines . toList
264
+ lines = fmap fromString . Prelude. lines . show
303
265
304
266
words :: AsciiString -> [AsciiString ]
305
- words = fmap fromList . Prelude. words . toList
267
+ words = fmap fromString . Prelude. words . show
0 commit comments