Skip to content
This repository was archived by the owner on Sep 20, 2023. It is now read-only.

Commit e0bc70a

Browse files
committed
make AsciiString uses CChar type
1 parent f84bcff commit e0bc70a

File tree

2 files changed

+59
-96
lines changed

2 files changed

+59
-96
lines changed

Foundation/String/ASCII.hs

+56-94
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
{-# LANGUAGE UnboxedTuples #-}
1919
{-# LANGUAGE FlexibleContexts #-}
2020
module Foundation.String.ASCII
21-
( AsciiString(..)
21+
( AsciiString
2222
--, Buffer
2323
, create
2424
, replicate
@@ -27,64 +27,60 @@ module Foundation.String.ASCII
2727
, toBytes
2828
, copy
2929

30-
, validate
31-
, ASCII7_Invalid(..)
3230
-- * Legacy utility
3331
, lines
3432
, words
3533
) where
3634

3735
import Foundation.Array.Unboxed (UArray)
3836
import qualified Foundation.Array.Unboxed as Vec
39-
import Foundation.Array.Unboxed.ByteArray (MutableByteArray)
4037
import qualified Foundation.Array.Unboxed.Mutable as MVec
4138
import qualified Foundation.Collection as C
4239
import Foundation.Internal.Base
4340
import Foundation.Internal.Types
4441
import Foundation.Number
4542
import Foundation.Primitive.Monad
46-
import GHC.Prim
47-
import GHC.Types
48-
import GHC.Word
43+
import Foundation.Foreign
44+
45+
import GHC.Int
46+
import GHC.Types
47+
import GHC.Prim
4948

5049
-- temporary
5150
import qualified Data.List
5251
import qualified Prelude
5352
import Foundation.Class.Bifunctor
5453

55-
import Foundation.String.ModifiedUTF8 (fromModified)
56-
import GHC.CString (unpackCString#,
57-
unpackCStringUtf8#)
58-
59-
import qualified Foundation.String.Encoding.ASCII7 as Encoder
60-
import Foundation.String.Encoding.ASCII7 (ASCII7_Invalid)
54+
ccharToChar :: CChar -> Char
55+
ccharToChar (CChar (I8# i)) = C# (chr# i)
56+
charToCChar :: Char -> CChar
57+
charToCChar (C# i) = CChar (I8# (ord# i))
6158

6259
-- | Opaque packed array of characters in the ASCII encoding
63-
newtype AsciiString = AsciiString (UArray Word8)
60+
newtype AsciiString = AsciiString { toBytes :: UArray CChar }
6461
deriving (Typeable, Monoid, Eq, Ord)
6562

66-
toBytes :: AsciiString -> UArray Word8
67-
toBytes (AsciiString bs) = bs
68-
69-
newtype MutableAsciiString st = MutableAsciiString (MutableByteArray st)
63+
newtype MutableAsciiString st = MutableAsciiString (MVec.MUArray CChar st)
7064
deriving (Typeable)
7165

7266
instance Show AsciiString where
73-
show = show . sToList
67+
show = fmap ccharToChar . toList
7468
instance IsString AsciiString where
75-
fromString = sFromList
69+
fromString = fromList . fmap charToCChar
7670
instance IsList AsciiString where
77-
type Item AsciiString = Char
71+
type Item AsciiString = CChar
7872
fromList = sFromList
7973
toList = sToList
8074

81-
type instance C.Element AsciiString = Char
75+
type instance C.Element AsciiString = CChar
8276

8377
instance C.InnerFunctor AsciiString where
84-
imap = charMap
78+
imap = ccharMap
8579
instance C.Collection AsciiString where
8680
null = null
8781
length = length
82+
minimum = Data.List.minimum . toList . C.getNonEmpty -- TODO faster implementation
83+
maximum = Data.List.maximum . toList . C.getNonEmpty -- TODO faster implementation
8884
instance C.Sequential AsciiString where
8985
take = take
9086
drop = drop
@@ -111,56 +107,31 @@ instance C.Zippable AsciiString where
111107
-- TODO Use a string builder once available
112108
zipWith f a b = sFromList (C.zipWith f a b)
113109

114-
-- | Validate a bytearray for ASCIIness
115-
--
116-
-- On success Nothing is returned
117-
-- On Failure the position along with the failure reason
118-
validate :: AsciiString -> Maybe ASCII7_Invalid
119-
validate = Encoder.validate . toBytes
120-
{-# INLINE validate #-}
121-
122-
next :: AsciiString -> Offset Char -> (# Char, Offset Char #)
123-
next (AsciiString ba) (Offset n) = (# toChar h, Offset (n + 1) #)
110+
next :: AsciiString -> Offset CChar -> (# CChar, Offset CChar #)
111+
next (AsciiString ba) (Offset n) = (# h, Offset (n + 1) #)
124112
where
125113
!h = Vec.unsafeIndex ba n
126114

127-
toChar :: Word8 -> Char
128-
toChar (W8# w) = C# (chr# (word2Int# w))
129-
{-# INLINE toChar #-}
130-
131-
toWord8 :: Char -> Word8
132-
toWord8 (C# i) = W8# (int2Word# (ord# i))
133-
{-# INLINE toWord8 #-}
134-
135115
freeze :: PrimMonad prim => MutableAsciiString (PrimState prim) -> prim AsciiString
136116
freeze (MutableAsciiString mba) = AsciiString `fmap` C.unsafeFreeze mba
137117
{-# INLINE freeze #-}
138118

139119
------------------------------------------------------------------------
140120
-- real functions
141121

142-
sToList :: AsciiString -> [Char]
122+
sToList :: AsciiString -> [CChar]
143123
sToList s = loop azero
144124
where
125+
nbBytes :: Size CChar
145126
!nbBytes = size s
146127
!end = azero `offsetPlusE` nbBytes
147128
loop idx
148129
| idx == end = []
149130
| otherwise =
150131
let (# c , idx' #) = next s idx in c : loop idx'
151132

152-
153-
{-# RULES
154-
"AsciiString sFromList" forall s .
155-
sFromList (unpackCString# s) = AsciiString $ fromModified s
156-
#-}
157-
{-# RULES
158-
"AsciiString sFromList" forall s .
159-
sFromList (unpackCStringUtf8# s) = AsciiString $ fromModified s
160-
#-}
161-
162-
sFromList :: [Char] -> AsciiString
163-
sFromList = AsciiString . fromList . fmap toWord8
133+
sFromList :: [CChar] -> AsciiString
134+
sFromList = AsciiString . fromList
164135
{-# INLINE [0] sFromList #-}
165136

166137
null :: AsciiString -> Bool
@@ -206,50 +177,43 @@ revSplitAt n v = (drop idx v, take idx v)
206177
-- > splitOn (== ':') "abc::def" == ["abc","","def"]
207178
-- > splitOn (== ':') "::abc::def" == ["","","abc","","def"]
208179
--
209-
splitOn :: (Char -> Bool) -> AsciiString -> [AsciiString]
210-
splitOn predicate = fmap AsciiString . Vec.splitOn f . toBytes
211-
where
212-
f :: Word8 -> Bool
213-
f = predicate . toChar
180+
splitOn :: (CChar -> Bool) -> AsciiString -> [AsciiString]
181+
splitOn predicate = fmap AsciiString . Vec.splitOn predicate . toBytes
214182

215-
break :: (Char -> Bool) -> AsciiString -> (AsciiString, AsciiString)
216-
break predicate = bimap AsciiString AsciiString . Vec.break (predicate . toChar) . toBytes
183+
break :: (CChar -> Bool) -> AsciiString -> (AsciiString, AsciiString)
184+
break predicate = bimap AsciiString AsciiString . Vec.break predicate . toBytes
217185
{-# INLINE[0] break #-}
218186

219187
{-# RULES "break (== 'c')" [3] forall c . break (== c) = breakElem c #-}
220188

221-
breakElem :: Char -> AsciiString -> (AsciiString, AsciiString)
189+
breakElem :: CChar -> AsciiString -> (AsciiString, AsciiString)
222190
breakElem !el (AsciiString ba) =
223-
let (# v1,v2 #) = Vec.splitElem (w8 el) ba in (AsciiString v1, AsciiString v2)
224-
where
225-
w8 (C# ch) = W8# (int2Word# (ord# ch))
191+
let (# v1,v2 #) = Vec.splitElem el ba in (AsciiString v1, AsciiString v2)
226192
{-# INLINE breakElem #-}
227193

228-
intersperse :: Char -> AsciiString -> AsciiString
229-
intersperse sep = AsciiString . Vec.intersperse (toWord8 sep) . toBytes
194+
intersperse :: CChar -> AsciiString -> AsciiString
195+
intersperse sep = AsciiString . Vec.intersperse sep . toBytes
230196

231-
span :: (Char -> Bool) -> AsciiString -> (AsciiString, AsciiString)
197+
span :: (CChar -> Bool) -> AsciiString -> (AsciiString, AsciiString)
232198
span predicate = break (not . predicate)
233199

234200
-- | size in bytes
235-
size :: AsciiString -> Size Char
201+
size :: AsciiString -> Size CChar
236202
size = Size . C.length . toBytes
237203

238204
length :: AsciiString -> Int
239205
length s = let (Size l) = size s in l
240206

241-
replicate :: Int -> Char -> AsciiString
242-
replicate n c = AsciiString $ Vec.create n (const w)
243-
where
244-
!w = toWord8 c
207+
replicate :: Int -> CChar -> AsciiString
208+
replicate n c = AsciiString $ Vec.create n (const c)
245209

246210
-- | Copy the AsciiString
247211
copy :: AsciiString -> AsciiString
248212
copy (AsciiString s) = AsciiString (Vec.copy s)
249213

250214
-- | Allocate a MutableAsciiString of a specific size in bytes.
251215
new :: PrimMonad prim
252-
=> Size8 -- ^ in number of bytes, not of elements.
216+
=> Size CChar -- ^ in number of bytes, not of elements.
253217
-> prim (MutableAsciiString (PrimState prim))
254218
new n = MutableAsciiString `fmap` MVec.new n
255219

@@ -261,30 +225,28 @@ create sz f = do
261225
then freeze ms
262226
else C.take filled `fmap` freeze ms
263227

264-
charMap :: (Char -> Char) -> AsciiString -> AsciiString
265-
charMap f = AsciiString . Vec.map (toWord8 . f . toChar) . toBytes
228+
ccharMap :: (CChar -> CChar) -> AsciiString -> AsciiString
229+
ccharMap f = AsciiString . Vec.map f . toBytes
266230

267-
snoc :: AsciiString -> Char -> AsciiString
268-
snoc (AsciiString ba) = AsciiString . Vec.snoc ba . toWord8
231+
snoc :: AsciiString -> CChar -> AsciiString
232+
snoc (AsciiString ba) = AsciiString . Vec.snoc ba
269233

270-
cons :: Char -> AsciiString -> AsciiString
271-
cons c = AsciiString . Vec.cons (toWord8 c) . toBytes
234+
cons :: CChar -> AsciiString -> AsciiString
235+
cons c = AsciiString . Vec.cons c . toBytes
272236

273-
unsnoc :: AsciiString -> Maybe (AsciiString, Char)
274-
unsnoc str = bimap AsciiString toChar <$> Vec.unsnoc (toBytes str)
237+
unsnoc :: AsciiString -> Maybe (AsciiString, CChar)
238+
unsnoc str = first AsciiString <$> Vec.unsnoc (toBytes str)
275239

276-
uncons :: AsciiString -> Maybe (Char, AsciiString)
277-
uncons str = bimap toChar AsciiString <$> Vec.uncons (toBytes str)
240+
uncons :: AsciiString -> Maybe (CChar, AsciiString)
241+
uncons str = second AsciiString <$> Vec.uncons (toBytes str)
278242

279-
find :: (Char -> Bool) -> AsciiString -> Maybe Char
280-
find predicate (AsciiString ba) = toChar <$> Vec.find (predicate . toChar) ba
243+
find :: (CChar -> Bool) -> AsciiString -> Maybe CChar
244+
find predicate = Vec.find predicate . toBytes
281245

282-
sortBy :: (Char -> Char -> Ordering) -> AsciiString -> AsciiString
283-
sortBy sortF = AsciiString . Vec.sortBy f . toBytes
284-
where
285-
f a b = sortF (toChar a) (toChar b)
246+
sortBy :: (CChar -> CChar -> Ordering) -> AsciiString -> AsciiString
247+
sortBy sortF = AsciiString . Vec.sortBy sortF . toBytes
286248

287-
filter :: (Char -> Bool) -> AsciiString -> AsciiString
249+
filter :: (CChar -> Bool) -> AsciiString -> AsciiString
288250
filter p s = fromList $ Data.List.filter p $ toList s
289251

290252
reverse :: AsciiString -> AsciiString
@@ -295,11 +257,11 @@ reverse (AsciiString ba) = AsciiString $ Vec.reverse ba
295257
-- If the input contains invalid sequences, it will trigger runtime async errors when processing data.
296258
--
297259
-- In doubt, use 'fromBytes'
298-
fromBytesUnsafe :: UArray Word8 -> AsciiString
260+
fromBytesUnsafe :: UArray CChar -> AsciiString
299261
fromBytesUnsafe = AsciiString
300262

301263
lines :: AsciiString -> [AsciiString]
302-
lines = fmap fromList . Prelude.lines . toList
264+
lines = fmap fromString . Prelude.lines . show
303265

304266
words :: AsciiString -> [AsciiString]
305-
words = fmap fromList . Prelude.words . toList
267+
words = fmap fromString . Prelude.words . show

tests/Test/Data/ASCII.hs

+3-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ module Test.Data.ASCII
99
) where
1010

1111
import Foundation
12+
import Foundation.Foreign
1213
import Test.Tasty.QuickCheck
1314

1415
-- | a better generator for unicode Character
15-
genAsciiChar :: Gen Char
16-
genAsciiChar = toEnum <$> choose (1, 128)
16+
genAsciiChar :: Gen CChar
17+
genAsciiChar = toEnum <$> choose (1, 127)

0 commit comments

Comments
 (0)