Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NumericUnderscores lexing #455 #457

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/Language/Haskell/Exts/Extension.hs
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,12 @@ data KnownExtension =

| BlockArguments

-- | HexFloatLiterals syntax ex 0xFF.FFp-12
| HexFloatLiterals

-- | NumericUnderscores num literal syntax ex 1_000_000 or 0xF_F.F_Fp-12 or 0b11_11_11 or 1_000e+23
| NumericUnderscores

deriving (Show, Read, Eq, Ord, Enum, Bounded, Data, Typeable)

-- | Certain extensions imply other extensions, and this function
Expand Down
152 changes: 103 additions & 49 deletions src/Language/Haskell/Exts/InternalLexer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -660,17 +660,20 @@ lexStdToken = do

'0':c:d:_ | toLower c == 'o' && isOctDigit d -> do
discard 2
(n, str) <- lexOctal
(n, str) <- lexOctal $ numUnderscoresEnabled exts
con <- intHash
return (con (n, '0':c:str))
| toLower c == 'b' && isBinDigit d && BinaryLiterals `elem` exts -> do
discard 2
(n, str) <- lexBinary
(n, str) <- lexBinary $ numUnderscoresEnabled exts
con <- intHash
return (con (n, '0':c:str))
| toLower c == 'x' && isHexDigit d && HexFloatLiterals `elem` exts -> do
discard 2
lexHexadecimalFloat (numUnderscoresEnabled exts) c
| toLower c == 'x' && isHexDigit d -> do
discard 2
(n, str) <- lexHexadecimal
(n, str) <- lexHexadecimal $ numUnderscoresEnabled exts
con <- intHash
return (con (n, '0':c:str))

Expand Down Expand Up @@ -803,7 +806,7 @@ lexStdToken = do
return $ LabelVarId ident


c:_ | isDigit c -> lexDecimalOrFloat
c:_ | isDigit c -> lexDecimalOrFloat $ numUnderscoresEnabled exts

| isUpper c -> lexConIdOrQual ""

Expand Down Expand Up @@ -1009,49 +1012,76 @@ lexRawPragma = lexRawPragmaAux
rpr' <- lexRawPragma
return $ rpr ++ '#':rpr'

lexDecimalOrFloat :: Lex a Token
lexDecimalOrFloat = do
ds <- lexWhile isDigit
lexDecimalOrFloat :: NumericUnderscoresAllowed -> Lex a Token
lexDecimalOrFloat underAllowed = do
(n, raw) <- lexHandleUnderAllowed underAllowed isDigit
rest <- getInput
exts <- getExtensionsL
case rest of
('.':d:_) | isDigit d -> do
discard 1
frac <- lexWhile isDigit
let num = parseInteger 10 (ds ++ frac)
(frac, fracRaw) <- lexHandleUnderAllowed underAllowed isDigit
let num = parseInteger 10 (n ++ frac)
decimals = toInteger (length frac)
(exponent, estr) <- do
rest2 <- getInput
case rest2 of
'e':_ -> lexExponent
'E':_ -> lexExponent
'e':_ -> lexExponent underAllowed
'E':_ -> lexExponent underAllowed
_ -> return (0,"")
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con ((num%1) * 10^^(exponent - decimals), ds ++ '.':frac ++ estr)
return $ con ((num%1) * 10^^(exponent - decimals), raw ++ '.':fracRaw ++ estr)
e:_ | toLower e == 'e' -> do
(exponent, estr) <- lexExponent
(exponent, estr) <- lexExponent underAllowed
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con ((parseInteger 10 ds%1) * 10^^exponent, ds ++ estr)
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 ds, ds))
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 ds, ds))
_ -> return (IntTok (parseInteger 10 ds, ds))

where
lexExponent :: Lex a (Integer, String)
lexExponent = do
(e:r) <- getInput
discard 1 -- 'e' or 'E'
case r of
'+':d:_ | isDigit d -> do
return $ con ((parseInteger 10 n%1) * 10^^exponent, raw ++ estr)
'#':'#':_ | MagicHash `elem` exts -> discard 2 >> return (WordTokHash (parseInteger 10 n, raw))
'#':_ | MagicHash `elem` exts -> discard 1 >> return (IntTokHash (parseInteger 10 n, raw))
_ -> return (IntTok (parseInteger 10 n, raw))

lexExponent :: NumericUnderscoresAllowed -> Lex a (Integer, String)
lexExponent underAllowed = do
(e:r) <- getInput
discard 1 -- discard ex notation
case r of
'+':d:_ | isDigit d -> do
discard 1
(n, str) <- lexDecimal
(n, str) <- lexDecimal underAllowed
return (n, e:'+':str)
'-':d:_ | isDigit d -> do
'-':d:_ | isDigit d -> do
discard 1
(n, str) <- lexDecimal
(n, str) <- lexDecimal underAllowed
return (negate n, e:'-':str)
d:_ | isDigit d -> lexDecimal >>= \(n,str) -> return (n, e:str)
_ -> fail "Float with missing exponent"
d:_ | isDigit d -> lexDecimal underAllowed >>= \(n,str) -> return (n, e:str)
_ -> fail "Float with missing exponent"

lexHexadecimalFloat :: NumericUnderscoresAllowed -> Char -> Lex a Token
lexHexadecimalFloat underAllowed c = do
(n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
rest <- getInput
case rest of
('.':d:_) | isHexDigit d -> do
discard 1
(frac, fracRaw) <- lexHandleUnderAllowed underAllowed isHexDigit
let num = parseInteger 16 n
numFrac = parseFrac frac
(exponent, estr) <- do
rest2 <- getInput
case rest2 of
'p':_ -> lexExponent underAllowed
'P':_ -> lexExponent underAllowed
_ -> return (0,"")
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con (((num%1) + numFrac) * 2^^(exponent), '0':c:raw ++ '.':fracRaw ++ estr)
e:_ | toLower e == 'p' -> do
(exponent, estr) <- lexExponent underAllowed
con <- lexHash FloatTok FloatTokHash (Right DoubleTokHash)
return $ con (((parseInteger 16 n)%1) * 2^^exponent, '0':c:raw ++ estr)
_ -> return (IntTok (parseInteger 16 n, '0':c:raw))
where
parseFrac :: String -> Rational
parseFrac ds =
foldl (\n (dp, d) -> n + (d / (16 ^^ dp))) (0 % 1) $ zip [(1 :: Integer) ..] (map ((% 1) . toInteger . digitToInt) ds)

lexHash :: (b -> Token) -> (b -> Token) -> Either String (b -> Token) -> Lex a (b -> Token)
lexHash a b c = do
Expand Down Expand Up @@ -1250,16 +1280,16 @@ lexEscape = do

'o':c:_ | isOctDigit c -> do
discard 1
(n, raw) <- lexOctal
(n, raw) <- lexOctal NoUnderscoresAllowedInNumeric
n' <- checkChar n
return (n', 'o':raw)
'x':c:_ | isHexDigit c -> do
discard 1
(n, raw) <- lexHexadecimal
(n, raw) <- lexHexadecimal NoUnderscoresAllowedInNumeric
n' <- checkChar n
return (n', 'x':raw)
c:_ | isDigit c -> do
(n, raw) <- lexDecimal
(n, raw) <- lexDecimal NoUnderscoresAllowedInNumeric
n' <- checkChar n
return (n', raw)

Expand All @@ -1276,28 +1306,28 @@ lexEscape = do
cntrl _ = fail "Illegal control character"

-- assumes at least one octal digit
lexOctal :: Lex a (Integer, String)
lexOctal = do
ds <- lexWhile isOctDigit
return (parseInteger 8 ds, ds)
lexOctal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
lexOctal underAllowed = do
(n, raw) <- lexHandleUnderAllowed underAllowed isOctDigit
return (parseInteger 8 n, raw)

-- assumes at least one binary digit
lexBinary :: Lex a (Integer, String)
lexBinary = do
ds <- lexWhile isBinDigit
return (parseInteger 2 ds, ds)
lexBinary :: NumericUnderscoresAllowed -> Lex a (Integer, String)
lexBinary underAllowed = do
(n, raw) <- lexHandleUnderAllowed underAllowed isBinDigit
return (parseInteger 2 n, raw)

-- assumes at least one hexadecimal digit
lexHexadecimal :: Lex a (Integer, String)
lexHexadecimal = do
ds <- lexWhile isHexDigit
return (parseInteger 16 ds, ds)
lexHexadecimal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
lexHexadecimal underAllowed = do
(n, raw) <- lexHandleUnderAllowed underAllowed isHexDigit
return (parseInteger 16 n, raw)

-- assumes at least one decimal digit
lexDecimal :: Lex a (Integer, String)
lexDecimal = do
ds <- lexWhile isDigit
return (parseInteger 10 ds, ds)
lexDecimal :: NumericUnderscoresAllowed -> Lex a (Integer, String)
lexDecimal underAllowed = do
(n, raw) <- lexHandleUnderAllowed underAllowed isDigit
return (parseInteger 10 n, raw)

-- Stolen from Hugs's Prelude
parseInteger :: Integer -> String -> Integer
Expand All @@ -1310,6 +1340,30 @@ flagKW t =
exts <- getExtensionsL
when (NondecreasingIndentation `elem` exts) flagDo

data NumericUnderscoresAllowed = UnderscoresAllowedInNumeric | NoUnderscoresAllowedInNumeric
deriving Show

numUnderscoresEnabled :: [KnownExtension] -> NumericUnderscoresAllowed
numUnderscoresEnabled exts = if (NumericUnderscores `elem` exts)
then UnderscoresAllowedInNumeric
else NoUnderscoresAllowedInNumeric

lexHandleUnderAllowed :: NumericUnderscoresAllowed -> (Char -> Bool) -> Lex a (String, String)
lexHandleUnderAllowed NoUnderscoresAllowedInNumeric p = do
ds <- lexWhile p
return (ds, ds)
lexHandleUnderAllowed UnderscoresAllowedInNumeric p = do
s <- getInput
case s of
c:_ | p c -> do
raw <- lexWhile (\ic -> p ic || ic == '_')
if (not $ null raw) && last raw == '_'
then fail $ "lexHandleUnderAllowed: numeric must not end with _: " ++ show raw
else return (filter (/= '_') raw, raw)
c:_ -> fail $ "lexHandleUnderAllowed: numeric must start with proper digit: " ++ show c
_ -> fail $ "lexHandleUnderAllowed: token stream exhausted"


-- | Selects ASCII binary digits, i.e. @\'0\'@..@\'1\'@.
isBinDigit :: Char -> Bool
isBinDigit c = c >= '0' && c <= '1'
Expand Down
4 changes: 4 additions & 0 deletions tests/examples/HexFloatLiteralsBad.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Missing hex float literals extension. Should fail.
f :: Float -> ()
f 0xFF.FFp12 = ()
f _ = ()
1 change: 1 addition & 0 deletions tests/examples/HexFloatLiteralsBad.hs.exactprinter.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ParseFailed (SrcLoc "tests/examples/HexFloatLiteralsBad.hs" 4 1) "Parse error in pattern: f"
3 changes: 3 additions & 0 deletions tests/examples/HexFloatLiteralsBad.hs.parser.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ParseFailed
(SrcLoc "tests/examples/HexFloatLiteralsBad.hs" 4 1)
"Parse error in pattern: f"
1 change: 1 addition & 0 deletions tests/examples/HexFloatLiteralsBad.hs.prettyparser.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ParseFailed (SrcLoc "tests/examples/HexFloatLiteralsBad.hs" 4 1) "Parse error in pattern: f"
1 change: 1 addition & 0 deletions tests/examples/HexFloatLiteralsBad.hs.prettyprinter.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ParseFailed (SrcLoc "tests/examples/HexFloatLiteralsBad.hs" 4 1) "Parse error in pattern: f"
41 changes: 41 additions & 0 deletions tests/examples/HexFloatLiteralsGood.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{-# LANGUAGE HexFloatLiterals #-}
{-# LANGUAGE NumericUnderscores #-}

import GHC.Types

main = do
print [ 0x0, -0x0, 0x1, -0x1
, 0xF, -0xF, 0xF, -0xF
, 0x00000000000000000000000000000000000000000000000000000000000000000000000000001
, 0x0000000000000000000000000000000000000000000000000000000000000000000000000000F
, -0x00000000000000000000000000000000000000000000000000000000000000000000000000001
, -0x0000000000000000000000000000000000000000000000000000000000000000000000000000F
, -0x11.11, -0x11.11
, -0xFF.FF, -0xFF.FF
, -0xFF.FFp12, -0xFF.FFp12
, -0xFF.FFp-12, -0xFF.FFp-12
]

print [ 0x0, 0x1, 0x10, 0x11, 0x100, 0x101, 0x110, 0x111 :: Integer
, -0x0, -0x1, -0x10, -0x11, -0x100, -0x101, -0x110, -0x111
, 0x11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
, -0x11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
]

print [ 0x0_0, -0x0_0, 0x1_0, -0x1_0
, 0xF_0, -0xF_0, 0xF_F, -0xF_F
, 0x000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_01
, 0x000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_0F
, -0x000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_01
, -0x000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_000_0F
, -0x11_0.11_0, -0x11_0.11_0
, -0xFF_0.FF_0, -0xFF_0.FF_0
, -0xF_F.F_Fp1_2, -0xF_F.F_Fp1_2
, -0xF_F.F_Fp-1_2, -0xF_F.F_Fp-1_2
]

print [ 0x0_0, 0x1_0, 0x10_0, 0x11_0, 0x100_0, 0x101_0, 0x110_0, 0x111_0 :: Integer
, -0x0_0, -0x1_0, -0x10_0, -0x11_0, -0x100_0, -0x101_0, -0x110_0, -0x111_0
, 0x111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_11
, -0x111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_111_11
]
1 change: 1 addition & 0 deletions tests/examples/HexFloatLiteralsGood.hs.exactprinter.golden
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Match
Loading