diff --git a/doc/lexical_structure.md b/doc/lexical_structure.md index 93febbdb2891..a9d8b0b43990 100644 --- a/doc/lexical_structure.md +++ b/doc/lexical_structure.md @@ -128,16 +128,16 @@ Numeric literals can be specified in various bases. ``` numeral : numeral10 | numeral2 | numeral8 | numeral16 - numeral10 : [0-9]+ - numeral2 : "0" [bB] [0-1]+ - numeral8 : "0" [oO] [0-7]+ - numeral16 : "0" [xX] hex_char+ + numeral10 : [0-9]+ ("_"+ [0-9]+)* + numeral2 : "0" [bB] ("_"* [0-1]+)+ + numeral8 : "0" [oO] ("_"* [0-7]+)+ + numeral16 : "0" [xX] ("_"* hex_char+)+ ``` Floating point literals are also possible with optional exponent: ``` - float : [0-9]+ "." [0-9]+ [[eE[+-][0-9]+] + float : numeral10 "." numeral10? [eE[+-]numeral10] ``` For example: @@ -147,6 +147,7 @@ constant w : Int := 55 constant x : Nat := 26085 constant y : Nat := 0x65E5 constant z : Float := 2.548123e-05 +constant b : Bool := 0b_11_01_10_00 ``` Note: that negative numbers are created by applying the "-" negation prefix operator to the number, for example: diff --git a/src/Init/Meta.lean b/src/Init/Meta.lean index 4f520ed30dae..629ba768f987 100644 --- a/src/Init/Meta.lean +++ b/src/Init/Meta.lean @@ -679,6 +679,7 @@ private partial def decodeBinLitAux (s : String) (i : String.Pos) (val : Nat) : let c := s.get i if c == '0' then decodeBinLitAux s (s.next i) (2*val) else if c == '1' then decodeBinLitAux s (s.next i) (2*val + 1) + else if c == '_' then decodeBinLitAux s (s.next i) val else none private partial def decodeOctalLitAux (s : String) (i : String.Pos) (val : Nat) : Option Nat := @@ -686,6 +687,7 @@ private partial def decodeOctalLitAux (s : String) (i : String.Pos) (val : Nat) else let c := s.get i if '0' ≤ c && c ≤ '7' then decodeOctalLitAux s (s.next i) (8*val + c.toNat - '0'.toNat) + else if c == '_' then decodeOctalLitAux s (s.next i) val else none private def decodeHexDigit (s : String) (i : String.Pos) : Option (Nat × String.Pos) := @@ -700,13 +702,16 @@ private partial def decodeHexLitAux (s : String) (i : String.Pos) (val : Nat) : if s.atEnd i then some val else match decodeHexDigit s i with | some (d, i) => decodeHexLitAux s i (16*val + d) - | none => none + | none => + if s.get i == '_' then decodeHexLitAux s (s.next i) val + else none private partial def decodeDecimalLitAux (s : String) (i : String.Pos) (val : Nat) : Option Nat := if s.atEnd i then some val else let c := s.get i if '0' ≤ c && c ≤ '9' then decodeDecimalLitAux s (s.next i) (10*val + c.toNat - '0'.toNat) + else if c == '_' then decodeDecimalLitAux s (s.next i) val else none def decodeNatLitVal? (s : String) : Option Nat := @@ -773,6 +778,8 @@ where let c := s.get i if '0' ≤ c && c ≤ '9' then decodeAfterExp (s.next i) val e sign (10*exp + c.toNat - '0'.toNat) + else if c == '_' then + decodeAfterExp (s.next i) val e sign exp else none @@ -793,6 +800,8 @@ where let c := s.get i if '0' ≤ c && c ≤ '9' then decodeAfterDot (s.next i) (10*val + c.toNat - '0'.toNat) (e+1) + else if c == '_' then + decodeAfterDot (s.next i) val e else if c == 'e' || c == 'E' then decodeExp (s.next i) val e else @@ -805,6 +814,8 @@ where let c := s.get i if '0' ≤ c && c ≤ '9' then decode (s.next i) (10*val + c.toNat - '0'.toNat) + else if c == '_' then + decode (s.next i) val else if c == '.' then decodeAfterDot (s.next i) val 0 else if c == 'e' || c == 'E' then diff --git a/src/Lean/Parser/Basic.lean b/src/Lean/Parser/Basic.lean index 8dd33413de3f..7649171e922b 100644 --- a/src/Lean/Parser/Basic.lean +++ b/src/Lean/Parser/Basic.lean @@ -804,18 +804,45 @@ where else normalState num c s +/-- +Parses a sequence of the form `many (many '_' >> many1 digit)`, but if `needDigit` is true the parsed result must be nonempty. + +Note: this does not report that it is expecting `_` if we reach EOI or an unexpected character. +Rationale: this error happens if there is already a `_`, and while sequences of `_` are allowed, it's a bit perverse to suggest extending the sequence. +-/ +partial def takeDigitsFn (isDigit : Char → Bool) (expecting : String) (needDigit : Bool) : ParserFn := fun c s => + let input := c.input + let i := s.pos + if h : input.atEnd i then + if needDigit then + s.mkEOIError [expecting] + else + s + else + let curr := input.get' i h + if curr == '_' then takeDigitsFn isDigit expecting true c (s.next' c.input i h) + else if isDigit curr then takeDigitsFn isDigit expecting false c (s.next' c.input i h) + else if needDigit then s.mkUnexpectedError "unexpected character" (expected := [expecting]) + else s + def decimalNumberFn (startPos : String.Pos) (c : ParserContext) : ParserState → ParserState := fun s => - let s := takeWhileFn (fun c => c.isDigit) c s + let s := takeDigitsFn (fun c => c.isDigit) "decimal number" false c s let input := c.input let i := s.pos - let curr := input.get i - if curr == '.' || curr == 'e' || curr == 'E' then + if h : input.atEnd i then + mkNodeToken numLitKind startPos c s + else + let curr := input.get' i h + if curr == '.' || curr == 'e' || curr == 'E' then + parseScientific s + else + mkNodeToken numLitKind startPos c s +where + parseScientific s := let s := parseOptDot s let s := parseOptExp s mkNodeToken scientificLitKind startPos c s - else - mkNodeToken numLitKind startPos c s -where + parseOptDot s := let input := c.input let i := s.pos @@ -824,7 +851,7 @@ where let i := input.next i let curr := input.get i if curr.isDigit then - takeWhileFn (fun c => c.isDigit) c (s.setPos i) + takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i) else s.setPos i else @@ -839,22 +866,22 @@ where let i := if input.get i == '-' || input.get i == '+' then input.next i else i let curr := input.get i if curr.isDigit then - takeWhileFn (fun c => c.isDigit) c (s.setPos i) + takeDigitsFn (fun c => c.isDigit) "decimal number" false c (s.setPos i) else s.mkUnexpectedError "missing exponent digits in scientific literal" else s def binNumberFn (startPos : String.Pos) : ParserFn := fun c s => - let s := takeWhile1Fn (fun c => c == '0' || c == '1') "binary number" c s + let s := takeDigitsFn (fun c => c == '0' || c == '1') "binary number" true c s mkNodeToken numLitKind startPos c s def octalNumberFn (startPos : String.Pos) : ParserFn := fun c s => - let s := takeWhile1Fn (fun c => '0' ≤ c && c ≤ '7') "octal number" c s + let s := takeDigitsFn (fun c => '0' ≤ c && c ≤ '7') "octal number" true c s mkNodeToken numLitKind startPos c s def hexNumberFn (startPos : String.Pos) : ParserFn := fun c s => - let s := takeWhile1Fn (fun c => ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "hexadecimal number" c s + let s := takeDigitsFn (fun c => ('0' ≤ c && c ≤ '9') || ('a' ≤ c && c ≤ 'f') || ('A' ≤ c && c ≤ 'F')) "hexadecimal number" true c s mkNodeToken numLitKind startPos c s def numberFnAux : ParserFn := fun c s => diff --git a/tests/lean/run/6199.lean b/tests/lean/run/6199.lean new file mode 100644 index 000000000000..c0702cc11857 --- /dev/null +++ b/tests/lean/run/6199.lean @@ -0,0 +1,107 @@ +import Lean +/-! +# `_` separators for numeric literals +RFC: https://github.com/leanprover/lean4/issues/6199 +-/ +set_option pp.mvars false + +open Lean Elab + +elab "#term " s:str : command => Command.liftTermElabM <| withRef s do + let t ← Lean.ofExcept <| Parser.runParserCategory (← getEnv) `term s.getString + let e ← Term.elabTermAndSynthesize t none + logInfo m!"{e} : {← Meta.inferType e}" + +/-! +Decimal tests +-/ +/-- info: 0 : Nat -/ +#guard_msgs in #term "0" +/-- info: 1 : Nat -/ +#guard_msgs in #term "1" +/-- info: 1000000 : Nat -/ +#guard_msgs in #term "1000000" +/-- info: 1000000 : Nat -/ +#guard_msgs in #term "1_000_000" +/-- info: 1000000 : Nat -/ +#guard_msgs in #term "1__000___000" +/-- error: :1:5: unexpected end of input; expected decimal number -/ +#guard_msgs in #term "1_00_" +/-- error: :1:6: unexpected character; expected decimal number -/ +#guard_msgs in #term "(1_00_)" +-- Starting with `_` is an identifier: +/-- +error: unknown identifier '_10' +--- +info: sorry : ?_ +-/ +#guard_msgs in #term "_10" + +/-! +Scientific tests +-/ +/-- info: 100000. : Float -/ +#guard_msgs in #term "100_000." +/-- info: 100000.0 : Float -/ +#guard_msgs in #term "100_000.0" +/-- info: 0. : Float -/ +#guard_msgs in #term "0." +-- The decimal parser requires a digit at the start, so the `_` is left over: +/-- error: :1:4: expected end of input -/ +#guard_msgs in #term "100._" +/-- info: 100.111111 : Float -/ +#guard_msgs in #term "100.111_111" +/-- error: :1:8: unexpected end of input; expected decimal number -/ +#guard_msgs in #term "100.111_" +/-- error: :1:9: unexpected character; expected decimal number -/ +#guard_msgs in #term "(100.111_)" +/-- info: 100111111e1094 : Float -/ +#guard_msgs in #term "100.111_111e1_100" +/-- error: :1:5: unexpected end of input; expected decimal number -/ +#guard_msgs in #term "1e10_" +/-- error: :1:6: unexpected character; expected decimal number -/ +#guard_msgs in #term "(1e10_)" +/-- error: :1:1: missing exponent digits in scientific literal -/ +#guard_msgs in #term "1e_10" + +/-! +Base-2 tests +-/ +/-- info: 15 : Nat -/ +#guard_msgs in #term "0b1111" +/-- info: 15 : Nat -/ +#guard_msgs in #term "0b11_11" +/-- info: 15 : Nat -/ +#guard_msgs in #term "0b__11__11" +/-- error: :1:7: unexpected end of input; expected binary number -/ +#guard_msgs in #term "0b1111_" +/-- error: :1:8: unexpected character; expected binary number -/ +#guard_msgs in #term "(0b1111_)" + +/-! +Base-8 tests +-/ +/-- info: 512 : Nat -/ +#guard_msgs in #term "0o1000" +/-- info: 512 : Nat -/ +#guard_msgs in #term "0o1_000" +/-- info: 512 : Nat -/ +#guard_msgs in #term "0o_1_000" +/-- error: :1:7: unexpected end of input; expected octal number -/ +#guard_msgs in #term "0o1000_" +/-- error: :1:8: unexpected character; expected octal number -/ +#guard_msgs in #term "(0o1000_)" + +/-! +Base-16 tests +-/ +/-- info: 4096 : Nat -/ +#guard_msgs in #term "0x1000" +/-- info: 4096 : Nat -/ +#guard_msgs in #term "0x1_000" +/-- info: 4096 : Nat -/ +#guard_msgs in #term "0x_1_000" +/-- error: :1:7: unexpected end of input; expected hexadecimal number -/ +#guard_msgs in #term "0x1000_" +/-- error: :1:8: unexpected character; expected hexadecimal number -/ +#guard_msgs in #term "(0x1000_)"