Skip to content

Commit

Permalink
Fix: Errors for invalid trigraph & \U in String span complete string
Browse files Browse the repository at this point in the history
prev:
```fsharp
> "foo\U12345678bar";;
  ^^^^^^^^^^^^^^^^^^
stdin(1,1): error FS1245: \U12345678 is not a valid Unicode character escape sequence
```

now:
```fsharp
> "foo\U12345678bar";;
  ----^^^^^^^^^^
stdin(1,5): error FS1245: \U12345678 is not a valid Unicode character escape sequence
```

Note: In byte strings, the narrowed error span only applies to chars that are invalid in general (-> invalid in a normal string too), but not to chars that are invalid only inside a byte string:
```fsharp
> "foo\U000003C0bar";;
val it: string = "fooπbar"

> "foo\U000003C0bar"B;;
  ^^^^^^^^^^^^^^^^^^^
stdin(8,1): error FS1140: This byte array literal contains characters that do not encode as a single byte
```

Reason: We only know at the end of the string whether it's a byte string. At that point we no longer have direct/simple access to the invalid element (and its notation) -> we know the value and can validate it -- but don't know exactly where it's located
  • Loading branch information
Booksbaum committed Aug 30, 2023
1 parent faa8646 commit 5984f26
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 6 deletions.
6 changes: 4 additions & 2 deletions src/Compiler/lex.fsl
Original file line number Diff line number Diff line change
Expand Up @@ -1210,7 +1210,8 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
let c = trigraph s.[1] s.[2] s.[3]
let x = int c
if x < 0 || x > 255 then
fail args lexbuf (FSComp.SR.lexInvalidCharLiteralInString (s[0..3])) (result())
fail args lexbuf (FSComp.SR.lexInvalidCharLiteralInString (s[0..3])) ()
result()
else
addByteChar buf c
result() }
Expand Down Expand Up @@ -1241,7 +1242,8 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
singleQuoteString sargs skip lexbuf
match unicodeGraphLong hexChars with
| Invalid ->
fail args lexbuf (FSComp.SR.lexInvalidUnicodeLiteral hexChars) (result())
fail args lexbuf (FSComp.SR.lexInvalidUnicodeLiteral hexChars) ()
result()
| SingleChar(c) ->
addUnicodeChar buf (int c)
result()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ let _ = "\937"B
|> typecheck
|> shouldFail
|> withDiagnostics [
(Error 1252, Line 3, Col 9, Line 3, Col 16, "'\\937' is not a valid character literal")
(Error 1252, Line 3, Col 10, Line 3, Col 14, "'\\937' is not a valid character literal")
]

[<Fact>]
Expand Down Expand Up @@ -65,3 +65,23 @@ let ``values in different notations are invalid above 127``() =
(Error 1157, Line 6, Col 5, Line 6, Col 14, "This is not a valid byte literal")
(Error 1157, Line 7, Col 5, Line 7, Col 18, "This is not a valid byte literal")
]

[<Fact>]
let ``Error messages for different notations only span invalid notation``() =
Fs """
"ok:\061;err:\937;err:\U12345678;err:\U00005678;fin"B
|> printfn "%A"
"""
|> typecheck
|> shouldFail
|> withDiagnostics [
(Error 1252, Line 2, Col 14, Line 2, Col 18, "'\\937' is not a valid character literal")
(Error 1245, Line 2, Col 23, Line 2, Col 33, "\\U12345678 is not a valid Unicode character escape sequence")

// Note: Error for `\U00005678` spans full byte string:
// Is a valid char, but two bytes -> not valid inside byte string
// But check for correct byte happens after string is finished
// (because `B` suffix -> only know at end if it's a byte string)
// -> Don't have direct access to range of invalid char any more
(Error 1140, Line 2, Col 1, Line 2, Col 54, "This byte array literal contains characters that do not encode as a single byte")
]
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ printfn "Ω\937"
|> typecheck
|> shouldFail
|> withDiagnostics [
// Note: Error spans full string -- not just error part
(Error 1252, Line 2, Col 9, Line 2, Col 16, "'\\937' is not a valid character literal")
(Error 1252, Line 2, Col 11, Line 2, Col 15, "'\\937' is not a valid character literal")
]

[<Fact>]
Expand Down Expand Up @@ -47,6 +46,17 @@ printfn "foo\937bar"
|> typecheck
|> shouldFail
|> withDiagnostics [
(Error 1252, Line 2, Col 9, Line 2, Col 21, "'\\937' is not a valid character literal")
(Error 1252, Line 2, Col 13, Line 2, Col 17, "'\\937' is not a valid character literal")
]

[<Fact>]
let ``Error messages for different notations only span invalid notation``() =
Fs """
printfn "ok:\061;err:\937;err:\U12345678;ok:\U00005678;fin"
"""
|> typecheck
|> shouldFail
|> withDiagnostics [
(Error 1252, Line 2, Col 22, Line 2, Col 26, "'\\937' is not a valid character literal")
(Error 1245, Line 2, Col 31, Line 2, Col 41, "\\U12345678 is not a valid Unicode character escape sequence")
]

0 comments on commit 5984f26

Please sign in to comment.