Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix #15867, #15868, #15869: Add missing byte chars notations, enforce limits in decimal notation in byte char & string #15898

Merged
merged 22 commits into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/release-notes/.FSharp.Compiler.Service/9.0.100.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
* Nullness export - make sure option<> and other UseNullAsTrueValue types are properly annotated as nullable for C# and reflection consumers [PR #17528](https://github.com/dotnet/fsharp/pull/17528)
* MethodAccessException on equality comparison of a type private to module. ([Issue #17541](https://github.com/dotnet/fsharp/issues/17541), [PR #17548](https://github.com/dotnet/fsharp/pull/17548))
* Fixed checking failure when `global` namespace is involved with enabled GraphBasedChecking ([PR #17553](https://github.com/dotnet/fsharp/pull/17553))
* Add missing byte char notations, enforce limits in decimal notation in byte chars & strings (Issues [#15867](https://github.com/dotnet/fsharp/issues/15867), [#15868](https://github.com/dotnet/fsharp/issues/15868), [#15869](https://github.com/dotnet/fsharp/issues/15869), [PR #15898](https://github.com/dotnet/fsharp/pull/15898))

### Added

Expand Down
7 changes: 5 additions & 2 deletions src/Compiler/FSComp.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1020,7 +1020,7 @@ lexfltSeparatorTokensOfPatternMatchMisaligned,"The '|' tokens separating rules o
# -----------------------------------------------------------------------------
lexCharNotAllowedInOperatorNames,"'%s' is not permitted as a character in operator names and is reserved for future use"
lexUnexpectedChar,"Unexpected character '%s'"
1140,lexByteArrayCannotEncode,"This byte array literal contains characters that do not encode as a single byte"
1140,lexByteArrayCannotEncode,"This byte array literal contains %d characters that do not encode as a single byte"
1141,lexIdentEndInMarkReserved,"Identifiers followed by '%s' are reserved for future use"
1142,lexOutsideEightBitSigned,"This number is outside the allowable range for 8-bit signed integers"
1143,lexOutsideEightBitSignedHex,"This number is outside the allowable range for hexadecimal 8-bit signed integers"
Expand All @@ -1037,7 +1037,8 @@ lexUnexpectedChar,"Unexpected character '%s'"
1154,lexOutsideDecimal,"This number is outside the allowable range for decimal literals"
1155,lexOutsideThirtyTwoBitFloat,"This number is outside the allowable range for 32-bit floats"
1156,lexInvalidNumericLiteral,"This is not a valid numeric literal. Valid numeric literals include 1, 0x1, 0o1, 0b1, 1l (int/int32), 1u (uint/uint32), 1L (int64), 1UL (uint64), 1s (int16), 1us (uint16), 1y (int8/sbyte), 1uy (uint8/byte), 1.0 (float/double), 1.0f (float32/single), 1.0m (decimal), 1I (bigint)."
1157,lexInvalidByteLiteral,"This is not a valid byte literal"
1157,lexInvalidAsciiByteLiteral,"This is not a valid byte character literal. The value must be less than or equal to '\127'B."
1157,lexInvalidTrigraphAsciiByteLiteral,"This is not a valid byte character literal. The value must be less than or equal to '\127'B.\nNote: In a future F# version this warning will be promoted to an error."
1158,lexInvalidCharLiteral,"This is not a valid character literal"
1159,lexThisUnicodeOnlyInStringLiterals,"This Unicode encoding is only valid in string literals"
1160,lexTokenReserved,"This token is reserved for future use"
Expand Down Expand Up @@ -1131,6 +1132,8 @@ lexIfOCaml,"IF-FSHARP/IF-CAML regions are no longer supported"
1249,lexUnmatchedRBracesInTripleQuote,"The interpolated string contains unmatched closing braces."
1250,lexTooManyPercentsInTripleQuote,"The interpolated triple quoted string literal does not start with enough '$' characters to allow this many consecutive '%%' characters."
1251,lexExtendedStringInterpolationNotSupported,"Extended string interpolation is not supported in this version of F#."
1252,lexInvalidCharLiteralInString,"'%s' is not a valid character literal.\nNote: Currently the value is wrapped around byte range to '%s'. In a future F# version this warning will be promoted to an error."
1253,lexByteArrayOutisdeAscii,"This byte array literal contains %d non-ASCII characters. All characters should be < 128y."
# reshapedmsbuild.fs
1300,toolLocationHelperUnsupportedFrameworkVersion,"The specified .NET Framework version '%s' is not supported. Please specify a value from the enumeration Microsoft.Build.Utilities.TargetDotNetFrameworkVersion."
# -----------------------------------------------------------------------------
Expand Down
24 changes: 20 additions & 4 deletions src/Compiler/SyntaxTree/LexHelpers.fs
Original file line number Diff line number Diff line change
Expand Up @@ -222,16 +222,32 @@ let addUnicodeChar buf c = addIntChar buf (int c)

let addByteChar buf (c: char) = addIntChar buf (int32 c % 256)

type LargerThanOneByte = int
type LargerThan127ButInsideByte = int

/// Sanity check that all high bytes are zero, and additionally that each low byte is <= 127; violations of each kind are counted.
let stringBufferIsBytes (buf: ByteBuffer) =
let errorsInByteStringBuffer (buf: ByteBuffer) =
let bytes = buf.AsMemory()
let mutable ok = true
assert (bytes.Length % 2 = 0)

// Possible enhancement: also return the offending values.
// Problem: the source range of each value is not known here, so a value cannot be mapped back to its range and notation.

// values with high byte <> 0
let mutable largerThanOneByteCount = 0
// values with high byte = 0, but low byte > 127
let mutable largerThan127ButSingleByteCount = 0

for i = 0 to bytes.Length / 2 - 1 do
if bytes.Span[i * 2 + 1] <> 0uy then
ok <- false
largerThanOneByteCount <- largerThanOneByteCount + 1
elif bytes.Span[i * 2] > 127uy then
largerThan127ButSingleByteCount <- largerThan127ButSingleByteCount + 1

ok
if largerThanOneByteCount + largerThan127ButSingleByteCount > 0 then
Some(largerThanOneByteCount, largerThan127ButSingleByteCount)
else
None

let newline (lexbuf: LexBuffer<_>) = lexbuf.EndPos <- lexbuf.EndPos.NextLine

Expand Down
4 changes: 3 additions & 1 deletion src/Compiler/SyntaxTree/LexHelpers.fsi
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,9 @@ val stringBufferAsString: ByteBuffer -> string

val stringBufferAsBytes: ByteBuffer -> byte[]

val stringBufferIsBytes: ByteBuffer -> bool
type LargerThanOneByte = int
type LargerThan127ButInsideByte = int
val errorsInByteStringBuffer: ByteBuffer -> Option<LargerThanOneByte * LargerThan127ButInsideByte>

val newline: Lexing.LexBuffer<'a> -> unit

Expand Down
68 changes: 56 additions & 12 deletions src/Compiler/lex.fsl
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,14 @@ let startString args (lexbuf: UnicodeLexing.Lexbuf) =
if kind.IsInterpolated then
fail args lexbuf (FSComp.SR.lexByteStringMayNotBeInterpolated()) ()
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
elif Lexhelp.stringBufferIsBytes buf then
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
else
fail args lexbuf (FSComp.SR.lexByteArrayCannotEncode()) ()
match Lexhelp.errorsInByteStringBuffer buf with
| Some (largerThanOneByte, largerThan127) ->
if largerThanOneByte > 0 then
fail args lexbuf (FSComp.SR.lexByteArrayCannotEncode(largerThanOneByte)) ()
if largerThan127 > 0 then
warning (Error(FSComp.SR.lexByteArrayOutisdeAscii(largerThan127), lexbuf.LexemeRange))
| None -> ()
BYTEARRAY (Lexhelp.stringBufferAsBytes buf, synByteStringKind, cont)
elif kind.IsInterpolated then
let s = Lexhelp.stringBufferAsString buf
Expand Down Expand Up @@ -518,7 +522,7 @@ rule token (args: LexArgs) (skip: bool) = parse
{ let s = lexeme lexbuf
let x = int32 (if s.[1] = '\\' then escape s.[2] else s.[1])
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidByteLiteral()) (UINT8(byte 0))
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x)) }

Expand All @@ -535,26 +539,50 @@ rule token (args: LexArgs) (skip: bool) = parse
{ let s = lexeme lexbuf
let x = int32 (trigraph s.[2] s.[3] s.[4])
if x < 0 || x > 255 then
fail args lexbuf (FSComp.SR.lexInvalidByteLiteral()) (UINT8(byte 0))
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
elif x > 127 then
// TODO: Promote to Error:
// * Adjust range check in `if` above to `x > 127`
// * Remove this `elif` expression
// * Remove `lexInvalidTrigraphAsciiByteLiteral` from `FSComp.txt`
warning (Error(FSComp.SR.lexInvalidTrigraphAsciiByteLiteral(), lexbuf.LexemeRange))
UINT8 (byte(x))
else
UINT8 (byte(x)) }

| '\'' unicodeGraphShort '\'' { CHAR (char (int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 1)))) }

| '\'' unicodeGraphShort '\'' 'B'
{ let x = int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 2))
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidByteLiteral()) (UINT8(byte 0))
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x)) }

| '\'' hexGraphShort '\'' { CHAR (char (int32 (hexGraphShort (lexemeTrimBoth lexbuf 3 1)))) }

| '\'' unicodeGraphShort '\'' { CHAR (char (int32 (unicodeGraphShort (lexemeTrimBoth lexbuf 3 1)))) }
| '\'' hexGraphShort '\'' 'B'
{ let x = int32 (hexGraphShort (lexemeTrimBoth lexbuf 3 2))
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x)) }

| '\'' unicodeGraphLong '\''
{ match unicodeGraphLong (lexemeTrimBoth lexbuf 3 1) with
| SingleChar(c) -> CHAR (char c)
| _ -> fail args lexbuf (FSComp.SR.lexThisUnicodeOnlyInStringLiterals()) (CHAR (char 0)) }

| '\'' unicodeGraphLong '\'' 'B'
{ match unicodeGraphLong (lexemeTrimBoth lexbuf 3 2) with
| SingleChar(c) ->
let x = int32 c
if x < 0 || x > 127 then
fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0))
else
UINT8 (byte(x))
| _ -> fail args lexbuf (FSComp.SR.lexInvalidAsciiByteLiteral()) (UINT8(byte 0)) }

| "(*IF-FSHARP"
{ if lexbuf.SupportsFeature LanguageFeature.MLCompatRevisions then
mlCompatWarning (FSComp.SR.lexIndentOffForML()) lexbuf.LexemeRange
Expand Down Expand Up @@ -1201,11 +1229,26 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
| trigraph
{ let (buf, _fin, m, kind, args) = sargs
let s = lexeme lexbuf
addByteChar buf (trigraph s.[1] s.[2] s.[3])
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
let result() =
if not skip then
STRING_TEXT (LexCont.String(args.ifdefStack, args.stringNest, LexerStringStyle.SingleQuote, kind, args.interpolationDelimiterLength, m))
else
singleQuoteString sargs skip lexbuf
let c = trigraph s.[1] s.[2] s.[3]
let x = int c
if x < 0 || x > 255 then
// TODO: Promote to Error:
// * Remove `addByteChar ...`
// * Remove `warning ...`
// * Adjust `lexInvalidCharLiteralInString` in `FSComp.txt`: remove `Note` (incl. 2nd placeholder)
// * Uncomment `fail ...`
addByteChar buf c
warning (Error(FSComp.SR.lexInvalidCharLiteralInString (s[0..3], sprintf "\\%03i" (x % 256)), lexbuf.LexemeRange))
//fail args lexbuf (FSComp.SR.lexInvalidCharLiteralInString (s[0..3])) ()
result()
else
singleQuoteString sargs skip lexbuf }
addByteChar buf c
result() }

| hexGraphShort
{ let (buf, _fin, m, kind, args) = sargs
Expand Down Expand Up @@ -1233,7 +1276,8 @@ and singleQuoteString (sargs: LexerStringArgs) (skip: bool) = parse
singleQuoteString sargs skip lexbuf
match unicodeGraphLong hexChars with
| Invalid ->
fail args lexbuf (FSComp.SR.lexInvalidUnicodeLiteral hexChars) (result())
fail args lexbuf (FSComp.SR.lexInvalidUnicodeLiteral hexChars) ()
result()
| SingleChar(c) ->
addUnicodeChar buf (int c)
result()
Expand Down
29 changes: 22 additions & 7 deletions src/Compiler/xlf/FSComp.txt.cs.xlf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading