Skip to content

Latest commit

 

History

History
81 lines (73 loc) · 3.65 KB

grammar.md

File metadata and controls

81 lines (73 loc) · 3.65 KB

The input is first transformed into tokens. The tokens are the terminals in the grammar shown below. Before a token is produced, all whitespace, including comments, is removed from the start of the remaining input. If multiple terminals match the start of the remaining input, the longest terminal is used.

The thus produced list of tokens is parsed according to the following grammar.

Top <- Declaration*

Declaration <- ConstDeclaration / TypeDeclaration
ConstDeclaration <- 'const' Identifier '=' Expression
TypeDeclaration <- Identifier '=' Type

Keyword <- 'const' / 'typedef' / 'BITS_PER_BYTE' / 'pragma_pack' / 'attr_packed' / 'align'
         / 'sizeof' / 'sizeof_bits' / 'offsetof' / 'offsetof_bits' / 'opaque' / 'enum'
         / 'struct' / 'union' / 'unit' / 'bool' / 'u8' / 'i8' / 'u16' / 'i16' / 'u32'
         / 'i32' / 'u64' / 'i64' / 'u128' / 'i128' / 'char' / 'signed' / 'unsigned'
         / 'short' / 'int' / 'long' / 'f32' / 'f64' / 'float' / 'double' / 'ptr'

Identifier <- !Keyword ([a-zA-Z] [a-zA-Z_0-9]* / '_' [a-zA-Z_0-9]+)

Number <- BinaryNumber / OctalNumber / DecimalNumber / HexadecimalNumber
BinaryNumber <- '0b' [01_]* [01] [01_]*
OctalNumber <- '0o' [0-7_]* [0-7] [0-7_]*
DecimalNumber <- [0-9_]* [0-9] [0-9_]*
HexadecimalNumber <- '0x' [0-9a-fA-F_]* [0-9a-fA-F] [0-9a-fA-F_]*

Expression <- AtomicExpression (BinaryOperator AtomicExpression)*
AtomicExpression <- '-' AtomicExpression
                  / '!' AtomicExpression
                  / '(' Expression ')'
                  / 'BITS_PER_BYTE'
                  / Number
                  / ('sizeof' / 'sizeof_bits') '(' Type ')'
                  / Identifier
                  / ('offsetof' / 'offsetof_bits') '(' Type ',' OffsetofPath ')'
OffsetofPath <- (Identifier / '[' Expression ']')
                    ('.' Identifier / '[' Expression ']')*
BinaryOperator <- '==' / '!=' / '<=' / '>=' / '||' / '&&' / '>' / '<' / '+' / '-' / '*'
                / '/' | '%'
SimpleExpression <- '-'? Number
                
Type <- TypeLayout<SimpleExpression>? Annotation* TypeVariant
TypeVariant <- Identifier
             / Typedef
             / OpaqueType
             / Enum
             / Struct
             / Union
             / Array
             / BuiltinType
Typedef <- 'typedef' Type
OpaqueType <- 'opaque' TypeLayout<Expression>
Enum <- 'enum' '{' (Expression ',')* Expression? '}'
Struct <- 'struct' RecordBody
Union <- 'union' RecordBody
Array <- '[' Expression? ']' Type
RecordBody <- '{' (RecordField ',')* RecordField? '}'
RecordField <- FieldLayout? Annotation* ('_' / Identifier) Type
BuiltinType <- 'unsigned' 'long' 'long' / 'signed' 'long' 'long' / 'long' 'long'
             / 'signed' 'char' / 'signed' 'short' / 'signed' 'int' / 'signed' 'long'
             / 'unsigned' 'char' / 'unsigned' 'short' / 'unsigned' 'int'
             / 'unsigned' 'long' / 'unit' / 'bool' / 'u8' / 'i8' / 'u16' / 'i16' / 'u32'
             / 'i32' / 'u64' / 'i64' / 'u128' / 'i128' / 'char' / 'signed' / 'unsigned'
             / 'short' / 'int' / 'long' / 'f32' / 'f64' / 'float' / 'double' / 'ptr'
TypeLayout<T> <- '{' (TypeLayoutElement<T> ',')* TypeLayoutElement<T>? '}'
TypeLayoutElement<T> <- (  'size'
                        / 'alignment'
                        / 'field_alignment'
                        / 'pointer_alignment'
                        / 'required_alignment'
                        )
                        ':' T
FieldLayout <- '{' (FieldLayoutElement ',')* FieldLayoutElement? '}'
FieldLayoutElement <- ('size' / 'offset') ':' SimpleExpression
Annotation <- '@' ( 'attr_packed'
                  / ('align' ('(' Expression ')')?)
                  / ('pragma_pack' '(' Expression ')')
                  )