1
1
//! Lexical analysis of Inko source code.
2
- use crate :: source_location :: SourceLocation ;
2
+ use location :: Location ;
3
3
use unicode_segmentation:: UnicodeSegmentation ;
4
4
5
5
const NULL : u8 = 0 ;
@@ -166,6 +166,7 @@ pub enum TokenKind {
166
166
While ,
167
167
Whitespace ,
168
168
Extern ,
169
+ Inline ,
169
170
}
170
171
171
172
impl TokenKind {
@@ -268,6 +269,7 @@ impl TokenKind {
268
269
TokenKind :: Nil => "the 'nil' keyword" ,
269
270
TokenKind :: Replace => "a '=:'" ,
270
271
TokenKind :: Extern => "the 'extern' keyword" ,
272
+ TokenKind :: Inline => "the 'inline' keyword" ,
271
273
}
272
274
}
273
275
}
@@ -276,23 +278,23 @@ impl TokenKind {
276
278
/// A token produced by the lexer: what kind of token it is, the text it
/// carries, and the source position it covers.
pub struct Token {
277
279
/// The category of this token (a keyword, an identifier, `Invalid`, `Null`, ...).
pub kind : TokenKind ,
278
280
/// The token's text. This is empty for `Null` tokens, and holds the
/// unexpected input for `Invalid` tokens.
pub value : String ,
279
- pub location : SourceLocation ,
281
/// The line and column span of the token.
+ pub location : Location ,
280
282
}
281
283
282
284
impl Token {
283
/// Builds a token directly from the given kind, value and location. No
/// validation or normalisation is applied to any of the arguments.
- fn new ( kind : TokenKind , value : String , location : SourceLocation ) -> Self {
285
+ fn new ( kind : TokenKind , value : String , location : Location ) -> Self {
284
286
Self { kind, value, location }
285
287
}
286
288
287
289
/// Returns a token signalling unexpected input. The token contains the
288
290
/// invalid character.
///
/// The kind is always `TokenKind::Invalid`; `value` and `location` are
/// stored unchanged.
289
- fn invalid ( value : String , location : SourceLocation ) -> Self {
291
+ fn invalid ( value : String , location : Location ) -> Self {
290
292
Self :: new ( TokenKind :: Invalid , value, location)
291
293
}
292
294
293
295
/// Returns a token that signals the end of the input stream. We use null
294
296
/// tokens so we don't need to wrap/unwrap every token using an Option type.
///
/// The kind is always `TokenKind::Null` and the value is an empty string;
/// only the location is supplied by the caller.
295
- fn null ( location : SourceLocation ) -> Self {
297
+ fn null ( location : Location ) -> Self {
296
298
Self :: new ( TokenKind :: Null , String :: new ( ) , location)
297
299
}
298
300
@@ -335,6 +337,7 @@ impl Token {
335
337
| TokenKind :: Case
336
338
| TokenKind :: Enum
337
339
| TokenKind :: Extern
340
+ | TokenKind :: Inline
338
341
)
339
342
}
340
343
@@ -363,7 +366,7 @@ impl Token {
363
366
}
364
367
365
368
/// Returns `true` if this token and `token` start on the same line.
///
/// Only the starting lines are compared; the lines on which the tokens end
/// are not taken into account.
pub fn same_line_as ( & self , token : & Token ) -> bool {
366
- self . location . lines . start ( ) == token. location . lines . start ( )
369
+ self . location . line_start == token. location . line_start
367
370
}
368
371
}
369
372
@@ -422,10 +425,10 @@ pub struct Lexer {
422
425
states : Vec < State > ,
423
426
424
427
/// The current line number.
425
- line : usize ,
428
+ line : u32 ,
426
429
427
430
/// The current (starting) column number.
428
- column : usize ,
431
+ column : u32 ,
429
432
}
430
433
431
434
impl Lexer {
@@ -443,8 +446,13 @@ impl Lexer {
443
446
}
444
447
}
445
448
446
/// Returns a location whose start and end both equal the lexer's current
/// line and column.
- pub fn start_location ( & self ) -> SourceLocation {
447
- SourceLocation :: new ( self . line ..=self . line , self . column ..=self . column )
449
+ pub fn start_location ( & self ) -> Location {
450
+ Location {
451
+ line_start : self . line ,
452
+ line_end : self . line ,
453
+ column_start : self . column ,
454
+ column_end : self . column ,
455
+ }
448
456
}
449
457
450
458
pub fn next_token ( & mut self ) -> Token {
@@ -457,18 +465,16 @@ impl Lexer {
457
465
}
458
466
}
459
467
460
/// Builds the location of a token that started at `start_line` and
/// `start_column` and ends at the lexer's current line, one column before
/// the lexer's current column.
///
/// NOTE(review): `self.column - 1` is an unsigned subtraction, so it
/// underflows if the column is 0. Columns appear to be 1-based and already
/// advanced past the token when this is called -- confirm against callers.
- fn source_location (
461
- & self ,
462
- start_line : usize ,
463
- start_column : usize ,
464
- ) -> SourceLocation {
465
- SourceLocation :: new (
466
- start_line..=self . line ,
468
+ fn source_location ( & self , start_line : u32 , start_column : u32 ) -> Location {
469
+ Location {
470
+ line_start : start_line,
471
+ line_end : self . line ,
467
472
// The end column points to whatever comes _after_ the last
468
473
// processed character. This means the end column is one column
469
474
// earlier.
470
- start_column..=( self . column - 1 ) ,
471
- )
475
+ column_start : start_column,
476
+ column_end : self . column - 1 ,
477
+ }
472
478
}
473
479
474
480
fn current_byte ( & self ) -> u8 {
@@ -500,7 +506,7 @@ impl Lexer {
500
506
}
501
507
502
508
/// Advances the current column by the number of extended grapheme clusters
/// in `value`, so one user-perceived character counts as one column
/// regardless of how many bytes it occupies.
fn advance_column ( & mut self , value : & str ) {
503
- self . column += value. graphemes ( true ) . count ( ) ;
509
// NOTE(review): `as u32` silently truncates counts above `u32::MAX`;
// presumably unreachable for real source lines -- confirm.
+ self . column += value. graphemes ( true ) . count ( ) as u32 ;
504
510
}
505
511
506
512
fn advance_char ( & mut self ) {
@@ -997,6 +1003,7 @@ impl Lexer {
997
1003
"return" => TokenKind :: Return ,
998
1004
"static" => TokenKind :: Static ,
999
1005
"extern" => TokenKind :: Extern ,
1006
+ "inline" => TokenKind :: Inline ,
1000
1007
_ => TokenKind :: Identifier ,
1001
1008
} ,
1002
1009
7 => match value. as_str ( ) {
@@ -1087,14 +1094,14 @@ impl Lexer {
1087
1094
& mut self ,
1088
1095
kind : TokenKind ,
1089
1096
buffer : Vec < u8 > ,
1090
- line : usize ,
1091
- column : usize ,
1097
+ line : u32 ,
1098
+ column : u32 ,
1092
1099
new_line : bool ,
1093
1100
) -> Token {
1094
1101
let value = String :: from_utf8_lossy ( & buffer) . into_owned ( ) ;
1095
1102
1096
1103
if !value. is_empty ( ) {
1097
- self . column += value. graphemes ( true ) . count ( ) ;
1104
+ self . column += value. graphemes ( true ) . count ( ) as u32 ;
1098
1105
}
1099
1106
1100
1107
let location = self . source_location ( line, column) ;
@@ -1175,8 +1182,8 @@ impl Lexer {
1175
1182
& mut self ,
1176
1183
kind : TokenKind ,
1177
1184
start : usize ,
1178
- line : usize ,
1179
- column : usize ,
1185
+ line : u32 ,
1186
+ column : u32 ,
1180
1187
) -> Token {
1181
1188
let value = self . slice_string ( start, self . position ) ;
1182
1189
@@ -1187,7 +1194,7 @@ impl Lexer {
1187
1194
Token :: new ( kind, value, location)
1188
1195
}
1189
1196
1190
/// Shorthand for `token_with_column` that passes the lexer's current column
/// as the `column` argument.
- fn token ( & mut self , kind : TokenKind , start : usize , line : usize ) -> Token {
1197
+ fn token ( & mut self , kind : TokenKind , start : usize , line : u32 ) -> Token {
1191
1198
self . token_with_column ( kind, start, line, self . column )
1192
1199
}
1193
1200
@@ -1223,13 +1230,22 @@ impl Lexer {
1223
1230
// When we encounter the end of the input, we want the location to point
1224
1231
// to the last column that came before it. This way any errors are
1225
1232
// reported within the bounds of the column range.
1226
- let lines = self . line ..=self . line ;
1227
1233
let location = if self . column == 1 {
1228
- SourceLocation :: new ( lines, 1 ..=1 )
1234
+ Location {
1235
+ line_start : self . line ,
1236
+ line_end : self . line ,
1237
+ column_start : 1 ,
1238
+ column_end : 1 ,
1239
+ }
1229
1240
} else {
1230
1241
let column = self . column - 1 ;
1231
1242
1232
- SourceLocation :: new ( lines, column..=column)
1243
+ Location {
1244
+ line_start : self . line ,
1245
+ line_end : self . line ,
1246
+ column_start : column,
1247
+ column_end : column,
1248
+ }
1233
1249
} ;
1234
1250
1235
1251
Token :: null ( location)
@@ -1247,17 +1263,17 @@ mod tests {
1247
1263
}
1248
1264
1249
1265
/// Test helper: builds a location from an inclusive line range and an
/// inclusive column range.
fn location (
1250
- line_range : RangeInclusive < usize > ,
1251
- column_range : RangeInclusive < usize > ,
1252
- ) -> SourceLocation {
1253
- SourceLocation :: new ( line_range, column_range)
1266
+ line_range : RangeInclusive < u32 > ,
1267
+ column_range : RangeInclusive < u32 > ,
1268
+ ) -> Location {
1269
+ Location :: new ( & line_range, & column_range)
1254
1270
}
1255
1271
1256
1272
/// Test helper: builds a `Token` with the given kind and value, whose
/// location is described by inclusive line and column ranges.
fn tok (
1257
1273
kind : TokenKind ,
1258
1274
value : & str ,
1259
- line_range : RangeInclusive < usize > ,
1260
- column_range : RangeInclusive < usize > ,
1275
+ line_range : RangeInclusive < u32 > ,
1276
+ column_range : RangeInclusive < u32 > ,
1261
1277
) -> Token {
1262
1278
Token :: new ( kind, value. to_string ( ) , location ( line_range, column_range) )
1263
1279
}
@@ -1337,6 +1353,7 @@ mod tests {
1337
1353
assert ! ( tok( TokenKind :: While , "" , 1 ..=1 , 1 ..=1 ) . is_keyword( ) ) ;
1338
1354
assert ! ( tok( TokenKind :: Recover , "" , 1 ..=1 , 1 ..=1 ) . is_keyword( ) ) ;
1339
1355
assert ! ( tok( TokenKind :: Nil , "" , 1 ..=1 , 1 ..=1 ) . is_keyword( ) ) ;
1356
+ assert ! ( tok( TokenKind :: Inline , "" , 1 ..=1 , 1 ..=1 ) . is_keyword( ) ) ;
1340
1357
}
1341
1358
1342
1359
#[ test]
@@ -1978,6 +1995,7 @@ mod tests {
1978
1995
assert_token ! ( "return" , Return , "return" , 1 ..=1 , 1 ..=6 ) ;
1979
1996
assert_token ! ( "static" , Static , "static" , 1 ..=1 , 1 ..=6 ) ;
1980
1997
assert_token ! ( "extern" , Extern , "extern" , 1 ..=1 , 1 ..=6 ) ;
1998
+ assert_token ! ( "inline" , Inline , "inline" , 1 ..=1 , 1 ..=6 ) ;
1981
1999
1982
2000
assert_token ! ( "builtin" , Builtin , "builtin" , 1 ..=1 , 1 ..=7 ) ;
1983
2001
assert_token ! ( "recover" , Recover , "recover" , 1 ..=1 , 1 ..=7 ) ;
0 commit comments