Skip to content

Commit

Permalink
Remove support for escaping whitespace in strings
Browse files Browse the repository at this point in the history
Inko supported strings like this:

    "foo \
      bar \
      baz"

Such strings would be parsed as this:

    "foo bar baz"

This feature isn't particularly useful as one can use String.+ instead.
It also complicates automatic code formatting, as the presence of a
backslash isn't retained in the AST. While we could change that, this
would lead to additional complexity that frankly isn't justified.

Changelog: changed
  • Loading branch information
yorickpeterse committed Feb 27, 2024
1 parent e77291a commit dc87e18
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 103 deletions.
61 changes: 0 additions & 61 deletions ast/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,6 @@ enum State {
Default,
SingleString,
DoubleString,
EscapedWhitespace,
}

/// A lexer for Inko source code.
Expand Down Expand Up @@ -459,10 +458,6 @@ impl Lexer {
match self.states.last().cloned() {
Some(State::SingleString) => self.next_single_string_token(),
Some(State::DoubleString) => self.next_double_string_token(),
Some(State::EscapedWhitespace) => {
self.consume_escaped_whitespace();
self.next_token()
}
_ => self.next_regular_token(),
}
}
Expand Down Expand Up @@ -1055,10 +1050,6 @@ impl Lexer {
BACKSLASH => {
let next = self.next_byte();

if self.enter_escaped_whitespace(next) {
break;
}

if self.replace_escape_sequence(
&mut buffer,
next,
Expand Down Expand Up @@ -1107,10 +1098,6 @@ impl Lexer {

let next = self.next_byte();

if self.enter_escaped_whitespace(next) {
break;
}

if self.replace_escape_sequence(
&mut buffer,
next,
Expand Down Expand Up @@ -1152,28 +1139,6 @@ impl Lexer {
self.single_character_token(TokenKind::StringExprOpen)
}

/// Enters the escaped-whitespace state when `byte` is a whitespace byte.
///
/// If `byte` is a space, tab, carriage return or newline, the lexer advances
/// by one character, pushes `State::EscapedWhitespace` onto the state stack
/// and `true` is returned. For any other byte nothing is changed and `false`
/// is returned.
///
/// NOTE(review): callers pass the byte that follows a backslash, so the single
/// `advance_char` presumably steps over the backslash itself — confirm.
fn enter_escaped_whitespace(&mut self, byte: u8) -> bool {
    let escaped = self.is_whitespace(byte);

    if escaped {
        self.advance_char();
        self.states.push(State::EscapedWhitespace);
    }

    escaped
}

fn consume_escaped_whitespace(&mut self) {
loop {
match self.current_byte() {
SPACE | TAB | CARRIAGE_RETURN => self.advance_char(),
NEWLINE => self.advance_line(),
_ => break,
}
}

self.states.pop();
}

fn replace_escape_sequence(
&mut self,
buffer: &mut Vec<u8>,
Expand Down Expand Up @@ -1235,10 +1200,6 @@ impl Lexer {
}
}

fn is_whitespace(&self, byte: u8) -> bool {
matches!(byte, SPACE | TAB | CARRIAGE_RETURN | NEWLINE)
}

/// Returns `true` if the next byte is `LOWER_U` and the byte at `peek(2)` is
/// `CURLY_OPEN`.
///
/// The second byte is only inspected when the first check succeeds.
fn next_is_unicode_escape(&self) -> bool {
    let followed_by_u = self.next_byte() == LOWER_U;

    followed_by_u && self.peek(2) == CURLY_OPEN
}
Expand Down Expand Up @@ -1900,28 +1861,6 @@ mod tests {
);
}

// A backslash followed by a newline inside a single-quoted string splits the
// text into two `StringText` tokens. The backslash, the newline and the
// leading space of the second line appear in neither token's value.
// NOTE(review): the two ranges passed to `tok` look like line and column
// ranges respectively — confirm against the `tok` helper.
#[test]
fn test_lexer_single_quoted_string_with_escaped_whitespace() {
assert_tokens!(
"'foo \\\n bar'",
tok(SingleStringOpen, "'", 1..=1, 1..=1),
tok(StringText, "foo ", 1..=1, 2..=6),
tok(StringText, "bar", 2..=2, 3..=5),
tok(SingleStringClose, "'", 2..=2, 6..=6)
);
}

// A backslash followed by a newline inside a double-quoted string splits the
// text into two `StringText` tokens. The backslash, the newline and the
// leading space of the second line appear in neither token's value.
// NOTE(review): the two ranges passed to `tok` look like line and column
// ranges respectively — confirm against the `tok` helper.
#[test]
fn test_lexer_double_quoted_string_with_escaped_whitespace() {
assert_tokens!(
"\"foo \\\n bar\"",
tok(DoubleStringOpen, "\"", 1..=1, 1..=1),
tok(StringText, "foo ", 1..=1, 2..=6),
tok(StringText, "bar", 2..=2, 3..=5),
tok(DoubleStringClose, "\"", 2..=2, 6..=6)
);
}

#[test]
fn test_lexer_colon() {
assert_token!(":", Colon, ":", 1..=1, 1..=1);
Expand Down
9 changes: 0 additions & 9 deletions docs/source/references/syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -504,15 +504,6 @@ multiple
lines"
```

If a string spans multiple lines and a line ends with a `\`, the newline and any
whitespace that follows are ignored:

```inko
"foo \
bar \
baz" # => "foo bar baz"
```

Double quoted strings support Unicode escape sequences using the syntax
`\u{XXXXX}`, such as this:

Expand Down
6 changes: 2 additions & 4 deletions std/src/std/json.inko
Original file line number Diff line number Diff line change
Expand Up @@ -597,8 +597,7 @@ class pub Parser {

if @buffer.size > @max_string_size {
throw error(
"The size of this string ({@buffer.size} bytes) is greater than \
the maximum of {@max_string_size} bytes"
"The size of this string ({@buffer.size} bytes) is greater than the maximum of {@max_string_size} bytes"
)
}

Expand Down Expand Up @@ -859,8 +858,7 @@ class pub Parser {
match advance {
case Some(val) if val == byte -> Result.Ok(nil)
case Some(val) -> throw error(
"The character '{char(byte)}' is expected, \
but '{char(val)}' was found instead"
"The character '{char(byte)}' is expected, but '{char(val)}' was found instead"
)
case _ -> throw unexpected_eof
}
Expand Down
22 changes: 15 additions & 7 deletions std/src/std/net/ip.inko
Original file line number Diff line number Diff line change
Expand Up @@ -522,11 +522,12 @@ impl ToString for Ipv6Address {
# an octet of 255.
let hextet_to_octet_modulo = IPV4_OCTET_MAXIMUM + 1
let prefix = if ipv4_compatible { '::' } else { '::ffff:' }
let a = @g >> IPV4_TOIPV6_SHIFT
let b = @g % hextet_to_octet_modulo
let c = @h >> IPV4_TOIPV6_SHIFT
let d = @h % hextet_to_octet_modulo

return "{prefix}{@g >> IPV4_TOIPV6_SHIFT}\
.{@g % hextet_to_octet_modulo}\
.{@h >> IPV4_TOIPV6_SHIFT}\
.{@h % hextet_to_octet_modulo}"
return "{prefix}{a}.{b}.{c}.{d}"
}

let mut compression_start = 0
Expand Down Expand Up @@ -577,9 +578,16 @@ impl ToString for Ipv6Address {
return buffer.to_string
}

"{@a.format(format)}:{@b.format(format)}:{@c.format(format)}:\
{@d.format(format)}:{@e.format(format)}:{@f.format(format)}:\
{@g.format(format)}:{@h.format(format)}"
let a = @a.format(format)
let b = @b.format(format)
let c = @c.format(format)
let d = @d.format(format)
let e = @e.format(format)
let f = @f.format(format)
let g = @g.format(format)
let h = @h.format(format)

"{a}:{b}:{c}:{d}:{e}:{f}:{g}:{h}"
}
}

Expand Down
8 changes: 4 additions & 4 deletions std/test/std/crypto/test_md5.inko
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ fn pub tests(t: mut Tests) {
'57edf4a22be3c955ac49da2e2107b67a'
),
(
'Inko is a language for building concurrent software with confidence. \
Inko makes it easy to build concurrent software, without having to \
worry about unpredictable performance, unexpected runtime errors, \
race conditions, and type errors.',
'Inko is a language for building concurrent software with confidence. '
+ 'Inko makes it easy to build concurrent software, without having to '
+ 'worry about unpredictable performance, unexpected runtime errors, '
+ 'race conditions, and type errors.',
'6b8c6926954f355eccf02e55fd6e6ffd'
),
(
Expand Down
8 changes: 4 additions & 4 deletions std/test/std/crypto/test_sha1.inko
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ fn pub tests(t: mut Tests) {
'50abf5706a150990a08b2c5ea40fa0e585554732',
),
(
'Inko is a language for building concurrent software with confidence. \
Inko makes it easy to build concurrent software, without having to \
worry about unpredictable performance, unexpected runtime errors, \
race conditions, and type errors.',
'Inko is a language for building concurrent software with confidence. '
+ 'Inko makes it easy to build concurrent software, without having to '
+ 'worry about unpredictable performance, unexpected runtime errors, '
+ 'race conditions, and type errors.',
'a5e718cce8889c9b06394c3736663cc245b068df'
),
(
Expand Down
16 changes: 8 additions & 8 deletions std/test/std/crypto/test_sha2.inko
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ fn pub tests(t: mut Tests) {
'f371bc4a311f2b009eef952dd83ca80e2b60026c8e935592d0f9c308453c813e',
),
(
'Inko is a language for building concurrent software with confidence. \
Inko makes it easy to build concurrent software, without having to \
worry about unpredictable performance, unexpected runtime errors, \
race conditions, and type errors.',
'Inko is a language for building concurrent software with confidence. '
+ 'Inko makes it easy to build concurrent software, without having to '
+ 'worry about unpredictable performance, unexpected runtime errors, '
+ 'race conditions, and type errors.',
'a0936176dafc10061f1ec9a8261a537a2d297b771dae3299ab471097c0f0f36a'
),
(
Expand Down Expand Up @@ -121,10 +121,10 @@ fn pub tests(t: mut Tests) {
'72ec1ef1124a45b047e8b7c75a932195135bb61de24ec0d1914042246e0aec3a2354e093d76f3048b456764346900cb130d2a4fd5dd16abb5e30bcb850dee843',
),
(
'Inko is a language for building concurrent software with confidence. \
Inko makes it easy to build concurrent software, without having to \
worry about unpredictable performance, unexpected runtime errors, \
race conditions, and type errors.',
'Inko is a language for building concurrent software with confidence. '
+ 'Inko makes it easy to build concurrent software, without having to '
+ 'worry about unpredictable performance, unexpected runtime errors, '
+ 'race conditions, and type errors.',
'd642e9ca2d29360ab5d4d17841de89cb1141a1f379433df606bc5b0d7aac76eca3480afe01461865e7977557179a22919da27d2b56c9e96c90913e664789f725'
),
(
Expand Down
9 changes: 3 additions & 6 deletions std/test/std/test_json.inko
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,7 @@ fn pub tests(t: mut Tests) {
t.equal(
parse_invalid("\n1.2e"),
Option.Some(
'One or more tokens are required, but we ran out of input, \
on line 2 at byte offset 4'
'One or more tokens are required, but we ran out of input, on line 2 at byte offset 4'
)
)
}
Expand Down Expand Up @@ -347,8 +346,7 @@ fn pub tests(t: mut Tests) {
t.equal(
parse_invalid('"a'),
Option.Some(
'One or more tokens are required, but we ran out of input, \
on line 1 at byte offset 1'
'One or more tokens are required, but we ran out of input, on line 1 at byte offset 1'
)
)
}
Expand Down Expand Up @@ -401,8 +399,7 @@ fn pub tests(t: mut Tests) {
t.equal(
parse_invalid('{"a"}'),
Option.Some(
"The character ':' is expected, but '}' was found instead, \
on line 1 at byte offset 4"
"The character ':' is expected, but '}' was found instead, on line 1 at byte offset 4"
)
)
}
Expand Down

0 comments on commit dc87e18

Please sign in to comment.