Skip to content
This repository was archived by the owner on Feb 10, 2025. It is now read-only.

Commit 084b201

Browse files
DanTup and natebosch authored
Avoid treating \r as a newline on its own (#81)
Fixes https://github.com/dart-lang/string_scanner/issues/80 This fixes an error when scanning a zero-length match when between a CR and LF. * Fix typo * comment nits * Fix some bugs when setting position if the current position is between \r\n --------- Co-authored-by: Nate Bosch <[email protected]>
1 parent 2139417 commit 084b201

File tree

4 files changed

+292
-31
lines changed

4 files changed

+292
-31
lines changed

CHANGELOG.md

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
## 1.4.0
2+
3+
* Fix `LineScanner`'s handling of `\r\n` to prevent errors scanning
4+
zero-length matches when between CR and LF. CR is treated as a new line only
5+
if not immediately followed by a LF.
6+
* Fix `LineScanner`'s updating of `column` when setting `position` if the
7+
current position is not `0`.
8+
19
## 1.3.0
210

311
* Require Dart 3.1.0

lib/src/line_scanner.dart

+56-14
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ import 'utils.dart';
88

99
// Note that much of this code is duplicated in eager_span_scanner.dart.
1010

11-
/// A regular expression matching newlines across platforms.
12-
final _newlineRegExp = RegExp(r'\r\n?|\n');
11+
/// A regular expression matching newlines. A newline is either a `\n`, a `\r\n`
12+
/// or a `\r` that is not immediately followed by a `\n`.
13+
final _newlineRegExp = RegExp(r'\n|\r\n|\r(?!\n)');
1314

1415
/// A subclass of [StringScanner] that tracks line and column information.
1516
class LineScanner extends StringScanner {
@@ -48,27 +49,57 @@ class LineScanner extends StringScanner {
4849

4950
@override
5051
set position(int newPosition) {
52+
if (newPosition == position) {
53+
return;
54+
}
55+
5156
final oldPosition = position;
5257
super.position = newPosition;
5358

54-
if (newPosition > oldPosition) {
55-
final newlines = _newlinesIn(string.substring(oldPosition, newPosition));
59+
if (newPosition == 0) {
60+
_line = 0;
61+
_column = 0;
62+
} else if (newPosition > oldPosition) {
63+
final newlines = _newlinesIn(string.substring(oldPosition, newPosition),
64+
endPosition: newPosition);
5665
_line += newlines.length;
5766
if (newlines.isEmpty) {
5867
_column += newPosition - oldPosition;
5968
} else {
60-
_column = newPosition - newlines.last.end;
69+
// The regex got a substring, so we need to account for where it started
70+
// in the string.
71+
final offsetOfLastNewline = oldPosition + newlines.last.end;
72+
_column = newPosition - offsetOfLastNewline;
6173
}
62-
} else {
63-
final newlines = _newlinesIn(string.substring(newPosition, oldPosition));
64-
if (_betweenCRLF) newlines.removeLast();
74+
} else if (newPosition < oldPosition) {
75+
final newlines = _newlinesIn(string.substring(newPosition, oldPosition),
76+
endPosition: oldPosition);
6577

6678
_line -= newlines.length;
6779
if (newlines.isEmpty) {
6880
_column -= oldPosition - newPosition;
6981
} else {
70-
_column =
71-
newPosition - string.lastIndexOf(_newlineRegExp, newPosition) - 1;
82+
// To compute the new column, we need to locate the last newline before
83+
// the new position. When searching, we must exclude the CR if we're
84+
// between a CRLF because it's not considered a newline.
85+
final crOffset = _betweenCRLF ? -1 : 0;
86+
// Additionally, if we use newPosition as the end of the search and the
87+
// character at that position itself (the next character) is a newline
88+
// we should not use it, so also offset to account for that.
89+
const currentCharOffset = -1;
90+
final lastNewline = string.lastIndexOf(
91+
_newlineRegExp, newPosition + currentCharOffset + crOffset);
92+
93+
// Now we need to know the offset after the newline. This is the index
94+
// above plus the length of the newline (e.g. if we found `\r\n` we need
95+
// to add two). However, if no newline was found, that offset is 0.
96+
final offsetAfterLastNewline = lastNewline == -1
97+
? 0
98+
: string[lastNewline] == '\r' && string[lastNewline + 1] == '\n'
99+
? lastNewline + 2
100+
: lastNewline + 1;
101+
102+
_column = newPosition - offsetAfterLastNewline;
72103
}
73104
}
74105
}
@@ -103,7 +134,7 @@ class LineScanner extends StringScanner {
103134
bool scan(Pattern pattern) {
104135
if (!super.scan(pattern)) return false;
105136

106-
final newlines = _newlinesIn(lastMatch![0]!);
137+
final newlines = _newlinesIn(lastMatch![0]!, endPosition: position);
107138
_line += newlines.length;
108139
if (newlines.isEmpty) {
109140
_column += lastMatch![0]!.length;
@@ -115,10 +146,21 @@ class LineScanner extends StringScanner {
115146
}
116147

117148
/// Returns a list of [Match]es describing all the newlines in [text], which
118-
/// is assumed to end at [position].
119-
List<Match> _newlinesIn(String text) {
149+
/// ends at [endPosition].
150+
///
151+
/// If [text] ends with `\r`, it will only be treated as a newline if the next
152+
/// character at [endPosition] is not a `\n`.
153+
List<Match> _newlinesIn(String text, {required int endPosition}) {
120154
final newlines = _newlineRegExp.allMatches(text).toList();
121-
if (_betweenCRLF) newlines.removeLast();
155+
// If the last character is a `\r` it will have been treated as a newline,
156+
// but this is only valid if the next character is not a `\n`.
157+
if (endPosition < string.length &&
158+
text.endsWith('\r') &&
159+
string[endPosition] == '\n') {
160+
// newlines should never be empty here, because if `text` ends with `\r`
161+
// it would have matched `\r(?!\n)` in the newline regex.
162+
newlines.removeLast();
163+
}
122164
return newlines;
123165
}
124166
}

pubspec.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: string_scanner
2-
version: 1.3.0
2+
version: 1.4.0
33
description: A class for parsing strings using a sequence of patterns.
44
repository: https://github.com/dart-lang/string_scanner
55

0 commit comments

Comments
 (0)