Skip to content

Commit

Permalink
Avoid treating /r as newline on it (#81)
Browse files Browse the repository at this point in the history
Fixes https://github.com/dart-lang/string_scanner/issues/80

This fixes an error when scanning a zero-length match when between a CR and LF. 


* Fix typo

* comment nits

* Fix some bugs when setting position if the current position is between \r\n

---------

Co-authored-by: Nate Bosch <nbosch1@gmail.com>
  • Loading branch information
DanTup and natebosch authored Oct 9, 2024
1 parent 2139417 commit 084b201
Show file tree
Hide file tree
Showing 4 changed files with 292 additions and 31 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## 1.4.0

* Fix `LineScanner`'s handling of `\r\n`'s to preventing errors scanning
zero-length matches when between CR and LF. CR is treated as a new line only
if not immediately followed by a LF.
* Fix `LineScanner`'s updating of `column` when setting `position` if the
current position is not `0`.

## 1.3.0

* Require Dart 3.1.0
Expand Down
70 changes: 56 additions & 14 deletions lib/src/line_scanner.dart
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ import 'utils.dart';

// Note that much of this code is duplicated in eager_span_scanner.dart.

/// A regular expression matching newlines across platforms.
final _newlineRegExp = RegExp(r'\r\n?|\n');
/// A regular expression matching newlines. A newline is either a `\n`, a `\r\n`
/// or a `\r` that is not immediately followed by a `\n`.
final _newlineRegExp = RegExp(r'\n|\r\n|\r(?!\n)');

/// A subclass of [StringScanner] that tracks line and column information.
class LineScanner extends StringScanner {
Expand Down Expand Up @@ -48,27 +49,57 @@ class LineScanner extends StringScanner {

@override
set position(int newPosition) {
if (newPosition == position) {
return;
}

final oldPosition = position;
super.position = newPosition;

if (newPosition > oldPosition) {
final newlines = _newlinesIn(string.substring(oldPosition, newPosition));
if (newPosition == 0) {
_line = 0;
_column = 0;
} else if (newPosition > oldPosition) {
final newlines = _newlinesIn(string.substring(oldPosition, newPosition),
endPosition: newPosition);
_line += newlines.length;
if (newlines.isEmpty) {
_column += newPosition - oldPosition;
} else {
_column = newPosition - newlines.last.end;
// The regex got a substring, so we need to account for where it started
// in the string.
final offsetOfLastNewline = oldPosition + newlines.last.end;
_column = newPosition - offsetOfLastNewline;
}
} else {
final newlines = _newlinesIn(string.substring(newPosition, oldPosition));
if (_betweenCRLF) newlines.removeLast();
} else if (newPosition < oldPosition) {
final newlines = _newlinesIn(string.substring(newPosition, oldPosition),
endPosition: oldPosition);

_line -= newlines.length;
if (newlines.isEmpty) {
_column -= oldPosition - newPosition;
} else {
_column =
newPosition - string.lastIndexOf(_newlineRegExp, newPosition) - 1;
// To compute the new column, we need to locate the last newline before
// the new position. When searching, we must exclude the CR if we're
// between a CRLF because it's not considered a newline.
final crOffset = _betweenCRLF ? -1 : 0;
// Additionally, if we use newPosition as the end of the search and the
// character at that position itself (the next character) is a newline
// we should not use it, so also offset to account for that.
const currentCharOffset = -1;
final lastNewline = string.lastIndexOf(
_newlineRegExp, newPosition + currentCharOffset + crOffset);

// Now we need to know the offset after the newline. This is the index
// above plus the length of the newline (eg. if we found `\r\n`) we need
// to add two. However if no newline was found, that index is 0.
final offsetAfterLastNewline = lastNewline == -1
? 0
: string[lastNewline] == '\r' && string[lastNewline + 1] == '\n'
? lastNewline + 2
: lastNewline + 1;

_column = newPosition - offsetAfterLastNewline;
}
}
}
Expand Down Expand Up @@ -103,7 +134,7 @@ class LineScanner extends StringScanner {
bool scan(Pattern pattern) {
if (!super.scan(pattern)) return false;

final newlines = _newlinesIn(lastMatch![0]!);
final newlines = _newlinesIn(lastMatch![0]!, endPosition: position);
_line += newlines.length;
if (newlines.isEmpty) {
_column += lastMatch![0]!.length;
Expand All @@ -115,10 +146,21 @@ class LineScanner extends StringScanner {
}

/// Returns a list of [Match]es describing all the newlines in [text], which
/// is assumed to end at [position].
List<Match> _newlinesIn(String text) {
/// ends at [endPosition].
///
/// If [text] ends with `\r`, it will only be treated as a newline if the next
/// character at [position] is not a `\n`.
List<Match> _newlinesIn(String text, {required int endPosition}) {
final newlines = _newlineRegExp.allMatches(text).toList();
if (_betweenCRLF) newlines.removeLast();
// If the last character is a `\r` it will have been treated as a newline,
// but this is only valid if the next character is not a `\n`.
if (endPosition < string.length &&
text.endsWith('\r') &&
string[endPosition] == '\n') {
// newlines should never be empty here, because if `text` ends with `\r`
// it would have matched `\r(?!\n)` in the newline regex.
newlines.removeLast();
}
return newlines;
}
}
Expand Down
2 changes: 1 addition & 1 deletion pubspec.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: string_scanner
version: 1.3.0
version: 1.4.0
description: A class for parsing strings using a sequence of patterns.
repository: https://github.com/dart-lang/string_scanner

Expand Down
Loading

0 comments on commit 084b201

Please sign in to comment.