diff --git a/src/Compilers/CSharp/Portable/Parser/Lexer.cs b/src/Compilers/CSharp/Portable/Parser/Lexer.cs
index 6e523cb7c530d..404ce025ae053 100644
--- a/src/Compilers/CSharp/Portable/Parser/Lexer.cs
+++ b/src/Compilers/CSharp/Portable/Parser/Lexer.cs
@@ -8,10 +8,10 @@
using System.Globalization;
using System.Linq;
using System.Text;
+using Microsoft.CodeAnalysis.PooledObjects;
using Microsoft.CodeAnalysis.Syntax.InternalSyntax;
using Microsoft.CodeAnalysis.Text;
using Roslyn.Utilities;
-using Microsoft.CodeAnalysis.PooledObjects;
namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax
{
@@ -434,8 +434,8 @@ private void ScanSyntaxToken(ref TokenInfo info)
info.Kind = SyntaxKind.None;
info.ContextualKind = SyntaxKind.None;
info.Text = null;
+
char character;
- char surrogateCharacter = SlidingTextWindow.InvalidCharacter;
bool isEscaped = false;
int startingPosition = TextWindow.Position;
@@ -863,19 +863,16 @@ private void ScanSyntaxToken(ref TokenInfo info)
break;
case '\\':
+ // Could be unicode escape. Try that.
+ isEscaped = true;
+ character = PeekCharOrUnicodeEscape(out _);
+ if (SyntaxFacts.IsIdentifierStartCharacter(character))
{
- // Could be unicode escape. Try that.
- character = TextWindow.PeekCharOrUnicodeEscape(out surrogateCharacter);
-
- isEscaped = true;
- if (SyntaxFacts.IsIdentifierStartCharacter(character))
- {
- goto case 'a';
- }
-
- goto default;
+ goto case 'a';
}
+ goto default;
+
case SlidingTextWindow.InvalidCharacter:
if (!TextWindow.IsReallyAtEnd())
{
@@ -903,29 +900,42 @@ private void ScanSyntaxToken(ref TokenInfo info)
if (isEscaped)
{
- SyntaxDiagnosticInfo? error;
- TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error);
+ NextCharOrUnicodeEscape(out _, out var error);
AddError(error);
}
else
{
TextWindow.AdvanceChar();
+
+ // If we ran into the start of a surrogate pair, then see if we have the whole pair. If so,
+ // skip the pair in its entirety so we can produce a good error message that treats the two as a
+ // single entity to tell the user about.
+ if (char.IsHighSurrogate(character) && char.IsLowSurrogate(TextWindow.PeekChar()))
+ TextWindow.AdvanceChar();
}
- if (_badTokenCount++ > 200)
+ // If we get too many characters that we cannot make sense of, treat the entire rest of the file as
+ // a single invalid character, so we can bail out of parsing early without producing an unbounded
+ // number of errors.
+ if (_badTokenCount++ <= 200)
{
- // If we get too many characters that we cannot make sense of, absorb the rest of the input.
- int end = TextWindow.Text.Length;
- int width = end - startingPosition;
- info.Text = TextWindow.Text.ToString(new TextSpan(startingPosition, width));
- TextWindow.Reset(end);
+ info.Text = TextWindow.GetText(intern: true);
}
else
{
- info.Text = TextWindow.GetText(intern: true);
+ int end = TextWindow.Text.Length;
+ info.Text = TextWindow.Text.ToString(TextSpan.FromBounds(startingPosition, end));
+ TextWindow.Reset(end);
}
- this.AddError(ErrorCode.ERR_UnexpectedCharacter, info.Text);
+ // If the original text wasn't already escaped, then escape it in the error message so that it's
+ // clear what the issue is. I.e., if the user's source had the literal six characters in order like
+ // so: `\` `u` `D` `C` `E` `7`, then there's no need to escape that again when reporting the error.
+ // However, if the user's code has the actual System.Char \uDCE7 char in it, then we want to print
+ // that out in escaped form so they have an actual clue about what the character value is that we
+ // have a problem with.
+ var messageText = isEscaped ? info.Text : ObjectDisplay.FormatLiteral(info.Text, ObjectDisplayOptions.EscapeNonPrintableCharacters);
+ this.AddError(ErrorCode.ERR_UnexpectedCharacter, messageText);
break;
}
}
@@ -1714,12 +1724,12 @@ private bool ScanIdentifier_SlowPath(ref TokenInfo info)
switch (ch)
{
case '\\':
- if (!isEscaped && TextWindow.IsUnicodeEscape())
+ if (!isEscaped && IsUnicodeEscape())
{
// ^^^^^^^ otherwise \u005Cu1234 looks just like \u1234! (i.e. escape within escape)
info.HasIdentifierEscapeSequence = true;
isEscaped = true;
- ch = TextWindow.PeekUnicodeEscape(out surrogateCharacter);
+ ch = PeekUnicodeEscape(out surrogateCharacter);
goto top;
}
@@ -1870,7 +1880,7 @@ private bool ScanIdentifier_SlowPath(ref TokenInfo info)
if (isEscaped)
{
SyntaxDiagnosticInfo? error;
- TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error);
+ NextCharOrUnicodeEscape(out surrogateCharacter, out error);
AddError(error);
}
else
@@ -1894,7 +1904,7 @@ private bool ScanIdentifier_SlowPath(ref TokenInfo info)
if (isEscaped)
{
SyntaxDiagnosticInfo? error;
- TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error);
+ NextCharOrUnicodeEscape(out surrogateCharacter, out error);
AddError(error);
}
else
@@ -1992,7 +2002,7 @@ private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
if (TextWindow.PeekChar() == '&')
{
- if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate))
+ if (!TryScanXmlEntity(out consumedChar, out consumedSurrogate))
{
// If it's not a valid entity, then it's not part of the identifier.
TextWindow.Reset(beforeConsumed);
@@ -2032,7 +2042,7 @@ private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info)
// ^^^^^^^ otherwise \u005Cu1234 looks just like \u1234! (i.e. escape within escape)
isEscaped = true;
SyntaxDiagnosticInfo? error;
- consumedChar = TextWindow.NextUnicodeEscape(out consumedSurrogate, out error);
+ consumedChar = NextUnicodeEscape(out consumedSurrogate, out error);
AddCrefError(error);
goto top;
}
@@ -2988,7 +2998,7 @@ private bool ScanDirectiveToken(ref TokenInfo info)
case '\\':
{
// Could be unicode escape. Try that.
- character = TextWindow.PeekCharOrUnicodeEscape(out surrogateCharacter);
+ character = PeekCharOrUnicodeEscape(out surrogateCharacter);
isEscaped = true;
if (SyntaxFacts.IsIdentifierStartCharacter(character))
{
@@ -3015,7 +3025,7 @@ private bool ScanDirectiveToken(ref TokenInfo info)
if (isEscaped)
{
SyntaxDiagnosticInfo? error;
- TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error);
+ NextCharOrUnicodeEscape(out surrogateCharacter, out error);
AddError(error);
}
else
@@ -3988,7 +3998,7 @@ private bool ScanXmlCrefToken(ref TokenInfo info)
case '&':
TextWindow.Reset(beforeConsumed);
- if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate))
+ if (!TryScanXmlEntity(out consumedChar, out consumedSurrogate))
{
TextWindow.Reset(beforeConsumed);
this.ScanXmlEntity(ref info);
@@ -4243,7 +4253,7 @@ private bool AdvanceIfMatches(char ch)
char nextChar;
char nextSurrogate;
- if (TextWindow.TryScanXmlEntity(out nextChar, out nextSurrogate)
+ if (TryScanXmlEntity(out nextChar, out nextSurrogate)
&& nextChar == ch && nextSurrogate == SlidingTextWindow.InvalidCharacter)
{
return true;
@@ -4896,5 +4906,299 @@ private void LexXmlWhitespaceAndNewLineTrivia(ref SyntaxListBuilder? trivia)
}
}
}
+
+        /// <summary>
+        /// Reports whether the text window is positioned on a backslash that is immediately
+        /// followed by 'u' or 'U', i.e. the two-character prefix of a unicode escape.
+        /// </summary>
+        private bool IsUnicodeEscape()
+        {
+            if (TextWindow.PeekChar() != '\\')
+            {
+                return false;
+            }
+
+            // Only look one character further once the backslash has been confirmed.
+            var marker = TextWindow.PeekChar(1);
+            return marker == 'U' || marker == 'u';
+        }
+
+        /// <summary>
+        /// Returns the next character, decoding it via <see cref="PeekUnicodeEscape"/> when the
+        /// window is positioned on a unicode escape prefix, and via a plain peek otherwise.
+        /// </summary>
+        /// <param name="surrogateCharacter">
+        /// Whatever <see cref="PeekUnicodeEscape"/> reports for an escape; otherwise
+        /// <see cref="SlidingTextWindow.InvalidCharacter"/>.
+        /// </param>
+        private char PeekCharOrUnicodeEscape(out char surrogateCharacter)
+        {
+            if (!IsUnicodeEscape())
+            {
+                surrogateCharacter = SlidingTextWindow.InvalidCharacter;
+                return TextWindow.PeekChar();
+            }
+
+            return PeekUnicodeEscape(out surrogateCharacter);
+        }
+
+        /// <summary>
+        /// Decodes the unicode escape at the current position and then restores the window to
+        /// where it was before the scan. The scan runs in peek mode, which never creates a diagnostic.
+        /// </summary>
+        private char PeekUnicodeEscape(out char surrogateCharacter)
+        {
+            // The scan below advances the window; remember where to rewind to.
+            var savedPosition = TextWindow.Position;
+
+            var decoded = ScanUnicodeEscape(peek: true, surrogateCharacter: out surrogateCharacter, info: out SyntaxDiagnosticInfo? diagnostic);
+            Debug.Assert(diagnostic == null, "Never produce a diagnostic while peeking.");
+
+            TextWindow.Reset(savedPosition);
+            return decoded;
+        }
+
+        /// <summary>
+        /// Consumes and returns the next character. A backslash followed by 'u' or 'U' is consumed
+        /// as a unicode escape; anything else is consumed as a single character.
+        /// </summary>
+        /// <param name="surrogateCharacter">Set by the escape scanner, or to <see cref="SlidingTextWindow.InvalidCharacter"/> for a plain character.</param>
+        /// <param name="info">Diagnostic produced by the escape scanner, or null for a plain character.</param>
+        private char NextCharOrUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info)
+        {
+            var current = TextWindow.PeekChar();
+            Debug.Assert(current != SlidingTextWindow.InvalidCharacter, "Precondition established by all callers; required for correctness of AdvanceChar() call.");
+
+            // Escape prefix: let the scanner consume the backslash, the marker and the hex digits.
+            if (IsUnicodeEscape())
+            {
+                return ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info);
+            }
+
+            // Plain character: consume exactly one char; there is no surrogate and no diagnostic.
+            TextWindow.AdvanceChar();
+            surrogateCharacter = SlidingTextWindow.InvalidCharacter;
+            info = null;
+            return current;
+        }
+
+        /// <summary>
+        /// Consumes the unicode escape at the current position (non-peek mode, so a diagnostic may be produced).
+        /// </summary>
+        private char NextUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info)
+            => ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info);
+
+ /// <summary>
+ /// Scans an escape sequence that starts with a backslash at the current position. A 'U' marker
+ /// is followed by up to eight hex digits; any other marker (asserted to be 'u' or 'x') is followed
+ /// by up to four hex digits. The window is advanced past everything that was scanned, in both
+ /// peek and non-peek mode.
+ /// </summary>
+ /// <param name="peek">When true, no diagnostic is created for a malformed escape.</param>
+ /// <param name="surrogateCharacter">
+ /// Receives the second value produced by <see cref="GetCharsFromUtf32"/> for a 'U' escape;
+ /// otherwise <see cref="SlidingTextWindow.InvalidCharacter"/>.
+ /// </param>
+ /// <param name="info">An ERR_IllegalEscape diagnostic for a malformed escape (non-peek mode only); otherwise null.</param>
+ /// <returns>
+ /// The decoded character. On the error paths that skip decoding, this is whatever was last
+ /// stored in the local <c>character</c> (the marker or the offending non-hex character).
+ /// </returns>
+ private char ScanUnicodeEscape(bool peek, out char surrogateCharacter, out SyntaxDiagnosticInfo? info)
+ {
+ surrogateCharacter = SlidingTextWindow.InvalidCharacter;
+ info = null;
+
+ // Remember where the escape began so a diagnostic can cover the whole sequence.
+ int start = TextWindow.Position;
+ char character = TextWindow.PeekChar();
+ Debug.Assert(character == '\\');
+ TextWindow.AdvanceChar();
+
+ character = TextWindow.PeekChar();
+ if (character == 'U')
+ {
+ // Long form: \U followed by up to eight hex digits, accumulated as a UTF-32 value.
+ uint uintChar = 0;
+
+ TextWindow.AdvanceChar();
+ if (!SyntaxFacts.IsHexDigit(TextWindow.PeekChar()))
+ {
+ // No digits at all after the marker.
+ if (!peek)
+ {
+ info = CreateIllegalEscapeDiagnostic(start);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < 8; i++)
+ {
+ character = TextWindow.PeekChar();
+ if (!SyntaxFacts.IsHexDigit(character))
+ {
+ // Fewer than eight digits is an error for the \U form.
+ if (!peek)
+ {
+ info = CreateIllegalEscapeDiagnostic(start);
+ }
+
+ break;
+ }
+
+ uintChar = (uint)((uintChar << 4) + SyntaxFacts.HexValue(character));
+ TextWindow.AdvanceChar();
+ }
+
+ // Values above 0x10FFFF are rejected; everything else is converted, even if a
+ // too-few-digits diagnostic was already recorded above.
+ if (uintChar > 0x0010FFFF)
+ {
+ if (!peek)
+ {
+ info = CreateIllegalEscapeDiagnostic(start);
+ }
+ }
+ else
+ {
+ character = GetCharsFromUtf32(uintChar, out surrogateCharacter);
+ }
+ }
+ }
+ else
+ {
+ Debug.Assert(character == 'u' || character == 'x');
+
+ // Short form: up to four hex digits, accumulated directly into a UTF-16 code unit.
+ int intChar = 0;
+ TextWindow.AdvanceChar();
+ if (!SyntaxFacts.IsHexDigit(TextWindow.PeekChar()))
+ {
+ // No digits at all after the marker.
+ if (!peek)
+ {
+ info = CreateIllegalEscapeDiagnostic(start);
+ }
+ }
+ else
+ {
+ for (int i = 0; i < 4; i++)
+ {
+ char ch2 = TextWindow.PeekChar();
+ if (!SyntaxFacts.IsHexDigit(ch2))
+ {
+ // Stopping before four digits is only diagnosed for the 'u' marker;
+ // for the other marker the shorter sequence is accepted silently.
+ if (character == 'u')
+ {
+ if (!peek)
+ {
+ info = CreateIllegalEscapeDiagnostic(start);
+ }
+ }
+
+ break;
+ }
+
+ intChar = (intChar << 4) + SyntaxFacts.HexValue(ch2);
+ TextWindow.AdvanceChar();
+ }
+
+ character = (char)intChar;
+ }
+ }
+
+ return character;
+ }
+
+ /// <summary>
+ /// Given that the next character is an ampersand ('&amp;'), attempt to interpret the
+ /// following characters as an XML entity. On success, populate the out parameters
+ /// with the low and high UTF-16 surrogates for the character represented by the
+ /// entity.
+ /// </summary>
+ /// <param name="ch">e.g. '&lt;' for &amp;lt;.</param>
+ /// <param name="surrogate">e.g. '\uDC00' for &amp;#x10000; (ch == '\uD800').</param>
+ /// <returns>True if a valid XML entity was consumed.</returns>
+ /// <remarks>
+ /// NOTE: Always advances, even on failure.
+ /// </remarks>
+ public bool TryScanXmlEntity(out char ch, out char surrogate)
+ {
+ Debug.Assert(TextWindow.PeekChar() == '&');
+
+ // Until an entity is recognized, the result is the ampersand itself. The ampersand is
+ // consumed unconditionally, which is why this method advances even when it returns false.
+ ch = '&';
+ TextWindow.AdvanceChar();
+
+ surrogate = SlidingTextWindow.InvalidCharacter;
+
+ // Dispatch on the first character after '&': named entities first, then numeric references.
+ switch (TextWindow.PeekChar())
+ {
+ case 'l':
+ if (TextWindow.AdvanceIfMatches("lt;"))
+ {
+ ch = '<';
+ return true;
+ }
+ break;
+ case 'g':
+ if (TextWindow.AdvanceIfMatches("gt;"))
+ {
+ ch = '>';
+ return true;
+ }
+ break;
+ case 'a':
+ if (TextWindow.AdvanceIfMatches("amp;"))
+ {
+ ch = '&';
+ return true;
+ }
+ else if (TextWindow.AdvanceIfMatches("apos;"))
+ {
+ ch = '\'';
+ return true;
+ }
+ break;
+ case 'q':
+ if (TextWindow.AdvanceIfMatches("quot;"))
+ {
+ ch = '"';
+ return true;
+ }
+ break;
+ case '#':
+ {
+ // Numeric character reference: "&#" + decimal digits + ";" or "&#x" + hex digits + ";".
+ TextWindow.AdvanceChar(); //#
+
+ uint uintChar = 0;
+
+ if (TextWindow.AdvanceIfMatches("x"))
+ {
+ char digit;
+ while (SyntaxFacts.IsHexDigit(digit = TextWindow.PeekChar()))
+ {
+ TextWindow.AdvanceChar();
+
+ // Disallow overflow: while uintChar <= 0x7FFFFFF the shift-and-add below cannot
+ // overflow a uint. Once the value is larger, the entity is rejected.
+ if (uintChar <= 0x7FFFFFF)
+ {
+ uintChar = (uintChar << 4) + (uint)SyntaxFacts.HexValue(digit);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+ else
+ {
+ char digit;
+ while (SyntaxFacts.IsDecDigit(digit = TextWindow.PeekChar()))
+ {
+ TextWindow.AdvanceChar();
+
+ // Disallow overflow (same guard as the hex case). (x << 3) + (x << 1) is x * 10.
+ if (uintChar <= 0x7FFFFFF)
+ {
+ uintChar = (uintChar << 3) + (uintChar << 1) + (uint)SyntaxFacts.DecValue(digit);
+ }
+ else
+ {
+ return false;
+ }
+ }
+ }
+
+ if (TextWindow.AdvanceIfMatches(";"))
+ {
+ // NOTE(review): uintChar is bounded only by the overflow guard above, not by 0x10FFFF,
+ // while GetCharsFromUtf32 asserts codepoint <= 0x0010FFFF. A reference with no digits
+ // at all also reaches this point with uintChar == 0. Confirm both are intended.
+ ch = GetCharsFromUtf32(uintChar, out surrogate);
+ return true;
+ }
+
+ break;
+ }
+ }
+
+ return false;
+ }
+
+        /// <summary>
+        /// Builds an ERR_IllegalEscape diagnostic covering the text from <paramref name="start"/>
+        /// up to the window's current position.
+        /// </summary>
+        /// <param name="start">Absolute position at which the escape sequence began.</param>
+        private SyntaxDiagnosticInfo CreateIllegalEscapeDiagnostic(int start)
+        {
+            // The first value is measured from the start of the current lexeme; the second is
+            // the number of characters consumed since 'start'.
+            var offsetInLexeme = start - TextWindow.LexemeStartPosition;
+            var consumedWidth = TextWindow.Position - start;
+
+            return new SyntaxDiagnosticInfo(offsetInLexeme, consumedWidth, ErrorCode.ERR_IllegalEscape);
+        }
+
+        /// <summary>
+        /// Converts a UTF-32 code point into UTF-16 code units.
+        /// </summary>
+        /// <param name="codepoint">The code point to convert; asserted to be at most 0x10FFFF when it is 0x10000 or above.</param>
+        /// <param name="lowSurrogate">
+        /// The low surrogate when <paramref name="codepoint"/> is 0x10000 or above; otherwise
+        /// <see cref="SlidingTextWindow.InvalidCharacter"/>.
+        /// </param>
+        /// <returns>The high surrogate for code points of 0x10000 or above; otherwise the code point itself as a char.</returns>
+        internal static char GetCharsFromUtf32(uint codepoint, out char lowSurrogate)
+        {
+            if (codepoint >= (uint)0x00010000)
+            {
+                Debug.Assert(codepoint > 0x0000FFFF && codepoint <= 0x0010FFFF);
+
+                // Split the offset above 0x10000 into its upper and lower 0x400-sized parts.
+                var offset = codepoint - 0x00010000;
+                lowSurrogate = (char)(offset % 0x0400 + 0xDC00);
+                return (char)(offset / 0x0400 + 0xD800);
+            }
+
+            // A single UTF-16 code unit is enough; there is no second half.
+            lowSurrogate = SlidingTextWindow.InvalidCharacter;
+            return (char)codepoint;
+        }
}
}
diff --git a/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs b/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs
index d625d7ed22bda..27f6b6efcce50 100644
--- a/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs
+++ b/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs
@@ -171,7 +171,7 @@ private char ScanEscapeSequence(out char surrogateCharacter)
case 'U':
TextWindow.Reset(start);
SyntaxDiagnosticInfo? error;
- ch = TextWindow.NextUnicodeEscape(surrogateCharacter: out surrogateCharacter, info: out error);
+ ch = NextUnicodeEscape(surrogateCharacter: out surrogateCharacter, info: out error);
AddError(error);
break;
default:
diff --git a/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs b/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs
index 52b56ffe1be34..25e050f990e3a 100644
--- a/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs
+++ b/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs
@@ -361,283 +361,11 @@ public char PeekChar(int delta)
return ch;
}
- public bool IsUnicodeEscape()
- {
- if (this.PeekChar() == '\\')
- {
- var ch2 = this.PeekChar(1);
- if (ch2 == 'U' || ch2 == 'u')
- {
- return true;
- }
- }
-
- return false;
- }
-
- public char PeekCharOrUnicodeEscape(out char surrogateCharacter)
- {
- if (this.IsUnicodeEscape())
- {
- return this.PeekUnicodeEscape(out surrogateCharacter);
- }
- else
- {
- surrogateCharacter = InvalidCharacter;
- return this.PeekChar();
- }
- }
-
- public char PeekUnicodeEscape(out char surrogateCharacter)
- {
- int position = this.Position;
-
- // if we're peeking, then we don't want to change the position
- SyntaxDiagnosticInfo? info;
- var ch = this.ScanUnicodeEscape(peek: true, surrogateCharacter: out surrogateCharacter, info: out info);
- Debug.Assert(info == null, "Never produce a diagnostic while peeking.");
- this.Reset(position);
- return ch;
- }
-
- public char NextCharOrUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info)
- {
- var ch = this.PeekChar();
- Debug.Assert(ch != InvalidCharacter, "Precondition established by all callers; required for correctness of AdvanceChar() call.");
- if (ch == '\\')
- {
- var ch2 = this.PeekChar(1);
- if (ch2 == 'U' || ch2 == 'u')
- {
- return this.ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info);
- }
- }
-
- surrogateCharacter = InvalidCharacter;
- info = null;
- this.AdvanceChar();
- return ch;
- }
-
- public char NextUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info)
- {
- return ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info);
- }
-
- private char ScanUnicodeEscape(bool peek, out char surrogateCharacter, out SyntaxDiagnosticInfo? info)
- {
- surrogateCharacter = InvalidCharacter;
- info = null;
-
- int start = this.Position;
- char character = this.PeekChar();
- Debug.Assert(character == '\\');
- this.AdvanceChar();
-
- character = this.PeekChar();
- if (character == 'U')
- {
- uint uintChar = 0;
-
- this.AdvanceChar();
- if (!SyntaxFacts.IsHexDigit(this.PeekChar()))
- {
- if (!peek)
- {
- info = CreateIllegalEscapeDiagnostic(start);
- }
- }
- else
- {
- for (int i = 0; i < 8; i++)
- {
- character = this.PeekChar();
- if (!SyntaxFacts.IsHexDigit(character))
- {
- if (!peek)
- {
- info = CreateIllegalEscapeDiagnostic(start);
- }
-
- break;
- }
-
- uintChar = (uint)((uintChar << 4) + SyntaxFacts.HexValue(character));
- this.AdvanceChar();
- }
-
- if (uintChar > 0x0010FFFF)
- {
- if (!peek)
- {
- info = CreateIllegalEscapeDiagnostic(start);
- }
- }
- else
- {
- character = GetCharsFromUtf32(uintChar, out surrogateCharacter);
- }
- }
- }
- else
- {
- Debug.Assert(character == 'u' || character == 'x');
-
- int intChar = 0;
- this.AdvanceChar();
- if (!SyntaxFacts.IsHexDigit(this.PeekChar()))
- {
- if (!peek)
- {
- info = CreateIllegalEscapeDiagnostic(start);
- }
- }
- else
- {
- for (int i = 0; i < 4; i++)
- {
- char ch2 = this.PeekChar();
- if (!SyntaxFacts.IsHexDigit(ch2))
- {
- if (character == 'u')
- {
- if (!peek)
- {
- info = CreateIllegalEscapeDiagnostic(start);
- }
- }
-
- break;
- }
-
- intChar = (intChar << 4) + SyntaxFacts.HexValue(ch2);
- this.AdvanceChar();
- }
-
- character = (char)intChar;
- }
- }
-
- return character;
- }
-
- ///
- /// Given that the next character is an ampersand ('&'), attempt to interpret the
- /// following characters as an XML entity. On success, populate the out parameters
- /// with the low and high UTF-16 surrogates for the character represented by the
- /// entity.
- ///
- /// e.g. '<' for <.
- /// e.g. '\uDC00' for 𐀀 (ch == '\uD800').
- /// True if a valid XML entity was consumed.
- ///
- /// NOTE: Always advances, even on failure.
- ///
- public bool TryScanXmlEntity(out char ch, out char surrogate)
- {
- Debug.Assert(this.PeekChar() == '&');
-
- ch = '&';
- this.AdvanceChar();
-
- surrogate = InvalidCharacter;
-
- switch (this.PeekChar())
- {
- case 'l':
- if (AdvanceIfMatches("lt;"))
- {
- ch = '<';
- return true;
- }
- break;
- case 'g':
- if (AdvanceIfMatches("gt;"))
- {
- ch = '>';
- return true;
- }
- break;
- case 'a':
- if (AdvanceIfMatches("amp;"))
- {
- ch = '&';
- return true;
- }
- else if (AdvanceIfMatches("apos;"))
- {
- ch = '\'';
- return true;
- }
- break;
- case 'q':
- if (AdvanceIfMatches("quot;"))
- {
- ch = '"';
- return true;
- }
- break;
- case '#':
- {
- this.AdvanceChar(); //#
-
- uint uintChar = 0;
-
- if (AdvanceIfMatches("x"))
- {
- char digit;
- while (SyntaxFacts.IsHexDigit(digit = this.PeekChar()))
- {
- this.AdvanceChar();
-
- // disallow overflow
- if (uintChar <= 0x7FFFFFF)
- {
- uintChar = (uintChar << 4) + (uint)SyntaxFacts.HexValue(digit);
- }
- else
- {
- return false;
- }
- }
- }
- else
- {
- char digit;
- while (SyntaxFacts.IsDecDigit(digit = this.PeekChar()))
- {
- this.AdvanceChar();
-
- // disallow overflow
- if (uintChar <= 0x7FFFFFF)
- {
- uintChar = (uintChar << 3) + (uintChar << 1) + (uint)SyntaxFacts.DecValue(digit);
- }
- else
- {
- return false;
- }
- }
- }
-
- if (AdvanceIfMatches(";"))
- {
- ch = GetCharsFromUtf32(uintChar, out surrogate);
- return true;
- }
-
- break;
- }
- }
-
- return false;
- }
-
///
/// If the next characters in the window match the given string,
/// then advance past those characters. Otherwise, do nothing.
///
- private bool AdvanceIfMatches(string desired)
+ internal bool AdvanceIfMatches(string desired)
{
int length = desired.Length;
@@ -653,13 +381,6 @@ private bool AdvanceIfMatches(string desired)
return true;
}
- private SyntaxDiagnosticInfo CreateIllegalEscapeDiagnostic(int start)
- {
- return new SyntaxDiagnosticInfo(start - this.LexemeStartPosition,
- this.Position - start,
- ErrorCode.ERR_IllegalEscape);
- }
-
public string Intern(StringBuilder text)
{
return _strings.Add(text);
diff --git a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs
index 5dbdc3833a388..463aedf413c01 100644
--- a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs
+++ b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs
@@ -4,8 +4,11 @@
#nullable disable
+using System.Collections.Generic;
using Microsoft.CodeAnalysis.CSharp.Test.Utilities;
+using Microsoft.CodeAnalysis.Test.Utilities;
using Roslyn.Test.Utilities;
+using Roslyn.Utilities;
using Xunit;
namespace Microsoft.CodeAnalysis.CSharp.UnitTests
@@ -389,6 +392,154 @@ class A
Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"\u0060"));
}
+ // A lone \ud86d char (no following low surrogate) placed where an identifier is expected.
+ // The ERR_UnexpectedCharacter argument is expected in escaped form (\ud86d), not as the raw char.
+ [Fact]
+ public void CS1056ERR_UnexpectedCharacter_UnpairedSurrogate1()
+ {
+ var test = $$"""
+ using System;
+ class Test
+ {
+ public static void Main()
+ {
+ int {{'\ud86d'}} = 1;
+ }
+ }
+ """;
+
+ ParsingTests.ParseAndValidate(test,
+ // (6,13): error CS1001: Identifier expected
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_IdentifierExpected, "\ud86d").WithLocation(6, 13),
+ // (6,13): error CS1056: Unexpected character '\ud86d'
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"\ud86d").WithLocation(6, 13),
+ // (6,15): error CS1002: ; expected
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_SemicolonExpected, "=").WithLocation(6, 15),
+ // (6,15): error CS1525: Invalid expression term '='
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_InvalidExprTerm, "=").WithArguments("=").WithLocation(6, 15));
+ }
+
+ // A lone \udce7 char (not preceded by a high surrogate) placed where an identifier is expected.
+ // The ERR_UnexpectedCharacter argument is expected in escaped form (\udce7), not as the raw char.
+ [Fact]
+ public void CS1056ERR_UnexpectedCharacter_UnpairedSurrogate2()
+ {
+ var test = $$"""
+ using System;
+ class Test
+ {
+ public static void Main()
+ {
+ int {{'\udce7'}} = 1;
+ }
+ }
+ """;
+
+ ParsingTests.ParseAndValidate(test,
+ // (6,13): error CS1001: Identifier expected
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_IdentifierExpected, "\udce7").WithLocation(6, 13),
+ // (6,13): error CS1056: Unexpected character '\udce7'
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"\udce7").WithLocation(6, 13),
+ // (6,15): error CS1002: ; expected
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_SemicolonExpected, "=").WithLocation(6, 15),
+ // (6,15): error CS1525: Invalid expression term '='
+ // int � = 1;
+ Diagnostic(ErrorCode.ERR_InvalidExprTerm, "=").WithArguments("=").WithLocation(6, 15));
+ }
+
+ // A complete surrogate pair placed where an identifier is expected. The expectations pin a
+ // single ERR_UnexpectedCharacter whose argument is the whole pair, with parsing resuming at
+ // column 15, i.e. two UTF-16 code units later.
+ [Fact]
+ public void CS1056ERR_UnexpectedCharacter_Surrogate()
+ {
+ var test = """
+ using System;
+ class Test
+ {
+ public static void Main()
+ {
+ int 𫓧龦 = 1;
+ }
+ }
+ """;
+
+ ParsingTests.ParseAndValidate(test,
+ // (6,13): error CS1001: Identifier expected
+ // int 𫓧龦 = 1;
+ Diagnostic(ErrorCode.ERR_IdentifierExpected, "𫓧").WithLocation(6, 13),
+ // (6,13): error CS1056: Unexpected character '𫓧'
+ // int 𫓧龦 = 1;
+ Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments("𫓧").WithLocation(6, 13),
+ // (6,15): error CS1002: ; expected
+ // int 𫓧龦 = 1;
+ Diagnostic(ErrorCode.ERR_SemicolonExpected, "龦").WithLocation(6, 15));
+ }
+
+ [Fact]
+ public void CS1056ERR_UnexpectedCharacter_Surrogate_Long()
+ {
+ // Create a file with 200 slashes in a row, followed by two surrogate pairs. Each slash, and then the
+ // first pair, is expected to get its own 'unexpected character' error, after which the compiler will
+ // give up and make a single error (with a multi-char message) for the remainder of the doc.
+
+ var test = $$"""
+ using System;
+ class Test
+ {
+ public static void Main()
+ {
+ int {{new string('\\', 200)}}𫓧𫓧 = 1;
+ }
+ }
+ """;
+
+ var descriptions = new List<DiagnosticDescription>
+ {
+ // (6,13): error CS1001: Identifier expected
+ // int \..200 more slashes..\𫓧𫓧 = 1;
+ Diagnostic(ErrorCode.ERR_IdentifierExpected, @"\").WithLocation(6, 13),
+ };
+
+ // One error per slash, at columns 13 through 212.
+ for (int i = 0; i < 200; i++)
+ {
+ descriptions.Add(
+ // (6,13 + i): error CS1056: Unexpected character '\'
+ // int \..200 more slashes..\𫓧𫓧 = 1;
+ Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments("\\").WithLocation(6, 13 + i));
+ }
+
+ // The first surrogate pair is still reported on its own.
+ descriptions.Add(
+ // (6,213): error CS1056: Unexpected character '𫓧'
+ // int \..200 more slashes..\𫓧𫓧 = 1;
+ Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"𫓧").WithLocation(6, 213));
+
+ // From the second pair onwards the rest of the file is reported as one error. Its message carries the
+ // remaining text with line breaks in escaped form, so the expected argument depends on the platform.
+ // (6,215): error CS1056: Unexpected character '𫓧 = 1;\r\n }\r\n}'
+ // int \..200 more slashes..\𫓧𫓧 = 1;
+ if (PathUtilities.IsUnixLikePlatform)
+ {
+ descriptions.Add(Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"𫓧 = 1;\n }\n}").WithLocation(6, 215));
+ }
+ else
+ {
+ descriptions.Add(Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"𫓧 = 1;\r\n }\r\n}").WithLocation(6, 215));
+ }
+
+ // Follow-on parser errors, all reported at (8,2).
+ descriptions.AddRange(new[]
+ {
+ // (8,2): error CS1002: ; expected
+ // }
+ Diagnostic(ErrorCode.ERR_SemicolonExpected, "").WithLocation(8, 2),
+ // (8,2): error CS1513: } expected
+ // }
+ Diagnostic(ErrorCode.ERR_RbraceExpected, "").WithLocation(8, 2),
+ // (8,2): error CS1513: } expected
+ // }
+ Diagnostic(ErrorCode.ERR_RbraceExpected, "").WithLocation(8, 2),
+ });
+
+ ParsingTests.ParseAndValidate(test, descriptions.ToArray());
+ }
+
[Fact, WorkItem(535937, "http://vstfdevdiv:8080/DevDiv2/DevDiv/_workitems/edit/535937")]
public void CS1646ERR_ExpectedVerbatimLiteral()
{