diff --git a/src/Compilers/CSharp/Portable/Parser/Lexer.cs b/src/Compilers/CSharp/Portable/Parser/Lexer.cs index 6e523cb7c530d..404ce025ae053 100644 --- a/src/Compilers/CSharp/Portable/Parser/Lexer.cs +++ b/src/Compilers/CSharp/Portable/Parser/Lexer.cs @@ -8,10 +8,10 @@ using System.Globalization; using System.Linq; using System.Text; +using Microsoft.CodeAnalysis.PooledObjects; using Microsoft.CodeAnalysis.Syntax.InternalSyntax; using Microsoft.CodeAnalysis.Text; using Roslyn.Utilities; -using Microsoft.CodeAnalysis.PooledObjects; namespace Microsoft.CodeAnalysis.CSharp.Syntax.InternalSyntax { @@ -434,8 +434,8 @@ private void ScanSyntaxToken(ref TokenInfo info) info.Kind = SyntaxKind.None; info.ContextualKind = SyntaxKind.None; info.Text = null; + char character; - char surrogateCharacter = SlidingTextWindow.InvalidCharacter; bool isEscaped = false; int startingPosition = TextWindow.Position; @@ -863,19 +863,16 @@ private void ScanSyntaxToken(ref TokenInfo info) break; case '\\': + // Could be unicode escape. Try that. + isEscaped = true; + character = PeekCharOrUnicodeEscape(out _); + if (SyntaxFacts.IsIdentifierStartCharacter(character)) { - // Could be unicode escape. Try that. - character = TextWindow.PeekCharOrUnicodeEscape(out surrogateCharacter); - - isEscaped = true; - if (SyntaxFacts.IsIdentifierStartCharacter(character)) - { - goto case 'a'; - } - - goto default; + goto case 'a'; } + goto default; + case SlidingTextWindow.InvalidCharacter: if (!TextWindow.IsReallyAtEnd()) { @@ -903,29 +900,42 @@ private void ScanSyntaxToken(ref TokenInfo info) if (isEscaped) { - SyntaxDiagnosticInfo? error; - TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error); + NextCharOrUnicodeEscape(out _, out var error); AddError(error); } else { TextWindow.AdvanceChar(); + + // If we ran into the start of a surrogate pair, then see if we have the whole pair. 
If so, + // skip the pair in its entirety so we can produce a good error message that treats the two as a + // single entity to tell the user about. + if (char.IsHighSurrogate(character) && char.IsLowSurrogate(TextWindow.PeekChar())) + TextWindow.AdvanceChar(); } - if (_badTokenCount++ > 200) + // If we get too many characters that we cannot make sense of, treat the entire rest of the file as + // a single invalid character, so we can bail out of parsing early without producing an unbounded + // number of errors. + if (_badTokenCount++ <= 200) { - // If we get too many characters that we cannot make sense of, absorb the rest of the input. - int end = TextWindow.Text.Length; - int width = end - startingPosition; - info.Text = TextWindow.Text.ToString(new TextSpan(startingPosition, width)); - TextWindow.Reset(end); + info.Text = TextWindow.GetText(intern: true); } else { - info.Text = TextWindow.GetText(intern: true); + int end = TextWindow.Text.Length; + info.Text = TextWindow.Text.ToString(TextSpan.FromBounds(startingPosition, end)); + TextWindow.Reset(end); } - this.AddError(ErrorCode.ERR_UnexpectedCharacter, info.Text); + // if the original text wasn't already escaped, then escape it in the error message so that it's + // clear what the issue is. i.e. if the user's source had the literal six characters in order like + // so: `\` `u` `D` `C` `E` `7`, then there's no need to escape that again when reporting the error. + // However, if the user's code has the actual System.Char \uDCE7 char in it, then we want to print + // that out in escaped form so they have an actual clue about what the character value is that we + // have a problem with. + var messageText = isEscaped ? 
info.Text : ObjectDisplay.FormatLiteral(info.Text, ObjectDisplayOptions.EscapeNonPrintableCharacters); + this.AddError(ErrorCode.ERR_UnexpectedCharacter, messageText); break; } } @@ -1714,12 +1724,12 @@ private bool ScanIdentifier_SlowPath(ref TokenInfo info) switch (ch) { case '\\': - if (!isEscaped && TextWindow.IsUnicodeEscape()) + if (!isEscaped && IsUnicodeEscape()) { // ^^^^^^^ otherwise \u005Cu1234 looks just like \u1234! (i.e. escape within escape) info.HasIdentifierEscapeSequence = true; isEscaped = true; - ch = TextWindow.PeekUnicodeEscape(out surrogateCharacter); + ch = PeekUnicodeEscape(out surrogateCharacter); goto top; } @@ -1870,7 +1880,7 @@ private bool ScanIdentifier_SlowPath(ref TokenInfo info) if (isEscaped) { SyntaxDiagnosticInfo? error; - TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error); + NextCharOrUnicodeEscape(out surrogateCharacter, out error); AddError(error); } else @@ -1894,7 +1904,7 @@ private bool ScanIdentifier_SlowPath(ref TokenInfo info) if (isEscaped) { SyntaxDiagnosticInfo? error; - TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error); + NextCharOrUnicodeEscape(out surrogateCharacter, out error); AddError(error); } else @@ -1992,7 +2002,7 @@ private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info) if (TextWindow.PeekChar() == '&') { - if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate)) + if (!TryScanXmlEntity(out consumedChar, out consumedSurrogate)) { // If it's not a valid entity, then it's not part of the identifier. TextWindow.Reset(beforeConsumed); @@ -2032,7 +2042,7 @@ private bool ScanIdentifier_CrefSlowPath(ref TokenInfo info) // ^^^^^^^ otherwise \u005Cu1234 looks just like \u1234! (i.e. escape within escape) isEscaped = true; SyntaxDiagnosticInfo? 
error; - consumedChar = TextWindow.NextUnicodeEscape(out consumedSurrogate, out error); + consumedChar = NextUnicodeEscape(out consumedSurrogate, out error); AddCrefError(error); goto top; } @@ -2988,7 +2998,7 @@ private bool ScanDirectiveToken(ref TokenInfo info) case '\\': { // Could be unicode escape. Try that. - character = TextWindow.PeekCharOrUnicodeEscape(out surrogateCharacter); + character = PeekCharOrUnicodeEscape(out surrogateCharacter); isEscaped = true; if (SyntaxFacts.IsIdentifierStartCharacter(character)) { @@ -3015,7 +3025,7 @@ private bool ScanDirectiveToken(ref TokenInfo info) if (isEscaped) { SyntaxDiagnosticInfo? error; - TextWindow.NextCharOrUnicodeEscape(out surrogateCharacter, out error); + NextCharOrUnicodeEscape(out surrogateCharacter, out error); AddError(error); } else @@ -3988,7 +3998,7 @@ private bool ScanXmlCrefToken(ref TokenInfo info) case '&': TextWindow.Reset(beforeConsumed); - if (!TextWindow.TryScanXmlEntity(out consumedChar, out consumedSurrogate)) + if (!TryScanXmlEntity(out consumedChar, out consumedSurrogate)) { TextWindow.Reset(beforeConsumed); this.ScanXmlEntity(ref info); @@ -4243,7 +4253,7 @@ private bool AdvanceIfMatches(char ch) char nextChar; char nextSurrogate; - if (TextWindow.TryScanXmlEntity(out nextChar, out nextSurrogate) + if (TryScanXmlEntity(out nextChar, out nextSurrogate) && nextChar == ch && nextSurrogate == SlidingTextWindow.InvalidCharacter) { return true; @@ -4896,5 +4906,299 @@ private void LexXmlWhitespaceAndNewLineTrivia(ref SyntaxListBuilder? 
trivia) } } } + + private bool IsUnicodeEscape() + { + if (TextWindow.PeekChar() == '\\') + { + var ch2 = TextWindow.PeekChar(1); + if (ch2 == 'U' || ch2 == 'u') + { + return true; + } + } + + return false; + } + + private char PeekCharOrUnicodeEscape(out char surrogateCharacter) + { + if (IsUnicodeEscape()) + { + return PeekUnicodeEscape(out surrogateCharacter); + } + else + { + surrogateCharacter = SlidingTextWindow.InvalidCharacter; + return TextWindow.PeekChar(); + } + } + + private char PeekUnicodeEscape(out char surrogateCharacter) + { + int position = TextWindow.Position; + + // if we're peeking, then we don't want to change the position + SyntaxDiagnosticInfo? info; + var ch = ScanUnicodeEscape(peek: true, surrogateCharacter: out surrogateCharacter, info: out info); + Debug.Assert(info == null, "Never produce a diagnostic while peeking."); + TextWindow.Reset(position); + return ch; + } + + private char NextCharOrUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info) + { + var ch = TextWindow.PeekChar(); + Debug.Assert(ch != SlidingTextWindow.InvalidCharacter, "Precondition established by all callers; required for correctness of AdvanceChar() call."); + if (ch == '\\') + { + var ch2 = TextWindow.PeekChar(1); + if (ch2 == 'U' || ch2 == 'u') + { + return ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info); + } + } + + surrogateCharacter = SlidingTextWindow.InvalidCharacter; + info = null; + TextWindow.AdvanceChar(); + return ch; + } + + private char NextUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info) + { + return ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info); + } + + private char ScanUnicodeEscape(bool peek, out char surrogateCharacter, out SyntaxDiagnosticInfo? 
info) + { + surrogateCharacter = SlidingTextWindow.InvalidCharacter; + info = null; + + int start = TextWindow.Position; + char character = TextWindow.PeekChar(); + Debug.Assert(character == '\\'); + TextWindow.AdvanceChar(); + + character = TextWindow.PeekChar(); + if (character == 'U') + { + uint uintChar = 0; + + TextWindow.AdvanceChar(); + if (!SyntaxFacts.IsHexDigit(TextWindow.PeekChar())) + { + if (!peek) + { + info = CreateIllegalEscapeDiagnostic(start); + } + } + else + { + for (int i = 0; i < 8; i++) + { + character = TextWindow.PeekChar(); + if (!SyntaxFacts.IsHexDigit(character)) + { + if (!peek) + { + info = CreateIllegalEscapeDiagnostic(start); + } + + break; + } + + uintChar = (uint)((uintChar << 4) + SyntaxFacts.HexValue(character)); + TextWindow.AdvanceChar(); + } + + if (uintChar > 0x0010FFFF) + { + if (!peek) + { + info = CreateIllegalEscapeDiagnostic(start); + } + } + else + { + character = GetCharsFromUtf32(uintChar, out surrogateCharacter); + } + } + } + else + { + Debug.Assert(character == 'u' || character == 'x'); + + int intChar = 0; + TextWindow.AdvanceChar(); + if (!SyntaxFacts.IsHexDigit(TextWindow.PeekChar())) + { + if (!peek) + { + info = CreateIllegalEscapeDiagnostic(start); + } + } + else + { + for (int i = 0; i < 4; i++) + { + char ch2 = TextWindow.PeekChar(); + if (!SyntaxFacts.IsHexDigit(ch2)) + { + if (character == 'u') + { + if (!peek) + { + info = CreateIllegalEscapeDiagnostic(start); + } + } + + break; + } + + intChar = (intChar << 4) + SyntaxFacts.HexValue(ch2); + TextWindow.AdvanceChar(); + } + + character = (char)intChar; + } + } + + return character; + } + + /// <summary> + /// Given that the next character is an ampersand ('&amp;'), attempt to interpret the + /// following characters as an XML entity. On success, populate the out parameters + /// with the low and high UTF-16 surrogates for the character represented by the + /// entity. + /// </summary> + /// <param name="ch">e.g. '&lt;' for &amp;lt;.</param> + /// <param name="surrogate">e.g. '\uDC00' for &amp;#x10000; (ch == '\uD800').</param> 
+ /// <returns>True if a valid XML entity was consumed.</returns> + /// <remarks> + /// NOTE: Always advances, even on failure. + /// </remarks> + public bool TryScanXmlEntity(out char ch, out char surrogate) + { + Debug.Assert(TextWindow.PeekChar() == '&'); + + ch = '&'; + TextWindow.AdvanceChar(); + + surrogate = SlidingTextWindow.InvalidCharacter; + + switch (TextWindow.PeekChar()) + { + case 'l': + if (TextWindow.AdvanceIfMatches("lt;")) + { + ch = '<'; + return true; + } + break; + case 'g': + if (TextWindow.AdvanceIfMatches("gt;")) + { + ch = '>'; + return true; + } + break; + case 'a': + if (TextWindow.AdvanceIfMatches("amp;")) + { + ch = '&'; + return true; + } + else if (TextWindow.AdvanceIfMatches("apos;")) + { + ch = '\''; + return true; + } + break; + case 'q': + if (TextWindow.AdvanceIfMatches("quot;")) + { + ch = '"'; + return true; + } + break; + case '#': + { + TextWindow.AdvanceChar(); //# + + uint uintChar = 0; + + if (TextWindow.AdvanceIfMatches("x")) + { + char digit; + while (SyntaxFacts.IsHexDigit(digit = TextWindow.PeekChar())) + { + TextWindow.AdvanceChar(); + + // disallow overflow + if (uintChar <= 0x7FFFFFF) + { + uintChar = (uintChar << 4) + (uint)SyntaxFacts.HexValue(digit); + } + else + { + return false; + } + } + } + else + { + char digit; + while (SyntaxFacts.IsDecDigit(digit = TextWindow.PeekChar())) + { + TextWindow.AdvanceChar(); + + // disallow overflow + if (uintChar <= 0x7FFFFFF) + { + uintChar = (uintChar << 3) + (uintChar << 1) + (uint)SyntaxFacts.DecValue(digit); + } + else + { + return false; + } + } + } + + if (TextWindow.AdvanceIfMatches(";")) + { + ch = GetCharsFromUtf32(uintChar, out surrogate); + return true; + } + + break; + } + } + + return false; + } + + private SyntaxDiagnosticInfo CreateIllegalEscapeDiagnostic(int start) + { + return new SyntaxDiagnosticInfo(start - TextWindow.LexemeStartPosition, + TextWindow.Position - start, + ErrorCode.ERR_IllegalEscape); + } + + internal static char GetCharsFromUtf32(uint codepoint, out char lowSurrogate) + { + if 
(codepoint < (uint)0x00010000) + { + lowSurrogate = SlidingTextWindow.InvalidCharacter; + return (char)codepoint; + } + else + { + Debug.Assert(codepoint > 0x0000FFFF && codepoint <= 0x0010FFFF); + lowSurrogate = (char)((codepoint - 0x00010000) % 0x0400 + 0xDC00); + return (char)((codepoint - 0x00010000) / 0x0400 + 0xD800); + } + } } } diff --git a/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs b/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs index d625d7ed22bda..27f6b6efcce50 100644 --- a/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs +++ b/src/Compilers/CSharp/Portable/Parser/Lexer_StringLiteral.cs @@ -171,7 +171,7 @@ private char ScanEscapeSequence(out char surrogateCharacter) case 'U': TextWindow.Reset(start); SyntaxDiagnosticInfo? error; - ch = TextWindow.NextUnicodeEscape(surrogateCharacter: out surrogateCharacter, info: out error); + ch = NextUnicodeEscape(surrogateCharacter: out surrogateCharacter, info: out error); AddError(error); break; default: diff --git a/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs b/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs index 52b56ffe1be34..25e050f990e3a 100644 --- a/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs +++ b/src/Compilers/CSharp/Portable/Parser/SlidingTextWindow.cs @@ -361,283 +361,11 @@ public char PeekChar(int delta) return ch; } - public bool IsUnicodeEscape() - { - if (this.PeekChar() == '\\') - { - var ch2 = this.PeekChar(1); - if (ch2 == 'U' || ch2 == 'u') - { - return true; - } - } - - return false; - } - - public char PeekCharOrUnicodeEscape(out char surrogateCharacter) - { - if (this.IsUnicodeEscape()) - { - return this.PeekUnicodeEscape(out surrogateCharacter); - } - else - { - surrogateCharacter = InvalidCharacter; - return this.PeekChar(); - } - } - - public char PeekUnicodeEscape(out char surrogateCharacter) - { - int position = this.Position; - - // if we're peeking, then we don't want to change the position - 
SyntaxDiagnosticInfo? info; - var ch = this.ScanUnicodeEscape(peek: true, surrogateCharacter: out surrogateCharacter, info: out info); - Debug.Assert(info == null, "Never produce a diagnostic while peeking."); - this.Reset(position); - return ch; - } - - public char NextCharOrUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info) - { - var ch = this.PeekChar(); - Debug.Assert(ch != InvalidCharacter, "Precondition established by all callers; required for correctness of AdvanceChar() call."); - if (ch == '\\') - { - var ch2 = this.PeekChar(1); - if (ch2 == 'U' || ch2 == 'u') - { - return this.ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info); - } - } - - surrogateCharacter = InvalidCharacter; - info = null; - this.AdvanceChar(); - return ch; - } - - public char NextUnicodeEscape(out char surrogateCharacter, out SyntaxDiagnosticInfo? info) - { - return ScanUnicodeEscape(peek: false, surrogateCharacter: out surrogateCharacter, info: out info); - } - - private char ScanUnicodeEscape(bool peek, out char surrogateCharacter, out SyntaxDiagnosticInfo? 
info) - { - surrogateCharacter = InvalidCharacter; - info = null; - - int start = this.Position; - char character = this.PeekChar(); - Debug.Assert(character == '\\'); - this.AdvanceChar(); - - character = this.PeekChar(); - if (character == 'U') - { - uint uintChar = 0; - - this.AdvanceChar(); - if (!SyntaxFacts.IsHexDigit(this.PeekChar())) - { - if (!peek) - { - info = CreateIllegalEscapeDiagnostic(start); - } - } - else - { - for (int i = 0; i < 8; i++) - { - character = this.PeekChar(); - if (!SyntaxFacts.IsHexDigit(character)) - { - if (!peek) - { - info = CreateIllegalEscapeDiagnostic(start); - } - - break; - } - - uintChar = (uint)((uintChar << 4) + SyntaxFacts.HexValue(character)); - this.AdvanceChar(); - } - - if (uintChar > 0x0010FFFF) - { - if (!peek) - { - info = CreateIllegalEscapeDiagnostic(start); - } - } - else - { - character = GetCharsFromUtf32(uintChar, out surrogateCharacter); - } - } - } - else - { - Debug.Assert(character == 'u' || character == 'x'); - - int intChar = 0; - this.AdvanceChar(); - if (!SyntaxFacts.IsHexDigit(this.PeekChar())) - { - if (!peek) - { - info = CreateIllegalEscapeDiagnostic(start); - } - } - else - { - for (int i = 0; i < 4; i++) - { - char ch2 = this.PeekChar(); - if (!SyntaxFacts.IsHexDigit(ch2)) - { - if (character == 'u') - { - if (!peek) - { - info = CreateIllegalEscapeDiagnostic(start); - } - } - - break; - } - - intChar = (intChar << 4) + SyntaxFacts.HexValue(ch2); - this.AdvanceChar(); - } - - character = (char)intChar; - } - } - - return character; - } - - /// - /// Given that the next character is an ampersand ('&'), attempt to interpret the - /// following characters as an XML entity. On success, populate the out parameters - /// with the low and high UTF-16 surrogates for the character represented by the - /// entity. - /// - /// e.g. '<' for &lt;. - /// e.g. '\uDC00' for &#x10000; (ch == '\uD800'). - /// True if a valid XML entity was consumed. - /// - /// NOTE: Always advances, even on failure. 
- /// - public bool TryScanXmlEntity(out char ch, out char surrogate) - { - Debug.Assert(this.PeekChar() == '&'); - - ch = '&'; - this.AdvanceChar(); - - surrogate = InvalidCharacter; - - switch (this.PeekChar()) - { - case 'l': - if (AdvanceIfMatches("lt;")) - { - ch = '<'; - return true; - } - break; - case 'g': - if (AdvanceIfMatches("gt;")) - { - ch = '>'; - return true; - } - break; - case 'a': - if (AdvanceIfMatches("amp;")) - { - ch = '&'; - return true; - } - else if (AdvanceIfMatches("apos;")) - { - ch = '\''; - return true; - } - break; - case 'q': - if (AdvanceIfMatches("quot;")) - { - ch = '"'; - return true; - } - break; - case '#': - { - this.AdvanceChar(); //# - - uint uintChar = 0; - - if (AdvanceIfMatches("x")) - { - char digit; - while (SyntaxFacts.IsHexDigit(digit = this.PeekChar())) - { - this.AdvanceChar(); - - // disallow overflow - if (uintChar <= 0x7FFFFFF) - { - uintChar = (uintChar << 4) + (uint)SyntaxFacts.HexValue(digit); - } - else - { - return false; - } - } - } - else - { - char digit; - while (SyntaxFacts.IsDecDigit(digit = this.PeekChar())) - { - this.AdvanceChar(); - - // disallow overflow - if (uintChar <= 0x7FFFFFF) - { - uintChar = (uintChar << 3) + (uintChar << 1) + (uint)SyntaxFacts.DecValue(digit); - } - else - { - return false; - } - } - } - - if (AdvanceIfMatches(";")) - { - ch = GetCharsFromUtf32(uintChar, out surrogate); - return true; - } - - break; - } - } - - return false; - } - /// /// If the next characters in the window match the given string, /// then advance past those characters. Otherwise, do nothing. 
/// - private bool AdvanceIfMatches(string desired) + internal bool AdvanceIfMatches(string desired) { int length = desired.Length; @@ -653,13 +381,6 @@ private bool AdvanceIfMatches(string desired) return true; } - private SyntaxDiagnosticInfo CreateIllegalEscapeDiagnostic(int start) - { - return new SyntaxDiagnosticInfo(start - this.LexemeStartPosition, - this.Position - start, - ErrorCode.ERR_IllegalEscape); - } - public string Intern(StringBuilder text) { return _strings.Add(text); diff --git a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs index 5dbdc3833a388..463aedf413c01 100644 --- a/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs +++ b/src/Compilers/CSharp/Test/Syntax/LexicalAndXml/LexicalErrorTests.cs @@ -4,8 +4,11 @@ #nullable disable +using System.Collections.Generic; using Microsoft.CodeAnalysis.CSharp.Test.Utilities; +using Microsoft.CodeAnalysis.Test.Utilities; using Roslyn.Test.Utilities; +using Roslyn.Utilities; using Xunit; namespace Microsoft.CodeAnalysis.CSharp.UnitTests @@ -389,6 +392,154 @@ class A Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"\u0060")); } + [Fact] + public void CS1056ERR_UnexpectedCharacter_UnpairedSurrogate1() + { + var test = $$""" + using System; + class Test + { + public static void Main() + { + int {{'\ud86d'}} = 1; + } + } + """; + + ParsingTests.ParseAndValidate(test, + // (6,13): error CS1001: Identifier expected + // int � = 1; + Diagnostic(ErrorCode.ERR_IdentifierExpected, "\ud86d").WithLocation(6, 13), + // (6,13): error CS1056: Unexpected character '\ud86d' + // int � = 1; + Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"\ud86d").WithLocation(6, 13), + // (6,15): error CS1002: ; expected + // int � = 1; + Diagnostic(ErrorCode.ERR_SemicolonExpected, "=").WithLocation(6, 15), + // (6,15): error CS1525: Invalid expression term '=' + // int � = 1; + 
Diagnostic(ErrorCode.ERR_InvalidExprTerm, "=").WithArguments("=").WithLocation(6, 15)); + } + + [Fact] + public void CS1056ERR_UnexpectedCharacter_UnpairedSurrogate2() + { + var test = $$""" + using System; + class Test + { + public static void Main() + { + int {{'\udce7'}} = 1; + } + } + """; + + ParsingTests.ParseAndValidate(test, + // (6,13): error CS1001: Identifier expected + // int � = 1; + Diagnostic(ErrorCode.ERR_IdentifierExpected, "\udce7").WithLocation(6, 13), + // (6,13): error CS1056: Unexpected character '\udce7' + // int � = 1; + Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"\udce7").WithLocation(6, 13), + // (6,15): error CS1002: ; expected + // int � = 1; + Diagnostic(ErrorCode.ERR_SemicolonExpected, "=").WithLocation(6, 15), + // (6,15): error CS1525: Invalid expression term '=' + // int � = 1; + Diagnostic(ErrorCode.ERR_InvalidExprTerm, "=").WithArguments("=").WithLocation(6, 15)); + } + + [Fact] + public void CS1056ERR_UnexpectedCharacter_Surrogate() + { + var test = """ + using System; + class Test + { + public static void Main() + { + int 𫓧龦 = 1; + } + } + """; + + ParsingTests.ParseAndValidate(test, + // (6,13): error CS1001: Identifier expected + // int 𫓧龦 = 1; + Diagnostic(ErrorCode.ERR_IdentifierExpected, "𫓧").WithLocation(6, 13), + // (6,13): error CS1056: Unexpected character '𫓧' + // int 𫓧龦 = 1; + Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments("𫓧").WithLocation(6, 13), + // (6,15): error CS1002: ; expected + // int 𫓧龦 = 1; + Diagnostic(ErrorCode.ERR_SemicolonExpected, "龦").WithLocation(6, 15)); + } + + [Fact] + public void CS1056ERR_UnexpectedCharacter_Surrogate_Long() + { + // Create a file with 200 slashes in a row. Each slash (and the first surrogate pair after them) gets its own + // 'unexpected character' error, after which the compiler gives up and reports a single error (with a multi-char message) for the remainder of the doc. 
+ + var test = $$""" + using System; + class Test + { + public static void Main() + { + int {{new string('\\', 200)}}𫓧𫓧 = 1; + } + } + """; + + var descriptions = new List + { + // (6,13): error CS1001: Identifier expected + // int \..200 more slashes..\𫓧𫓧 = 1; + Diagnostic(ErrorCode.ERR_IdentifierExpected, @"\").WithLocation(6, 13), + }; + + for (int i = 0; i < 200; i++) + { + descriptions.Add( + // (6,13 + i): error CS1056: Unexpected character '\' + // int \..200 more slashes..\𫓧𫓧 = 1; + Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments("\\").WithLocation(6, 13 + i)); + } + + descriptions.Add( + // (6,213): error CS1056: Unexpected character '𫓧' + // int \..200 more slashes..\𫓧𫓧 = 1; + Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"𫓧").WithLocation(6, 213)); + + // (6,215): error CS1056: Unexpected character '𫓧 = 1;\r\n }\r\n}' + // int \..200 more slashes..\𫓧𫓧 = 1; + if (PathUtilities.IsUnixLikePlatform) + { + descriptions.Add(Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"𫓧 = 1;\n }\n}").WithLocation(6, 215)); + } + else + { + descriptions.Add(Diagnostic(ErrorCode.ERR_UnexpectedCharacter, "").WithArguments(@"𫓧 = 1;\r\n }\r\n}").WithLocation(6, 215)); + } + + descriptions.AddRange(new[] + { + // (8,2): error CS1002: ; expected + // } + Diagnostic(ErrorCode.ERR_SemicolonExpected, "").WithLocation(8, 2), + // (8,2): error CS1513: } expected + // } + Diagnostic(ErrorCode.ERR_RbraceExpected, "").WithLocation(8, 2), + // (8,2): error CS1513: } expected + // } + Diagnostic(ErrorCode.ERR_RbraceExpected, "").WithLocation(8, 2), + }); + + ParsingTests.ParseAndValidate(test, descriptions.ToArray()); + } + [Fact, WorkItem(535937, "http://vstfdevdiv:8080/DevDiv2/DevDiv/_workitems/edit/535937")] public void CS1646ERR_ExpectedVerbatimLiteral() {