From e6de1d9e619c5b151c72cc408aeda9e2a282a882 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 05:59:03 +0000 Subject: [PATCH 01/13] Create a lookup table on token info to avoid excessive branching. --- src/compiler/scanner.ts | 159 +++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 75 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index fe0a99d86488b..ac7b5287f945c 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -301,6 +301,58 @@ const regExpFlagToFirstAvailableLanguageVersion = new Map= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) { @@ -2302,13 +2299,25 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.Unknown; case CharacterCodes.hash: - if (pos !== 0 && text[pos + 1] === "!") { - error(Diagnostics.can_only_be_used_at_the_start_of_a_file, pos, 2); - pos++; - return token = SyntaxKind.Unknown; + const charAfterHash = codePointUnchecked(pos + 1); + + if (charAfterHash === CharacterCodes.exclamation) { + if (pos === 0) { + pos = scanShebangTrivia(text, pos); + if (skipTrivia) { + continue; + } + else { + return token = SyntaxKind.ShebangTrivia; + } + } + else if (pos !== 0) { + error(Diagnostics.can_only_be_used_at_the_start_of_a_file, pos, 2); + pos++; + return token = SyntaxKind.Unknown; + } } - const charAfterHash = codePointUnchecked(pos + 1); if (charAfterHash === CharacterCodes.backslash) { pos++; const extendedCookedChar = peekExtendedUnicodeEscape(); From f43891898a4872ccd61cfa960ebf4faf185eca27 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 07:37:12 +0000 Subject: [PATCH 02/13] Use an array for common cases. 
--- src/compiler/scanner.ts | 111 +++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 46 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index ac7b5287f945c..8b4148aead45e 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -302,56 +302,73 @@ const regExpFlagToFirstAvailableLanguageVersion = new Map(); +for (let i = 0; i < CharacterCodes.maxAsciiCharacter; i++) { + charcodeToTokenInfoCommon.push(0); +} + +for ( + const [key, value] of [ + // Line Break Whitespace + [CharacterCodes.lineFeed, TokenInfo.LineBreak], + [CharacterCodes.carriageReturn, TokenInfo.LineBreak], + + // Single Line Whitespace + [CharacterCodes.tab, TokenInfo.SingleLine], + [CharacterCodes.verticalTab, TokenInfo.SingleLine], + [CharacterCodes.formFeed, TokenInfo.SingleLine], + [CharacterCodes.space, TokenInfo.SingleLine], + [CharacterCodes.nonBreakingSpace, TokenInfo.SingleLine], + [CharacterCodes.ogham, TokenInfo.SingleLine], + [CharacterCodes.enQuad, TokenInfo.SingleLine], + [CharacterCodes.emQuad, TokenInfo.SingleLine], + [CharacterCodes.enSpace, TokenInfo.SingleLine], + [CharacterCodes.emSpace, TokenInfo.SingleLine], + [CharacterCodes.threePerEmSpace, TokenInfo.SingleLine], + [CharacterCodes.fourPerEmSpace, TokenInfo.SingleLine], + [CharacterCodes.sixPerEmSpace, TokenInfo.SingleLine], + [CharacterCodes.figureSpace, TokenInfo.SingleLine], + [CharacterCodes.punctuationSpace, TokenInfo.SingleLine], + [CharacterCodes.thinSpace, TokenInfo.SingleLine], + [CharacterCodes.hairSpace, TokenInfo.SingleLine], + [CharacterCodes.zeroWidthSpace, TokenInfo.SingleLine], + [CharacterCodes.narrowNoBreakSpace, TokenInfo.SingleLine], + [CharacterCodes.mathematicalSpace, TokenInfo.SingleLine], + [CharacterCodes.ideographicSpace, TokenInfo.SingleLine], + [CharacterCodes.byteOrderMark, TokenInfo.SingleLine], + + // Simple Single-Character Tokens + [CharacterCodes.openParen, TokenInfo.SimpleToken | SyntaxKind.OpenParenToken], + 
[CharacterCodes.closeParen, TokenInfo.SimpleToken | SyntaxKind.CloseParenToken], + [CharacterCodes.comma, TokenInfo.SimpleToken | SyntaxKind.CommaToken], + [CharacterCodes.colon, TokenInfo.SimpleToken | SyntaxKind.ColonToken], + [CharacterCodes.semicolon, TokenInfo.SimpleToken | SyntaxKind.SemicolonToken], + [CharacterCodes.openBracket, TokenInfo.SimpleToken | SyntaxKind.OpenBracketToken], + [CharacterCodes.closeBracket, TokenInfo.SimpleToken | SyntaxKind.CloseBracketToken], + [CharacterCodes.openBrace, TokenInfo.SimpleToken | SyntaxKind.OpenBraceToken], + [CharacterCodes.closeBrace, TokenInfo.SimpleToken | SyntaxKind.CloseBraceToken], + [CharacterCodes.tilde, TokenInfo.SimpleToken | SyntaxKind.TildeToken], + [CharacterCodes.at, TokenInfo.SimpleToken | SyntaxKind.AtToken], + ] +) { + if (key < charcodeToTokenInfoCommon.length) { + charcodeToTokenInfoCommon[key] = value; + } + else { + charcodeToTokenInfoUncommon.set(key, value); + } +} /* As per ECMAScript Language Specification 5th Edition, Section 7.6: ISyntaxToken Names and Identifiers @@ -1935,8 +1952,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const ch = codePointUnchecked(pos); - const tokenInfo = charcodeToTokenInfo.get(ch); - if (tokenInfo !== undefined) { + const tokenInfo = ch < charcodeToTokenInfoCommon.length ? + charcodeToTokenInfoCommon[ch] : + charcodeToTokenInfoUncommon.get(ch) ?? TokenInfo.None; + if (tokenInfo !== TokenInfo.None) { if (tokenInfo & TokenInfo.LineBreak) { tokenFlags |= TokenFlags.PrecedingLineBreak; if (skipTrivia) { From e8769ca5418fb096d80c3c54f15ae33c67f4d5bd Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 07:48:48 +0000 Subject: [PATCH 03/13] Handle digits. 
--- src/compiler/scanner.ts | 150 +++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 70 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 8b4148aead45e..144d633c616df 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -306,8 +306,9 @@ const enum TokenInfo { /** Single-width tokens whose contents fit in the lower masked bits. */ SimpleToken = 1 << 8, - SingleLine = 1 << 9, - LineBreak = 1 << 10, + Whitespace, + LineBreak, + Digit, SimpleTokenMask = SimpleToken - 1, } @@ -325,28 +326,28 @@ for ( [CharacterCodes.carriageReturn, TokenInfo.LineBreak], // Single Line Whitespace - [CharacterCodes.tab, TokenInfo.SingleLine], - [CharacterCodes.verticalTab, TokenInfo.SingleLine], - [CharacterCodes.formFeed, TokenInfo.SingleLine], - [CharacterCodes.space, TokenInfo.SingleLine], - [CharacterCodes.nonBreakingSpace, TokenInfo.SingleLine], - [CharacterCodes.ogham, TokenInfo.SingleLine], - [CharacterCodes.enQuad, TokenInfo.SingleLine], - [CharacterCodes.emQuad, TokenInfo.SingleLine], - [CharacterCodes.enSpace, TokenInfo.SingleLine], - [CharacterCodes.emSpace, TokenInfo.SingleLine], - [CharacterCodes.threePerEmSpace, TokenInfo.SingleLine], - [CharacterCodes.fourPerEmSpace, TokenInfo.SingleLine], - [CharacterCodes.sixPerEmSpace, TokenInfo.SingleLine], - [CharacterCodes.figureSpace, TokenInfo.SingleLine], - [CharacterCodes.punctuationSpace, TokenInfo.SingleLine], - [CharacterCodes.thinSpace, TokenInfo.SingleLine], - [CharacterCodes.hairSpace, TokenInfo.SingleLine], - [CharacterCodes.zeroWidthSpace, TokenInfo.SingleLine], - [CharacterCodes.narrowNoBreakSpace, TokenInfo.SingleLine], - [CharacterCodes.mathematicalSpace, TokenInfo.SingleLine], - [CharacterCodes.ideographicSpace, TokenInfo.SingleLine], - [CharacterCodes.byteOrderMark, TokenInfo.SingleLine], + [CharacterCodes.tab, TokenInfo.Whitespace], + [CharacterCodes.verticalTab, TokenInfo.Whitespace], + [CharacterCodes.formFeed, TokenInfo.Whitespace], + 
[CharacterCodes.space, TokenInfo.Whitespace], + [CharacterCodes.nonBreakingSpace, TokenInfo.Whitespace], + [CharacterCodes.ogham, TokenInfo.Whitespace], + [CharacterCodes.enQuad, TokenInfo.Whitespace], + [CharacterCodes.emQuad, TokenInfo.Whitespace], + [CharacterCodes.enSpace, TokenInfo.Whitespace], + [CharacterCodes.emSpace, TokenInfo.Whitespace], + [CharacterCodes.threePerEmSpace, TokenInfo.Whitespace], + [CharacterCodes.fourPerEmSpace, TokenInfo.Whitespace], + [CharacterCodes.sixPerEmSpace, TokenInfo.Whitespace], + [CharacterCodes.figureSpace, TokenInfo.Whitespace], + [CharacterCodes.punctuationSpace, TokenInfo.Whitespace], + [CharacterCodes.thinSpace, TokenInfo.Whitespace], + [CharacterCodes.hairSpace, TokenInfo.Whitespace], + [CharacterCodes.zeroWidthSpace, TokenInfo.Whitespace], + [CharacterCodes.narrowNoBreakSpace, TokenInfo.Whitespace], + [CharacterCodes.mathematicalSpace, TokenInfo.Whitespace], + [CharacterCodes.ideographicSpace, TokenInfo.Whitespace], + [CharacterCodes.byteOrderMark, TokenInfo.Whitespace], // Simple Single-Character Tokens [CharacterCodes.openParen, TokenInfo.SimpleToken | SyntaxKind.OpenParenToken], @@ -360,6 +361,19 @@ for ( [CharacterCodes.closeBrace, TokenInfo.SimpleToken | SyntaxKind.CloseBraceToken], [CharacterCodes.tilde, TokenInfo.SimpleToken | SyntaxKind.TildeToken], [CharacterCodes.at, TokenInfo.SimpleToken | SyntaxKind.AtToken], + + // Digits + [CharacterCodes._0, TokenInfo.Digit], + [CharacterCodes._1, TokenInfo.Digit], + [CharacterCodes._2, TokenInfo.Digit], + [CharacterCodes._3, TokenInfo.Digit], + [CharacterCodes._4, TokenInfo.Digit], + [CharacterCodes._5, TokenInfo.Digit], + [CharacterCodes._6, TokenInfo.Digit], + [CharacterCodes._7, TokenInfo.Digit], + [CharacterCodes._8, TokenInfo.Digit], + [CharacterCodes._9, TokenInfo.Digit], + ] ) { if (key < charcodeToTokenInfoCommon.length) { @@ -1974,7 +1988,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (tokenInfo & 
TokenInfo.SingleLine) { + if (tokenInfo & TokenInfo.Whitespace) { if (skipTrivia) { pos++; continue; @@ -1991,6 +2005,46 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = tokenInfo & TokenInfo.SimpleTokenMask; } + + if (tokenInfo & TokenInfo.Digit) { + if (ch === 0) { + if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { + pos += 2; + tokenValue = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ true); + if (!tokenValue) { + error(Diagnostics.Hexadecimal_digit_expected); + tokenValue = "0"; + } + tokenValue = "0x" + tokenValue; + tokenFlags |= TokenFlags.HexSpecifier; + return token = checkBigIntSuffix(); + } + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.B || charCodeUnchecked(pos + 1) === CharacterCodes.b)) { + pos += 2; + tokenValue = scanBinaryOrOctalDigits(/* base */ 2); + if (!tokenValue) { + error(Diagnostics.Binary_digit_expected); + tokenValue = "0"; + } + tokenValue = "0b" + tokenValue; + tokenFlags |= TokenFlags.BinarySpecifier; + return token = checkBigIntSuffix(); + } + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.O || charCodeUnchecked(pos + 1) === CharacterCodes.o)) { + pos += 2; + tokenValue = scanBinaryOrOctalDigits(/* base */ 8); + if (!tokenValue) { + error(Diagnostics.Octal_digit_expected); + tokenValue = "0"; + } + tokenValue = "0o" + tokenValue; + tokenFlags |= TokenFlags.OctalSpecifier; + return token = checkBigIntSuffix(); + } + } + + return token = scanNumber(); + } } switch (ch) { @@ -2150,51 +2204,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; return token = SyntaxKind.SlashToken; - case CharacterCodes._0: - if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { - pos += 2; - tokenValue = scanMinimumNumberOfHexDigits(1, 
/*canHaveSeparators*/ true); - if (!tokenValue) { - error(Diagnostics.Hexadecimal_digit_expected); - tokenValue = "0"; - } - tokenValue = "0x" + tokenValue; - tokenFlags |= TokenFlags.HexSpecifier; - return token = checkBigIntSuffix(); - } - else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.B || charCodeUnchecked(pos + 1) === CharacterCodes.b)) { - pos += 2; - tokenValue = scanBinaryOrOctalDigits(/* base */ 2); - if (!tokenValue) { - error(Diagnostics.Binary_digit_expected); - tokenValue = "0"; - } - tokenValue = "0b" + tokenValue; - tokenFlags |= TokenFlags.BinarySpecifier; - return token = checkBigIntSuffix(); - } - else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.O || charCodeUnchecked(pos + 1) === CharacterCodes.o)) { - pos += 2; - tokenValue = scanBinaryOrOctalDigits(/* base */ 8); - if (!tokenValue) { - error(Diagnostics.Octal_digit_expected); - tokenValue = "0"; - } - tokenValue = "0o" + tokenValue; - tokenFlags |= TokenFlags.OctalSpecifier; - return token = checkBigIntSuffix(); - } - // falls through - case CharacterCodes._1: - case CharacterCodes._2: - case CharacterCodes._3: - case CharacterCodes._4: - case CharacterCodes._5: - case CharacterCodes._6: - case CharacterCodes._7: - case CharacterCodes._8: - case CharacterCodes._9: - return token = scanNumber(); + case CharacterCodes.lessThan: if (isConflictMarkerTrivia(text, pos)) { pos = scanConflictMarkerTrivia(text, pos, error); From a087c2415b8c048ab7729f8f53a070129ceae318 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 08:13:04 +0000 Subject: [PATCH 04/13] Fixes. 
--- src/compiler/scanner.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 144d633c616df..dfe0ef69a1959 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -305,10 +305,10 @@ const enum TokenInfo { None = 0, /** Single-width tokens whose contents fit in the lower masked bits. */ - SimpleToken = 1 << 8, - Whitespace, - LineBreak, - Digit, + SimpleToken = 1 << 8, // must come first + Whitespace = 1 << 9, + LineBreak = 1 << 10, + Digit = 1 << 11, SimpleTokenMask = SimpleToken - 1, } @@ -2007,7 +2007,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } if (tokenInfo & TokenInfo.Digit) { - if (ch === 0) { + if (ch === CharacterCodes._0) { if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { pos += 2; tokenValue = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ true); From 811526d99359ac1fe749097c122df844051fd822 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 08:14:03 +0000 Subject: [PATCH 05/13] Moved everything outside of identifier handling to use a lookup. --- src/compiler/scanner.ts | 623 +++++++++++++++++++++------------------- 1 file changed, 324 insertions(+), 299 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index dfe0ef69a1959..0929a2e7712d9 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -309,6 +309,11 @@ const enum TokenInfo { Whitespace = 1 << 9, LineBreak = 1 << 10, Digit = 1 << 11, + /** + * Not a character that can be generically handled, + * but needs to be handled in some well-understood way. 
+ */ + RecognizedMisc = 1 << 12, SimpleTokenMask = SimpleToken - 1, } @@ -374,6 +379,26 @@ for ( [CharacterCodes._8, TokenInfo.Digit], [CharacterCodes._9, TokenInfo.Digit], + [CharacterCodes.exclamation, TokenInfo.RecognizedMisc], + [CharacterCodes.doubleQuote, TokenInfo.RecognizedMisc], + [CharacterCodes.singleQuote, TokenInfo.RecognizedMisc], + [CharacterCodes.backtick, TokenInfo.RecognizedMisc], + [CharacterCodes.percent, TokenInfo.RecognizedMisc], + [CharacterCodes.ampersand, TokenInfo.RecognizedMisc], + [CharacterCodes.asterisk, TokenInfo.RecognizedMisc], + [CharacterCodes.plus, TokenInfo.RecognizedMisc], + [CharacterCodes.minus, TokenInfo.RecognizedMisc], + [CharacterCodes.dot, TokenInfo.RecognizedMisc], + [CharacterCodes.slash, TokenInfo.RecognizedMisc], + [CharacterCodes.lessThan, TokenInfo.RecognizedMisc], + [CharacterCodes.equals, TokenInfo.RecognizedMisc], + [CharacterCodes.greaterThan, TokenInfo.RecognizedMisc], + [CharacterCodes.question, TokenInfo.RecognizedMisc], + [CharacterCodes.caret, TokenInfo.RecognizedMisc], + [CharacterCodes.bar, TokenInfo.RecognizedMisc], + [CharacterCodes.backslash, TokenInfo.RecognizedMisc], + [CharacterCodes.hash, TokenInfo.RecognizedMisc], + [CharacterCodes.replacementCharacter, TokenInfo.RecognizedMisc], ] ) { if (key < charcodeToTokenInfoCommon.length) { @@ -2045,363 +2070,363 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = scanNumber(); } - } - switch (ch) { - case CharacterCodes.exclamation: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.ExclamationEqualsEqualsToken; + Debug.assert(tokenInfo & TokenInfo.RecognizedMisc); + switch (ch) { + case CharacterCodes.exclamation: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = 
SyntaxKind.ExclamationEqualsEqualsToken; + } + return pos += 2, token = SyntaxKind.ExclamationEqualsToken; } - return pos += 2, token = SyntaxKind.ExclamationEqualsToken; - } - pos++; - return token = SyntaxKind.ExclamationToken; - case CharacterCodes.doubleQuote: - case CharacterCodes.singleQuote: - tokenValue = scanString(); - return token = SyntaxKind.StringLiteral; - case CharacterCodes.backtick: - return token = scanTemplateAndSetTokenValue(/*shouldEmitInvalidEscapeError*/ false); - case CharacterCodes.percent: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.PercentEqualsToken; - } - pos++; - return token = SyntaxKind.PercentToken; - case CharacterCodes.ampersand: - if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.AmpersandAmpersandEqualsToken; + pos++; + return token = SyntaxKind.ExclamationToken; + case CharacterCodes.doubleQuote: + case CharacterCodes.singleQuote: + tokenValue = scanString(); + return token = SyntaxKind.StringLiteral; + case CharacterCodes.backtick: + return token = scanTemplateAndSetTokenValue(/*shouldEmitInvalidEscapeError*/ false); + case CharacterCodes.percent: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.PercentEqualsToken; } - return pos += 2, token = SyntaxKind.AmpersandAmpersandToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.AmpersandEqualsToken; - } - pos++; - return token = SyntaxKind.AmpersandToken; - case CharacterCodes.asterisk: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.AsteriskEqualsToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.AsteriskAsteriskEqualsToken; + pos++; 
+ return token = SyntaxKind.PercentToken; + case CharacterCodes.ampersand: + if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.AmpersandAmpersandEqualsToken; + } + return pos += 2, token = SyntaxKind.AmpersandAmpersandToken; } - return pos += 2, token = SyntaxKind.AsteriskAsteriskToken; - } - pos++; - if (skipJsDocLeadingAsterisks && !asteriskSeen && (tokenFlags & TokenFlags.PrecedingLineBreak)) { - // decoration at the start of a JSDoc comment line - asteriskSeen = true; - continue; - } - return token = SyntaxKind.AsteriskToken; - case CharacterCodes.plus: - if (charCodeUnchecked(pos + 1) === CharacterCodes.plus) { - return pos += 2, token = SyntaxKind.PlusPlusToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.PlusEqualsToken; - } - pos++; - return token = SyntaxKind.PlusToken; - case CharacterCodes.minus: - if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { - return pos += 2, token = SyntaxKind.MinusMinusToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.MinusEqualsToken; - } - pos++; - return token = SyntaxKind.MinusToken; - case CharacterCodes.dot: - if (isDigit(charCodeUnchecked(pos + 1))) { - scanNumber(); - return token = SyntaxKind.NumericLiteral; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && charCodeUnchecked(pos + 2) === CharacterCodes.dot) { - return pos += 3, token = SyntaxKind.DotDotDotToken; - } - pos++; - return token = SyntaxKind.DotToken; - case CharacterCodes.slash: - // Single-line comment - if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { - pos += 2; - - while (pos < end) { - if (isLineBreak(charCodeUnchecked(pos))) { - break; + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.AmpersandEqualsToken; + } + pos++; + return token = 
SyntaxKind.AmpersandToken; + case CharacterCodes.asterisk: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.AsteriskEqualsToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.AsteriskAsteriskEqualsToken; } - pos++; + return pos += 2, token = SyntaxKind.AsteriskAsteriskToken; } - - commentDirectives = appendIfCommentDirective( - commentDirectives, - text.slice(tokenStart, pos), - commentDirectiveRegExSingleLine, - tokenStart, - ); - - if (skipTrivia) { + pos++; + if (skipJsDocLeadingAsterisks && !asteriskSeen && (tokenFlags & TokenFlags.PrecedingLineBreak)) { + // decoration at the start of a JSDoc comment line + asteriskSeen = true; continue; } - else { - return token = SyntaxKind.SingleLineCommentTrivia; + return token = SyntaxKind.AsteriskToken; + case CharacterCodes.plus: + if (charCodeUnchecked(pos + 1) === CharacterCodes.plus) { + return pos += 2, token = SyntaxKind.PlusPlusToken; } - } - // Multi-line comment - if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { - pos += 2; - const isJSDoc = charCodeUnchecked(pos) === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) !== CharacterCodes.slash; - - let commentClosed = false; - let lastLineStart = tokenStart; - while (pos < end) { - const ch = charCodeUnchecked(pos); + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.PlusEqualsToken; + } + pos++; + return token = SyntaxKind.PlusToken; + case CharacterCodes.minus: + if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { + return pos += 2, token = SyntaxKind.MinusMinusToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.MinusEqualsToken; + } + pos++; + return token = SyntaxKind.MinusToken; + case CharacterCodes.dot: + if (isDigit(charCodeUnchecked(pos + 1))) { + 
scanNumber(); + return token = SyntaxKind.NumericLiteral; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && charCodeUnchecked(pos + 2) === CharacterCodes.dot) { + return pos += 3, token = SyntaxKind.DotDotDotToken; + } + pos++; + return token = SyntaxKind.DotToken; + case CharacterCodes.slash: + // Single-line comment + if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { + pos += 2; - if (ch === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) === CharacterCodes.slash) { - pos += 2; - commentClosed = true; - break; + while (pos < end) { + if (isLineBreak(charCodeUnchecked(pos))) { + break; + } + pos++; } - pos++; + commentDirectives = appendIfCommentDirective( + commentDirectives, + text.slice(tokenStart, pos), + commentDirectiveRegExSingleLine, + tokenStart, + ); - if (isLineBreak(ch)) { - lastLineStart = pos; - tokenFlags |= TokenFlags.PrecedingLineBreak; + if (skipTrivia) { + continue; + } + else { + return token = SyntaxKind.SingleLineCommentTrivia; } } + // Multi-line comment + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { + pos += 2; + const isJSDoc = charCodeUnchecked(pos) === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) !== CharacterCodes.slash; - if (isJSDoc && shouldParseJSDoc()) { - tokenFlags |= TokenFlags.PrecedingJSDocComment; - } + let commentClosed = false; + let lastLineStart = tokenStart; + while (pos < end) { + const ch = charCodeUnchecked(pos); - commentDirectives = appendIfCommentDirective(commentDirectives, text.slice(lastLineStart, pos), commentDirectiveRegExMultiLine, lastLineStart); + if (ch === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) === CharacterCodes.slash) { + pos += 2; + commentClosed = true; + break; + } - if (!commentClosed) { - error(Diagnostics.Asterisk_Slash_expected); - } + pos++; + + if (isLineBreak(ch)) { + lastLineStart = pos; + tokenFlags |= TokenFlags.PrecedingLineBreak; + } + } + + if (isJSDoc && shouldParseJSDoc()) { + tokenFlags |= 
TokenFlags.PrecedingJSDocComment; + } + + commentDirectives = appendIfCommentDirective(commentDirectives, text.slice(lastLineStart, pos), commentDirectiveRegExMultiLine, lastLineStart); - if (skipTrivia) { - continue; - } - else { if (!commentClosed) { - tokenFlags |= TokenFlags.Unterminated; + error(Diagnostics.Asterisk_Slash_expected); + } + + if (skipTrivia) { + continue; + } + else { + if (!commentClosed) { + tokenFlags |= TokenFlags.Unterminated; + } + return token = SyntaxKind.MultiLineCommentTrivia; } - return token = SyntaxKind.MultiLineCommentTrivia; } - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.SlashEqualsToken; - } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.SlashEqualsToken; + } - pos++; - return token = SyntaxKind.SlashToken; + pos++; + return token = SyntaxKind.SlashToken; - - case CharacterCodes.lessThan: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); - if (skipTrivia) { - continue; - } - else { - return token = SyntaxKind.ConflictMarkerTrivia; + case CharacterCodes.lessThan: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); + if (skipTrivia) { + continue; + } + else { + return token = SyntaxKind.ConflictMarkerTrivia; + } } - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.lessThan) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.LessThanLessThanEqualsToken; + if (charCodeUnchecked(pos + 1) === CharacterCodes.lessThan) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.LessThanLessThanEqualsToken; + } + return pos += 2, token = SyntaxKind.LessThanLessThanToken; } - return pos += 2, token = SyntaxKind.LessThanLessThanToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = 
SyntaxKind.LessThanEqualsToken; - } - if ( - languageVariant === LanguageVariant.JSX && - charCodeUnchecked(pos + 1) === CharacterCodes.slash && - charCodeUnchecked(pos + 2) !== CharacterCodes.asterisk - ) { - return pos += 2, token = SyntaxKind.LessThanSlashToken; - } - pos++; - return token = SyntaxKind.LessThanToken; - case CharacterCodes.equals: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); - if (skipTrivia) { - continue; + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.LessThanEqualsToken; } - else { - return token = SyntaxKind.ConflictMarkerTrivia; + if ( + languageVariant === LanguageVariant.JSX && + charCodeUnchecked(pos + 1) === CharacterCodes.slash && + charCodeUnchecked(pos + 2) !== CharacterCodes.asterisk + ) { + return pos += 2, token = SyntaxKind.LessThanSlashToken; + } + pos++; + return token = SyntaxKind.LessThanToken; + case CharacterCodes.equals: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); + if (skipTrivia) { + continue; + } + else { + return token = SyntaxKind.ConflictMarkerTrivia; + } } - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.EqualsEqualsEqualsToken; + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.EqualsEqualsEqualsToken; + } + return pos += 2, token = SyntaxKind.EqualsEqualsToken; } - return pos += 2, token = SyntaxKind.EqualsEqualsToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { - return pos += 2, token = SyntaxKind.EqualsGreaterThanToken; - } - pos++; - return token = SyntaxKind.EqualsToken; - case CharacterCodes.greaterThan: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); - if 
(skipTrivia) { - continue; + if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { + return pos += 2, token = SyntaxKind.EqualsGreaterThanToken; } - else { - return token = SyntaxKind.ConflictMarkerTrivia; + pos++; + return token = SyntaxKind.EqualsToken; + case CharacterCodes.greaterThan: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); + if (skipTrivia) { + continue; + } + else { + return token = SyntaxKind.ConflictMarkerTrivia; + } } - } - pos++; - return token = SyntaxKind.GreaterThanToken; - case CharacterCodes.question: - if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && !isDigit(charCodeUnchecked(pos + 2))) { - return pos += 2, token = SyntaxKind.QuestionDotToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.question) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.QuestionQuestionEqualsToken; - } - return pos += 2, token = SyntaxKind.QuestionQuestionToken; - } - pos++; - return token = SyntaxKind.QuestionToken; - case CharacterCodes.caret: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.CaretEqualsToken; - } - pos++; - return token = SyntaxKind.CaretToken; - case CharacterCodes.bar: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); - if (skipTrivia) { - continue; + pos++; + return token = SyntaxKind.GreaterThanToken; + case CharacterCodes.question: + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && !isDigit(charCodeUnchecked(pos + 2))) { + return pos += 2, token = SyntaxKind.QuestionDotToken; } - else { - return token = SyntaxKind.ConflictMarkerTrivia; + if (charCodeUnchecked(pos + 1) === CharacterCodes.question) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.QuestionQuestionEqualsToken; + } + return pos += 2, token = SyntaxKind.QuestionQuestionToken; } - } - - if 
(charCodeUnchecked(pos + 1) === CharacterCodes.bar) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.BarBarEqualsToken; + pos++; + return token = SyntaxKind.QuestionToken; + case CharacterCodes.caret: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.CaretEqualsToken; } - return pos += 2, token = SyntaxKind.BarBarToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.BarEqualsToken; - } - pos++; - return token = SyntaxKind.BarToken; - case CharacterCodes.backslash: - const extendedCookedChar = peekExtendedUnicodeEscape(); - if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) { - tokenValue = scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); - return token = getIdentifierToken(); - } - - const cookedChar = peekUnicodeEscape(); - if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) { - pos += 6; - tokenFlags |= TokenFlags.UnicodeEscape; - tokenValue = String.fromCharCode(cookedChar) + scanIdentifierParts(); - return token = getIdentifierToken(); - } - - error(Diagnostics.Invalid_character); - pos++; - return token = SyntaxKind.Unknown; - case CharacterCodes.hash: - const charAfterHash = codePointUnchecked(pos + 1); - - if (charAfterHash === CharacterCodes.exclamation) { - if (pos === 0) { - pos = scanShebangTrivia(text, pos); + pos++; + return token = SyntaxKind.CaretToken; + case CharacterCodes.bar: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); if (skipTrivia) { continue; } else { - return token = SyntaxKind.ShebangTrivia; + return token = SyntaxKind.ConflictMarkerTrivia; } } - else if (pos !== 0) { - error(Diagnostics.can_only_be_used_at_the_start_of_a_file, pos, 2); - pos++; - return token = SyntaxKind.Unknown; - } - } - if (charAfterHash === CharacterCodes.backslash) 
{ + if (charCodeUnchecked(pos + 1) === CharacterCodes.bar) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.BarBarEqualsToken; + } + return pos += 2, token = SyntaxKind.BarBarToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.BarEqualsToken; + } pos++; + return token = SyntaxKind.BarToken; + case CharacterCodes.backslash: const extendedCookedChar = peekExtendedUnicodeEscape(); if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) { - tokenValue = "#" + scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); - return token = SyntaxKind.PrivateIdentifier; + tokenValue = scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); + return token = getIdentifierToken(); } const cookedChar = peekUnicodeEscape(); if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) { pos += 6; tokenFlags |= TokenFlags.UnicodeEscape; - tokenValue = "#" + String.fromCharCode(cookedChar) + scanIdentifierParts(); - return token = SyntaxKind.PrivateIdentifier; + tokenValue = String.fromCharCode(cookedChar) + scanIdentifierParts(); + return token = getIdentifierToken(); } - pos--; - } - if (isIdentifierStart(charAfterHash, languageVersion)) { + error(Diagnostics.Invalid_character); pos++; - // We're relying on scanIdentifier's behavior and adjusting the token kind after the fact. - // Notably absent from this block is the fact that calling a function named "scanIdentifier", - // but identifiers don't include '#', and that function doesn't deal with it at all. - // This works because 'scanIdentifier' tries to reuse source characters and builds up substrings; - // however, it starts at the 'tokenPos' which includes the '#', and will "accidentally" prepend the '#' for us. 
- scanIdentifier(charAfterHash, languageVersion); - } - else { - tokenValue = "#"; - error(Diagnostics.Invalid_character, pos++, charSize(ch)); - } - return token = SyntaxKind.PrivateIdentifier; - case CharacterCodes.replacementCharacter: - error(Diagnostics.File_appears_to_be_binary, 0, 0); - pos = end; - return token = SyntaxKind.NonTextFileMarkerTrivia; - default: - const identifierKind = scanIdentifier(ch, languageVersion); - if (identifierKind) { - return token = identifierKind; - } - else if (isWhiteSpaceSingleLine(ch)) { - pos += charSize(ch); - continue; - } - else if (isLineBreak(ch)) { - tokenFlags |= TokenFlags.PrecedingLineBreak; - pos += charSize(ch); - continue; - } - const size = charSize(ch); - error(Diagnostics.Invalid_character, pos, size); - pos += size; - return token = SyntaxKind.Unknown; + return token = SyntaxKind.Unknown; + case CharacterCodes.hash: + const charAfterHash = codePointUnchecked(pos + 1); + + if (charAfterHash === CharacterCodes.exclamation) { + if (pos === 0) { + pos = scanShebangTrivia(text, pos); + if (skipTrivia) { + continue; + } + else { + return token = SyntaxKind.ShebangTrivia; + } + } + else if (pos !== 0) { + error(Diagnostics.can_only_be_used_at_the_start_of_a_file, pos, 2); + pos++; + return token = SyntaxKind.Unknown; + } + } + + if (charAfterHash === CharacterCodes.backslash) { + pos++; + const extendedCookedChar = peekExtendedUnicodeEscape(); + if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) { + tokenValue = "#" + scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); + return token = SyntaxKind.PrivateIdentifier; + } + + const cookedChar = peekUnicodeEscape(); + if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) { + pos += 6; + tokenFlags |= TokenFlags.UnicodeEscape; + tokenValue = "#" + String.fromCharCode(cookedChar) + scanIdentifierParts(); + return token = SyntaxKind.PrivateIdentifier; + } + pos--; + } + + if 
(isIdentifierStart(charAfterHash, languageVersion)) { + pos++; + // We're relying on scanIdentifier's behavior and adjusting the token kind after the fact. + // Notably absent from this block is the fact that calling a function named "scanIdentifier", + // but identifiers don't include '#', and that function doesn't deal with it at all. + // This works because 'scanIdentifier' tries to reuse source characters and builds up substrings; + // however, it starts at the 'tokenPos' which includes the '#', and will "accidentally" prepend the '#' for us. + scanIdentifier(charAfterHash, languageVersion); + } + else { + tokenValue = "#"; + error(Diagnostics.Invalid_character, pos++, charSize(ch)); + } + return token = SyntaxKind.PrivateIdentifier; + case CharacterCodes.replacementCharacter: + error(Diagnostics.File_appears_to_be_binary, 0, 0); + pos = end; + return token = SyntaxKind.NonTextFileMarkerTrivia; + } + } + + const identifierKind = scanIdentifier(ch, languageVersion); + if (identifierKind) { + return token = identifierKind; + } + else if (isWhiteSpaceSingleLine(ch)) { + pos += charSize(ch); + continue; } + else if (isLineBreak(ch)) { + tokenFlags |= TokenFlags.PrecedingLineBreak; + pos += charSize(ch); + continue; + } + const size = charSize(ch); + error(Diagnostics.Invalid_character, pos, size); + pos += size; + return token = SyntaxKind.Unknown; } } From 9830671eef604cc6c49cbc29da2c1fc92e8bd9f9 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 08:36:09 +0000 Subject: [PATCH 06/13] Check for CR, LF, space, and tab before doing the lookup. 
--- src/compiler/scanner.ts | 45 ++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 0929a2e7712d9..3dab6570c37fa 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -1991,28 +1991,41 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const ch = codePointUnchecked(pos); - const tokenInfo = ch < charcodeToTokenInfoCommon.length ? - charcodeToTokenInfoCommon[ch] : - charcodeToTokenInfoUncommon.get(ch) ?? TokenInfo.None; - if (tokenInfo !== TokenInfo.None) { - if (tokenInfo & TokenInfo.LineBreak) { - tokenFlags |= TokenFlags.PrecedingLineBreak; - if (skipTrivia) { + if (ch === CharacterCodes.tab || ch === CharacterCodes.space) { + if (skipTrivia) { + pos++; + continue; + } + else { + while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { pos++; - continue; + } + return token = SyntaxKind.WhitespaceTrivia; + } + } + + if (ch === CharacterCodes.lineFeed || ch === CharacterCodes.carriageReturn) { + tokenFlags |= TokenFlags.PrecedingLineBreak; + if (skipTrivia) { + pos++; + continue; + } + else { + if (ch === CharacterCodes.carriageReturn && pos + 1 < end && charCodeUnchecked(pos + 1) === CharacterCodes.lineFeed) { + // consume both CR and LF + pos += 2; } else { - if (ch === CharacterCodes.carriageReturn && pos + 1 < end && charCodeUnchecked(pos + 1) === CharacterCodes.lineFeed) { - // consume both CR and LF - pos += 2; - } - else { - pos++; - } - return token = SyntaxKind.NewLineTrivia; + pos++; } + return token = SyntaxKind.NewLineTrivia; } + } + const tokenInfo = ch < charcodeToTokenInfoCommon.length ? + charcodeToTokenInfoCommon[ch] : + charcodeToTokenInfoUncommon.get(ch) ?? 
TokenInfo.None; + if (tokenInfo !== TokenInfo.None) { if (tokenInfo & TokenInfo.Whitespace) { if (skipTrivia) { pos++; From af340b7156d021ffe95d4da2ae8ce887971376ec Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 17:30:08 +0000 Subject: [PATCH 07/13] Flip condition for readability (avoids indentation). --- src/compiler/scanner.ts | 695 ++++++++++++++++++++-------------------- 1 file changed, 348 insertions(+), 347 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 3dab6570c37fa..944b3e1e21fff 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2025,421 +2025,422 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const tokenInfo = ch < charcodeToTokenInfoCommon.length ? charcodeToTokenInfoCommon[ch] : charcodeToTokenInfoUncommon.get(ch) ?? TokenInfo.None; - if (tokenInfo !== TokenInfo.None) { - if (tokenInfo & TokenInfo.Whitespace) { - if (skipTrivia) { - pos++; - continue; - } - else { - while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { - pos++; - } - return token = SyntaxKind.WhitespaceTrivia; - } + + if (tokenInfo === TokenInfo.None) { + const identifierKind = scanIdentifier(ch, languageVersion); + if (identifierKind) { + return token = identifierKind; } + else if (isWhiteSpaceSingleLine(ch)) { + pos += charSize(ch); + continue; + } + else if (isLineBreak(ch)) { + tokenFlags |= TokenFlags.PrecedingLineBreak; + pos += charSize(ch); + continue; + } + const size = charSize(ch); + error(Diagnostics.Invalid_character, pos, size); + pos += size; + return token = SyntaxKind.Unknown; + } - if (tokenInfo & TokenInfo.SimpleToken) { + if (tokenInfo & TokenInfo.Whitespace) { + if (skipTrivia) { pos++; - return token = tokenInfo & TokenInfo.SimpleTokenMask; + continue; + } + else { + while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { + pos++; + } + return token = SyntaxKind.WhitespaceTrivia; } + } - if (tokenInfo & TokenInfo.Digit) { 
- if (ch === CharacterCodes._0) { - if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { - pos += 2; - tokenValue = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ true); - if (!tokenValue) { - error(Diagnostics.Hexadecimal_digit_expected); - tokenValue = "0"; - } - tokenValue = "0x" + tokenValue; - tokenFlags |= TokenFlags.HexSpecifier; - return token = checkBigIntSuffix(); + if (tokenInfo & TokenInfo.SimpleToken) { + pos++; + return token = tokenInfo & TokenInfo.SimpleTokenMask; + } + + if (tokenInfo & TokenInfo.Digit) { + if (ch === CharacterCodes._0) { + if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { + pos += 2; + tokenValue = scanMinimumNumberOfHexDigits(1, /*canHaveSeparators*/ true); + if (!tokenValue) { + error(Diagnostics.Hexadecimal_digit_expected); + tokenValue = "0"; } - else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.B || charCodeUnchecked(pos + 1) === CharacterCodes.b)) { - pos += 2; - tokenValue = scanBinaryOrOctalDigits(/* base */ 2); - if (!tokenValue) { - error(Diagnostics.Binary_digit_expected); - tokenValue = "0"; - } - tokenValue = "0b" + tokenValue; - tokenFlags |= TokenFlags.BinarySpecifier; - return token = checkBigIntSuffix(); + tokenValue = "0x" + tokenValue; + tokenFlags |= TokenFlags.HexSpecifier; + return token = checkBigIntSuffix(); + } + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.B || charCodeUnchecked(pos + 1) === CharacterCodes.b)) { + pos += 2; + tokenValue = scanBinaryOrOctalDigits(/* base */ 2); + if (!tokenValue) { + error(Diagnostics.Binary_digit_expected); + tokenValue = "0"; } - else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.O || charCodeUnchecked(pos + 1) === CharacterCodes.o)) { - pos += 2; - tokenValue = scanBinaryOrOctalDigits(/* base */ 8); - if (!tokenValue) { - 
error(Diagnostics.Octal_digit_expected); - tokenValue = "0"; - } - tokenValue = "0o" + tokenValue; - tokenFlags |= TokenFlags.OctalSpecifier; - return token = checkBigIntSuffix(); + tokenValue = "0b" + tokenValue; + tokenFlags |= TokenFlags.BinarySpecifier; + return token = checkBigIntSuffix(); + } + else if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.O || charCodeUnchecked(pos + 1) === CharacterCodes.o)) { + pos += 2; + tokenValue = scanBinaryOrOctalDigits(/* base */ 8); + if (!tokenValue) { + error(Diagnostics.Octal_digit_expected); + tokenValue = "0"; } + tokenValue = "0o" + tokenValue; + tokenFlags |= TokenFlags.OctalSpecifier; + return token = checkBigIntSuffix(); } - - return token = scanNumber(); } - Debug.assert(tokenInfo & TokenInfo.RecognizedMisc); - switch (ch) { - case CharacterCodes.exclamation: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.ExclamationEqualsEqualsToken; - } - return pos += 2, token = SyntaxKind.ExclamationEqualsToken; - } - pos++; - return token = SyntaxKind.ExclamationToken; - case CharacterCodes.doubleQuote: - case CharacterCodes.singleQuote: - tokenValue = scanString(); - return token = SyntaxKind.StringLiteral; - case CharacterCodes.backtick: - return token = scanTemplateAndSetTokenValue(/*shouldEmitInvalidEscapeError*/ false); - case CharacterCodes.percent: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.PercentEqualsToken; - } - pos++; - return token = SyntaxKind.PercentToken; - case CharacterCodes.ampersand: - if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.AmpersandAmpersandEqualsToken; - } - return pos += 2, token = SyntaxKind.AmpersandAmpersandToken; + return token = scanNumber(); + } + + Debug.assert(tokenInfo & 
TokenInfo.RecognizedMisc); + switch (ch) { + case CharacterCodes.exclamation: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.ExclamationEqualsEqualsToken; } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.AmpersandEqualsToken; + return pos += 2, token = SyntaxKind.ExclamationEqualsToken; + } + pos++; + return token = SyntaxKind.ExclamationToken; + case CharacterCodes.doubleQuote: + case CharacterCodes.singleQuote: + tokenValue = scanString(); + return token = SyntaxKind.StringLiteral; + case CharacterCodes.backtick: + return token = scanTemplateAndSetTokenValue(/*shouldEmitInvalidEscapeError*/ false); + case CharacterCodes.percent: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.PercentEqualsToken; + } + pos++; + return token = SyntaxKind.PercentToken; + case CharacterCodes.ampersand: + if (charCodeUnchecked(pos + 1) === CharacterCodes.ampersand) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.AmpersandAmpersandEqualsToken; } - pos++; - return token = SyntaxKind.AmpersandToken; - case CharacterCodes.asterisk: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.AsteriskEqualsToken; + return pos += 2, token = SyntaxKind.AmpersandAmpersandToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.AmpersandEqualsToken; + } + pos++; + return token = SyntaxKind.AmpersandToken; + case CharacterCodes.asterisk: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.AsteriskEqualsToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = 
SyntaxKind.AsteriskAsteriskEqualsToken; } - if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.AsteriskAsteriskEqualsToken; + return pos += 2, token = SyntaxKind.AsteriskAsteriskToken; + } + pos++; + if (skipJsDocLeadingAsterisks && !asteriskSeen && (tokenFlags & TokenFlags.PrecedingLineBreak)) { + // decoration at the start of a JSDoc comment line + asteriskSeen = true; + continue; + } + return token = SyntaxKind.AsteriskToken; + case CharacterCodes.plus: + if (charCodeUnchecked(pos + 1) === CharacterCodes.plus) { + return pos += 2, token = SyntaxKind.PlusPlusToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.PlusEqualsToken; + } + pos++; + return token = SyntaxKind.PlusToken; + case CharacterCodes.minus: + if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { + return pos += 2, token = SyntaxKind.MinusMinusToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.MinusEqualsToken; + } + pos++; + return token = SyntaxKind.MinusToken; + case CharacterCodes.dot: + if (isDigit(charCodeUnchecked(pos + 1))) { + scanNumber(); + return token = SyntaxKind.NumericLiteral; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && charCodeUnchecked(pos + 2) === CharacterCodes.dot) { + return pos += 3, token = SyntaxKind.DotDotDotToken; + } + pos++; + return token = SyntaxKind.DotToken; + case CharacterCodes.slash: + // Single-line comment + if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { + pos += 2; + + while (pos < end) { + if (isLineBreak(charCodeUnchecked(pos))) { + break; } - return pos += 2, token = SyntaxKind.AsteriskAsteriskToken; + pos++; } - pos++; - if (skipJsDocLeadingAsterisks && !asteriskSeen && (tokenFlags & TokenFlags.PrecedingLineBreak)) { - // decoration at the start of a JSDoc comment line - asteriskSeen = true; + + 
commentDirectives = appendIfCommentDirective( + commentDirectives, + text.slice(tokenStart, pos), + commentDirectiveRegExSingleLine, + tokenStart, + ); + + if (skipTrivia) { continue; } - return token = SyntaxKind.AsteriskToken; - case CharacterCodes.plus: - if (charCodeUnchecked(pos + 1) === CharacterCodes.plus) { - return pos += 2, token = SyntaxKind.PlusPlusToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.PlusEqualsToken; - } - pos++; - return token = SyntaxKind.PlusToken; - case CharacterCodes.minus: - if (charCodeUnchecked(pos + 1) === CharacterCodes.minus) { - return pos += 2, token = SyntaxKind.MinusMinusToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.MinusEqualsToken; - } - pos++; - return token = SyntaxKind.MinusToken; - case CharacterCodes.dot: - if (isDigit(charCodeUnchecked(pos + 1))) { - scanNumber(); - return token = SyntaxKind.NumericLiteral; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && charCodeUnchecked(pos + 2) === CharacterCodes.dot) { - return pos += 3, token = SyntaxKind.DotDotDotToken; + else { + return token = SyntaxKind.SingleLineCommentTrivia; } - pos++; - return token = SyntaxKind.DotToken; - case CharacterCodes.slash: - // Single-line comment - if (charCodeUnchecked(pos + 1) === CharacterCodes.slash) { - pos += 2; + } + // Multi-line comment + if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { + pos += 2; + const isJSDoc = charCodeUnchecked(pos) === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) !== CharacterCodes.slash; - while (pos < end) { - if (isLineBreak(charCodeUnchecked(pos))) { - break; - } - pos++; + let commentClosed = false; + let lastLineStart = tokenStart; + while (pos < end) { + const ch = charCodeUnchecked(pos); + + if (ch === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) === CharacterCodes.slash) { + pos += 2; + commentClosed = true; + break; } - 
commentDirectives = appendIfCommentDirective( - commentDirectives, - text.slice(tokenStart, pos), - commentDirectiveRegExSingleLine, - tokenStart, - ); + pos++; - if (skipTrivia) { - continue; - } - else { - return token = SyntaxKind.SingleLineCommentTrivia; + if (isLineBreak(ch)) { + lastLineStart = pos; + tokenFlags |= TokenFlags.PrecedingLineBreak; } } - // Multi-line comment - if (charCodeUnchecked(pos + 1) === CharacterCodes.asterisk) { - pos += 2; - const isJSDoc = charCodeUnchecked(pos) === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) !== CharacterCodes.slash; - - let commentClosed = false; - let lastLineStart = tokenStart; - while (pos < end) { - const ch = charCodeUnchecked(pos); - - if (ch === CharacterCodes.asterisk && charCodeUnchecked(pos + 1) === CharacterCodes.slash) { - pos += 2; - commentClosed = true; - break; - } - pos++; - - if (isLineBreak(ch)) { - lastLineStart = pos; - tokenFlags |= TokenFlags.PrecedingLineBreak; - } - } + if (isJSDoc && shouldParseJSDoc()) { + tokenFlags |= TokenFlags.PrecedingJSDocComment; + } - if (isJSDoc && shouldParseJSDoc()) { - tokenFlags |= TokenFlags.PrecedingJSDocComment; - } + commentDirectives = appendIfCommentDirective(commentDirectives, text.slice(lastLineStart, pos), commentDirectiveRegExMultiLine, lastLineStart); - commentDirectives = appendIfCommentDirective(commentDirectives, text.slice(lastLineStart, pos), commentDirectiveRegExMultiLine, lastLineStart); + if (!commentClosed) { + error(Diagnostics.Asterisk_Slash_expected); + } + if (skipTrivia) { + continue; + } + else { if (!commentClosed) { - error(Diagnostics.Asterisk_Slash_expected); - } - - if (skipTrivia) { - continue; - } - else { - if (!commentClosed) { - tokenFlags |= TokenFlags.Unterminated; - } - return token = SyntaxKind.MultiLineCommentTrivia; + tokenFlags |= TokenFlags.Unterminated; } + return token = SyntaxKind.MultiLineCommentTrivia; } + } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = 
SyntaxKind.SlashEqualsToken; - } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.SlashEqualsToken; + } - pos++; - return token = SyntaxKind.SlashToken; + pos++; + return token = SyntaxKind.SlashToken; - case CharacterCodes.lessThan: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); - if (skipTrivia) { - continue; - } - else { - return token = SyntaxKind.ConflictMarkerTrivia; - } + case CharacterCodes.lessThan: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); + if (skipTrivia) { + continue; } - - if (charCodeUnchecked(pos + 1) === CharacterCodes.lessThan) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.LessThanLessThanEqualsToken; - } - return pos += 2, token = SyntaxKind.LessThanLessThanToken; + else { + return token = SyntaxKind.ConflictMarkerTrivia; } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.LessThanEqualsToken; + } + + if (charCodeUnchecked(pos + 1) === CharacterCodes.lessThan) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.LessThanLessThanEqualsToken; } - if ( - languageVariant === LanguageVariant.JSX && - charCodeUnchecked(pos + 1) === CharacterCodes.slash && - charCodeUnchecked(pos + 2) !== CharacterCodes.asterisk - ) { - return pos += 2, token = SyntaxKind.LessThanSlashToken; + return pos += 2, token = SyntaxKind.LessThanLessThanToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.LessThanEqualsToken; + } + if ( + languageVariant === LanguageVariant.JSX && + charCodeUnchecked(pos + 1) === CharacterCodes.slash && + charCodeUnchecked(pos + 2) !== CharacterCodes.asterisk + ) { + return pos += 2, token = SyntaxKind.LessThanSlashToken; + } + pos++; + return token = SyntaxKind.LessThanToken; + case 
CharacterCodes.equals: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); + if (skipTrivia) { + continue; } - pos++; - return token = SyntaxKind.LessThanToken; - case CharacterCodes.equals: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); - if (skipTrivia) { - continue; - } - else { - return token = SyntaxKind.ConflictMarkerTrivia; - } + else { + return token = SyntaxKind.ConflictMarkerTrivia; } + } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.EqualsEqualsEqualsToken; - } - return pos += 2, token = SyntaxKind.EqualsEqualsToken; + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.EqualsEqualsEqualsToken; } - if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { - return pos += 2, token = SyntaxKind.EqualsGreaterThanToken; + return pos += 2, token = SyntaxKind.EqualsEqualsToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.greaterThan) { + return pos += 2, token = SyntaxKind.EqualsGreaterThanToken; + } + pos++; + return token = SyntaxKind.EqualsToken; + case CharacterCodes.greaterThan: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); + if (skipTrivia) { + continue; } - pos++; - return token = SyntaxKind.EqualsToken; - case CharacterCodes.greaterThan: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); - if (skipTrivia) { - continue; - } - else { - return token = SyntaxKind.ConflictMarkerTrivia; - } + else { + return token = SyntaxKind.ConflictMarkerTrivia; } + } - pos++; - return token = SyntaxKind.GreaterThanToken; - case CharacterCodes.question: - if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && !isDigit(charCodeUnchecked(pos + 2))) { - 
return pos += 2, token = SyntaxKind.QuestionDotToken; + pos++; + return token = SyntaxKind.GreaterThanToken; + case CharacterCodes.question: + if (charCodeUnchecked(pos + 1) === CharacterCodes.dot && !isDigit(charCodeUnchecked(pos + 2))) { + return pos += 2, token = SyntaxKind.QuestionDotToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.question) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.QuestionQuestionEqualsToken; } - if (charCodeUnchecked(pos + 1) === CharacterCodes.question) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.QuestionQuestionEqualsToken; - } - return pos += 2, token = SyntaxKind.QuestionQuestionToken; + return pos += 2, token = SyntaxKind.QuestionQuestionToken; + } + pos++; + return token = SyntaxKind.QuestionToken; + case CharacterCodes.caret: + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = SyntaxKind.CaretEqualsToken; + } + pos++; + return token = SyntaxKind.CaretToken; + case CharacterCodes.bar: + if (isConflictMarkerTrivia(text, pos)) { + pos = scanConflictMarkerTrivia(text, pos, error); + if (skipTrivia) { + continue; } - pos++; - return token = SyntaxKind.QuestionToken; - case CharacterCodes.caret: - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.CaretEqualsToken; + else { + return token = SyntaxKind.ConflictMarkerTrivia; } - pos++; - return token = SyntaxKind.CaretToken; - case CharacterCodes.bar: - if (isConflictMarkerTrivia(text, pos)) { - pos = scanConflictMarkerTrivia(text, pos, error); + } + + if (charCodeUnchecked(pos + 1) === CharacterCodes.bar) { + if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { + return pos += 3, token = SyntaxKind.BarBarEqualsToken; + } + return pos += 2, token = SyntaxKind.BarBarToken; + } + if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { + return pos += 2, token = 
SyntaxKind.BarEqualsToken; + } + pos++; + return token = SyntaxKind.BarToken; + case CharacterCodes.backslash: + const extendedCookedChar = peekExtendedUnicodeEscape(); + if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) { + tokenValue = scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); + return token = getIdentifierToken(); + } + + const cookedChar = peekUnicodeEscape(); + if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) { + pos += 6; + tokenFlags |= TokenFlags.UnicodeEscape; + tokenValue = String.fromCharCode(cookedChar) + scanIdentifierParts(); + return token = getIdentifierToken(); + } + + error(Diagnostics.Invalid_character); + pos++; + return token = SyntaxKind.Unknown; + case CharacterCodes.hash: + const charAfterHash = codePointUnchecked(pos + 1); + + if (charAfterHash === CharacterCodes.exclamation) { + if (pos === 0) { + pos = scanShebangTrivia(text, pos); if (skipTrivia) { continue; } else { - return token = SyntaxKind.ConflictMarkerTrivia; + return token = SyntaxKind.ShebangTrivia; } } - - if (charCodeUnchecked(pos + 1) === CharacterCodes.bar) { - if (charCodeUnchecked(pos + 2) === CharacterCodes.equals) { - return pos += 3, token = SyntaxKind.BarBarEqualsToken; - } - return pos += 2, token = SyntaxKind.BarBarToken; - } - if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { - return pos += 2, token = SyntaxKind.BarEqualsToken; + else if (pos !== 0) { + error(Diagnostics.can_only_be_used_at_the_start_of_a_file, pos, 2); + pos++; + return token = SyntaxKind.Unknown; } + } + + if (charAfterHash === CharacterCodes.backslash) { pos++; - return token = SyntaxKind.BarToken; - case CharacterCodes.backslash: const extendedCookedChar = peekExtendedUnicodeEscape(); if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) { - tokenValue = scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); - 
return token = getIdentifierToken(); + tokenValue = "#" + scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); + return token = SyntaxKind.PrivateIdentifier; } const cookedChar = peekUnicodeEscape(); if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) { pos += 6; tokenFlags |= TokenFlags.UnicodeEscape; - tokenValue = String.fromCharCode(cookedChar) + scanIdentifierParts(); - return token = getIdentifierToken(); + tokenValue = "#" + String.fromCharCode(cookedChar) + scanIdentifierParts(); + return token = SyntaxKind.PrivateIdentifier; } + pos--; + } - error(Diagnostics.Invalid_character); + if (isIdentifierStart(charAfterHash, languageVersion)) { pos++; - return token = SyntaxKind.Unknown; - case CharacterCodes.hash: - const charAfterHash = codePointUnchecked(pos + 1); - - if (charAfterHash === CharacterCodes.exclamation) { - if (pos === 0) { - pos = scanShebangTrivia(text, pos); - if (skipTrivia) { - continue; - } - else { - return token = SyntaxKind.ShebangTrivia; - } - } - else if (pos !== 0) { - error(Diagnostics.can_only_be_used_at_the_start_of_a_file, pos, 2); - pos++; - return token = SyntaxKind.Unknown; - } - } - - if (charAfterHash === CharacterCodes.backslash) { - pos++; - const extendedCookedChar = peekExtendedUnicodeEscape(); - if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) { - tokenValue = "#" + scanExtendedUnicodeEscape(/*shouldEmitInvalidEscapeError*/ true) + scanIdentifierParts(); - return token = SyntaxKind.PrivateIdentifier; - } - - const cookedChar = peekUnicodeEscape(); - if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) { - pos += 6; - tokenFlags |= TokenFlags.UnicodeEscape; - tokenValue = "#" + String.fromCharCode(cookedChar) + scanIdentifierParts(); - return token = SyntaxKind.PrivateIdentifier; - } - pos--; - } - - if (isIdentifierStart(charAfterHash, languageVersion)) { - pos++; - // We're relying on scanIdentifier's behavior 
and adjusting the token kind after the fact. - // Notably absent from this block is the fact that calling a function named "scanIdentifier", - // but identifiers don't include '#', and that function doesn't deal with it at all. - // This works because 'scanIdentifier' tries to reuse source characters and builds up substrings; - // however, it starts at the 'tokenPos' which includes the '#', and will "accidentally" prepend the '#' for us. - scanIdentifier(charAfterHash, languageVersion); - } - else { - tokenValue = "#"; - error(Diagnostics.Invalid_character, pos++, charSize(ch)); - } - return token = SyntaxKind.PrivateIdentifier; - case CharacterCodes.replacementCharacter: - error(Diagnostics.File_appears_to_be_binary, 0, 0); - pos = end; - return token = SyntaxKind.NonTextFileMarkerTrivia; - } - } - - const identifierKind = scanIdentifier(ch, languageVersion); - if (identifierKind) { - return token = identifierKind; - } - else if (isWhiteSpaceSingleLine(ch)) { - pos += charSize(ch); - continue; - } - else if (isLineBreak(ch)) { - tokenFlags |= TokenFlags.PrecedingLineBreak; - pos += charSize(ch); - continue; + // We're relying on scanIdentifier's behavior and adjusting the token kind after the fact. + // Notably absent from this block is the fact that calling a function named "scanIdentifier", + // but identifiers don't include '#', and that function doesn't deal with it at all. + // This works because 'scanIdentifier' tries to reuse source characters and builds up substrings; + // however, it starts at the 'tokenPos' which includes the '#', and will "accidentally" prepend the '#' for us. 
+ scanIdentifier(charAfterHash, languageVersion); + } + else { + tokenValue = "#"; + error(Diagnostics.Invalid_character, pos++, charSize(ch)); + } + return token = SyntaxKind.PrivateIdentifier; + case CharacterCodes.replacementCharacter: + error(Diagnostics.File_appears_to_be_binary, 0, 0); + pos = end; + return token = SyntaxKind.NonTextFileMarkerTrivia; } - const size = charSize(ch); - error(Diagnostics.Invalid_character, pos, size); - pos += size; - return token = SyntaxKind.Unknown; } } From d5898b5b80e416da725bedc653343272cccba04e Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 18:59:25 +0000 Subject: [PATCH 08/13] Use `Debug.fail` to avoid a call. --- src/compiler/scanner.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 944b3e1e21fff..8f977217e1ee9 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -2104,7 +2104,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = scanNumber(); } - Debug.assert(tokenInfo & TokenInfo.RecognizedMisc); + if (!(tokenInfo & TokenInfo.RecognizedMisc)) Debug.fail(`Unhandled token category ${tokenInfo}`); switch (ch) { case CharacterCodes.exclamation: if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { From 11a24a7c7702adb0786008ec474e565b1f48b1b6 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 19:14:36 +0000 Subject: [PATCH 09/13] `TokenInfo` -> `TokenCategory` --- src/compiler/scanner.ts | 164 ++++++++++++++++++++-------------------- 1 file changed, 82 insertions(+), 82 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 8f977217e1ee9..fea4cb33a02ad 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -301,7 +301,7 @@ const regExpFlagToFirstAvailableLanguageVersion = new Map(); +const tokenCategoryLookup: TokenCategory[] = []; +const tokenCategoryLookupUncommon = new Map(); for (let i = 0; 
i < CharacterCodes.maxAsciiCharacter; i++) { - charcodeToTokenInfoCommon.push(0); + tokenCategoryLookup.push(TokenCategory.None); } for ( const [key, value] of [ // Line Break Whitespace - [CharacterCodes.lineFeed, TokenInfo.LineBreak], - [CharacterCodes.carriageReturn, TokenInfo.LineBreak], + [CharacterCodes.lineFeed, TokenCategory.LineBreak], + [CharacterCodes.carriageReturn, TokenCategory.LineBreak], // Single Line Whitespace - [CharacterCodes.tab, TokenInfo.Whitespace], - [CharacterCodes.verticalTab, TokenInfo.Whitespace], - [CharacterCodes.formFeed, TokenInfo.Whitespace], - [CharacterCodes.space, TokenInfo.Whitespace], - [CharacterCodes.nonBreakingSpace, TokenInfo.Whitespace], - [CharacterCodes.ogham, TokenInfo.Whitespace], - [CharacterCodes.enQuad, TokenInfo.Whitespace], - [CharacterCodes.emQuad, TokenInfo.Whitespace], - [CharacterCodes.enSpace, TokenInfo.Whitespace], - [CharacterCodes.emSpace, TokenInfo.Whitespace], - [CharacterCodes.threePerEmSpace, TokenInfo.Whitespace], - [CharacterCodes.fourPerEmSpace, TokenInfo.Whitespace], - [CharacterCodes.sixPerEmSpace, TokenInfo.Whitespace], - [CharacterCodes.figureSpace, TokenInfo.Whitespace], - [CharacterCodes.punctuationSpace, TokenInfo.Whitespace], - [CharacterCodes.thinSpace, TokenInfo.Whitespace], - [CharacterCodes.hairSpace, TokenInfo.Whitespace], - [CharacterCodes.zeroWidthSpace, TokenInfo.Whitespace], - [CharacterCodes.narrowNoBreakSpace, TokenInfo.Whitespace], - [CharacterCodes.mathematicalSpace, TokenInfo.Whitespace], - [CharacterCodes.ideographicSpace, TokenInfo.Whitespace], - [CharacterCodes.byteOrderMark, TokenInfo.Whitespace], + [CharacterCodes.tab, TokenCategory.Whitespace], + [CharacterCodes.verticalTab, TokenCategory.Whitespace], + [CharacterCodes.formFeed, TokenCategory.Whitespace], + [CharacterCodes.space, TokenCategory.Whitespace], + [CharacterCodes.nonBreakingSpace, TokenCategory.Whitespace], + [CharacterCodes.ogham, TokenCategory.Whitespace], + [CharacterCodes.enQuad, 
TokenCategory.Whitespace], + [CharacterCodes.emQuad, TokenCategory.Whitespace], + [CharacterCodes.enSpace, TokenCategory.Whitespace], + [CharacterCodes.emSpace, TokenCategory.Whitespace], + [CharacterCodes.threePerEmSpace, TokenCategory.Whitespace], + [CharacterCodes.fourPerEmSpace, TokenCategory.Whitespace], + [CharacterCodes.sixPerEmSpace, TokenCategory.Whitespace], + [CharacterCodes.figureSpace, TokenCategory.Whitespace], + [CharacterCodes.punctuationSpace, TokenCategory.Whitespace], + [CharacterCodes.thinSpace, TokenCategory.Whitespace], + [CharacterCodes.hairSpace, TokenCategory.Whitespace], + [CharacterCodes.zeroWidthSpace, TokenCategory.Whitespace], + [CharacterCodes.narrowNoBreakSpace, TokenCategory.Whitespace], + [CharacterCodes.mathematicalSpace, TokenCategory.Whitespace], + [CharacterCodes.ideographicSpace, TokenCategory.Whitespace], + [CharacterCodes.byteOrderMark, TokenCategory.Whitespace], // Simple Single-Character Tokens - [CharacterCodes.openParen, TokenInfo.SimpleToken | SyntaxKind.OpenParenToken], - [CharacterCodes.closeParen, TokenInfo.SimpleToken | SyntaxKind.CloseParenToken], - [CharacterCodes.comma, TokenInfo.SimpleToken | SyntaxKind.CommaToken], - [CharacterCodes.colon, TokenInfo.SimpleToken | SyntaxKind.ColonToken], - [CharacterCodes.semicolon, TokenInfo.SimpleToken | SyntaxKind.SemicolonToken], - [CharacterCodes.openBracket, TokenInfo.SimpleToken | SyntaxKind.OpenBracketToken], - [CharacterCodes.closeBracket, TokenInfo.SimpleToken | SyntaxKind.CloseBracketToken], - [CharacterCodes.openBrace, TokenInfo.SimpleToken | SyntaxKind.OpenBraceToken], - [CharacterCodes.closeBrace, TokenInfo.SimpleToken | SyntaxKind.CloseBraceToken], - [CharacterCodes.tilde, TokenInfo.SimpleToken | SyntaxKind.TildeToken], - [CharacterCodes.at, TokenInfo.SimpleToken | SyntaxKind.AtToken], + [CharacterCodes.openParen, TokenCategory.SimpleToken | SyntaxKind.OpenParenToken], + [CharacterCodes.closeParen, TokenCategory.SimpleToken | SyntaxKind.CloseParenToken], + 
[CharacterCodes.comma, TokenCategory.SimpleToken | SyntaxKind.CommaToken], + [CharacterCodes.colon, TokenCategory.SimpleToken | SyntaxKind.ColonToken], + [CharacterCodes.semicolon, TokenCategory.SimpleToken | SyntaxKind.SemicolonToken], + [CharacterCodes.openBracket, TokenCategory.SimpleToken | SyntaxKind.OpenBracketToken], + [CharacterCodes.closeBracket, TokenCategory.SimpleToken | SyntaxKind.CloseBracketToken], + [CharacterCodes.openBrace, TokenCategory.SimpleToken | SyntaxKind.OpenBraceToken], + [CharacterCodes.closeBrace, TokenCategory.SimpleToken | SyntaxKind.CloseBraceToken], + [CharacterCodes.tilde, TokenCategory.SimpleToken | SyntaxKind.TildeToken], + [CharacterCodes.at, TokenCategory.SimpleToken | SyntaxKind.AtToken], // Digits - [CharacterCodes._0, TokenInfo.Digit], - [CharacterCodes._1, TokenInfo.Digit], - [CharacterCodes._2, TokenInfo.Digit], - [CharacterCodes._3, TokenInfo.Digit], - [CharacterCodes._4, TokenInfo.Digit], - [CharacterCodes._5, TokenInfo.Digit], - [CharacterCodes._6, TokenInfo.Digit], - [CharacterCodes._7, TokenInfo.Digit], - [CharacterCodes._8, TokenInfo.Digit], - [CharacterCodes._9, TokenInfo.Digit], - - [CharacterCodes.exclamation, TokenInfo.RecognizedMisc], - [CharacterCodes.doubleQuote, TokenInfo.RecognizedMisc], - [CharacterCodes.singleQuote, TokenInfo.RecognizedMisc], - [CharacterCodes.backtick, TokenInfo.RecognizedMisc], - [CharacterCodes.percent, TokenInfo.RecognizedMisc], - [CharacterCodes.ampersand, TokenInfo.RecognizedMisc], - [CharacterCodes.asterisk, TokenInfo.RecognizedMisc], - [CharacterCodes.plus, TokenInfo.RecognizedMisc], - [CharacterCodes.minus, TokenInfo.RecognizedMisc], - [CharacterCodes.dot, TokenInfo.RecognizedMisc], - [CharacterCodes.slash, TokenInfo.RecognizedMisc], - [CharacterCodes.lessThan, TokenInfo.RecognizedMisc], - [CharacterCodes.equals, TokenInfo.RecognizedMisc], - [CharacterCodes.greaterThan, TokenInfo.RecognizedMisc], - [CharacterCodes.question, TokenInfo.RecognizedMisc], - [CharacterCodes.caret, 
TokenInfo.RecognizedMisc], - [CharacterCodes.bar, TokenInfo.RecognizedMisc], - [CharacterCodes.backslash, TokenInfo.RecognizedMisc], - [CharacterCodes.hash, TokenInfo.RecognizedMisc], - [CharacterCodes.replacementCharacter, TokenInfo.RecognizedMisc], + [CharacterCodes._0, TokenCategory.Digit], + [CharacterCodes._1, TokenCategory.Digit], + [CharacterCodes._2, TokenCategory.Digit], + [CharacterCodes._3, TokenCategory.Digit], + [CharacterCodes._4, TokenCategory.Digit], + [CharacterCodes._5, TokenCategory.Digit], + [CharacterCodes._6, TokenCategory.Digit], + [CharacterCodes._7, TokenCategory.Digit], + [CharacterCodes._8, TokenCategory.Digit], + [CharacterCodes._9, TokenCategory.Digit], + + [CharacterCodes.exclamation, TokenCategory.RecognizedMisc], + [CharacterCodes.doubleQuote, TokenCategory.RecognizedMisc], + [CharacterCodes.singleQuote, TokenCategory.RecognizedMisc], + [CharacterCodes.backtick, TokenCategory.RecognizedMisc], + [CharacterCodes.percent, TokenCategory.RecognizedMisc], + [CharacterCodes.ampersand, TokenCategory.RecognizedMisc], + [CharacterCodes.asterisk, TokenCategory.RecognizedMisc], + [CharacterCodes.plus, TokenCategory.RecognizedMisc], + [CharacterCodes.minus, TokenCategory.RecognizedMisc], + [CharacterCodes.dot, TokenCategory.RecognizedMisc], + [CharacterCodes.slash, TokenCategory.RecognizedMisc], + [CharacterCodes.lessThan, TokenCategory.RecognizedMisc], + [CharacterCodes.equals, TokenCategory.RecognizedMisc], + [CharacterCodes.greaterThan, TokenCategory.RecognizedMisc], + [CharacterCodes.question, TokenCategory.RecognizedMisc], + [CharacterCodes.caret, TokenCategory.RecognizedMisc], + [CharacterCodes.bar, TokenCategory.RecognizedMisc], + [CharacterCodes.backslash, TokenCategory.RecognizedMisc], + [CharacterCodes.hash, TokenCategory.RecognizedMisc], + [CharacterCodes.replacementCharacter, TokenCategory.RecognizedMisc], ] ) { - if (key < charcodeToTokenInfoCommon.length) { - charcodeToTokenInfoCommon[key] = value; + if (key < 
tokenCategoryLookup.length) { + tokenCategoryLookup[key] = value; } else { - charcodeToTokenInfoUncommon.set(key, value); + tokenCategoryLookupUncommon.set(key, value); } } @@ -2022,11 +2022,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - const tokenInfo = ch < charcodeToTokenInfoCommon.length ? - charcodeToTokenInfoCommon[ch] : - charcodeToTokenInfoUncommon.get(ch) ?? TokenInfo.None; + const tokenInfo = ch < tokenCategoryLookup.length ? + tokenCategoryLookup[ch] : + tokenCategoryLookupUncommon.get(ch) ?? TokenCategory.None; - if (tokenInfo === TokenInfo.None) { + if (tokenInfo === TokenCategory.None) { const identifierKind = scanIdentifier(ch, languageVersion); if (identifierKind) { return token = identifierKind; @@ -2046,7 +2046,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.Unknown; } - if (tokenInfo & TokenInfo.Whitespace) { + if (tokenInfo & TokenCategory.Whitespace) { if (skipTrivia) { pos++; continue; @@ -2059,12 +2059,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (tokenInfo & TokenInfo.SimpleToken) { + if (tokenInfo & TokenCategory.SimpleToken) { pos++; - return token = tokenInfo & TokenInfo.SimpleTokenMask; + return token = tokenInfo & TokenCategory.SimpleTokenMask; } - if (tokenInfo & TokenInfo.Digit) { + if (tokenInfo & TokenCategory.Digit) { if (ch === CharacterCodes._0) { if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { pos += 2; @@ -2104,7 +2104,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = scanNumber(); } - if (!(tokenInfo & TokenInfo.RecognizedMisc)) Debug.fail(`Unhandled token category ${tokenInfo}`); + if (!(tokenInfo & TokenCategory.RecognizedMisc)) Debug.fail(`Unhandled token category ${tokenInfo}`); switch (ch) { case CharacterCodes.exclamation: if 
(charCodeUnchecked(pos + 1) === CharacterCodes.equals) { From 9e5e29fd33eef907a7a9d341e1ce511ec817b296 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 19:17:39 +0000 Subject: [PATCH 10/13] More rename. --- src/compiler/scanner.ts | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index fea4cb33a02ad..44e721c824f04 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -329,6 +329,8 @@ for ( // Line Break Whitespace [CharacterCodes.lineFeed, TokenCategory.LineBreak], [CharacterCodes.carriageReturn, TokenCategory.LineBreak], + [CharacterCodes.lineSeparator, TokenCategory.LineBreak], + [CharacterCodes.paragraphSeparator, TokenCategory.LineBreak], // Single Line Whitespace [CharacterCodes.tab, TokenCategory.Whitespace], @@ -2022,11 +2024,11 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - const tokenInfo = ch < tokenCategoryLookup.length ? + const tokenCategory = ch < tokenCategoryLookup.length ? tokenCategoryLookup[ch] : tokenCategoryLookupUncommon.get(ch) ?? 
TokenCategory.None; - if (tokenInfo === TokenCategory.None) { + if (tokenCategory === TokenCategory.None) { const identifierKind = scanIdentifier(ch, languageVersion); if (identifierKind) { return token = identifierKind; @@ -2046,7 +2048,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = SyntaxKind.Unknown; } - if (tokenInfo & TokenCategory.Whitespace) { + if (tokenCategory & TokenCategory.Whitespace) { if (skipTrivia) { pos++; continue; @@ -2059,12 +2061,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } - if (tokenInfo & TokenCategory.SimpleToken) { + if (tokenCategory & TokenCategory.SimpleToken) { pos++; - return token = tokenInfo & TokenCategory.SimpleTokenMask; + return token = tokenCategory & TokenCategory.SimpleTokenMask; } - if (tokenInfo & TokenCategory.Digit) { + if (tokenCategory & TokenCategory.Digit) { if (ch === CharacterCodes._0) { if (pos + 2 < end && (charCodeUnchecked(pos + 1) === CharacterCodes.X || charCodeUnchecked(pos + 1) === CharacterCodes.x)) { pos += 2; @@ -2104,7 +2106,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean return token = scanNumber(); } - if (!(tokenInfo & TokenCategory.RecognizedMisc)) Debug.fail(`Unhandled token category ${tokenInfo}`); + if (!(tokenCategory & TokenCategory.RecognizedMisc)) Debug.fail(`Unhandled token category ${tokenCategory}`); switch (ch) { case CharacterCodes.exclamation: if (charCodeUnchecked(pos + 1) === CharacterCodes.equals) { From db10caef8034a786f29e341831b73a14ec0776bb Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 19:27:33 +0000 Subject: [PATCH 11/13] Add missing `nextLine` entry to whitespace, remove unnecessary branches. 
--- src/compiler/scanner.ts | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 44e721c824f04..ca41554256a03 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -333,11 +333,12 @@ for ( [CharacterCodes.paragraphSeparator, TokenCategory.LineBreak], // Single Line Whitespace + [CharacterCodes.space, TokenCategory.Whitespace], [CharacterCodes.tab, TokenCategory.Whitespace], [CharacterCodes.verticalTab, TokenCategory.Whitespace], [CharacterCodes.formFeed, TokenCategory.Whitespace], - [CharacterCodes.space, TokenCategory.Whitespace], [CharacterCodes.nonBreakingSpace, TokenCategory.Whitespace], + [CharacterCodes.nextLine, TokenCategory.Whitespace], [CharacterCodes.ogham, TokenCategory.Whitespace], [CharacterCodes.enQuad, TokenCategory.Whitespace], [CharacterCodes.emQuad, TokenCategory.Whitespace], @@ -2033,15 +2034,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean if (identifierKind) { return token = identifierKind; } - else if (isWhiteSpaceSingleLine(ch)) { - pos += charSize(ch); - continue; - } - else if (isLineBreak(ch)) { - tokenFlags |= TokenFlags.PrecedingLineBreak; - pos += charSize(ch); - continue; - } const size = charSize(ch); error(Diagnostics.Invalid_character, pos, size); pos += size; @@ -2061,6 +2053,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } } + if (tokenCategory & TokenCategory.LineBreak) { + tokenFlags |= TokenFlags.PrecedingLineBreak; + pos += charSize(ch); + continue; + } + if (tokenCategory & TokenCategory.SimpleToken) { pos++; return token = tokenCategory & TokenCategory.SimpleTokenMask; From 3bdfe84fd95db9ec062e31ee352b62bef5380571 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Tue, 2 Jul 2024 19:29:49 +0000 Subject: [PATCH 12/13] Rename `None` to `IdentifierOrUnknown` --- src/compiler/scanner.ts | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 
deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index ca41554256a03..9eddd645de424 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -302,9 +302,12 @@ const regExpFlagToFirstAvailableLanguageVersion = new Map(); for (let i = 0; i < CharacterCodes.maxAsciiCharacter; i++) { - tokenCategoryLookup.push(TokenCategory.None); + tokenCategoryLookup.push(TokenCategory.IdentifierOrUnknown); } for ( @@ -2027,9 +2030,9 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const tokenCategory = ch < tokenCategoryLookup.length ? tokenCategoryLookup[ch] : - tokenCategoryLookupUncommon.get(ch) ?? TokenCategory.None; + tokenCategoryLookupUncommon.get(ch) ?? TokenCategory.IdentifierOrUnknown; - if (tokenCategory === TokenCategory.None) { + if (tokenCategory === TokenCategory.IdentifierOrUnknown) { const identifierKind = scanIdentifier(ch, languageVersion); if (identifierKind) { return token = identifierKind; From 33ae58db20882936d9f31b2c04eab6a0f5a75442 Mon Sep 17 00:00:00 2001 From: Daniel Rosenwasser Date: Wed, 3 Jul 2024 19:57:03 +0000 Subject: [PATCH 13/13] Hit the table once for whitespace, then tight-loop. 
--- src/compiler/scanner.ts | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index 9eddd645de424..ba841ff73811e 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -1997,19 +1997,6 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean const ch = codePointUnchecked(pos); - if (ch === CharacterCodes.tab || ch === CharacterCodes.space) { - if (skipTrivia) { - pos++; - continue; - } - else { - while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { - pos++; - } - return token = SyntaxKind.WhitespaceTrivia; - } - } - if (ch === CharacterCodes.lineFeed || ch === CharacterCodes.carriageReturn) { tokenFlags |= TokenFlags.PrecedingLineBreak; if (skipTrivia) { @@ -2044,14 +2031,23 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean } if (tokenCategory & TokenCategory.Whitespace) { + pos++; + // Tight loop here on consecutive whitespace to avoid a table lookup. + while (pos < end) { + const nextCh = charCodeUnchecked(pos); + // Check against the original character first to avoid hitting the slow path. + if (nextCh === ch || isWhiteSpaceSingleLine(nextCh)) { + pos++; + continue; + } + + break; + } + if (skipTrivia) { - pos++; continue; } else { - while (pos < end && isWhiteSpaceSingleLine(charCodeUnchecked(pos))) { - pos++; - } return token = SyntaxKind.WhitespaceTrivia; } }