Skip to content

Commit

Permalink
Merge pull request #145979 from Lazyuki/fix-maxTokenizationLineLength…
Browse files Browse the repository at this point in the history
…-for-monaco
  • Loading branch information
alexdima authored May 30, 2022
2 parents 1c2ba17 + 61347c8 commit acb156d
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 13 deletions.
5 changes: 3 additions & 2 deletions src/vs/editor/standalone/browser/standaloneLanguages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneT
import { IMarkerData, IMarkerService } from 'vs/platform/markers/common/markers';
import { ILanguageFeaturesService } from 'vs/editor/common/services/languageFeatures';
import { LanguageSelector } from 'vs/editor/common/languageSelector';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';

/**
* Register information about a new language.
Expand Down Expand Up @@ -374,7 +375,7 @@ export function registerTokensProviderFactory(languageId: string, factory: Token
if (isATokensProvider(result)) {
return createTokenizationSupportAdapter(languageId, result);
}
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result));
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result), StandaloneServices.get(IConfigurationService));
}
};
return languages.TokenizationRegistry.registerFactory(languageId, adaptedFactory);
Expand Down Expand Up @@ -405,7 +406,7 @@ export function setTokensProvider(languageId: string, provider: TokensProvider |
*/
export function setMonarchTokensProvider(languageId: string, languageDef: IMonarchLanguage | Thenable<IMonarchLanguage>): IDisposable {
const create = (languageDef: IMonarchLanguage) => {
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef));
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef), StandaloneServices.get(IConfigurationService));
};
if (isThenable<IMonarchLanguage>(languageDef)) {
return registerTokensProviderFactory(languageId, { create: () => languageDef });
Expand Down
22 changes: 20 additions & 2 deletions src/vs/editor/standalone/common/monarch/monarchLexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@

import { IDisposable } from 'vs/base/common/lifecycle';
import * as languages from 'vs/editor/common/languages';
import { NullState } from 'vs/editor/common/languages/nullTokenize';
import { NullState, nullTokenizeEncoded, nullTokenize } from 'vs/editor/common/languages/nullTokenize';
import { TokenTheme } from 'vs/editor/common/languages/supports/tokenization';
import { ILanguageService } from 'vs/editor/common/languages/language';
import * as monarchCommon from 'vs/editor/standalone/common/monarch/monarchCommon';
import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneTheme';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';

const CACHE_STACK_DEPTH = 5;
Expand Down Expand Up @@ -395,8 +396,9 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
private readonly _embeddedLanguages: { [languageId: string]: boolean };
public embeddedLoaded: Promise<void>;
private readonly _tokenizationRegistryListener: IDisposable;
private _maxTokenizationLineLength: number;

constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer) {
constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
this._languageService = languageService;
this._standaloneThemeService = standaloneThemeService;
this._languageId = languageId;
Expand Down Expand Up @@ -424,6 +426,16 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
emitting = false;
}
});
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
this._configurationService.onDidChangeConfiguration(e => {
if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
}
});
}

public dispose(): void {
Expand Down Expand Up @@ -468,12 +480,18 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
}

/**
 * Tokenizes a single line into classic (string-typed) tokens.
 *
 * A line whose length reaches the configured `editor.maxTokenizationLineLength`
 * is not run through the Monarch grammar; it is handed to `nullTokenize`
 * together with the incoming state instead.
 *
 * @param line The text of the line to tokenize.
 * @param hasEOL Forwarded unchanged to the internal tokenizer.
 * @param lineState The state at the start of the line.
 * @returns The tokenization result for the line.
 */
public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
	// Length guard first: the collector is only created for lines that are actually tokenized.
	if (this._maxTokenizationLineLength <= line.length) {
		return nullTokenize(this._languageId, lineState);
	}

	const collector = new MonarchClassicTokensCollector();
	return collector.finalize(this._tokenize(line, hasEOL, <MonarchLineState>lineState, collector));
}

public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
if (line.length >= this._maxTokenizationLineLength) {
return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
}
const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
return tokensCollector.finalize(endLineState);
Expand Down
59 changes: 50 additions & 9 deletions src/vs/editor/standalone/test/browser/monarch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,13 @@ import { compile } from 'vs/editor/standalone/common/monarch/monarchCompile';
import { Token, TokenizationRegistry } from 'vs/editor/common/languages';
import { IMonarchLanguage } from 'vs/editor/standalone/common/monarch/monarchTypes';
import { DisposableStore } from 'vs/base/common/lifecycle';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { StandaloneConfigurationService } from 'vs/editor/standalone/browser/standaloneServices';

suite('Monarch', () => {

function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage): MonarchTokenizer {
return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language));
function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage, configurationService: IConfigurationService): MonarchTokenizer {
return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language), configurationService);
}

function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
Expand All @@ -32,14 +34,15 @@ suite('Monarch', () => {
test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
const disposables = new DisposableStore();
const languageService = disposables.add(new LanguageService());
const configurationService = new StandaloneConfigurationService();
disposables.add(languageService.registerLanguage({ id: 'sql' }));
disposables.add(TokenizationRegistry.register('sql', createMonarchTokenizer(languageService, 'sql', {
tokenizer: {
root: [
[/./, 'token']
]
}
})));
}, configurationService)));
const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
const tokenizer = createMonarchTokenizer(languageService, 'test1', {
tokenizer: {
Expand All @@ -63,7 +66,7 @@ suite('Monarch', () => {
],
endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]],
}
});
}, configurationService);

const lines = [
`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
Expand Down Expand Up @@ -106,6 +109,7 @@ suite('Monarch', () => {
});

test('microsoft/monaco-editor#1235: Empty Line Handling', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', {
tokenizer: {
Expand All @@ -125,7 +129,7 @@ suite('Monarch', () => {
// No possible rule to detect an empty line and @pop?
],
},
});
}, configurationService);

const lines = [
`// This comment \\`,
Expand Down Expand Up @@ -163,6 +167,7 @@ suite('Monarch', () => {
});

test('microsoft/monaco-editor#2265: Exit a state at end of line', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', {
includeLF: true,
Expand All @@ -179,7 +184,7 @@ suite('Monarch', () => {
[/[^\d]+/, '']
]
}
});
}, configurationService);

const lines = [
`PRINT 10 * 20`,
Expand Down Expand Up @@ -211,6 +216,7 @@ suite('Monarch', () => {
});

test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();

const tokenizer1 = createMonarchTokenizer(languageService, 'test', {
Expand All @@ -230,7 +236,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);

const tokenizer2 = createMonarchTokenizer(languageService, 'test', {
ignoreCase: false,
Expand All @@ -242,7 +248,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);

const lines = [
`@ham`
Expand All @@ -265,6 +271,7 @@ suite('Monarch', () => {
});

test('microsoft/monaco-editor#2424: Allow to target @@', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();

const tokenizer = createMonarchTokenizer(languageService, 'test', {
Expand All @@ -277,7 +284,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);

const lines = [
`@@`
Expand All @@ -292,4 +299,38 @@ suite('Monarch', () => {
languageService.dispose();
});

/**
 * Regression test for microsoft/monaco-editor#3025.
 *
 * With `editor.maxTokenizationLineLength` set to 4, a 3-character line must be
 * tokenized by the grammar, while a 6-character line must come back as a single
 * token with an empty type (i.e. the grammar is not applied).
 */
test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', async () => {
	const configurationService = new StandaloneConfigurationService();
	const languageService = new LanguageService();

	// Set maxTokenizationLineLength to 4 so that "ham" works but "hamham" would fail
	await configurationService.updateValue('editor.maxTokenizationLineLength', 4);

	const tokenizer = createMonarchTokenizer(languageService, 'test', {
		tokenizer: {
			root: [
				{
					regex: /ham/,
					action: { token: 'ham' }
				},
			],
		},
	}, configurationService);

	try {
		const lines = [
			'ham', // length 3, should be tokenized
			'hamham' // length 6, should NOT be tokenized
		];

		const actualTokens = getTokens(tokenizer, lines);
		assert.deepStrictEqual(actualTokens, [
			[
				new Token(0, 'ham.test', 'test'),
			], [
				new Token(0, '', 'test')
			]
		]);
	} finally {
		// Clean up even when the assertion throws. The tokenizer is disposed as well,
		// not just the language service, so it does not outlive this test.
		tokenizer.dispose();
		languageService.dispose();
	}
});

});

0 comments on commit acb156d

Please sign in to comment.