From aafa2e3acdcc88ec87090e5cc4dca5a256eb4e09 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Fri, 20 Oct 2023 22:22:07 +0200 Subject: [PATCH] feat: value converter for strings & ints (#655) ### Summary of Changes Add conversion for ints and strings, including proper handling of escape sequences. --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com> --- .../language/pipeline-language/expressions.md | 24 +- esbuild.mjs | 2 +- language-configuration.json | 2 +- .../grammar/safe-ds-value-converter.ts | 67 ++++- src/language/grammar/safe-ds.langium | 4 +- .../safe-ds-partial-evaluator.ts | 4 +- .../other/expressions/infixOperations.ts | 2 +- syntaxes/safe-ds.tmLanguage.json | 2 +- .../grammar/safe-ds-value-converter.test.ts | 262 +++++++++++++----- tsconfig.json | 2 +- 10 files changed, 275 insertions(+), 96 deletions(-) diff --git a/docs/language/pipeline-language/expressions.md b/docs/language/pipeline-language/expressions.md index 9189c76c0..2f0ac584d 100644 --- a/docs/language/pipeline-language/expressions.md +++ b/docs/language/pipeline-language/expressions.md @@ -23,16 +23,18 @@ String literals describe text. Their syntax is simply text enclosed by double qu | Escape sequence | Meaning | |-----------------|----------------------------------------------------------------------| -| `#!sds \b` | Backspace | -| `#!sds \t` | Tab | -| `#!sds \n` | New line | -| `#!sds \f` | Form feed | -| `#!sds \r` | Carriage return | -| `#!sds \"` | Double quote | -| `#!sds \'` | Single quote | -| `#!sds \\` | Backslash | -| `#!sds \{` | Opening curly brace (used for [template strings](#template-strings)) | -| `#!sds \uXXXX` | Unicode character, where `#!sds XXXX` is its hexadecimal index | +| `\b` | Backspace | +| `\f` | Form feed | +| `\n` | New line | +| `\r` | Carriage return | +| `\t` | Tab | +| `\v` | Vertical tab | +| `\0` | Null character | +| `\'` | Single quote | +| `\"` | Double quote | +| `\{` | Opening curly brace (used for [template strings](#template-strings)) | +| `\\` | Backslash | +| `\uXXXX` | Unicode character, where `XXXX` is its hexadecimal code | String literals can contain also contain raw line breaks: @@ -134,7 +136,7 @@ nullableExpression ?: 42 The syntax for template strings is similar to [string literals](#string-literals): They are also delimited by double quotes, the text can contain escape sequences, and raw newlines can be inserted. The additional syntax are _template expressions_, which are any expression enclosed by `#!sds {{` and `#!sds }}`. There must be no space between the curly braces. -These template expressions are evaluated, converted to a string and inserted into the template string at their position. The template string in the example above is, hence, equivalent to the [string literal](#string-literals) "1 + 2 = 3". +These template expressions are evaluated, converted to a string and inserted into the template string at their position. The template string in the example above is, hence, equivalent to the [string literal](#string-literals) `#!sds "1 + 2 = 3"`. ## References diff --git a/esbuild.mjs b/esbuild.mjs index 6ef0c7de9..50c6880a8 100644 --- a/esbuild.mjs +++ b/esbuild.mjs @@ -50,7 +50,7 @@ const ctx = await esbuild.context({ entryPoints: ['src/cli/main.ts', 'src/extension/main.ts', 'src/language/main.ts'], outdir: 'out', bundle: true, - target: 'ES2017', + target: 'ES2020', // VSCode's extension host is still using cjs, so we need to transform the code format: 'cjs', // To prevent confusing node, we explicitly use the `.cjs` extension diff --git a/language-configuration.json b/language-configuration.json index ca1183f5e..350fc8ad1 100644 --- a/language-configuration.json +++ b/language-configuration.json @@ -16,7 +16,7 @@ { "open": "\"", "close": "\"", "notIn": ["string", "comment"] }, { "open": "`", "close": "`", "notIn": ["string", "comment"] }, { "open": "»", "close": "«", "notIn": ["string", "comment"] }, - { "open": "/*", "close": " */", "notIn": ["string"] } + { "open": "/*", "close": " */", "notIn": ["string", "comment"] } ], "surroundingPairs": [ ["(", ")"], diff --git a/src/language/grammar/safe-ds-value-converter.ts b/src/language/grammar/safe-ds-value-converter.ts index bc61170e6..01f4ce423 100644 --- a/src/language/grammar/safe-ds-value-converter.ts +++ b/src/language/grammar/safe-ds-value-converter.ts @@ -1,18 +1,77 @@ -import { convertString, CstNode, DefaultValueConverter, GrammarAST, ValueType } from 'langium'; +import { convertBigint, CstNode, DefaultValueConverter, GrammarAST, ValueType } from 'langium'; export class SafeDsValueConverter extends DefaultValueConverter { protected override runConverter(rule: GrammarAST.AbstractRule, input: string, cstNode: CstNode): ValueType { switch (rule.name.toUpperCase()) { case 'ID': return input.replaceAll('`', ''); + case 'INT': + return convertBigint(input); + case 'STRING': + return convertString(input, 1, 1); case 'TEMPLATE_STRING_START': - return convertString(input.substring(0, input.length - 1)); + return convertString(input, 1, 2); case 'TEMPLATE_STRING_INNER': - return convertString(input.substring(1, input.length - 1)); + return convertString(input, 2, 2); case 'TEMPLATE_STRING_END': - return convertString(input.substring(1)); + return convertString(input, 2, 1); default: return super.runConverter(rule, input, cstNode); } } } + +const convertString = (input: string, openingDelimiterLength: number, closingDelimiterLength: number): string => { + let result = ''; + const endIndex = input.length - 1 - closingDelimiterLength; + + for (let i = openingDelimiterLength; i <= endIndex; i++) { + const current = input.charAt(i); + if (current === '\\' && i < endIndex) { + const [stringToAdd, newIndex] = handleEscapeSequence(input, i + 1, endIndex); + result += stringToAdd; + i = newIndex - 1; // -1 because the loop will increment it + } else { + result += current; + } + } + + return result; +}; + +/** + * Handle an escape sequence. + * + * @param input The entire input string. + * @param index The index of the escape sequence (after the slash). + * @param endIndex The index of the last character of the input string, excluding delimiters. + * @returns An array containing the string to add to the result and the new index. + */ +const handleEscapeSequence = (input: string, index: number, endIndex: number): [string, number] => { + const current = input.charAt(index); + switch (current) { + case 'b': + return ['\b', index + 1]; + case 'f': + return ['\f', index + 1]; + case 'n': + return ['\n', index + 1]; + case 'r': + return ['\r', index + 1]; + case 't': + return ['\t', index + 1]; + case 'v': + return ['\v', index + 1]; + case '0': + return ['\0', index + 1]; + } + + if (current === 'u' && index + 4 <= endIndex) { + const code = input.substring(index + 1, index + 5); + if (code.match(/[0-9a-fA-F]{4}/gu)) { + return [String.fromCharCode(parseInt(code, 16)), index + 5]; + } + } + + return [current, index + 1]; +}; diff --git a/src/language/grammar/safe-ds.langium b/src/language/grammar/safe-ds.langium index 291c631c3..22219846b 100644 --- a/src/language/grammar/safe-ds.langium +++ b/src/language/grammar/safe-ds.langium @@ -766,7 +766,7 @@ SdsFloat returns SdsFloat: ; interface SdsInt extends SdsNumber { - value: number + value: bigint } SdsInt returns SdsInt: @@ -1069,7 +1069,7 @@ terminal FLOAT returns number | DECIMAL_DIGIT+ FLOAT_EXPONENT; terminal fragment DECIMAL_DIGIT: /[0-9]/; terminal fragment FLOAT_EXPONENT: ('e' | 'E' )('+' | '-' )? DECIMAL_DIGIT+; -terminal INT returns number: DECIMAL_DIGIT+; +terminal INT returns bigint: DECIMAL_DIGIT+; terminal STRING returns string: STRING_START STRING_TEXT* STRING_END; terminal fragment STRING_START: STRING_DELIMITER; terminal fragment STRING_END: '{'? STRING_DELIMITER; diff --git a/src/language/partialEvaluation/safe-ds-partial-evaluator.ts b/src/language/partialEvaluation/safe-ds-partial-evaluator.ts index ce3142b4a..7172d841a 100644 --- a/src/language/partialEvaluation/safe-ds-partial-evaluator.ts +++ b/src/language/partialEvaluation/safe-ds-partial-evaluator.ts @@ -100,7 +100,7 @@ export class SafeDsPartialEvaluator { } else if (isSdsFloat(node)) { return new FloatConstant(node.value); } else if (isSdsInt(node)) { - return new IntConstant(BigInt(node.value)); + return new IntConstant(node.value); } else if (isSdsNull(node)) { return NullConstant; } else if (isSdsString(node)) { @@ -520,4 +520,4 @@ export class SafeDsPartialEvaluator { } const NO_SUBSTITUTIONS: ParameterSubstitutions = new Map(); -const zeroes = [new IntConstant(BigInt(0)), new FloatConstant(0.0), new FloatConstant(-0.0)]; +const zeroes = [new IntConstant(0n), new FloatConstant(0.0), new FloatConstant(-0.0)]; diff --git a/src/language/validation/other/expressions/infixOperations.ts b/src/language/validation/other/expressions/infixOperations.ts index a711aa77a..66ae0ea0e 100644 --- a/src/language/validation/other/expressions/infixOperations.ts +++ b/src/language/validation/other/expressions/infixOperations.ts @@ -10,7 +10,7 @@ export const divisionDivisorMustNotBeZero = (services: SafeDsServices) => { const partialEvaluator = services.evaluation.PartialEvaluator; const typeComputer = services.types.TypeComputer; - const zeroInt = new IntConstant(BigInt(0)); + const zeroInt = new IntConstant(0n); const zeroFloat = new FloatConstant(0.0); const minusZeroFloat = new FloatConstant(-0.0); diff --git a/syntaxes/safe-ds.tmLanguage.json b/syntaxes/safe-ds.tmLanguage.json index 5a5980c5c..d6cfc2e6c 100644 --- a/syntaxes/safe-ds.tmLanguage.json +++ b/syntaxes/safe-ds.tmLanguage.json @@ -92,7 +92,7 @@ }, "string-character-escape": { "name": "constant.character.escape.safe-ds", - "match": "\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|u\\{[0-9A-Fa-f]+\\}|[0-2][0-7]{0,2}|3[0-6][0-7]?|37[0-7]?|[4-7][0-7]?|.|$)" + "match": "\\\\(b|f|n|r|t|v|0|'|\"|{|\\\\|u[0-9a-fA-F]{4})" } } } diff --git a/tests/language/grammar/safe-ds-value-converter.test.ts b/tests/language/grammar/safe-ds-value-converter.test.ts index 4b3955175..9cc87e7c5 100644 --- a/tests/language/grammar/safe-ds-value-converter.test.ts +++ b/tests/language/grammar/safe-ds-value-converter.test.ts @@ -4,7 +4,9 @@ import { EmptyFileSystem } from 'langium'; import { getNodeOfType } from '../../helpers/nodeFinder.js'; import { isSdsClass, + isSdsInt, isSdsModule, + isSdsString, isSdsTemplateStringEnd, isSdsTemplateStringInner, isSdsTemplateStringStart, @@ -13,87 +15,203 @@ import { const services = createSafeDsServices(EmptyFileSystem).SafeDs; describe('runConverter', () => { - it('should remove backticks from IDs (package)', async () => { - const code = ` - package \`foo\`.bar - `; - - const module = await getNodeOfType(services, code, isSdsModule); - expect(module.name).toBe('foo.bar'); - }); - - it('should remove backticks from IDs (declaration)', async () => { - const code = ` - class \`MyClass\` - `; - - const firstClass = await getNodeOfType(services, code, isSdsClass); - expect(firstClass.name).toBe('MyClass'); - }); - - it('should remove delimiters from TEMPLATE_STRING_STARTs', async () => { - const code = ` - pipeline myPipeline { - "start{{ 1 }}inner{{ 2 }}end"; - } - `; - - const firstTemplateStringStart = await getNodeOfType(services, code, isSdsTemplateStringStart); - expect(firstTemplateStringStart.value).toBe('start'); + describe('ID', () => { + it('should remove backticks (package)', async () => { + const code = ` + package \`foo\`.bar + `; + + const module = await getNodeOfType(services, code, isSdsModule); + expect(module.name).toBe('foo.bar'); + }); + + it('should remove backticks (declaration)', async () => { + const code = ` + class \`MyClass\` + `; + + const firstClass = await getNodeOfType(services, code, isSdsClass); + expect(firstClass.name).toBe('MyClass'); + }); }); - it('should handle escape sequences in TEMPLATE_STRING_STARTs', async () => { - const code = ` - pipeline myPipeline { - "\\tstart{{ 1 }}inner{{ 2 }}end"; - } - `; - - const firstTemplateStringStart = await getNodeOfType(services, code, isSdsTemplateStringStart); - expect(firstTemplateStringStart.value).toBe('\tstart'); + describe('INT', () => { + it('should return a bigint', async () => { + const code = ` + pipeline myPipeline { + 123; + } + `; + + const firstInt = await getNodeOfType(services, code, isSdsInt); + expect(firstInt.value).toBe(123n); + }); }); - it('should remove delimiters from TEMPLATE_STRING_INNERs', async () => { - const code = ` - pipeline myPipeline { - "start{{ 1 }}inner{{ 2 }}end"; - } - `; - - const firstTemplateStringInner = await getNodeOfType(services, code, isSdsTemplateStringInner); - expect(firstTemplateStringInner.value).toBe('inner'); + const escapeSequences = [ + { + escaped: '\\b', + unescaped: '\b', + }, + { + escaped: '\\f', + unescaped: '\f', + }, + { + escaped: '\\n', + unescaped: '\n', + }, + { + escaped: '\\r', + unescaped: '\r', + }, + { + escaped: '\\t', + unescaped: '\t', + }, + { + escaped: '\\v', + unescaped: '\v', + }, + { + escaped: '\\0', + unescaped: '\0', + }, + { + escaped: "\\'", + unescaped: "'", + }, + { + escaped: '\\"', + unescaped: '"', + }, + { + escaped: '\\{', + unescaped: '{', + }, + { + escaped: '\\\\', + unescaped: '\\', + }, + { + escaped: '\\u0061', + unescaped: 'a', + }, + { + escaped: '\\u00a9', + unescaped: '©', + }, + { + escaped: '\\u00A9', + unescaped: '©', + }, + { + escaped: '\\u', + unescaped: 'u', + }, + { + escaped: '\\u00', + unescaped: 'u00', + }, + { + escaped: '\\uWXYZ', + unescaped: 'uWXYZ', + }, + ]; + + describe('STRING', () => { + it('should remove delimiters', async () => { + const code = ` + pipeline myPipeline { + "text"; + } + `; + + const firstTemplateStringStart = await getNodeOfType(services, code, isSdsString); + expect(firstTemplateStringStart.value).toBe('text'); + }); + + it.each(escapeSequences)('should unescape $escaped', async ({ escaped, unescaped }) => { + const code = ` + pipeline myPipeline { + "${escaped}"; + } + `; + + const firstTemplateStringStart = await getNodeOfType(services, code, isSdsString); + expect(firstTemplateStringStart.value).toBe(unescaped); + }); }); - it('should handle escape sequences in TEMPLATE_STRING_INNERs', async () => { - const code = ` - pipeline myPipeline { - "start{{ 1 }}\\tinner{{ 2 }}end"; - } - `; - - const firstTemplateStringInner = await getNodeOfType(services, code, isSdsTemplateStringInner); - expect(firstTemplateStringInner.value).toBe('\tinner'); + describe('TEMPLATE_STRING_START', () => { + it('should remove delimiters', async () => { + const code = ` + pipeline myPipeline { + "start{{ 1 }}inner{{ 2 }}end"; + } + `; + + const firstTemplateStringStart = await getNodeOfType(services, code, isSdsTemplateStringStart); + expect(firstTemplateStringStart.value).toBe('start'); + }); + + it.each(escapeSequences)('should unescape $escaped', async ({ escaped, unescaped }) => { + const code = ` + pipeline myPipeline { + "${escaped}{{ 1 }}inner{{ 2 }}end"; + } + `; + + const firstTemplateStringStart = await getNodeOfType(services, code, isSdsTemplateStringStart); + expect(firstTemplateStringStart.value).toBe(unescaped); + }); }); - it('should remove delimiters from TEMPLATE_STRING_ENDs', async () => { - const code = ` - pipeline myPipeline { - "start{{ 1 }}inner{{ 2 }}end"; - } - `; - - const firstTemplateStringEnd = await getNodeOfType(services, code, isSdsTemplateStringEnd); - expect(firstTemplateStringEnd.value).toBe('end'); + describe('TEMPLATE_STRING_INNER', () => { + it('should remove delimiters', async () => { + const code = ` + pipeline myPipeline { + "start{{ 1 }}inner{{ 2 }}end"; + } + `; + + const firstTemplateStringInner = await getNodeOfType(services, code, isSdsTemplateStringInner); + expect(firstTemplateStringInner.value).toBe('inner'); + }); + + it.each(escapeSequences)('should unescape $escaped', async ({ escaped, unescaped }) => { + const code = ` + pipeline myPipeline { + "start{{ 1 }}${escaped}{{ 2 }}end"; + } + `; + + const firstTemplateStringInner = await getNodeOfType(services, code, isSdsTemplateStringInner); + expect(firstTemplateStringInner.value).toBe(unescaped); + }); }); - it('should handle escape sequences in TEMPLATE_STRING_ENDs', async () => { - const code = ` - pipeline myPipeline { - "start{{ 1 }}inner{{ 2 }}\\tend"; - } - `; - - const firstTemplateStringEnd = await getNodeOfType(services, code, isSdsTemplateStringEnd); - expect(firstTemplateStringEnd.value).toBe('\tend'); + describe('TEMPLATE_STRING_END', () => { + it('should remove delimiters', async () => { + const code = ` + pipeline myPipeline { + "start{{ 1 }}inner{{ 2 }}end"; + } + `; + + const firstTemplateStringEnd = await getNodeOfType(services, code, isSdsTemplateStringEnd); + expect(firstTemplateStringEnd.value).toBe('end'); + }); + + it.each(escapeSequences)('should unescape $escaped', async ({ escaped, unescaped }) => { + const code = ` + pipeline myPipeline { + "start{{ 1 }}inner{{ 2 }}${escaped}"; + } + `; + + const firstTemplateStringEnd = await getNodeOfType(services, code, isSdsTemplateStringEnd); + expect(firstTemplateStringEnd.value).toBe(unescaped); + }); }); }); diff --git a/tsconfig.json b/tsconfig.json index 24bbd82e2..6e4d84d6a 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -1,6 +1,6 @@ { "compilerOptions": { - "target": "ES2017", + "target": "ES2020", "module": "Node16", "lib": ["ESNext", "DOM", "WebWorker"], "sourceMap": true,