From 26fbb88c106c7556458d2ba9f69aa50f3b3fcbd4 Mon Sep 17 00:00:00 2001 From: Alexander Jones Date: Wed, 9 Oct 2024 09:42:11 -0500 Subject: [PATCH 1/4] Rewrite splitHedString as class --- parser/main.js | 4 +- parser/splitHedString.js | 167 ++++++++++++++++++++++++++----------- tests/stringParser.spec.js | 4 +- 3 files changed, 121 insertions(+), 54 deletions(-) diff --git a/parser/main.js b/parser/main.js index ff250d4b..145c1a30 100644 --- a/parser/main.js +++ b/parser/main.js @@ -2,7 +2,7 @@ import { mergeParsingIssues } from '../utils/hedData' import { generateIssue } from '../common/issues/issues' import ParsedHedString from './parsedHedString' -import splitHedString from './splitHedString' +import HedStringSplitter from './splitHedString' import { getCharacterCount, stringIsEmpty } from '../utils/string' const openingGroupCharacter = '(' @@ -154,7 +154,7 @@ export const parseHedString = function (hedString, hedSchemas) { fullStringIssues.syntax = [] return [null, fullStringIssues] } - const [parsedTags, splitIssues] = splitHedString(hedString, hedSchemas) + const [parsedTags, splitIssues] = new HedStringSplitter(hedString, hedSchemas).splitHedString() const parsingIssues = Object.assign(fullStringIssues, splitIssues) if (parsedTags === null) { return [null, parsingIssues] diff --git a/parser/splitHedString.js b/parser/splitHedString.js index 1887fdc4..c1c939f0 100644 --- a/parser/splitHedString.js +++ b/parser/splitHedString.js @@ -19,84 +19,149 @@ const generationToClass = [ new ParsedHed3Tag(tagSpec, hedSchemas, hedString), ] -/** - * Create the parsed HED tag and group objects. - * - * @param {string} hedString The HED string to be split. - * @param {Schemas} hedSchemas The collection of HED schemas. - * @param {TagSpec[]} tagSpecs The tag specifications. - * @param {GroupSpec} groupSpecs The bounds of the tag groups. - * @returns {[ParsedHedSubstring[], Object]} The parsed HED string data and any issues found. 
- */ -const createParsedTags = function (hedString, hedSchemas, tagSpecs, groupSpecs) { - const conversionIssues = [] - const syntaxIssues = [] - const ParsedHedTagConstructor = generationToClass[hedSchemas.generation] +export default class HedStringSplitter { + /** + * The HED string being split. + * @type {string} + */ + hedString + /** + * The collection of HED schemas. + * @type {Schemas} + */ + hedSchemas + /** + * Any issues found during tag conversion. + * @type {Issue[]} + */ + conversionIssues + /** + * Any syntax issues found. + * @type {Issue[]} + */ + syntaxIssues + /** + * The constructor to be used to build the parsed HED tags. + * @type {function (string, string, number[], Schemas, string, TagSpec): ParsedHedTag} + */ + ParsedHedTagConstructor - const createParsedTag = (tagSpec) => { + /** + * Constructor for the HED string parser. + * + * @param {string} hedString The HED string to be split and parsed. + * @param {Schemas} hedSchemas The collection of HED schemas. + */ + constructor(hedString, hedSchemas) { + this.hedString = hedString + this.hedSchemas = hedSchemas + this.conversionIssues = [] + this.syntaxIssues = [] + this.ParsedHedTagConstructor = generationToClass[hedSchemas.generation] + } + + /** + * Split and parse a HED string into tags and groups. + * + * @returns {[ParsedHedSubstring[], Object]} The parsed HED string data and any issues found. + */ + splitHedString() { + const [tagSpecs, groupBounds, tokenizingIssues] = new HedStringTokenizer(this.hedString).tokenize() + if (tokenizingIssues.syntax.length > 0) { + return [null, tokenizingIssues] + } + + const [parsedTags, parsingIssues] = this._createParsedTags(tagSpecs, groupBounds) + mergeParsingIssues(tokenizingIssues, parsingIssues) + + return [parsedTags, tokenizingIssues] + } + + /** + * Create parsed HED tags and groups from specifications. + * + * @param {TagSpec[]} tagSpecs The tag specifications. + * @param {GroupSpec} groupSpecs The group specifications. 
+ * @returns {[ParsedHedSubstring[], Object]} The parsed HED tags and any issues. + */ + _createParsedTags(tagSpecs, groupSpecs) { + // Create tags from specifications + const parsedTags = recursiveMap((tagSpec) => this._createParsedTag(tagSpec), tagSpecs) + + // Create groups from the parsed tags + const parsedTagsWithGroups = this._createParsedGroups(parsedTags, groupSpecs.children) + + const issues = { syntax: this.syntaxIssues, conversion: this.conversionIssues } + return [parsedTagsWithGroups, issues] + } + + /** + * Create a parsed tag object based on the tag specification. + * + * @param {TagSpec|ColumnSpliceSpec} tagSpec The tag or column splice specification. + * @returns {ParsedHedTag|ParsedHedColumnSplice|null} The parsed HED tag or column splice. + */ + _createParsedTag(tagSpec) { if (tagSpec instanceof TagSpec) { try { - const parsedTag = ParsedHedTagConstructor( + return this.ParsedHedTagConstructor( tagSpec.tag, - hedString, + this.hedString, tagSpec.bounds, - hedSchemas, + this.hedSchemas, tagSpec.library, tagSpec, ) - return parsedTag } catch (issueError) { - if (issueError instanceof IssueError) { - conversionIssues.push(issueError.issue) - } else if (issueError instanceof Error) { - conversionIssues.push(generateIssue('internalError', { message: issueError.message })) - } + this._handleIssueError(issueError) return null } } else if (tagSpec instanceof ColumnSpliceSpec) { return new ParsedHedColumnSplice(tagSpec.columnName, tagSpec.bounds) } } - const createParsedGroups = (tags, groupSpecs) => { + + /** + * Handle any issue encountered during tag parsing. + * + * @param {Error|IssueError} issueError The error encountered. 
+ */ + _handleIssueError(issueError) { + if (issueError instanceof IssueError) { + this.conversionIssues.push(issueError.issue) + } else if (issueError instanceof Error) { + this.conversionIssues.push(generateIssue('internalError', { message: issueError.message })) + } + } + + /** + * Create parsed HED groups from parsed tags and group specifications. + * + * @param {ParsedHedTag[]} tags The parsed HED tags. + * @param {GroupSpec[]} groupSpecs The group specifications. + * @returns {ParsedHedGroup[]} The parsed HED groups. + */ + _createParsedGroups(tags, groupSpecs) { const tagGroups = [] let index = 0 + for (const tag of tags) { if (Array.isArray(tag)) { const groupSpec = groupSpecs[index] tagGroups.push( - new ParsedHedGroup(createParsedGroups(tag, groupSpec.children), hedSchemas, hedString, groupSpec.bounds), + new ParsedHedGroup( + this._createParsedGroups(tag, groupSpec.children), + this.hedSchemas, + this.hedString, + groupSpec.bounds, + ), ) index++ } else if (tag !== null) { tagGroups.push(tag) } } - return tagGroups - } - const parsedTags = recursiveMap(createParsedTag, tagSpecs) - const parsedTagsWithGroups = createParsedGroups(parsedTags, groupSpecs.children) - const issues = { - syntax: syntaxIssues, - conversion: conversionIssues, - } - - return [parsedTagsWithGroups, issues] -} - -/** - * Split a HED string. - * - * @param {string} hedString The HED string to be split. - * @param {Schemas} hedSchemas The collection of HED schemas. - * @returns {[ParsedHedSubstring[], Object]} The parsed HED string data and any issues found. 
- */ -export default function splitHedString(hedString, hedSchemas) { - const [tagSpecs, groupBounds, tokenizingIssues] = new HedStringTokenizer(hedString).tokenize() - if (tokenizingIssues.syntax.length > 0) { - return [null, tokenizingIssues] + return tagGroups } - const [parsedTags, parsingIssues] = createParsedTags(hedString, hedSchemas, tagSpecs, groupBounds) - mergeParsingIssues(tokenizingIssues, parsingIssues) - return [parsedTags, tokenizingIssues] } diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js index 0e3aca2f..9c46be82 100644 --- a/tests/stringParser.spec.js +++ b/tests/stringParser.spec.js @@ -7,7 +7,7 @@ import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types' import { recursiveMap } from '../utils/array' import { parseHedString } from '../parser/main' import { ParsedHedTag } from '../parser/parsedHedTag' -import splitHedString from '../parser/splitHedString' +import HedStringSplitter from '../parser/splitHedString' import { buildSchemas } from '../validator/schema/init' import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' @@ -21,6 +21,8 @@ describe('HED string parsing', () => { */ const originalMap = (parsedTag) => parsedTag.originalTag + const splitHedString = (hedString, hedSchemas) => new HedStringSplitter(hedString, hedSchemas).splitHedString() + const hedSchemaFile = 'tests/data/HED8.0.0.xml' let hedSchemas From 9def82bfcfde92b0d1640045a757b3e98071f6c5 Mon Sep 17 00:00:00 2001 From: Alexander Jones Date: Fri, 11 Oct 2024 08:22:02 -0500 Subject: [PATCH 2/4] Rewrite main parser module as class and rename --- parser/main.js | 186 ---------------------------------------- parser/parser.js | 214 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 186 deletions(-) delete mode 100644 parser/main.js create mode 100644 parser/parser.js diff --git a/parser/main.js b/parser/main.js deleted file mode 100644 index 145c1a30..00000000 --- 
a/parser/main.js +++ /dev/null @@ -1,186 +0,0 @@ -import { mergeParsingIssues } from '../utils/hedData' -import { generateIssue } from '../common/issues/issues' - -import ParsedHedString from './parsedHedString' -import HedStringSplitter from './splitHedString' -import { getCharacterCount, stringIsEmpty } from '../utils/string' - -const openingGroupCharacter = '(' -const closingGroupCharacter = ')' -const delimiters = new Set([',']) - -/** - * Substitute certain illegal characters and report warnings when found. - */ -const substituteCharacters = function (hedString) { - const issues = [] - const illegalCharacterMap = { '\0': ['ASCII NUL', ' '], '\t': ['Tab', ' '] } - const replaceFunction = function (match, offset) { - if (match in illegalCharacterMap) { - const [name, replacement] = illegalCharacterMap[match] - issues.push( - generateIssue('invalidCharacter', { - character: name, - index: offset, - string: hedString, - }), - ) - return replacement - } else { - return match - } - } - const fixedString = hedString.replace(/./g, replaceFunction) - - return [fixedString, issues] -} - -/** - * Check if group parentheses match. Pushes an issue if they don't match. - */ -const countTagGroupParentheses = function (hedString) { - const issues = [] - const numberOfOpeningParentheses = getCharacterCount(hedString, openingGroupCharacter) - const numberOfClosingParentheses = getCharacterCount(hedString, closingGroupCharacter) - if (numberOfOpeningParentheses !== numberOfClosingParentheses) { - issues.push( - generateIssue('parentheses', { - opening: numberOfOpeningParentheses, - closing: numberOfClosingParentheses, - }), - ) - } - return issues -} - -/** - * Check if a comma is missing after an opening parenthesis. 
- */ -const isCommaMissingAfterClosingParenthesis = function (lastNonEmptyCharacter, currentCharacter) { - return ( - lastNonEmptyCharacter === closingGroupCharacter && - !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter) - ) -} - -/** - * Check for delimiter issues in a HED string (e.g. missing commas adjacent to groups, extra commas or tildes). - */ -const findDelimiterIssuesInHedString = function (hedString) { - const issues = [] - let lastNonEmptyValidCharacter = '' - let lastNonEmptyValidIndex = 0 - let currentTag = '' - for (let i = 0; i < hedString.length; i++) { - const currentCharacter = hedString.charAt(i) - currentTag += currentCharacter - if (stringIsEmpty(currentCharacter)) { - continue - } - if (delimiters.has(currentCharacter)) { - if (currentTag.trim() === currentCharacter) { - issues.push( - generateIssue('extraDelimiter', { - character: currentCharacter, - index: i, - string: hedString, - }), - ) - currentTag = '' - continue - } - currentTag = '' - } else if (currentCharacter === openingGroupCharacter) { - if (currentTag.trim() === openingGroupCharacter) { - currentTag = '' - } else { - issues.push(generateIssue('commaMissing', { tag: currentTag })) - } - } else if (isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) { - issues.push( - generateIssue('commaMissing', { - tag: currentTag.slice(0, -1), - }), - ) - break - } - lastNonEmptyValidCharacter = currentCharacter - lastNonEmptyValidIndex = i - } - if (delimiters.has(lastNonEmptyValidCharacter)) { - issues.push( - generateIssue('extraDelimiter', { - character: lastNonEmptyValidCharacter, - index: lastNonEmptyValidIndex, - string: hedString, - }), - ) - } - return issues -} - -/** - * Validate the full unparsed HED string. - * - * @param {string} hedString The unparsed HED string. - * @returns {Object} String substitution issues and other issues. 
- */ -const validateFullUnparsedHedString = function (hedString) { - const [fixedHedString, substitutionIssues] = substituteCharacters(hedString) - const delimiterIssues = [].concat( - countTagGroupParentheses(fixedHedString), - findDelimiterIssuesInHedString(fixedHedString), - ) - - return { - substitution: substitutionIssues, - delimiter: delimiterIssues, - } -} - -/** - * Parse a full HED string into an object of tag types. - * - * @param {string|ParsedHedString} hedString The full HED string to parse. - * @param {Schemas} hedSchemas The collection of HED schemas. - * @returns {[ParsedHedString|null, Object]} The parsed HED tag data and an object containing lists of parsing issues. - */ -export const parseHedString = function (hedString, hedSchemas) { - if (hedString instanceof ParsedHedString) { - return [hedString, {}] - } - const fullStringIssues = validateFullUnparsedHedString(hedString) - if (fullStringIssues.delimiter.length > 0) { - fullStringIssues.syntax = [] - return [null, fullStringIssues] - } - const [parsedTags, splitIssues] = new HedStringSplitter(hedString, hedSchemas).splitHedString() - const parsingIssues = Object.assign(fullStringIssues, splitIssues) - if (parsedTags === null) { - return [null, parsingIssues] - } - const parsedString = new ParsedHedString(hedString, parsedTags) - return [parsedString, parsingIssues] -} - -/** - * Parse a set of HED strings. - * - * @param {string[]|ParsedHedString[]} hedStrings A set of HED strings. - * @param {Schemas} hedSchemas The collection of HED schemas. - * @returns {[ParsedHedString[], Object]} The parsed HED strings and any issues found. 
- */ -export const parseHedStrings = function (hedStrings, hedSchemas) { - return hedStrings - .map((hedString) => { - return parseHedString(hedString, hedSchemas) - }) - .reduce( - ([previousStrings, previousIssues], [currentString, currentIssues]) => { - previousStrings.push(currentString) - mergeParsingIssues(previousIssues, currentIssues) - return [previousStrings, previousIssues] - }, - [[], {}], - ) -} diff --git a/parser/parser.js b/parser/parser.js new file mode 100644 index 00000000..2e9f9617 --- /dev/null +++ b/parser/parser.js @@ -0,0 +1,214 @@ +import { mergeParsingIssues } from '../utils/hedData' +import { generateIssue } from '../common/issues/issues' +import ParsedHedString from './parsedHedString' +import HedStringSplitter from './splitHedString' +import { getCharacterCount, stringIsEmpty } from '../utils/string' + +const openingGroupCharacter = '(' +const closingGroupCharacter = ')' +const delimiters = new Set([',']) + +/** + * A parser for HED strings. + */ +class HedStringParser { + /** + * The HED string being parsed. + * @type {string|ParsedHedString} + */ + hedString + /** + * The collection of HED schemas. + * @type {Schemas} + */ + hedSchemas + + /** + * Constructor. + * + * @param {string|ParsedHedString} hedString The HED string to be parsed. + * @param {Schemas} hedSchemas The collection of HED schemas. + */ + constructor(hedString, hedSchemas) { + this.hedString = hedString + this.hedSchemas = hedSchemas + } + + /** + * Check if the parentheses in a tag group match. + * + * @returns {Issue[]} Any issues found related to unmatched parentheses. 
+ */ + _countTagGroupParentheses() { + const issues = [] + const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter) + const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter) + + if (numberOfOpeningParentheses !== numberOfClosingParentheses) { + issues.push( + generateIssue('parentheses', { + opening: numberOfOpeningParentheses, + closing: numberOfClosingParentheses, + }), + ) + } + + return issues + } + + /** + * Check if a comma is missing after a closing parenthesis. + * + * @param {string} lastNonEmptyCharacter The last non-empty character. + * @param {string} currentCharacter The current character in the HED string. + * @returns {boolean} Whether a comma is missing after a closing parenthesis. + */ + _isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) { + return ( + lastNonEmptyCharacter === closingGroupCharacter && + !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter) + ) + } + + /** + * Find delimiter-related issues in a HED string. + * + * @returns {Issue[]} Any issues related to delimiters. 
+ */ + _findDelimiterIssues() { + const issues = [] + let lastNonEmptyValidCharacter = '' + let lastNonEmptyValidIndex = 0 + let currentTag = '' + + for (let i = 0; i < this.hedString.length; i++) { + const currentCharacter = this.hedString.charAt(i) + currentTag += currentCharacter + + if (stringIsEmpty(currentCharacter)) { + continue + } + + if (delimiters.has(currentCharacter)) { + if (currentTag.trim() === currentCharacter) { + issues.push( + generateIssue('extraDelimiter', { + character: currentCharacter, + index: i, + string: this.hedString, + }), + ) + currentTag = '' + continue + } + currentTag = '' + } else if (currentCharacter === openingGroupCharacter) { + if (currentTag.trim() !== openingGroupCharacter) { + issues.push(generateIssue('commaMissing', { tag: currentTag })) + } + currentTag = '' + } else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) { + issues.push( + generateIssue('commaMissing', { + tag: currentTag.slice(0, -1), + }), + ) + break + } + + lastNonEmptyValidCharacter = currentCharacter + lastNonEmptyValidIndex = i + } + + if (delimiters.has(lastNonEmptyValidCharacter)) { + issues.push( + generateIssue('extraDelimiter', { + character: lastNonEmptyValidCharacter, + index: lastNonEmptyValidIndex, + string: this.hedString, + }), + ) + } + + return issues + } + + /** + * Validate the full unparsed HED string. + * + * @returns {Object} Any issues found during validation. + */ + _validateFullUnparsedHedString() { + const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues()) + + return { delimiter: delimiterIssues } + } + + /** + * Parse a full HED string. + * + * @returns {[ParsedHedString|null, Object]} The parsed HED string and any parsing issues. 
+ */ + parseHedString() { + if (this.hedString instanceof ParsedHedString) { + return [this.hedString, {}] + } + + const fullStringIssues = this._validateFullUnparsedHedString() + if (fullStringIssues.delimiter.length > 0) { + fullStringIssues.syntax = [] + return [null, fullStringIssues] + } + + const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString() + const parsingIssues = Object.assign(fullStringIssues, splitIssues) + if (parsedTags === null) { + return [null, parsingIssues] + } + + const parsedString = new ParsedHedString(this.hedString, parsedTags) + return [parsedString, parsingIssues] + } + + /** + * Parse a list of HED strings. + * + * @param {string[]|ParsedHedString[]} hedStrings A list of HED strings. + * @param {Schemas} hedSchemas The collection of HED schemas. + * @returns {[ParsedHedString[], Object]} The parsed HED strings and any issues found. + */ + static parseHedStrings(hedStrings, hedSchemas) { + const parsedStrings = [] + const cumulativeIssues = {} + + for (const hedString of hedStrings) { + const [parsedString, currentIssues] = new HedStringParser(hedString, hedSchemas).parseHedString() + parsedStrings.push(parsedString) + mergeParsingIssues(cumulativeIssues, currentIssues) + } + + return [parsedStrings, cumulativeIssues] + } +} + +/** + * Parse a HED string. + * + * @param {string|ParsedHedString} hedString A (possibly already parsed) HED string. + * @param {Schemas} hedSchemas The collection of HED schemas. + * @returns {[ParsedHedString, Object]} The parsed HED string and any issues found. + */ +export function parseHedString(hedString, hedSchemas) { + return new HedStringParser(hedString, hedSchemas).parseHedString() +} + +/** + * Parse a list of HED strings. + * + * @param {string[]|ParsedHedString[]} hedStrings A list of HED strings. + * @param {Schemas} hedSchemas The collection of HED schemas. + * @returns {[ParsedHedString[], Object]} The parsed HED strings and any issues found. 
+ */ +export function parseHedStrings(hedStrings, hedSchemas) { + return HedStringParser.parseHedStrings(hedStrings, hedSchemas) +} From 613e64e35a079fcd1c3dba95edebccc97571624b Mon Sep 17 00:00:00 2001 From: Alexander Jones Date: Fri, 11 Oct 2024 08:22:58 -0500 Subject: [PATCH 3/4] Move invalid character check to tokenizer This bans all characters in the C0 and C1 control code ranges, using the unicode-name NPM module to display the Unicode names of any invalid characters. --- esbuild.mjs | 2 +- package-lock.json | 14 ++++++++++ package.json | 6 ++++- parser/{splitHedString.js => splitter.js} | 2 +- parser/tokenizer.js | 12 ++++++++- tests/bids.spec.js | 2 +- tests/event.spec.js | 32 ++++++++--------------- tests/stringParser.spec.js | 6 ++--- 8 files changed, 47 insertions(+), 29 deletions(-) rename parser/{splitHedString.js => splitter.js} (99%) diff --git a/esbuild.mjs b/esbuild.mjs index ec1316eb..785a6a11 100644 --- a/esbuild.mjs +++ b/esbuild.mjs @@ -7,7 +7,7 @@ await esbuild.build({ entryPoints: [path.join(process.cwd(), 'index.js')], loader: { '.xml': 'text' }, outdir: path.join(process.cwd(), 'dist', 'commonjs'), - target: 'node12', + target: 'node18', bundle: true, sourcemap: true, platform: 'node', diff --git a/package-lock.json b/package-lock.json index 03dbdf4e..36be2288 100644 --- a/package-lock.json +++ b/package-lock.json @@ -19,6 +19,7 @@ "pluralize": "^8.0.0", "semver": "^7.6.0", "string_decoder": "^1.3.0", + "unicode-name": "^1.0.2", "xml2js": "^0.6.2" }, "devDependencies": { @@ -5191,6 +5192,14 @@ "node": ">=14.17" } }, + "node_modules/unicode-name": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/unicode-name/-/unicode-name-1.0.2.tgz", + "integrity": "sha512-PETEgU8TKsHoGZXP/3eWRU/4xnXJKwAIm+H7b0s/6CEP6o+YK4tWbwBXPLKe0U5+njWEAo2snT5+Mvoau6BI8A==", + "engines": { + "node": ">=18.20" + } + }, "node_modules/update-browserslist-db": { "version": "1.0.14", "resolved": 
"https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.14.tgz", @@ -9108,6 +9117,11 @@ "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", "dev": true }, + "unicode-name": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/unicode-name/-/unicode-name-1.0.2.tgz", + "integrity": "sha512-PETEgU8TKsHoGZXP/3eWRU/4xnXJKwAIm+H7b0s/6CEP6o+YK4tWbwBXPLKe0U5+njWEAo2snT5+Mvoau6BI8A==" + }, "update-browserslist-db": { "version": "1.0.14", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.14.tgz", diff --git a/package.json b/package.json index 79b45ad6..801cf414 100644 --- a/package.json +++ b/package.json @@ -45,6 +45,7 @@ "pluralize": "^8.0.0", "semver": "^7.6.0", "string_decoder": "^1.3.0", + "unicode-name": "^1.0.2", "xml2js": "^0.6.2" }, "devDependencies": { @@ -74,7 +75,10 @@ "transform": { "\\.js$": "esbuild-runner/jest", "\\.xml$": "/fileTransformer.js" - } + }, + "transformIgnorePatterns": [ + "node_modules/(?!unicode-name)" + ] }, "browser": { "fs": false diff --git a/parser/splitHedString.js b/parser/splitter.js similarity index 99% rename from parser/splitHedString.js rename to parser/splitter.js index c1c939f0..31038244 100644 --- a/parser/splitHedString.js +++ b/parser/splitter.js @@ -47,7 +47,7 @@ export default class HedStringSplitter { ParsedHedTagConstructor /** - * Constructor for the HED string parser. + * Constructor. * * @param {string} hedString The HED string to be split and parsed. * @param {Schemas} hedSchemas The collection of HED schemas. 
diff --git a/parser/tokenizer.js b/parser/tokenizer.js index 89ecaab3..b308a9d9 100644 --- a/parser/tokenizer.js +++ b/parser/tokenizer.js @@ -1,3 +1,5 @@ +import { unicodeName } from 'unicode-name' + import { generateIssue } from '../common/issues/issues' import { stringIsEmpty } from '../utils/string' import { replaceTagNameWithPound } from '../utils/hedStrings' @@ -12,6 +14,14 @@ const slashCharacter = '/' const invalidCharacters = new Set(['[', ']', '~', '"']) const invalidCharactersOutsideOfValues = new Set([':']) +// C0 control codes +for (let i = 0x00; i <= 0x1f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} +// DEL and C1 control codes +for (let i = 0x7f; i <= 0x9f; i++) { + invalidCharacters.add(String.fromCodePoint(i)) +} /** * A specification for a tokenized substring. @@ -366,7 +376,7 @@ export class HedStringTokenizer { _pushInvalidCharacterIssue(character, index) { this.syntaxIssues.push( generateIssue('invalidCharacter', { - character: character, + character: unicodeName(character), index: index, string: this.hedString, }), diff --git a/tests/bids.spec.js b/tests/bids.spec.js index af427877..c210f90f 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -668,7 +668,7 @@ describe('BIDS datasets', () => { syntax: [ BidsHedIssue.fromHedIssue( generateIssue('invalidCharacter', { - character: '{', + character: 'LEFT CURLY BRACKET', index: 9, string: '(Def/Acc/{response_time})', }), diff --git a/tests/event.spec.js b/tests/event.spec.js index 87ef696b..1a364e62 100644 --- a/tests/event.spec.js +++ b/tests/event.spec.js @@ -184,11 +184,13 @@ describe('HED string and event validation', () => { closingBracket: '/Attribute/Object side/Left,/Participant/Effect]/Body part/Arm', tilde: '/Attribute/Object side/Left,/Participant/Effect~/Body part/Arm', doubleQuote: '/Attribute/Object side/Left,/Participant/Effect"/Body part/Arm', + null: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\0', + tab: '/Attribute/Object 
side/Left,/Participant/Effect/Body part/Arm\t', } const expectedIssues = { openingBrace: [ generateIssue('invalidCharacter', { - character: '{', + character: 'LEFT CURLY BRACKET', index: 47, string: testStrings.openingBrace, }), @@ -201,54 +203,42 @@ describe('HED string and event validation', () => { ], openingBracket: [ generateIssue('invalidCharacter', { - character: '[', + character: 'LEFT SQUARE BRACKET', index: 47, string: testStrings.openingBracket, }), ], closingBracket: [ generateIssue('invalidCharacter', { - character: ']', + character: 'RIGHT SQUARE BRACKET', index: 47, string: testStrings.closingBracket, }), ], tilde: [ generateIssue('invalidCharacter', { - character: '~', + character: 'TILDE', index: 47, string: testStrings.tilde, }), ], doubleQuote: [ generateIssue('invalidCharacter', { - character: '"', + character: 'QUOTATION MARK', index: 47, string: testStrings.doubleQuote, }), ], - } - // No-op function as this check is done during the parsing stage. - // eslint-disable-next-line no-unused-vars - validatorSyntactic(testStrings, expectedIssues, (validator) => {}) - }) - - it('should substitute and warn for certain illegal characters', () => { - const testStrings = { - nul: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\0', - tab: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\t', - } - const expectedIssues = { - nul: [ + null: [ generateIssue('invalidCharacter', { - character: 'ASCII NUL', + character: 'NULL', index: 61, - string: testStrings.nul, + string: testStrings.null, }), ], tab: [ generateIssue('invalidCharacter', { - character: 'Tab', + character: 'CHARACTER TABULATION', index: 61, string: testStrings.tab, }), diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js index 9c46be82..dcb83eec 100644 --- a/tests/stringParser.spec.js +++ b/tests/stringParser.spec.js @@ -84,7 +84,7 @@ describe('HED string parsing', () => { conversion: [], syntax: [ generateIssue('invalidCharacter', { - character: 
'[', + character: 'LEFT SQUARE BRACKET', index: 56, string: testStrings.openingSquare, }), @@ -94,7 +94,7 @@ describe('HED string parsing', () => { conversion: [], syntax: [ generateIssue('invalidCharacter', { - character: ']', + character: 'RIGHT SQUARE BRACKET', index: 56, string: testStrings.closingSquare, }), @@ -104,7 +104,7 @@ describe('HED string parsing', () => { conversion: [], syntax: [ generateIssue('invalidCharacter', { - character: '~', + character: 'TILDE', index: 56, string: testStrings.tilde, }), From cae39a34780a1c83a65bba8eba325b88dac0c3fd Mon Sep 17 00:00:00 2001 From: Alexander Jones Date: Fri, 11 Oct 2024 08:28:32 -0500 Subject: [PATCH 4/4] Fix imports broken by previous file moves --- bids/types/json.js | 2 +- bids/validator/bidsHedTsvValidator.js | 2 +- converter/converter.js | 2 +- parser/columnSplicer.js | 2 +- parser/parser.js | 2 +- tests/bids.spec.js | 2 +- tests/event.spec.js | 2 +- tests/stringParser.spec.js | 4 ++-- validator/dataset.js | 2 +- validator/event/init.js | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bids/types/json.js b/bids/types/json.js index 36da7826..814b531c 100644 --- a/bids/types/json.js +++ b/bids/types/json.js @@ -1,7 +1,7 @@ import isPlainObject from 'lodash/isPlainObject' import { sidecarValueHasHed } from '../utils' -import { parseHedString } from '../../parser/main' +import { parseHedString } from '../../parser/parser' import ParsedHedString from '../../parser/parsedHedString' import { BidsFile } from './basic' import BidsHedSidecarValidator from '../validator/bidsHedSidecarValidator' diff --git a/bids/validator/bidsHedTsvValidator.js b/bids/validator/bidsHedTsvValidator.js index 6ecf52d2..231ea14b 100644 --- a/bids/validator/bidsHedTsvValidator.js +++ b/bids/validator/bidsHedTsvValidator.js @@ -1,7 +1,7 @@ import BidsHedSidecarValidator from './bidsHedSidecarValidator' import { BidsHedIssue, BidsIssue } from '../types/issues' import { BidsTsvEvent, BidsTsvRow } from '../types/tsv' 
-import { parseHedString } from '../../parser/main' +import { parseHedString } from '../../parser/parser' import ColumnSplicer from '../../parser/columnSplicer' import ParsedHedString from '../../parser/parsedHedString' import { generateIssue } from '../../common/issues/issues' diff --git a/converter/converter.js b/converter/converter.js index 6c7b1f59..6c21c56f 100644 --- a/converter/converter.js +++ b/converter/converter.js @@ -1,4 +1,4 @@ -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' /** * Convert a HED string. diff --git a/parser/columnSplicer.js b/parser/columnSplicer.js index bc27e02e..cacb7b64 100644 --- a/parser/columnSplicer.js +++ b/parser/columnSplicer.js @@ -2,7 +2,7 @@ import ParsedHedString from './parsedHedString' import ParsedHedColumnSplice from './parsedHedColumnSplice' import ParsedHedGroup from './parsedHedGroup' import { generateIssue } from '../common/issues/issues' -import { parseHedString } from './main' +import { parseHedString } from './parser' export class ColumnSplicer { /** diff --git a/parser/parser.js b/parser/parser.js index 2e9f9617..a86e05f3 100644 --- a/parser/parser.js +++ b/parser/parser.js @@ -1,7 +1,7 @@ import { mergeParsingIssues } from '../utils/hedData' import { generateIssue } from '../common/issues/issues' import ParsedHedString from './parsedHedString' -import HedStringSplitter from './splitHedString' +import HedStringSplitter from './splitter' import { getCharacterCount, stringIsEmpty } from '../utils/string' const openingGroupCharacter = '(' diff --git a/tests/bids.spec.js b/tests/bids.spec.js index c210f90f..f62b3de9 100644 --- a/tests/bids.spec.js +++ b/tests/bids.spec.js @@ -8,7 +8,7 @@ import { SchemaSpec, SchemasSpec } from '../common/schema/types' import { buildBidsSchemas, parseSchemasSpec } from '../bids/schema' import { BidsDataset, BidsHedIssue, BidsIssue, validateBidsDataset } from '../bids' import { bidsDatasetDescriptions, bidsSidecars, bidsTsvFiles } 
from './bids.spec.data' -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator' describe('BIDS datasets', () => { diff --git a/tests/event.spec.js b/tests/event.spec.js index 1a364e62..dfc22f2e 100644 --- a/tests/event.spec.js +++ b/tests/event.spec.js @@ -4,7 +4,7 @@ import { beforeAll, describe, it } from '@jest/globals' import * as hed from '../validator/event' import { buildSchemas } from '../validator/schema/init' -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' import { ParsedHedTag } from '../parser/parsedHedTag' import { HedValidator, Hed2Validator, Hed3Validator } from '../validator/event' import { generateIssue } from '../common/issues/issues' diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js index dcb83eec..c9335456 100644 --- a/tests/stringParser.spec.js +++ b/tests/stringParser.spec.js @@ -5,9 +5,9 @@ import { beforeAll, describe, it } from '@jest/globals' import { generateIssue } from '../common/issues/issues' import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types' import { recursiveMap } from '../utils/array' -import { parseHedString } from '../parser/main' +import { parseHedString } from '../parser/parser' import { ParsedHedTag } from '../parser/parsedHedTag' -import HedStringSplitter from '../parser/splitHedString' +import HedStringSplitter from '../parser/splitter' import { buildSchemas } from '../validator/schema/init' import ColumnSplicer from '../parser/columnSplicer' import ParsedHedGroup from '../parser/parsedHedGroup' diff --git a/validator/dataset.js b/validator/dataset.js index 29dcd90e..831b1209 100644 --- a/validator/dataset.js +++ b/validator/dataset.js @@ -2,7 +2,7 @@ import zip from 'lodash/zip' import { generateIssue, Issue } from '../common/issues/issues' import { validateHedEventWithDefinitions } from './event' -import { 
parseHedStrings } from '../parser/main' +import { parseHedStrings } from '../parser/parser' import { filterNonEqualDuplicates } from '../utils/map' /** diff --git a/validator/event/init.js b/validator/event/init.js index b8ed69eb..6c27f44d 100644 --- a/validator/event/init.js +++ b/validator/event/init.js @@ -1,4 +1,4 @@ -import { parseHedString } from '../../parser/main' +import { parseHedString } from '../../parser/parser' import ParsedHedString from '../../parser/parsedHedString' import { Schemas } from '../../common/schema/types'