From 26fbb88c106c7556458d2ba9f69aa50f3b3fcbd4 Mon Sep 17 00:00:00 2001
From: Alexander Jones <happy5214@gmail.com>
Date: Wed, 9 Oct 2024 09:42:11 -0500
Subject: [PATCH 1/4] Rewrite splitHedString as class

---
 parser/main.js             |   4 +-
 parser/splitHedString.js   | 167 ++++++++++++++++++++++++++-----------
 tests/stringParser.spec.js |   4 +-
 3 files changed, 121 insertions(+), 54 deletions(-)

diff --git a/parser/main.js b/parser/main.js
index ff250d4b..145c1a30 100644
--- a/parser/main.js
+++ b/parser/main.js
@@ -2,7 +2,7 @@ import { mergeParsingIssues } from '../utils/hedData'
 import { generateIssue } from '../common/issues/issues'
 
 import ParsedHedString from './parsedHedString'
-import splitHedString from './splitHedString'
+import HedStringSplitter from './splitHedString'
 import { getCharacterCount, stringIsEmpty } from '../utils/string'
 
 const openingGroupCharacter = '('
@@ -154,7 +154,7 @@ export const parseHedString = function (hedString, hedSchemas) {
     fullStringIssues.syntax = []
     return [null, fullStringIssues]
   }
-  const [parsedTags, splitIssues] = splitHedString(hedString, hedSchemas)
+  const [parsedTags, splitIssues] = new HedStringSplitter(hedString, hedSchemas).splitHedString()
   const parsingIssues = Object.assign(fullStringIssues, splitIssues)
   if (parsedTags === null) {
     return [null, parsingIssues]
diff --git a/parser/splitHedString.js b/parser/splitHedString.js
index 1887fdc4..c1c939f0 100644
--- a/parser/splitHedString.js
+++ b/parser/splitHedString.js
@@ -19,84 +19,149 @@ const generationToClass = [
     new ParsedHed3Tag(tagSpec, hedSchemas, hedString),
 ]
 
-/**
- * Create the parsed HED tag and group objects.
- *
- * @param {string} hedString The HED string to be split.
- * @param {Schemas} hedSchemas The collection of HED schemas.
- * @param {TagSpec[]} tagSpecs The tag specifications.
- * @param {GroupSpec} groupSpecs The bounds of the tag groups.
- * @returns {[ParsedHedSubstring[], Object<string, Issue[]>]} The parsed HED string data and any issues found.
- */
-const createParsedTags = function (hedString, hedSchemas, tagSpecs, groupSpecs) {
-  const conversionIssues = []
-  const syntaxIssues = []
-  const ParsedHedTagConstructor = generationToClass[hedSchemas.generation]
+export default class HedStringSplitter {
+  /**
+   * The HED string being split.
+   * @type {string}
+   */
+  hedString
+  /**
+   * The collection of HED schemas.
+   * @type {Schemas}
+   */
+  hedSchemas
+  /**
+   * Any issues found during tag conversion.
+   * @type {Issue[]}
+   */
+  conversionIssues
+  /**
+   * Any syntax issues found.
+   * @type {Issue[]}
+   */
+  syntaxIssues
+  /**
+   * The constructor to be used to build the parsed HED tags.
+   * @type {function (string, string, number[], Schemas, string, TagSpec): ParsedHedTag}
+   */
+  ParsedHedTagConstructor
 
-  const createParsedTag = (tagSpec) => {
+  /**
+   * Constructor for the HED string parser.
+   *
+   * @param {string} hedString The HED string to be split and parsed.
+   * @param {Schemas} hedSchemas The collection of HED schemas.
+   */
+  constructor(hedString, hedSchemas) {
+    this.hedString = hedString
+    this.hedSchemas = hedSchemas
+    this.conversionIssues = []
+    this.syntaxIssues = []
+    this.ParsedHedTagConstructor = generationToClass[hedSchemas.generation]
+  }
+
+  /**
+   * Split and parse a HED string into tags and groups.
+   *
+   * @returns {[ParsedHedSubstring[], Object<string, Issue[]>]} The parsed HED string data and any issues found.
+   */
+  splitHedString() {
+    const [tagSpecs, groupBounds, tokenizingIssues] = new HedStringTokenizer(this.hedString).tokenize()
+    if (tokenizingIssues.syntax.length > 0) {
+      return [null, tokenizingIssues]
+    }
+
+    const [parsedTags, parsingIssues] = this._createParsedTags(tagSpecs, groupBounds)
+    mergeParsingIssues(tokenizingIssues, parsingIssues)
+
+    return [parsedTags, tokenizingIssues]
+  }
+
+  /**
+   * Create parsed HED tags and groups from specifications.
+   *
+   * @param {TagSpec[]} tagSpecs The tag specifications.
+   * @param {GroupSpec} groupSpecs The group specifications.
+   * @returns {[ParsedHedSubstring[], Object<string, Issue[]>]} The parsed HED tags and any issues.
+   */
+  _createParsedTags(tagSpecs, groupSpecs) {
+    // Create tags from specifications
+    const parsedTags = recursiveMap((tagSpec) => this._createParsedTag(tagSpec), tagSpecs)
+
+    // Create groups from the parsed tags
+    const parsedTagsWithGroups = this._createParsedGroups(parsedTags, groupSpecs.children)
+
+    const issues = { syntax: this.syntaxIssues, conversion: this.conversionIssues }
+    return [parsedTagsWithGroups, issues]
+  }
+
+  /**
+   * Create a parsed tag object based on the tag specification.
+   *
+   * @param {TagSpec|ColumnSpliceSpec} tagSpec The tag or column splice specification.
+   * @returns {ParsedHedTag|ParsedHedColumnSplice|null} The parsed HED tag or column splice.
+   */
+  _createParsedTag(tagSpec) {
     if (tagSpec instanceof TagSpec) {
       try {
-        const parsedTag = ParsedHedTagConstructor(
+        return this.ParsedHedTagConstructor(
           tagSpec.tag,
-          hedString,
+          this.hedString,
           tagSpec.bounds,
-          hedSchemas,
+          this.hedSchemas,
           tagSpec.library,
           tagSpec,
         )
-        return parsedTag
       } catch (issueError) {
-        if (issueError instanceof IssueError) {
-          conversionIssues.push(issueError.issue)
-        } else if (issueError instanceof Error) {
-          conversionIssues.push(generateIssue('internalError', { message: issueError.message }))
-        }
+        this._handleIssueError(issueError)
         return null
       }
     } else if (tagSpec instanceof ColumnSpliceSpec) {
       return new ParsedHedColumnSplice(tagSpec.columnName, tagSpec.bounds)
     }
   }
-  const createParsedGroups = (tags, groupSpecs) => {
+
+  /**
+   * Handle any issue encountered during tag parsing.
+   *
+   * @param {Error|IssueError} issueError The error encountered.
+   */
+  _handleIssueError(issueError) {
+    if (issueError instanceof IssueError) {
+      this.conversionIssues.push(issueError.issue)
+    } else if (issueError instanceof Error) {
+      this.conversionIssues.push(generateIssue('internalError', { message: issueError.message }))
+    }
+  }
+
+  /**
+   * Create parsed HED groups from parsed tags and group specifications.
+   *
+   * @param {ParsedHedTag[]} tags The parsed HED tags.
+   * @param {GroupSpec[]} groupSpecs The group specifications.
+   * @returns {ParsedHedGroup[]} The parsed HED groups.
+   */
+  _createParsedGroups(tags, groupSpecs) {
     const tagGroups = []
     let index = 0
+
     for (const tag of tags) {
       if (Array.isArray(tag)) {
         const groupSpec = groupSpecs[index]
         tagGroups.push(
-          new ParsedHedGroup(createParsedGroups(tag, groupSpec.children), hedSchemas, hedString, groupSpec.bounds),
+          new ParsedHedGroup(
+            this._createParsedGroups(tag, groupSpec.children),
+            this.hedSchemas,
+            this.hedString,
+            groupSpec.bounds,
+          ),
         )
         index++
       } else if (tag !== null) {
         tagGroups.push(tag)
       }
     }
-    return tagGroups
-  }
-  const parsedTags = recursiveMap(createParsedTag, tagSpecs)
-  const parsedTagsWithGroups = createParsedGroups(parsedTags, groupSpecs.children)
 
-  const issues = {
-    syntax: syntaxIssues,
-    conversion: conversionIssues,
-  }
-
-  return [parsedTagsWithGroups, issues]
-}
-
-/**
- * Split a HED string.
- *
- * @param {string} hedString The HED string to be split.
- * @param {Schemas} hedSchemas The collection of HED schemas.
- * @returns {[ParsedHedSubstring[], Object<string, Issue[]>]} The parsed HED string data and any issues found.
- */
-export default function splitHedString(hedString, hedSchemas) {
-  const [tagSpecs, groupBounds, tokenizingIssues] = new HedStringTokenizer(hedString).tokenize()
-  if (tokenizingIssues.syntax.length > 0) {
-    return [null, tokenizingIssues]
+    return tagGroups
   }
-  const [parsedTags, parsingIssues] = createParsedTags(hedString, hedSchemas, tagSpecs, groupBounds)
-  mergeParsingIssues(tokenizingIssues, parsingIssues)
-  return [parsedTags, tokenizingIssues]
 }
diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js
index 0e3aca2f..9c46be82 100644
--- a/tests/stringParser.spec.js
+++ b/tests/stringParser.spec.js
@@ -7,7 +7,7 @@ import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types'
 import { recursiveMap } from '../utils/array'
 import { parseHedString } from '../parser/main'
 import { ParsedHedTag } from '../parser/parsedHedTag'
-import splitHedString from '../parser/splitHedString'
+import HedStringSplitter from '../parser/splitHedString'
 import { buildSchemas } from '../validator/schema/init'
 import ColumnSplicer from '../parser/columnSplicer'
 import ParsedHedGroup from '../parser/parsedHedGroup'
@@ -21,6 +21,8 @@ describe('HED string parsing', () => {
    */
   const originalMap = (parsedTag) => parsedTag.originalTag
 
+  const splitHedString = (hedString, hedSchemas) => new HedStringSplitter(hedString, hedSchemas).splitHedString()
+
   const hedSchemaFile = 'tests/data/HED8.0.0.xml'
   let hedSchemas
 

From 9def82bfcfde92b0d1640045a757b3e98071f6c5 Mon Sep 17 00:00:00 2001
From: Alexander Jones <happy5214@gmail.com>
Date: Fri, 11 Oct 2024 08:22:02 -0500
Subject: [PATCH 2/4] Rewrite main parser module as class and rename

---
 parser/main.js   | 186 ----------------------------------------
 parser/parser.js | 214 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 214 insertions(+), 186 deletions(-)
 delete mode 100644 parser/main.js
 create mode 100644 parser/parser.js

diff --git a/parser/main.js b/parser/main.js
deleted file mode 100644
index 145c1a30..00000000
--- a/parser/main.js
+++ /dev/null
@@ -1,186 +0,0 @@
-import { mergeParsingIssues } from '../utils/hedData'
-import { generateIssue } from '../common/issues/issues'
-
-import ParsedHedString from './parsedHedString'
-import HedStringSplitter from './splitHedString'
-import { getCharacterCount, stringIsEmpty } from '../utils/string'
-
-const openingGroupCharacter = '('
-const closingGroupCharacter = ')'
-const delimiters = new Set([','])
-
-/**
- * Substitute certain illegal characters and report warnings when found.
- */
-const substituteCharacters = function (hedString) {
-  const issues = []
-  const illegalCharacterMap = { '\0': ['ASCII NUL', ' '], '\t': ['Tab', ' '] }
-  const replaceFunction = function (match, offset) {
-    if (match in illegalCharacterMap) {
-      const [name, replacement] = illegalCharacterMap[match]
-      issues.push(
-        generateIssue('invalidCharacter', {
-          character: name,
-          index: offset,
-          string: hedString,
-        }),
-      )
-      return replacement
-    } else {
-      return match
-    }
-  }
-  const fixedString = hedString.replace(/./g, replaceFunction)
-
-  return [fixedString, issues]
-}
-
-/**
- * Check if group parentheses match. Pushes an issue if they don't match.
- */
-const countTagGroupParentheses = function (hedString) {
-  const issues = []
-  const numberOfOpeningParentheses = getCharacterCount(hedString, openingGroupCharacter)
-  const numberOfClosingParentheses = getCharacterCount(hedString, closingGroupCharacter)
-  if (numberOfOpeningParentheses !== numberOfClosingParentheses) {
-    issues.push(
-      generateIssue('parentheses', {
-        opening: numberOfOpeningParentheses,
-        closing: numberOfClosingParentheses,
-      }),
-    )
-  }
-  return issues
-}
-
-/**
- * Check if a comma is missing after an opening parenthesis.
- */
-const isCommaMissingAfterClosingParenthesis = function (lastNonEmptyCharacter, currentCharacter) {
-  return (
-    lastNonEmptyCharacter === closingGroupCharacter &&
-    !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter)
-  )
-}
-
-/**
- * Check for delimiter issues in a HED string (e.g. missing commas adjacent to groups, extra commas or tildes).
- */
-const findDelimiterIssuesInHedString = function (hedString) {
-  const issues = []
-  let lastNonEmptyValidCharacter = ''
-  let lastNonEmptyValidIndex = 0
-  let currentTag = ''
-  for (let i = 0; i < hedString.length; i++) {
-    const currentCharacter = hedString.charAt(i)
-    currentTag += currentCharacter
-    if (stringIsEmpty(currentCharacter)) {
-      continue
-    }
-    if (delimiters.has(currentCharacter)) {
-      if (currentTag.trim() === currentCharacter) {
-        issues.push(
-          generateIssue('extraDelimiter', {
-            character: currentCharacter,
-            index: i,
-            string: hedString,
-          }),
-        )
-        currentTag = ''
-        continue
-      }
-      currentTag = ''
-    } else if (currentCharacter === openingGroupCharacter) {
-      if (currentTag.trim() === openingGroupCharacter) {
-        currentTag = ''
-      } else {
-        issues.push(generateIssue('commaMissing', { tag: currentTag }))
-      }
-    } else if (isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) {
-      issues.push(
-        generateIssue('commaMissing', {
-          tag: currentTag.slice(0, -1),
-        }),
-      )
-      break
-    }
-    lastNonEmptyValidCharacter = currentCharacter
-    lastNonEmptyValidIndex = i
-  }
-  if (delimiters.has(lastNonEmptyValidCharacter)) {
-    issues.push(
-      generateIssue('extraDelimiter', {
-        character: lastNonEmptyValidCharacter,
-        index: lastNonEmptyValidIndex,
-        string: hedString,
-      }),
-    )
-  }
-  return issues
-}
-
-/**
- * Validate the full unparsed HED string.
- *
- * @param {string} hedString The unparsed HED string.
- * @returns {Object<string, Issue[]>} String substitution issues and other issues.
- */
-const validateFullUnparsedHedString = function (hedString) {
-  const [fixedHedString, substitutionIssues] = substituteCharacters(hedString)
-  const delimiterIssues = [].concat(
-    countTagGroupParentheses(fixedHedString),
-    findDelimiterIssuesInHedString(fixedHedString),
-  )
-
-  return {
-    substitution: substitutionIssues,
-    delimiter: delimiterIssues,
-  }
-}
-
-/**
- * Parse a full HED string into an object of tag types.
- *
- * @param {string|ParsedHedString} hedString The full HED string to parse.
- * @param {Schemas} hedSchemas The collection of HED schemas.
- * @returns {[ParsedHedString|null, Object<string, Issue[]>]} The parsed HED tag data and an object containing lists of parsing issues.
- */
-export const parseHedString = function (hedString, hedSchemas) {
-  if (hedString instanceof ParsedHedString) {
-    return [hedString, {}]
-  }
-  const fullStringIssues = validateFullUnparsedHedString(hedString)
-  if (fullStringIssues.delimiter.length > 0) {
-    fullStringIssues.syntax = []
-    return [null, fullStringIssues]
-  }
-  const [parsedTags, splitIssues] = new HedStringSplitter(hedString, hedSchemas).splitHedString()
-  const parsingIssues = Object.assign(fullStringIssues, splitIssues)
-  if (parsedTags === null) {
-    return [null, parsingIssues]
-  }
-  const parsedString = new ParsedHedString(hedString, parsedTags)
-  return [parsedString, parsingIssues]
-}
-
-/**
- * Parse a set of HED strings.
- *
- * @param {string[]|ParsedHedString[]} hedStrings A set of HED strings.
- * @param {Schemas} hedSchemas The collection of HED schemas.
- * @returns {[ParsedHedString[], Object<string, Issue[]>]} The parsed HED strings and any issues found.
- */
-export const parseHedStrings = function (hedStrings, hedSchemas) {
-  return hedStrings
-    .map((hedString) => {
-      return parseHedString(hedString, hedSchemas)
-    })
-    .reduce(
-      ([previousStrings, previousIssues], [currentString, currentIssues]) => {
-        previousStrings.push(currentString)
-        mergeParsingIssues(previousIssues, currentIssues)
-        return [previousStrings, previousIssues]
-      },
-      [[], {}],
-    )
-}
diff --git a/parser/parser.js b/parser/parser.js
new file mode 100644
index 00000000..2e9f9617
--- /dev/null
+++ b/parser/parser.js
@@ -0,0 +1,214 @@
+import { mergeParsingIssues } from '../utils/hedData'
+import { generateIssue } from '../common/issues/issues'
+import ParsedHedString from './parsedHedString'
+import HedStringSplitter from './splitHedString'
+import { getCharacterCount, stringIsEmpty } from '../utils/string'
+
+const openingGroupCharacter = '('
+const closingGroupCharacter = ')'
+const delimiters = new Set([','])
+
+/**
+ * A parser for HED strings.
+ */
+class HedStringParser {
+  /**
+   * The HED string being parsed.
+   * @type {string|ParsedHedString}
+   */
+  hedString
+  /**
+   * The collection of HED schemas.
+   * @type {Schemas}
+   */
+  hedSchemas
+
+  /**
+   * Constructor.
+   *
+   * @param {string|ParsedHedString} hedString The HED string to be parsed.
+   * @param {Schemas} hedSchemas The collection of HED schemas.
+   */
+  constructor(hedString, hedSchemas) {
+    this.hedString = hedString
+    this.hedSchemas = hedSchemas
+  }
+
+  /**
+   * Check if the parentheses in a tag group match.
+   *
+   * @returns {Issue[]} Any issues found related to unmatched parentheses.
+   */
+  _countTagGroupParentheses() {
+    const issues = []
+    const numberOfOpeningParentheses = getCharacterCount(this.hedString, openingGroupCharacter)
+    const numberOfClosingParentheses = getCharacterCount(this.hedString, closingGroupCharacter)
+
+    if (numberOfOpeningParentheses !== numberOfClosingParentheses) {
+      issues.push(
+        generateIssue('parentheses', {
+          opening: numberOfOpeningParentheses,
+          closing: numberOfClosingParentheses,
+        }),
+      )
+    }
+
+    return issues
+  }
+
+  /**
+   * Check if a comma is missing after a closing parenthesis.
+   *
+   * @param {string} lastNonEmptyCharacter The last non-empty character.
+   * @param {string} currentCharacter The current character in the HED string.
+   * @returns {boolean} Whether a comma is missing after a closing parenthesis.
+   */
+  _isCommaMissingAfterClosingParenthesis(lastNonEmptyCharacter, currentCharacter) {
+    return (
+      lastNonEmptyCharacter === closingGroupCharacter &&
+      !(delimiters.has(currentCharacter) || currentCharacter === closingGroupCharacter)
+    )
+  }
+
+  /**
+   * Find delimiter-related issues in a HED string.
+   *
+   * @returns {Issue[]} Any issues related to delimiters.
+   */
+  _findDelimiterIssues() {
+    const issues = []
+    let lastNonEmptyValidCharacter = ''
+    let lastNonEmptyValidIndex = 0
+    let currentTag = ''
+
+    for (let i = 0; i < this.hedString.length; i++) {
+      const currentCharacter = this.hedString.charAt(i)
+      currentTag += currentCharacter
+
+      if (stringIsEmpty(currentCharacter)) {
+        continue
+      }
+
+      if (delimiters.has(currentCharacter)) {
+        if (currentTag.trim() === currentCharacter) {
+          issues.push(
+            generateIssue('extraDelimiter', {
+              character: currentCharacter,
+              index: i,
+              string: this.hedString,
+            }),
+          )
+          currentTag = ''
+          continue
+        }
+        currentTag = ''
+      } else if (currentCharacter === openingGroupCharacter) {
+        if (currentTag.trim() !== openingGroupCharacter) {
+          issues.push(generateIssue('commaMissing', { tag: currentTag }))
+        }
+        currentTag = ''
+      } else if (this._isCommaMissingAfterClosingParenthesis(lastNonEmptyValidCharacter, currentCharacter)) {
+        issues.push(
+          generateIssue('commaMissing', {
+            tag: currentTag.slice(0, -1),
+          }),
+        )
+        break
+      }
+
+      lastNonEmptyValidCharacter = currentCharacter
+      lastNonEmptyValidIndex = i
+    }
+
+    if (delimiters.has(lastNonEmptyValidCharacter)) {
+      issues.push(
+        generateIssue('extraDelimiter', {
+          character: lastNonEmptyValidCharacter,
+          index: lastNonEmptyValidIndex,
+          string: this.hedString,
+        }),
+      )
+    }
+
+    return issues
+  }
+
+  /**
+   * Validate the full unparsed HED string.
+   *
+   * @returns {Object<string, Issue[]>} Any issues found during validation.
+   */
+  _validateFullUnparsedHedString() {
+    const delimiterIssues = [].concat(this._countTagGroupParentheses(), this._findDelimiterIssues())
+
+    return { delimiter: delimiterIssues }
+  }
+
+  /**
+   * Parse a full HED string.
+   *
+   * @returns {[ParsedHedString|null, Object<string, Issue[]>]} The parsed HED string and any parsing issues.
+   */
+  parseHedString() {
+    if (this.hedString instanceof ParsedHedString) {
+      return [this.hedString, {}]
+    }
+
+    const fullStringIssues = this._validateFullUnparsedHedString()
+    if (fullStringIssues.delimiter.length > 0) {
+      fullStringIssues.syntax = []
+      return [null, fullStringIssues]
+    }
+
+    const [parsedTags, splitIssues] = new HedStringSplitter(this.hedString, this.hedSchemas).splitHedString()
+    const parsingIssues = Object.assign(fullStringIssues, splitIssues)
+    if (parsedTags === null) {
+      return [null, parsingIssues]
+    }
+
+    const parsedString = new ParsedHedString(this.hedString, parsedTags)
+    return [parsedString, parsingIssues]
+  }
+
+  /**
+   * Parse a list of HED strings.
+   *
+   * @param {string[]|ParsedHedString[]} hedStrings A list of HED strings.
+   * @param {Schemas} hedSchemas The collection of HED schemas.
+   * @returns {[ParsedHedString[], Object<string, Issue[]>]} The parsed HED strings and any issues found.
+   */
+  static parseHedStrings(hedStrings, hedSchemas) {
+    const parsedStrings = []
+    const cumulativeIssues = {}
+
+    for (const hedString of hedStrings) {
+      const [parsedString, currentIssues] = new HedStringParser(hedString, hedSchemas).parseHedString()
+      parsedStrings.push(parsedString)
+      mergeParsingIssues(cumulativeIssues, currentIssues)
+    }
+
+    return [parsedStrings, cumulativeIssues]
+  }
+}
+
+/**
+ * Parse a HED string.
+ *
+ * @param {string|ParsedHedString} hedString A (possibly already parsed) HED string.
+ * @param {Schemas} hedSchemas The collection of HED schemas.
+ * @returns {[ParsedHedString|null, Object<string, Issue[]>]} The parsed HED string (null if parsing failed) and any issues found.
+ */
+export function parseHedString(hedString, hedSchemas) {
+  return new HedStringParser(hedString, hedSchemas).parseHedString()
+}
+
+/**
+ * Parse a list of HED strings.
+ *
+ * @param {string[]|ParsedHedString[]} hedStrings A list of HED strings.
+ * @param {Schemas} hedSchemas The collection of HED schemas.
+ * @returns {[ParsedHedString[], Object<string, Issue[]>]} The parsed HED strings and any issues found.
+ */
+export function parseHedStrings(hedStrings, hedSchemas) {
+  return HedStringParser.parseHedStrings(hedStrings, hedSchemas)
+}

From 613e64e35a079fcd1c3dba95edebccc97571624b Mon Sep 17 00:00:00 2001
From: Alexander Jones <happy5214@gmail.com>
Date: Fri, 11 Oct 2024 08:22:58 -0500
Subject: [PATCH 3/4] Move invalid character check to tokenizer

This bans DEL and all characters in the C0 and C1 control code ranges,
using the unicode-name NPM module to display the Unicode names of any
invalid characters.
---
 esbuild.mjs                               |  2 +-
 package-lock.json                         | 14 ++++++++++
 package.json                              |  6 ++++-
 parser/{splitHedString.js => splitter.js} |  2 +-
 parser/tokenizer.js                       | 12 ++++++++-
 tests/bids.spec.js                        |  2 +-
 tests/event.spec.js                       | 32 ++++++++---------------
 tests/stringParser.spec.js                |  6 ++---
 8 files changed, 47 insertions(+), 29 deletions(-)
 rename parser/{splitHedString.js => splitter.js} (99%)

diff --git a/esbuild.mjs b/esbuild.mjs
index ec1316eb..785a6a11 100644
--- a/esbuild.mjs
+++ b/esbuild.mjs
@@ -7,7 +7,7 @@ await esbuild.build({
   entryPoints: [path.join(process.cwd(), 'index.js')],
   loader: { '.xml': 'text' },
   outdir: path.join(process.cwd(), 'dist', 'commonjs'),
-  target: 'node12',
+  target: 'node18',
   bundle: true,
   sourcemap: true,
   platform: 'node',
diff --git a/package-lock.json b/package-lock.json
index 03dbdf4e..36be2288 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -19,6 +19,7 @@
         "pluralize": "^8.0.0",
         "semver": "^7.6.0",
         "string_decoder": "^1.3.0",
+        "unicode-name": "^1.0.2",
         "xml2js": "^0.6.2"
       },
       "devDependencies": {
@@ -5191,6 +5192,14 @@
         "node": ">=14.17"
       }
     },
+    "node_modules/unicode-name": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/unicode-name/-/unicode-name-1.0.2.tgz",
+      "integrity": "sha512-PETEgU8TKsHoGZXP/3eWRU/4xnXJKwAIm+H7b0s/6CEP6o+YK4tWbwBXPLKe0U5+njWEAo2snT5+Mvoau6BI8A==",
+      "engines": {
+        "node": ">=18.20"
+      }
+    },
     "node_modules/update-browserslist-db": {
       "version": "1.0.14",
       "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.14.tgz",
@@ -9108,6 +9117,11 @@
       "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==",
       "dev": true
     },
+    "unicode-name": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/unicode-name/-/unicode-name-1.0.2.tgz",
+      "integrity": "sha512-PETEgU8TKsHoGZXP/3eWRU/4xnXJKwAIm+H7b0s/6CEP6o+YK4tWbwBXPLKe0U5+njWEAo2snT5+Mvoau6BI8A=="
+    },
     "update-browserslist-db": {
       "version": "1.0.14",
       "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.14.tgz",
diff --git a/package.json b/package.json
index 79b45ad6..801cf414 100644
--- a/package.json
+++ b/package.json
@@ -45,6 +45,7 @@
     "pluralize": "^8.0.0",
     "semver": "^7.6.0",
     "string_decoder": "^1.3.0",
+    "unicode-name": "^1.0.2",
     "xml2js": "^0.6.2"
   },
   "devDependencies": {
@@ -74,7 +75,10 @@
     "transform": {
       "\\.js$": "esbuild-runner/jest",
       "\\.xml$": "<rootDir>/fileTransformer.js"
-    }
+    },
+    "transformIgnorePatterns": [
+      "node_modules/(?!unicode-name)"
+    ]
   },
   "browser": {
     "fs": false
diff --git a/parser/splitHedString.js b/parser/splitter.js
similarity index 99%
rename from parser/splitHedString.js
rename to parser/splitter.js
index c1c939f0..31038244 100644
--- a/parser/splitHedString.js
+++ b/parser/splitter.js
@@ -47,7 +47,7 @@ export default class HedStringSplitter {
   ParsedHedTagConstructor
 
   /**
-   * Constructor for the HED string parser.
+   * Constructor.
    *
    * @param {string} hedString The HED string to be split and parsed.
    * @param {Schemas} hedSchemas The collection of HED schemas.
diff --git a/parser/tokenizer.js b/parser/tokenizer.js
index 89ecaab3..b308a9d9 100644
--- a/parser/tokenizer.js
+++ b/parser/tokenizer.js
@@ -1,3 +1,5 @@
+import { unicodeName } from 'unicode-name'
+
 import { generateIssue } from '../common/issues/issues'
 import { stringIsEmpty } from '../utils/string'
 import { replaceTagNameWithPound } from '../utils/hedStrings'
@@ -12,6 +14,14 @@ const slashCharacter = '/'
 
 const invalidCharacters = new Set(['[', ']', '~', '"'])
 const invalidCharactersOutsideOfValues = new Set([':'])
+// C0 control codes
+for (let i = 0x00; i <= 0x1f; i++) {
+  invalidCharacters.add(String.fromCodePoint(i))
+}
+// DEL and C1 control codes
+for (let i = 0x7f; i <= 0x9f; i++) {
+  invalidCharacters.add(String.fromCodePoint(i))
+}
 
 /**
  * A specification for a tokenized substring.
@@ -366,7 +376,7 @@ export class HedStringTokenizer {
   _pushInvalidCharacterIssue(character, index) {
     this.syntaxIssues.push(
       generateIssue('invalidCharacter', {
-        character: character,
+        character: unicodeName(character),
         index: index,
         string: this.hedString,
       }),
diff --git a/tests/bids.spec.js b/tests/bids.spec.js
index af427877..c210f90f 100644
--- a/tests/bids.spec.js
+++ b/tests/bids.spec.js
@@ -668,7 +668,7 @@ describe('BIDS datasets', () => {
         syntax: [
           BidsHedIssue.fromHedIssue(
             generateIssue('invalidCharacter', {
-              character: '{',
+              character: 'LEFT CURLY BRACKET',
               index: 9,
               string: '(Def/Acc/{response_time})',
             }),
diff --git a/tests/event.spec.js b/tests/event.spec.js
index 87ef696b..1a364e62 100644
--- a/tests/event.spec.js
+++ b/tests/event.spec.js
@@ -184,11 +184,13 @@ describe('HED string and event validation', () => {
           closingBracket: '/Attribute/Object side/Left,/Participant/Effect]/Body part/Arm',
           tilde: '/Attribute/Object side/Left,/Participant/Effect~/Body part/Arm',
           doubleQuote: '/Attribute/Object side/Left,/Participant/Effect"/Body part/Arm',
+          null: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\0',
+          tab: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\t',
         }
         const expectedIssues = {
           openingBrace: [
             generateIssue('invalidCharacter', {
-              character: '{',
+              character: 'LEFT CURLY BRACKET',
               index: 47,
               string: testStrings.openingBrace,
             }),
@@ -201,54 +203,42 @@ describe('HED string and event validation', () => {
           ],
           openingBracket: [
             generateIssue('invalidCharacter', {
-              character: '[',
+              character: 'LEFT SQUARE BRACKET',
               index: 47,
               string: testStrings.openingBracket,
             }),
           ],
           closingBracket: [
             generateIssue('invalidCharacter', {
-              character: ']',
+              character: 'RIGHT SQUARE BRACKET',
               index: 47,
               string: testStrings.closingBracket,
             }),
           ],
           tilde: [
             generateIssue('invalidCharacter', {
-              character: '~',
+              character: 'TILDE',
               index: 47,
               string: testStrings.tilde,
             }),
           ],
           doubleQuote: [
             generateIssue('invalidCharacter', {
-              character: '"',
+              character: 'QUOTATION MARK',
               index: 47,
               string: testStrings.doubleQuote,
             }),
           ],
-        }
-        // No-op function as this check is done during the parsing stage.
-        // eslint-disable-next-line no-unused-vars
-        validatorSyntactic(testStrings, expectedIssues, (validator) => {})
-      })
-
-      it('should substitute and warn for certain illegal characters', () => {
-        const testStrings = {
-          nul: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\0',
-          tab: '/Attribute/Object side/Left,/Participant/Effect/Body part/Arm\t',
-        }
-        const expectedIssues = {
-          nul: [
+          null: [
             generateIssue('invalidCharacter', {
-              character: 'ASCII NUL',
+              character: 'NULL',
               index: 61,
-              string: testStrings.nul,
+              string: testStrings.null,
             }),
           ],
           tab: [
             generateIssue('invalidCharacter', {
-              character: 'Tab',
+              character: 'CHARACTER TABULATION',
               index: 61,
               string: testStrings.tab,
             }),
diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js
index 9c46be82..dcb83eec 100644
--- a/tests/stringParser.spec.js
+++ b/tests/stringParser.spec.js
@@ -84,7 +84,7 @@ describe('HED string parsing', () => {
           conversion: [],
           syntax: [
             generateIssue('invalidCharacter', {
-              character: '[',
+              character: 'LEFT SQUARE BRACKET',
               index: 56,
               string: testStrings.openingSquare,
             }),
@@ -94,7 +94,7 @@ describe('HED string parsing', () => {
           conversion: [],
           syntax: [
             generateIssue('invalidCharacter', {
-              character: ']',
+              character: 'RIGHT SQUARE BRACKET',
               index: 56,
               string: testStrings.closingSquare,
             }),
@@ -104,7 +104,7 @@ describe('HED string parsing', () => {
           conversion: [],
           syntax: [
             generateIssue('invalidCharacter', {
-              character: '~',
+              character: 'TILDE',
               index: 56,
               string: testStrings.tilde,
             }),

From cae39a34780a1c83a65bba8eba325b88dac0c3fd Mon Sep 17 00:00:00 2001
From: Alexander Jones <happy5214@gmail.com>
Date: Fri, 11 Oct 2024 08:28:32 -0500
Subject: [PATCH 4/4] Fix imports broken by previous file moves

---
 bids/types/json.js                    | 2 +-
 bids/validator/bidsHedTsvValidator.js | 2 +-
 converter/converter.js                | 2 +-
 parser/columnSplicer.js               | 2 +-
 parser/parser.js                      | 2 +-
 tests/bids.spec.js                    | 2 +-
 tests/event.spec.js                   | 2 +-
 tests/stringParser.spec.js            | 4 ++--
 validator/dataset.js                  | 2 +-
 validator/event/init.js               | 2 +-
 10 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/bids/types/json.js b/bids/types/json.js
index 36da7826..814b531c 100644
--- a/bids/types/json.js
+++ b/bids/types/json.js
@@ -1,7 +1,7 @@
 import isPlainObject from 'lodash/isPlainObject'
 
 import { sidecarValueHasHed } from '../utils'
-import { parseHedString } from '../../parser/main'
+import { parseHedString } from '../../parser/parser'
 import ParsedHedString from '../../parser/parsedHedString'
 import { BidsFile } from './basic'
 import BidsHedSidecarValidator from '../validator/bidsHedSidecarValidator'
diff --git a/bids/validator/bidsHedTsvValidator.js b/bids/validator/bidsHedTsvValidator.js
index 6ecf52d2..231ea14b 100644
--- a/bids/validator/bidsHedTsvValidator.js
+++ b/bids/validator/bidsHedTsvValidator.js
@@ -1,7 +1,7 @@
 import BidsHedSidecarValidator from './bidsHedSidecarValidator'
 import { BidsHedIssue, BidsIssue } from '../types/issues'
 import { BidsTsvEvent, BidsTsvRow } from '../types/tsv'
-import { parseHedString } from '../../parser/main'
+import { parseHedString } from '../../parser/parser'
 import ColumnSplicer from '../../parser/columnSplicer'
 import ParsedHedString from '../../parser/parsedHedString'
 import { generateIssue } from '../../common/issues/issues'
diff --git a/converter/converter.js b/converter/converter.js
index 6c7b1f59..6c21c56f 100644
--- a/converter/converter.js
+++ b/converter/converter.js
@@ -1,4 +1,4 @@
-import { parseHedString } from '../parser/main'
+import { parseHedString } from '../parser/parser'
 
 /**
  * Convert a HED string.
diff --git a/parser/columnSplicer.js b/parser/columnSplicer.js
index bc27e02e..cacb7b64 100644
--- a/parser/columnSplicer.js
+++ b/parser/columnSplicer.js
@@ -2,7 +2,7 @@ import ParsedHedString from './parsedHedString'
 import ParsedHedColumnSplice from './parsedHedColumnSplice'
 import ParsedHedGroup from './parsedHedGroup'
 import { generateIssue } from '../common/issues/issues'
-import { parseHedString } from './main'
+import { parseHedString } from './parser'
 
 export class ColumnSplicer {
   /**
diff --git a/parser/parser.js b/parser/parser.js
index 2e9f9617..a86e05f3 100644
--- a/parser/parser.js
+++ b/parser/parser.js
@@ -1,7 +1,7 @@
 import { mergeParsingIssues } from '../utils/hedData'
 import { generateIssue } from '../common/issues/issues'
 import ParsedHedString from './parsedHedString'
-import HedStringSplitter from './splitHedString'
+import HedStringSplitter from './splitter'
 import { getCharacterCount, stringIsEmpty } from '../utils/string'
 
 const openingGroupCharacter = '('
diff --git a/tests/bids.spec.js b/tests/bids.spec.js
index c210f90f..f62b3de9 100644
--- a/tests/bids.spec.js
+++ b/tests/bids.spec.js
@@ -8,7 +8,7 @@ import { SchemaSpec, SchemasSpec } from '../common/schema/types'
 import { buildBidsSchemas, parseSchemasSpec } from '../bids/schema'
 import { BidsDataset, BidsHedIssue, BidsIssue, validateBidsDataset } from '../bids'
 import { bidsDatasetDescriptions, bidsSidecars, bidsTsvFiles } from './bids.spec.data'
-import { parseHedString } from '../parser/main'
+import { parseHedString } from '../parser/parser'
 import { BidsHedTsvParser } from '../bids/validator/bidsHedTsvValidator'
 
 describe('BIDS datasets', () => {
diff --git a/tests/event.spec.js b/tests/event.spec.js
index 1a364e62..dfc22f2e 100644
--- a/tests/event.spec.js
+++ b/tests/event.spec.js
@@ -4,7 +4,7 @@ import { beforeAll, describe, it } from '@jest/globals'
 
 import * as hed from '../validator/event'
 import { buildSchemas } from '../validator/schema/init'
-import { parseHedString } from '../parser/main'
+import { parseHedString } from '../parser/parser'
 import { ParsedHedTag } from '../parser/parsedHedTag'
 import { HedValidator, Hed2Validator, Hed3Validator } from '../validator/event'
 import { generateIssue } from '../common/issues/issues'
diff --git a/tests/stringParser.spec.js b/tests/stringParser.spec.js
index dcb83eec..c9335456 100644
--- a/tests/stringParser.spec.js
+++ b/tests/stringParser.spec.js
@@ -5,9 +5,9 @@ import { beforeAll, describe, it } from '@jest/globals'
 import { generateIssue } from '../common/issues/issues'
 import { Schemas, SchemaSpec, SchemasSpec } from '../common/schema/types'
 import { recursiveMap } from '../utils/array'
-import { parseHedString } from '../parser/main'
+import { parseHedString } from '../parser/parser'
 import { ParsedHedTag } from '../parser/parsedHedTag'
-import HedStringSplitter from '../parser/splitHedString'
+import HedStringSplitter from '../parser/splitter'
 import { buildSchemas } from '../validator/schema/init'
 import ColumnSplicer from '../parser/columnSplicer'
 import ParsedHedGroup from '../parser/parsedHedGroup'
diff --git a/validator/dataset.js b/validator/dataset.js
index 29dcd90e..831b1209 100644
--- a/validator/dataset.js
+++ b/validator/dataset.js
@@ -2,7 +2,7 @@ import zip from 'lodash/zip'
 
 import { generateIssue, Issue } from '../common/issues/issues'
 import { validateHedEventWithDefinitions } from './event'
-import { parseHedStrings } from '../parser/main'
+import { parseHedStrings } from '../parser/parser'
 import { filterNonEqualDuplicates } from '../utils/map'
 
 /**
diff --git a/validator/event/init.js b/validator/event/init.js
index b8ed69eb..6c27f44d 100644
--- a/validator/event/init.js
+++ b/validator/event/init.js
@@ -1,4 +1,4 @@
-import { parseHedString } from '../../parser/main'
+import { parseHedString } from '../../parser/parser'
 import ParsedHedString from '../../parser/parsedHedString'
 import { Schemas } from '../../common/schema/types'