From 2a2dd9171cf565df32125b0e70460e7bc505dcb2 Mon Sep 17 00:00:00 2001 From: Nikolay Kostyurin Date: Tue, 26 Dec 2023 11:02:20 +0200 Subject: [PATCH] refactor: parser --- benchmark/{test_syntax.js => test.js} | 0 packages/bbob-parser/src/Token.ts | 25 +++++---- packages/bbob-parser/src/parse.ts | 81 ++++++++++++++++++++------- packages/bbob-parser/src/utils.ts | 78 +++++++++----------------- 4 files changed, 102 insertions(+), 82 deletions(-) rename benchmark/{test_syntax.js => test.js} (100%) diff --git a/benchmark/test_syntax.js b/benchmark/test.js similarity index 100% rename from benchmark/test_syntax.js rename to benchmark/test.js diff --git a/packages/bbob-parser/src/Token.ts b/packages/bbob-parser/src/Token.ts index 4513d267..fb77d050 100644 --- a/packages/bbob-parser/src/Token.ts +++ b/packages/bbob-parser/src/Token.ts @@ -6,10 +6,10 @@ import { // type, value, line, row, -const TOKEN_TYPE_ID = 'type'; // 0; -const TOKEN_VALUE_ID = 'value'; // 1; -const TOKEN_COLUMN_ID = 'row'; // 2; -const TOKEN_LINE_ID = 'line'; // 3; +const TOKEN_TYPE_ID = 't'; // 0; +const TOKEN_VALUE_ID = 'v'; // 1; +const TOKEN_COLUMN_ID = 'r'; // 2; +const TOKEN_LINE_ID = 'l'; // 3; const TOKEN_TYPE_WORD = 1; // 'word'; const TOKEN_TYPE_TAG = 2; // 'tag'; @@ -60,7 +60,6 @@ const isAttrNameToken = (token: Token) => { return false; }; - const isAttrValueToken = (token: Token) => { if (token && typeof token[TOKEN_TYPE_ID] !== 'undefined') { return token[TOKEN_TYPE_ID] === TOKEN_TYPE_ATTR_VALUE; @@ -75,7 +74,7 @@ const getTagName = (token: Token) => { return isTagEnd(token) ? value.slice(1) : value; }; -const convertTagToText = (token: Token) => { +const tokenToText = (token: Token) => { let text = OPEN_BRAKET; text += getTokenValue(token); @@ -89,10 +88,10 @@ const convertTagToText = (token: Token) => { * @class Token */ class Token { - private type: number - private value: string - private line: number - private row: number + private t: number // type + private v: string // value + private l: number // line + private r: number // row constructor(type?: number, value?: TokenValue, row: number = 0, col: number = 0) { this[TOKEN_LINE_ID] = row; @@ -101,6 +100,10 @@ class Token { this[TOKEN_VALUE_ID] = String(value); } + get type() { + return this[TOKEN_TYPE_ID] + } + isEmpty() { // eslint-disable-next-line no-restricted-globals return isNaN(this[TOKEN_TYPE_ID]); @@ -147,7 +150,7 @@ class Token { } toString() { - return convertTagToText(this); + return tokenToText(this); } } diff --git a/packages/bbob-parser/src/parse.ts b/packages/bbob-parser/src/parse.ts index ce7b601f..5270d9b2 100644 --- a/packages/bbob-parser/src/parse.ts +++ b/packages/bbob-parser/src/parse.ts @@ -6,7 +6,6 @@ import { } from '@bbob/plugin-helper'; import { createLexer } from './lexer'; -import { createList } from './utils'; import type { NodeContent, TagNodeTree } from '@bbob/plugin-helper' import type { LexerTokenizer, LexerOptions } from './lexer'; @@ -28,6 +27,48 @@ export interface ParseOptions { onError?: (error: ParseError) => void } +class NodeList { + private m: Map + private c: number + + constructor() { + this.m = new Map() + this.c = 0 + } + + last() { + const node = this.m.get(this.c) + + if (node) { + return node + } + + return null + } + + flush() { + if (this.c > 0) { + const item = this.m.get(this.c) + this.m.delete(this.c) + this.c = this.c - 1 + return item + } + + return false + } + + push(value: Value) { + this.c = this.c + 1; + this.m.set(this.c, value) + } + + toArray() { + return [...this.m.values()] + } +} + +const createList = () => new NodeList(); + function parse(input: string, opts: ParseOptions = {}) { const options = opts; const openTag = options.openTag || OPEN_BRAKET; @@ -103,8 +144,8 @@ function parse(input: string, opts: ParseOptions = {}) { * @private */ function flushTagNodes() { - if (tagNodes.flushLast()) { - tagNodesAttrName.flushLast(); + if (tagNodes.flush()) { + tagNodesAttrName.flush(); } } @@ -112,7 +153,7 @@ function parse(input: string, opts: ParseOptions = {}) { * @private */ function getNodes() { - const lastNestedNode = nestedNodes.getLast(); + const lastNestedNode = nestedNodes.last(); if (lastNestedNode && isTagNode(lastNestedNode)) { return lastNestedNode.content; @@ -184,7 +225,7 @@ function parse(input: string, opts: ParseOptions = {}) { function handleTagEnd(token: Token) { flushTagNodes(); - const lastNestedNode = nestedNodes.flushLast(); + const lastNestedNode = nestedNodes.flush(); if (lastNestedNode) { const nodes = getNodes() @@ -226,42 +267,42 @@ function parse(input: string, opts: ParseOptions = {}) { /** * @type {TagNode} */ - const lastTagNode = tagNodes.getLast(); + const activeTagNode = tagNodes.last(); const tokenValue = token.getValue(); const isNested = isTagNested(token.toString()); const nodes = getNodes() - if (lastTagNode !== null) { + if (activeTagNode !== null) { if (token.isAttrName()) { tagNodesAttrName.push(tokenValue); - const last = tagNodesAttrName.getLast() + const attrName = tagNodesAttrName.last() - if (last) { - lastTagNode.attr(last, ''); + if (attrName) { + activeTagNode.attr(attrName, ''); } } else if (token.isAttrValue()) { - const attrName = tagNodesAttrName.getLast(); + const attrName = tagNodesAttrName.last(); if (attrName) { - lastTagNode.attr(attrName, tokenValue); - tagNodesAttrName.flushLast(); + activeTagNode.attr(attrName, tokenValue); + tagNodesAttrName.flush(); } else { - lastTagNode.attr(tokenValue, tokenValue); + activeTagNode.attr(tokenValue, tokenValue); } } else if (token.isText()) { if (isNested) { - lastTagNode.append(tokenValue); + activeTagNode.append(tokenValue); } else { appendNodes(nodes, tokenValue); } } else if (token.isTag()) { - // if tag is not allowed, just past it as is + // if tag is not allowed, just pass it as is appendNodes(nodes, token.toString()); } } else if (token.isText()) { appendNodes(nodes, tokenValue); } else if (token.isTag()) { - // if tag is not allowed, just past it as is + // if tag is not allowed, just pass it as is appendNodes(nodes, token.toString()); } } @@ -278,7 +319,9 @@ function parse(input: string, opts: ParseOptions = {}) { } } - tokenizer = (opts.createTokenizer ? opts.createTokenizer : createLexer)(input, { + const lexer = opts.createTokenizer ? opts.createTokenizer : createLexer + + tokenizer = lexer(input, { onToken, openTag, closeTag, @@ -293,7 +336,7 @@ function parse(input: string, opts: ParseOptions = {}) { // handles situations where we open tag, but forgot close them // for ex [q]test[/q][u]some[/u][q]some [u]some[/u] // forgot to close [/q] // so we need to flush nested content to nodes array - const lastNestedNode = nestedNodes.flushLast(); + const lastNestedNode = nestedNodes.flush(); if (lastNestedNode !== null && lastNestedNode && isTagNode(lastNestedNode) && isTagNested(lastNestedNode.tag)) { appendNodeAsString(getNodes(), lastNestedNode, false); } diff --git a/packages/bbob-parser/src/utils.ts b/packages/bbob-parser/src/utils.ts index b4f822af..4df33621 100644 --- a/packages/bbob-parser/src/utils.ts +++ b/packages/bbob-parser/src/utils.ts @@ -8,86 +8,86 @@ export type CharGrabberOptions = { } export class CharGrabber { - private source: string; - private cursor: { len: number; pos: number }; - private options: CharGrabberOptions; + private s: string; + private c: { len: number; pos: number }; + private o: CharGrabberOptions; constructor(source: string, options: CharGrabberOptions = {}) { - this.source = source - this.cursor = { + this.s = source + this.c = { pos: 0, len: source.length, }; - this.options = options + this.o = options } skip(num = 1, silent?: boolean) { - this.cursor.pos += num; + this.c.pos += num; - if (this.options && this.options.onSkip && !silent) { - this.options.onSkip(); + if (this.o && this.o.onSkip && !silent) { + this.o.onSkip(); } } hasNext() { - return this.cursor.len > this.cursor.pos + return this.c.len > this.c.pos } getCurr() { - return this.source[this.cursor.pos] + return this.s[this.c.pos] } getRest() { - return this.source.substring(this.cursor.pos) + return this.s.substring(this.c.pos) } getNext() { - const nextPos = this.cursor.pos + 1; + const nextPos = this.c.pos + 1; - return nextPos <= (this.source.length - 1) ? this.source[nextPos] : null; + return nextPos <= (this.s.length - 1) ? this.s[nextPos] : null; } getPrev() { - const prevPos = this.cursor.pos - 1; + const prevPos = this.c.pos - 1; - return typeof this.source[prevPos] !== 'undefined' ? this.source[prevPos] : null; + return typeof this.s[prevPos] !== 'undefined' ? this.s[prevPos] : null; } isLast() { - return this.cursor.pos === this.cursor.len + return this.c.pos === this.c.len } includes(val: string) { - return this.source.indexOf(val, this.cursor.pos) >= 0 + return this.s.indexOf(val, this.c.pos) >= 0 } - grabWhile(cond: (curr: string) => boolean, silent?: boolean) { + grabWhile(condition: (curr: string) => boolean, silent?: boolean) { let start = 0; if (this.hasNext()) { - start = this.cursor.pos; + start = this.c.pos; - while (this.hasNext() && cond(this.getCurr())) { + while (this.hasNext() && condition(this.getCurr())) { this.skip(1, silent); } } - return this.source.substring(start, this.cursor.pos); + return this.s.substring(start, this.c.pos); } grabN(num: number = 0) { - return this.source.substring(this.cursor.pos, this.cursor.pos + num) + return this.s.substring(this.c.pos, this.c.pos + num) } /** * Grabs rest of string until it find a char */ substrUntilChar(char: string) { - const { pos } = this.cursor; - const idx = this.source.indexOf(char, pos); + const { pos } = this.c; + const idx = this.s.indexOf(char, pos); - return idx >= 0 ? this.source.substring(pos, idx) : ''; + return idx >= 0 ? this.s.substring(pos, idx) : ''; } } @@ -119,29 +119,3 @@ export const trimChar = (str: string, charToRemove: string) => { * Unquotes \" to " */ export const unquote = (str: string) => str.replace(BACKSLASH + QUOTEMARK, QUOTEMARK); - -export class NodeList { - constructor(private nodes: Value[] = []) { - } - - getLast() { - return ( - Array.isArray(this.nodes) && this.nodes.length > 0 && typeof this.nodes[this.nodes.length - 1] !== 'undefined' - ? this.nodes[this.nodes.length - 1] - : null) - } - - flushLast() { - return (this.nodes.length ? this.nodes.pop() : false) - } - - push(value: Value) { - return this.nodes.push(value) - } - - toArray() { - return this.nodes - } -} - -export const createList = (values: Type[] = []) => new NodeList(values);