Skip to content

Commit

Permalink
Optimizing code and converting to MurmurHash
Browse files Browse the repository at this point in the history
Signed-off-by: Mike Lischke <mike@lischke-online.de>
  • Loading branch information
mike-lischke committed Feb 12, 2024
1 parent 2301cbb commit 7acb938
Show file tree
Hide file tree
Showing 17 changed files with 535 additions and 341 deletions.
4 changes: 4 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
"tests/benchmarks/run-benchmarks.ts",
],
"sourceMaps": true,
"resolveSourceMapLocations": [
"${workspaceFolder}/**",
"!**/node_modules/**"
],
},
{
"type": "node",
Expand Down
7 changes: 2 additions & 5 deletions build/generate-test-parsers.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@

printf "\x1b[1m\x1b[34mGenerating test parsers...\x1b[0m\n\n"

# Temporarily copy the ANTLR4 jar to the antlr4ng-cli directory, to allow using a newer version, not yet published to npm.
cp cli/antlr4-4.13.2-SNAPSHOT-complete.jar node_modules/antlr4ng-cli/antlr4-4.13.2-SNAPSHOT-complete.jar

antlr4ng -Dlanguage=TypeScript -visitor -Xexact-output-dir -o ./tests/generated ./tests/fixtures/grammars/*.g4
antlr4ng -Dlanguage=TypeScript -o tests/benchmarks/generated -visitor -listener -Xexact-output-dir tests/benchmarks/MySQLLexer.g4 tests/benchmarks/MySQLParser.g4
java -jar cli/antlr4-4.13.2-SNAPSHOT-complete.jar -Dlanguage=TypeScript -visitor -Xexact-output-dir -o ./tests/generated ./tests/fixtures/grammars/*.g4
java -jar cli/antlr4-4.13.2-SNAPSHOT-complete.jar -Dlanguage=TypeScript -o tests/benchmarks/generated -visitor -listener -Xexact-output-dir tests/benchmarks/MySQLLexer.g4 tests/benchmarks/MySQLParser.g4

printf "done\n\n"
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
"time-lexer-speed": "node --no-warnings --experimental-vm-modules --loader ts-node/esm tests/api/perf/TimeLexerSpeed.ts",
"generate-test-parsers": "./build/generate-test-parsers.sh",
"generate-runtime-tests": "antlr-tgen --config tests/fixtures/config.json",
"build-bundle": "esbuild ./src/index.js --main-fields=module,main --bundle --target=esnext",
"build-bundle": "esbuild ./src/index.js --main-fields=module,main --bundle --target=esnext --sourcemap=external",
"build-mjs": "npm run build-bundle -- --outfile=dist/index.mjs --format=esm",
"build-mjs-minified": "npm run build-mjs -- --minify",
"build-cjs": "npm run build-bundle -- --outfile=dist/index.cjs --format=cjs",
Expand Down
5 changes: 3 additions & 2 deletions src/BufferedTokenStream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,9 @@ export class BufferedTokenStream implements TokenStream {
}

case 1: {
if (args[0] instanceof Interval) {
const interval = args[0];
const o = args[0] as Object;
if ("start" in o) {
const interval = o as Interval;
const start = interval.start;
let stop = interval.stop;
if (start < 0 || stop < 0) {
Expand Down
6 changes: 3 additions & 3 deletions src/Token.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ export interface Token {
column: number;

/**
* Return the channel this token. Each token can arrive at the parser
* on a different channel, but the parser only "tunes" to a single channel.
* The parser ignores everything not on DEFAULT_CHANNEL.
* Return the channel of this token. Each token can arrive at the parser
* on a different channel, but the parser only "tunes" to a single channel.
* The parser ignores everything not on DEFAULT_CHANNEL.
*/
channel: number;

Expand Down
1 change: 0 additions & 1 deletion src/atn/ATN.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ export class ATN {
}

atnState.nextTokenWithinRule = this.nextTokens(atnState, null);
atnState.nextTokenWithinRule.setReadonly(true);

return atnState.nextTokenWithinRule;

Expand Down
80 changes: 45 additions & 35 deletions src/atn/ATNConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
/* eslint-disable jsdoc/require-param, jsdoc/require-returns */

import { SemanticContext } from "./SemanticContext.js";
import { HashCode } from "../misc/HashCode.js";
import { ATNState } from "./ATNState.js";
import { PredictionContext } from "./PredictionContext.js";
import { Recognizer } from "../Recognizer.js";
import { ATNSimulator } from "./ATNSimulator.js";
import { MurmurHash } from "../utils/MurmurHash.js";

export interface IATNConfigParameters {
state?: ATNState | null,
Expand Down Expand Up @@ -81,13 +81,6 @@ export class ATNConfig {
/** What alt (or lexer rule) is predicted by this configuration */
public readonly alt: number;

/**
* The stack of invoking states leading to the rule/states associated
* with this config. We track only those contexts pushed during
* execution of the ATN simulator.
*/
public context: PredictionContext | null = null;

/**
* We cannot execute predicates dependent upon local context unless
* we know for sure we are in the correct context. Because there is
Expand All @@ -99,12 +92,15 @@ export class ATNConfig {
* depth > 0. Note that it may not be totally accurate depth since I
* don't ever decrement. TODO: make it a boolean then
*/
public reachesIntoOuterContext: number;
public reachesIntoOuterContext: number; // Not used in hash code.

public precedenceFilterSuppressed = false;
public precedenceFilterSuppressed = false; // Not used in hash code.

public readonly semanticContext: SemanticContext;

#context: PredictionContext | null = null;
#cachedHashCode: number | undefined;

/**
* @param {object} params A tuple: (ATN state, predicted alt, syntactic, semantic context).
* The syntactic context is a graph-structured stack node whose
Expand All @@ -114,7 +110,6 @@ export class ATNConfig {
* an ATN state
*/
public constructor(params: IATNConfigParameters, config: ATNConfig | null) {
//this.checkContext(params, config);
const checkedParams = checkParams(params);
const checkedConfig = checkConfig(config);

Expand All @@ -131,52 +126,67 @@ export class ATNConfig {
}

public hashCode(): number {
const hash = new HashCode();
this.updateHashCode(hash);
if (this.#cachedHashCode === undefined) {
let hashCode = MurmurHash.initialize(7);
hashCode = MurmurHash.update(hashCode, this.state.stateNumber);
hashCode = MurmurHash.update(hashCode, this.alt);
hashCode = MurmurHash.update(hashCode, this.context);
hashCode = MurmurHash.update(hashCode, this.semanticContext);
hashCode = MurmurHash.finish(hashCode, 4);
this.#cachedHashCode = hashCode;
}

return hash.finish();
return this.#cachedHashCode;
}

public updateHashCode(hash: HashCode): void {
hash.update(this.state.stateNumber, this.alt, this.context, this.semanticContext);
/**
* The stack of invoking states leading to the rule/states associated
* with this config. We track only those contexts pushed during
* execution of the ATN simulator.
*/
public get context(): PredictionContext | null {
return this.#context;
}

/**
* Replaces the stored prediction context. The context is one of the values fed into
* `hashCode()`, so the memoized hash code is reset here and gets recomputed on the
* next `hashCode()` call.
*/
public set context(context: PredictionContext | null) {
this.#context = context;
// Drop the memoized hash, it was computed with the previous context.
this.#cachedHashCode = undefined;
}

/**
* An ATN configuration is equal to another if both have
* the same state, they predict the same alternative, and
* syntactic/semantic contexts are the same
*/
public equals(other: unknown): boolean {
public equals(other: ATNConfig): boolean {
if (this === other) {
return true;
} else if (!(other instanceof ATNConfig)) {
return false;
} else {
return (this.state.stateNumber === other.state.stateNumber) &&
(this.alt === other.alt) &&
(this.context === null ? other.context === null : this.context.equals(other.context)) &&
this.semanticContext.equals(other.semanticContext) &&
this.precedenceFilterSuppressed === other.precedenceFilterSuppressed;
}

return (this.state.stateNumber === other.state.stateNumber) &&
(this.alt === other.alt) &&
(this.context === null ? other.context === null : this.context.equals(other.context)) &&
this.semanticContext.equals(other.semanticContext) &&
this.precedenceFilterSuppressed === other.precedenceFilterSuppressed;
}

public hashCodeForConfigSet(): number {
const hash = new HashCode();
hash.update(this.state.stateNumber, this.alt, this.semanticContext);
let hashCode = 7;
hashCode = 31 * hashCode + this.state.stateNumber;
hashCode = 31 * hashCode + this.alt;
hashCode = 31 * hashCode + this.semanticContext.hashCode();

return hash.finish();
return hashCode;
}

public equalsForConfigSet(other: unknown): boolean {
public equalsForConfigSet(other: ATNConfig): boolean {
if (this === other) {
return true;
} else if (!(other instanceof ATNConfig)) {
return false;
} else {
return this.state.stateNumber === other.state.stateNumber &&
this.alt === other.alt &&
this.semanticContext.equals(other.semanticContext);
}

return this.state.stateNumber === other.state.stateNumber &&
this.alt === other.alt &&
this.semanticContext.equals(other.semanticContext);
}

public toString(_recog?: Recognizer<ATNSimulator> | null, showAlt = true): string {
Expand Down
78 changes: 44 additions & 34 deletions src/atn/ATNConfigSet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import { ATN } from "./ATN.js";
import { SemanticContext } from "./SemanticContext.js";
import { merge } from "./PredictionContextUtils.js";
import { HashSet } from "../misc/HashSet.js";
import { HashCode } from "../misc/HashCode.js";

import { equalArrays, arrayToString } from "../utils/helpers.js";
import { ATNConfig } from "./ATNConfig.js";
Expand All @@ -19,6 +18,7 @@ import { DoubleDict } from "../utils/DoubleDict.js";
import { PredictionContext } from "./PredictionContext.js";
import { ATNState } from "./ATNState.js";
import { ATNSimulator } from "./ATNSimulator.js";
import { MurmurHash } from "../utils/MurmurHash.js";

const hashATNConfig = (c: ATNConfig) => {
return c.hashCodeForConfigSet();
Expand Down Expand Up @@ -82,19 +82,21 @@ export class ATNConfigSet {

public conflictingAlts: BitSet | null = null;

private cachedHashCode = -1;
#cachedHashCode = -1;

public constructor(fullCtxOrOldSet?: boolean | ATNConfigSet) {
if (fullCtxOrOldSet instanceof ATNConfigSet) {
const old = fullCtxOrOldSet;

this.addAll(old.configs);
this.uniqueAlt = old.uniqueAlt;
this.conflictingAlts = old.conflictingAlts;
this.hasSemanticContext = old.hasSemanticContext;
this.dipsIntoOuterContext = old.dipsIntoOuterContext;
} else {
this.fullCtx = fullCtxOrOldSet ?? true;
if (fullCtxOrOldSet !== undefined) {
if (typeof fullCtxOrOldSet === "boolean") {
this.fullCtx = fullCtxOrOldSet ?? true;
} else {
const old = fullCtxOrOldSet;

this.addAll(old.configs);
this.uniqueAlt = old.uniqueAlt;
this.conflictingAlts = old.conflictingAlts;
this.hasSemanticContext = old.hasSemanticContext;
this.dipsIntoOuterContext = old.dipsIntoOuterContext;
}
}
}

Expand Down Expand Up @@ -128,7 +130,7 @@ export class ATNConfigSet {

const existing = this.configLookup!.add(config);
if (existing === config) {
this.cachedHashCode = -1;
this.#cachedHashCode = -1;
this.configs.push(config); // track order here

return true;
Expand Down Expand Up @@ -220,33 +222,32 @@ export class ATNConfigSet {
return false;
}

/**
 * Compares this configuration set with another one.
 *
 * Two sets are equal if they are the same instance, or if all of `fullCtx`, `uniqueAlt`,
 * `conflictingAlts`, `hasSemanticContext` and `dipsIntoOuterContext` match AND their `configs`
 * lists are considered equal by `equalArrays`. Note that `conflictingAlts` is compared by
 * identity (`===`), not by content.
 *
 * @param other The configuration set to compare with.
 *
 * @returns `true` if both sets are equal, otherwise `false`.
 */
public equals(other: ATNConfigSet): boolean {
    if (this === other) {
        return true;
    }

    // Check the cheap scalar fields first. A mismatch in any of them makes the sets different,
    // regardless of their configs, so the (more expensive) array comparison can be skipped.
    if (this.fullCtx !== other.fullCtx ||
        this.uniqueAlt !== other.uniqueAlt ||
        this.conflictingAlts !== other.conflictingAlts ||
        this.hasSemanticContext !== other.hasSemanticContext ||
        this.dipsIntoOuterContext !== other.dipsIntoOuterContext) {
        return false;
    }

    // Matching flags alone are not enough, the configs must match as well.
    return equalArrays(this.configs, other.configs);
}

public updateHashCode(hash: HashCode): void {
public hashCode(): number {
if (this.readOnly) {
if (this.cachedHashCode === -1) {
this.cachedHashCode = this.hashCode();
if (this.#cachedHashCode === -1) {
this.#cachedHashCode = this.computeHashCode();
}
hash.update(this.cachedHashCode);
} else {
hash.update(this.hashCode());

return this.#cachedHashCode;
}

return this.computeHashCode();
}

public get length(): number {
Expand Down Expand Up @@ -278,7 +279,7 @@ export class ATNConfigSet {
throw new Error("This set is readonly");
}
this.configs = [];
this.cachedHashCode = -1;
this.#cachedHashCode = -1;
this.configLookup = new HashSet();
}

Expand All @@ -297,4 +298,13 @@ export class ATNConfigSet {
(this.dipsIntoOuterContext ? ",dipsIntoOuterContext" : "");
}

/**
 * Computes the hash code of this set from scratch (no caching involved).
 *
 * The hash codes of all entries in `configs` are folded, in list order, into a single
 * MurmurHash value, which is then finished with the number of configs.
 *
 * @returns The hash code for the current content of `configs`.
 */
private computeHashCode(): number {
    const combined = this.configs.reduce(
        (running, entry) => {
            return MurmurHash.update(running, entry.hashCode());
        },
        MurmurHash.initialize(),
    );

    return MurmurHash.finish(combined, this.configs.length);
}
}
Loading

0 comments on commit 7acb938

Please sign in to comment.