Skip to content

Commit

Permalink
Replace JsonSchema with ajv for dictionary validation
Browse files Browse the repository at this point in the history
  • Loading branch information
djahandarie committed Nov 3, 2023
1 parent b64f51c commit 3761510
Show file tree
Hide file tree
Showing 17 changed files with 106 additions and 81 deletions.
5 changes: 3 additions & 2 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"plugin:jsonc/recommended-with-json"
],
"parserOptions": {
"ecmaVersion": 9,
"ecmaVersion": 11,
"sourceType": "script",
"ecmaFeatures": {
"globalReturn": false,
Expand Down Expand Up @@ -401,7 +401,8 @@
"DynamicProperty": "readonly",
"EventDispatcher": "readonly",
"EventListenerCollection": "readonly",
"Logger": "readonly"
"Logger": "readonly",
"import": "readonly"
}
},
{
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ dictionaries/
/playwright/.cache/
/test/playwright/__screenshots__/
ext/manifest.json
ext/lib/validate-schemas.js
16 changes: 15 additions & 1 deletion dev/build.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ const childProcess = require('child_process');
const util = require('./util');
const {getAllFiles, getArgs, testMain} = util;
const {ManifestUtil} = require('./manifest-util');

const Ajv = require('ajv');
const standaloneCode = require('ajv/dist/standalone').default;

async function createZip(directory, excludeFiles, outputFileName, sevenZipExes, onUpdate, dryRun) {
try {
Expand Down Expand Up @@ -130,6 +131,19 @@ async function build(buildDir, extDir, manifestUtil, variantNames, manifestPath,
process.stdout.write(message);
};

// Precompile every JSON schema in data/schemas/ into a single standalone ES module
// (lib/validate-schemas.js) whose exports are keyed by each schema's "$id".
process.stdout.write('Building schema validators using ajv\n');
const schemaDir = path.join(extDir, 'data/schemas/');
// Skip anything that is not a .json file so stray directory entries cannot crash JSON.parse.
const schemaFileNames = fs.readdirSync(schemaDir).filter((schemaFileName) => schemaFileName.endsWith('.json'));
const schemas = schemaFileNames.map((schemaFileName) => JSON.parse(fs.readFileSync(path.join(schemaDir, schemaFileName), 'utf8')));
const ajv = new Ajv({schemas: schemas, code: {source: true, esm: true}});
const moduleCode = standaloneCode(ajv);

// https://github.com/ajv-validator/ajv/issues/2209
// The generated module still contains a CommonJS require() for the ucs2length runtime helper.
// It must be replaced with a *static* import: a dynamic import() evaluates to a Promise, so
// `import(...).default` would be undefined and the first string-length check would throw.
const ucs2lengthRequire = 'require("ajv/dist/runtime/ucs2length").default';
const patchedModuleCode = (
    moduleCode.includes(ucs2lengthRequire) ?
    `import ucs2length from "/lib/ucs2length.js";${moduleCode.replaceAll(ucs2lengthRequire, 'ucs2length')}` :
    moduleCode
);

fs.writeFileSync(path.join(extDir, 'lib/validate-schemas.js'), patchedModuleCode);


process.stdout.write(`Version: ${yomitanVersion}...\n`);

for (const variantName of variantNames) {
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/custom-audio-list-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "customAudioList",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-index-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryIndex",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"description": "Index file containing information about the data contained in the dictionary.",
Expand Down
3 changes: 2 additions & 1 deletion ext/data/schemas/dictionary-kanji-bank-v1-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryKanjiBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
Expand Down Expand Up @@ -30,4 +31,4 @@
"description": "A meaning for the kanji character."
}
}
}
}
3 changes: 2 additions & 1 deletion ext/data/schemas/dictionary-kanji-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryKanjiBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing kanji information.",
Expand Down Expand Up @@ -42,4 +43,4 @@
}
]
}
}
}
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-kanji-meta-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryKanjiMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-tag-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTagBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing tag information for terms and kanji.",
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-term-bank-v1-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTermBankV1",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "array",
"description": "Data file containing term information.",
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-term-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTermBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"structuredContent": {
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/dictionary-term-meta-bank-v3-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "dictionaryTermMetaBankV3",
"$schema": "http://json-schema.org/draft-07/schema#",
"definitions": {
"frequency": {
Expand Down
1 change: 1 addition & 0 deletions ext/data/schemas/options-schema.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
{
"$id": "options",
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": [
Expand Down
99 changes: 25 additions & 74 deletions ext/js/language/dictionary-importer.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

/* global
* JSZip
* JsonSchema
* MediaUtil
*/

Expand Down Expand Up @@ -51,8 +50,10 @@ class DictionaryImporter {

const index = JSON.parse(await indexFile.async('string'));

const indexSchema = await this._getSchema('/data/schemas/dictionary-index-schema.json');
this._validateJsonSchema(index, indexSchema, indexFileName);
const ajvSchemas = await import('/lib/validate-schemas.js');
if (!ajvSchemas.dictionaryIndex(index)) {
throw this._formatAjvSchemaError(ajvSchemas.dictionaryIndex, indexFileName);
}

const dictionaryTitle = index.title;
const version = index.format || index.version;
Expand All @@ -75,8 +76,7 @@ class DictionaryImporter {

// Load schemas
this._progressNextStep(0);
const dataBankSchemaPaths = this._getDataBankSchemaPaths(version);
const dataBankSchemas = await Promise.all(dataBankSchemaPaths.map((path) => this._getSchema(path)));
const dataBankSchemas = this._getDataBankSchemas(version);

// Files
const termFiles = this._getArchiveFiles(archive, 'term_bank_?.json');
Expand All @@ -87,11 +87,11 @@ class DictionaryImporter {

// Load data
this._progressNextStep(termFiles.length + termMetaFiles.length + kanjiFiles.length + kanjiMetaFiles.length + tagFiles.length);
const termList = await this._readFileSequence(termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
const termMetaList = await this._readFileSequence(termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
const kanjiList = await this._readFileSequence(kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
const kanjiMetaList = await this._readFileSequence(kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
const tagList = await this._readFileSequence(tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
const termList = await this._readFileSequence(ajvSchemas, termFiles, convertTermBankEntry, dataBankSchemas[0], dictionaryTitle);
const termMetaList = await this._readFileSequence(ajvSchemas, termMetaFiles, convertTermMetaBankEntry, dataBankSchemas[1], dictionaryTitle);
const kanjiList = await this._readFileSequence(ajvSchemas, kanjiFiles, convertKanjiBankEntry, dataBankSchemas[2], dictionaryTitle);
const kanjiMetaList = await this._readFileSequence(ajvSchemas, kanjiMetaFiles, convertKanjiMetaBankEntry, dataBankSchemas[3], dictionaryTitle);
const tagList = await this._readFileSequence(ajvSchemas, tagFiles, convertTagBankEntry, dataBankSchemas[4], dictionaryTitle);
this._addOldIndexTags(index, tagList, dictionaryTitle);

// Prefix wildcard support
Expand Down Expand Up @@ -214,68 +214,27 @@ class DictionaryImporter {
return summary;
}

async _getSchema(fileName) {
const schema = await this._fetchJsonAsset(fileName);
return new JsonSchema(schema);
}

_validateJsonSchema(value, schema, fileName) {
try {
schema.validate(value);
} catch (e) {
throw this._formatSchemaError(e, fileName);
}
}

_formatSchemaError(e, fileName) {
const valuePathString = this._getSchemaErrorPathString(e.valueStack, 'dictionary');
const schemaPathString = this._getSchemaErrorPathString(e.schemaStack, 'schema');

const e2 = new Error(`Dictionary has invalid data in '${fileName}' for value '${valuePathString}', validated against '${schemaPathString}': ${e.message}`);
e2.data = e;
_formatAjvSchemaError(schema, fileName) {
const e2 = new Error(`Dictionary has invalid data in '${fileName}'`);
e2.data = schema.errors;

return e2;
}

_getSchemaErrorPathString(infoList, base='') {
let result = base;
for (const {path} of infoList) {
const pathArray = Array.isArray(path) ? path : [path];
for (const pathPart of pathArray) {
if (pathPart === null) {
result = base;
} else {
switch (typeof pathPart) {
case 'string':
if (result.length > 0) {
result += '.';
}
result += pathPart;
break;
case 'number':
result += `[${pathPart}]`;
break;
}
}
}
}
return result;
}

_getDataBankSchemaPaths(version) {
_getDataBankSchemas(version) {
const termBank = (
version === 1 ?
'/data/schemas/dictionary-term-bank-v1-schema.json' :
'/data/schemas/dictionary-term-bank-v3-schema.json'
'dictionaryTermBankV1' :
'dictionaryTermBankV3'
);
const termMetaBank = '/data/schemas/dictionary-term-meta-bank-v3-schema.json';
const termMetaBank = 'dictionaryTermMetaBankV3';
const kanjiBank = (
version === 1 ?
'/data/schemas/dictionary-kanji-bank-v1-schema.json' :
'/data/schemas/dictionary-kanji-bank-v3-schema.json'
'dictionaryKanjiBankV1' :
'dictionaryKanjiBankV3'
);
const kanjiMetaBank = '/data/schemas/dictionary-kanji-meta-bank-v3-schema.json';
const tagBank = '/data/schemas/dictionary-tag-bank-v3-schema.json';
const kanjiMetaBank = 'dictionaryKanjiMetaBankV3';
const tagBank = 'dictionaryTagBankV3';

return [termBank, termMetaBank, kanjiBank, kanjiMetaBank, tagBank];
}
Expand Down Expand Up @@ -539,28 +498,20 @@ class DictionaryImporter {
return results;
}

async _readFileSequence(files, convertEntry, schema, dictionaryTitle) {
async _readFileSequence(ajvSchemas, files, convertEntry, schemaName, dictionaryTitle) {
const progressData = this._progressData;
let count = 0;
let startIndex = 0;
if (typeof this._onProgress === 'function') {
schema.progressInterval = 1000;
schema.progress = (s) => {
const index = s.getValueStackLength() > 1 ? s.getValueStackItem(1).path : 0;
progressData.index = startIndex + (index / count);
this._progress();
};
}

const results = [];
for (const file of files) {
const entries = JSON.parse(await file.async('string'));

count = Array.isArray(entries) ? Math.max(entries.length, 1) : 1;
startIndex = progressData.index;
this._progress();

this._validateJsonSchema(entries, schema, file.name);
if (!ajvSchemas[schemaName](entries)) {
throw this._formatAjvSchemaError(ajvSchemas[schemaName], file.name);
}

progressData.index = startIndex + 1;
this._progress();
Expand Down
16 changes: 16 additions & 0 deletions ext/lib/ucs2length.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
/**
 * Counts the characters of a string, treating a high surrogate immediately
 * followed by a low surrogate as a single character. Unpaired surrogates
 * count as one character each.
 * @param {string} str The string to measure.
 * @returns {number} The number of characters, with surrogate pairs counted once.
 */
export default function ucs2length(str) {
    const unitCount = str.length;
    let characters = 0;
    for (let i = 0; i < unitCount; i++) {
        characters += 1;
        const unit = str.charCodeAt(i);
        const isHighSurrogate = unit >= 0xd800 && unit <= 0xdbff;
        // Nothing to pair with when this is not a high surrogate or it is the last code unit.
        if (!isHighSurrogate || i + 1 >= unitCount) { continue; }
        const following = str.charCodeAt(i + 1);
        // Consume the low surrogate so the pair is counted once.
        if ((following & 0xfc00) === 0xdc00) { i++; }
    }
    return characters;
}
33 changes: 32 additions & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@
},
"devDependencies": {
"@playwright/test": "^1.39.0",
"ajv": "^8.11.0",
"@types/node": "^20.8.10",
"ajv": "^8.12.0",
"browserify": "^17.0.0",
"css": "^3.0.0",
"eslint": "^8.52.0",
Expand Down

0 comments on commit 3761510

Please sign in to comment.