From a952e2f5ba1fc5cda2617ea1729a4f0e0489ace6 Mon Sep 17 00:00:00 2001 From: Matttttt <18152455+martholomew@users.noreply.github.com> Date: Tue, 16 Apr 2024 19:50:22 +0000 Subject: [PATCH] Added Old Irish (#831) * added Old Irish (SGA) * Merge Yomitan Updates (#1) * fix (#811) * Add scanOnTouchTap and improve touch scanning defaults (#791) * Add scanOnTouchTap * Update version to 30 * Cleanup if statement * log anki error when hiding button (#821) * Fix noteInfos not getting assigned (#819) * improve term sorting (#806) * improve term sorting * edge case * fix: add missing handlebar entry (#823) * fix: add missing handlebar entry * fix: add new handlebar to tests * Revert to using canAddNotes (#827) Fix #818 * Remove unused canAdd (#824) Co-authored-by: James Maa * Fix duplicate check not working across note types (#830) * Fix duplicate check not working across note types * Add invalidNoteId --------- Co-authored-by: James Maa --------- Co-authored-by: StefanVukovic99 Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Co-authored-by: m-edlund Co-authored-by: Eloy Robillard Co-authored-by: James Maa Co-authored-by: James Maa * Revert "Merge Yomitan Updates (#1)" This reverts commit 748dc2202b3ea9e0a028ebae5ecf51208b422c2b. 
* Fix Static Analysis Error * Fix eslint error --------- Co-authored-by: martholomew Co-authored-by: StefanVukovic99 Co-authored-by: Kuuuube <61125188+Kuuuube@users.noreply.github.com> Co-authored-by: m-edlund Co-authored-by: Eloy Robillard Co-authored-by: James Maa Co-authored-by: James Maa --- .eslintrc.json | 1 + ext/js/language/language-descriptors.js | 11 ++ ext/js/language/sga/old-irish-transforms.js | 205 ++++++++++++++++++++ types/ext/language-descriptors.d.ts | 3 + 4 files changed, 220 insertions(+) create mode 100644 ext/js/language/sga/old-irish-transforms.js diff --git a/.eslintrc.json b/.eslintrc.json index e44a326e08..ab49e47156 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -653,6 +653,7 @@ "ext/js/language/languages.js", "ext/js/language/multi-language-transformer.js", "ext/js/language/ru/russian-text-preprocessors.js", + "ext/js/language/sga/old-irish-transforms.js", "ext/js/language/sq/albanian-transforms.js", "ext/js/language/text-preprocessors.js", "ext/js/language/translator.js", diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index b5d7573b86..2bdc7c7cce 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -23,6 +23,7 @@ import {japaneseTransforms} from './ja/japanese-transforms.js'; import {isStringPartiallyJapanese} from './ja/japanese.js'; import {removeRussianDiacritics, yoToE} from './ru/russian-text-preprocessors.js'; import {albanianTransforms} from './sq/albanian-transforms.js'; +import {oldIrishTransforms} from './sga/old-irish-transforms.js'; import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-preprocessors.js'; const capitalizationPreprocessors = { @@ -166,6 +167,16 @@ const languageDescriptors = [ removeRussianDiacritics } }, + { + iso: 'sga', + name: 'Old Irish', + exampleText: 'légaid', + textPreprocessors: { + ...capitalizationPreprocessors, + removeAlphabeticDiacritics + }, + languageTransforms: 
oldIrishTransforms + }, { iso: 'sh', name: 'Serbo-Croatian', diff --git a/ext/js/language/sga/old-irish-transforms.js b/ext/js/language/sga/old-irish-transforms.js new file mode 100644 index 0000000000..3dc12ea506 --- /dev/null +++ b/ext/js/language/sga/old-irish-transforms.js @@ -0,0 +1,205 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +import {suffixInflection, prefixInflection} from '../language-transforms.js'; + +/** + * @param {boolean} notBeginning + * @param {string} originalOrthography + * @param {string} alternateOrthography + * @param {string[]} conditionsIn + * @param {string[]} conditionsOut + * @returns {import('language-transformer').Rule} + */ +function tryAlternateOrthography(notBeginning, originalOrthography, alternateOrthography, conditionsIn, conditionsOut) { + const orthographyRegExp = notBeginning ? new RegExp('(? 
text.replace(orthographyRegExp, alternateOrthography), + conditionsIn, + conditionsOut + }; +} + +/** @type {import('language-transformer').LanguageTransformDescriptor} */ +export const oldIrishTransforms = { + language: 'sga', + conditions: {}, + transforms: [ + { + name: 'nd for nn', + description: 'nd for nn', + rules: [ + suffixInflection('nd', 'nn', [], []) + ] + }, + { + name: 'cg for c', + description: 'cg for c', + rules: [ + tryAlternateOrthography(false, 'cg', 'c', [], []) + ] + }, + { + name: 'td for t', + description: 'td for t', + rules: [ + tryAlternateOrthography(false, 'td', 't', [], []) + ] + }, + { + name: 'pb for p', + description: 'pb for p', + rules: [ + tryAlternateOrthography(false, 'pb', 'p', [], []) + ] + }, + { + name: 'ǽ/æ for é', + description: 'ǽ/æ for é', + rules: [ + tryAlternateOrthography(false, 'ǽ', 'é', [], []), + tryAlternateOrthography(false, 'æ', 'é', [], []) + ] + }, + { + name: 'doubled vowel', + description: 'Doubled Vowel', + rules: [ + tryAlternateOrthography(true, 'aa', 'á', [], []), + tryAlternateOrthography(true, 'ee', 'é', [], []), + tryAlternateOrthography(true, 'ii', 'í', [], []), + tryAlternateOrthography(true, 'oo', 'ó', [], []), + tryAlternateOrthography(true, 'uu', 'ú', [], []) + ] + }, + { + name: 'doubled consonant', + description: 'Doubled Consonant', + rules: [ + tryAlternateOrthography(true, 'cc', 'c', [], []), + tryAlternateOrthography(true, 'pp', 'p', [], []), + tryAlternateOrthography(true, 'tt', 't', [], []), + tryAlternateOrthography(true, 'gg', 'g', [], []), + tryAlternateOrthography(true, 'bb', 'b', [], []), + tryAlternateOrthography(true, 'dd', 'd', [], []), + tryAlternateOrthography(true, 'rr', 'r', [], []), + tryAlternateOrthography(true, 'll', 'l', [], []), + tryAlternateOrthography(true, 'nn', 'n', [], []), + tryAlternateOrthography(true, 'mm', 'm', [], []), + tryAlternateOrthography(true, 'ss', 's', [], []) + ] + }, + { + name: 'lenited', + description: 'Non-Beginning Lenition', + rules: [ + 
tryAlternateOrthography(true, 'ch', 'c', [], []), + tryAlternateOrthography(true, 'ph', 'p', [], []), + tryAlternateOrthography(true, 'th', 't', [], []) + ] + }, + { + name: 'lenited (Middle Irish)', + description: 'Non-Beginning Lenition (Middle Irish)', + rules: [ + tryAlternateOrthography(true, 'gh', 'g', [], []), + tryAlternateOrthography(true, 'bh', 'b', [], []), + tryAlternateOrthography(true, 'dh', 'd', [], []) + ] + }, + { + name: '[IM] nasalized', + description: 'Nasalized Word', + rules: [ + prefixInflection('ng', 'g', [], []), + prefixInflection('mb', 'b', [], []), + prefixInflection('nd', 'd', [], []), + prefixInflection('n-', '', [], []), + prefixInflection('m-', '', [], []) + ] + }, + { + name: '[IM] nasalized (Middle Irish)', + description: 'Nasalized Word (Middle Irish)', + rules: [ + prefixInflection('gc', 'c', [], []), + prefixInflection('bp', 'p', [], []), + prefixInflection('dt', 't', [], []) // eclipsis: t is written dt, so strip the d (parallels gc/bp) + ] + }, + { + name: '[IM] lenited', + description: 'Lenited Word', + rules: [ + prefixInflection('ch', 'c', [], []), + prefixInflection('ph', 'p', [], []), + prefixInflection('th', 't', [], []) + ] + }, + { + name: '[IM] lenited (Middle Irish)', + description: 'Lenited Word (Middle Irish)', + rules: [ + prefixInflection('gh', 'g', [], []), + prefixInflection('bh', 'b', [], []), + prefixInflection('dh', 'd', [], []) + ] + }, + { + name: '[IM] aspirated', + description: 'Aspirated Word', + rules: [ + prefixInflection('ha', 'a', [], []), + prefixInflection('he', 'e', [], []), + prefixInflection('hi', 'i', [], []), + prefixInflection('ho', 'o', [], []), + prefixInflection('hu', 'u', [], []), + prefixInflection('h-', '', [], []) + ] + }, + { + name: '[IM] geminated', + description: 'Geminated Word', + rules: [ + prefixInflection('cc', 'c', [], []), + prefixInflection('pp', 'p', [], []), + prefixInflection('tt', 't', [], []), + prefixInflection('gg', 'g', [], []), + prefixInflection('bb', 'b', [], []), + prefixInflection('dd', 'd', [], []), + 
prefixInflection('rr', 'r', [], []), + prefixInflection('ll', 'l', [], []), + prefixInflection('nn', 'n', [], []), + prefixInflection('mm', 'm', [], []), + prefixInflection('ss', 's', [], []), + prefixInflection('c-c', 'c', [], []), + prefixInflection('p-p', 'p', [], []), + prefixInflection('t-t', 't', [], []), + prefixInflection('g-g', 'g', [], []), + prefixInflection('b-b', 'b', [], []), + prefixInflection('d-d', 'd', [], []), + prefixInflection('r-r', 'r', [], []), + prefixInflection('l-l', 'l', [], []), + prefixInflection('n-n', 'n', [], []), + prefixInflection('m-m', 'm', [], []), + prefixInflection('s-s', 's', [], []) + ] + } + ] +}; diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index 41a1eec8e3..6674b28c05 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -93,6 +93,9 @@ type AllTextPreprocessors = { yoToE: TextPreprocessor; removeRussianDiacritics: TextPreprocessor; }; + sga: CapitalizationPreprocessors & { + removeAlphabeticDiacritics: TextPreprocessor; + }; sh: CapitalizationPreprocessors; sq: CapitalizationPreprocessors; sv: CapitalizationPreprocessors;