Completed Typescript tests in ts_spec

NaturalNode · Apr 7, 2024 · d6c0aaa · d6c0aaa
1 parent cc725b5
commit d6c0aaa
Show file tree

Hide file tree

Showing 20 changed files with 1,436 additions and 70 deletions.
diff --git a/ts_spec/wordnet_test.ts → io_spec/wordnet_test.ts b/ts_spec/wordnet_test.ts → io_spec/wordnet_test.ts
diff --git a/lib/natural/classifiers/index.d.ts b/lib/natural/classifiers/index.d.ts
@@ -124,16 +124,17 @@ export class LogisticRegressionClassifier extends ClassifierBase {
   static loadFrom (storage: StorageBackend): ClassifierBase
 }
 
-declare type MaxEntClassifierCallback = (err: NodeJS.ErrnoException | null, classifier?: MaxEntClassifier | null) => void
+declare type MaxEntClassifierCallback = (err: NodeJS.ErrnoException | null, classifier?: MaxEntClassifier) => void
 
 export class MaxEntClassifier {
   sample: Sample
   features: FeatureSet
+  scaler: GISScaler
 
   constructor (features: FeatureSet, sample: Sample)
   addElement (x: Element): void
   addDocument (context: Context, classification: string, elementClass: Element): void
-  train (maxIterations: number, minImprovement: number, unused: any): void
+  train (maxIterations: number, minImprovement: number): void
   getClassifications (b: Context): ApparatusClassification[]
   classify (b: Context): string
   // These are not static like in other Classifier classes
@@ -232,6 +233,9 @@ export class POSElement extends Element {
 }
 
 export class GISScaler {
+  iteration: number
+  improvement: number
+
   constructor (featureSet: FeatureSet, sample: Sample)
   calculateMaxSumOfFeatures (): boolean
   addCorrectionFeature (): void

diff --git a/lib/natural/classifiers/maxent/Classifier.js b/lib/natural/classifiers/maxent/Classifier.js
@@ -32,6 +32,8 @@ const Sample = require('./Sample')
 const Scaler = require('./GISScaler')
 const FeatureSet = require('./FeatureSet')
 
+const DEBUG = false
+
 class Classifier {
   constructor (features, sample) {
     if (features) {
@@ -77,7 +79,7 @@ class Classifier {
     const classifier = this
     fs.writeFile(filename, data, 'utf8', function (err) {
       if (callback) {
-        console.log('Saved classifier to ' + filename)
+        DEBUG && console.log('Saved classifier to ' + filename)
         callback(err, err ? null : classifier)
       }
     })
@@ -91,9 +93,9 @@ class Classifier {
     Classifier.prototype.addElement(new ElementClass(classification, context))
   }
 
-  train (maxIterations, minImprovement, approxExpectation) {
+  train (maxIterations, minImprovement) {
     this.scaler = new Scaler(this.features, this.sample)
-    this.p = this.scaler.run(maxIterations, minImprovement, approxExpectation)
+    this.p = this.scaler.run(maxIterations, minImprovement)
   }
 
   getClassifications (b) {

diff --git a/lib/natural/normalizers/index.d.ts b/lib/natural/normalizers/index.d.ts
@@ -24,7 +24,9 @@ THE SOFTWARE.
 
 export function normalize (tokens: string | string[]): string[]
 // eslint-disable-next-line @typescript-eslint/naming-convention
-export function normalize_ja (str: string): string
+export function normalizeJa (str: string): string
+export function normalizeNo (str: string): string
+export function normalizeSv (str: string): string
 export function removeDiacritics (str: string): string
 
 export class Converters {
@@ -42,8 +44,8 @@ export class Converters {
   katakanaHF: (str: string) => string
   static fixFullwidthKana: (str: string) => string
   static normalize: (str: string) => string
+  hiraganaToKatakana (str: string): string
+  katakanaToHiragana (str: string): string
 }
 
 type FixCompositeSymbolsTable = Record<string, string>
-
-type NormalizeJa = (str: string) => string
diff --git a/lib/natural/normalizers/index.js b/lib/natural/normalizers/index.js
@@ -23,6 +23,8 @@ THE SOFTWARE.
 'use strict'
 
 exports.normalize = require('./normalizer').normalizeTokens
-exports.normalize_ja = require('./normalizer_ja').normalizeJa
+exports.normalizeJa = require('./normalizer_ja').normalizeJa
+exports.normalizeNo = require('./normalizer_no').removeDiacritics
+// NOTE(review): unlike normalizeJa/normalizeNo, this re-exports the whole
+// module object rather than a named member, while index.d.ts declares
+// normalizeSv as (str: string) => string. Confirm that './normalizer_sv'
+// itself exports a function.
+exports.normalizeSv = require('./normalizer_sv')
 exports.Converters = require('./normalizer_ja').Converters
 exports.removeDiacritics = require('./remove_diacritics')
diff --git a/lib/natural/phonetics/index.d.ts b/lib/natural/phonetics/index.d.ts
@@ -26,6 +26,27 @@ export class Phonetic<T> {
 }
 
 export class Metaphone extends Phonetic<string> {
+  // The helpers declared below each take a token and return a string.
+  // NOTE(review): presumably these are the individual rewrite rules that
+  // process() applies in sequence - confirm against the JS implementation.
+  dedup (token: string): string
+  dropInitialLetters (token: string): string
+  dropBafterMAtEnd (token: string): string
+  cTransform (token: string): string
+  dTransform (token: string): string
+  dropG (token: string): string
+  transformG (token: string): string
+  dropH (token: string): string
+  transformCK (token: string): string
+  transformPH (token: string): string
+  transformQ (token: string): string
+  transformS (token: string): string
+  transformT (token: string): string
+  dropT (token: string): string
+  transformV (token: string): string
+  transformWH (token: string): string
+  dropW (token: string): string
+  transformX (token: string): string
+  dropY (token: string): string
+  transformZ (token: string): string
+  dropVowels (token: string): string
   process (token: string, maxLength?: number): string
 }
 

diff --git a/package.json b/package.json
@@ -78,7 +78,7 @@
     "test": "cross-env NODE_PATH=. jasmine --random=false spec/*_spec.js",
     "test_io": "jasmine --random=false io_spec/*_spec.js",
     "test_ts": "cross-env NODE_PATH=.:./dist jasmine --random=false dist/ts_spec/*_spec.js",
-    "coverage": "nyc --reporter=lcov npm run test && nyc npm run test_io",
+    "coverage": "nyc --reporter=lcov npm run test && nyc --reporter=lcov npm run test_io",
     "test_browser": "cross-env NODE_PATH=. node ./node_modules/gulp/bin/gulp.js",
     "lint": "eslint . --ext .ts"
   },

diff --git a/ts_spec/MaxEntClassifier_spec.ts b/ts_spec/MaxEntClassifier_spec.ts
@@ -0,0 +1,130 @@
+/*
+Unit test of Classifier
+Copyright (C) 2018 Hugo W.L. ter Doest
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+'use strict'
+
+import {
+  Context,
+  FeatureSet,
+  Sample,
+  MaxEntClassifier as Classifier,
+  SEElement
+} from 'lib/natural'
+
+import type { Element } from 'lib/natural'
+
+// File the classifier is saved to and loaded back from by the specs below
+const classifierFilename = 'classifier.json'
+// Arguments passed to classifier.train(nrIterations, minImprovement)
+const minImprovement = 0.01
+const nrIterations = 20
+
+// Shared mutable fixtures: each is assigned inside one spec and read by the
+// specs that follow, so the specs depend on running in declaration order
+let sample: Sample
+let featureSet: FeatureSet
+let classifier: Classifier
+
+// Set to true to enable the console.log diagnostics guarded by DEBUG
+const DEBUG = false
+
+/*
+ * End-to-end spec of the MaxEnt classifier: build a sample, derive a feature
+ * set, create and train a classifier, save it, load it back and classify.
+ * The specs share module-level state (sample, featureSet, classifier) and
+ * build on one another, so they must run in declaration order.
+ */
+describe('The MaxEnt module', function () {
+  it('The Sample class creates a sample', function () {
+    sample = new Sample()
+    // Context '0': three 'x' and three 'y' observations
+    sample.addElement(new SEElement('x', new Context('0')))
+    sample.addElement(new SEElement('x', new Context('0')))
+    sample.addElement(new SEElement('x', new Context('0')))
+    sample.addElement(new SEElement('y', new Context('0')))
+    sample.addElement(new SEElement('y', new Context('0')))
+    sample.addElement(new SEElement('y', new Context('0')))
+
+    // Context '1': one 'x' and three 'y' observations
+    sample.addElement(new SEElement('x', new Context('1')))
+    sample.addElement(new SEElement('y', new Context('1')))
+    sample.addElement(new SEElement('y', new Context('1')))
+    sample.addElement(new SEElement('y', new Context('1')))
+
+    expect(sample.size()).toBe(10)
+  })
+
+  it('The FeatureSet class creates a feature set', function () {
+    featureSet = new FeatureSet()
+    sample.generateFeatures(featureSet)
+
+    expect(featureSet.size()).toBe(2)
+  })
+
+  it('The Classifier class creates a classifier', function () {
+    // Create a classifier
+    classifier = new Classifier(featureSet, sample)
+
+    expect(classifier).toBeDefined()
+  })
+
+  it('Classifier does not need a correction feature', function () {
+    // TODO: this spec has no body and no expectations yet, so it verifies nothing
+  })
+
+  it('The classifier stops training after a specified number or iterations ' +
+    'or when the minimum improvement in likelihood is reached', function () {
+    classifier.train(nrIterations, minImprovement)
+
+    expect(classifier.scaler.iteration).toBeLessThan(nrIterations + 1)
+    // Stopping before the iteration cap is only legitimate when the
+    // improvement dropped below the requested minimum
+    if (classifier.scaler.iteration < nrIterations) {
+      expect(classifier.scaler.improvement).toBeLessThan(minImprovement)
+    }
+  })
+
+  it('Save classifer to a file', function (done) {
+    classifier.save(classifierFilename, function (err, c) {
+      // A failed write must fail the spec instead of only being logged
+      expect(err).toBeNull()
+      if (err !== null) {
+        console.log(err)
+      } else {
+        DEBUG && console.log('Classifier saved to ' + classifierFilename)
+      }
+      done()
+    })
+  })
+
+  it('Load classifer', function (done) {
+    classifier.load(classifierFilename, SEElement as unknown as Element, function (err, c) {
+      // A failed read must fail the spec instead of only being logged
+      expect(err).toBeNull()
+      if (err !== null) {
+        console.log(err)
+      } else if (c !== undefined) {
+        DEBUG && console.log('Classifier loaded from ' + classifierFilename)
+        // Swap in the loaded instance here, inside the callback: load() is
+        // asynchronous, so a check placed after the load() call would run
+        // before the classifier is available and the swap would never happen
+        classifier = c
+      }
+      done()
+    })
+  })
+
+  it('The classifier classifies events', function () {
+    let context = new Context('0')
+    DEBUG && console.log('Classes plus scores ' + JSON.stringify(classifier.getClassifications(context)))
+    let classification = classifier.classify(context)
+    expect(classification).toBe('x')
+
+    context = new Context('1')
+    DEBUG && console.log('Classes plus scores ' + JSON.stringify(classifier.getClassifications(context)))
+    classification = classifier.classify(context)
+    expect(classification).toBe('y')
+  })
+})
diff --git a/ts_spec/WordPunctTokenizer_spec.ts b/ts_spec/WordPunctTokenizer_spec.ts
@@ -0,0 +1,57 @@
+/*
+Copyright (c) 2018, Hugo W.L. ter Doest
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+'use strict'
+
+import { WordPunctTokenizer } from 'lib/natural'
+// Single tokenizer instance shared by every sentence in the spec below
+const tokenizer = new WordPunctTokenizer()
+
+// Dutch input sentences; sentences[i] is tokenized and compared against
+// expectedResults[i], so the two arrays must stay in the same order
+const sentences = [
+  'Knot: geldpers aanzetten is paardenmiddel voor half procent inflatie',
+  'De president van De Nederlandsche Bank, Klaas Knot, vindt de geldinjectie in de Europese economie van ruim 1.100 miljard euro veel te hoog voor het beoogde resultaat: een half procent inflatie in 2016.',
+  "'Oftewel 50 basispunten, dat zijn heel dure basispunten', zei Knot donderdag in de Tweede Kamer.",
+  'Door: Robert Giebels 5 februari 2015, 21:55 Bron: ANP',
+  'De financiële specialisten onder de Kamerleden hadden Knot gevraagd uitleg te geven over het ECB-besluit van 22 januari.;',
+  'Hoe gaat de tokenizer om met? vraagtekens ?',
+  'Verbindingsstreepje in  een woord: ECB-besluit',
+  'Gedachtestreepje in een zin met spaties eromheen - dit is de gedachte na het streepje ! '
+]
+
+// Expected token lists, index-aligned with `sentences`. Cases visible in the
+// data: digits followed by a dot stay one token ('1.100', '2016.'), a
+// hyphenated word stays one token ('ECB-besluit'), ':' between digits is
+// split off ('21', ':', '55'), and repeated or trailing spaces yield no
+// empty tokens
+const expectedResults = [
+  ['Knot', ':', 'geldpers', 'aanzetten', 'is', 'paardenmiddel', 'voor', 'half', 'procent', 'inflatie'],
+  ['De', 'president', 'van', 'De', 'Nederlandsche', 'Bank', ',', 'Klaas', 'Knot', ',', 'vindt', 'de', 'geldinjectie', 'in', 'de', 'Europese', 'economie', 'van', 'ruim', '1.100', 'miljard', 'euro', 'veel', 'te', 'hoog', 'voor', 'het', 'beoogde', 'resultaat', ':', 'een', 'half', 'procent', 'inflatie', 'in', '2016.'],
+  ["'", 'Oftewel', '50', 'basispunten', ',', 'dat', 'zijn', 'heel', 'dure', 'basispunten', "'", ',', 'zei', 'Knot', 'donderdag', 'in', 'de', 'Tweede', 'Kamer', '.'],
+  ['Door', ':', 'Robert', 'Giebels', '5', 'februari', '2015', ',', '21', ':', '55', 'Bron', ':', 'ANP'],
+  ['De', 'financiële', 'specialisten', 'onder', 'de', 'Kamerleden', 'hadden', 'Knot', 'gevraagd', 'uitleg', 'te', 'geven', 'over', 'het', 'ECB-besluit', 'van', '22', 'januari', '.', ';'],
+  ['Hoe', 'gaat', 'de', 'tokenizer', 'om', 'met', '?', 'vraagtekens', '?'],
+  ['Verbindingsstreepje', 'in', 'een', 'woord', ':', 'ECB-besluit'],
+  ['Gedachtestreepje', 'in', 'een', 'zin', 'met', 'spaties', 'eromheen', '-', 'dit', 'is', 'de', 'gedachte', 'na', 'het', 'streepje', '!']
+]
+
+// Checks the tokenizer output for every fixture sentence against the
+// expected token list stored at the same index
+describe('Word Punctuation Tokenizer', function () {
+  it('should correctly tokenize words and punctuation symbols', function () {
+    // One expectation per sentence, so a failure points at the offending input
+    sentences.forEach((text, position) => {
+      expect(tokenizer.tokenize(text)).toEqual(expectedResults[position])
+    })
+  })
+})