Skip to content

Commit

Permalink
Completed Typescript tests in ts_spec
Browse files Browse the repository at this point in the history
  • Loading branch information
Hugo-ter-Doest committed Apr 7, 2024
1 parent cc725b5 commit d6c0aaa
Show file tree
Hide file tree
Showing 20 changed files with 1,436 additions and 70 deletions.
File renamed without changes.
8 changes: 6 additions & 2 deletions lib/natural/classifiers/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -124,16 +124,17 @@ export class LogisticRegressionClassifier extends ClassifierBase {
static loadFrom (storage: StorageBackend): ClassifierBase
}

declare type MaxEntClassifierCallback = (err: NodeJS.ErrnoException | null, classifier?: MaxEntClassifier | null) => void
declare type MaxEntClassifierCallback = (err: NodeJS.ErrnoException | null, classifier?: MaxEntClassifier) => void

export class MaxEntClassifier {
sample: Sample
features: FeatureSet
scaler: GISScaler

constructor (features: FeatureSet, sample: Sample)
addElement (x: Element): void
addDocument (context: Context, classification: string, elementClass: Element): void
train (maxIterations: number, minImprovement: number, unused: any): void
train (maxIterations: number, minImprovement: number): void
getClassifications (b: Context): ApparatusClassification[]
classify (b: Context): string
// These are not static like in other Classifier classes
Expand Down Expand Up @@ -232,6 +233,9 @@ export class POSElement extends Element {
}

export class GISScaler {
iteration: number
improvement: number

constructor (featureSet: FeatureSet, sample: Sample)
calculateMaxSumOfFeatures (): boolean
addCorrectionFeature (): void
Expand Down
8 changes: 5 additions & 3 deletions lib/natural/classifiers/maxent/Classifier.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ const Sample = require('./Sample')
const Scaler = require('./GISScaler')
const FeatureSet = require('./FeatureSet')

const DEBUG = false

class Classifier {
constructor (features, sample) {
if (features) {
Expand Down Expand Up @@ -77,7 +79,7 @@ class Classifier {
const classifier = this
fs.writeFile(filename, data, 'utf8', function (err) {
if (callback) {
console.log('Saved classifier to ' + filename)
DEBUG && console.log('Saved classifier to ' + filename)
callback(err, err ? null : classifier)
}
})
Expand All @@ -91,9 +93,9 @@ class Classifier {
Classifier.prototype.addElement(new ElementClass(classification, context))
}

train (maxIterations, minImprovement, approxExpectation) {
train (maxIterations, minImprovement) {
this.scaler = new Scaler(this.features, this.sample)
this.p = this.scaler.run(maxIterations, minImprovement, approxExpectation)
this.p = this.scaler.run(maxIterations, minImprovement)
}

getClassifications (b) {
Expand Down
8 changes: 5 additions & 3 deletions lib/natural/normalizers/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ THE SOFTWARE.

export function normalize (tokens: string | string[]): string[]
// eslint-disable-next-line @typescript-eslint/naming-convention
export function normalize_ja (str: string): string
export function normalizeJa (str: string): string
export function normalizeNo (str: string): string
export function normalizeSv (str: string): string
export function removeDiacritics (str: string): string

export class Converters {
Expand All @@ -42,8 +44,8 @@ export class Converters {
katakanaHF: (str: string) => string
static fixFullwidthKana: (str: string) => string
static normalize: (str: string) => string
hiraganaToKatakana (str: string): string
katakanaToHiragana (str: string): string
}

type FixCompositeSymbolsTable = Record<string, string>

type NormalizeJa = (str: string) => string
4 changes: 3 additions & 1 deletion lib/natural/normalizers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ THE SOFTWARE.
'use strict'

exports.normalize = require('./normalizer').normalizeTokens
exports.normalize_ja = require('./normalizer_ja').normalizeJa
exports.normalizeJa = require('./normalizer_ja').normalizeJa
exports.normalizeNo = require('./normalizer_no').removeDiacritics
exports.normalizeSv = require('./normalizer_sv')
exports.Converters = require('./normalizer_ja').Converters
exports.removeDiacritics = require('./remove_diacritics')
21 changes: 21 additions & 0 deletions lib/natural/phonetics/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,27 @@ export class Phonetic<T> {
}

/**
 * Type declaration for the Metaphone phonetic encoder, a `Phonetic<string>`.
 *
 * Besides `process`, the class exposes a series of helper methods that all
 * take a token and return a string.
 * NOTE(review): judging by their names these are the individual Metaphone
 * rewrite rules (dropping or transforming specific letters); confirm against
 * the JavaScript implementation before relying on that.
 */
export class Metaphone extends Phonetic<string> {
// Helper methods: every one has the signature (token: string) => string.
dedup (token: string): string
dropInitialLetters (token: string): string
dropBafterMAtEnd (token: string): string
cTransform (token: string): string
dTransform (token: string): string
dropG (token: string): string
transformG (token: string): string
dropH (token: string): string
transformCK (token: string): string
transformPH (token: string): string
transformQ (token: string): string
transformS (token: string): string
transformT (token: string): string
dropT (token: string): string
transformV (token: string): string
transformWH (token: string): string
dropW (token: string): string
transformX (token: string): string
dropY (token: string): string
transformZ (token: string): string
dropVowels (token: string): string
// `maxLength` may be omitted.
// NOTE(review): presumably it caps the length of the returned code; confirm.
process (token: string, maxLength?: number): string
}

Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
"test": "cross-env NODE_PATH=. jasmine --random=false spec/*_spec.js",
"test_io": "jasmine --random=false io_spec/*_spec.js",
"test_ts": "cross-env NODE_PATH=.:./dist jasmine --random=false dist/ts_spec/*_spec.js",
"coverage": "nyc --reporter=lcov npm run test && nyc npm run test_io",
"coverage": "nyc --reporter=lcov npm run test && nyc --reporter=lcov npm run test_io",
"test_browser": "cross-env NODE_PATH=. node ./node_modules/gulp/bin/gulp.js",
"lint": "eslint . --ext .ts"
},
Expand Down
130 changes: 130 additions & 0 deletions ts_spec/MaxEntClassifier_spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
Unit test of Classifier
Copyright (C) 2018 Hugo W.L. ter Doest
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

'use strict'

import {
Context,
FeatureSet,
Sample,
MaxEntClassifier as Classifier,
SEElement
} from 'lib/natural'

import type { Element } from 'lib/natural'

// Set to true to print progress messages from the specs below.
const DEBUG = false

// Arguments passed to classifier.train(): iteration cap and improvement threshold.
const nrIterations = 20
const minImprovement = 0.01

// File the classifier is saved to and loaded back from.
const classifierFilename = 'classifier.json'

// State shared between the specs; each is assigned by an earlier spec and
// read by the later ones.
let classifier: Classifier
let featureSet: FeatureSet
let sample: Sample

/**
 * Spec for the MaxEnt classifier: build a sample, derive features, train,
 * save to disk, load back, and classify.
 *
 * The specs share the module-level `sample`, `featureSet` and `classifier`
 * variables, so they must run in the order written.
 */
describe('The MaxEnt module', function () {
  it('The Sample class creates a sample', function () {
    sample = new Sample()
    // Context '0': three 'x' events and three 'y' events.
    sample.addElement(new SEElement('x', new Context('0')))
    sample.addElement(new SEElement('x', new Context('0')))
    sample.addElement(new SEElement('x', new Context('0')))
    sample.addElement(new SEElement('y', new Context('0')))
    sample.addElement(new SEElement('y', new Context('0')))
    sample.addElement(new SEElement('y', new Context('0')))

    // Context '1': one 'x' event and three 'y' events.
    sample.addElement(new SEElement('x', new Context('1')))
    sample.addElement(new SEElement('y', new Context('1')))
    sample.addElement(new SEElement('y', new Context('1')))
    sample.addElement(new SEElement('y', new Context('1')))

    expect(sample.size()).toBe(10)
  })

  it('The FeatureSet class creates a feature set', function () {
    featureSet = new FeatureSet()
    sample.generateFeatures(featureSet)

    expect(featureSet.size()).toBe(2)
  })

  it('The Classifier class creates a classifier', function () {
    classifier = new Classifier(featureSet, sample)

    expect(classifier).toBeDefined()
  })

  it('Classifier does not need a correction feature', function () {
    // Placeholder: this spec has no expectations yet.
  })

  it('The classifier stops training after a specified number or iterations ' +
    'or when the minimum improvement in likelihood is reached', function () {
    classifier.train(nrIterations, minImprovement)

    expect(classifier.scaler.iteration).toBeLessThan(nrIterations + 1)
    // Stopping before the iteration cap must be due to the improvement threshold.
    if (classifier.scaler.iteration < nrIterations) {
      expect(classifier.scaler.improvement).toBeLessThan(minImprovement)
    }
  })

  it('Save classifer to a file', function (done) {
    classifier.save(classifierFilename, function (err) {
      if (err !== null) {
        // An I/O error must fail the spec instead of only being logged.
        fail(err)
      } else {
        DEBUG && console.log('Classifier saved to ' + classifierFilename)
      }
      done()
    })
  })

  it('Load classifer', function (done) {
    classifier.load(classifierFilename, SEElement as unknown as Element, function (err, c) {
      if (err !== null && err !== undefined) {
        // An I/O or parse error must fail the spec instead of only being logged.
        fail(err)
      } else {
        expect(c).toBeDefined()
        if (c !== undefined) {
          DEBUG && console.log('Classifier loaded from ' + classifierFilename)
          // Swap in the loaded classifier here, inside the callback. A check
          // placed after the load() call runs before the callback fires and
          // never sees the loaded instance.
          classifier = c
        }
      }
      done()
    })
  })

  it('The classifier classifies events', function () {
    let context = new Context('0')
    DEBUG && console.log('Classes plus scores ' + JSON.stringify(classifier.getClassifications(context)))
    let classification = classifier.classify(context)
    expect(classification).toBe('x')

    context = new Context('1')
    DEBUG && console.log('Classes plus scores ' + JSON.stringify(classifier.getClassifications(context)))
    classification = classifier.classify(context)
    expect(classification).toBe('y')
  })
})
57 changes: 57 additions & 0 deletions ts_spec/WordPunctTokenizer_spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
Copyright (c) 2018, Hugo W.L. ter Doest
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

'use strict'

import { WordPunctTokenizer } from 'lib/natural'
// Tokenizer instance under test, shared by every case below.
const tokenizer = new WordPunctTokenizer()

// Dutch-language input sentences. Entry i is tokenized and compared against
// expectedResults[i], so the two arrays must stay index-aligned.
const sentences = [
'Knot: geldpers aanzetten is paardenmiddel voor half procent inflatie',
'De president van De Nederlandsche Bank, Klaas Knot, vindt de geldinjectie in de Europese economie van ruim 1.100 miljard euro veel te hoog voor het beoogde resultaat: een half procent inflatie in 2016.',
"'Oftewel 50 basispunten, dat zijn heel dure basispunten', zei Knot donderdag in de Tweede Kamer.",
'Door: Robert Giebels 5 februari 2015, 21:55 Bron: ANP',
'De financiële specialisten onder de Kamerleden hadden Knot gevraagd uitleg te geven over het ECB-besluit van 22 januari.;',
'Hoe gaat de tokenizer om met? vraagtekens ?',
'Verbindingsstreepje in een woord: ECB-besluit',
'Gedachtestreepje in een zin met spaties eromheen - dit is de gedachte na het streepje ! '
]

// Expected token arrays, one per entry of `sentences` and in the same order.
// Behaviour recorded by these fixtures:
//  - '1.100' and 'ECB-besluit' stay single tokens;
//  - '2016.' keeps its trailing period, while 'Kamer' '.' and 'januari' '.' ';'
//    are split into separate tokens;
//  - the time '21:55' is split into '21', ':', '55';
//  - trailing whitespace in the last sentence produces no token.
const expectedResults = [
['Knot', ':', 'geldpers', 'aanzetten', 'is', 'paardenmiddel', 'voor', 'half', 'procent', 'inflatie'],
['De', 'president', 'van', 'De', 'Nederlandsche', 'Bank', ',', 'Klaas', 'Knot', ',', 'vindt', 'de', 'geldinjectie', 'in', 'de', 'Europese', 'economie', 'van', 'ruim', '1.100', 'miljard', 'euro', 'veel', 'te', 'hoog', 'voor', 'het', 'beoogde', 'resultaat', ':', 'een', 'half', 'procent', 'inflatie', 'in', '2016.'],
["'", 'Oftewel', '50', 'basispunten', ',', 'dat', 'zijn', 'heel', 'dure', 'basispunten', "'", ',', 'zei', 'Knot', 'donderdag', 'in', 'de', 'Tweede', 'Kamer', '.'],
['Door', ':', 'Robert', 'Giebels', '5', 'februari', '2015', ',', '21', ':', '55', 'Bron', ':', 'ANP'],
['De', 'financiële', 'specialisten', 'onder', 'de', 'Kamerleden', 'hadden', 'Knot', 'gevraagd', 'uitleg', 'te', 'geven', 'over', 'het', 'ECB-besluit', 'van', '22', 'januari', '.', ';'],
['Hoe', 'gaat', 'de', 'tokenizer', 'om', 'met', '?', 'vraagtekens', '?'],
['Verbindingsstreepje', 'in', 'een', 'woord', ':', 'ECB-besluit'],
['Gedachtestreepje', 'in', 'een', 'zin', 'met', 'spaties', 'eromheen', '-', 'dit', 'is', 'de', 'gedachte', 'na', 'het', 'streepje', '!']
]

// Tokenizes every fixture sentence and compares the result with the expected
// token array stored at the same index.
describe('Word Punctuation Tokenizer', function () {
  it('should correctly tokenize words and punctuation symbols', function () {
    for (let i = 0; i < sentences.length; i++) {
      const tokens = tokenizer.tokenize(sentences[i])
      expect(tokens).toEqual(expectedResults[i])
    }
  })
})
Loading

0 comments on commit d6c0aaa

Please sign in to comment.