Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace prototype inheritance with classes #710

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/natural/sentiment/SentimentAnalyzer.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

'use strict'

const DEBUG = true
const DEBUG = false

// Afinn
const englishAfinnVoca = require('afinn-165')
Expand Down
26 changes: 11 additions & 15 deletions lib/natural/tokenizers/aggressive_tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,18 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * English aggressive tokenizer: cuts text at every run of characters that
 * cannot be part of a word.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Break a string up into an array of tokens.
   *
   * The characters that make up words are listed explicitly: ASCII letters,
   * digits, single quote ', hyphen - and slash /. Anything else (the
   * underscore included) separates tokens. The splitter used before this one
   * was /[\W|_]+/.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} the split pieces after being passed through
   *   this.trim (not defined in this class; presumably inherited from Tokenizer)
   */
  tokenize (text) {
    const separators = /[^a-zA-Z0-9'\-/]+/
    const pieces = text.split(separators)
    return this.trim(pieces)
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Break a string up into an array of tokens.
 *
 * Word characters are listed explicitly: ASCII letters, digits, single
 * quote ', hyphen - and slash /. Everything else (underscore included) acts
 * as a separator. The splitter used before this one was /[\W|_]+/.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the split pieces after being passed through this.trim
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const separators = /[^a-zA-Z0-9'\-/]+/
  const pieces = text.split(separators)
  return this.trim(pieces)
}
14 changes: 5 additions & 9 deletions lib/natural/tokenizers/aggressive_tokenizer_es.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * Spanish aggressive tokenizer.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Break a string up into an array of tokens.
   *
   * Only ASCII letters, the ranges á-ú and Á-Ú, and ñ Ñ ü Ü count as word
   * characters; every run of anything else (digits included) is a separator.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} the split pieces after being passed through this.trim
   */
  tokenize (text) {
    const separators = /[^a-zA-Zá-úÁ-ÚñÑüÜ]+/
    const pieces = text.split(separators)
    return this.trim(pieces)
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Break a string up into an array of tokens.
 *
 * Only ASCII letters, the ranges á-ú and Á-Ú, and ñ Ñ ü Ü count as word
 * characters; every run of anything else (digits included) is a separator.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the split pieces after being passed through this.trim
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const separators = /[^a-zA-Zá-úÁ-ÚñÑüÜ]+/
  const pieces = text.split(separators)
  return this.trim(pieces)
}
36 changes: 16 additions & 20 deletions lib/natural/tokenizers/aggressive_tokenizer_fa.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,23 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * Persian (Farsi) aggressive tokenizer: replaces punctuation with spaces and
 * then splits on whitespace.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Remove empty strings from a token array.
   *
   * @param {string[]} array - tokens, possibly containing ''
   * @returns {string[]} a new array without '' entries
   */
  clearEmptyString (array) {
    return array.filter(token => token !== '')
  }

  /**
   * Replace each punctuation character with a single space.
   *
   * The punctuation set is . : + - = ( ) " ' ! ? , ; plus the Arabic comma
   * (U+060C) and the Arabic semicolon (U+061B). The set must be a character
   * class: written as a plain sequence, the pattern only matches when all of
   * those characters appear in a row, so punctuation is never removed.
   *
   * @param {string} text - raw input text
   * @returns {string} text with every listed punctuation character replaced by ' '
   */
  clearText (text) {
    return text.replace(/[.:+\-=()"'!?\u060C,\u061B;]/g, ' ')
  }

  /**
   * Break a string up into an array of tokens.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} non-empty, whitespace-separated tokens of the cleaned text
   */
  tokenize (text) {
    const cleaned = this.clearText(text)
    return this.clearEmptyString(cleaned.split(/\s+/))
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Remove empty strings from a token array.
 *
 * @param {string[]} array - tokens, possibly containing ''
 * @returns {string[]} a new array without '' entries
 */
AggressiveTokenizer.prototype.clearEmptyString = function (array) {
  return array.filter(token => token !== '')
}

/**
 * Replace each punctuation character with a single space.
 *
 * The punctuation set is . : + - = ( ) " ' ! ? , ; plus the Arabic comma
 * (U+060C) and the Arabic semicolon (U+061B). The set must be a character
 * class: written as a plain sequence, the pattern only matches when all of
 * those characters appear in a row, so punctuation is never removed.
 *
 * @param {string} text - raw input text
 * @returns {string} text with every listed punctuation character replaced by ' '
 */
AggressiveTokenizer.prototype.clearText = function (text) {
  return text.replace(/[.:+\-=()"'!?\u060C,\u061B;]/g, ' ')
}

/**
 * Break a string up into an array of tokens.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} non-empty, whitespace-separated tokens of the cleaned text
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const cleaned = this.clearText(text)
  return this.clearEmptyString(cleaned.split(/\s+/))
}
14 changes: 5 additions & 9 deletions lib/natural/tokenizers/aggressive_tokenizer_fr.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * French aggressive tokenizer.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Break a string up into an array of tokens.
   *
   * Word characters (matched case-insensitively) are ASCII letters, digits,
   * the listed accented French letters, œ, ç and the hyphen. Every run of
   * other characters, the apostrophe included, is a separator.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} the split pieces after being passed through this.trim
   */
  tokenize (text) {
    const separators = /[^a-z0-9äâàéèëêïîöôùüûœç-]+/i
    const pieces = text.split(separators)
    return this.trim(pieces)
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Break a string up into an array of tokens.
 *
 * Word characters (matched case-insensitively) are ASCII letters, digits,
 * the listed accented French letters, œ, ç and the hyphen. Every run of
 * other characters, the apostrophe included, is a separator.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the split pieces after being passed through this.trim
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const separators = /[^a-z0-9äâàéèëêïîöôùüûœç-]+/i
  const pieces = text.split(separators)
  return this.trim(pieces)
}
15 changes: 5 additions & 10 deletions lib/natural/tokenizers/aggressive_tokenizer_hi.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,6 @@ DEALINGS IN THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

// Prototype-style constructor for the Hindi tokenizer: runs the Tokenizer
// constructor against the instance being created.
const AggressiveTokenizer = function () {
Tokenizer.call(this)
}

util.inherits(AggressiveTokenizer, Tokenizer)

/*
To know more on hindi
Expand All @@ -44,9 +37,11 @@ Important links:
3.https://www.unicode.org/charts/PDF/U0900.pdf
*/

AggressiveTokenizer.prototype.tokenize = function (text) {
const response = this.trim(text.replace(/[\u0964\u0965...?,]/g, '').split(/\s+|(?![\u0900-\u097F\u0020-\u007F])./u)).filter(Boolean)
return response
/**
 * Hindi aggressive tokenizer.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Break a string up into an array of tokens.
   *
   * First deletes U+0964, U+0965, '.', '?' and ','. Then splits on runs of
   * whitespace and on any single character that lies outside both the
   * U+0900-U+097F (Devanagari) and U+0020-U+007F ranges. Falsy (empty)
   * pieces are dropped from the result.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} non-empty tokens
   */
  tokenize (text) {
    const withoutPunctuation = text.replace(/[\u0964\u0965...?,]/g, '')
    const pieces = withoutPunctuation.split(/\s+|(?![\u0900-\u097F\u0020-\u007F])./u)
    return this.trim(pieces).filter(Boolean)
  }
}

module.exports = AggressiveTokenizer
30 changes: 13 additions & 17 deletions lib/natural/tokenizers/aggressive_tokenizer_id.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,20 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * Indonesian aggressive tokenizer: normalizes the text, then splits on
 * single spaces.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Replace every character that is not an ASCII letter, a digit, a space or
   * '-' with a space, then collapse runs of spaces into one space.
   *
   * Matching is case-insensitive so that uppercase letters are kept; a
   * lowercase-only class would turn every capital letter into a space.
   *
   * @param {string} text - raw input text
   * @returns {string} normalized text
   */
  normalizeText (text) {
    return text.replace(/[^a-z0-9 -]/gi, ' ').replace(/( +)/g, ' ')
  }

  /**
   * Break a string up into an array of tokens by space.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} the space-separated pieces of the normalized text
   *   after being passed through this.trim
   */
  tokenize (text) {
    const normalized = this.normalizeText(text)
    return this.trim(normalized.split(' '))
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Replace every character that is not an ASCII letter, a digit, a space or
 * '-' with a space, then collapse runs of spaces into one space.
 *
 * Matching is case-insensitive so that uppercase letters are kept; a
 * lowercase-only class would turn every capital letter into a space.
 *
 * @param {string} text - raw input text
 * @returns {string} normalized text
 */
function normalizeText (text) {
  return text.replace(/[^a-z0-9 -]/gi, ' ').replace(/( +)/g, ' ')
}

/**
 * Break a string up into an array of tokens by space.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the space-separated pieces of the normalized text
 *   after being passed through this.trim
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const normalized = normalizeText(text)
  return this.trim(normalized.split(' '))
}
14 changes: 5 additions & 9 deletions lib/natural/tokenizers/aggressive_tokenizer_it.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * Italian aggressive tokenizer.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Break a string up into an array of tokens by anything non-word.
   *
   * NOTE(review): \W only treats [A-Za-z0-9_] as word characters, so
   * accented letters such as è or ù also act as separators here.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} the split pieces after being passed through this.trim
   */
  tokenize (text) {
    const nonWordRun = /\W+/
    const pieces = text.split(nonWordRun)
    return this.trim(pieces)
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Break a string up into an array of tokens by anything non-word.
 *
 * NOTE(review): \W only treats [A-Za-z0-9_] as word characters, so accented
 * letters such as è or ù also act as separators here.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the split pieces after being passed through this.trim
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const nonWordRun = /\W+/
  const pieces = text.split(nonWordRun)
  return this.trim(pieces)
}
14 changes: 5 additions & 9 deletions lib/natural/tokenizers/aggressive_tokenizer_nl.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,12 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * Dutch aggressive tokenizer.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Break a string up into an array of tokens.
   *
   * Word characters are ASCII letters, digits, underscore, single quote and
   * hyphen; every run of anything else is a separator.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} the split pieces after being passed through this.trim
   */
  tokenize (text) {
    const separators = /[^a-zA-Z0-9_'-]+/
    const pieces = text.split(separators)
    return this.trim(pieces)
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Break a string up into an array of tokens.
 *
 * Word characters are ASCII letters, digits, underscore, single quote and
 * hyphen; every run of anything else is a separator.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the split pieces after being passed through this.trim
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const separators = /[^a-zA-Z0-9_'-]+/
  const pieces = text.split(separators)
  return this.trim(pieces)
}
18 changes: 7 additions & 11 deletions lib/natural/tokenizers/aggressive_tokenizer_no.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,14 @@ THE SOFTWARE.

const Tokenizer = require('./tokenizer')
const normalizer = require('../normalizers/normalizer_no')
const util = require('util')

const AggressiveTokenizer = function () {
Tokenizer.call(this)
/**
 * Norwegian aggressive tokenizer.
 */
class AggressiveTokenizer extends Tokenizer {
  /**
   * Break a string up into an array of tokens.
   *
   * The text is first passed through normalizer.removeDiacritics (from
   * ../normalizers/normalizer_no). It is then split on every run of
   * characters other than ASCII letters, digits, underscore and
   * æ ø å Æ Ø Å ä Ä ö Ö ü Ü.
   *
   * @param {string} text - text to tokenize
   * @returns {string[]} the split pieces after being passed through this.trim
   */
  tokenize (text) {
    const normalized = normalizer.removeDiacritics(text)
    const separators = /[^A-Za-z0-9_æøåÆØÅäÄöÖüÜ]+/
    return this.trim(normalized.split(separators))
  }
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer

/**
 * Break a string up into an array of tokens.
 *
 * The text is first passed through normalizer.removeDiacritics. It is then
 * split on every run of characters other than ASCII letters, digits,
 * underscore and æ ø å Æ Ø Å ä Ä ö Ö ü Ü.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the split pieces after being passed through this.trim
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const normalized = normalizer.removeDiacritics(text)
  const separators = /[^A-Za-z0-9_æøåÆØÅäÄöÖüÜ]+/
  return this.trim(normalized.split(separators))
}
29 changes: 12 additions & 17 deletions lib/natural/tokenizers/aggressive_tokenizer_pl.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,20 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

// Prototype-style constructor for the Polish tokenizer: runs the Tokenizer
// constructor against the instance being created.
const AggressiveTokenizer = function () {
Tokenizer.call(this)
}

util.inherits(AggressiveTokenizer, Tokenizer)
class AggressiveTokenizer extends Tokenizer {
withoutEmpty (array) {
return array.filter(function (a) { return a })
}

module.exports = AggressiveTokenizer
clearText (text) {
return text.replace(/[^a-zążśźęćńół0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
}

AggressiveTokenizer.prototype.withoutEmpty = function (array) {
return array.filter(function (a) { return a })
tokenize (text) {
// break a string up into an array of tokens by anything non-word
return this.withoutEmpty(this.clearText(text).split(' '))
}
}

/**
 * Reduce text to letters and digits separated by single spaces.
 *
 * Every character that is not an ASCII letter, one of ą ż ś ź ę ć ń ó ł
 * (matched case-insensitively) or a digit becomes a space. Whitespace runs
 * are then collapsed to one space and the ends are trimmed.
 *
 * @param {string} text - raw input text
 * @returns {string} cleaned text
 */
AggressiveTokenizer.prototype.clearText = function (text) {
  const lettersAndDigits = text.replace(/[^a-zążśźęćńół0-9]/gi, ' ')
  const singleSpaced = lettersAndDigits.replace(/[\s\n]+/g, ' ')
  return singleSpaced.trim()
}

/**
 * Break a string up into an array of tokens.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the space-separated pieces of this.clearText(text),
 *   filtered through this.withoutEmpty
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const cleaned = this.clearText(text)
  return this.withoutEmpty(cleaned.split(' '))
}
module.exports = AggressiveTokenizer
22 changes: 9 additions & 13 deletions lib/natural/tokenizers/aggressive_tokenizer_pt.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,20 +23,16 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

// Prototype-style constructor for the Portuguese tokenizer: runs the
// Tokenizer constructor against the instance being created.
const AggressiveTokenizer = function () {
Tokenizer.call(this)
}
util.inherits(AggressiveTokenizer, Tokenizer)

module.exports = AggressiveTokenizer
class AggressiveTokenizer extends Tokenizer {
withoutEmpty (array) {
return array.filter(function (a) { return a })
}

AggressiveTokenizer.prototype.withoutEmpty = function (array) {
return array.filter(function (a) { return a })
tokenize (text) {
// break a string up into an array of tokens by anything non-word
return this.withoutEmpty(this.trim(text.split(/[^a-zA-Zà-úÀ-Ú]/)))
}
}

/**
 * Break a string up into an array of tokens.
 *
 * Splits at every single character outside a-z, A-Z, à-ú and À-Ú. Adjacent
 * separators therefore yield empty pieces, which this.withoutEmpty removes.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} non-empty tokens
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const pieces = text.split(/[^a-zA-Zà-úÀ-Ú]/)
  const trimmed = this.trim(pieces)
  return this.withoutEmpty(trimmed)
}
module.exports = AggressiveTokenizer
29 changes: 12 additions & 17 deletions lib/natural/tokenizers/aggressive_tokenizer_ru.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,20 @@ THE SOFTWARE.
'use strict'

const Tokenizer = require('./tokenizer')
const util = require('util')

// Prototype-style constructor for the Russian tokenizer: runs the Tokenizer
// constructor against the instance being created.
const AggressiveTokenizer = function () {
Tokenizer.call(this)
}

util.inherits(AggressiveTokenizer, Tokenizer)
class AggressiveTokenizer extends Tokenizer {
withoutEmpty (array) {
return array.filter(function (a) { return a })
}

module.exports = AggressiveTokenizer
clearText (text) {
return text.replace(/[^a-zа-яё0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
}

AggressiveTokenizer.prototype.withoutEmpty = function (array) {
return array.filter(function (a) { return a })
tokenize (text) {
// break a string up into an array of tokens by anything non-word
return this.withoutEmpty(this.clearText(text).split(' '))
}
}

/**
 * Reduce text to letters and digits separated by single spaces.
 *
 * Every character that is not an ASCII letter, a Cyrillic letter in а-я, ё
 * (matched case-insensitively) or a digit becomes a space. Whitespace runs
 * are then collapsed to one space and the ends are trimmed.
 *
 * @param {string} text - raw input text
 * @returns {string} cleaned text
 */
AggressiveTokenizer.prototype.clearText = function (text) {
  const lettersAndDigits = text.replace(/[^a-zа-яё0-9]/gi, ' ')
  const singleSpaced = lettersAndDigits.replace(/[\s\n]+/g, ' ')
  return singleSpaced.trim()
}

/**
 * Break a string up into an array of tokens.
 *
 * @param {string} text - text to tokenize
 * @returns {string[]} the space-separated pieces of this.clearText(text),
 *   filtered through this.withoutEmpty
 */
AggressiveTokenizer.prototype.tokenize = function (text) {
  const cleaned = this.clearText(text)
  return this.withoutEmpty(cleaned.split(' '))
}
module.exports = AggressiveTokenizer
Loading
Loading