NaturalNode · Hugo-ter-Doest · Dec 1, 2023 · Dec 1, 2023
diff --git a/lib/natural/sentiment/SentimentAnalyzer.js b/lib/natural/sentiment/SentimentAnalyzer.js
@@ -22,7 +22,7 @@
 
 'use strict'
 
-const DEBUG = true
+const DEBUG = false // NOTE(review): presumably gates debug output in this module; usage not visible here - confirm
 
 // Afinn
 const englishAfinnVoca = require('afinn-165')

diff --git a/lib/natural/tokenizers/aggressive_tokenizer.js b/lib/natural/tokenizers/aggressive_tokenizer.js
@@ -23,22 +23,18 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  tokenize (text) {
+    // Splits `text` into an array of tokens on runs of non-word characters.
+    // The underscore is treated as a separator, not as a word character.
+    // The previous splitter was:
+    //   this.trim(text.split(/[\W|_]+/))
+    // The current splitter lists the word characters explicitly instead:
+    // ASCII letters, digits, hyphen -, single quote ' and slash /.
+    // this.trim is not defined in this class (presumably inherited from Tokenizer).
+    return this.trim(text.split(/[^a-zA-Z0-9'\-/]+/))
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  // underscore is considered to be non word character
-  // Old splitter:
-  // return this.trim(text.split(/[\W|_]+/))
-  // New splitter:
-  // Explicitly mentions which characters make up words.
-  // So words may contain hyphen -, single quote ' and slash /
-  return this.trim(text.split(/[^a-zA-Z0-9'\-/]+/))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_es.js b/lib/natural/tokenizers/aggressive_tokenizer_es.js
@@ -23,16 +23,12 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  tokenize (text) {
+    // Split on runs of anything outside a-z, A-Z, á-ú, Á-Ú, ñ, Ñ, ü, Ü; digits therefore act as separators too.
+    return this.trim(text.split(/[^a-zA-Zá-úÁ-ÚñÑüÜ]+/)) // this.trim: presumably inherited from Tokenizer
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  return this.trim(text.split(/[^a-zA-Zá-úÁ-ÚñÑüÜ]+/))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_fa.js b/lib/natural/tokenizers/aggressive_tokenizer_fa.js
@@ -24,27 +24,23 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  clearEmptyString (array) { // returns a new array without '' entries
+    return array.filter(function (a) {
+      return a !== ''
+    })
+  }
+
+  clearText (text) { // replaces each listed Latin/Persian punctuation mark with a space
+    return text.replace(/[.:+=()"'!?،,؛;-]/g, ' ') // character class: matches any ONE mark ('-' last so it is literal)
+  }
+
+  tokenize (text) {
+    // Blank out punctuation, split on whitespace runs, then drop the empty strings left at the edges.
+    text = this.clearText(text)
+    return this.clearEmptyString(text.split(/\s+/))
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-AggressiveTokenizer.prototype.clearEmptyString = function (array) {
-  return array.filter(function (a) {
-    return a !== ''
-  })
-}
-
-AggressiveTokenizer.prototype.clearText = function (text) {
-  return text.replace(/.:\+-=\(\)"'!\?،,؛;/g, ' ')
-}
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  text = this.clearText(text)
-  return this.clearEmptyString(text.split(/\s+/))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_fr.js b/lib/natural/tokenizers/aggressive_tokenizer_fr.js
@@ -23,16 +23,12 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  tokenize (text) {
+    // Split on runs of anything outside a-z, 0-9, the listed accented letters, œ, ç and hyphen (case-insensitive via /i).
+    return this.trim(text.split(/[^a-z0-9äâàéèëêïîöôùüûœç-]+/i)) // this.trim: presumably inherited from Tokenizer
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  return this.trim(text.split(/[^a-z0-9äâàéèëêïîöôùüûœç-]+/i))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_hi.js b/lib/natural/tokenizers/aggressive_tokenizer_hi.js
@@ -28,13 +28,6 @@ DEALINGS IN THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
-
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
-}
-
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 /*
 To know more on hindi
@@ -44,9 +37,11 @@ Important links:
 3.https://www.unicode.org/charts/PDF/U0900.pdf
 */
 
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  const response = this.trim(text.replace(/[\u0964\u0965...?,]/g, '').split(/\s+|(?![\u0900-\u097F\u0020-\u007F])./u)).filter(Boolean)
-  return response
+class AggressiveTokenizer extends Tokenizer {
+  tokenize (text) { // deletes U+0964, U+0965, '.', '?' and ',' from text, then splits the remainder into tokens
+    const response = this.trim(text.replace(/[\u0964\u0965...?,]/g, '').split(/\s+|(?![\u0900-\u097F\u0020-\u007F])./u)).filter(Boolean)
+    return response // split points: whitespace runs, or any one char outside U+0900-U+097F / U+0020-U+007F; falsy tokens dropped
+  }
 }
 
 module.exports = AggressiveTokenizer
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_id.js b/lib/natural/tokenizers/aggressive_tokenizer_id.js
@@ -23,24 +23,20 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  // Replace every character that is not an ASCII letter (either case), digit, space or '-' with a space,
+  // then collapse each run of spaces into a single ' '. Returns the normalized string.
+  normalizeText (text) {
+    const result = text.replace(/[^a-z0-9 -]/gi, ' ').replace(/( +)/g, ' ')
+    return result
+  }
+
+  tokenize (text) {
+    // Normalize first, then split on single spaces; this.trim is presumably inherited from Tokenizer.
+    text = this.normalizeText(text)
+    return this.trim(text.split(' '))
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-// Remove all non alphanumeric characters except '-'
-// Replace more than one space character to ' '
-function normalizeText (text) {
-  const result = text.replace(/[^a-z0-9 -]/g, ' ').replace(/( +)/g, ' ')
-  return result
-}
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by space
-  text = normalizeText(text)
-  return this.trim(text.split(' '))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_it.js b/lib/natural/tokenizers/aggressive_tokenizer_it.js
@@ -23,16 +23,12 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  tokenize (text) {
+    // Split on runs of \W. NOTE(review): \W is ASCII-only here, so accented letters (à, è, ...) also split words - confirm intended.
+    return this.trim(text.split(/\W+/)) // this.trim: presumably inherited from Tokenizer
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  return this.trim(text.split(/\W+/))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_nl.js b/lib/natural/tokenizers/aggressive_tokenizer_nl.js
@@ -23,16 +23,12 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  tokenize (text) {
+    // Split on runs of anything outside ASCII letters, digits, underscore, single quote ' and hyphen -.
+    return this.trim(text.split(/[^a-zA-Z0-9_'-]+/)) // this.trim: presumably inherited from Tokenizer
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  return this.trim(text.split(/[^a-zA-Z0-9_'-]+/))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_no.js b/lib/natural/tokenizers/aggressive_tokenizer_no.js
@@ -24,18 +24,14 @@ THE SOFTWARE.
 
 const Tokenizer = require('./tokenizer')
 const normalizer = require('../normalizers/normalizer_no')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
+class AggressiveTokenizer extends Tokenizer {
+  tokenize (text) {
+    text = normalizer.removeDiacritics(text) // normalizer is required from ../normalizers/normalizer_no
+
+    // Split on runs of anything outside ASCII letters, digits, underscore and æøå/ÆØÅ/äÄ/öÖ/üÜ.
+    return this.trim(text.split(/[^A-Za-z0-9_æøåÆØÅäÄöÖüÜ]+/))
+  }
 }
-util.inherits(AggressiveTokenizer, Tokenizer)
 
 module.exports = AggressiveTokenizer
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  text = normalizer.removeDiacritics(text)
-
-  // break a string up into an array of tokens by anything non-word
-  return this.trim(text.split(/[^A-Za-z0-9_æøåÆØÅäÄöÖüÜ]+/))
-}
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_pl.js b/lib/natural/tokenizers/aggressive_tokenizer_pl.js
@@ -23,25 +23,20 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
-}
-
-util.inherits(AggressiveTokenizer, Tokenizer)
+class AggressiveTokenizer extends Tokenizer {
+  withoutEmpty (array) { // returns a new array without falsy entries (e.g. '')
+    return array.filter(function (a) { return a })
+  }
 
-module.exports = AggressiveTokenizer
+  clearText (text) { // blanks every char outside a-z, the listed Polish letters and 0-9 (case-insensitive), collapses whitespace, trims
+    return text.replace(/[^a-zążśźęćńół0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
+  }
 
-AggressiveTokenizer.prototype.withoutEmpty = function (array) {
-  return array.filter(function (a) { return a })
+  tokenize (text) {
+    // Clean the text with clearText, split it on single spaces, then drop empty tokens.
+    return this.withoutEmpty(this.clearText(text).split(' '))
+  }
 }
 
-AggressiveTokenizer.prototype.clearText = function (text) {
-  return text.replace(/[^a-zążśźęćńół0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
-}
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  return this.withoutEmpty(this.clearText(text).split(' '))
-}
+module.exports = AggressiveTokenizer
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_pt.js b/lib/natural/tokenizers/aggressive_tokenizer_pt.js
@@ -23,20 +23,16 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
-}
-util.inherits(AggressiveTokenizer, Tokenizer)
-
-module.exports = AggressiveTokenizer
+class AggressiveTokenizer extends Tokenizer {
+  withoutEmpty (array) { // returns a new array without falsy entries (e.g. '')
+    return array.filter(function (a) { return a })
+  }
 
-AggressiveTokenizer.prototype.withoutEmpty = function (array) {
-  return array.filter(function (a) { return a })
+  tokenize (text) {
+    // Split at EACH char outside a-z, A-Z, à-ú, À-Ú (no '+', so adjacent separators yield ''), then drop empty tokens.
+    return this.withoutEmpty(this.trim(text.split(/[^a-zA-Zà-úÀ-Ú]/)))
+  }
 }
 
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  return this.withoutEmpty(this.trim(text.split(/[^a-zA-Zà-úÀ-Ú]/)))
-}
+module.exports = AggressiveTokenizer
diff --git a/lib/natural/tokenizers/aggressive_tokenizer_ru.js b/lib/natural/tokenizers/aggressive_tokenizer_ru.js
@@ -23,25 +23,20 @@ THE SOFTWARE.
 'use strict'
 
 const Tokenizer = require('./tokenizer')
-const util = require('util')
 
-const AggressiveTokenizer = function () {
-  Tokenizer.call(this)
-}
-
-util.inherits(AggressiveTokenizer, Tokenizer)
+class AggressiveTokenizer extends Tokenizer {
+  withoutEmpty (array) { // returns a new array without falsy entries (e.g. '')
+    return array.filter(function (a) { return a })
+  }
 
-module.exports = AggressiveTokenizer
+  clearText (text) { // blanks every char outside a-z, а-я, ё and 0-9 (case-insensitive), collapses whitespace, trims
+    return text.replace(/[^a-zа-яё0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
+  }
 
-AggressiveTokenizer.prototype.withoutEmpty = function (array) {
-  return array.filter(function (a) { return a })
+  tokenize (text) {
+    // Clean the text with clearText, split it on single spaces, then drop empty tokens.
+    return this.withoutEmpty(this.clearText(text).split(' '))
+  }
 }
 
-AggressiveTokenizer.prototype.clearText = function (text) {
-  return text.replace(/[^a-zа-яё0-9]/gi, ' ').replace(/[\s\n]+/g, ' ').trim()
-}
-
-AggressiveTokenizer.prototype.tokenize = function (text) {
-  // break a string up into an array of tokens by anything non-word
-  return this.withoutEmpty(this.clearText(text).split(' '))
-}
+module.exports = AggressiveTokenizer