Describe new modules and classes
piroor committed Dec 18, 2017
1 parent f08768d commit 01528ca
Showing 4 changed files with 10 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/classifier-reborn/extensions/token_filter/stemmer.rb
@@ -5,6 +5,7 @@

module ClassifierReborn
module TokenFilter
# This filter converts the given tokens to their stemmed versions for the language.
module Stemmer
module_function

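For context, a minimal usage sketch of the Stemmer filter documented above. The module-level call method is an assumption suggested by module_function; it is not part of this hunk.

  tokens = [
    ClassifierReborn::Tokenizer::Token.new('tokenizing', stemmable: true,  maybe_stopword: true),
    ClassifierReborn::Tokenizer::Token.new('C++',         stemmable: false, maybe_stopword: false)
  ]
  ClassifierReborn::TokenFilter::Stemmer.call(tokens)
  # => the stemmable token is replaced by its stem; the un-stemmable one passes through unchanged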
1 change: 1 addition & 0 deletions lib/classifier-reborn/extensions/token_filter/stopword.rb
@@ -5,6 +5,7 @@

module ClassifierReborn
module TokenFilter
# This filter removes stopwords for the language from the given tokens.
module Stopword
STOPWORDS_PATH = [File.expand_path(File.dirname(__FILE__) + '/../../../../data/stopwords')]

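Similarly, a hedged sketch of the Stopword filter, again assuming a module-level call method that is not shown in this hunk. The stopword lists themselves are loaded from STOPWORDS_PATH.

  tokens = [
    ClassifierReborn::Tokenizer::Token.new('the',     stemmable: true, maybe_stopword: true),
    ClassifierReborn::Tokenizer::Token.new('message', stemmable: true, maybe_stopword: true)
  ]
  ClassifierReborn::TokenFilter::Stopword.call(tokens)
  # => tokens marked maybe_stopword that appear in the loaded stopword list are dropped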
6 changes: 6 additions & 0 deletions lib/classifier-reborn/extensions/tokenizer/token.rb
@@ -6,6 +6,12 @@
module ClassifierReborn
module Tokenizer
class Token < String
# A Token is created from a single token string plus extra attributes. E.g.,
#   t = ClassifierReborn::Tokenizer::Token.new 'Tokenize', stemmable: true, maybe_stopword: false
#
# Available attributes are:
#   stemmable: true      Whether the token can be stemmed. Set this to false for un-stemmable terms; otherwise it should be true.
#   maybe_stopword: true Whether the token may be a stopword. Set this to false for terms that can never be stopwords; otherwise it should be true.
def initialize(string, stemmable: true, maybe_stopword: true)
super(string)
@stemmable = stemmable
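To make the constructor's defaults concrete, a short sketch based only on the documented keyword arguments and on Token being a String subclass:

  plain  = ClassifierReborn::Tokenizer::Token.new('tokenize')   # stemmable and maybe_stopword default to true
  symbol = ClassifierReborn::Tokenizer::Token.new('+', stemmable: false, maybe_stopword: false)
  plain.upcase   # => "TOKENIZE" -- Token inherits from String, so ordinary String methods still work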
2 changes: 2 additions & 0 deletions lib/classifier-reborn/extensions/tokenizer/whitespace.rb
@@ -7,6 +7,8 @@

module ClassifierReborn
module Tokenizer
# This tokenizes the given input into whitespace-separated terms.
# It is mainly intended for sentences written with spaces between words, as in English, French, and similar languages.
module Whitespace
module_function

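Finally, a hedged usage sketch of the Whitespace tokenizer; a module-level call method returning Token objects is assumed from module_function and the Token class above, not shown in this hunk.

  tokens = ClassifierReborn::Tokenizer::Whitespace.call('The quick brown fox')
  # => an array of Token objects, one per whitespace-separated word,
  #    ready to be passed through the Stopword and Stemmer filters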
