From e21a9be6277c7fb2b394667b7a8f72beaaab3512 Mon Sep 17 00:00:00 2001
From: Sawood Alam <ibnesayeed@gmail.com>
Date: Sat, 7 Jan 2017 14:25:38 -0500
Subject: [PATCH] Added Bayes backend benchmarks (#98)

* Disabled Redis disc persistence and refactored integration test, fixes #95

* Added Bayes backend benchmarks
---
 Rakefile                                 | 12 +++++--
 test/bayes/bayesian_common_benchmarks.rb | 43 ++++++++++++++++++++++++
 test/bayes/bayesian_memory_benchmark.rb  | 26 ++++++++++++++
 test/bayes/bayesian_redis_benchmark.rb   | 38 +++++++++++++++++++++
 test/test_helper.rb                      |  3 +-
 5 files changed, 119 insertions(+), 3 deletions(-)
 create mode 100644 test/bayes/bayesian_common_benchmarks.rb
 create mode 100755 test/bayes/bayesian_memory_benchmark.rb
 create mode 100644 test/bayes/bayesian_redis_benchmark.rb

diff --git a/Rakefile b/Rakefile
index 398f30c..a32c0b2 100644
--- a/Rakefile
+++ b/Rakefile
@@ -9,12 +9,20 @@ task default: [:test]
 
 # Run the unit tests
 desc 'Run all unit tests'
-Rake::TestTask.new('test') do |t|
+Rake::TestTask.new(:test) do |t|
   t.libs << 'lib'
   t.pattern = 'test/*/*_test.rb'
   t.verbose = true
 end
 
+# Run benchmarks (every test/*/*_benchmark.rb file) with `rake bench`
+desc 'Run all benchmarks'
+Rake::TestTask.new(:bench) do |t|
+  t.libs << 'lib'
+  t.pattern = 'test/*/*_benchmark.rb'
+  t.verbose = true
+end
+
 # Make a console, useful when working on tests
 desc 'Generate a test console'
 task :console do
@@ -23,7 +31,7 @@ end
 
 # Genereate the RDoc documentation
 desc 'Create documentation'
-Rake::RDocTask.new('doc') do |rdoc|
+Rake::RDocTask.new(:doc) do |rdoc|
   rdoc.title = 'Ruby Classifier - Bayesian and LSI classification library'
   rdoc.rdoc_dir = 'html'
   rdoc.rdoc_files.include('README.markdown')
diff --git a/test/bayes/bayesian_common_benchmarks.rb b/test/bayes/bayesian_common_benchmarks.rb
new file mode 100644
index 0000000..e9a17f5
--- /dev/null
+++ b/test/bayes/bayesian_common_benchmarks.rb
@@ -0,0 +1,43 @@
+# encoding: utf-8
+
+# Benchmarks shared by the Bayes backends; includers set @data and @classifiers.
+module BayesianCommonBenchmarks
+  # Returns the lines of the SMS spam corpus, tagged as UTF-8.
+  def load_data
+    corpus_path = File.expand_path(File.dirname(__FILE__) + '/../data/corpus/SMSSpamCollection.tsv')
+    File.read(corpus_path).force_encoding("utf-8").split("\n")
+  end
+
+  # Asserts that training on the first n records scales linearly with n.
+  def bench_train
+    assert_performance_linear do |n|
+      each_record(n) { |label, text| @classifiers[n].train(label, text) }
+    end
+  end
+
+  # Asserts linear scaling for training on n records, then untraining them.
+  def bench_train_untrain
+    assert_performance_linear do |n|
+      each_record(n) { |label, text| @classifiers[n].train(label, text) }
+      each_record(n) { |label, text| @classifiers[n].untrain(label, text) }
+    end
+  end
+
+  # Asserts linear scaling for training on n records, then classifying them.
+  def bench_train_classify
+    assert_performance_linear do |n|
+      each_record(n) { |label, text| @classifiers[n].train(label, text) }
+      each_record(n) { |_label, text| @classifiers[n].classify(text) }
+    end
+  end
+
+  private
+
+  # Yields the first and last tab-separated fields of the first n records.
+  def each_record(n)
+    n.times do |i|
+      fields = @data[i].strip.split("\t")
+      yield fields.first, fields.last
+    end
+  end
+end
diff --git a/test/bayes/bayesian_memory_benchmark.rb b/test/bayes/bayesian_memory_benchmark.rb
new file mode 100755
index 0000000..b40fd6c
--- /dev/null
+++ b/test/bayes/bayesian_memory_benchmark.rb
@@ -0,0 +1,26 @@
+# encoding: utf-8
+
+require File.dirname(__FILE__) + '/../test_helper'
+require_relative './bayesian_common_benchmarks'
+
+# Benchmarks ClassifierReborn::Bayes created without an explicit backend.
+class BayesianMemoryBenchmark < Minitest::Benchmark
+  MAX_RECORDS = 5000
+
+  include BayesianCommonBenchmarks
+
+  # Sizes to benchmark: the bench_exp(1, MAX_RECORDS) steps plus MAX_RECORDS.
+  def self.bench_range
+    sizes = bench_exp(1, MAX_RECORDS)
+    sizes.push(MAX_RECORDS).uniq
+  end
+
+  # Loads the corpus and builds one fresh classifier per benchmark size.
+  def setup
+    @data ||= load_data
+    skip("Not enough records in the dataset") if @data.length < MAX_RECORDS
+    @classifiers = {}
+    self.class.bench_range.each { |n| @classifiers[n] = ClassifierReborn::Bayes.new('Ham', 'Spam') }
+    print "memory_"
+  end
+end
diff --git a/test/bayes/bayesian_redis_benchmark.rb b/test/bayes/bayesian_redis_benchmark.rb
new file mode 100644
index 0000000..7772a8b
--- /dev/null
+++ b/test/bayes/bayesian_redis_benchmark.rb
@@ -0,0 +1,38 @@
+# encoding: utf-8
+
+require File.dirname(__FILE__) + '/../test_helper'
+require_relative './bayesian_common_benchmarks'
+
+# Benchmarks ClassifierReborn::Bayes backed by Redis, one database per size.
+class BayesianRedisBenchmark < Minitest::Benchmark
+  MAX_RECORDS = 5000
+
+  include BayesianCommonBenchmarks
+
+  # Sizes to benchmark: the bench_exp(1, MAX_RECORDS) steps plus MAX_RECORDS.
+  def self.bench_range
+    (bench_exp(1, MAX_RECORDS) << MAX_RECORDS).uniq
+  end
+
+  # Builds one Redis-backed classifier per size; skips on a short corpus or no Redis.
+  def setup
+    @classifiers = {} # assigned first so teardown stays safe after any skip
+    @data ||= load_data
+    skip("Not enough records in the dataset") if @data.length < MAX_RECORDS
+    self.class.bench_range.each_with_index do |n, i|
+      redis_backend = ClassifierReborn::BayesRedisBackend.new(db: i)
+      redis_backend.instance_variable_get(:@redis).config(:set, "save", "") # no disk snapshots
+      @classifiers[n] = ClassifierReborn::Bayes.new 'Ham', 'Spam', backend: redis_backend
+    end
+    print "redis_"
+  rescue Redis::CannotConnectError => e
+    skip(e)
+  end
+
+  # Flushes the Redis db of every classifier that setup managed to create.
+  def teardown
+    @classifiers.each_value do |classifier|
+      classifier.instance_variable_get(:@backend).instance_variable_get(:@redis).flushdb
+    end
+  end
+end
diff --git a/test/test_helper.rb b/test/test_helper.rb
index b539406..cf24dbc 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -1,8 +1,9 @@
 $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')
 
 require 'minitest/autorun'
+require "minitest/benchmark"
 require 'minitest/reporters'
-Minitest::Reporters.use!
+Minitest::Reporters.use! unless ENV['NOPROGRESS']
 require 'pry'
 require 'classifier-reborn'
 include ClassifierReborn