Skip to content

Commit

Permalink
Added Bayes backend benchmarks (#98)
Browse files Browse the repository at this point in the history
* Disabled Redis disc persistence and refactored integration test, fixes #95

* Added Bayes backend benchmarks
  • Loading branch information
ibnesayeed authored and Ch4s3 committed Jan 7, 2017
1 parent a8c7578 commit e21a9be
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 3 deletions.
12 changes: 10 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,20 @@ task default: [:test]

# Run the unit tests
desc 'Run all unit tests'
Rake::TestTask.new('test') do |t|
Rake::TestTask.new(:test) do |t|
t.libs << 'lib'
t.pattern = 'test/*/*_test.rb'
t.verbose = true
end

# Run benchmarks
# Defines a `bench` Rake task through Rake::TestTask. It adds `lib` to the
# load path and runs every file one directory below test/ whose name ends in
# `_benchmark.rb`, so benchmark files are kept apart from the `*_test.rb`
# files by naming convention alone.
desc 'Run all benchmarks'
Rake::TestTask.new(:bench) do |t|
t.libs << 'lib'
# Glob is exactly one directory deep (test/<dir>/<name>_benchmark.rb).
t.pattern = 'test/*/*_benchmark.rb'
t.verbose = true
end

# Make a console, useful when working on tests
desc 'Generate a test console'
task :console do
Expand All @@ -23,7 +31,7 @@ end

# Generate the RDoc documentation
desc 'Create documentation'
Rake::RDocTask.new('doc') do |rdoc|
Rake::RDocTask.new(:doc) do |rdoc|
rdoc.title = 'Ruby Classifier - Bayesian and LSI classification library'
rdoc.rdoc_dir = 'html'
rdoc.rdoc_files.include('README.markdown')
Expand Down
43 changes: 43 additions & 0 deletions test/bayes/bayesian_common_benchmarks.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# encoding: utf-8

# Benchmark bodies shared by every Bayes backend benchmark class.
#
# The including class is expected to provide:
# * @data        - array of "<category>\t<text>" lines (see #load_data)
# * @classifiers - hash mapping each benchmark size n to its own classifier
# * assert_performance_linear - yields each benchmark size n to the block
module BayesianCommonBenchmarks
  # Reads the SMS spam corpus stored under ../data/corpus relative to this
  # file, tags the content as UTF-8 and returns it split into lines.
  def load_data
    corpus_path = File.expand_path("#{File.dirname(__FILE__)}/../data/corpus/SMSSpamCollection.tsv")
    raw_corpus = File.read(corpus_path)
    raw_corpus.force_encoding("utf-8").split("\n")
  end

  # Trains the classifier for size n on the first n records.
  def bench_train
    assert_performance_linear do |size|
      each_labelled_record(size) do |category, text|
        @classifiers[size].train(category, text)
      end
    end
  end

  # Trains on the first n records, then untrains the same n records.
  def bench_train_untrain
    assert_performance_linear do |size|
      each_labelled_record(size) do |category, text|
        @classifiers[size].train(category, text)
      end
      each_labelled_record(size) do |category, text|
        @classifiers[size].untrain(category, text)
      end
    end
  end

  # Trains on the first n records, then classifies the text of each of them.
  def bench_train_classify
    assert_performance_linear do |size|
      each_labelled_record(size) do |category, text|
        @classifiers[size].train(category, text)
      end
      each_labelled_record(size) do |_category, text|
        @classifiers[size].classify(text)
      end
    end
  end

  private

  # Yields the first and last tab-separated field of each of the first
  # +count+ lines in @data. Records are addressed by index, so asking for
  # more records than @data holds fails on the first missing one.
  def each_labelled_record(count)
    (0...count).each do |index|
      fields = @data[index].strip.split("\t")
      yield fields.first, fields.last
    end
  end
end
26 changes: 26 additions & 0 deletions test/bayes/bayesian_memory_benchmark.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# encoding: utf-8

require File.dirname(__FILE__) + '/../test_helper'
require_relative './bayesian_common_benchmarks'

# Runs the shared Bayes benchmarks against classifiers built with
# ClassifierReborn::Bayes.new and no explicit backend option.
class BayesianMemoryBenchmark < Minitest::Benchmark
  MAX_RECORDS = 5000

  include BayesianCommonBenchmarks

  # Benchmark input sizes: whatever bench_exp(1, MAX_RECORDS) produces, with
  # MAX_RECORDS itself appended and duplicates removed.
  def self.bench_range
    sizes = bench_exp(1, MAX_RECORDS)
    sizes << MAX_RECORDS
    sizes.uniq
  end

  # Loads the corpus, skips when it holds fewer than MAX_RECORDS lines, and
  # builds one fresh classifier per benchmark size.
  def setup
    @data ||= load_data
    skip("Not enough records in the dataset") if @data.length < MAX_RECORDS

    @classifiers = {}
    self.class.bench_range.each_with_object(@classifiers) do |size, by_size|
      by_size[size] = ClassifierReborn::Bayes.new('Ham', 'Spam')
    end
    # Written without a newline, so it precedes whatever is printed next.
    print "memory_"
  end
end
38 changes: 38 additions & 0 deletions test/bayes/bayesian_redis_benchmark.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# encoding: utf-8

require File.dirname(__FILE__) + '/../test_helper'
require_relative './bayesian_common_benchmarks'

# Runs the shared Bayes benchmarks against classifiers backed by
# ClassifierReborn::BayesRedisBackend, one Redis database index per
# benchmark size.
class BayesianRedisBenchmark < Minitest::Benchmark
  MAX_RECORDS = 5000

  include BayesianCommonBenchmarks

  # Benchmark input sizes: whatever bench_exp(1, MAX_RECORDS) produces, with
  # MAX_RECORDS itself appended and duplicates removed.
  def self.bench_range
    (bench_exp(1, MAX_RECORDS) << MAX_RECORDS).uniq
  end

  # Loads the corpus and builds one Redis-backed classifier per benchmark
  # size. Skips when the corpus holds fewer than MAX_RECORDS lines or when
  # the Redis server cannot be reached.
  def setup
    # Initialise before anything that can skip: Minitest still runs teardown
    # after a skip raised in setup, and teardown walks this hash.
    @classifiers = {}
    @data ||= load_data
    skip("Not enough records in the dataset") if @data.length < MAX_RECORDS

    self.class.bench_range.each_with_index do |n, i|
      begin
        # The position in bench_range doubles as the Redis database index,
        # so every size gets its own keyspace.
        redis_backend = ClassifierReborn::BayesRedisBackend.new(db: i)
        # CONFIG SET save "" on the backend's connection (snapshotting off
        # for the duration of the benchmark).
        redis_backend.instance_variable_get(:@redis).config(:set, "save", "")
        @classifiers[n] = ClassifierReborn::Bayes.new 'Ham', 'Spam', backend: redis_backend
      rescue Redis::CannotConnectError => e
        skip(e.message)
      end
    end
    # Written without a newline, so it precedes whatever is printed next.
    print "redis_"
  end

  # Flushes the Redis database of every classifier that setup managed to
  # create. Iterating the hash (rather than bench_range) keeps this safe when
  # setup skipped before building some or all of the classifiers.
  def teardown
    (@classifiers || {}).each_value do |classifier|
      classifier.instance_variable_get(:@backend).instance_variable_get(:@redis).flushdb
    end
  end
end
3 changes: 2 additions & 1 deletion test/test_helper.rb
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')

require 'minitest/autorun'
require "minitest/benchmark"
require 'minitest/reporters'
Minitest::Reporters.use!
Minitest::Reporters.use! unless ENV['NOPROGRESS']

This comment has been minimized.

Copy link
@ibnesayeed

ibnesayeed Jan 9, 2017

Author Contributor

@Ch4s3 and @parkr when I was running recently written benchmarks I faced an issue due to the Minitest::Reporters inclusion. The fancy progress meter was mixing up with the reported benchmark values. That's why I used this ENV hack here to conditionally turn the fancy progress meter off and use the default one instead. However, when I tried to set this special ENV in the :bench Rake task it was applied to all the tasks. As a result now we must pass something like NOPROGRESS=T along with the rake bench command. I don't like this ugliness and would prefer to have a more sophisticated approach. Any better approaches?

This comment has been minimized.

Copy link
@ibnesayeed

ibnesayeed Jan 9, 2017

Author Contributor

I think I have solved it in #107.

require 'pry'
require 'classifier-reborn'
include ClassifierReborn

0 comments on commit e21a9be

Please sign in to comment.