From e21a9be6277c7fb2b394667b7a8f72beaaab3512 Mon Sep 17 00:00:00 2001
From: Sawood Alam
Date: Sat, 7 Jan 2017 14:25:38 -0500
Subject: [PATCH] Added Bayes backend benchmarks (#98)

* Disabled Redis disc persistence and refactored integration test, fixes #95

* Added Bayes backend benchmarks
---
Review notes (text between the "---" line and the first diff is not applied):
* test/bayes/bayesian_redis_benchmark.rb: teardown now only flushes the
  classifiers that setup actually created, so a skip raised inside setup is
  no longer followed by a NoMethodError in teardown; skip is given the error
  message rather than the exception object.
* Comments were added to the three new benchmark files. Hunk sizes and the
  diffstat below reflect that; the blob ids on the "index" lines are carried
  over from the original patch and were not recomputed.

 Rakefile                                 | 12 +++++-
 test/bayes/bayesian_common_benchmarks.rb | 51 ++++++++++++++++++++++++
 test/bayes/bayesian_memory_benchmark.rb  | 30 ++++++++++++++
 test/bayes/bayesian_redis_benchmark.rb   | 48 ++++++++++++++++++++++
 test/test_helper.rb                      |  3 +-
 5 files changed, 141 insertions(+), 3 deletions(-)
 create mode 100644 test/bayes/bayesian_common_benchmarks.rb
 create mode 100755 test/bayes/bayesian_memory_benchmark.rb
 create mode 100644 test/bayes/bayesian_redis_benchmark.rb

diff --git a/Rakefile b/Rakefile
index 398f30c..a32c0b2 100644
--- a/Rakefile
+++ b/Rakefile
@@ -9,12 +9,20 @@ task default: [:test]
 
 # Run the unit tests
 desc 'Run all unit tests'
-Rake::TestTask.new('test') do |t|
+Rake::TestTask.new(:test) do |t|
   t.libs << 'lib'
   t.pattern = 'test/*/*_test.rb'
   t.verbose = true
 end
 
+# Run benchmarks
+desc 'Run all benchmarks'
+Rake::TestTask.new(:bench) do |t|
+  t.libs << 'lib'
+  t.pattern = 'test/*/*_benchmark.rb'
+  t.verbose = true
+end
+
 # Make a console, useful when working on tests
 desc 'Generate a test console'
 task :console do
@@ -23,7 +31,7 @@ end
 
 # Genereate the RDoc documentation
 desc 'Create documentation'
-Rake::RDocTask.new('doc') do |rdoc|
+Rake::RDocTask.new(:doc) do |rdoc|
   rdoc.title = 'Ruby Classifier - Bayesian and LSI classification library'
   rdoc.rdoc_dir = 'html'
   rdoc.rdoc_files.include('README.markdown')
diff --git a/test/bayes/bayesian_common_benchmarks.rb b/test/bayes/bayesian_common_benchmarks.rb
new file mode 100644
index 0000000..e9a17f5
--- /dev/null
+++ b/test/bayes/bayesian_common_benchmarks.rb
@@ -0,0 +1,51 @@
+# encoding: utf-8
+
+# Benchmark cases shared by the Bayes backend benchmarks. The including class
+# must populate @data (record lines) and @classifiers (one classifier per
+# bench_range size, keyed by that size) in its setup.
+module BayesianCommonBenchmarks
+  # Returns the lines of the SMS spam corpus as UTF-8 strings.
+  def load_data
+    sms_spam_collection = File.expand_path(File.dirname(__FILE__) + '/../data/corpus/SMSSpamCollection.tsv')
+    File.read(sms_spam_collection).force_encoding("utf-8").split("\n")
+  end
+
+  # Trains the first n records; asserts run time grows linearly with n.
+  def bench_train
+    assert_performance_linear do |n|
+      n.times do |i|
+        # Tab-separated record: first field and last field are passed to train.
+        parts = @data[i].strip.split("\t")
+        @classifiers[n].train(parts.first, parts.last)
+      end
+    end
+  end
+
+  # Trains the first n records, then untrains the same records.
+  def bench_train_untrain
+    assert_performance_linear do |n|
+      n.times do |i|
+        parts = @data[i].strip.split("\t")
+        @classifiers[n].train(parts.first, parts.last)
+      end
+      n.times do |i|
+        parts = @data[i].strip.split("\t")
+        @classifiers[n].untrain(parts.first, parts.last)
+      end
+    end
+  end
+
+  # Trains the first n records, then classifies the last field of each.
+  def bench_train_classify
+    assert_performance_linear do |n|
+      n.times do |i|
+        parts = @data[i].strip.split("\t")
+        @classifiers[n].train(parts.first, parts.last)
+      end
+      n.times do |i|
+        parts = @data[i].strip.split("\t")
+        @classifiers[n].classify(parts.last)
+      end
+    end
+  end
+end
diff --git a/test/bayes/bayesian_memory_benchmark.rb b/test/bayes/bayesian_memory_benchmark.rb
new file mode 100755
index 0000000..b40fd6c
--- /dev/null
+++ b/test/bayes/bayesian_memory_benchmark.rb
@@ -0,0 +1,30 @@
+# encoding: utf-8
+
+require File.dirname(__FILE__) + '/../test_helper'
+require_relative './bayesian_common_benchmarks'
+
+# Runs the shared Bayes benchmarks against ClassifierReborn::Bayes created without an explicit backend.
+class BayesianMemoryBenchmark < Minitest::Benchmark
+  MAX_RECORDS = 5000
+
+  include BayesianCommonBenchmarks
+
+  # Sizes from bench_exp(1, MAX_RECORDS), always including MAX_RECORDS itself.
+  def self.bench_range
+    (bench_exp(1, MAX_RECORDS) << MAX_RECORDS).uniq
+  end
+
+  # Loads the corpus and builds one fresh classifier per benchmark size.
+  def setup
+    @data ||= load_data
+    if @data.length < MAX_RECORDS
+      skip("Not enough records in the dataset")
+    end
+    @classifiers = {}
+    self.class.bench_range.each do |n|
+      @classifiers[n] = ClassifierReborn::Bayes.new 'Ham', 'Spam'
+    end
+    # Printed without a newline; presumably prefixes the benchmark label in the report.
+    print "memory_"
+  end
+end
diff --git a/test/bayes/bayesian_redis_benchmark.rb b/test/bayes/bayesian_redis_benchmark.rb
new file mode 100644
index 0000000..7772a8b
--- /dev/null
+++ b/test/bayes/bayesian_redis_benchmark.rb
@@ -0,0 +1,48 @@
+# encoding: utf-8
+
+require File.dirname(__FILE__) + '/../test_helper'
+require_relative './bayesian_common_benchmarks'
+
+# Runs the shared Bayes benchmarks against ClassifierReborn::BayesRedisBackend.
+# Benchmarks are skipped when Redis::CannotConnectError is raised during setup.
+class BayesianRedisBenchmark < Minitest::Benchmark
+  MAX_RECORDS = 5000
+
+  include BayesianCommonBenchmarks
+
+  # Sizes from bench_exp(1, MAX_RECORDS), always including MAX_RECORDS itself.
+  def self.bench_range
+    (bench_exp(1, MAX_RECORDS) << MAX_RECORDS).uniq
+  end
+
+  # Loads the corpus and builds one Redis-backed classifier per benchmark size,
+  # each backend constructed with a distinct db: index.
+  def setup
+    @data ||= load_data
+    if @data.length < MAX_RECORDS
+      skip("Not enough records in the dataset")
+    end
+    @classifiers = {}
+    self.class.bench_range.each_with_index do |n, i|
+      begin
+        redis_backend = ClassifierReborn::BayesRedisBackend.new(db: i)
+        # Sets the Redis "save" option to "" (the commit message refers to disabling disc persistence).
+        redis_backend.instance_variable_get(:@redis).config(:set, "save", "")
+        @classifiers[n] = ClassifierReborn::Bayes.new 'Ham', 'Spam', backend: redis_backend
+      rescue Redis::CannotConnectError => e
+        skip(e.message)
+      end
+    end
+    # Printed without a newline; presumably prefixes the benchmark label in the report.
+    print "redis_"
+  end
+
+  # Flushes the database of every classifier that setup managed to create.
+  # setup may skip before @classifiers is assigned or fully populated, and
+  # Minitest still calls teardown in that case, so missing entries are tolerated.
+  def teardown
+    (@classifiers || {}).each_value do |classifier|
+      classifier.instance_variable_get(:@backend).instance_variable_get(:@redis).flushdb
+    end
+  end
+end
diff --git a/test/test_helper.rb b/test/test_helper.rb
index b539406..cf24dbc 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -1,8 +1,9 @@
 $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')
 
 require 'minitest/autorun'
+require "minitest/benchmark"
 require 'minitest/reporters'
-Minitest::Reporters.use!
+Minitest::Reporters.use! unless ENV['NOPROGRESS']
 require 'pry'
 require 'classifier-reborn'
 include ClassifierReborn