Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restore and enhance fingerprint self test spec #444

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion Rakefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
require "bundler/gem_tasks"

require 'rspec/core/rake_task'
# Defines the `spec` Rake task, which runs every *_spec.rb file found anywhere
# below the spec/ directory.
RSpec::Core::RakeTask.new(:spec) do |spec_task|
  spec_task.pattern = 'spec/**/*_spec.rb'
end

require 'yard'
require 'yard/rake/yardoc_task'
YARD::Rake::YardocTask.new do |t|
Expand All @@ -14,4 +19,4 @@ Cucumber::Rake::Task.new(:features) do |t|
end

# The default task depends on the :tests and :yard tasks.
task :default => [ :tests, :yard ]
# NOTE(review): the two :tests declarations below look like the removed/added
# pair of a diff hunk (old: features only; new: spec and features) -- confirm
# that only the second one is meant to remain in the real Rakefile.
task :tests => [ :features ]
task :tests => [ :spec, :features ]
175 changes: 175 additions & 0 deletions spec/lib/fingerprint_self_test_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
require 'recog/db'
require 'regexp_parser'
require 'nokogiri'

# Self-test for every fingerprint database (*.xml) found in FINGERPRINT_DIR.
#
# For each file the spec checks that the XML validates against
# fingerprints.xsd, that the database has a match key and a preference value
# in the allowed range, and that every fingerprint has well-formed parameters,
# a regex whose capture groups line up with the parameters that use them, and
# examples that match the fingerprint they belong to.
#
# FINGERPRINT_DIR must be defined before this file is loaded.
describe Recog::DB do
  # File.open with a block hands Nokogiri the same IO object Kernel#open did,
  # but closes the handle as soon as the schema has been parsed.
  let(:schema) do
    File.open(File.join(FINGERPRINT_DIR, 'fingerprints.xsd')) { |xsd| Nokogiri::XML::Schema(xsd) }
  end

  Dir[File.join(FINGERPRINT_DIR, '*.xml')].each do |xml_file_name|

    describe "##{File.basename(xml_file_name)}" do

      it "is valid XML" do
        # Block form so the file handle is released once the document is parsed.
        doc = File.open(xml_file_name) { |xml| Nokogiri::XML(xml) }
        errors = schema.validate(doc)
        expect(errors).to be_empty, "#{xml_file_name} is invalid recog XML -- #{errors.inspect}"
      end

      # Loaded while the example groups are being defined, because the
      # fingerprints drive which examples get generated below.
      db = Recog::DB.new(xml_file_name)

      it "has a match key" do
        expect(db.match_key).not_to be_nil
        expect(db.match_key).not_to be_empty
      end

      it "has valid 'preference' value" do
        # Reserve values below 0.10 and above 0.90 for users
        # See xml/fingerprints.xsd
        expect(db.preference.class).to be ::Float
        expect(db.preference).to be_between(0.10, 0.90)
      end

      # Names seen so far in this file. The list is shared by all of the
      # duplicate-description examples, so whichever example runs second for a
      # repeated name is the one that fails.
      fp_descriptions = []
      db.fingerprints.each_with_index do |fp, i|

        it "doesn't have a duplicate description" do
          if fp_descriptions.include?(fp.name)
            fail "'#{fp.name}'s description is not unique"
          else
            fp_descriptions << fp.name
          end
        end

        context "#{fp.name}" do
          # Parameter names seen so far for this fingerprint.
          param_names = []

          it "has consistent os.device and hw.device" do
            if fp.params['os.device'] && fp.params['hw.device'] && (fp.params['os.device'] != fp.params['hw.device'])
              fail "#{fp.name} has both hw.device and os.device but with differing values"
            end
          end

          fp.params.each do |param_name, pos_value|
            # Each parameter is stored as [position, value]; the messages below
            # treat position 0 as a fixed value and a position above 0 as a
            # regex capture.
            pos, value = pos_value

            it "has valid looking fingerprint parameter names" do
              unless param_name =~ /^(?:cookie|[^\.]+\..*)$/
                fail "'#{param_name}' is invalid"
              end
            end

            it "doesn't have param values for capture params" do
              if pos > 0 && !value.to_s.empty?
                fail "'#{fp.name}'s #{param_name} is a non-zero pos but specifies a value of '#{value}'"
              end
            end

            it "has parameter values other than General, Server or Unknown, which are not helpful" do
              if pos == 0 && value =~ /^(?i:general|server|unknown)$/
                fail "'#{param_name}' has general/server/unknown value '#{value}'"
              end
            end

            it "doesn't omit values for non-capture params" do
              if pos == 0 && value.to_s.empty?
                fail "'#{fp.name}'s #{param_name} is not a capture (pos=0) but doesn't specify a value"
              end
            end

            it "doesn't have duplicate params" do
              if param_names.include?(param_name)
                fail "'#{fp.name}'s has duplicate #{param_name}"
              else
                param_names << param_name
              end
            end

            it "uses interpolation correctly" do
              # A fixed value may reference another parameter as {name}; that
              # parameter has to exist on the same fingerprint.
              if pos == 0 && /\{(?<interpolated>[^\s{}]+)\}/ =~ value
                unless fp.params.key?(interpolated)
                  fail "'#{fp.name}' uses interpolated value '#{interpolated}' that does not exist"
                end
              end
            end
          end
        end

        context "#{fp.regex}" do

          it "has a valid looking name" do
            expect(fp.name).not_to be_nil
            expect(fp.name).not_to be_empty
          end

          it "has a regex" do
            expect(fp.regex).not_to be_nil
            expect(fp.regex.class).to be ::Regexp
          end

          it 'uses capturing regular expressions properly' do
            # the list of index-based captures that the fingerprint is expecting
            expected_capture_positions = fp.params.values.map(&:first).map(&:to_i).select { |position| position > 0 }

            # A fingerprint without params expects no captures at all, so the
            # comparison below also rejects a non-asserting fingerprint whose
            # regex captures something; no separate branch is needed for it.

            # parse the regex and count the capturing groups; group tokens that
            # close a group or are passive/option groups do not capture
            capture_count = Regexp::Scanner.scan(fp.regex).count do |token_parts|
              token_parts.first == :group && ![:close, :passive, :options, :options_switch].include?(token_parts[1])
            end
            actual_capture_positions = (1..capture_count).to_a

            # compare the captures actually performed to those being used and ensure that they contain
            # the same elements regardless of order, preventing over-, under- and other forms of mis-capturing.
            expected_capture_positions = expected_capture_positions.sort.uniq
            expect(actual_capture_positions).to eq(expected_capture_positions),
              "Regex has #{actual_capture_positions.size} capture groups, but the fingerprint expected #{expected_capture_positions.size} extractions."
          end

          # Not yet enforced
          # it "has test cases" do
          #  expect(fp.tests.length).not_to equal(0)
          # end

          it "Has a reasonable number (<= 20) of test cases" do
            expect(fp.tests.length).to be <= 20
          end

          # Example contents seen so far for this fingerprint.
          fp_examples = []
          fp.tests.each do |example|
            it "doesn't have a duplicate examples" do
              if fp_examples.include?(example.content)
                fail "'#{fp.name}' has duplicate example '#{example.content}'"
              else
                fp_examples << example.content
              end
            end

            it "Example '#{example.content}' matches this regex" do
              match = fp.match(example.content)
              expect(match).to_not be_nil, 'Regex did not match'
              # test any extractions specified in the example
              example.attributes.each_pair do |k,v|
                next if k == '_encoding'
                next if k == '_filename'
                expect(match[k]).to eq(v), "Regex didn't extract expected value for fingerprint attribute #{k} -- got #{match[k]} instead of #{v}"
              end
            end

            it "Example '#{example.content}' matches this regex first" do
              db.fingerprints.slice(0, i).each_with_index do |prev_fp, previous_i|
                # The earlier fingerprint is matched once against this example.
                # Earlier fingerprints that declare no tests are skipped, which
                # keeps the set of checked fingerprints exactly as it was when
                # the match was repeated once per declared test.
                # NOTE(review): skipping test-less fingerprints means they can
                # shadow this example unnoticed -- confirm whether that is intended.
                next if prev_fp.tests.empty?

                match = prev_fp.match(example.content)
                expect(match).to be_nil, "Matched regex ##{previous_i} (#{prev_fp.regex}) rather than ##{i} (#{fp.regex})"
              end
            end
          end

        end
      end

    end
  end
end
86 changes: 86 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Absolute path of the `xml` directory that sits next to the directory
# containing this file.
FINGERPRINT_DIR = File.expand_path('../xml', __dir__)

# Set up code coverage. SimpleCov is required and started here, ahead of the
# `require 'rspec'` that follows in this file.
require 'simplecov'
SimpleCov.start

require 'rspec'
# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause this
# file to always be loaded, without a need to explicitly require it in any files.
#
# Given that it is always loaded, you are encouraged to keep this file as
# light-weight as possible. Requiring heavyweight dependencies from this file
# will add to the boot time of your test suite on EVERY test run, even for an
# individual file that may not need all of that loaded. Instead, make a
# separate helper file that requires this one and then use it only in the specs
# that actually need it.
#
# The `.rspec` file also contains a few flags that are not defaults but that
# users commonly want.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  # Randomize the order in which specs run so that hidden order dependencies
  # show up. The seed is printed after each run; to replay one particular
  # ordering while debugging, pass it back in:
  #     --seed 1234
  config.order = :random

  # Seed Kernel's global random number generator with the run's seed as well,
  # so that passing the same `--seed` value also reproduces failures that
  # depend on randomness inside this process.
  Kernel.srand(config.seed)

  # When exactly one spec file is being run, default to the more verbose
  # documentation formatter. This only sets the default, so a formatter that
  # was configured explicitly (e.g. via a command-line flag) still applies.
  config.default_formatter = 'doc' if config.files_to_run.one?

  # The settings below are suggested to provide a good initial experience
  # with RSpec, but feel free to customize to your heart's content.
=begin
  # These two settings work together to allow you to limit a spec run
  # to individual examples or groups you care about by tagging them with
  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
  # get run.
  config.filter_run :focus
  config.run_all_when_everything_filtered = true

  # Print the 10 slowest examples and example groups at the
  # end of the spec run, to help surface which specs are running
  # particularly slow.
  config.profile_examples = 10

  # rspec-expectations config goes here. You can use an alternate
  # assertion/expectation library such as wrong or the stdlib/minitest
  # assertions if you prefer.
  config.expect_with :rspec do |expectations|
    # Enable only the newer, non-monkey-patching expect syntax.
    # For more details, see:
    #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
    expectations.syntax = :expect
  end

  # rspec-mocks config goes here. You can use an alternate test double
  # library (such as bogus or mocha) by changing the `mock_with` option here.
  config.mock_with :rspec do |mocks|
    # Enable only the newer, non-monkey-patching expect syntax.
    # For more details, see:
    #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
    mocks.syntax = :expect

    # Prevents you from mocking or stubbing a method that does not exist on
    # a real object. This is generally recommended.
    mocks.verify_partial_doubles = true
  end
=end
end