From e64791ff82e9957ba3d93b7446d99c9762ac8f70 Mon Sep 17 00:00:00 2001
From: Matthew Kienow
Date: Thu, 14 Apr 2022 17:17:20 -0400
Subject: [PATCH] Restore modified fingerprint self test spec

The test will now run from the recog or recog-ruby rspec using the
correct fingerprint dir.
---
Review notes (patch commentary only; not part of the commit message):

 * spec/lib/fingerprint_self_test_spec.rb, "uses interpolation correctly":
   the regex literal is /\{(?<interpolated>[^\s{}]+)\}/. The group must be
   named because the next line reads the local variable `interpolated`,
   which Ruby only defines for a named capture in a regex literal on the
   left-hand side of `=~`.
 * spec/lib/fingerprint_self_test_spec.rb: the XSD and each XML file are read
   through block-form File.open instead of a bare Kernel#open, so every file
   handle is closed as soon as Nokogiri has parsed it.
 * Both changes are same-line edits: the hunk sizes and the diffstat below
   are unchanged. The blob ids on the `index` lines were not regenerated.

 Rakefile                               |   7 +-
 spec/lib/fingerprint_self_test_spec.rb | 175 +++++++++++++++++++++++++
 spec/spec_helper.rb                    |  86 ++++++++++++
 3 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 spec/lib/fingerprint_self_test_spec.rb
 create mode 100644 spec/spec_helper.rb

diff --git a/Rakefile b/Rakefile
index d4ca2be8..33b4b293 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1,5 +1,10 @@
 require "bundler/gem_tasks"
 
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new do |t|
+  t.pattern = 'spec/**/*_spec.rb'
+end
+
 require 'yard'
 require 'yard/rake/yardoc_task'
 YARD::Rake::YardocTask.new do |t|
@@ -14,4 +19,4 @@ Cucumber::Rake::Task.new(:features) do |t|
 end
 
 task :default => [ :tests, :yard ]
-task :tests => [ :features ]
+task :tests => [ :spec, :features ]
diff --git a/spec/lib/fingerprint_self_test_spec.rb b/spec/lib/fingerprint_self_test_spec.rb
new file mode 100644
index 00000000..0d472ba0
--- /dev/null
+++ b/spec/lib/fingerprint_self_test_spec.rb
@@ -0,0 +1,175 @@
+require 'recog/db'
+require 'regexp_parser'
+require 'nokogiri'
+
+describe Recog::DB do
+  let(:schema) { File.open(File.join(FINGERPRINT_DIR, 'fingerprints.xsd')) { |xsd| Nokogiri::XML::Schema(xsd) } }
+  Dir[File.join(FINGERPRINT_DIR, '*.xml')].each do |xml_file_name|
+
+    describe "##{File.basename(xml_file_name)}" do
+
+      it "is valid XML" do
+        doc = File.open(xml_file_name) { |xml| Nokogiri::XML(xml) }
+        errors = schema.validate(doc)
+        expect(errors).to be_empty, "#{xml_file_name} is invalid recog XML -- #{errors.inspect}"
+      end
+
+      db = Recog::DB.new(xml_file_name)
+
+      it "has a match key" do
+        expect(db.match_key).not_to be_nil
+        expect(db.match_key).not_to be_empty
+      end
+
+      it "has valid 'preference' value" do
+        # Reserve values below 0.10 and above 0.90 for users
+        # See xml/fingerprints.xsd
+        expect(db.preference.class).to be ::Float
+        expect(db.preference).to be_between(0.10, 0.90)
+      end
+
+      fp_descriptions = []
+      db.fingerprints.each_index do |i|
+        fp = db.fingerprints[i]
+
+        it "doesn't have a duplicate description" do
+          if fp_descriptions.include?(fp.name)
+            fail "'#{fp.name}'s description is not unique"
+          else
+            fp_descriptions << fp.name
+          end
+        end
+
+        context "#{fp.name}" do
+          param_names = []
+          it "has consistent os.device and hw.device" do
+            if fp.params['os.device'] && fp.params['hw.device'] && (fp.params['os.device'] != fp.params['hw.device'])
+              fail "#{fp.name} has both hw.device and os.device but with differing values"
+            end
+          end
+          fp.params.each do |param_name, pos_value|
+            pos, value = pos_value
+            it "has valid looking fingerprint parameter names" do
+              unless param_name =~ /^(?:cookie|[^\.]+\..*)$/
+                fail "'#{param_name}' is invalid"
+              end
+            end
+
+            it "doesn't have param values for capture params" do
+              if pos > 0 && !value.to_s.empty?
+                fail "'#{fp.name}'s #{param_name} is a non-zero pos but specifies a value of '#{value}'"
+              end
+            end
+
+            it "has parameter values other than General, Server or Unknown, which are not helpful" do
+              if pos == 0 && value =~ /^(?i:general|server|unknown)$/
+                fail "'#{param_name}' has general/server/unknown value '#{value}'"
+              end
+            end
+
+            it "doesn't omit values for non-capture params" do
+              if pos == 0 && value.to_s.empty?
+                fail "'#{fp.name}'s #{param_name} is not a capture (pos=0) but doesn't specify a value"
+              end
+            end
+
+            it "doesn't have duplicate params" do
+              if param_names.include?(param_name)
+                fail "'#{fp.name}'s has duplicate #{param_name}"
+              else
+                param_names << param_name
+              end
+            end
+
+            it "uses interpolation correctly" do
+              if pos == 0 && /\{(?<interpolated>[^\s{}]+)\}/ =~ value
+                unless fp.params.key?(interpolated)
+                  fail "'#{fp.name}' uses interpolated value '#{interpolated}' that does not exist"
+                end
+              end
+            end
+          end
+        end
+
+        context "#{fp.regex}" do
+
+          it "has a valid looking name" do
+            expect(fp.name).not_to be_nil
+            expect(fp.name).not_to be_empty
+          end
+
+          it "has a regex" do
+            expect(fp.regex).not_to be_nil
+            expect(fp.regex.class).to be ::Regexp
+          end
+
+          it 'uses capturing regular expressions properly' do
+            # the list of index-based captures that the fingerprint is expecting
+            expected_capture_positions = fp.params.values.map(&:first).map(&:to_i).select { |position| position > 0 }
+            if fp.params.empty? && expected_capture_positions.size > 0
+              fail "Non-asserting fingerprint with regex #{fp.regex} captures #{expected_capture_positions.size} time(s); 0 are needed"
+            else
+              # parse the regex and count the number of captures
+              actual_capture_positions = []
+              capture_number = 1
+              Regexp::Scanner.scan(fp.regex).each do |token_parts|
+                if token_parts.first == :group && ![:close, :passive, :options, :options_switch].include?(token_parts[1])
+                  actual_capture_positions << capture_number
+                  capture_number += 1
+                end
+              end
+              # compare the captures actually performed to those being used and ensure that they contain
+              # the same elements regardless of order, preventing, over-, under- and other forms of mis-capturing.
+              actual_capture_positions = actual_capture_positions.sort.uniq
+              expected_capture_positions = expected_capture_positions.sort.uniq
+              expect(actual_capture_positions).to eq(expected_capture_positions),
+                "Regex has #{actual_capture_positions.size} capture groups, but the fingerprint expected #{expected_capture_positions.size} extractions."
+            end
+          end
+
+          # Not yet enforced
+          # it "has test cases" do
+          #  expect(fp.tests.length).not_to equal(0)
+          # end
+
+          it "Has a reasonable number (<= 20) of test cases" do
+            expect(fp.tests.length).to be <= 20
+          end
+
+          fp_examples = []
+          fp.tests.each do |example|
+            it "doesn't have a duplicate examples" do
+              if fp_examples.include?(example.content)
+                fail "'#{fp.name}' has duplicate example '#{example.content}'"
+              else
+                fp_examples << example.content
+              end
+            end
+            it "Example '#{example.content}' matches this regex" do
+              match = fp.match(example.content)
+              expect(match).to_not be_nil, 'Regex did not match'
+              # test any extractions specified in the example
+              example.attributes.each_pair do |k,v|
+                next if k == '_encoding'
+                next if k == '_filename'
+                expect(match[k]).to eq(v), "Regex didn't extract expected value for fingerprint attribute #{k} -- got #{match[k]} instead of #{v}"
+              end
+            end
+
+            it "Example '#{example.content}' matches this regex first" do
+              db.fingerprints.slice(0, i).each_index do |previous_i|
+                prev_fp = db.fingerprints[previous_i]
+                prev_fp.tests.each do |prev_example|
+                  match = prev_fp.match(example.content)
+                  expect(match).to be_nil, "Matched regex ##{previous_i} (#{db.fingerprints[previous_i].regex}) rather than ##{i} (#{db.fingerprints[i].regex})"
+                end
+              end
+            end
+          end
+
+        end
+      end
+
+    end
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100644
index 00000000..1ab0b7e9
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,86 @@
+FINGERPRINT_DIR = File.expand_path(File.join('..', 'xml'), __dir__)
+
+# setup code coverage
+require 'simplecov'
+SimpleCov.start
+
+require 'rspec'
+# This file was generated by the `rspec --init` command. Conventionally, all
+# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
+# The generated `.rspec` file contains `--require spec_helper` which will cause this
+# file to always be loaded, without a need to explicitly require it in any files.
+#
+# Given that it is always loaded, you are encouraged to keep this file as
+# light-weight as possible. Requiring heavyweight dependencies from this file
+# will add to the boot time of your test suite on EVERY test run, even for an
+# individual file that may not need all of that loaded. Instead, make a
+# separate helper file that requires this one and then use it only in the specs
+# that actually need it.
+#
+# The `.rspec` file also contains a few flags that are not defaults but that
+# users commonly want.
+#
+# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+RSpec.configure do |config|
+
+  # Run specs in random order to surface order dependencies. If you find an
+  # order dependency and want to debug it, you can fix the order by providing
+  # the seed, which is printed after each run.
+  #     --seed 1234
+  config.order = :random
+
+  # Seed global randomization in this process using the `--seed` CLI option.
+  # Setting this allows you to use `--seed` to deterministically reproduce
+  # test failures related to randomization by passing the same `--seed` value
+  # as the one that triggered the failure.
+  Kernel.srand config.seed
+
+  # Many RSpec users commonly either run the entire suite or an individual
+  # file, and it's useful to allow more verbose output when running an
+  # individual spec file.
+  if config.files_to_run.one?
+    # Use the documentation formatter for detailed output,
+    # unless a formatter has already been configured
+    # (e.g. via a command-line flag).
+    config.default_formatter = 'doc'
+  end
+
+# The settings below are suggested to provide a good initial experience
+# with RSpec, but feel free to customize to your heart's content.
+=begin
+  # These two settings work together to allow you to limit a spec run
+  # to individual examples or groups you care about by tagging them with
+  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
+  # get run.
+  config.filter_run :focus
+  config.run_all_when_everything_filtered = true
+
+  # Print the 10 slowest examples and example groups at the
+  # end of the spec run, to help surface which specs are running
+  # particularly slow.
+  config.profile_examples = 10
+
+  # rspec-expectations config goes here. You can use an alternate
+  # assertion/expectation library such as wrong or the stdlib/minitest
+  # assertions if you prefer.
+  config.expect_with :rspec do |expectations|
+    # Enable only the newer, non-monkey-patching expect syntax.
+    # For more details, see:
+    #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
+    expectations.syntax = :expect
+  end
+
+  # rspec-mocks config goes here. You can use an alternate test double
+  # library (such as bogus or mocha) by changing the `mock_with` option here.
+  config.mock_with :rspec do |mocks|
+    # Enable only the newer, non-monkey-patching expect syntax.
+    # For more details, see:
+    #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
+    mocks.syntax = :expect
+
+    # Prevents you from mocking or stubbing a method that does not exist on
+    # a real object. This is generally recommended.
+    mocks.verify_partial_doubles = true
+  end
+=end
+end