From e64791ff82e9957ba3d93b7446d99c9762ac8f70 Mon Sep 17 00:00:00 2001
From: Matthew Kienow <matthew_kienow@rapid7.com>
Date: Thu, 14 Apr 2022 17:17:20 -0400
Subject: [PATCH] Restore modified fingerprint self test spec

The test will now run from the recog or recog-ruby rspec using the
correct fingerprint dir.
---
 Rakefile                               |   7 +-
 spec/lib/fingerprint_self_test_spec.rb | 175 +++++++++++++++++++++++++
 spec/spec_helper.rb                    |  86 ++++++++++++
 3 files changed, 267 insertions(+), 1 deletion(-)
 create mode 100644 spec/lib/fingerprint_self_test_spec.rb
 create mode 100644 spec/spec_helper.rb

diff --git a/Rakefile b/Rakefile
index d4ca2be8..33b4b293 100644
--- a/Rakefile
+++ b/Rakefile
@@ -1,5 +1,10 @@
 require "bundler/gem_tasks"
 
+require 'rspec/core/rake_task'
+RSpec::Core::RakeTask.new do |t|
+    t.pattern = 'spec/**/*_spec.rb'
+end
+
 require 'yard'
 require 'yard/rake/yardoc_task'
 YARD::Rake::YardocTask.new do |t|
@@ -14,4 +19,4 @@ Cucumber::Rake::Task.new(:features) do |t|
 end
 
 task :default => [ :tests, :yard ]
-task :tests => [ :features ]
+task :tests => [ :spec, :features ]
diff --git a/spec/lib/fingerprint_self_test_spec.rb b/spec/lib/fingerprint_self_test_spec.rb
new file mode 100644
index 00000000..0d472ba0
--- /dev/null
+++ b/spec/lib/fingerprint_self_test_spec.rb
@@ -0,0 +1,175 @@
+require 'recog/db'
+require 'regexp_parser'
+require 'nokogiri'
+
+describe Recog::DB do
+  let(:schema) { File.open(File.join(FINGERPRINT_DIR, 'fingerprints.xsd')) { |xsd_io| Nokogiri::XML::Schema(xsd_io) } } # block form closes the XSD handle
+  Dir[File.join(FINGERPRINT_DIR, '*.xml')].each do |xml_file_name|
+
+    describe "##{File.basename(xml_file_name)}" do
+
+      it "is valid XML" do # validates the fingerprint file against fingerprints.xsd
+        doc = File.open(xml_file_name) { |xml_io| Nokogiri::XML(xml_io) } # File.open (not Kernel#open) with a block: no leaked handle, no pipe-command paths
+        errors = schema.validate(doc)
+        expect(errors).to be_empty, "#{xml_file_name} is invalid recog XML -- #{errors.inspect}"
+      end
+
+      db = Recog::DB.new(xml_file_name)
+
+      it "has a match key" do
+        expect(db.match_key).not_to be_nil
+        expect(db.match_key).not_to be_empty
+      end
+
+      it "has valid 'preference' value" do
+          # Reserve values below 0.10 and above 0.90 for users
+          # See xml/fingerprints.xsd
+          expect(db.preference.class).to be ::Float
+          expect(db.preference).to be_between(0.10, 0.90)
+      end
+
+      fp_descriptions = []
+      db.fingerprints.each_index do |i|
+        fp = db.fingerprints[i]
+
+        it "doesn't have a duplicate description" do
+          if fp_descriptions.include?(fp.name)
+            fail "'#{fp.name}'s description is not unique"
+          else
+            fp_descriptions << fp.name
+          end
+        end
+
+        context "#{fp.name}" do
+          param_names = []
+          it "has consistent os.device and hw.device" do
+            if fp.params['os.device'] && fp.params['hw.device'] && (fp.params['os.device'] != fp.params['hw.device'])
+              fail "#{fp.name} has both hw.device and os.device but with differing values"
+            end
+          end
+          fp.params.each do |param_name, pos_value|
+            pos, value = pos_value
+            it "has valid looking fingerprint parameter names" do
+              unless param_name =~ /^(?:cookie|[^\.]+\..*)$/
+                fail "'#{param_name}' is invalid"
+              end
+            end
+
+            it "doesn't have param values for capture params" do
+              if pos > 0 && !value.to_s.empty?
+                fail "'#{fp.name}'s #{param_name} is a non-zero pos but specifies a value of '#{value}'"
+              end
+            end
+
+            it "has parameter values other than General, Server or Unknown, which are not helpful" do
+              if pos == 0 && value =~ /^(?i:general|server|unknown)$/
+                fail "'#{param_name}' has general/server/unknown value '#{value}'"
+              end
+            end
+
+            it "doesn't omit values for non-capture params" do
+              if pos == 0 && value.to_s.empty?
+                fail "'#{fp.name}'s #{param_name} is not a capture (pos=0) but doesn't specify a value"
+              end
+            end
+
+            it "doesn't have duplicate params" do
+              if param_names.include?(param_name)
+                fail "'#{fp.name}'s has duplicate #{param_name}"
+              else
+                param_names << param_name
+              end
+            end
+
+            it "uses interpolation correctly" do
+              if pos == 0 && /\{(?<interpolated>[^\s{}]+)\}/ =~ value
+                unless fp.params.key?(interpolated)
+                  fail "'#{fp.name}' uses interpolated value '#{interpolated}' that does not exist"
+                end
+              end
+            end
+          end
+        end
+
+        context "#{fp.regex}" do
+
+          it "has a valid looking name" do
+            expect(fp.name).not_to be_nil
+            expect(fp.name).not_to be_empty
+          end
+
+          it "has a regex" do
+            expect(fp.regex).not_to be_nil
+            expect(fp.regex.class).to be ::Regexp
+          end
+
+          it 'uses capturing regular expressions properly' do
+            # the list of index-based captures that the fingerprint is expecting
+            expected_capture_positions = fp.params.values.map(&:first).map(&:to_i).select { |position| position > 0 }
+            if fp.params.empty? && expected_capture_positions.size > 0
+              fail "Non-asserting fingerprint with regex #{fp.regex} captures #{expected_capture_positions.size} time(s); 0 are needed"
+            else
+              # parse the regex and count the number of captures
+              actual_capture_positions = []
+              capture_number = 1
+              Regexp::Scanner.scan(fp.regex).each do |token_parts|
+                if token_parts.first == :group  && ![:close, :passive, :options, :options_switch].include?(token_parts[1])
+                  actual_capture_positions << capture_number
+                  capture_number += 1
+                end
+              end
+              # compare the captures actually performed to those being used and ensure that they contain
+              # the same elements regardless of order, preventing over-, under- and other forms of mis-capturing.
+              actual_capture_positions = actual_capture_positions.sort.uniq
+              expected_capture_positions = expected_capture_positions.sort.uniq
+              expect(actual_capture_positions).to eq(expected_capture_positions),
+                "Regex has #{actual_capture_positions.size} capture groups, but the fingerprint expected #{expected_capture_positions.size} extractions."
+            end
+          end
+
+          # Not yet enforced
+          # it "has test cases" do
+          #  expect(fp.tests.length).not_to equal(0)
+          # end
+
+          it "Has a reasonable number (<= 20) of test cases" do
+            expect(fp.tests.length).to be <= 20
+          end
+
+          fp_examples = []
+          fp.tests.each do |example|
+            it "doesn't have a duplicate examples" do
+              if fp_examples.include?(example.content)
+                fail "'#{fp.name}' has duplicate example '#{example.content}'"
+              else
+                fp_examples << example.content
+              end
+            end
+            it "Example '#{example.content}' matches this regex" do
+              match = fp.match(example.content)
+              expect(match).to_not be_nil, 'Regex did not match'
+              # test any extractions specified in the example
+              example.attributes.each_pair do |k,v|
+                next if k == '_encoding'
+                next if k == '_filename'
+                expect(match[k]).to eq(v), "Regex didn't extract expected value for fingerprint attribute #{k} -- got #{match[k]} instead of #{v}"
+              end
+            end
+
+            it "Example '#{example.content}' matches this regex first" do
+              db.fingerprints.slice(0, i).each_index do |previous_i|
+                prev_fp = db.fingerprints[previous_i]
+                prev_fp.tests.each do |prev_example|
+                  match = prev_fp.match(example.content)
+                  expect(match).to be_nil, "Matched regex ##{previous_i} (#{db.fingerprints[previous_i].regex}) rather than ##{i} (#{db.fingerprints[i].regex})"
+                end
+              end
+            end
+          end
+
+        end
+      end
+
+    end
+  end
+end
diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb
new file mode 100644
index 00000000..1ab0b7e9
--- /dev/null
+++ b/spec/spec_helper.rb
@@ -0,0 +1,86 @@
+FINGERPRINT_DIR = File.expand_path(File.join('..', 'xml'), __dir__)
+
+# setup code coverage
+require 'simplecov'
+SimpleCov.start
+
+require 'rspec'
+# This file was generated by the `rspec --init` command. Conventionally, all
+# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
+# The generated `.rspec` file contains `--require spec_helper` which will cause this
+# file to always be loaded, without a need to explicitly require it in any files.
+#
+# Given that it is always loaded, you are encouraged to keep this file as
+# light-weight as possible. Requiring heavyweight dependencies from this file
+# will add to the boot time of your test suite on EVERY test run, even for an
+# individual file that may not need all of that loaded. Instead, make a
+# separate helper file that requires this one and then use it only in the specs
+# that actually need it.
+#
+# The `.rspec` file also contains a few flags that are not defaults but that
+# users commonly want.
+#
+# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+RSpec.configure do |config|
+
+  # Run specs in random order to surface order dependencies. If you find an
+  # order dependency and want to debug it, you can fix the order by providing
+  # the seed, which is printed after each run.
+  #     --seed 1234
+  config.order = :random
+
+  # Seed global randomization in this process using the `--seed` CLI option.
+  # Setting this allows you to use `--seed` to deterministically reproduce
+  # test failures related to randomization by passing the same `--seed` value
+  # as the one that triggered the failure.
+  Kernel.srand config.seed
+
+  # Many RSpec users commonly either run the entire suite or an individual
+  # file, and it's useful to allow more verbose output when running an
+  # individual spec file.
+  if config.files_to_run.one?
+    # Use the documentation formatter for detailed output,
+    # unless a formatter has already been configured
+    # (e.g. via a command-line flag).
+    config.default_formatter = 'doc'
+  end
+
+# The settings below are suggested to provide a good initial experience
+# with RSpec, but feel free to customize to your heart's content.
+=begin
+  # These two settings work together to allow you to limit a spec run
+  # to individual examples or groups you care about by tagging them with
+  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
+  # get run.
+  config.filter_run :focus
+  config.run_all_when_everything_filtered = true
+
+  # Print the 10 slowest examples and example groups at the
+  # end of the spec run, to help surface which specs are running
+  # particularly slow.
+  config.profile_examples = 10
+
+  # rspec-expectations config goes here. You can use an alternate
+  # assertion/expectation library such as wrong or the stdlib/minitest
+  # assertions if you prefer.
+  config.expect_with :rspec do |expectations|
+    # Enable only the newer, non-monkey-patching expect syntax.
+    # For more details, see:
+    #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
+    expectations.syntax = :expect
+  end
+
+  # rspec-mocks config goes here. You can use an alternate test double
+  # library (such as bogus or mocha) by changing the `mock_with` option here.
+  config.mock_with :rspec do |mocks|
+    # Enable only the newer, non-monkey-patching expect syntax.
+    # For more details, see:
+    #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
+    mocks.syntax = :expect
+
+    # Prevents you from mocking or stubbing a method that does not exist on
+    # a real object. This is generally recommended.
+    mocks.verify_partial_doubles = true
+  end
+=end
+end