From 1c08ed4a4f914db8a66cf26352dc6ad8bea9b88c Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Thu, 1 Jan 2015 15:37:15 -0800 Subject: [PATCH 01/40] Generic Rubocop fixups --- .rubocop.yml | 5 ++ Gemfile | 2 +- Rakefile | 10 +-- bin/htmlproof | 46 ++++++------ html-proofer.gemspec | 42 +++++------ lib/html/proofer.rb | 89 +++++++++++----------- lib/html/proofer/check.rb | 6 +- lib/html/proofer/checkable.rb | 28 +++---- lib/html/proofer/checks.rb | 14 ++-- lib/html/proofer/checks/favicon.rb | 6 +- lib/html/proofer/checks/html.rb | 21 +++--- lib/html/proofer/checks/images.rb | 14 ++-- lib/html/proofer/checks/links.rb | 46 +++++++----- lib/html/proofer/checks/scripts.rb | 8 +- lib/html/proofer/issue.rb | 2 +- lib/html/proofer/version.rb | 2 +- spec/html/proofer/favicon_spec.rb | 38 +++++----- spec/html/proofer/html_spec.rb | 48 ++++++------ spec/html/proofer/images_spec.rb | 38 +++++----- spec/html/proofer/links_spec.rb | 114 ++++++++++++++--------------- spec/html/proofer/scripts_spec.rb | 22 +++--- spec/html/proofer_spec.rb | 42 +++++------ spec/spec_helper.rb | 8 +- 23 files changed, 329 insertions(+), 322 deletions(-) create mode 100644 .rubocop.yml diff --git a/.rubocop.yml b/.rubocop.yml new file mode 100644 index 00000000..b15a80bf --- /dev/null +++ b/.rubocop.yml @@ -0,0 +1,5 @@ +HashSyntax: + EnforcedStyle: hash_rockets + +Metrics/LineLength: + Max: 100 diff --git a/Gemfile b/Gemfile index c80ee369..d65e2a66 100644 --- a/Gemfile +++ b/Gemfile @@ -1,3 +1,3 @@ -source "http://rubygems.org" +source 'http://rubygems.org' gemspec diff --git a/Rakefile b/Rakefile index bc751509..1c83d952 100644 --- a/Rakefile +++ b/Rakefile @@ -12,12 +12,10 @@ task :proof_readme do require 'redcarpet' redcarpet = Redcarpet::Markdown.new Redcarpet::Render::HTML.new({}), {} - html = redcarpet.render File.open("README.md").read + html = redcarpet.render File.read('README.md') - mkdir_p "out" - File.open "out/README.html", File::CREAT|File::WRONLY do |file| - file.puts html - end + mkdir_p 'out' + File.write('out/README.html', html) - HTML::Proofer.new("./out").run + HTML::Proofer.new('./out').run end diff --git a/bin/htmlproof b/bin/htmlproof index 80cc7a39..f9136656 100755 --- a/bin/htmlproof +++ b/bin/htmlproof @@ -1,18 +1,18 @@ #!/usr/bin/env ruby STDOUT.sync = true -$:.unshift File.join(File.dirname(__FILE__), *%w{ .. lib }) +$LOAD_PATH.unshift File.join(File.dirname(__FILE__), *%w( .. lib )) require 'html/proofer' require 'mercenary' -require "rubygems" +require 'rubygems' Mercenary.program(:htmlproof) do |p| - p.version Gem::Specification::load(File.join(File.dirname(__FILE__), "..", "html-proofer.gemspec")).version - p.description "Test your rendered HTML files to make sure they're accurate." + p.version Gem::Specification.load(File.join(File.dirname(__FILE__), '..', 'html-proofer.gemspec')).version + p.description %(Test your rendered HTML files to make sure they're accurate.) p.syntax 'htmlproof PATH [options]' - p.description "Runs the HTML-Proofer suite on the files in PATH" + p.description 'Runs the HTML-Proofer suite on the files in PATH' p.option 'ext', '--ext EXT', 'The extension of your HTML files (default: `.html`)' p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).' @@ -29,31 +29,31 @@ Mercenary.program(:htmlproof) do |p| p.option 'check_external_hash', '--check_external_hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).' p.action do |args, opts| - args = ["."] if args.empty? + args = ['.'] if args.empty? path = args.first options = {} - options[:ext] = opts["ext"] unless opts["ext"].nil? - unless opts["swap"].nil? + options[:ext] = opts['ext'] unless opts['ext'].nil? + unless opts['swap'].nil? options[:href_swap] = {} - opts["swap"].each do |s| - pair = s.split(":") - options[:href_swap][%r{#{pair[0]}}] = pair[1] + opts['swap'].each do |s| + pair = s.split(':') + options[:href_swap][/#{pair[0]}/] = pair[1] end end - options[:href_ignore] = opts["href_ignore"] unless opts["href_ignore"].nil? - options[:alt_ignore] = opts["alt_ignore"] unless opts["alt_ignore"].nil? - options[:file_ignore] = opts["file_ignore"] unless opts["file_ignore"].nil? - options[:disable_external] = opts["disable_external"] unless opts["disable_external"].nil? - options[:only_4xx] = opts["only_4xx"] unless opts["only_4xx"].nil? - options[:favicon] = opts["favicon"] unless opts["favicon"].nil? - options[:verbose] = opts["verbose"] unless opts["verbose"].nil? - options[:directory_index_file] = opts["directory_index_file"] unless opts["directory_index_file"].nil? - options[:validate_html] = opts["validate_html"] unless opts["validate_html"].nil? - options[:check_external_hash] = opts["check_external_hash"] unless opts["check_external_hash"].nil? - - path = path.delete(' ').split(",") if opts["as-links"] + options[:href_ignore] = opts['href_ignore'] unless opts['href_ignore'].nil? + options[:alt_ignore] = opts['alt_ignore'] unless opts['alt_ignore'].nil? + options[:file_ignore] = opts['file_ignore'] unless opts['file_ignore'].nil? + options[:disable_external] = opts['disable_external'] unless opts['disable_external'].nil? + options[:only_4xx] = opts['only_4xx'] unless opts['only_4xx'].nil? + options[:favicon] = opts['favicon'] unless opts['favicon'].nil? + options[:verbose] = opts['verbose'] unless opts['verbose'].nil? + options[:directory_index_file] = opts['directory_index_file'] unless opts['directory_index_file'].nil? + options[:validate_html] = opts['validate_html'] unless opts['validate_html'].nil? + options[:check_external_hash] = opts['check_external_hash'] unless opts['check_external_hash'].nil? + + path = path.delete(' ').split(',') if opts['as-links'] HTML::Proofer.new(path, options).run end diff --git a/html-proofer.gemspec b/html-proofer.gemspec index 2214eb57..9f1b117b 100644 --- a/html-proofer.gemspec +++ b/html-proofer.gemspec @@ -1,31 +1,31 @@ # -*- encoding: utf-8 -*- -$:.push File.expand_path("../lib", __FILE__) +$LOAD_PATH.push File.expand_path('../lib', __FILE__) require 'html/proofer/version' Gem::Specification.new do |gem| - gem.name = "html-proofer" + gem.name = 'html-proofer' gem.version = HTML::Proofer::VERSION - gem.authors = ["Garen Torikian"] - gem.email = ["gjtorikian@gmail.com"] - gem.description = %q{Test your rendered HTML files to make sure they're accurate.} - gem.summary = %q{A set of tests to validate your HTML output. These tests check if your image references are legitimate, if they have alt tags, if your internal links are working, and so on. It's intended to be an all-in-one checker for your documentation output.} - gem.homepage = "https://github.com/gjtorikian/html-proofer" - gem.license = "MIT" - gem.executables = ["htmlproof"] + gem.authors = ['Garen Torikian'] + gem.email = ['gjtorikian@gmail.com'] + gem.description = %(Test your rendered HTML files to make sure they're accurate.) + gem.summary = %(A set of tests to validate your HTML output. These tests check if your image references are legitimate, if they have alt tags, if your internal links are working, and so on. It's intended to be an all-in-one checker for your documentation output.) + gem.homepage = 'https://github.com/gjtorikian/html-proofer' + gem.license = 'MIT' + gem.executables = ['htmlproof'] gem.files = `git ls-files`.split($/) gem.test_files = gem.files.grep(%r{^(spec)/}) - gem.require_paths = ["lib"] + gem.require_paths = ['lib'] - gem.add_dependency "mercenary", "~> 0.3.2" - gem.add_dependency "nokogiri", "~> 1.5" - gem.add_dependency "colored", "~> 1.2" - gem.add_dependency "typhoeus", "~> 0.6.7" - gem.add_dependency "yell", "~> 2.0" - gem.add_dependency "parallel", "~> 1.3" - gem.add_dependency "addressable", "~> 2.3" + gem.add_dependency 'mercenary', '~> 0.3.2' + gem.add_dependency 'nokogiri', '~> 1.5' + gem.add_dependency 'colored', '~> 1.2' + gem.add_dependency 'typhoeus', '~> 0.6.7' + gem.add_dependency 'yell', '~> 2.0' + gem.add_dependency 'parallel', '~> 1.3' + gem.add_dependency 'addressable', '~> 2.3' - gem.add_development_dependency "redcarpet" - gem.add_development_dependency "rspec", "~> 3.1.0" - gem.add_development_dependency "rake" - gem.add_development_dependency "awesome_print" + gem.add_development_dependency 'redcarpet' + gem.add_development_dependency 'rspec', '~> 3.1' + gem.add_development_dependency 'rake' + gem.add_development_dependency 'awesome_print' end diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 182f529f..78a25691 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -1,24 +1,24 @@ require 'nokogiri' require 'yell' require 'parallel' -require "addressable/uri" +require 'addressable/uri' begin - require "awesome_print" + require 'awesome_print' rescue LoadError; end -%w[ +%w( checkable checks issue version -].each { |r| require File.join(File.dirname(__FILE__), "proofer", r) } +).each { |r| require File.join(File.dirname(__FILE__), 'proofer', r) } module HTML def self.colorize(color, string) if $stdout.isatty && $stderr.isatty - Colored.colorize(string, :foreground => color) + Colored.colorize(string, foreground: color) else string end @@ -33,7 +33,7 @@ def initialize(src, opts={}) @src = src @proofer_opts = { - :ext => ".html", + :ext => '.html', :favicon => false, :href_swap => [], :href_ignore => [], @@ -43,7 +43,7 @@ def initialize(src, opts={}) :disable_external => false, :verbose => false, :only_4xx => false, - :directory_index_file => "index.html", + :directory_index_file => 'index.html', :validate_html => false, :error_sort => :path } @@ -59,16 +59,14 @@ def initialize(src, opts={}) # Typhoeus won't let you pass in any non-Typhoeus option; if the option is not # a proofer_opt, it must be for Typhoeus opts.keys.each do |key| - if @proofer_opts[key].nil? - @typhoeus_opts[key] = opts[key] - end + @typhoeus_opts[key] = opts[key] if @proofer_opts[key].nil? end @options = @proofer_opts.merge(@typhoeus_opts).merge(opts) @failed_tests = [] - Yell.new({ :format => false, :name => "HTML::Proofer", :level => "gte.#{log_level}" }) do |l| + Yell.new({ :format => false, :name => 'HTML::Proofer', :level => "gte.#{log_level}" }) do |l| l.adapter :stdout, level: [:debug, :info, :warn] l.adapter :stderr, level: [:error, :fatal] end @@ -78,14 +76,14 @@ def run unless @src.is_a? Array external_urls = {} - logger.info HTML::colorize :white, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" + logger.info HTML.colorize :white, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" results = Parallel.map(files, @parallel_opts) do |path| html = HTML::Proofer.create_nokogiri(path) - result = {:external_urls => {}, :failed_tests => []} + result = { :external_urls => {}, :failed_tests => [] } get_checks.each do |klass| - logger.debug HTML::colorize :blue, "Checking #{klass.to_s.downcase} on #{path} ..." + logger.debug HTML.colorize :blue, "Checking #{klass.to_s.downcase} on #{path} ..." check = Object.const_get(klass).new(@src, path, html, @options) check.run result[:external_urls].merge!(check.external_urls) @@ -101,19 +99,20 @@ def run external_link_checker(external_urls) unless @options[:disable_external] - logger.info HTML::colorize :green, "Ran on #{files.length} files!\n\n" + logger.info HTML.colorize :green, "Ran on #{files.length} files!\n\n" else - external_urls = Hash[*@src.map{ |s| [s, nil] }.flatten] + external_urls = Hash[*@src.map { |s| [s, nil] }.flatten] external_link_checker(external_urls) unless @options[:disable_external] end if @failed_tests.empty? - logger.info HTML::colorize :green, "HTML-Proofer finished successfully." + logger.info HTML.colorize :green, 'HTML-Proofer finished successfully.' else matcher = nil - # always sort by the actual option, then path, to ensure consistent alphabetical (by filename) results - @failed_tests = @failed_tests.sort do |a,b| + # always sort by the actual option, then path, to ensure consistent + # alphabetical (by filename) results + @failed_tests = @failed_tests.sort do |a, b| comp = (a.send(@options[:error_sort]) <=> b.send(@options[:error_sort])) comp.zero? ? (a.path <=> b.path) : comp end @@ -122,26 +121,26 @@ def run case @options[:error_sort] when :path if matcher != issue.path - logger.error HTML::colorize :blue, "- #{issue.path}" + logger.error HTML.colorize :blue, "- #{issue.path}" matcher = issue.path end - logger.error HTML::colorize :red, " * #{issue.desc}" + logger.error HTML.colorize :red, " * #{issue.desc}" when :desc if matcher != issue.desc - logger.error HTML::colorize :blue, "- #{issue.desc}" + logger.error HTML.colorize :blue, "- #{issue.desc}" matcher = issue.desc end - logger.error HTML::colorize :red, " * #{issue.path}" + logger.error HTML.colorize :red, " * #{issue.path}" when :status if matcher != issue.status - logger.error HTML::colorize :blue, "- #{issue.status}" + logger.error HTML.colorize :blue, "- #{issue.status}" matcher = issue.status end - logger.error HTML::colorize :red, " * #{issue.to_s}" + logger.error HTML.colorize :red, " * #{issue}" end end - raise HTML::colorize :red, "HTML-Proofer found #{@failed_tests.length} failures!" + fail HTML.colorize :red, "HTML-Proofer found #{@failed_tests.length} failures!" end end @@ -158,18 +157,18 @@ def external_link_checker(external_urls) Ethon.logger = logger # log from Typhoeus/Ethon external_urls.each_pair do |href, filenames| - if has_hash? href && @options[:check_external_hash] + if hash?(href) && @options[:check_external_hash] queue_request(:get, href, filenames) else queue_request(:head, href, filenames) end end - logger.debug HTML::colorize :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..." + logger.debug HTML.colorize :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..." hydra.run end def queue_request(method, href, filenames) - request = Typhoeus::Request.new(href, @typhoeus_opts.merge({:method => method})) + request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method })) request.on_complete { |response| response_handler(response, filenames) } hydra.queue request end @@ -187,11 +186,13 @@ def response_handler(response, filenames) if response_code.between?(200, 299) return if @options[:only_4xx] return unless @options[:check_external_hash] - if hash = has_hash?(href) + if hash = hash?(href) body_doc = Nokogiri::HTML(response.body) # user-content is a special addition by GitHub. - xpath = %$//*[@name="#{hash}"]|//*[@id="#{hash}"]$ - xpath << %$|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]$ if URI.parse(href).host.match(/github\.com/i) + xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"]) + if URI.parse(href).host.match(/github\.com/i) + xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]) + end if body_doc.xpath(xpath).empty? add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response_code end @@ -202,7 +203,7 @@ def response_handler(response, filenames) elsif (response_code == 405 || response_code == 420 || response_code == 503) && method == :head # 420s usually come from rate limiting; let's ignore the query and try just the path with a GET uri = URI(href) - queue_request(:get, uri.scheme + "://" + uri.host + uri.path, filenames) + queue_request(:get, uri.scheme + '://' + uri.host + uri.path, filenames) # just be lazy; perform an explicit get request. some servers are apparently not configured to # intercept HTTP HEAD elsif method == :head @@ -220,7 +221,7 @@ def hydra def files if File.directory? @src - pattern = File.join @src, "**", "*#{@options[:ext]}" + pattern = File.join @src, '**', "*#{@options[:ext]}" files = Dir.glob(pattern).select { |fn| File.file? fn } files.reject { |f| ignore_file?(f) } elsif File.extname(@src) == @options[:ext] @@ -248,18 +249,16 @@ def self.create_nokogiri(path) end def get_checks - checks = HTML::Proofer::Checks::Check.subclasses.map { |c| c.name } - checks.delete("Favicons") unless @options[:favicon] - checks.delete("Html") unless @options[:validate_html] + checks = HTML::Proofer::Checks::Check.subclasses.map(&:name) + checks.delete('Favicons') unless @options[:favicon] + checks.delete('Html') unless @options[:validate_html] checks end - def has_hash?(url) - begin - URI.parse(url).fragment + def hash?(url) + URI.parse(url).fragment rescue URI::InvalidURIError nil - end end def log_level @@ -268,11 +267,9 @@ def log_level def add_failed_tests(filenames, desc, status = nil) if filenames.nil? - @failed_tests << Checks::Issue.new("", desc, status) - elsif - filenames.each { |f| - @failed_tests << Checks::Issue.new(f, desc, status) - } + @failed_tests << Checks::Issue.new('', desc, status) + else + filenames.each { |f| @failed_tests << Checks::Issue.new(f, desc, status) } end end diff --git a/lib/html/proofer/check.rb b/lib/html/proofer/check.rb index 513b08de..50f9eb30 100644 --- a/lib/html/proofer/check.rb +++ b/lib/html/proofer/check.rb @@ -23,7 +23,7 @@ def initialize(src, path, html, opts={}) end def run - raise NotImplementedError.new("HTML::Proofer::Check subclasses must implement #run") + fail NotImplementedError.new('HTML::Proofer::Check subclasses must implement #run') end def add_issue(desc, status = -1) @@ -31,7 +31,7 @@ def add_issue(desc, status = -1) end def output_filenames - Dir[@site.config[:output_dir] + '/**/*'].select{ |f| File.file?(f) } + Dir[@site.config[:output_dir] + '/**/*'].select { |f| File.file?(f) } end def add_to_external_urls(href) @@ -56,7 +56,7 @@ def self.subclasses private def remove_ignored(html) - html.css("code, pre").each { |node| node.unlink } + html.css('code, pre').each(&:unlink) html end diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index dc4caf9c..5f4fbc83 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -22,13 +22,13 @@ def initialize(obj, type, check) end # fix up missing protocols - @href.insert 0, "http:" if @href =~ /^\/\// - @src.insert 0, "http:" if @src =~ /^\/\// + @href.insert 0, 'http:' if @href =~ %r{^//} + @src.insert 0, 'http:' if @src =~ %r{^//} end def url - @src || @href || "" + @src || @href || '' end def valid? @@ -42,15 +42,15 @@ def parts end def path - parts.path if !parts.nil? + parts.path unless parts.nil? end def hash - parts.fragment if !parts.nil? + parts.fragment unless parts.nil? end def scheme - parts.scheme if !parts.nil? + parts.scheme unless parts.nil? end # path is to an external server @@ -66,11 +66,11 @@ def ignore? return true if @data_ignore_proofer case @type - when "favicon" + when 'favicon' return true if url.match(/^data:image/) - when "link" + when 'link' return true if ignores_pattern_check(@check.additional_href_ignores) - when "image" + when 'image' return true if url.match(/^data:image/) return true if ignores_pattern_check(@check.additional_alt_ignores) end @@ -83,7 +83,7 @@ def external? # path is an anchor or a query def internal? - url.start_with? "#", "?" + url.start_with? '#', '?' end def file_path @@ -102,7 +102,7 @@ def file_path file = File.join base, path # implicit index support - if File.directory? file and !unslashed_directory? file + if File.directory?(file) && !unslashed_directory?(file) file = File.join file, @check.options[:directory_index_file] end @@ -111,7 +111,7 @@ def file_path # checks if a file exists relative to the current pwd def exists? - return @checked_paths[absolute_path] if @checked_paths.has_key? absolute_path + return @checked_paths[absolute_path] if @checked_paths.key? absolute_path @checked_paths[absolute_path] = File.exist? absolute_path end @@ -132,8 +132,8 @@ def ignores_pattern_check(links) false end - def unslashed_directory? file - File.directory? file and !file.end_with? File::SEPARATOR and !@check.options[:followlocation] + def unslashed_directory?(file) + File.directory?(file) && !file.end_with?(File::SEPARATOR) && !@check.options[:followlocation] end end end diff --git a/lib/html/proofer/checks.rb b/lib/html/proofer/checks.rb index 9e9f39f7..7d5c599b 100644 --- a/lib/html/proofer/checks.rb +++ b/lib/html/proofer/checks.rb @@ -2,13 +2,13 @@ module HTML class Proofer class Checks [ - "issue", - "check", - "checks/images", - "checks/links", - "checks/scripts", - "checks/favicon", - "checks/html" + 'issue', + 'check', + 'checks/images', + 'checks/links', + 'checks/scripts', + 'checks/favicon', + 'checks/html' ].each { |r| require File.join(File.dirname(__FILE__), r) } end end diff --git a/lib/html/proofer/checks/favicon.rb b/lib/html/proofer/checks/favicon.rb index f72f8efa..a079346c 100644 --- a/lib/html/proofer/checks/favicon.rb +++ b/lib/html/proofer/checks/favicon.rb @@ -9,13 +9,13 @@ def rel class Favicons < ::HTML::Proofer::Checks::Check def run - @html.xpath("//link[not(ancestor::pre or ancestor::code)]").each do |favicon| + @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |favicon| favicon = Favicon.new favicon, "favicon", self next if favicon.ignore? - return if favicon.rel.split(" ").last.eql? "icon" + return if favicon.rel.split(' ').last.eql? 'icon' end - self.add_issue "no favicon specified" + add_issue 'no favicon specified' end end diff --git a/lib/html/proofer/checks/html.rb b/lib/html/proofer/checks/html.rb index 85be90d8..ed2590ec 100644 --- a/lib/html/proofer/checks/html.rb +++ b/lib/html/proofer/checks/html.rb @@ -3,22 +3,21 @@ class Html < ::HTML::Proofer::Checks::Check # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp) - HTML5_TAGS = %w(article aside bdi details dialog figcaption - figure footer header main mark menuitem meter - nav progress rp rt ruby section summary - time wbr datalist keygen output color date - datetime datetime-local email month number - range search tel time url week canvas + HTML5_TAGS = %w(article aside bdi details dialog figcaption + figure footer header main mark menuitem meter + nav progress rp rt ruby section summary + time wbr datalist keygen output color date + datetime datetime-local email month number + range search tel time url week canvas svg audio embed source track video) def run @html.errors.each do |e| + # Nokogiri (or rather libxml2 underhood) only recognizes html4 tags, + # so we need to skip errors caused by the new tags in html5 + next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1] - # Nokogiri (or rather libxml2 underhood) only recognizes html4 tags, - # so we need to skip errors caused by the new tags in html5 - next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1] - - self.add_issue(e.to_s) + add_issue(e.to_s) end end end diff --git a/lib/html/proofer/checks/images.rb b/lib/html/proofer/checks/images.rb index a631b242..a3c923bc 100644 --- a/lib/html/proofer/checks/images.rb +++ b/lib/html/proofer/checks/images.rb @@ -5,7 +5,7 @@ class Image < ::HTML::Proofer::Checkable SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/ def valid_alt_tag? - @alt and !@alt.empty? + @alt && !@alt.empty? end def terrible_filename? @@ -24,27 +24,27 @@ def missing_src? class Images < ::HTML::Proofer::Checks::Check def run - @html.css("img").each do |i| - img = Image.new i, "image", self + @html.css('img').each do |i| + img = Image.new i, 'image', self next if img.ignore? # screenshot filenames should return because of terrible names - next self.add_issue "image has a terrible filename (#{img.src})" if img.terrible_filename? + next add_issue "image has a terrible filename (#{img.src})" if img.terrible_filename? # does the image exist? if img.missing_src? - self.add_issue "image has no src attribute" + add_issue 'image has no src attribute' else if img.remote? add_to_external_urls img.src else - self.add_issue("internal image #{img.src} does not exist") unless img.exists? + add_issue("internal image #{img.src} does not exist") unless img.exists? end end # check alt tag - self.add_issue "image #{img.src} does not have an alt attribute" unless img.valid_alt_tag? + add_issue "image #{img.src} does not have an alt attribute" unless img.valid_alt_tag? end external_urls diff --git a/lib/html/proofer/checks/links.rb b/lib/html/proofer/checks/links.rb index 87ef252c..64a4e54e 100644 --- a/lib/html/proofer/checks/links.rb +++ b/lib/html/proofer/checks/links.rb @@ -15,7 +15,7 @@ def name end def missing_href? - href.nil? and name.nil? and id.nil? + href.nil? && name.nil? && id.nil? end def placeholder? @@ -33,8 +33,8 @@ def real_attr(attr) class Links < ::HTML::Proofer::Checks::Check def run - @html.css("a, link").each do |l| - link = Link.new l, "link", self + @html.css('a, link').each do |l| + link = Link.new l, 'link', self next if link.ignore? next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse @@ -42,22 +42,22 @@ def run # is it even a valid URL? unless link.valid? - self.add_issue "#{link.href} is an invalid URL" + add_issue "#{link.href} is an invalid URL" next end - if link.scheme == "mailto" - self.add_issue "#{link.href} contains no email address" if link.path.empty? - self.add_issue "#{link.href} contain an invalid email address" unless link.path.include?("@") + if link.scheme == 'mailto' + add_issue "#{link.href} contains no email address" if link.path.empty? + add_issue "#{link.href} contain an invalid email address" unless link.path.include?('@') end - if link.scheme == "tel" - self.add_issue "#{link.href} contains no phone number" if link.path.empty? + if link.scheme == 'tel' + add_issue "#{link.href} contains no phone number" if link.path.empty? end # is there even a href? if link.missing_href? - self.add_issue("anchor has no href attribute") + add_issue('anchor has no href attribute') next end @@ -69,26 +69,23 @@ def run add_to_external_urls link.href next elsif !link.internal? - self.add_issue "internally linking to #{link.href}, which does not exist" unless link.exists? + add_issue "internally linking to #{link.href}, which does not exist" unless link.exists? end # does the local directory have a trailing slash? if link.unslashed_directory? link.absolute_path - self.add_issue("internally linking to a directory #{link.absolute_path} without trailing slash") + add_issue("internally linking to a directory #{link.absolute_path} without trailing slash") next end # verify the target hash if link.hash if link.internal? - self.add_issue "linking to internal hash ##{link.hash} that does not exist" unless hash_check @html, link.hash - elsif link.external? - unless link.exists? - self.add_issue "trying to find hash of #{link.href}, but #{link.absolute_path} does not exist" - else - target_html = HTML::Proofer.create_nokogiri link.absolute_path - self.add_issue "linking to #{link.href}, but #{link.hash} does not exist" unless hash_check target_html, link.hash + unless hash_check @html, link.hash + add_issue "linking to internal hash ##{link.hash} that does not exist" end + elsif link.external? + external_link_check(link) end end end @@ -96,6 +93,17 @@ def run external_urls end + def external_link_check(link) + if !link.exists? + add_issue "trying to find hash of #{link.href}, but #{link.absolute_path} does not exist" + else + target_html = HTML::Proofer.create_nokogiri link.absolute_path + unless hash_check target_html, link.hash + add_issue "linking to #{link.href}, but #{link.hash} does not exist" + end + end + end + def hash_check(html, href_hash) html.xpath("//*[@id='#{href_hash}']", "//*[@name='#{href_hash}']").length > 0 end diff --git a/lib/html/proofer/checks/scripts.rb b/lib/html/proofer/checks/scripts.rb index e24bf126..02f03d86 100644 --- a/lib/html/proofer/checks/scripts.rb +++ b/lib/html/proofer/checks/scripts.rb @@ -18,19 +18,19 @@ def blank? class Scripts < ::HTML::Proofer::Checks::Check def run - @html.css("script").each do |s| - script = Script.new s, "script", self + @html.css('script').each do |s| + script = Script.new s, 'script', self next if script.ignore? next unless script.blank? # does the script exist? if script.missing_src? - self.add_issue "script is empty and has no src attribute" + add_issue 'script is empty and has no src attribute' elsif script.remote? add_to_external_urls script.src else - self.add_issue("internal script #{script.src} does not exist") unless script.exists? + add_issue("internal script #{script.src} does not exist") unless script.exists? end end diff --git a/lib/html/proofer/issue.rb b/lib/html/proofer/issue.rb index dd3ffc96..5ccd31cb 100644 --- a/lib/html/proofer/issue.rb +++ b/lib/html/proofer/issue.rb @@ -14,7 +14,7 @@ def initialize(path, desc, status = -1) end def to_s - "#{HTML::colorize(:blue, @path)}: #{desc}" + "#{HTML.colorize(:blue, @path)}: #{desc}" end end diff --git a/lib/html/proofer/version.rb b/lib/html/proofer/version.rb index 9e5a39b6..d5ff4e4a 100644 --- a/lib/html/proofer/version.rb +++ b/lib/html/proofer/version.rb @@ -1,5 +1,5 @@ module HTML class Proofer - VERSION = "1.6.0" + VERSION = '1.6.0' end end diff --git a/spec/html/proofer/favicon_spec.rb b/spec/html/proofer/favicon_spec.rb index 305f3a2b..932a224d 100644 --- a/spec/html/proofer/favicon_spec.rb +++ b/spec/html/proofer/favicon_spec.rb @@ -1,47 +1,47 @@ -require "spec_helper" +require 'spec_helper' -describe "Favicons test" do - it "ignores for absent favicon by default" do +describe 'Favicons test' do + it 'ignores for absent favicon by default' do absent = "#{FIXTURES_DIR}/favicon/favicon_absent.html" expect(make_proofer(absent).failed_tests).to eq [] end - it "fails for absent favicon" do + it 'fails for absent favicon' do absent = "#{FIXTURES_DIR}/favicon/favicon_absent.html" - proofer = make_proofer(absent, {:favicon => true}) - expect(proofer.failed_tests.first).to match /no favicon specified/ + proofer = make_proofer(absent, { :favicon => true }) + expect(proofer.failed_tests.first).to match(/no favicon specified/) end - it "fails for absent favicon but present apple touch icon" do + it 'fails for absent favicon but present apple touch icon' do absent = "#{FIXTURES_DIR}/favicon/favicon_absent_apple.html" - proofer = make_proofer(absent, {:favicon => true}) + proofer = make_proofer(absent, { :favicon => true }) # Travis gives a different error message here for some reason - expect(proofer.failed_tests.last).to match /(internally linking to gpl.png, which does not exist|no favicon specified)/ + expect(proofer.failed_tests.last).to match(/(internally linking to gpl.png, which does not exist|no favicon specified)/) end - it "fails for broken favicon" do + it 'fails for broken favicon' do broken = "#{FIXTURES_DIR}/favicon/favicon_broken.html" - proofer = make_proofer(broken, {:favicon => true}) + proofer = make_proofer(broken, { :favicon => true }) - expect(proofer.failed_tests.first).to match /internally linking to asdadaskdalsdk.png/ + expect(proofer.failed_tests.first).to match(/internally linking to asdadaskdalsdk.png/) end - it "passes for present favicon" do + it 'passes for present favicon' do present = "#{FIXTURES_DIR}/favicon/favicon_present.html" - proofer = make_proofer(present, {:favicon => true}) + proofer = make_proofer(present, { :favicon => true }) expect(proofer.failed_tests).to eq [] end - it "passes for present favicon with shortcut notation" do + it 'passes for present favicon with shortcut notation' do present = "#{FIXTURES_DIR}/favicon/favicon_present_shortcut.html" - proofer = make_proofer(present, {:favicon => true}) + proofer = make_proofer(present, { :favicon => true }) expect(proofer.failed_tests).to eq [] end - it "fails for broken favicon with data-proofer-ignore" do + it 'fails for broken favicon with data-proofer-ignore' do broken_but_ignored = "#{FIXTURES_DIR}/favicon/favicon_broken_but_ignored.html" - proofer = make_proofer(broken_but_ignored, {:favicon => true}) - expect(proofer.failed_tests.first).to match /no favicon specified/ + proofer = make_proofer(broken_but_ignored, { :favicon => true }) + expect(proofer.failed_tests.first).to match(/no favicon specified/) end end diff --git a/spec/html/proofer/html_spec.rb b/spec/html/proofer/html_spec.rb index 154089b5..ca2d897f 100644 --- a/spec/html/proofer/html_spec.rb +++ b/spec/html/proofer/html_spec.rb @@ -1,51 +1,51 @@ -require "spec_helper" +require 'spec_helper' -describe "Html test" do - it "ignores an invalid tag by default" do +describe 'Html test' do + it 'ignores an invalid tag by default' do html = "#{FIXTURES_DIR}/html/invalid_tag.html" output = capture_stderr { HTML::Proofer.new(html).run } - expect(output).to eq "" + expect(output).to eq '' end it "doesn't fail for html5 tags" do html = "#{FIXTURES_DIR}/html/html5_tags.html" - output = capture_stderr { HTML::Proofer.new(html, {:validate_html => true}).run } - expect(output).to eq "" + output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } + expect(output).to eq '' end - it "fails for an invalid tag" do + it 'fails for an invalid tag' do html = "#{FIXTURES_DIR}/html/invalid_tag.html" - output = capture_stderr { HTML::Proofer.new(html, {:validate_html => true}).run } - expect(output).to match /Tag myfancytag invalid/ + output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } + expect(output).to match(/Tag myfancytag invalid/) end - it "fails for an unmatched end tag" do + it 'fails for an unmatched end tag' do html = "#{FIXTURES_DIR}/html/unmatched_end_tag.html" - output = capture_stderr { HTML::Proofer.new(html, {:validate_html => true}).run } - expect(output).to match /Unexpected end tag : div/ + output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } + expect(output).to match(/Unexpected end tag : div/) end - it "fails for an unescaped ampersand in attribute" do + it 'fails for an unescaped ampersand in attribute' do html = "#{FIXTURES_DIR}/html/unescaped_ampersand_in_attribute.html" - output = capture_stderr { HTML::Proofer.new(html, {:validate_html => true}).run } - expect(output).to match /htmlParseEntityRef: expecting ';'/ + output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } + expect(output).to match(/htmlParseEntityRef: expecting ';'/) end - it "fails for mismatch between opening and ending tag" do + it 'fails for mismatch between opening and ending tag' do html = "#{FIXTURES_DIR}/html/opening_and_ending_tag_mismatch.html" - output = capture_stderr { HTML::Proofer.new(html, {:validate_html => true}).run } - expect(output).to match /Opening and ending tag mismatch: p and strong/ + output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } + expect(output).to match(/Opening and ending tag mismatch: p and strong/) end - it "fails for div inside head" do + it 'fails for div inside head' do html = "#{FIXTURES_DIR}/html/div_inside_head.html" - output = capture_stderr { HTML::Proofer.new(html, {:validate_html => true}).run } - expect(output).to match /Unexpected end tag : head/ + output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } + expect(output).to match(/Unexpected end tag : head/) end - it "fails for missing closing quotation mark in href" do + it 'fails for missing closing quotation mark in href' do html = "#{FIXTURES_DIR}/html/missing_closing_quotes.html" - output = capture_stderr { HTML::Proofer.new(html, {:validate_html => true}).run } - expect(output).to match /Couldn't find end of Start Tag a/ + output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } + expect(output).to match(/Couldn't find end of Start Tag a/) end end diff --git a/spec/html/proofer/images_spec.rb b/spec/html/proofer/images_spec.rb index 37835aca..9269e038 100644 --- a/spec/html/proofer/images_spec.rb +++ b/spec/html/proofer/images_spec.rb @@ -1,46 +1,46 @@ -require "spec_helper" +require 'spec_helper' -describe "Images test" do - it "passes for existing external images" do +describe 'Images test' do + it 'passes for existing external images' do externalImageFilepath = "#{FIXTURES_DIR}/images/existingImageExternal.html" proofer = make_proofer(externalImageFilepath) expect(proofer.failed_tests).to eq [] end - it "fails for image without alt attribute" do + it 'fails for image without alt attribute' do missingAltFilepath = "#{FIXTURES_DIR}/images/missingImageAlt.html" proofer = make_proofer(missingAltFilepath) - expect(proofer.failed_tests.first).to match /gpl.png does not have an alt attribute/ + expect(proofer.failed_tests.first).to match(/gpl.png does not have an alt attribute/) end - it "fails for image with an empty alt attribute" do + it 'fails for image with an empty alt attribute' do missingAltFilepath = "#{FIXTURES_DIR}/images/missingImageAltText.html" proofer = make_proofer(missingAltFilepath) - expect(proofer.failed_tests.first).to match /gpl.png does not have an alt attribute/ + expect(proofer.failed_tests.first).to match(/gpl.png does not have an alt attribute/) end - it "fails for missing external images" do + it 'fails for missing external images' do externalImageFilepath = "#{FIXTURES_DIR}/images/missingImageExternal.html" proofer = make_proofer(externalImageFilepath) - expect(proofer.failed_tests.first).to match /External link http:\/\/www.whatthehell\/? failed: 0 Couldn't resolve host/ + expect(proofer.failed_tests.first).to match(%r{External link http://www.whatthehell/ failed: 0 Couldn't resolve host}) end - it "fails for missing internal images" do + it 'fails for missing internal images' do internalImageFilepath = "#{FIXTURES_DIR}/images/missingImageInternal.html" proofer = make_proofer(internalImageFilepath) - expect(proofer.failed_tests.first).to match /doesnotexist.png does not exist/ + expect(proofer.failed_tests.first).to match(/doesnotexist.png does not exist/) end - it "fails for image with no src" do + it 'fails for image with no src' do imageSrcFilepath = "#{FIXTURES_DIR}/images/missingImageSrc.html" proofer = make_proofer(imageSrcFilepath) - expect(proofer.failed_tests.first).to match /image has no src attribute/ + expect(proofer.failed_tests.first).to match(/image has no src attribute/) end - it "fails for image with default mac filename" do + it 'fails for image with default mac filename' do terribleImageName = "#{FIXTURES_DIR}/images/terribleImageName.html" proofer = make_proofer(terribleImageName) - expect(proofer.failed_tests.first).to match /image has a terrible filename/ + expect(proofer.failed_tests.first).to match(/image has a terrible filename/) end it 'ignores images marked as ignore data-proofer-ignore' do @@ -65,16 +65,16 @@ expect(proofer.failed_tests).to eq [] end - it "works for valid images missing the protocol" do + it 'works for valid images missing the protocol' do missingProtocolLink = "#{FIXTURES_DIR}/images/image_missing_protocol_valid.html" proofer = make_proofer(missingProtocolLink) expect(proofer.failed_tests).to eq [] end - it "fails for invalid images missing the protocol" do + it 'fails for invalid images missing the protocol' do missingProtocolLink = "#{FIXTURES_DIR}/images/image_missing_protocol_invalid.html" proofer = make_proofer(missingProtocolLink) - expect(proofer.failed_tests.first).to match /404 No error/ + expect(proofer.failed_tests.first).to match(/404 No error/) end it 'properly checks relative links' do @@ -92,7 +92,7 @@ it 'properly ignores missing alt tags, but not all URLs, when asked' do ignorableLinks = "#{FIXTURES_DIR}/images/ignoreAltButNotLink.html" proofer = make_proofer(ignorableLinks, {:alt_ignore => [/.*/]}) - expect(proofer.failed_tests.first).to match /Couldn't resolve host name/ + expect(proofer.failed_tests.first).to match(/Couldn't resolve host name/) expect(proofer.failed_tests.first).to_not match /does not have an alt attribute/ end end diff --git a/spec/html/proofer/links_spec.rb b/spec/html/proofer/links_spec.rb index 902e78ec..185abcdd 100644 --- a/spec/html/proofer/links_spec.rb +++ b/spec/html/proofer/links_spec.rb @@ -1,72 +1,72 @@ -require "spec_helper" +require 'spec_helper' -describe "Links test" do +describe 'Links test' do - it "fails for broken internal hash (even if the file exists)" do + it 'fails for broken internal hash (even if the file exists)' do brokenHashExternalFilepath = "#{FIXTURES_DIR}/links/brokenHashExternal.html" proofer = make_proofer(brokenHashExternalFilepath) - expect(proofer.failed_tests.last).to match /linking to ..\/images\/missingImageAlt.html#asdfasfdkafl, but asdfasfdkafl does not exist/ + expect(proofer.failed_tests.last).to match(%r{linking to ../images/missingImageAlt.html#asdfasfdkafl, but asdfasfdkafl does not exist}) end - it "fails for broken hashes on the web when asked (even if the file exists)" do + it 'fails for broken hashes on the web when asked (even if the file exists)' do brokenHashOnTheWeb = "#{FIXTURES_DIR}/links/brokenHashOnTheWeb.html" proofer = make_proofer(brokenHashOnTheWeb, { :check_external_hash => true} ) - expect(proofer.failed_tests.first).to match /but the hash 'no' does not/ + expect(proofer.failed_tests.first).to match(/but the hash 'no' does not/) end - it "passes for broken hashes on the web when ignored (even if the file exists)" do + it 'passes for broken hashes on the web when ignored (even if the file exists)' do brokenHashOnTheWeb = "#{FIXTURES_DIR}/links/brokenHashOnTheWeb.html" proofer = make_proofer(brokenHashOnTheWeb) expect(proofer.failed_tests).to eq [] end - it "passes for GitHub hashes on the web when asked" do + it 'passes for GitHub hashes on the web when asked' do githubHash = "#{FIXTURES_DIR}/links/githubHash.html" proofer = make_proofer(githubHash) expect(proofer.failed_tests).to eq [] end - it "passes for broken hashes on the web (when we look only for 4xx)" do + it 'passes for broken hashes on the web (when we look only for 4xx)' do options = { :only_4xx => true } brokenHashOnTheWeb = "#{FIXTURES_DIR}/links/brokenHashOnTheWeb.html" proofer = make_proofer(brokenHashOnTheWeb, options) expect(proofer.failed_tests).to eq [] end - it "fails for broken internal hash" do + it 'fails for broken internal hash' do brokenHashInternalFilepath = "#{FIXTURES_DIR}/links/brokenHashInternal.html" proofer = make_proofer(brokenHashInternalFilepath) - expect(proofer.failed_tests.first).to match /linking to internal hash #noHash that does not exist/ + expect(proofer.failed_tests.first).to match(/linking to internal hash #noHash that does not exist/) end - it "fails for broken external links" do + it 'fails for broken external links' do brokenLinkExternalFilepath = "#{FIXTURES_DIR}/links/brokenLinkExternal.html" proofer = make_proofer(brokenLinkExternalFilepath) - expect(proofer.failed_tests.first).to match /External link http:\/\/www.asdo3IRJ395295jsingrkrg4.com\/? failed: 0 Couldn't resolve host name/ + expect(proofer.failed_tests.first).to match(%r{External link http://www.asdo3IRJ395295jsingrkrg4.com/ failed: 0 Couldn't resolve host name}) end - it "fails for broken internal links" do + it 'fails for broken internal links' do brokenLinkInternalFilepath = "#{FIXTURES_DIR}/links/brokenLinkInternal.html" proofer = make_proofer(brokenLinkInternalFilepath) - expect(proofer.failed_tests.first).to match /internally linking to .\/notreal.html, which does not exist/ + expect(proofer.failed_tests.first).to match(/internally linking to .\/notreal.html, which does not exist/) end - it "fails for link with no href" do + it 'fails for link with no href' do missingLinkHrefFilepath = "#{FIXTURES_DIR}/links/missingLinkHref.html" proofer = make_proofer(missingLinkHrefFilepath) - expect(proofer.failed_tests.first).to match /anchor has no href attribute/ + expect(proofer.failed_tests.first).to match(/anchor has no href attribute/) end - it "should follow redirects" do + it 'should follow redirects' do linkWithRedirectFilepath = "#{FIXTURES_DIR}/links/linkWithRedirect.html" proofer = make_proofer(linkWithRedirectFilepath) expect(proofer.failed_tests).to eq [] end - it "fails on redirects if not following" do + it 'fails on redirects if not following' do linkWithRedirectFilepath = "#{FIXTURES_DIR}/links/linkWithRedirect.html" proofer = make_proofer(linkWithRedirectFilepath, { :followlocation => false }) - expect(proofer.failed_tests.first).to match /failed: 301 No error/ + expect(proofer.failed_tests.first).to match(/failed: 301 No error/) end it "does not fail on redirects we're not following" do @@ -76,16 +76,16 @@ expect(proofer.failed_tests).to eq [] end - it "should understand https" do + it 'should understand https' do linkWithHttpsFilepath = "#{FIXTURES_DIR}/links/linkWithHttps.html" proofer = make_proofer(linkWithHttpsFilepath) expect(proofer.failed_tests).to eq [] end - it "fails for broken hash links with status code numbers" do + it 'fails for broken hash links with status code numbers' do brokenLinkWithNumberFilepath = "#{FIXTURES_DIR}/links/brokenLinkWithNumber.html" proofer = make_proofer(brokenLinkWithNumberFilepath) - expect(proofer.failed_tests.first).to match /linking to internal hash #25-method-not-allowed that does not exist/ + expect(proofer.failed_tests.first).to match(/linking to internal hash #25-method-not-allowed that does not exist/) end it 'properly resolves implicit /index.html in link paths' do @@ -133,7 +133,7 @@ it 'finds a mix of broken and unbroken links' do multipleProblems = "#{FIXTURES_DIR}/links/multipleProblems.html" proofer = make_proofer(multipleProblems) - expect(proofer.failed_tests.first).to match /linking to internal hash #anadaasdadsadschor that does not exist/ + expect(proofer.failed_tests.first).to match(/linking to internal hash #anadaasdadsadschor that does not exist/) end it 'ignores valid mailto links' do @@ -142,10 +142,10 @@ expect(proofer.failed_tests).to eq [] end - it "fails for blank mailto links" do + it 'fails for blank mailto links' do blankMailToLink = "#{FIXTURES_DIR}/links/blank_mailto_link.html" proofer = make_proofer(blankMailToLink) - expect(proofer.failed_tests.first).to match /mailto: contains no email address/ + expect(proofer.failed_tests.first).to match(/mailto: contains no email address/) end it 'ignores valid tel links' do @@ -154,10 +154,10 @@ expect(proofer.failed_tests).to eq [] end - it "fails for blank tel links" do + it 'fails for blank tel links' do blankTelLink = "#{FIXTURES_DIR}/links/blank_tel_link.html" proofer = make_proofer(blankTelLink) - expect(proofer.failed_tests.first).to match /tel: contains no phone number/ + expect(proofer.failed_tests.first).to match(/tel: contains no phone number/) end it 'ignores javascript links' do @@ -166,73 +166,73 @@ expect(proofer.failed_tests).to eq [] end - it "works for valid links missing the protocol" do + it 'works for valid links missing the protocol' do missingProtocolLink = "#{FIXTURES_DIR}/links/link_missing_protocol_valid.html" proofer = make_proofer(missingProtocolLink) expect(proofer.failed_tests).to eq [] end - it "fails for invalid links missing the protocol" do + it 'fails for invalid links missing the protocol' do missingProtocolLink = "#{FIXTURES_DIR}/links/link_missing_protocol_invalid.html" proofer = make_proofer(missingProtocolLink) - expect(proofer.failed_tests.first).to match /Couldn't resolve host name/ + expect(proofer.failed_tests.first).to match(/Couldn't resolve host name/) end - it "works for valid href within link elements" do + it 'works for valid href within link elements' do head_link = "#{FIXTURES_DIR}/links/head_link_href.html" proofer = make_proofer(head_link) expect(proofer.failed_tests).to eq [] end - it "fails for empty href within link elements" do + it 'fails for empty href within link elements' do head_link = "#{FIXTURES_DIR}/links/head_link_href_empty.html" proofer = make_proofer(head_link) - expect(proofer.failed_tests.first).to match /anchor has no href attribute/ + expect(proofer.failed_tests.first).to match(/anchor has no href attribute/) end - it "fails for absent href within link elements" do + it 'fails for absent href within link elements' do head_link = "#{FIXTURES_DIR}/links/head_link_href_absent.html" proofer = make_proofer(head_link) - expect(proofer.failed_tests.first).to match /anchor has no href attribute/ + expect(proofer.failed_tests.first).to match(/anchor has no href attribute/) end - it "fails for internal linking to a directory without trailing slash" do + it 'fails for internal linking to a directory without trailing slash' do options = { :followlocation => false } internal = "#{FIXTURES_DIR}/links/link_directory_without_slash.html" proofer = make_proofer(internal, options) - expect(proofer.failed_tests.first).to match /without trailing slash/ + expect(proofer.failed_tests.first).to match(/without trailing slash/) end - it "works for array of links" do + it 'works for array of links' do proofer = make_proofer(["www.github.com", "foofoofoo.biz"]) - expect(proofer.failed_tests.first).to match /foofoofoo.biz\/? failed: 0 Couldn't resolve host name/ + expect(proofer.failed_tests.first).to match(/foofoofoo.biz\/\)? failed: 0 Couldn't resolve host name/) end - it "works for broken anchors within pre" do + it 'works for broken anchors within pre' do anchor_pre = "#{FIXTURES_DIR}/links/anchors_in_pre.html" proofer = make_proofer(anchor_pre) expect(proofer.failed_tests).to eq [] end - it "works for broken link within pre" do + it 'works for broken link within pre' do link_pre = "#{FIXTURES_DIR}/links/links_in_pre.html" proofer = make_proofer(link_pre) expect(proofer.failed_tests).to eq [] end - it "works for pipes in the URL" do + it 'works for pipes in the URL' do escape_pipes = "#{FIXTURES_DIR}/links/escape_pipes.html" proofer = make_proofer(escape_pipes) expect(proofer.failed_tests).to eq [] end - it "fails for broken hash with query" do + it 'fails for broken hash with query' do broken_hash = "#{FIXTURES_DIR}/links/broken_hash_with_query.html" proofer = make_proofer(broken_hash) - expect(proofer.failed_tests.first).to match /linking to internal hash #example that does not exist/ + expect(proofer.failed_tests.first).to match(/linking to internal hash #example that does not exist/) end - it "works for directory index file" do + it 'works for directory index file' do options = { :directory_index_file => "index.php" } link_pointing_to_directory = "#{FIXTURES_DIR}/links/link_pointing_to_directory.html" proofer = make_proofer(link_pointing_to_directory, options) @@ -246,62 +246,62 @@ expect(proofer.failed_tests.first).to match "internally linking to folder-php/, which does not exist" end - it "ensures Typhoeus options are passed" do + it 'ensures Typhoeus options are passed' do options = { ssl_verifypeer: false } typhoeus_options_link = "#{FIXTURES_DIR}/links/ensure_typhoeus_options.html" proofer = make_proofer(typhoeus_options_link, options) expect(proofer.failed_tests).to eq [] end - it "works if subdirectory ends with .html" do + it 'works if subdirectory ends with .html' do with_subdirectory_html = "#{FIXTURES_DIR}/links/_site" proofer = make_proofer(with_subdirectory_html) expect(proofer.failed_tests).to eq [] end - it "works for hash referring to itself" do + it 'works for hash referring to itself' do hashReferringToSelf = "#{FIXTURES_DIR}/links/hashReferringToSelf.html" proofer = make_proofer(hashReferringToSelf) expect(proofer.failed_tests).to eq [] end - it "ignores placeholder with name" do + it 'ignores placeholder with name' do placeholder_with_name = "#{FIXTURES_DIR}/links/placeholder_with_name.html" proofer = make_proofer(placeholder_with_name) expect(proofer.failed_tests).to eq [] end - it "ignores placeholder with id" do + it 'ignores placeholder with id' do placeholder_with_id = "#{FIXTURES_DIR}/links/placeholder_with_id.html" proofer = make_proofer(placeholder_with_id) expect(proofer.failed_tests).to eq [] end - it "fails for placeholder with empty id" do + it 'fails for placeholder with empty id' do empty_id = "#{FIXTURES_DIR}/links/placeholder_with_empty_id.html" proofer = make_proofer(empty_id) - expect(proofer.failed_tests.first).to match /anchor has no href attribute/ + expect(proofer.failed_tests.first).to match(/anchor has no href attribute/) end - it "ignores non-http(s) protocols" do + it 'ignores non-http(s) protocols' do other_protocols = "#{FIXTURES_DIR}/links/other_protocols.html" proofer = make_proofer(other_protocols) expect(proofer.failed_tests).to eq [] end - it "passes non-standard characters" do + it 'passes non-standard characters' do fixture = "#{FIXTURES_DIR}/links/non_standard_characters.html" proofer = make_proofer(fixture) expect(proofer.failed_tests).to eq [] end - it "does not dupe errors" do + it 'does not dupe errors' do fixture = "#{FIXTURES_DIR}/links/nodupe.html" proofer = make_proofer(fixture, { :check_external_hash => true} ) expect(proofer.failed_tests.length).to eq 1 end - it "passes for broken *nix links" do + it 'passes for broken *nix links' do fixture = "#{FIXTURES_DIR}/links/brokenUnixLinks.html" proofer = make_proofer(fixture) expect(proofer.failed_tests).to eq [] diff --git a/spec/html/proofer/scripts_spec.rb b/spec/html/proofer/scripts_spec.rb index 60879a03..e92f03e3 100644 --- a/spec/html/proofer/scripts_spec.rb +++ b/spec/html/proofer/scripts_spec.rb @@ -1,38 +1,38 @@ -require "spec_helper" +require 'spec_helper' -describe "Scripts test" do +describe 'Scripts test' do - it "fails for broken external src" do + it 'fails for broken external src' do file = "#{FIXTURES_DIR}/scripts/script_broken_external.html" proofer = make_proofer(file) - expect(proofer.failed_tests.first).to match /External link http:\/\/www.asdo3IRJ395295jsingrkrg4.com\/asdo3IRJ.js? failed: 0 Couldn't resolve host name/ + expect(proofer.failed_tests.first).to match(%r{External link http://www.asdo3IRJ395295jsingrkrg4.com/asdo3IRJ.js failed: 0 Couldn't resolve host name}) end - it "works for valid internal src" do + it 'works for valid internal src' do file = "#{FIXTURES_DIR}/scripts/script_valid_internal.html" proofer = make_proofer(file) expect(proofer.failed_tests).to eq [] end - it "fails for missing internal src" do + it 'fails for missing internal src' do file = "#{FIXTURES_DIR}/scripts/script_missing_internal.html" proofer = make_proofer(file) - expect(proofer.failed_tests.first).to match /doesnotexist.js does not exist/ + expect(proofer.failed_tests.first).to match(/doesnotexist.js does not exist/) end - it "works for present content" do + it 'works for present content' do file = "#{FIXTURES_DIR}/scripts/script_content.html" proofer = make_proofer(file) expect(proofer.failed_tests).to eq [] end - it "fails for absent content" do + it 'fails for absent content' do file = "#{FIXTURES_DIR}/scripts/script_content_absent.html" proofer = make_proofer(file) - expect(proofer.failed_tests.first).to match /script is empty and has no src attribute/ + expect(proofer.failed_tests.first).to match(/script is empty and has no src attribute/) end - it "works for broken script within pre" do + it 'works for broken script within pre' do script_pre = "#{FIXTURES_DIR}/scripts/script_in_pre.html" proofer = make_proofer(script_pre) expect(proofer.failed_tests).to eq [] diff --git a/spec/html/proofer_spec.rb b/spec/html/proofer_spec.rb index b1730d01..22bb8158 100644 --- a/spec/html/proofer_spec.rb +++ b/spec/html/proofer_spec.rb @@ -2,31 +2,31 @@ describe HTML::Proofer do - describe "#failed_tests" do - it "is a list of the formatted errors" do + describe '#failed_tests' do + it 'is a list of the formatted errors' do brokenLinkInternalFilepath = "#{FIXTURES_DIR}/links/brokenLinkInternal.html" proofer = make_proofer(brokenLinkInternalFilepath) expect(proofer.failed_tests).to eq(["\e[34mspec/html/proofer/fixtures/links/brokenLinkInternal.html\e[0m: internally linking to ./notreal.html, which does not exist", "\e[34mspec/html/proofer/fixtures/links/brokenLinkInternal.html\e[0m: internally linking to ./missingImageAlt.html, which does not exist"]) end end - describe "#files" do - it "works for directory that ends with .html" do + describe '#files' do + it 'works for directory that ends with .html' do folder = "#{FIXTURES_DIR}/links/_site/folder.html" proofer = HTML::Proofer.new folder expect(proofer.files).to eq(["#{folder}/index.html"]) end end - describe "#options" do - it "strips out undesired Typhoeus options" do + describe '#options' do + it 'strips out undesired Typhoeus options' do folder = "#{FIXTURES_DIR}/links/_site/folder.html" proofer = HTML::Proofer.new folder, :verbose => true expect(proofer.options[:verbose]).to eq(true) expect(proofer.typhoeus_opts[:verbose]).to eq(nil) end - it "takes options for Parallel" do + it 'takes options for Parallel' do folder = "#{FIXTURES_DIR}/links/_site/folder.html" proofer = HTML::Proofer.new folder, :parallel => { :in_processes => 3 } expect(proofer.parallel_opts[:in_processes]).to eq(3) @@ -34,24 +34,24 @@ expect(proofer.options[:parallel]).to eq(nil) end - describe "sorting" do + describe 'sorting' do # would love to know why Travis barfs here - it "understands sorting by path", :skip => ENV['TRAVIS'] do + it 'understands sorting by path', :skip => ENV['TRAVIS'] do output = send_proofer_output("#{FIXTURES_DIR}/sorting/path") - expect(output.strip).to eq(""" + expect(output.strip).to eq(''' - spec/html/proofer/fixtures/sorting/path/multiple_issues.html * tel: contains no phone number * internal image gpl.png does not exist * image gpl.png does not have an alt attribute - spec/html/proofer/fixtures/sorting/path/single_issue.html * image has a terrible filename (./Screen Shot 2012-08-09 at 7.51.18 AM.png) - """.strip) + '''.strip) end - it "understands sorting by issue" do + it 'understands sorting by issue' do output = send_proofer_output("#{FIXTURES_DIR}/sorting/issue", :error_sort => :desc) - expect(output.strip).to eq(""" + expect(output.strip).to eq(''' - image ./gpl.png does not have an alt attribute * spec/html/proofer/fixtures/sorting/issue/broken_image_one.html * spec/html/proofer/fixtures/sorting/issue/broken_image_two.html @@ -60,38 +60,38 @@ * spec/html/proofer/fixtures/sorting/issue/broken_image_two.html - internal image NOT_AN_IMAGE does not exist * spec/html/proofer/fixtures/sorting/issue/broken_image_two.html - """.strip) + '''.strip) end - it "understands sorting by status" do + it 'understands sorting by status' do output = send_proofer_output("#{FIXTURES_DIR}/sorting/status", :followlocation => false, :error_sort => :status) - expect(output.strip).to eq(""" + expect(output.strip).to eq(''' - -1 * spec/html/proofer/fixtures/sorting/status/broken_link.html: internally linking to nowhere.fooof, which does not exist - 404 * spec/html/proofer/fixtures/sorting/status/a_404.html: External link http://upload.wikimedia.org/wikipedia/en/thumb/not_here.png failed: 404 No error * spec/html/proofer/fixtures/sorting/status/broken_link.html: External link http://upload.wikimedia.org/wikipedia/en/thumb/fooooof.png failed: 404 No error - """.strip) + '''.strip) end end - describe "file ignores" do - it "knows how to ignore a file by string" do + describe 'file ignores' do + it 'knows how to ignore a file by string' do options = { :file_ignore => ["#{FIXTURES_DIR}/links/brokenHashInternal.html"] } brokenHashInternalFilepath = "#{FIXTURES_DIR}/links/brokenHashInternal.html" proofer = make_proofer(brokenHashInternalFilepath, options) expect(proofer.failed_tests).to eq [] end - it "knows how to ignore a file by regexp" do + it 'knows how to ignore a file by regexp' do options = { :file_ignore => [/brokenHash/] } brokenHashInternalFilepath = "#{FIXTURES_DIR}/links/brokenHashInternal.html" proofer = make_proofer(brokenHashInternalFilepath, options) expect(proofer.failed_tests).to eq [] end - it "knows how to ignore a directory by regexp" do + it 'knows how to ignore a directory by regexp' do options = { :file_ignore => [/\S\.html/] } linksDir = "#{FIXTURES_DIR}/links" proofer = make_proofer(linksDir, options) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 6fc2ec6d..4c7eb808 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -17,11 +17,11 @@ config.order = :random end -def capture_stderr(&block) +def capture_stderr(*) original_stderr = $stderr original_stdout = $stdout $stderr = fake_err = StringIO.new - $stdout = fake_out = StringIO.new + $stdout = StringIO.new begin yield rescue RuntimeError @@ -32,14 +32,14 @@ def capture_stderr(&block) fake_err.string end -def make_proofer(file, opts={}) +def make_proofer(file, opts = {}) proofer = HTML::Proofer.new(file, opts) capture_stderr { proofer.run } # proofer.run # when I want to see output proofer end -def send_proofer_output(file, opts={}) +def send_proofer_output(file, opts = {}) proofer = HTML::Proofer.new(file, opts) capture_stderr { proofer.run } end From 995b2fb88eb4fb17c4f3e12919299ff6c7485a2d Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Thu, 1 Jan 2015 15:41:55 -0800 Subject: [PATCH 02/40] %r{ appears to fail for Travis --- spec/html/proofer/images_spec.rb | 2 +- spec/html/proofer/links_spec.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/html/proofer/images_spec.rb b/spec/html/proofer/images_spec.rb index 9269e038..8cc7ba66 100644 --- a/spec/html/proofer/images_spec.rb +++ b/spec/html/proofer/images_spec.rb @@ -22,7 +22,7 @@ it 'fails for missing external images' do externalImageFilepath = "#{FIXTURES_DIR}/images/missingImageExternal.html" proofer = make_proofer(externalImageFilepath) - expect(proofer.failed_tests.first).to match(%r{External link http://www.whatthehell/ failed: 0 Couldn't resolve host}) + expect(proofer.failed_tests.first).to match(/failed: 0 Couldn't resolve host/) end it 'fails for missing internal images' do diff --git a/spec/html/proofer/links_spec.rb b/spec/html/proofer/links_spec.rb index 185abcdd..8cdd9e29 100644 --- a/spec/html/proofer/links_spec.rb +++ b/spec/html/proofer/links_spec.rb @@ -42,7 +42,7 @@ it 'fails for broken external links' do brokenLinkExternalFilepath = "#{FIXTURES_DIR}/links/brokenLinkExternal.html" proofer = make_proofer(brokenLinkExternalFilepath) - expect(proofer.failed_tests.first).to match(%r{External link http://www.asdo3IRJ395295jsingrkrg4.com/ failed: 0 Couldn't resolve host name}) + expect(proofer.failed_tests.first).to match(/failed: 0 Couldn't resolve host nam/) end it 'fails for broken internal links' do @@ -205,7 +205,7 @@ it 'works for array of links' do proofer = make_proofer(["www.github.com", "foofoofoo.biz"]) - expect(proofer.failed_tests.first).to match(/foofoofoo.biz\/\)? failed: 0 Couldn't resolve host name/) + expect(proofer.failed_tests.first).to match(/failed: 0 Couldn't resolve host name/) end it 'works for broken anchors within pre' do From d7e0606555b07c73384236e4cadd611013b8348d Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sun, 18 Jan 2015 22:39:40 -0800 Subject: [PATCH 03/40] Clean up the main entry class --- lib/html/proofer.rb | 134 +++++++++++-------- spec/html/proofer/fixtures/links/nodupe.html | 2 +- spec/html/proofer/links_spec.rb | 2 +- spec/html/proofer_spec.rb | 2 +- 4 files changed, 81 insertions(+), 59 deletions(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 78a25691..2acd38b0 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -67,42 +67,18 @@ def initialize(src, opts={}) @failed_tests = [] Yell.new({ :format => false, :name => 'HTML::Proofer', :level => "gte.#{log_level}" }) do |l| - l.adapter :stdout, level: [:debug, :info, :warn] - l.adapter :stderr, level: [:error, :fatal] + l.adapter :stdout, :level => [:debug, :info, :warn] + l.adapter :stderr, :level => [:error, :fatal] end end def run - unless @src.is_a? Array - external_urls = {} + logger.info HTML.colorize :white, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" - logger.info HTML.colorize :white, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" - - results = Parallel.map(files, @parallel_opts) do |path| - html = HTML::Proofer.create_nokogiri(path) - result = { :external_urls => {}, :failed_tests => [] } - - get_checks.each do |klass| - logger.debug HTML.colorize :blue, "Checking #{klass.to_s.downcase} on #{path} ..." - check = Object.const_get(klass).new(@src, path, html, @options) - check.run - result[:external_urls].merge!(check.external_urls) - result[:failed_tests].concat(check.issues) if check.issues.length > 0 - end - result - end - - results.each do |item| - external_urls.merge!(item[:external_urls]) - @failed_tests.concat(item[:failed_tests]) - end - - external_link_checker(external_urls) unless @options[:disable_external] - - logger.info HTML.colorize :green, "Ran on #{files.length} files!\n\n" + if @src.is_a?(Array) && !@options[:disable_external] + check_list_of_links else - external_urls = Hash[*@src.map { |s| [s, nil] }.flatten] - external_link_checker(external_urls) unless @options[:disable_external] + check_directory_of_files end if @failed_tests.empty? @@ -144,11 +120,51 @@ def run end end - # the hypothesis is that Proofer runs way faster if we pull out - # all the external URLs and run the checks at the end. Otherwise, we're halting - # the consuming process for every file. In addition, sorting the list lets - # libcurl keep connections to hosts alive. Finally, we'll make a HEAD request, - # rather than GETing all the contents + def check_list_of_links + external_urls = Hash[*@src.map { |s| [s, nil] }.flatten] + external_link_checker(external_urls) + end + + def check_directory_of_files + external_urls = {} + results = check_files_for_internal_woes + + results.each do |item| + external_urls.merge!(item[:external_urls]) + @failed_tests.concat(item[:failed_tests]) + end + + external_link_checker(external_urls) unless @options[:disable_external] + + logger.info HTML.colorize :green, "Ran on #{files.length} files!\n\n" + end + + # Walks over each implemented check and runs them on the files, in parallel. + def check_files_for_internal_woes + Parallel.map(files, @parallel_opts) do |path| + html = HTML::Proofer.create_nokogiri(path) + result = { :external_urls => {}, :failed_tests => [] } + + get_checks.each do |klass| + logger.debug HTML.colorize :blue, "Checking #{klass.to_s.downcase} on #{path} ..." + check = Object.const_get(klass).new(@src, path, html, @options) + check.run + result[:external_urls].merge!(check.external_urls) + result[:failed_tests].concat(check.issues) if check.issues.length > 0 + end + result + end + end + + # Proofer runs faster if we pull out all the external URLs and run the checks + # at the end. Otherwise, we're halting the consuming process for every file. + # + # In addition, sorting the list lets libcurl keep connections to the same hosts alive. + # + # Finally, we'll first make a HEAD request, rather than GETing all the contents. + # If the HEAD fails, we'll fall back to GET, as some servers are not configured + # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is + # not an option. def external_link_checker(external_urls) external_urls = Hash[external_urls.sort] @@ -184,28 +200,9 @@ def response_handler(response, filenames) logger.debug debug_msg if response_code.between?(200, 299) - return if @options[:only_4xx] - return unless @options[:check_external_hash] - if hash = hash?(href) - body_doc = Nokogiri::HTML(response.body) - # user-content is a special addition by GitHub. - xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"]) - if URI.parse(href).host.match(/github\.com/i) - xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]) - end - if body_doc.xpath(xpath).empty? - add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response_code - end - end + check_hash_in_2xx_response(href, effective_url, response, filenames) elsif response.timed_out? - return if @options[:only_4xx] - add_failed_tests filenames, "External link #{href} failed: got a time out", response_code - elsif (response_code == 405 || response_code == 420 || response_code == 503) && method == :head - # 420s usually come from rate limiting; let's ignore the query and try just the path with a GET - uri = URI(href) - queue_request(:get, uri.scheme + '://' + uri.host + uri.path, filenames) - # just be lazy; perform an explicit get request. some servers are apparently not configured to - # intercept HTTP HEAD + handle_timeout(filenames, response_code) elsif method == :head queue_request(:get, effective_url, filenames) else @@ -215,6 +212,31 @@ def response_handler(response, filenames) end end + # Even though the response was a success, we may have been asked to check + # if the hash on the URL exists on the page + def check_hash_in_2xx_response(href, effective_url, response, filenames) + return if @options[:only_4xx] + return unless @options[:check_external_hash] + return unless hash = hash?(href) + + body_doc = Nokogiri::HTML(response.body) + + # user-content is a special addition by GitHub. + xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"]) + if URI.parse(href).host.match(/github\.com/i) + xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]) + end + + return unless body_doc.xpath(xpath).empty? + + add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code + end + + def handle_timeout + return if @options[:only_4xx] + add_failed_tests filenames, "External link #{href} failed: got a time out", response_code + end + def hydra @hydra ||= Typhoeus::Hydra.new end diff --git a/spec/html/proofer/fixtures/links/nodupe.html b/spec/html/proofer/fixtures/links/nodupe.html index d0c82889..c336bcd1 100644 --- a/spec/html/proofer/fixtures/links/nodupe.html +++ b/spec/html/proofer/fixtures/links/nodupe.html @@ -1 +1 @@ -add ssh +add ssh diff --git a/spec/html/proofer/links_spec.rb b/spec/html/proofer/links_spec.rb index 8cdd9e29..c714dd31 100644 --- a/spec/html/proofer/links_spec.rb +++ b/spec/html/proofer/links_spec.rb @@ -297,7 +297,7 @@ it 'does not dupe errors' do fixture = "#{FIXTURES_DIR}/links/nodupe.html" - proofer = make_proofer(fixture, { :check_external_hash => true} ) + proofer = make_proofer(fixture) expect(proofer.failed_tests.length).to eq 1 end diff --git a/spec/html/proofer_spec.rb b/spec/html/proofer_spec.rb index 22bb8158..ab9f0efb 100644 --- a/spec/html/proofer_spec.rb +++ b/spec/html/proofer_spec.rb @@ -41,9 +41,9 @@ expect(output.strip).to eq(''' - spec/html/proofer/fixtures/sorting/path/multiple_issues.html - * tel: contains no phone number * internal image gpl.png does not exist * image gpl.png does not have an alt attribute + * tel: contains no phone number - spec/html/proofer/fixtures/sorting/path/single_issue.html * image has a terrible filename (./Screen Shot 2012-08-09 at 7.51.18 AM.png) '''.strip) From 286e2dad2d34c36cc1cb023c086ada3df79589c4 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Fri, 23 Jan 2015 22:51:59 -0800 Subject: [PATCH 04/40] Move logging into its own class --- lib/html/proofer.rb | 124 ++++++++++++++++++-------------------- lib/html/proofer/issue.rb | 2 +- lib/html/proofer/log.rb | 35 +++++++++++ 3 files changed, 93 insertions(+), 68 deletions(-) create mode 100644 lib/html/proofer/log.rb diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 2acd38b0..4c3dc78a 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -1,5 +1,13 @@ +def require_all(path) + glob = File.join(File.dirname(__FILE__), path, '**', '*.rb') + puts glob + Dir[glob].each do |f| + require f + end +end + +require_all 'proofer' require 'nokogiri' -require 'yell' require 'parallel' require 'addressable/uri' @@ -7,29 +15,12 @@ require 'awesome_print' rescue LoadError; end -%w( - checkable - checks - issue - version -).each { |r| require File.join(File.dirname(__FILE__), 'proofer', r) } - module HTML - def self.colorize(color, string) - if $stdout.isatty && $stderr.isatty - Colored.colorize(string, foreground: color) - else - string - end - end - class Proofer - include Yell::Loggable - attr_reader :options, :typhoeus_opts, :parallel_opts - def initialize(src, opts={}) + def initialize(src, opts = {}) @src = src @proofer_opts = { @@ -65,15 +56,14 @@ def initialize(src, opts={}) @options = @proofer_opts.merge(@typhoeus_opts).merge(opts) @failed_tests = [] + end - Yell.new({ :format => false, :name => 'HTML::Proofer', :level => "gte.#{log_level}" }) do |l| - l.adapter :stdout, :level => [:debug, :info, :warn] - l.adapter :stderr, :level => [:error, :fatal] - end + def logger + @logger ||= HTML::Proofer::Log.new(@options[:verbose]) end def run - logger.info HTML.colorize :white, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" + logger.log :info, :white, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" if @src.is_a?(Array) && !@options[:disable_external] check_list_of_links @@ -82,41 +72,9 @@ def run end if @failed_tests.empty? - logger.info HTML.colorize :green, 'HTML-Proofer finished successfully.' + logger.log :info, :green, 'HTML-Proofer finished successfully.' else - matcher = nil - - # always sort by the actual option, then path, to ensure consistent - # alphabetical (by filename) results - @failed_tests = @failed_tests.sort do |a, b| - comp = (a.send(@options[:error_sort]) <=> b.send(@options[:error_sort])) - comp.zero? ? (a.path <=> b.path) : comp - end - - @failed_tests.each do |issue| - case @options[:error_sort] - when :path - if matcher != issue.path - logger.error HTML.colorize :blue, "- #{issue.path}" - matcher = issue.path - end - logger.error HTML.colorize :red, " * #{issue.desc}" - when :desc - if matcher != issue.desc - logger.error HTML.colorize :blue, "- #{issue.desc}" - matcher = issue.desc - end - logger.error HTML.colorize :red, " * #{issue.path}" - when :status - if matcher != issue.status - logger.error HTML.colorize :blue, "- #{issue.status}" - matcher = issue.status - end - logger.error HTML.colorize :red, " * #{issue}" - end - end - - fail HTML.colorize :red, "HTML-Proofer found #{@failed_tests.length} failures!" + print_failed_tests end end @@ -136,7 +94,7 @@ def check_directory_of_files external_link_checker(external_urls) unless @options[:disable_external] - logger.info HTML.colorize :green, "Ran on #{files.length} files!\n\n" + logger.log :info, :green, "Ran on #{files.length} files!\n\n" end # Walks over each implemented check and runs them on the files, in parallel. @@ -146,7 +104,7 @@ def check_files_for_internal_woes result = { :external_urls => {}, :failed_tests => [] } get_checks.each do |klass| - logger.debug HTML.colorize :blue, "Checking #{klass.to_s.downcase} on #{path} ..." + logger.log :debug, :blue, "Checking #{klass.to_s.downcase} on #{path} ..." check = Object.const_get(klass).new(@src, path, html, @options) check.run result[:external_urls].merge!(check.external_urls) @@ -168,7 +126,7 @@ def check_files_for_internal_woes def external_link_checker(external_urls) external_urls = Hash[external_urls.sort] - logger.info HTML::colorize :yellow, "Checking #{external_urls.length} external links..." + logger.log :info, :yellow, "Checking #{external_urls.length} external links..." Ethon.logger = logger # log from Typhoeus/Ethon @@ -179,7 +137,7 @@ def external_link_checker(external_urls) queue_request(:head, href, filenames) end end - logger.debug HTML.colorize :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..." + logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..." hydra.run end @@ -197,7 +155,7 @@ def response_handler(response, filenames) debug_msg = "Received a #{response_code} for #{href}" debug_msg << " in #{filenames.join(' ')}" unless filenames.nil? - logger.debug debug_msg + logger.log :debug, :yellow, debug_msg if response_code.between?(200, 299) check_hash_in_2xx_response(href, effective_url, response, filenames) @@ -283,10 +241,6 @@ def hash?(url) nil end - def log_level - @options[:verbose] ? :debug : :info - end - def add_failed_tests(filenames, desc, status = nil) if filenames.nil? @failed_tests << Checks::Issue.new('', desc, status) @@ -301,5 +255,41 @@ def failed_tests @failed_tests.each { |f| result << f.to_s } result end + + def print_failed_tests + matcher = nil + + # always sort by the actual option, then path, to ensure consistent + # alphabetical (by filename) results + @failed_tests = @failed_tests.sort do |a, b| + comp = (a.send(@options[:error_sort]) <=> b.send(@options[:error_sort])) + comp.zero? ? (a.path <=> b.path) : comp + end + + @failed_tests.each do |issue| + case @options[:error_sort] + when :path + if matcher != issue.path + logger.log :error, :blue, "- #{issue.path}" + matcher = issue.path + end + logger.log :error, :red, " * #{issue.desc}" + when :desc + if matcher != issue.desc + logger.log :error, :blue, "- #{issue.desc}" + matcher = issue.desc + end + logger.log :error, :red, " * #{issue.path}" + when :status + if matcher != issue.status + logger.log :error, :blue, "- #{issue.status}" + matcher = issue.status + end + logger.log :error, :red, " * #{issue}" + end + end + + # fail logger.colorize :red, "HTML-Proofer found #{@failed_tests.length} failures!" + end end end diff --git a/lib/html/proofer/issue.rb b/lib/html/proofer/issue.rb index 5ccd31cb..adf756bf 100644 --- a/lib/html/proofer/issue.rb +++ b/lib/html/proofer/issue.rb @@ -14,7 +14,7 @@ def initialize(path, desc, status = -1) end def to_s - "#{HTML.colorize(:blue, @path)}: #{desc}" + "#{@path}: #{desc}" end end diff --git a/lib/html/proofer/log.rb b/lib/html/proofer/log.rb new file mode 100644 index 00000000..49c1851b --- /dev/null +++ b/lib/html/proofer/log.rb @@ -0,0 +1,35 @@ +require 'yell' + +module HTML + class Proofer + class Log + include Yell::Loggable + + def initialize(verbose) + log_level = verbose ? :debug : :info + + @logger = Yell.new({ :format => false, :name => 'HTML::Proofer', :level => "gte.#{log_level}" }) do |l| + l.adapter :stdout, :level => [:debug, :info, :warn] + l.adapter :stderr, :level => [:error, :fatal] + end + end + + def log(level, color, message) + @logger.send level, colorize(color, message) + end + + def colorize(color, message) + if $stdout.isatty && $stderr.isatty + Colored.colorize(message, foreground: color) + else + message + end + end + + # dumb override to play nice with Typhoeus/Ethon + def debug(message = nil) + log(:debug, :yellow, message || $stdout) + end + end + end +end From a0a2a650dbd42bf427fdfaf10b64259beed65a40 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Fri, 23 Jan 2015 23:09:37 -0800 Subject: [PATCH 05/40] Improve some documentation --- README.md | 79 +++++++++++++++++++++------------------ lib/html/proofer.rb | 33 ++++++++-------- lib/html/proofer/utils.rb | 12 ++++++ 3 files changed, 72 insertions(+), 52 deletions(-) create mode 100644 lib/html/proofer/utils.rb diff --git a/README.md b/README.md index 4c18e8a0..dfb183b7 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,43 @@ Or install it yourself as: **NOTE:** When installation speed matters, set `NOKOGIRI_USE_SYSTEM_LIBRARIES` to `true` in your environment. This is useful for increasing the speed of your Continuous Integration builds. +## Real-life examples + +Project | Repository +:--- | :--- +[Raspberry Pi documentation](http://www.raspberrypi.org/documentation/) | [raspberrypi/documentation]( https://github.com/raspberrypi/documentation) +[Open Whisper Systems website](https://whispersystems.org/) | [WhisperSystems/whispersystems.org](https://github.com/WhisperSystems/whispersystems.org) +[Jekyll website](http://jekyllrb.com/) | [jekyll/jekyll](https://github.com/jekyll/jekyll) + +## What's Tested? + +### Images + +`img` elements: + +* Whether all your images have alt tags +* Whether your internal image references are not broken +* Whether external images are showing + +### Links + +`a`, `link` elements: + +* Whether your internal links are not broken; this includes hash references (`#linkToMe`) +* Whether external links are working + +### Scripts + +`script` elements: + +* Whether your internal script references are not broken +* Whether external scripts are loading + +### HTML + +Nokogiri looks at the markup and [provides errors](http://www.nokogiri.org/tutorials/ensuring_well_formed_markup.html) when parsing your document. +This is an optional feature, set the `validate_html` option to enable validation errors from Nokogiri. + ## Usage ### Using in a script @@ -90,43 +127,6 @@ Don't have or want a `Rakefile`? You _could_ also do something like the followin htmlproof ./_site ``` -### Real-life examples - -Project | Repository -:--- | :--- -[Raspberry Pi documentation](http://www.raspberrypi.org/documentation/) | [raspberrypi/documentation]( https://github.com/raspberrypi/documentation) -[Open Whisper Systems website](https://whispersystems.org/) | [WhisperSystems/whispersystems.org](https://github.com/WhisperSystems/whispersystems.org) -[Jekyll website](http://jekyllrb.com/) | [jekyll/jekyll](https://github.com/jekyll/jekyll) - -## What's Tested? - -### Images - -`img` elements: - -* Whether all your images have alt tags -* Whether your internal image references are not broken -* Whether external images are showing - -### Links - -`a`, `link` elements: - -* Whether your internal links are not broken; this includes hash references (`#linkToMe`) -* Whether external links are working - -### Scripts - -`script` elements: - -* Whether your internal script references are not broken -* Whether external scripts are loading - -### HTML - -Nokogiri looks at the markup and [provides errors](http://www.nokogiri.org/tutorials/ensuring_well_formed_markup.html) when parsing your document. -This is an optional feature, set the `validate_html` option to enable validation errors from Nokogiri. - ## Configuration The `HTML::Proofer` constructor takes an optional hash of additional options: @@ -185,6 +185,11 @@ bin/htmlproof www.google.com,www.github.com --as-links Add the `data-proofer-ignore` attribute to any tag to ignore it from the checks. + +``` html +Not checked. +``` + ## Custom tests Want to write your own test? Sure! Just create two classes--one that inherits from `HTML::Proofer::Checkable`, and another that inherits from `HTML::Proofer::Checks::Check`. `Checkable` defines various helper methods for your test, while `Checks::Check` actually runs across your content. `Checks::Check` should call `self.add_issue` on failures, to add them to the list. diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 4c3dc78a..15ba8ad1 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -18,6 +18,8 @@ def require_all(path) module HTML class Proofer + include Utils + attr_reader :options, :typhoeus_opts, :parallel_opts def initialize(src, opts = {}) @@ -63,7 +65,7 @@ def logger end def run - logger.log :info, :white, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" + logger.log :info, :blue, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" if @src.is_a?(Array) && !@options[:disable_external] check_list_of_links @@ -72,7 +74,7 @@ def run end if @failed_tests.empty? - logger.log :info, :green, 'HTML-Proofer finished successfully.' + logger.log :info, :blue, 'HTML-Proofer finished successfully.' else print_failed_tests end @@ -83,6 +85,10 @@ def check_list_of_links external_link_checker(external_urls) end + # Collects any external URLs found in a directory of files. Also collectes + # every failed test from check_files_for_internal_woes. + # Sends the external URLs to Typhoeus for batch processing. See external_link_checker + # for more information on why that is. def check_directory_of_files external_urls = {} results = check_files_for_internal_woes @@ -94,17 +100,17 @@ def check_directory_of_files external_link_checker(external_urls) unless @options[:disable_external] - logger.log :info, :green, "Ran on #{files.length} files!\n\n" + logger.log :info, :blue, "Ran on #{files.length} files!\n\n" end # Walks over each implemented check and runs them on the files, in parallel. def check_files_for_internal_woes Parallel.map(files, @parallel_opts) do |path| - html = HTML::Proofer.create_nokogiri(path) + html = create_nokogiri(path) result = { :external_urls => {}, :failed_tests => [] } get_checks.each do |klass| - logger.log :debug, :blue, "Checking #{klass.to_s.downcase} on #{path} ..." + logger.log :debug, :yellow, "Checking #{klass.to_s.downcase} on #{path} ..." check = Object.const_get(klass).new(@src, path, html, @options) check.run result[:external_urls].merge!(check.external_urls) @@ -115,7 +121,8 @@ def check_files_for_internal_woes end # Proofer runs faster if we pull out all the external URLs and run the checks - # at the end. Otherwise, we're halting the consuming process for every file. + # at the end. Otherwise, we're halting the consuming process for every file during + # the check_directory_of_files process. # # In addition, sorting the list lets libcurl keep connections to the same hosts alive. # @@ -126,7 +133,7 @@ def check_files_for_internal_woes def external_link_checker(external_urls) external_urls = Hash[external_urls.sort] - logger.log :info, :yellow, "Checking #{external_urls.length} external links..." + logger.log :info, :blue, "Checking #{external_urls.length} external links..." Ethon.logger = logger # log from Typhoeus/Ethon @@ -137,6 +144,7 @@ def external_link_checker(external_urls) queue_request(:head, href, filenames) end end + logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..." hydra.run end @@ -223,11 +231,6 @@ def ignore_file?(file) false end - def self.create_nokogiri(path) - content = File.open(path).read - Nokogiri::HTML(content) - end - def get_checks checks = HTML::Proofer::Checks::Check.subclasses.map(&:name) checks.delete('Favicons') unless @options[:favicon] @@ -270,19 +273,19 @@ def print_failed_tests case @options[:error_sort] when :path if matcher != issue.path - logger.log :error, :blue, "- #{issue.path}" + logger.log :error, :red, "- #{issue.path}" matcher = issue.path end logger.log :error, :red, " * #{issue.desc}" when :desc if matcher != issue.desc - logger.log :error, :blue, "- #{issue.desc}" + logger.log :error, :red, "- #{issue.desc}" matcher = issue.desc end logger.log :error, :red, " * #{issue.path}" when :status if matcher != issue.status - logger.log :error, :blue, "- #{issue.status}" + logger.log :error, :red, "- #{issue.status}" matcher = issue.status end logger.log :error, :red, " * #{issue}" diff --git a/lib/html/proofer/utils.rb b/lib/html/proofer/utils.rb new file mode 100644 index 00000000..963d12dc --- /dev/null +++ b/lib/html/proofer/utils.rb @@ -0,0 +1,12 @@ +require 'nokogiri' + +module HTML + module Utils + extend self + + def create_nokogiri(path) + content = File.open(path).read + Nokogiri::HTML(content) + end + end +end From 7b71cd71490d93cc8460e47d23184afc7686b969 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Fri, 23 Jan 2015 23:31:10 -0800 Subject: [PATCH 06/40] Move UrlValidation out into its own class --- lib/html/proofer.rb | 127 ++++-------------------------- lib/html/proofer/url_validator.rb | 127 ++++++++++++++++++++++++++++++ lib/html/proofer/utils.rb | 7 +- 3 files changed, 147 insertions(+), 114 deletions(-) create mode 100644 lib/html/proofer/url_validator.rb diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 15ba8ad1..eef15f61 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -41,21 +41,19 @@ def initialize(src, opts = {}) :error_sort => :path } - @typhoeus_opts = { + @typhoeus_opts = opts[:typhoeus] || { :followlocation => true } + opts.delete(:typhoeus) + + @hydra_opts = opts[:hydra] || {} + opts.delete(:hydra) # fall back to parallel defaults @parallel_opts = opts[:parallel] || {} opts.delete(:parallel) - # Typhoeus won't let you pass in any non-Typhoeus option; if the option is not - # a proofer_opt, it must be for Typhoeus - opts.keys.each do |key| - @typhoeus_opts[key] = opts[key] if @proofer_opts[key].nil? - end - - @options = @proofer_opts.merge(@typhoeus_opts).merge(opts) + @options = @proofer_opts.merge(opts) @failed_tests = [] end @@ -64,6 +62,11 @@ def logger @logger ||= HTML::Proofer::Log.new(@options[:verbose]) end + def validate_urls(external_urls) + url_validator = HTML::Proofer::UrlValidator.new(logger, external_urls, @options, @typhoeus_opts, @hydra_opts) + @failed_tests.concat(url_validator.run) + end + def run logger.log :info, :blue, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" @@ -82,13 +85,12 @@ def run def check_list_of_links external_urls = Hash[*@src.map { |s| [s, nil] }.flatten] - external_link_checker(external_urls) + validate_urls(external_urls) end # Collects any external URLs found in a directory of files. Also collectes # every failed test from check_files_for_internal_woes. - # Sends the external URLs to Typhoeus for batch processing. See external_link_checker - # for more information on why that is. + # Sends the external URLs to Typhoeus for batch processing. def check_directory_of_files external_urls = {} results = check_files_for_internal_woes @@ -98,7 +100,7 @@ def check_directory_of_files @failed_tests.concat(item[:failed_tests]) end - external_link_checker(external_urls) unless @options[:disable_external] + validate_urls(external_urls) unless @options[:disable_external] logger.log :info, :blue, "Ran on #{files.length} files!\n\n" end @@ -120,93 +122,6 @@ def check_files_for_internal_woes end end - # Proofer runs faster if we pull out all the external URLs and run the checks - # at the end. Otherwise, we're halting the consuming process for every file during - # the check_directory_of_files process. - # - # In addition, sorting the list lets libcurl keep connections to the same hosts alive. - # - # Finally, we'll first make a HEAD request, rather than GETing all the contents. - # If the HEAD fails, we'll fall back to GET, as some servers are not configured - # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is - # not an option. - def external_link_checker(external_urls) - external_urls = Hash[external_urls.sort] - - logger.log :info, :blue, "Checking #{external_urls.length} external links..." - - Ethon.logger = logger # log from Typhoeus/Ethon - - external_urls.each_pair do |href, filenames| - if hash?(href) && @options[:check_external_hash] - queue_request(:get, href, filenames) - else - queue_request(:head, href, filenames) - end - end - - logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..." - hydra.run - end - - def queue_request(method, href, filenames) - request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method })) - request.on_complete { |response| response_handler(response, filenames) } - hydra.queue request - end - - def response_handler(response, filenames) - effective_url = response.options[:effective_url] - href = response.request.base_url - method = response.request.options[:method] - response_code = response.code - - debug_msg = "Received a #{response_code} for #{href}" - debug_msg << " in #{filenames.join(' ')}" unless filenames.nil? - logger.log :debug, :yellow, debug_msg - - if response_code.between?(200, 299) - check_hash_in_2xx_response(href, effective_url, response, filenames) - elsif response.timed_out? - handle_timeout(filenames, response_code) - elsif method == :head - queue_request(:get, effective_url, filenames) - else - return if @options[:only_4xx] && !response_code.between?(400, 499) - # Received a non-successful http response. - add_failed_tests filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code - end - end - - # Even though the response was a success, we may have been asked to check - # if the hash on the URL exists on the page - def check_hash_in_2xx_response(href, effective_url, response, filenames) - return if @options[:only_4xx] - return unless @options[:check_external_hash] - return unless hash = hash?(href) - - body_doc = Nokogiri::HTML(response.body) - - # user-content is a special addition by GitHub. - xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"]) - if URI.parse(href).host.match(/github\.com/i) - xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]) - end - - return unless body_doc.xpath(xpath).empty? - - add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code - end - - def handle_timeout - return if @options[:only_4xx] - add_failed_tests filenames, "External link #{href} failed: got a time out", response_code - end - - def hydra - @hydra ||= Typhoeus::Hydra.new - end - def files if File.directory? @src pattern = File.join @src, '**', "*#{@options[:ext]}" @@ -238,20 +153,6 @@ def get_checks checks end - def hash?(url) - URI.parse(url).fragment - rescue URI::InvalidURIError - nil - end - - def add_failed_tests(filenames, desc, status = nil) - if filenames.nil? - @failed_tests << Checks::Issue.new('', desc, status) - else - filenames.each { |f| @failed_tests << Checks::Issue.new(f, desc, status) } - end - end - def failed_tests return [] if @failed_tests.empty? result = [] diff --git a/lib/html/proofer/url_validator.rb b/lib/html/proofer/url_validator.rb new file mode 100644 index 00000000..0ef22334 --- /dev/null +++ b/lib/html/proofer/url_validator.rb @@ -0,0 +1,127 @@ +require_relative './utils' + +module HTML + class Proofer + class UrlValidator + include Utils + + attr_accessor :logger, :external_urls, :hydra + + def initialize(logger, external_urls, options, typhoeus_opts, hydra_opts) + @logger = logger + @external_urls = external_urls + @failed_tests = [] + @options = options + @hydra = Typhoeus::Hydra.new(hydra_opts) + @typhoeus_opts = typhoeus_opts + end + + def run + external_link_checker(external_urls) + @failed_tests + end + + # Proofer runs faster if we pull out all the external URLs and run the checks + # at the end. Otherwise, we're halting the consuming process for every file during + # the check_directory_of_files process. + # + # In addition, sorting the list lets libcurl keep connections to the same hosts alive. + # + # Finally, we'll first make a HEAD request, rather than GETing all the contents. + # If the HEAD fails, we'll fall back to GET, as some servers are not configured + # for HEAD. If we've decided to check for hashes, we must do a GET--HEAD is + # not an option. + def external_link_checker(external_urls) + external_urls = Hash[external_urls.sort] + + logger.log :info, :blue, "Checking #{external_urls.length} external links..." + + Ethon.logger = logger # log from Typhoeus/Ethon + + url_processor(external_urls) + + logger.log :debug, :yellow, "Running requests for all #{hydra.queued_requests.size} external URLs..." + hydra.run + end + + def url_processor(external_urls) + external_urls.each_pair do |href, filenames| + if hash?(href) && @options[:check_external_hash] + queue_request(:get, href, filenames) + else + queue_request(:head, href, filenames) + end + end + end + + def queue_request(method, href, filenames) + request = Typhoeus::Request.new(href, @typhoeus_opts.merge({ :method => method })) + request.on_complete { |response| response_handler(response, filenames) } + hydra.queue request + end + + def response_handler(response, filenames) + effective_url = response.options[:effective_url] + href = response.request.base_url + method = response.request.options[:method] + response_code = response.code + + debug_msg = "Received a #{response_code} for #{href}" + debug_msg << " in #{filenames.join(' ')}" unless filenames.nil? + logger.log :debug, :yellow, debug_msg + + if response_code.between?(200, 299) + check_hash_in_2xx_response(href, effective_url, response, filenames) + elsif response.timed_out? + handle_timeout(filenames, response_code) + elsif method == :head + queue_request(:get, effective_url, filenames) + else + return if @options[:only_4xx] && !response_code.between?(400, 499) + # Received a non-successful http response. + add_failed_tests filenames, "External link #{href} failed: #{response_code} #{response.return_message}", response_code + end + end + + # Even though the response was a success, we may have been asked to check + # if the hash on the URL exists on the page + def check_hash_in_2xx_response(href, effective_url, response, filenames) + return if @options[:only_4xx] + return unless @options[:check_external_hash] + return unless (hash = hash?(href)) + + body_doc = create_nokogiri(response.body) + + # user-content is a special addition by GitHub. + xpath = %(//*[@name="#{hash}"]|//*[@id="#{hash}"]) + if URI.parse(href).host.match(/github\.com/i) + xpath << %(|//*[@name="user-content-#{hash}"]|//*[@id="user-content-#{hash}"]) + end + + return unless body_doc.xpath(xpath).empty? + + add_failed_tests filenames, "External link #{href} failed: #{effective_url} exists, but the hash '#{hash}' does not", response.code + end + + def handle_timeout + return if @options[:only_4xx] + add_failed_tests filenames, "External link #{href} failed: got a time out", response_code + end + + def add_failed_tests(filenames, desc, status = nil) + if filenames.nil? + @failed_tests << Checks::Issue.new('', desc, status) + else + filenames.each { |f| @failed_tests << Checks::Issue.new(f, desc, status) } + end + end + + def hash?(url) + URI.parse(url).fragment + rescue URI::InvalidURIError + nil + end + + end + end +end diff --git a/lib/html/proofer/utils.rb b/lib/html/proofer/utils.rb index 963d12dc..04e9b9df 100644 --- a/lib/html/proofer/utils.rb +++ b/lib/html/proofer/utils.rb @@ -5,7 +5,12 @@ module Utils extend self def create_nokogiri(path) - content = File.open(path).read + if File.exist? path + content = File.open(path).read + else + content = path + end + Nokogiri::HTML(content) end end From efbb964abb09c69bcfcdac7c4614da23d5679535 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Fri, 23 Jan 2015 23:33:04 -0800 Subject: [PATCH 07/40] Obsolete --- lib/html/proofer/checks.rb | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 lib/html/proofer/checks.rb diff --git a/lib/html/proofer/checks.rb b/lib/html/proofer/checks.rb deleted file mode 100644 index 7d5c599b..00000000 --- a/lib/html/proofer/checks.rb +++ /dev/null @@ -1,15 +0,0 @@ -module HTML - class Proofer - class Checks - [ - 'issue', - 'check', - 'checks/images', - 'checks/links', - 'checks/scripts', - 'checks/favicon', - 'checks/html' - ].each { |r| require File.join(File.dirname(__FILE__), r) } - end - end -end From 904811da11f049016aaf3777a9a545b390f1cdb4 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Fri, 23 Jan 2015 23:33:13 -0800 Subject: [PATCH 08/40] Include Utils here, too --- lib/html/proofer/checks/links.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/html/proofer/checks/links.rb b/lib/html/proofer/checks/links.rb index 64a4e54e..34984535 100644 --- a/lib/html/proofer/checks/links.rb +++ b/lib/html/proofer/checks/links.rb @@ -1,4 +1,5 @@ # encoding: utf-8 +require_relative '../utils' class Link < ::HTML::Proofer::Checkable @@ -31,6 +32,7 @@ def real_attr(attr) end class Links < ::HTML::Proofer::Checks::Check + include HTML::Utils def run @html.css('a, link').each do |l| @@ -97,7 +99,7 @@ def external_link_check(link) if !link.exists? add_issue "trying to find hash of #{link.href}, but #{link.absolute_path} does not exist" else - target_html = HTML::Proofer.create_nokogiri link.absolute_path + target_html = create_nokogiri link.absolute_path unless hash_check target_html, link.hash add_issue "linking to #{link.href}, but #{link.hash} does not exist" end From b0b41022e4df1bea05ce70116eb25ed5071cf5b4 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 00:24:55 -0800 Subject: [PATCH 09/40] Refactor all the check logic --- README.md | 10 ++++--- lib/html/proofer.rb | 14 +++++----- lib/html/proofer/checkable.rb | 34 ++++++++++++++---------- lib/html/proofer/checks/favicon.rb | 4 +-- lib/html/proofer/checks/html.rb | 2 +- lib/html/proofer/checks/images.rb | 6 ++--- lib/html/proofer/checks/links.rb | 10 ++----- lib/html/proofer/checks/scripts.rb | 6 ++--- lib/html/proofer/log.rb | 1 + lib/html/proofer/{check.rb => runner.rb} | 24 ++++++----------- lib/html/proofer/{ => runner}/issue.rb | 6 +---- lib/html/proofer/url_validator.rb | 5 ++-- 12 files changed, 58 insertions(+), 64 deletions(-) rename lib/html/proofer/{check.rb => runner.rb} (62%) rename lib/html/proofer/{ => runner}/issue.rb (83%) diff --git a/README.md b/README.md index dfb183b7..b0a8867b 100644 --- a/README.md +++ b/README.md @@ -192,7 +192,11 @@ Add the `data-proofer-ignore` attribute to any tag to ignore it from the checks. ## Custom tests -Want to write your own test? Sure! Just create two classes--one that inherits from `HTML::Proofer::Checkable`, and another that inherits from `HTML::Proofer::Checks::Check`. `Checkable` defines various helper methods for your test, while `Checks::Check` actually runs across your content. `Checks::Check` should call `self.add_issue` on failures, to add them to the list. +Want to write your own test? Sure! Just create two classes--one that inherits from `HTML::Proofer::Runner`, and another that inherits from `HTML::Proofer::Checkable`. + +The `Runner` subclass must define one method called `run`. This is called on your content, and is responsible for performing the validation on whatever elements you like. When you catch a broken issue, call `add_issue(message)` to explain the error. + +The `Checkable` subclass defines various helper methods you can use as part of your test. Usually, you'll want to instantiate it within `run`. You have access to all of your element's attributes. Here's an example custom test that protects against `mailto` links that point to `octocat@github.com`: @@ -210,14 +214,14 @@ class OctocatLink < ::HTML::Proofer::Checkable end -class MailToOctocat < ::HTML::Proofer::Checks::Check +class MailToOctocat < ::HTML::Proofer::Runner def run @html.css('a').each do |l| link = OctocatLink.new l, "octocat_link", self if link.mailto? && link.octocat? - return self.add_issue("Don't email the Octocat directly!") + return add_issue("Don't email the Octocat directly!") end end end diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index eef15f61..91a38efb 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -1,15 +1,15 @@ def require_all(path) - glob = File.join(File.dirname(__FILE__), path, '**', '*.rb') - puts glob + glob = File.join(File.dirname(__FILE__), path, '*.rb') Dir[glob].each do |f| require f end end require_all 'proofer' -require 'nokogiri' +require_all 'proofer/runner' +require_all 'proofer/checks' + require 'parallel' -require 'addressable/uri' begin require 'awesome_print' @@ -147,9 +147,9 @@ def ignore_file?(file) end def get_checks - checks = HTML::Proofer::Checks::Check.subclasses.map(&:name) - checks.delete('Favicons') unless @options[:favicon] - checks.delete('Html') unless @options[:validate_html] + checks = HTML::Proofer::Runner.checks.map(&:name) + checks.delete('FaviconRunner') unless @options[:favicon] + checks.delete('HtmlRunner') unless @options[:validate_html] checks end diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index 5f4fbc83..6de4f7f4 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -1,19 +1,19 @@ +require 'addressable/uri' + module HTML class Proofer + # Represents the superclass from which all checks derive. class Checkable - def initialize(obj, type, check) - @src = obj['src'] - @href = obj['href'] - @alt = obj['alt'] - @name = obj['name'] - @id = obj['id'] - @rel = obj['rel'] + def initialize(obj, check) + obj.attributes.each_pair do |attribute, value| + self.instance_variable_set("@#{attribute}".to_sym, value.value) + end @data_ignore_proofer = obj['data-proofer-ignore'] @content = obj.content @check = check @checked_paths = {} - @type = type + @type = self.class.name if @href && @check.options[:href_swap] @check.options[:href_swap].each do |link, replace| @@ -24,7 +24,6 @@ def initialize(obj, type, check) # fix up missing protocols @href.insert 0, 'http:' if @href =~ %r{^//} @src.insert 0, 'http:' if @src =~ %r{^//} - end def url @@ -66,13 +65,13 @@ def ignore? return true if @data_ignore_proofer case @type - when 'favicon' + when 'Favicon' return true if url.match(/^data:image/) - when 'link' - return true if ignores_pattern_check(@check.additional_href_ignores) - when 'image' + when 'Link' + return true if ignores_pattern_check(@check.href_ignores) + when 'Image' return true if url.match(/^data:image/) - return true if ignores_pattern_check(@check.additional_alt_ignores) + return true if ignores_pattern_check(@check.alt_ignores) end end @@ -135,6 +134,13 @@ def ignores_pattern_check(links) def unslashed_directory?(file) File.directory?(file) && !file.end_with?(File::SEPARATOR) && !@check.options[:followlocation] end + + private + + def real_attr(attr) + attr.to_s unless attr.nil? || attr.empty? + end + end end end diff --git a/lib/html/proofer/checks/favicon.rb b/lib/html/proofer/checks/favicon.rb index a079346c..a71c1ad1 100644 --- a/lib/html/proofer/checks/favicon.rb +++ b/lib/html/proofer/checks/favicon.rb @@ -6,11 +6,11 @@ def rel end end -class Favicons < ::HTML::Proofer::Checks::Check +class FaviconRunner < ::HTML::Proofer::Runner def run @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |favicon| - favicon = Favicon.new favicon, "favicon", self + favicon = Favicon.new favicon, self next if favicon.ignore? return if favicon.rel.split(' ').last.eql? 'icon' end diff --git a/lib/html/proofer/checks/html.rb b/lib/html/proofer/checks/html.rb index ed2590ec..1b8f58f6 100644 --- a/lib/html/proofer/checks/html.rb +++ b/lib/html/proofer/checks/html.rb @@ -1,6 +1,6 @@ # encoding: utf-8 -class Html < ::HTML::Proofer::Checks::Check +class HtmlRunner < ::HTML::Proofer::Runner # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp) HTML5_TAGS = %w(article aside bdi details dialog figcaption diff --git a/lib/html/proofer/checks/images.rb b/lib/html/proofer/checks/images.rb index a3c923bc..b1f5c8c2 100644 --- a/lib/html/proofer/checks/images.rb +++ b/lib/html/proofer/checks/images.rb @@ -13,7 +13,7 @@ def terrible_filename? end def src - @src unless @src.nil? || @src.empty? + real_attr @src end def missing_src? @@ -22,10 +22,10 @@ def missing_src? end -class Images < ::HTML::Proofer::Checks::Check +class ImageRunner < ::HTML::Proofer::Runner def run @html.css('img').each do |i| - img = Image.new i, 'image', self + img = Image.new i, self next if img.ignore? diff --git a/lib/html/proofer/checks/links.rb b/lib/html/proofer/checks/links.rb index 34984535..82fd295c 100644 --- a/lib/html/proofer/checks/links.rb +++ b/lib/html/proofer/checks/links.rb @@ -23,20 +23,14 @@ def placeholder? (id || name) && href.nil? end - private - - def real_attr(attr) - attr unless attr.nil? || attr.empty? - end - end -class Links < ::HTML::Proofer::Checks::Check +class LinkRunner < ::HTML::Proofer::Runner include HTML::Utils def run @html.css('a, link').each do |l| - link = Link.new l, 'link', self + link = Link.new l, self next if link.ignore? next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse diff --git a/lib/html/proofer/checks/scripts.rb b/lib/html/proofer/checks/scripts.rb index 02f03d86..8cc5936f 100644 --- a/lib/html/proofer/checks/scripts.rb +++ b/lib/html/proofer/checks/scripts.rb @@ -3,7 +3,7 @@ class Script < ::HTML::Proofer::Checkable def src - @src unless @src.nil? || @src.empty? + real_attr @src end def missing_src? @@ -16,10 +16,10 @@ def blank? end -class Scripts < ::HTML::Proofer::Checks::Check +class ScriptRunner < ::HTML::Proofer::Runner def run @html.css('script').each do |s| - script = Script.new s, 'script', self + script = Script.new s, self next if script.ignore? next unless script.blank? diff --git a/lib/html/proofer/log.rb b/lib/html/proofer/log.rb index 49c1851b..e9ab9183 100644 --- a/lib/html/proofer/log.rb +++ b/lib/html/proofer/log.rb @@ -1,4 +1,5 @@ require 'yell' +require 'colored' module HTML class Proofer diff --git a/lib/html/proofer/check.rb b/lib/html/proofer/runner.rb similarity index 62% rename from lib/html/proofer/check.rb rename to lib/html/proofer/runner.rb index 50f9eb30..6517e63c 100644 --- a/lib/html/proofer/check.rb +++ b/lib/html/proofer/runner.rb @@ -1,15 +1,11 @@ # encoding: utf-8 -require 'net/http' -require 'net/https' -require 'timeout' -require 'uri' -require 'typhoeus' -class HTML::Proofer::Checks +class HTML::Proofer - class Check + # Mostly handles issue management and collecting of external URLs. + class Runner - attr_reader :issues, :src, :path, :options, :external_urls, :additional_href_ignores, :additional_alt_ignores + attr_reader :issues, :src, :path, :options, :external_urls, :href_ignores, :alt_ignores def initialize(src, path, html, opts={}) @src = src @@ -17,23 +13,19 @@ def initialize(src, path, html, opts={}) @html = remove_ignored(html) @options = opts @issues = [] - @additional_href_ignores = @options[:href_ignore] - @additional_alt_ignores = @options[:alt_ignore] + @href_ignores = @options[:href_ignore] + @alt_ignores = @options[:alt_ignore] @external_urls = {} end def run - fail NotImplementedError.new('HTML::Proofer::Check subclasses must implement #run') + fail NotImplementedError, 'HTML::Proofer::Runner subclasses must implement #run' end def add_issue(desc, status = -1) @issues << Issue.new(@path, desc, status) end - def output_filenames - Dir[@site.config[:output_dir] + '/**/*'].select { |f| File.file?(f) } - end - def add_to_external_urls(href) if @external_urls[href] @external_urls[href] << @path @@ -42,7 +34,7 @@ def add_to_external_urls(href) end end - def self.subclasses + def self.checks classes = [] ObjectSpace.each_object(Class) do |c| diff --git a/lib/html/proofer/issue.rb b/lib/html/proofer/runner/issue.rb similarity index 83% rename from lib/html/proofer/issue.rb rename to lib/html/proofer/runner/issue.rb index adf756bf..9b52582c 100644 --- a/lib/html/proofer/issue.rb +++ b/lib/html/proofer/runner/issue.rb @@ -1,10 +1,7 @@ # encoding: utf-8 -require 'colored' - -class HTML::Proofer::Checks +class HTML::Proofer::Runner class Issue - attr_reader :path, :desc, :status def initialize(path, desc, status = -1) @@ -16,6 +13,5 @@ def initialize(path, desc, status = -1) def to_s "#{@path}: #{desc}" end - end end diff --git a/lib/html/proofer/url_validator.rb b/lib/html/proofer/url_validator.rb index 0ef22334..8776134a 100644 --- a/lib/html/proofer/url_validator.rb +++ b/lib/html/proofer/url_validator.rb @@ -1,4 +1,5 @@ require_relative './utils' +require 'typhoeus' module HTML class Proofer @@ -110,9 +111,9 @@ def handle_timeout def add_failed_tests(filenames, desc, status = nil) if filenames.nil? - @failed_tests << Checks::Issue.new('', desc, status) + @failed_tests << Runner::Issue.new('', desc, status) else - filenames.each { |f| @failed_tests << Checks::Issue.new(f, desc, status) } + filenames.each { |f| @failed_tests << Runner::Issue.new(f, desc, status) } end end From c0deedf039d41851e12f0477ea97f786467b459c Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 00:26:49 -0800 Subject: [PATCH 10/40] Suffix all classes with check --- README.md | 4 ++-- lib/html/proofer/checks/favicon.rb | 4 ++-- lib/html/proofer/checks/images.rb | 4 ++-- lib/html/proofer/checks/links.rb | 4 ++-- lib/html/proofer/checks/scripts.rb | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index b0a8867b..1fab1c73 100644 --- a/README.md +++ b/README.md @@ -201,7 +201,7 @@ The `Checkable` subclass defines various helper methods you can use as part of y Here's an example custom test that protects against `mailto` links that point to `octocat@github.com`: ``` ruby -class OctocatLink < ::HTML::Proofer::Checkable +class OctocatLinkCheck < ::HTML::Proofer::Checkable def mailto? return false if @data_ignore_proofer || @href.nil? || @href.empty? @@ -218,7 +218,7 @@ class MailToOctocat < ::HTML::Proofer::Runner def run @html.css('a').each do |l| - link = OctocatLink.new l, "octocat_link", self + link = OctocatLinkCheck.new l, "octocat_link", self if link.mailto? && link.octocat? return add_issue("Don't email the Octocat directly!") diff --git a/lib/html/proofer/checks/favicon.rb b/lib/html/proofer/checks/favicon.rb index a71c1ad1..0dd91483 100644 --- a/lib/html/proofer/checks/favicon.rb +++ b/lib/html/proofer/checks/favicon.rb @@ -1,6 +1,6 @@ # encoding: utf-8 -class Favicon < ::HTML::Proofer::Checkable +class FaviconCheck < ::HTML::Proofer::Checkable def rel @rel end @@ -10,7 +10,7 @@ class FaviconRunner < ::HTML::Proofer::Runner def run @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |favicon| - favicon = Favicon.new favicon, self + favicon = FaviconCheck.new favicon, self next if favicon.ignore? return if favicon.rel.split(' ').last.eql? 'icon' end diff --git a/lib/html/proofer/checks/images.rb b/lib/html/proofer/checks/images.rb index b1f5c8c2..ccf60ee5 100644 --- a/lib/html/proofer/checks/images.rb +++ b/lib/html/proofer/checks/images.rb @@ -1,6 +1,6 @@ # encoding: utf-8 -class Image < ::HTML::Proofer::Checkable +class ImageCheck < ::HTML::Proofer::Checkable SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/ @@ -25,7 +25,7 @@ def missing_src? class ImageRunner < ::HTML::Proofer::Runner def run @html.css('img').each do |i| - img = Image.new i, self + img = ImageCheck.new i, self next if img.ignore? diff --git a/lib/html/proofer/checks/links.rb b/lib/html/proofer/checks/links.rb index 82fd295c..90e54266 100644 --- a/lib/html/proofer/checks/links.rb +++ b/lib/html/proofer/checks/links.rb @@ -1,7 +1,7 @@ # encoding: utf-8 require_relative '../utils' -class Link < ::HTML::Proofer::Checkable +class LinkCheck < ::HTML::Proofer::Checkable def href real_attr @href @@ -30,7 +30,7 @@ class LinkRunner < ::HTML::Proofer::Runner def run @html.css('a, link').each do |l| - link = Link.new l, self + link = LinkCheck.new l, self next if link.ignore? next if link.href =~ /^javascript:/ # can't put this in ignore? because the URI does not parse diff --git a/lib/html/proofer/checks/scripts.rb b/lib/html/proofer/checks/scripts.rb index 8cc5936f..25096024 100644 --- a/lib/html/proofer/checks/scripts.rb +++ b/lib/html/proofer/checks/scripts.rb @@ -1,6 +1,6 @@ # encoding: utf-8 -class Script < ::HTML::Proofer::Checkable +class ScriptCheck < ::HTML::Proofer::Checkable def src real_attr @src @@ -19,7 +19,7 @@ def blank? class ScriptRunner < ::HTML::Proofer::Runner def run @html.css('script').each do |s| - script = Script.new s, self + script = ScriptCheck.new s, self next if script.ignore? next unless script.blank? From fa6a998b3a7480b355e3fc8575d546925f4a695e Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 00:30:08 -0800 Subject: [PATCH 11/40] Mention `typhoeus` and `hydra` --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1fab1c73..79a71ecf 100644 --- a/README.md +++ b/README.md @@ -147,16 +147,18 @@ The `HTML::Proofer` constructor takes an optional hash of additional options: | `validate_html` | Enables HTML validation errors from Nokogiri | `false` | | `check_external_hash` | Checks whether external hashes exist (even if the website exists). This slows the checker down. | `false` | -### Configuring Typhoeus +### Configuring Typhoeus and Hydra You can also pass in any of Typhoeus' options for the external link check. For example: ``` ruby -HTML::Proofer.new("out/", {:ext => ".htm", :verbose => true, :ssl_verifyhost => 2 }) +HTML::Proofer.new("out/", {:ext => ".htm", :typhoeus => { :verbose => true, :ssl_verifyhost => 2 } }) ``` This sets `HTML::Proofer`'s extensions to use _.htm_, and gives Typhoeus a configuration for it to be verbose, and use specific SSL settings. Check [the Typhoeus documentation](https://github.com/typhoeus/typhoeus#other-curl-options) for more information on what options it can receive. +You can similarly pass in a `:hydra` option with a hash configuration for Hydra. + ### Configuring Parallel [Parallel](https://github.com/grosser/parallel) is being used to speed things up a bit. You can pass in any of its options with the options "namespace" `:parallel`. For example: From 0c1a9114f87ebfe8caafdc2d634a5e727d584241 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 00:31:21 -0800 Subject: [PATCH 12/40] begone --- .rubocop.yml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .rubocop.yml diff --git a/.rubocop.yml b/.rubocop.yml deleted file mode 100644 index b15a80bf..00000000 --- a/.rubocop.yml +++ /dev/null @@ -1,5 +0,0 @@ -HashSyntax: - EnforcedStyle: hash_rockets - -Metrics/LineLength: - Max: 100 From a27f4233eb4ab418c8e966a100a1c21c3d2fec5e Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 00:42:45 -0800 Subject: [PATCH 13/40] Rename favicon option --- README.md | 8 ++++++-- lib/html/proofer.rb | 4 ++-- spec/html/proofer/favicon_spec.rb | 12 ++++++------ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 79a71ecf..1e0a7d78 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ Or install it yourself as: **NOTE:** When installation speed matters, set `NOKOGIRI_USE_SYSTEM_LIBRARIES` to `true` in your environment. This is useful for increasing the speed of your Continuous Integration builds. -## Real-life examples +### Real-life examples Project | Repository :--- | :--- @@ -54,6 +54,10 @@ Project | Repository * Whether your internal script references are not broken * Whether external scripts are loading +### Favicon + +Checks if your favicons are valid. This is an optional feature, set the `validate_favicon` option to turn it on. + ### HTML Nokogiri looks at the markup and [provides errors](http://www.nokogiri.org/tutorials/ensuring_well_formed_markup.html) when parsing your document. @@ -135,7 +139,7 @@ The `HTML::Proofer` constructor takes an optional hash of additional options: | :----- | :---------- | :------ | | `disable_external` | If `true`, does not run the external link checker, which can take a lot of time. | `false` | | `ext` | The extension of your HTML files including the dot. | `.html` -| `favicon` | Enables the favicon checker. | `false` | +| `validate_favicon` | Enables the favicon checker. | `false` | | `followlocation` | Follows external redirections. Amends missing trailing slashes to internal directories. | `true` | | `directory_index_file` | Sets the file to look for when a link refers to a directory. | `index.html` | | `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. | `[]` | diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 91a38efb..da558840 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -27,7 +27,7 @@ def initialize(src, opts = {}) @proofer_opts = { :ext => '.html', - :favicon => false, + :validate_favicon => false, :href_swap => [], :href_ignore => [], :file_ignore => [], @@ -148,7 +148,7 @@ def ignore_file?(file) def get_checks checks = HTML::Proofer::Runner.checks.map(&:name) - checks.delete('FaviconRunner') unless @options[:favicon] + checks.delete('FaviconRunner') unless @options[:validate_favicon] checks.delete('HtmlRunner') unless @options[:validate_html] checks end diff --git a/spec/html/proofer/favicon_spec.rb b/spec/html/proofer/favicon_spec.rb index 932a224d..9b553593 100644 --- a/spec/html/proofer/favicon_spec.rb +++ b/spec/html/proofer/favicon_spec.rb @@ -8,39 +8,39 @@ it 'fails for absent favicon' do absent = "#{FIXTURES_DIR}/favicon/favicon_absent.html" - proofer = make_proofer(absent, { :favicon => true }) + proofer = make_proofer(absent, { :validate_favicon => true }) expect(proofer.failed_tests.first).to match(/no favicon specified/) end it 'fails for absent favicon but present apple touch icon' do absent = "#{FIXTURES_DIR}/favicon/favicon_absent_apple.html" - proofer = make_proofer(absent, { :favicon => true }) + proofer = make_proofer(absent, { :validate_favicon => true }) # Travis gives a different error message here for some reason expect(proofer.failed_tests.last).to match(/(internally linking to gpl.png, which does not exist|no favicon specified)/) end it 'fails for broken favicon' do broken = "#{FIXTURES_DIR}/favicon/favicon_broken.html" - proofer = make_proofer(broken, { :favicon => true }) + proofer = make_proofer(broken, { :validate_favicon => true }) expect(proofer.failed_tests.first).to match(/internally linking to asdadaskdalsdk.png/) end it 'passes for present favicon' do present = "#{FIXTURES_DIR}/favicon/favicon_present.html" - proofer = make_proofer(present, { :favicon => true }) + proofer = make_proofer(present, { :validate_favicon => true }) expect(proofer.failed_tests).to eq [] end it 'passes for present favicon with shortcut notation' do present = "#{FIXTURES_DIR}/favicon/favicon_present_shortcut.html" - proofer = make_proofer(present, { :favicon => true }) + proofer = make_proofer(present, { :validate_favicon => true }) expect(proofer.failed_tests).to eq [] end it 'fails for broken favicon with data-proofer-ignore' do broken_but_ignored = "#{FIXTURES_DIR}/favicon/favicon_broken_but_ignored.html" - proofer = make_proofer(broken_but_ignored, { :favicon => true }) + proofer = make_proofer(broken_but_ignored, { :validate_favicon => true }) expect(proofer.failed_tests.first).to match(/no favicon specified/) end From 590ed997299ac375a54a401bb9d5c505a722dd7b Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 00:49:11 -0800 Subject: [PATCH 14/40] You don't like empty values? Me either. --- lib/html/proofer/checkable.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index 6de4f7f4..d8c94312 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -6,6 +6,7 @@ class Proofer class Checkable def initialize(obj, check) obj.attributes.each_pair do |attribute, value| + next if value.value.empty? self.instance_variable_set("@#{attribute}".to_sym, value.value) end From 700613208843c72b6d06b6400fb8616186040183 Mon Sep 17 00:00:00 2001 From: Anatol Broder Date: Sat, 24 Jan 2015 12:28:33 +0100 Subject: [PATCH 15/40] Add tests for Utils --- spec/html/proofer/fixtures/utils/lang-jp.html | 1 + spec/html/proofer/utils_spec.rb | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 spec/html/proofer/fixtures/utils/lang-jp.html create mode 100644 spec/html/proofer/utils_spec.rb diff --git a/spec/html/proofer/fixtures/utils/lang-jp.html b/spec/html/proofer/fixtures/utils/lang-jp.html new file mode 100644 index 00000000..9138eb79 --- /dev/null +++ b/spec/html/proofer/fixtures/utils/lang-jp.html @@ -0,0 +1 @@ + diff --git a/spec/html/proofer/utils_spec.rb b/spec/html/proofer/utils_spec.rb new file mode 100644 index 00000000..c25069a2 --- /dev/null +++ b/spec/html/proofer/utils_spec.rb @@ -0,0 +1,15 @@ +require 'spec_helper' +require 'html/proofer/utils' + +describe HTML::Utils do + describe '::create_nokogiri' do + it 'passes with a string' do + noko = HTML::Utils::create_nokogiri '' + expect(noko.css("html").first["lang"]).to eq "jp" + end + it 'passes with a file' do + noko = HTML::Utils::create_nokogiri "#{FIXTURES_DIR}/utils/lang-jp.html" + expect(noko.css("html").first["lang"]).to eq "jp" + end + end +end From 7408a1b80d62357d5ccea3e4f641581a6e591757 Mon Sep 17 00:00:00 2001 From: Anatol Broder Date: Sat, 24 Jan 2015 13:44:06 +0100 Subject: [PATCH 16/40] Single quote style --- spec/html/proofer/utils_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/html/proofer/utils_spec.rb b/spec/html/proofer/utils_spec.rb index c25069a2..bc848397 100644 --- a/spec/html/proofer/utils_spec.rb +++ b/spec/html/proofer/utils_spec.rb @@ -5,11 +5,11 @@ describe '::create_nokogiri' do it 'passes with a string' do noko = HTML::Utils::create_nokogiri '' - expect(noko.css("html").first["lang"]).to eq "jp" + expect(noko.css('html').first['lang']).to eq 'jp' end it 'passes with a file' do noko = HTML::Utils::create_nokogiri "#{FIXTURES_DIR}/utils/lang-jp.html" - expect(noko.css("html").first["lang"]).to eq "jp" + expect(noko.css('html').first['lang']).to eq 'jp' end end end From 41558f4249ebff0070da30862b617402228c26bb Mon Sep 17 00:00:00 2001 From: Anatol Broder Date: Sat, 24 Jan 2015 13:44:48 +0100 Subject: [PATCH 17/40] Wording --- spec/html/proofer/utils_spec.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spec/html/proofer/utils_spec.rb b/spec/html/proofer/utils_spec.rb index bc848397..097efeb5 100644 --- a/spec/html/proofer/utils_spec.rb +++ b/spec/html/proofer/utils_spec.rb @@ -3,11 +3,11 @@ describe HTML::Utils do describe '::create_nokogiri' do - it 'passes with a string' do + it 'passes for a string' do noko = HTML::Utils::create_nokogiri '' expect(noko.css('html').first['lang']).to eq 'jp' end - it 'passes with a file' do + it 'passes for a file' do noko = HTML::Utils::create_nokogiri "#{FIXTURES_DIR}/utils/lang-jp.html" expect(noko.css('html').first['lang']).to eq 'jp' end From b04ae08fe1137e2561e5b87fbf7cddf906279c43 Mon Sep 17 00:00:00 2001 From: Anatol Broder Date: Sat, 24 Jan 2015 13:49:54 +0100 Subject: [PATCH 18/40] Simplify the Parallel example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1e0a7d78..4084698f 100644 --- a/README.md +++ b/README.md @@ -168,7 +168,7 @@ You can similarly pass in a `:hydra` option with a hash configuration for Hydra. [Parallel](https://github.com/grosser/parallel) is being used to speed things up a bit. You can pass in any of its options with the options "namespace" `:parallel`. For example: ``` ruby -HTML::Proofer.new("out/", {:ext => ".htm", :verbose => true, :ssl_verifyhost => 2, :parallel => { :in_processes => 3} }) +HTML::Proofer.new("out/", {:ext => ".htm", :parallel => { :in_processes => 3} }) ``` `:in_processes => 3` will be passed into Parallel as a configuration option. From ea18ff0923fc82cb713ec9f09834eefab770b306 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 17:08:26 -0800 Subject: [PATCH 19/40] Fix Typhoeus link example --- spec/html/proofer/links_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/html/proofer/links_spec.rb b/spec/html/proofer/links_spec.rb index c714dd31..7216bb96 100644 --- a/spec/html/proofer/links_spec.rb +++ b/spec/html/proofer/links_spec.rb @@ -247,7 +247,7 @@ end it 'ensures Typhoeus options are passed' do - options = { ssl_verifypeer: false } + options = { :typhoeus => { :ssl_verifypeer => false } } typhoeus_options_link = "#{FIXTURES_DIR}/links/ensure_typhoeus_options.html" proofer = make_proofer(typhoeus_options_link, options) expect(proofer.failed_tests).to eq [] From a42d4790cb27344e4f768dd9a956af103df44a0c Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 17:22:59 -0800 Subject: [PATCH 20/40] Properly set ivar for data-proofer-ignore --- lib/html/proofer/checkable.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index d8c94312..0a407308 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -6,7 +6,7 @@ class Proofer class Checkable def initialize(obj, check) obj.attributes.each_pair do |attribute, value| - next if value.value.empty? + next if attribute == 'data-proofer-ignore' # TODO: not quite sure why this doesn't work self.instance_variable_set("@#{attribute}".to_sym, value.value) end From b4bfe130c61ddb2cb15dfe8e9e2f049ca2f70874 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 18:23:46 -0800 Subject: [PATCH 21/40] Proper pass of `followlocation` --- lib/html/proofer/checkable.rb | 2 +- spec/html/proofer/links_spec.rb | 6 +++--- spec/html/proofer_spec.rb | 2 +- spec/spec_helper.rb | 5 ++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index 0a407308..70b02eb7 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -133,7 +133,7 @@ def ignores_pattern_check(links) end def unslashed_directory?(file) - File.directory?(file) && !file.end_with?(File::SEPARATOR) && !@check.options[:followlocation] + File.directory?(file) && !file.end_with?(File::SEPARATOR) && !@check.options[:typhoeus][:followlocation] end private diff --git a/spec/html/proofer/links_spec.rb b/spec/html/proofer/links_spec.rb index 7216bb96..90e9c195 100644 --- a/spec/html/proofer/links_spec.rb +++ b/spec/html/proofer/links_spec.rb @@ -65,14 +65,14 @@ it 'fails on redirects if not following' do linkWithRedirectFilepath = "#{FIXTURES_DIR}/links/linkWithRedirect.html" - proofer = make_proofer(linkWithRedirectFilepath, { :followlocation => false }) + proofer = make_proofer(linkWithRedirectFilepath, :typhoeus => { :followlocation => false }) expect(proofer.failed_tests.first).to match(/failed: 301 No error/) end it "does not fail on redirects we're not following" do # this test should emit a 301--see above--but we're intentionally supressing it linkWithRedirectFilepath = "#{FIXTURES_DIR}/links/linkWithRedirect.html" - proofer = make_proofer(linkWithRedirectFilepath, { :only_4xx => true, :followlocation => false }) + proofer = make_proofer(linkWithRedirectFilepath, { :only_4xx => true, :typhoeus => { :followlocation => false } }) expect(proofer.failed_tests).to eq [] end @@ -197,7 +197,7 @@ end it 'fails for internal linking to a directory without trailing slash' do - options = { :followlocation => false } + options = { :typhoeus => { :followlocation => false } } internal = "#{FIXTURES_DIR}/links/link_directory_without_slash.html" proofer = make_proofer(internal, options) expect(proofer.failed_tests.first).to match(/without trailing slash/) diff --git a/spec/html/proofer_spec.rb b/spec/html/proofer_spec.rb index ab9f0efb..bc5182df 100644 --- a/spec/html/proofer_spec.rb +++ b/spec/html/proofer_spec.rb @@ -65,7 +65,7 @@ it 'understands sorting by status' do - output = send_proofer_output("#{FIXTURES_DIR}/sorting/status", :followlocation => false, :error_sort => :status) + output = send_proofer_output("#{FIXTURES_DIR}/sorting/status", :typhoeus => { :followlocation => false }, :error_sort => :status) expect(output.strip).to eq(''' - -1 * spec/html/proofer/fixtures/sorting/status/broken_link.html: internally linking to nowhere.fooof, which does not exist diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 4c7eb808..d4ee62ce 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -21,13 +21,13 @@ def capture_stderr(*) original_stderr = $stderr original_stdout = $stdout $stderr = fake_err = StringIO.new - $stdout = StringIO.new + # $stdout = StringIO.new begin yield rescue RuntimeError ensure $stderr = original_stderr - $stdout = original_stdout + # $stdout = original_stdout end fake_err.string end @@ -35,7 +35,6 @@ def capture_stderr(*) def make_proofer(file, opts = {}) proofer = HTML::Proofer.new(file, opts) capture_stderr { proofer.run } - # proofer.run # when I want to see output proofer end From 6c49e6b54a77df0522fe869c0c0ba00a96fc765e Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 19:03:24 -0800 Subject: [PATCH 22/40] Rename ignore check --- lib/html/proofer/checkable.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index 70b02eb7..a5fc3903 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -66,11 +66,11 @@ def ignore? return true if @data_ignore_proofer case @type - when 'Favicon' + when 'FaviconCheck' return true if url.match(/^data:image/) - when 'Link' + when 'LinkCheck' return true if ignores_pattern_check(@check.href_ignores) - when 'Image' + when 'ImageCheck' return true if url.match(/^data:image/) return true if ignores_pattern_check(@check.alt_ignores) end From abb4e25a541dd48de85b20579117b5c8f7e501c5 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 19:07:49 -0800 Subject: [PATCH 23/40] Proper check for `followlocation` --- lib/html/proofer/checkable.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index a5fc3903..8da55a2f 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -133,7 +133,11 @@ def ignores_pattern_check(links) end def unslashed_directory?(file) - File.directory?(file) && !file.end_with?(File::SEPARATOR) && !@check.options[:typhoeus][:followlocation] + File.directory?(file) && !file.end_with?(File::SEPARATOR) && !follow_location? + end + + def follow_location? + @check.options[:typhoeus] && @check.options[:typhoeus][:followlocation] end private From 5e82ae64243bb093df11903d5395425cb18bfe3d Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 19:08:06 -0800 Subject: [PATCH 24/40] I am somewhat certain this test is wrong --- spec/html/proofer/fixtures/links/rootLink/rootLink.html | 1 - 1 file changed, 1 deletion(-) diff --git a/spec/html/proofer/fixtures/links/rootLink/rootLink.html b/spec/html/proofer/fixtures/links/rootLink/rootLink.html index f22c5732..d3c31558 100644 --- a/spec/html/proofer/fixtures/links/rootLink/rootLink.html +++ b/spec/html/proofer/fixtures/links/rootLink/rootLink.html @@ -3,7 +3,6 @@

Blah blah blah. I am root.

-

Blah blah blah. I am relative to root.

From e7c5ca7d62bf47b3c632e8c0b4ee2a3b634f8122 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 19:14:09 -0800 Subject: [PATCH 25/40] Emit the correct colors --- spec/html/proofer_spec.rb | 2 +- spec/spec_helper.rb | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spec/html/proofer_spec.rb b/spec/html/proofer_spec.rb index bc5182df..b88c9ce3 100644 --- a/spec/html/proofer_spec.rb +++ b/spec/html/proofer_spec.rb @@ -6,7 +6,7 @@ it 'is a list of the formatted errors' do brokenLinkInternalFilepath = "#{FIXTURES_DIR}/links/brokenLinkInternal.html" proofer = make_proofer(brokenLinkInternalFilepath) - expect(proofer.failed_tests).to eq(["\e[34mspec/html/proofer/fixtures/links/brokenLinkInternal.html\e[0m: internally linking to ./notreal.html, which does not exist", "\e[34mspec/html/proofer/fixtures/links/brokenLinkInternal.html\e[0m: internally linking to ./missingImageAlt.html, which does not exist"]) + expect(proofer.failed_tests).to eq(["spec/html/proofer/fixtures/links/brokenLinkInternal.html: internally linking to ./notreal.html, which does not exist", "spec/html/proofer/fixtures/links/brokenLinkInternal.html: internally linking to ./missingImageAlt.html, which does not exist"]) end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index d4ee62ce..291edfac 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -21,13 +21,13 @@ def capture_stderr(*) original_stderr = $stderr original_stdout = $stdout $stderr = fake_err = StringIO.new - # $stdout = StringIO.new + $stdout = StringIO.new begin yield rescue RuntimeError ensure $stderr = original_stderr - # $stdout = original_stdout + $stdout = original_stdout end fake_err.string end From 778820f0074199e9b08ace1fa7006153f0fee92f Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 24 Jan 2015 19:24:37 -0800 Subject: [PATCH 26/40] Pretty sure `stdout` is rubbish here? --- lib/html/proofer/log.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/html/proofer/log.rb b/lib/html/proofer/log.rb index e9ab9183..078cd728 100644 --- a/lib/html/proofer/log.rb +++ b/lib/html/proofer/log.rb @@ -29,7 +29,7 @@ def colorize(color, message) # dumb override to play nice with Typhoeus/Ethon def debug(message = nil) - log(:debug, :yellow, message || $stdout) + log(:debug, :yellow, message) unless message.nil? end end end From 347d2fc65d21d2e217d389f76448f2ad5e4b88ab Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sun, 25 Jan 2015 14:45:20 -0800 Subject: [PATCH 27/40] Update HTML tests --- spec/html/proofer/html_spec.rb | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/spec/html/proofer/html_spec.rb b/spec/html/proofer/html_spec.rb index ca2d897f..c5762926 100644 --- a/spec/html/proofer/html_spec.rb +++ b/spec/html/proofer/html_spec.rb @@ -3,49 +3,49 @@ describe 'Html test' do it 'ignores an invalid tag by default' do html = "#{FIXTURES_DIR}/html/invalid_tag.html" - output = capture_stderr { HTML::Proofer.new(html).run } - expect(output).to eq '' + proofer = make_proofer(html) + expect(proofer.failed_tests).to eq [] end it "doesn't fail for html5 tags" do html = "#{FIXTURES_DIR}/html/html5_tags.html" - output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } - expect(output).to eq '' + proofer = make_proofer(html, { :validate_html => true }) + expect(proofer.failed_tests).to eq [] end it 'fails for an invalid tag' do html = "#{FIXTURES_DIR}/html/invalid_tag.html" - output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } - expect(output).to match(/Tag myfancytag invalid/) + proofer = make_proofer(html, { :validate_html => true }) + expect(proofer.failed_tests.first).to match(/Tag myfancytag invalid/) end it 'fails for an unmatched end tag' do html = "#{FIXTURES_DIR}/html/unmatched_end_tag.html" - output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } - expect(output).to match(/Unexpected end tag : div/) + proofer = make_proofer(html, { :validate_html => true }) + expect(proofer.failed_tests.first).to match(/Unexpected end tag : div/) end it 'fails for an unescaped ampersand in attribute' do html = "#{FIXTURES_DIR}/html/unescaped_ampersand_in_attribute.html" - output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } - expect(output).to match(/htmlParseEntityRef: expecting ';'/) + proofer = make_proofer(html, { :validate_html => true }) + expect(proofer.failed_tests.first).to match(/htmlParseEntityRef: expecting ';'/) end it 'fails for mismatch between opening and ending tag' do html = "#{FIXTURES_DIR}/html/opening_and_ending_tag_mismatch.html" - output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } - expect(output).to match(/Opening and ending tag mismatch: p and strong/) + proofer = make_proofer(html, { :validate_html => true }) + expect(proofer.failed_tests.first).to match(/Opening and ending tag mismatch: p and strong/) end it 'fails for div inside head' do html = "#{FIXTURES_DIR}/html/div_inside_head.html" - output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } - expect(output).to match(/Unexpected end tag : head/) + proofer = make_proofer(html, { :validate_html => true }) + expect(proofer.failed_tests.first).to match(/Unexpected end tag : head/) end it 'fails for missing closing quotation mark in href' do html = "#{FIXTURES_DIR}/html/missing_closing_quotes.html" - output = capture_stderr { HTML::Proofer.new(html, { :validate_html => true }).run } - expect(output).to match(/Couldn't find end of Start Tag a/) + proofer = make_proofer(html, { :validate_html => true }) + expect(proofer.failed_tests[1]).to match(/Couldn't find end of Start Tag a/) end end From 972fe5a4f96e70b2b0cbdd28362fa6c3dbaaa91c Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sun, 25 Jan 2015 23:01:33 -0800 Subject: [PATCH 28/40] Update syntax --- spec/html/proofer/links_spec.rb | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/spec/html/proofer/links_spec.rb b/spec/html/proofer/links_spec.rb index 90e9c195..366bdabc 100644 --- a/spec/html/proofer/links_spec.rb +++ b/spec/html/proofer/links_spec.rb @@ -42,7 +42,20 @@ it 'fails for broken external links' do brokenLinkExternalFilepath = "#{FIXTURES_DIR}/links/brokenLinkExternal.html" proofer = make_proofer(brokenLinkExternalFilepath) - expect(proofer.failed_tests.first).to match(/failed: 0 Couldn't resolve host nam/) + expect(proofer.failed_tests.first).to match(/failed: 0 Couldn't resolve host name/) + end + + it 'passes for different filename without option' do + brokenLinkExternalFilepath = "#{FIXTURES_DIR}/links/file.foo" + proofer = make_proofer(brokenLinkExternalFilepath) + expect(proofer.failed_tests).to eq [] + end + + it 'fails for different filenames' do + options = { :ext => '.foo' } + brokenLinkExternalFilepath = "#{FIXTURES_DIR}/links/file.foo" + proofer = make_proofer(brokenLinkExternalFilepath, options) + expect(proofer.failed_tests.first).to match(/failed: 0 Couldn't resolve host name/) end it 'fails for broken internal links' do @@ -120,13 +133,13 @@ it 'ignores links via href_ignore' do ignorableLinks = "#{FIXTURES_DIR}/links/ignorableLinksViaOptions.html" - proofer = make_proofer(ignorableLinks, {:href_ignore => [/^http:\/\//, /sdadsad/, "../whaadadt.html"]}) + proofer = make_proofer(ignorableLinks, { :href_ignore => [%r{^http://}, /sdadsad/, '../whaadadt.html'] }) expect(proofer.failed_tests).to eq [] end it 'translates links via href_swap' do translatedLink = "#{FIXTURES_DIR}/links/linkTranslatedViaHrefSwap.html" - proofer = make_proofer(translatedLink, {:href_swap => { /\A\/articles\/([\w-]+)/ => "\\1.html" }}) + proofer = make_proofer(translatedLink, { :href_swap => { %r{\A/articles/([\w-]+)} => "\\1.html" } }) expect(proofer.failed_tests).to eq [] end @@ -203,8 +216,15 @@ expect(proofer.failed_tests.first).to match(/without trailing slash/) end + it 'ignores external links when asked' do + options = { :disable_external => true } + external = "#{FIXTURES_DIR}/links/brokenLinkExternal.html" + proofer = make_proofer(external, options) + expect(proofer.failed_tests).to eq [] + end + it 'works for array of links' do - proofer = make_proofer(["www.github.com", "foofoofoo.biz"]) + proofer = make_proofer(['www.github.com', 'foofoofoo.biz']) expect(proofer.failed_tests.first).to match(/failed: 0 Couldn't resolve host name/) end From cb5ca188cda8568d585d2c1121319f48779c32da Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sun, 25 Jan 2015 23:01:50 -0800 Subject: [PATCH 29/40] Proper grammar! --- lib/html/proofer.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index da558840..0ed33679 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -193,7 +193,8 @@ def print_failed_tests end end - # fail logger.colorize :red, "HTML-Proofer found #{@failed_tests.length} failures!" + failure_text = @failed_tests.length == 1 ? 'failure' : 'failures' + fail logger.colorize :red, "HTML-Proofer found #{@failed_tests.length} #{failure_text}!" end end end From 146d6502ded71ac36afeefc944b9ebf9818d1c0c Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sun, 25 Jan 2015 23:02:04 -0800 Subject: [PATCH 30/40] Revamp and test command binary --- README.md | 20 +++--- bin/htmlproof | 69 ++++++++++++--------- lib/html/proofer/log.rb | 4 +- lib/html/proofer/url_validator.rb | 1 + spec/html/proofer/command_spec.rb | 74 +++++++++++++++++++++++ spec/html/proofer/fixtures/links/file.foo | 11 ++++ spec/spec_helper.rb | 6 +- 7 files changed, 144 insertions(+), 41 deletions(-) create mode 100644 spec/html/proofer/command_spec.rb create mode 100644 spec/html/proofer/fixtures/links/file.foo diff --git a/README.md b/README.md index 4084698f..64a7cf6c 100644 --- a/README.md +++ b/README.md @@ -137,23 +137,23 @@ The `HTML::Proofer` constructor takes an optional hash of additional options: | Option | Description | Default | | :----- | :---------- | :------ | +| `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` | +| `check_external_hash` | Checks whether external hashes exist (even if the website exists). This slows the checker down. | `false` | +| `directory_index_file` | Sets the file to look for when a link refers to a directory. | `index.html` | | `disable_external` | If `true`, does not run the external link checker, which can take a lot of time. | `false` | +| `error_sort` | Defines the sort order for error output. Can be `:path`, `:desc`, or `:status`. | `:path` | `ext` | The extension of your HTML files including the dot. | `.html` -| `validate_favicon` | Enables the favicon checker. | `false` | -| `followlocation` | Follows external redirections. Amends missing trailing slashes to internal directories. | `true` | -| `directory_index_file` | Sets the file to look for when a link refers to a directory. | `index.html` | -| `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. | `[]` | -| `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` | | `file_ignore` | An array of Strings or RegExps containing file paths that are safe to ignore. | `[]` | +| `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. | `[]` | | `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` | -| `verbose` | If `true`, outputs extra information as the checking happens. Useful for debugging. | `false` | | `only_4xx` | Only reports errors for links that fall within the 4xx status code range. | `false` | +| `validate_favicon` | Enables the favicon checker. | `false` | | `validate_html` | Enables HTML validation errors from Nokogiri | `false` | -| `check_external_hash` | Checks whether external hashes exist (even if the website exists). This slows the checker down. | `false` | +| `verbose` | If `true`, outputs extra information as the checking happens. Useful for debugging. | `false` | ### Configuring Typhoeus and Hydra -You can also pass in any of Typhoeus' options for the external link check. For example: +[Typhoeus](https://github.com/typhoeus/typhoeus) is used to make fast, parallel requests to external URLs. You can pass in any of Typhoeus' options for the external link checks with the options namespace of `:typhoeus`. For example: ``` ruby HTML::Proofer.new("out/", {:ext => ".htm", :typhoeus => { :verbose => true, :ssl_verifyhost => 2 } }) @@ -163,9 +163,11 @@ This sets `HTML::Proofer`'s extensions to use _.htm_, and gives Typhoeus a confi You can similarly pass in a `:hydra` option with a hash configuration for Hydra. +The default value is `typhoeus => { :followlocation => true }`. + ### Configuring Parallel -[Parallel](https://github.com/grosser/parallel) is being used to speed things up a bit. You can pass in any of its options with the options "namespace" `:parallel`. For example: +[Parallel](https://github.com/grosser/parallel) is being used to speed internal file checks. You can pass in any of its options with the options "namespace" `:parallel`. For example: ``` ruby HTML::Proofer.new("out/", {:ext => ".htm", :parallel => { :in_processes => 3} }) diff --git a/bin/htmlproof b/bin/htmlproof index f9136656..82aa0bea 100755 --- a/bin/htmlproof +++ b/bin/htmlproof @@ -7,53 +7,62 @@ require 'html/proofer' require 'mercenary' require 'rubygems' +def to_regex?(item) + if item.start_with? '/' and item.end_with? '/' + Regexp.new item[1...-1] + else + item + end +end + Mercenary.program(:htmlproof) do |p| p.version Gem::Specification.load(File.join(File.dirname(__FILE__), '..', 'html-proofer.gemspec')).version p.description %(Test your rendered HTML files to make sure they're accurate.) p.syntax 'htmlproof PATH [options]' - p.description 'Runs the HTML-Proofer suite on the files in PATH' - - p.option 'ext', '--ext EXT', 'The extension of your HTML files (default: `.html`)' - p.option 'favicon', '--favicon', 'Enables the favicon checker (default: `false`).' - p.option 'as-links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.' - p.option 'swap', '--swap regex:string,[regex:string,...]', Array, 'Array containing key-value pairs of `RegExp:String`. It transforms links that match `RegExp` into `String`' - p.option 'href_ignore', '--href_ignore link1,[link2,...]', Array, 'Array of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored.' - p.option 'alt_ignore', '--alt_ignore image1,[image2,...]', Array, 'Array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore' - p.option 'file_ignore', '--file_ignore file1,[file2,...]', Array, 'Array of Strings or RegExps containing file paths that are safe to ignore' - p.option 'disable_external', '--disable_external', 'Disables the external link checker (default: `false`)' - p.option 'only_4xx', '--only_4xx', 'Only reports errors for links that fall within the 4x status code range.' + p.description 'Runs the HTML-Proofer suite on the files in PATH. For more details, see the README.' + + p.option 'as_links', '--as-links', 'Assumes that `PATH` is a comma-separated array of links to check.' + p.option 'alt_ignore', '--alt-ignore image1,[image2,...]', Array, 'Comma-separated list of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore' + p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).' + p.option 'directory_index_file', '--directory-index-file', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)' + p.option 'disable_external', '--disable-external', 'Disables the external link checker (default: `false`)' + p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `path`, `desc`, or `status` (default: `path`).' + p.option 'ext', '--ext EXT', String, 'The extension of your HTML files (default: `.html`)' + p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'Comma-separated list of Strings or RegExps containing file paths that are safe to ignore' + p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'Comma-separated list of Strings or RegExps containing `href`s that are safe to ignore.' + p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'Comma-separated list of key-value pairs of `RegExp:String`. Transforms links matching `RegExp` into `String`' + p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4x status code range.' + p.option 'validate_favicon', '--validate-favicon', 'Enables the favicon checker (default: `false`).' + p.option 'validate_html', '--validate-html', 'Enables HTML validation errors from Nokogiri (default: `false`).' p.option 'verbose', '--verbose', 'Enables more verbose logging.' - p.option 'directory_index_file', '--directory_index_file', 'Sets the file to look for when a link refers to a directory.' - p.option 'validate_html', '--validate_html', 'Enables HTML validation errors from Nokogiri (default: `false`).' - p.option 'check_external_hash', '--check_external_hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).' p.action do |args, opts| args = ['.'] if args.empty? path = args.first options = {} - options[:ext] = opts['ext'] unless opts['ext'].nil? - unless opts['swap'].nil? + + # prepare every to go to proofer + p.options.select { |o| !opts[o.config_key].nil? }.each do |option| + if option.return_type.to_s == 'Array' # TODO: is_a? doesn't work here? + opts[option.config_key] = opts[option.config_key].map { |i| to_regex?(i) } + end + options[option.config_key.to_sym] = opts[option.config_key] + end + + # some minor manipulation of a special option + unless opts['href_swap'].nil? options[:href_swap] = {} - opts['swap'].each do |s| + opts['href_swap'].each do |s| pair = s.split(':') - options[:href_swap][/#{pair[0]}/] = pair[1] + options[:href_swap][Regexp.new(pair[0])] = pair[1] end end - options[:href_ignore] = opts['href_ignore'] unless opts['href_ignore'].nil? - options[:alt_ignore] = opts['alt_ignore'] unless opts['alt_ignore'].nil? - options[:file_ignore] = opts['file_ignore'] unless opts['file_ignore'].nil? - options[:disable_external] = opts['disable_external'] unless opts['disable_external'].nil? - options[:only_4xx] = opts['only_4xx'] unless opts['only_4xx'].nil? - options[:favicon] = opts['favicon'] unless opts['favicon'].nil? - options[:verbose] = opts['verbose'] unless opts['verbose'].nil? - options[:directory_index_file] = opts['directory_index_file'] unless opts['directory_index_file'].nil? - options[:validate_html] = opts['validate_html'] unless opts['validate_html'].nil? - options[:check_external_hash] = opts['check_external_hash'] unless opts['check_external_hash'].nil? - - path = path.delete(' ').split(',') if opts['as-links'] + options[:error_sort] = opts['error-sort'].to_sym unless opts['error-sort'].nil? + + path = path.delete(' ').split(',') if opts['as_links'] HTML::Proofer.new(path, options).run end diff --git a/lib/html/proofer/log.rb b/lib/html/proofer/log.rb index 078cd728..d12c4d12 100644 --- a/lib/html/proofer/log.rb +++ b/lib/html/proofer/log.rb @@ -9,7 +9,9 @@ class Log def initialize(verbose) log_level = verbose ? :debug : :info - @logger = Yell.new({ :format => false, :name => 'HTML::Proofer', :level => "gte.#{log_level}" }) do |l| + @logger = Yell.new(:format => false, \ + :name => 'HTML::Proofer', \ + :level => "gte.#{log_level}") do |l| l.adapter :stdout, :level => [:debug, :info, :warn] l.adapter :stderr, :level => [:error, :fatal] end diff --git a/lib/html/proofer/url_validator.rb b/lib/html/proofer/url_validator.rb index 8776134a..79000644 100644 --- a/lib/html/proofer/url_validator.rb +++ b/lib/html/proofer/url_validator.rb @@ -1,5 +1,6 @@ require_relative './utils' require 'typhoeus' +require 'uri' module HTML class Proofer diff --git a/spec/html/proofer/command_spec.rb b/spec/html/proofer/command_spec.rb new file mode 100644 index 00000000..7abf57da --- /dev/null +++ b/spec/html/proofer/command_spec.rb @@ -0,0 +1,74 @@ +require 'spec_helper' + +describe 'Command test' do + it 'works with as-links' do + output = make_bin('--as-links www.github.com,foofoofoo.biz') + expect(output).to match('1 failure') + end + + it 'works with alt-ignore' do + ignorableLinks = "#{FIXTURES_DIR}/images/ignorableAltViaOptions.html" + output = make_bin('--alt-ignore /wikimedia/,gpl.png', ignorableLinks) + expect(output).to match('successfully') + end + + it 'works with check-external-hash' do + brokenHashOnTheWeb = "#{FIXTURES_DIR}/links/brokenHashOnTheWeb.html" + output = make_bin('--check-external-hash', brokenHashOnTheWeb) + expect(output).to match('1 failure') + end + + it 'works with directory-index-file' do + link_pointing_to_directory = "#{FIXTURES_DIR}/links/link_pointing_to_directory.html" + output = make_bin('--directory-index-file index.php', link_pointing_to_directory) + expect(output).to match('successfully') + end + + it 'works with disable-external' do + external = "#{FIXTURES_DIR}/links/brokenLinkExternal.html" + output = make_bin('--disable-external', external) + expect(output).to match('successfully') + end + + it 'works with ext' do + external = "#{FIXTURES_DIR}/links/file.foo" + output = make_bin('--ext .foo', external) + expect(output).to match('1 failure') + end + + it 'works with file-ignore' do + external = "#{FIXTURES_DIR}/links/brokenHashInternal.html" + output = make_bin("--file-ignore #{external}", external) + expect(output).to match('successfully') + end + + it 'works with href-ignore' do + ignorableLinks = "#{FIXTURES_DIR}/links/ignorableLinksViaOptions.html" + output = make_bin('--href-ignore /^http:\/\//,/sdadsad/,../whaadadt.html', ignorableLinks) + expect(output).to match('successfully') + end + + it 'works with href-swap' do + translatedLink = "#{FIXTURES_DIR}/links/linkTranslatedViaHrefSwap.html" + output = make_bin('--href-swap \A/articles/([\w-]+):\1.html', translatedLink) + expect(output).to match('successfully') + end + + it 'works with only-4xx' do + brokenHashOnTheWeb = "#{FIXTURES_DIR}/links/brokenHashOnTheWeb.html" + output = make_bin('--only-4xx', brokenHashOnTheWeb) + expect(output).to match('successfully') + end + + it 'works with validate-favicon' do + broken = "#{FIXTURES_DIR}/favicon/favicon_broken.html" + output = make_bin('--validate-favicon', broken) + expect(output).to match('1 failure') + end + + it 'works with validate-html' do + broken = "#{FIXTURES_DIR}/html/invalid_tag.html" + output = make_bin('--validate-html', broken) + expect(output).to match('1 failure') + end +end diff --git a/spec/html/proofer/fixtures/links/file.foo b/spec/html/proofer/fixtures/links/file.foo new file mode 100644 index 00000000..aa5f3c19 --- /dev/null +++ b/spec/html/proofer/fixtures/links/file.foo @@ -0,0 +1,11 @@ + + + + +

Blah blah blah. Working link!

+ + +broken link! + + + diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 291edfac..a9206ce1 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -21,7 +21,7 @@ def capture_stderr(*) original_stderr = $stderr original_stdout = $stdout $stderr = fake_err = StringIO.new - $stdout = StringIO.new + $stdout = fake_out = StringIO.new begin yield rescue RuntimeError @@ -42,3 +42,7 @@ def send_proofer_output(file, opts = {}) proofer = HTML::Proofer.new(file, opts) capture_stderr { proofer.run } end + +def make_bin(cmd, path=nil) + `bin/htmlproof #{cmd} #{path}` +end From 13a376899c7de173d5ed9fcaf287bb055dbb133f Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sun, 25 Jan 2015 23:02:44 -0800 Subject: [PATCH 31/40] Try unblocking Travis? --- spec/html/proofer_spec.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spec/html/proofer_spec.rb b/spec/html/proofer_spec.rb index b88c9ce3..5be4ad2b 100644 --- a/spec/html/proofer_spec.rb +++ b/spec/html/proofer_spec.rb @@ -35,8 +35,7 @@ end describe 'sorting' do - # would love to know why Travis barfs here - it 'understands sorting by path', :skip => ENV['TRAVIS'] do + it 'understands sorting by path' do output = send_proofer_output("#{FIXTURES_DIR}/sorting/path") expect(output.strip).to eq(''' From c1fdc033b0c8744123aafc89dba6c59c9db998a1 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Mon, 26 Jan 2015 21:47:56 -0800 Subject: [PATCH 32/40] Wrap this cmd in quotes --- spec/html/proofer/command_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/html/proofer/command_spec.rb b/spec/html/proofer/command_spec.rb index 7abf57da..853b5a90 100644 --- a/spec/html/proofer/command_spec.rb +++ b/spec/html/proofer/command_spec.rb @@ -50,7 +50,7 @@ it 'works with href-swap' do translatedLink = "#{FIXTURES_DIR}/links/linkTranslatedViaHrefSwap.html" - output = make_bin('--href-swap \A/articles/([\w-]+):\1.html', translatedLink) + output = make_bin('--href-swap "\A/articles/([\w-]+):\1.html"', translatedLink) expect(output).to match('successfully') end From 7ff2ebf9e56866baf9af937f354c38978f234688 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Mon, 26 Jan 2015 21:54:35 -0800 Subject: [PATCH 33/40] Simplify this test, since it fails only on Travis --- spec/html/proofer/html_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/html/proofer/html_spec.rb b/spec/html/proofer/html_spec.rb index c5762926..a68da8b2 100644 --- a/spec/html/proofer/html_spec.rb +++ b/spec/html/proofer/html_spec.rb @@ -46,6 +46,6 @@ it 'fails for missing closing quotation mark in href' do html = "#{FIXTURES_DIR}/html/missing_closing_quotes.html" proofer = make_proofer(html, { :validate_html => true }) - expect(proofer.failed_tests[1]).to match(/Couldn't find end of Start Tag a/) + expect(proofer.failed_tests.to_s).to match(/Couldn't find end of Start Tag a/) end end From f8dd45db0b6e62042bc9cd2893b3709ad124e6ee Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Mon, 26 Jan 2015 22:12:16 -0800 Subject: [PATCH 34/40] Try to improve on sorting logic --- lib/html/proofer.rb | 44 ++++++++++++++++++++------------------- spec/html/proofer_spec.rb | 2 +- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 0ed33679..2d08f37e 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -170,31 +170,33 @@ def print_failed_tests comp.zero? ? (a.path <=> b.path) : comp end - @failed_tests.each do |issue| - case @options[:error_sort] - when :path - if matcher != issue.path - logger.log :error, :red, "- #{issue.path}" - matcher = issue.path - end - logger.log :error, :red, " * #{issue.desc}" - when :desc - if matcher != issue.desc - logger.log :error, :red, "- #{issue.desc}" - matcher = issue.desc - end - logger.log :error, :red, " * #{issue.path}" - when :status - if matcher != issue.status - logger.log :error, :red, "- #{issue.status}" - matcher = issue.status - end - logger.log :error, :red, " * #{issue}" - end + case @options[:error_sort] + when :path + sort_and_report_failure(:path, :desc) + when :desc + sort_and_report_failure(:desc, :path) + when :status + sort_and_report_failure(:status, :path) end failure_text = @failed_tests.length == 1 ? 'failure' : 'failures' fail logger.colorize :red, "HTML-Proofer found #{@failed_tests.length} #{failure_text}!" end + + def sort_and_report_failure(first_sort, second_sort) + matcher = nil + sorted_failures = @failed_tests.sort_by { |t| [ t.send(first_sort), t.send(second_sort) ] } + sorted_failures.each do |issue| + if matcher != issue.send(first_sort) + logger.log :error, :red, "- #{issue.send(first_sort)}" + matcher = issue.send(first_sort) + end + if first_sort == :status + logger.log :error, :red, " * #{issue}" + else + logger.log :error, :red, " * #{issue.send(second_sort)}" + end + end + end end end diff --git a/spec/html/proofer_spec.rb b/spec/html/proofer_spec.rb index 5be4ad2b..5f6eaf53 100644 --- a/spec/html/proofer_spec.rb +++ b/spec/html/proofer_spec.rb @@ -40,8 +40,8 @@ expect(output.strip).to eq(''' - spec/html/proofer/fixtures/sorting/path/multiple_issues.html - * internal image gpl.png does not exist * image gpl.png does not have an alt attribute + * internal image gpl.png does not exist * tel: contains no phone number - spec/html/proofer/fixtures/sorting/path/single_issue.html * image has a terrible filename (./Screen Shot 2012-08-09 at 7.51.18 AM.png) From 05439f7a49a2002a3e07c2249a91598fe8ad13ef Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Mon, 26 Jan 2015 22:43:03 -0800 Subject: [PATCH 35/40] Use `module_function` --- lib/html/proofer/utils.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/html/proofer/utils.rb b/lib/html/proofer/utils.rb index 04e9b9df..0f34ffb9 100644 --- a/lib/html/proofer/utils.rb +++ b/lib/html/proofer/utils.rb @@ -2,8 +2,6 @@ module HTML module Utils - extend self - def create_nokogiri(path) if File.exist? path content = File.open(path).read @@ -13,5 +11,6 @@ def create_nokogiri(path) Nokogiri::HTML(content) end + module_function :create_nokogiri end end From d93234bcf49374361f08de6f85be0ad302b94e42 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Mon, 26 Jan 2015 22:43:41 -0800 Subject: [PATCH 36/40] Pull sorting and reporting out --- lib/html/proofer.rb | 38 +++------------------------ lib/html/proofer/runner/issue.rb | 44 ++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 34 deletions(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 2d08f37e..fd87f89a 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -161,42 +161,12 @@ def failed_tests end def print_failed_tests - matcher = nil + sorted_failures = HTML::Proofer::Runner::SortedIssues.new(@failed_tests, @options[:error_sort], logger) - # always sort by the actual option, then path, to ensure consistent - # alphabetical (by filename) results - @failed_tests = @failed_tests.sort do |a, b| - comp = (a.send(@options[:error_sort]) <=> b.send(@options[:error_sort])) - comp.zero? ? (a.path <=> b.path) : comp - end - - case @options[:error_sort] - when :path - sort_and_report_failure(:path, :desc) - when :desc - sort_and_report_failure(:desc, :path) - when :status - sort_and_report_failure(:status, :path) - end - - failure_text = @failed_tests.length == 1 ? 'failure' : 'failures' + sorted_failures.sort_and_report + count = @failed_tests.length + failure_text = "#{count} " << (count == 1 ? 'failure' : 'failures') fail logger.colorize :red, "HTML-Proofer found #{@failed_tests.length} #{failure_text}!" end - - def sort_and_report_failure(first_sort, second_sort) - matcher = nil - sorted_failures = @failed_tests.sort_by { |t| [ t.send(first_sort), t.send(second_sort) ] } - sorted_failures.each do |issue| - if matcher != issue.send(first_sort) - logger.log :error, :red, "- #{issue.send(first_sort)}" - matcher = issue.send(first_sort) - end - if first_sort == :status - logger.log :error, :red, " * #{issue}" - else - logger.log :error, :red, " * #{issue.send(second_sort)}" - end - end - end end end diff --git a/lib/html/proofer/runner/issue.rb b/lib/html/proofer/runner/issue.rb index 9b52582c..4aa86fe0 100644 --- a/lib/html/proofer/runner/issue.rb +++ b/lib/html/proofer/runner/issue.rb @@ -14,4 +14,48 @@ def to_s "#{@path}: #{desc}" end end + + class SortedIssues + attr_reader :issues + + def initialize(issues, error_sort, logger) + @issues = issues + @error_sort = error_sort + @logger = logger + end + + def sort_and_report + case @error_sort + when :path + sorted_issues = sort(:path, :desc) + report(sorted_issues, :path, :desc) + when :desc + sorted_issues = sort(:desc, :path) + report(sorted_issues, :desc, :path) + when :status + sorted_issues = sort(:status, :path) + report(sorted_issues, :status, :path) + end + end + + def sort(first_sort, second_sort) + issues.sort_by { |t| [ t.send(first_sort), t.send(second_sort) ] } + end + + def report(sorted_issues, first_report, second_report) + matcher = nil + + sorted_issues.each do |issue| + if matcher != issue.send(first_report) + @logger.log :error, :red, "- #{issue.send(first_report)}" + matcher = issue.send(first_report) + end + if first_report == :status + @logger.log :error, :red, " * #{issue}" + else + @logger.log :error, :red, " * #{issue.send(second_report)}" + end + end + end + end end From 2929aafbc8a79ed70a04942e92f455de2673c3f4 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Mon, 26 Jan 2015 22:48:35 -0800 Subject: [PATCH 37/40] Be green --- lib/html/proofer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index fd87f89a..cca4b438 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -77,7 +77,7 @@ def run end if @failed_tests.empty? - logger.log :info, :blue, 'HTML-Proofer finished successfully.' + logger.log :info, :green, 'HTML-Proofer finished successfully.' else print_failed_tests end From bab216f1c139f39f757c81aeaec0bd4a9b859778 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Mon, 26 Jan 2015 22:50:10 -0800 Subject: [PATCH 38/40] No nakey --- lib/html/proofer.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index cca4b438..1cb4107f 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -124,7 +124,7 @@ def check_files_for_internal_woes def files if File.directory? @src - pattern = File.join @src, '**', "*#{@options[:ext]}" + pattern = File.join(@src, '**', "*#{@options[:ext]}") files = Dir.glob(pattern).select { |fn| File.file? fn } files.reject { |f| ignore_file?(f) } elsif File.extname(@src) == @options[:ext] From 731dc20e6b087f847d6c9734139646e26d3d909b Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Wed, 28 Jan 2015 14:53:55 -0800 Subject: [PATCH 39/40] Rename `get_checks` to just `checks` --- lib/html/proofer.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 1cb4107f..5d5332be 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -68,7 +68,7 @@ def validate_urls(external_urls) end def run - logger.log :info, :blue, "Running #{get_checks} checks on #{@src} on *#{@options[:ext]}... \n\n" + logger.log :info, :blue, "Running #{checks} checks on #{@src} on *#{@options[:ext]}... \n\n" if @src.is_a?(Array) && !@options[:disable_external] check_list_of_links @@ -111,7 +111,7 @@ def check_files_for_internal_woes html = create_nokogiri(path) result = { :external_urls => {}, :failed_tests => [] } - get_checks.each do |klass| + checks.each do |klass| logger.log :debug, :yellow, "Checking #{klass.to_s.downcase} on #{path} ..." check = Object.const_get(klass).new(@src, path, html, @options) check.run @@ -146,7 +146,7 @@ def ignore_file?(file) false end - def get_checks + def checks checks = HTML::Proofer::Runner.checks.map(&:name) checks.delete('FaviconRunner') unless @options[:validate_favicon] checks.delete('HtmlRunner') unless @options[:validate_html] From ff40de13ab048fdf2a2eec3b2cc051f8e6e36469 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Wed, 28 Jan 2015 14:59:25 -0800 Subject: [PATCH 40/40] More aesthetics --- lib/html/proofer.rb | 10 +++++----- lib/html/proofer/checkable.rb | 2 +- lib/html/proofer/checks/scripts.rb | 1 - lib/html/proofer/runner/issue.rb | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index 5d5332be..40b346da 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -62,11 +62,6 @@ def logger @logger ||= HTML::Proofer::Log.new(@options[:verbose]) end - def validate_urls(external_urls) - url_validator = HTML::Proofer::UrlValidator.new(logger, external_urls, @options, @typhoeus_opts, @hydra_opts) - @failed_tests.concat(url_validator.run) - end - def run logger.log :info, :blue, "Running #{checks} checks on #{@src} on *#{@options[:ext]}... \n\n" @@ -122,6 +117,11 @@ def check_files_for_internal_woes end end + def validate_urls(external_urls) + url_validator = HTML::Proofer::UrlValidator.new(logger, external_urls, @options, @typhoeus_opts, @hydra_opts) + @failed_tests.concat(url_validator.run) + end + def files if File.directory? @src pattern = File.join(@src, '**', "*#{@options[:ext]}") diff --git a/lib/html/proofer/checkable.rb b/lib/html/proofer/checkable.rb index 8da55a2f..499661e3 100644 --- a/lib/html/proofer/checkable.rb +++ b/lib/html/proofer/checkable.rb @@ -7,7 +7,7 @@ class Checkable def initialize(obj, check) obj.attributes.each_pair do |attribute, value| next if attribute == 'data-proofer-ignore' # TODO: not quite sure why this doesn't work - self.instance_variable_set("@#{attribute}".to_sym, value.value) + instance_variable_set("@#{attribute}".to_sym, value.value) end @data_ignore_proofer = obj['data-proofer-ignore'] diff --git a/lib/html/proofer/checks/scripts.rb b/lib/html/proofer/checks/scripts.rb index 25096024..75d266e3 100644 --- a/lib/html/proofer/checks/scripts.rb +++ b/lib/html/proofer/checks/scripts.rb @@ -32,7 +32,6 @@ def run else add_issue("internal script #{script.src} does not exist") unless script.exists? end - end external_urls diff --git a/lib/html/proofer/runner/issue.rb b/lib/html/proofer/runner/issue.rb index 4aa86fe0..dae27232 100644 --- a/lib/html/proofer/runner/issue.rb +++ b/lib/html/proofer/runner/issue.rb @@ -39,7 +39,7 @@ def sort_and_report end def sort(first_sort, second_sort) - issues.sort_by { |t| [ t.send(first_sort), t.send(second_sort) ] } + issues.sort_by { |t| [t.send(first_sort), t.send(second_sort)] } end def report(sorted_issues, first_report, second_report)