From 4a343e9fc6b197af5d8a19f48a4edfead4f7375c Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Tue, 4 Aug 2015 19:19:45 -0700 Subject: [PATCH 1/6] Add failing test for script tag, data-proofer-ignore, and check_html --- .../fixtures/scripts/script_proofer_ignore_with_html.html | 3 +++ spec/html/proofer/scripts_spec.rb | 6 ++++++ 2 files changed, 9 insertions(+) create mode 100644 spec/html/proofer/fixtures/scripts/script_proofer_ignore_with_html.html diff --git a/spec/html/proofer/fixtures/scripts/script_proofer_ignore_with_html.html b/spec/html/proofer/fixtures/scripts/script_proofer_ignore_with_html.html new file mode 100644 index 00000000..fa19dfbc --- /dev/null +++ b/spec/html/proofer/fixtures/scripts/script_proofer_ignore_with_html.html @@ -0,0 +1,3 @@ + diff --git a/spec/html/proofer/scripts_spec.rb b/spec/html/proofer/scripts_spec.rb index 7270848e..62a96771 100644 --- a/spec/html/proofer/scripts_spec.rb +++ b/spec/html/proofer/scripts_spec.rb @@ -44,4 +44,10 @@ expect(proofer.failed_tests).to eq [] end + it 'ignores scripts with proofer-ignore and html' do + opts = { check_html: true } + ignorableScript = "#{FIXTURES_DIR}/scripts/script_proofer_ignore_with_html.html" + proofer = run_proofer(ignorableScript, opts) + expect(proofer.failed_tests).to eq [] + end end From 2f99ee2438019cd0f7675bd3bb5c0eb252ad0782 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Tue, 4 Aug 2015 19:38:37 -0700 Subject: [PATCH 2/6] Ignore embedded scripts when asked --- lib/html/proofer/checks/html.rb | 12 ++++++++++-- .../ignore_script_embeds.html} | 0 spec/html/proofer/html_spec.rb | 7 +++++++ spec/html/proofer/scripts_spec.rb | 7 ------- 4 files changed, 17 insertions(+), 9 deletions(-) rename spec/html/proofer/fixtures/{scripts/script_proofer_ignore_with_html.html => html/ignore_script_embeds.html} (100%) diff --git a/lib/html/proofer/checks/html.rb b/lib/html/proofer/checks/html.rb index d7c98185..1c37f190 100644 --- a/lib/html/proofer/checks/html.rb +++ b/lib/html/proofer/checks/html.rb @@ -1,6 +1,9 @@ # encoding: utf-8 class HtmlCheck < ::HTML::Proofer::CheckRunner + class HtmlCheckable < ::HTML::Proofer::Checkable + + end # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp) # and svg child tags (source: https://developer.mozilla.org/en-US/docs/Web/SVG/Element) @@ -30,11 +33,16 @@ class HtmlCheck < ::HTML::Proofer::CheckRunner def run @html.errors.each do |e| + message = e.message + # Nokogiri (or rather libxml2 underhood) only recognizes html4 tags, # so we need to skip errors caused by the new tags in html5 - next if HTML5_TAGS.include? e.to_s[/Tag ([\w-]+) invalid/o, 1] + next if HTML5_TAGS.include? message[/Tag ([\w-]+) invalid/o, 1] + + # tags embedded in scripts are used in templating languages: http://git.io/vOovv + next if @options[:ignore_script_embeds] && message =~ /Element script embeds close tag/ - add_issue(e.to_s) + add_issue(message) end end end diff --git a/spec/html/proofer/fixtures/scripts/script_proofer_ignore_with_html.html b/spec/html/proofer/fixtures/html/ignore_script_embeds.html similarity index 100% rename from spec/html/proofer/fixtures/scripts/script_proofer_ignore_with_html.html rename to spec/html/proofer/fixtures/html/ignore_script_embeds.html diff --git a/spec/html/proofer/html_spec.rb b/spec/html/proofer/html_spec.rb index 8800fec1..92aac542 100644 --- a/spec/html/proofer/html_spec.rb +++ b/spec/html/proofer/html_spec.rb @@ -54,4 +54,11 @@ proofer = run_proofer(html, { :check_html => true }) expect(proofer.failed_tests.to_s).to match(/Couldn't find end of Start Tag a/) end + + it 'ignores embeded scripts when asked' do + opts = { check_html: true, ignore_script_embeds: true } + ignorableScript = "#{FIXTURES_DIR}/html/ignore_script_embeds.html" + proofer = run_proofer(ignorableScript, opts) + expect(proofer.failed_tests).to eq [] + end end diff --git a/spec/html/proofer/scripts_spec.rb b/spec/html/proofer/scripts_spec.rb index 62a96771..5379bfcd 100644 --- a/spec/html/proofer/scripts_spec.rb +++ b/spec/html/proofer/scripts_spec.rb @@ -43,11 +43,4 @@ proofer = run_proofer(ignorableLinks, { :url_ignore => [/\/assets\/.*(js|css|png|svg)/] }) expect(proofer.failed_tests).to eq [] end - - it 'ignores scripts with proofer-ignore and html' do - opts = { check_html: true } - ignorableScript = "#{FIXTURES_DIR}/scripts/script_proofer_ignore_with_html.html" - proofer = run_proofer(ignorableScript, opts) - expect(proofer.failed_tests).to eq [] - end end From f6b771bffe675c140ee8cb6a7e9af0148299a3d3 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 8 Aug 2015 10:59:20 -0700 Subject: [PATCH 3/6] Remove cruft --- lib/html/proofer/checks/html.rb | 5 +---- spec/html/proofer/fixtures/html/ignore_script_embeds.html | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/html/proofer/checks/html.rb b/lib/html/proofer/checks/html.rb index 1c37f190..b25103d4 100644 --- a/lib/html/proofer/checks/html.rb +++ b/lib/html/proofer/checks/html.rb @@ -1,10 +1,6 @@ # encoding: utf-8 class HtmlCheck < ::HTML::Proofer::CheckRunner - class HtmlCheckable < ::HTML::Proofer::Checkable - - end - # new html5 tags (source: http://www.w3schools.com/html/html5_new_elements.asp) # and svg child tags (source: https://developer.mozilla.org/en-US/docs/Web/SVG/Element) HTML5_TAGS = %w(article aside bdi details dialog figcaption @@ -39,6 +35,7 @@ def run # so we need to skip errors caused by the new tags in html5 next if HTML5_TAGS.include? message[/Tag ([\w-]+) invalid/o, 1] + ap e.code # tags embedded in scripts are used in templating languages: http://git.io/vOovv next if @options[:ignore_script_embeds] && message =~ /Element script embeds close tag/ diff --git a/spec/html/proofer/fixtures/html/ignore_script_embeds.html b/spec/html/proofer/fixtures/html/ignore_script_embeds.html index fa19dfbc..46369a81 100644 --- a/spec/html/proofer/fixtures/html/ignore_script_embeds.html +++ b/spec/html/proofer/fixtures/html/ignore_script_embeds.html @@ -1,3 +1,3 @@ - From b0f76b732624efc37d9a20b9e6c5702b721f2a17 Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Sat, 8 Aug 2015 11:06:15 -0700 Subject: [PATCH 4/6] Move opts to :validation opts --- lib/html/proofer.rb | 7 +++++-- lib/html/proofer/check_runner.rb | 5 +++-- lib/html/proofer/checks/html.rb | 3 +-- spec/html/proofer/html_spec.rb | 2 +- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/html/proofer.rb b/lib/html/proofer.rb index e6059e65..a4ada594 100644 --- a/lib/html/proofer.rb +++ b/lib/html/proofer.rb @@ -21,7 +21,7 @@ module HTML class Proofer include Utils - attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts + attr_reader :options, :typhoeus_opts, :hydra_opts, :parallel_opts, :validation_opts TYPHOEUS_DEFAULTS = { :followlocation => true, @@ -62,6 +62,9 @@ def initialize(src, opts = {}) @parallel_opts = opts[:parallel] || {} opts.delete(:parallel) + @validation_opts = opts[:validation] || {} + opts.delete(:validation) + @options = @proofer_opts.merge(opts) @failed_tests = [] @@ -124,7 +127,7 @@ def check_files_for_internal_woes checks.each do |klass| logger.log :debug, :yellow, "Checking #{klass.to_s.downcase} on #{path} ..." - check = Object.const_get(klass).new(@src, path, html, @options, @typhoeus_opts, @hydra_opts, @parallel_opts) + check = Object.const_get(klass).new(@src, path, html, @options, @typhoeus_opts, @hydra_opts, @parallel_opts, @validation_opts) check.run result[:external_urls].merge!(check.external_urls) result[:failed_tests].concat(check.issues) if check.issues.length > 0 diff --git a/lib/html/proofer/check_runner.rb b/lib/html/proofer/check_runner.rb index 5c260bb1..9d794b05 100644 --- a/lib/html/proofer/check_runner.rb +++ b/lib/html/proofer/check_runner.rb @@ -6,9 +6,9 @@ class Proofer class CheckRunner attr_reader :issues, :src, :path, :options, :typhoeus_opts, :hydra_opts, :parallel_opts, \ - :external_urls, :href_ignores, :url_ignores, :alt_ignores, :empty_alt_ignore + :validation_opts, :external_urls, :href_ignores, :url_ignores, :alt_ignores, :empty_alt_ignore - def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts) + def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opts, validation_opts) @src = src @path = path @html = remove_ignored(html) @@ -16,6 +16,7 @@ def initialize(src, path, html, options, typhoeus_opts, hydra_opts, parallel_opt @typhoeus_opts = typhoeus_opts @hydra_opts = hydra_opts @parallel_opts = parallel_opts + @validation_opts = validation_opts @issues = [] @href_ignores = @options[:href_ignore] @url_ignores = @options[:url_ignore] diff --git a/lib/html/proofer/checks/html.rb b/lib/html/proofer/checks/html.rb index b25103d4..7eeae0eb 100644 --- a/lib/html/proofer/checks/html.rb +++ b/lib/html/proofer/checks/html.rb @@ -35,9 +35,8 @@ def run # so we need to skip errors caused by the new tags in html5 next if HTML5_TAGS.include? message[/Tag ([\w-]+) invalid/o, 1] - ap e.code # tags embedded in scripts are used in templating languages: http://git.io/vOovv - next if @options[:ignore_script_embeds] && message =~ /Element script embeds close tag/ + next if @validation_opts[:ignore_script_embeds] && message =~ /Element script embeds close tag/ add_issue(message) end diff --git a/spec/html/proofer/html_spec.rb b/spec/html/proofer/html_spec.rb index 92aac542..a0fb68ec 100644 --- a/spec/html/proofer/html_spec.rb +++ b/spec/html/proofer/html_spec.rb @@ -56,7 +56,7 @@ end it 'ignores embeded scripts when asked' do - opts = { check_html: true, ignore_script_embeds: true } + opts = { :check_html => true, :validation => { :ignore_script_embeds => true } } ignorableScript = "#{FIXTURES_DIR}/html/ignore_script_embeds.html" proofer = run_proofer(ignorableScript, opts) expect(proofer.failed_tests).to eq [] From 021fc6d2460d5ba7e17326f2163cb9ff4672febd Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Tue, 1 Sep 2015 10:37:38 +0200 Subject: [PATCH 5/6] Add docs --- README.md | 1 + bin/htmlproof | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index cb6dd6b9..fc1a1560 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,7 @@ The `HTML::Proofer` constructor takes an optional hash of additional options: | `file_ignore` | An array of Strings or RegExps containing file paths that are safe to ignore. | `[]` | | `href_ignore` | An array of Strings or RegExps containing `href`s that are safe to ignore. Note that non-HTTP(S) URIs are always ignored. | `[]` | | `href_swap` | A hash containing key-value pairs of `RegExp => String`. It transforms links that match `RegExp` into `String` via `gsub`. | `{}` | +| `ignore_script_embeds` | When `check_html` is enabled, `script` tags containing markup [are reported as errors](http://git.io/vOovv). Enabling this option ignores those errors. | `false` | `only_4xx` | Only reports errors for links that fall within the 4xx status code range. | `false` | | `url_ignore` | An array of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored. | `[]` | | `check_favicon` | Enables the favicon checker. | `false` | diff --git a/bin/htmlproof b/bin/htmlproof index 5ce5f15e..46be7a83 100755 --- a/bin/htmlproof +++ b/bin/htmlproof @@ -34,6 +34,7 @@ Mercenary.program(:htmlproof) do |p| p.option 'file_ignore', '--file-ignore file1,[file2,...]', Array, 'Comma-separated list of Strings or RegExps containing file paths that are safe to ignore' p.option 'href_ignore', '--href-ignore link1,[link2,...]', Array, 'Comma-separated list of Strings or RegExps containing `href`s that are safe to ignore.' p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'Comma-separated list of key-value pairs of `RegExp:String`. Transforms links matching `RegExp` into `String`' + p.option 'ignore_script_errors', '--ignore-script-errors', 'Ignore `check_html` errors associated with `script`s (default: `false`)' p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4x status code range.' p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).' p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).' From 8ae9cd753a1821170942fe9737e854fa74540a9c Mon Sep 17 00:00:00 2001 From: Garen Torikian Date: Tue, 1 Sep 2015 10:38:13 +0200 Subject: [PATCH 6/6] alphabetize --- README.md | 4 ++-- bin/htmlproof | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fc1a1560..b20cebed 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,8 @@ The `HTML::Proofer` constructor takes an optional hash of additional options: | `alt_ignore` | An array of Strings or RegExps containing `img`s whose missing `alt` tags are safe to ignore. | `[]` | | `empty_alt_ignore` | If `true`, ignores images with empty alt tags. | `false` | | `check_external_hash` | Checks whether external hashes exist (even if the website exists). This slows the checker down. | `false` | +| `check_favicon` | Enables the favicon checker. | `false` | +| `check_html` | Enables HTML validation errors from Nokogiri | `false` | |`checks_to_ignore`| An array of Strings indicating which checks you'd like to not perform. | `[]` | `directory_index_file` | Sets the file to look for when a link refers to a directory. | `index.html` | | `disable_external` | If `true`, does not run the external link checker, which can take a lot of time. | `false` | @@ -151,8 +153,6 @@ The `HTML::Proofer` constructor takes an optional hash of additional options: | `ignore_script_embeds` | When `check_html` is enabled, `script` tags containing markup [are reported as errors](http://git.io/vOovv). Enabling this option ignores those errors. | `false` | `only_4xx` | Only reports errors for links that fall within the 4xx status code range. | `false` | | `url_ignore` | An array of Strings or RegExps containing URLs that are safe to ignore. It affects all HTML attributes. Note that non-HTTP(S) URIs are always ignored. | `[]` | -| `check_favicon` | Enables the favicon checker. | `false` | -| `check_html` | Enables HTML validation errors from Nokogiri | `false` | | `verbose` | If `true`, outputs extra information as the checking happens. Useful for debugging. | `false` | ### Configuring Typhoeus and Hydra diff --git a/bin/htmlproof b/bin/htmlproof index 46be7a83..f1b16cb7 100755 --- a/bin/htmlproof +++ b/bin/htmlproof @@ -27,6 +27,8 @@ Mercenary.program(:htmlproof) do |p| p.option 'empty_alt_ignore', '--empty-alt-ignore', 'Ignores images with empty alt tags.' p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.' p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the website exists). This slows the checker down (default: `false`).' + p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).' + p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).' p.option 'directory_index_file', '--directory-index-file', String, 'Sets the file to look for when a link refers to a directory. (default: `index.html`)' p.option 'disable_external', '--disable-external', 'Disables the external link checker (default: `false`)' p.option 'error_sort', '--error-sort SORT', 'Defines the sort order for error output. Can be `path`, `desc`, or `status` (default: `path`).' @@ -36,8 +38,6 @@ Mercenary.program(:htmlproof) do |p| p.option 'href_swap', '--href-swap re:string,[re:string,...]', Array, 'Comma-separated list of key-value pairs of `RegExp:String`. Transforms links matching `RegExp` into `String`' p.option 'ignore_script_errors', '--ignore-script-errors', 'Ignore `check_html` errors associated with `script`s (default: `false`)' p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4x status code range.' - p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).' - p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).' p.option 'url_ignore', '--url-ignore link1,[link2,...]', Array, 'Comma-separated list of Strings or RegExps containing URLs that are safe to ignore.' p.option 'verbose', '--verbose', 'Enables more verbose logging.'