Skip to content

Commit

Permalink
Merge pull request #605 from Webperf-se/issue-603
Browse files Browse the repository at this point in the history
Make the HTML test look for deprecated elements
  • Loading branch information
7h3Rabbit authored Aug 22, 2024
2 parents 8e35b4c + 101a420 commit 2e899fe
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 8 deletions.
Binary file modified locales/en/LC_MESSAGES/html_validator_w3c.mo
Binary file not shown.
5 changes: 4 additions & 1 deletion locales/en/LC_MESSAGES/html_validator_w3c.po
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,7 @@ msgid "TEXT_REVIEW_RATING_GROUPED"
msgstr "- Number of grouped error type: {0}"

msgid "TEXT_REVIEW_RATING_ITEMS"
msgstr "- Number of errors: {0}"
msgstr "- Number of errors: {0}"

msgid "TEXT_REVIEW_DEPRECATED_ELEMENT"
msgstr "The use of “{0}” element is deprecated."
Binary file modified locales/gov/LC_MESSAGES/html_validator_w3c.mo
Binary file not shown.
5 changes: 4 additions & 1 deletion locales/gov/LC_MESSAGES/html_validator_w3c.po
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,7 @@ msgid "TEXT_REVIEW_RATING_GROUPED"
msgstr "- Number of grouped error type: {0}"

msgid "TEXT_REVIEW_RATING_ITEMS"
msgstr "- Number of errors: {0}"
msgstr "- Number of errors: {0}"

msgid "TEXT_REVIEW_DEPRECATED_ELEMENT"
msgstr "The use of “{0}” element is deprecated."
Binary file modified locales/sv/LC_MESSAGES/html_validator_w3c.mo
Binary file not shown.
5 changes: 4 additions & 1 deletion locales/sv/LC_MESSAGES/html_validator_w3c.po
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,7 @@ msgid "TEXT_REVIEW_RATING_GROUPED"
msgstr " - Antal (grupperade fel): {0}"

msgid "TEXT_REVIEW_RATING_ITEMS"
msgstr "- Antal fel: {0}"
msgstr "- Antal fel: {0}"

msgid "TEXT_REVIEW_DEPRECATED_ELEMENT"
msgstr "Användningen av elementet “{0}” är föråldrad."
74 changes: 69 additions & 5 deletions tests/html_validator_w3c.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# -*- coding: utf-8 -*-
from datetime import datetime
import re

from bs4 import BeautifulSoup
from models import Rating
from tests.utils import get_friendly_url_name,\
from tests.utils import get_friendly_url_name, get_http_content,\
get_translation,\
set_cache_file
from tests.w3c_base import calculate_rating, get_data_for_url,\
Expand All @@ -11,7 +13,6 @@
from helpers.setting_helper import get_config

# DEFAULTS
HTML_REVIEW_GROUP_ERRORS = True
HTML_START_STRINGS = [
'Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”',
'Element “head” is missing a required instance of child element “title”.'
Expand Down Expand Up @@ -97,7 +98,7 @@ def handle_html_markup_entry(entry, global_translation, local_translation, resul
req_url = entry['url']
name = get_friendly_url_name(global_translation, req_url, entry['index'])
html = entry['content']
errors = get_errors_for_html(req_url, html)
errors = get_errors_for_html(req_url, html, local_translation)
result_dict['errors']['all'].extend(errors)
result_dict['errors']['html_files'].extend(errors)
is_first_entry = entry['index'] <= 1
Expand Down Expand Up @@ -153,7 +154,7 @@ def create_review_and_rating(

tmp = re.sub(
r"(“[^”]+”)", "X", error_message, 0, re.MULTILINE)
if HTML_REVIEW_GROUP_ERRORS:
if not get_config('general.review.details'):
error_message = tmp

if msg_grouped_dict.get(error_message, False):
Expand Down Expand Up @@ -197,7 +198,61 @@ def is_start_html_error(error_message):
return True
return False

def get_errors_for_html(url, html):
def get_mdn_web_docs_deprecated_elements():
    """
    Returns a sorted list of unique strings, of deprecated html elements.

    The names are scraped from the MDN Web Docs HTML element reference:
    the first <tbody> found inside the parent of the <h2> with id
    'obsolete_and_deprecated_elements' is walked row by row, and the text of
    the first <code> in the first <td> of each row is parsed.

    Every returned entry is '<' followed by the element name, without the
    closing '>', so it can be used for a plain substring search in markup.

    An empty list is returned when no content was retrieved or when the page
    does not have the expected structure.
    """
    html = get_http_content(
        ('https://developer.mozilla.org/'
         'en-US/docs/Web/HTML/Element'
         '#obsolete_and_deprecated_elements'))
    # NOTE(review): what get_http_content returns on failure is not visible
    # here; any empty/None result is treated as "no data" instead of being
    # handed to the parser.
    if not html:
        return []

    soup = BeautifulSoup(html, 'lxml')

    header = soup.find('h2', id='obsolete_and_deprecated_elements')
    if header is None:
        return []

    section = header.parent
    if section is None:
        return []

    tbody = section.find('tbody')
    if tbody is None:
        return []

    # Compiled once, outside the row loop; accepts both escaped and raw
    # angle brackets around the element name.
    regex = re.compile(r'(\&lt;|<)(?P<name>[^<>]+)(\&gt;|>)')

    # A set removes duplicates while collecting.
    elements = set()

    # find_all() always returns a (possibly empty) result of tags, so no
    # None checks are needed on the result or on the individual rows.
    for table_row in tbody.find_all('tr'):
        first_td = table_row.find('td')
        if first_td is None:
            continue

        code = first_td.find('code')
        if code is None:
            continue

        # code.string is None when <code> holds more than one child node,
        # which would make the regex search raise TypeError; get_text()
        # always returns a str.
        matches = regex.search(code.get_text())
        if matches:
            elements.add('<' + matches.group('name'))

    return sorted(elements)


# TODO: change this to just in time (lazy loading). Right now the lookup below
# runs at module level, so it is called every time webperf_core is being called.
html_deprecated_elements = get_mdn_web_docs_deprecated_elements()


def get_errors_for_html(url, html, local_translation):
"""
Caches the HTML content of a URL and retrieves the errors associated with it.
Expand All @@ -212,4 +267,13 @@ def get_errors_for_html(url, html):
results = get_errors_for_url(
'html',
url)

for element in html_deprecated_elements:
if element not in html:
continue
results.append({
'type': 'error',
'message': local_translation('TEXT_REVIEW_DEPRECATED_ELEMENT').format(element.replace('<', ''))
})

return results

0 comments on commit 2e899fe

Please sign in to comment.