Skip to content

Commit

Permalink
Merge pull request #251 from Webperf-se/issue-247
Browse files Browse the repository at this point in the history
Issue 247
  • Loading branch information
7h3Rabbit authored Oct 18, 2023
2 parents 6221018 + 8e958dc commit 30c5341
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 41 deletions.
67 changes: 64 additions & 3 deletions .github/workflows/verify_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
import os
import os.path
import ssl
import sys
import getopt
import json
Expand All @@ -11,6 +12,8 @@
import getopt
import gettext

import requests


def prepare_config_file(sample_filename, filename, arguments):
print('A', arguments)
Expand Down Expand Up @@ -319,7 +322,7 @@ def validate_locales(dir, msg_ids):
current_number_of_valid_translations += 1
elif file.endswith('.po'):
# po file had errors, try generate new mo file and try again.
msgfmt_path = find_msgfmt_py()
msgfmt_path = ensure_msgfmt_py()
if msgfmt_path != None:
print(
' - Trying to generate .mo file so it matches .po file')
Expand Down Expand Up @@ -357,8 +360,56 @@ def validate_locales(dir, msg_ids):
print(' No languages found')
return is_valid

def httpRequestGetContent(url, allow_redirects=False, use_text_instead_of_content=True):
    """Fetch the response body for a URL, returning '' when the request fails.

    When a plain ``http://`` request fails with an SSL or connection error,
    the same URL is retried once over ``https://`` using the caller's
    original options.

    Attributes:
        url: the URL to fetch.
        allow_redirects: forwarded to ``requests.get``.
        use_text_instead_of_content: when true ``response.text`` is returned,
            otherwise ``response.content``.

    Returns:
        The response text or content, or '' if the request failed.
    """
    timeout_seconds = 120
    insecure_scheme = 'http://'
    # Only the scheme prefix counts; 'http://' inside a query string must not
    # trigger (or be rewritten by) the https retry.
    is_plain_http = url.startswith(insecure_scheme)

    def retry_over_https():
        # Keep the caller's options so the retry returns the same kind of
        # result as the original request would have.
        secure_url = 'https://' + url[len(insecure_scheme):]
        return httpRequestGetContent(
            secure_url, allow_redirects, use_text_instead_of_content)

    try:
        headers = {'user-agent': 'Mozilla/5.0 (compatible; Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Edg/88.0.705.56'}
        response = requests.get(url, allow_redirects=allow_redirects,
                                headers=headers, timeout=timeout_seconds)

        if use_text_instead_of_content:
            return response.text
        return response.content
    except ssl.CertificateError as error:
        # Fall back to the exception itself if it carries no 'reason'.
        print('Info: Certificate error. {0}'.format(
            getattr(error, 'reason', error)))
    except requests.exceptions.SSLError as error:
        if is_plain_http:  # trying the same URL over SSL/TLS
            print('Info: Trying SSL before giving up.')
            return retry_over_https()
        print('Info: SSLError. {0}'.format(error))
    except requests.exceptions.ConnectionError as error:
        if is_plain_http:  # trying the same URL over SSL/TLS
            print('Connection error! Info: Trying SSL before giving up.')
            return retry_over_https()
        print(
            'Connection error! Unfortunately the request for URL "{0}" failed.\nMessage:\n{1}'.format(url, type(error)))
    except Exception as error:
        # Best effort: report and return '' for timeouts and other failures,
        # but let KeyboardInterrupt/SystemExit propagate.
        print(
            'Error! Unfortunately the request for URL "{0}" either timed out or failed for other reason(s). The timeout is set to {1} seconds.\nMessage:\n{2}'.format(url, timeout_seconds, type(error)))
    return ''

def set_file(file_path, content, use_text_instead_of_content):
    """Write content to file_path, replacing any existing file.

    Attributes:
        file_path: path of the file to write.
        content: str when written as text, bytes when written as binary.
        use_text_instead_of_content: when true the file is written as UTF-8
            text without newline translation, otherwise as raw bytes.
    """
    if not use_text_instead_of_content:
        with open(file_path, 'wb') as binary_handle:
            binary_handle.write(content)
        return

    # newline='' keeps the line endings in content exactly as given.
    with open(file_path, 'w', encoding='utf-8', newline='') as text_handle:
        text_handle.write(content)

def find_msgfmt_py():
def ensure_msgfmt_py():
import sys
for python_path in sys.path:
a = python_path
Expand All @@ -370,7 +421,17 @@ def find_msgfmt_py():
msgfmt_path = has_dir_msgfmt_py(a, 0)
if msgfmt_path != None:
return msgfmt_path

else:
dir = Path(os.path.dirname(
os.path.realpath(__file__)) + os.path.sep).parent.parent
data_dir = os.path.join(dir.resolve(), 'data') + os.sep
filename = 'msgfmt.py'
file_path = os.path.join(data_dir,filename)

if not os.path.exists(file_path):
content = httpRequestGetContent('https://raw.githubusercontent.com/python/cpython/main/Tools/i18n/msgfmt.py', True, True)
set_file(file_path, content, True)
return file_path
return None


Expand Down
Binary file modified locales/en/LC_MESSAGES/tracking_validator.mo
Binary file not shown.
26 changes: 15 additions & 11 deletions locales/en/LC_MESSAGES/tracking_validator.po
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ msgid "TEXT_RUNNING_TEST"
msgstr "## Test: 23 - Tracking and Privacy (Beta)\r\n"

msgid "TEXT_VISITOR_ANALYTICS_USED"
msgstr " - Visitor analytics used:\r\n"
msgstr " Visitor analytics used:\r\n"

msgid "TEXT_TRACKING"
msgstr "- Tracking"
msgstr "##### Tracking"

msgid "TEXT_TRACKING_FOUND_ALLOWED"
msgstr " - {0} - Tracking found, having {1} are allowed"
Expand All @@ -40,7 +40,7 @@ msgid "TEXT_TRACKING_REFERENCE"
msgstr "{0} - Has references to {1}"

msgid "TEXT_FINGERPRINTING"
msgstr "- Fingerprinting/identifying technology"
msgstr "##### Fingerprinting/identifying technology"

msgid "TEXT_FINGERPRINTING_FOUND"
msgstr " - {0} - Fingerprinting/identifying found.\n"
Expand All @@ -52,7 +52,7 @@ msgid "TEXT_FINGERPRINTING_TOTAL_FOUND"
msgstr " - A total of {0} tracking requests found.\n"

msgid "TEXT_ADS"
msgstr "- Ads"
msgstr "##### Ads"

msgid "TEXT_ADS_FOUND_ALLOWED"
msgstr " - {0} - Ad server request found, having {1} are allowed"
Expand Down Expand Up @@ -97,19 +97,19 @@ msgid "TEXT_COOKIE_NO_ANALYTICS_COOKIE"
msgstr " - Not using analytic cookies without consent"

msgid "TEXT_COOKIE"
msgstr "- Cookies"
msgstr "##### Cookies"

msgid "TEXT_GDPR_COUNTRIES"
msgstr " - Number of countries: {0}\r\n"
msgstr " Number of countries: {0}\r\n"

msgid "TEXT_GDPR_NONE_COMPLIANT_COUNTRIES"
msgstr " - Countries lacking adequate level of data protection: {0}\r\n"
msgstr " Countries lacking adequate level of data protection: {0}\r\n"

msgid "TEXT_GDPR_NONE_COMPLIANT_COUNTRIES_REQUESTS"
msgstr " - {0}, {1} request\r\n"
msgstr " {0}, {1} request:\r\n"

msgid "TEXT_GDPR_PAGE_IN_SWEDEN"
msgstr " - Page hosted in Sweden: {0}\r\n"
msgstr " Page hosted in Sweden: {0}\r\n"

msgid "TEXT_GDPR_True"
msgstr "Yes"
Expand All @@ -118,7 +118,11 @@ msgid "TEXT_GDPR_False"
msgstr "No"

msgid "TEXT_GDPR_HAS_POINTS"
msgstr "- GDPR and Schrems"
msgstr "##### GDPR and Schrems"

msgid "TEXT_GDPR_NO_POINTS"
msgstr "- GDPR and Schrems"
msgstr "##### GDPR and Schrems"

msgid "TEXT_GDPR_MAX_SHOWED"
msgstr " - More than {0} requests found, filtering out the rest\r\n"

Binary file modified locales/gov/LC_MESSAGES/tracking_validator.mo
Binary file not shown.
5 changes: 4 additions & 1 deletion locales/gov/LC_MESSAGES/tracking_validator.po
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ msgid "TEXT_REQUEST_UNKNOWN"
msgstr "#{0}: "

msgid "TEXT_VISITOR_ANALYTICS_USED"
msgstr " - Visitor analytics used:\r\n"
msgstr " Visitor analytics used:\r\n"

msgid "TEXT_TRACKING"
msgstr "GOV-IGNORE"
Expand Down Expand Up @@ -125,3 +125,6 @@ msgstr "- GDPR"

msgid "TEXT_GDPR_NO_POINTS"
msgstr "- GDPR"

msgid "TEXT_GDPR_MAX_SHOWED"
msgstr " - More than {0} requests found, filtering out the rest\r\n"
Binary file modified locales/sv/LC_MESSAGES/tracking_validator.mo
Binary file not shown.
25 changes: 14 additions & 11 deletions locales/sv/LC_MESSAGES/tracking_validator.po
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ msgid "TEXT_REQUEST_UNKNOWN"
msgstr "#{0}: "

msgid "TEXT_VISITOR_ANALYTICS_USED"
msgstr " - Besökaranalys används:\r\n"
msgstr " Besökaranalys används:\r\n"

msgid "TEXT_TRACKING"
msgstr "- Spårning"
msgstr "##### Spårning"

msgid "TEXT_TRACKING_FOUND_ALLOWED"
msgstr " - {0} - Spårning hittad, tillåtet att ha {1} st"
Expand All @@ -43,7 +43,7 @@ msgid "TEXT_TRACKING_REFERENCE"
msgstr "{0} - Har referens till {1}"

msgid "TEXT_FINGERPRINTING"
msgstr "- Identifierings­tekniker"
msgstr "##### Identifierings­tekniker"

msgid "TEXT_FINGERPRINTING_FOUND"
msgstr " - {0} - Identifierings­tekniker hittade.\n"
Expand All @@ -55,7 +55,7 @@ msgid "TEXT_FINGERPRINTING_TOTAL_FOUND"
msgstr " - Hittade totalt {0} identifierings­förfrågningar.\n"

msgid "TEXT_ADS"
msgstr "- Annonsörer"
msgstr "##### Annonsörer"

msgid "TEXT_ADS_FOUND_ALLOWED"
msgstr " - {0} - Annonsörs­förfrågan hittad, tillåtet att ha {1} st"
Expand Down Expand Up @@ -100,19 +100,19 @@ msgid "TEXT_COOKIE_NO_ANALYTICS_COOKIE"
msgstr " - Använder ej analyskakor utan samtycke"

msgid "TEXT_COOKIE"
msgstr "- Kakor"
msgstr "##### Kakor"

msgid "TEXT_GDPR_COUNTRIES"
msgstr " - Antal olika länder: {0}\r\n"
msgstr " Antal olika länder: {0}\r\n"

msgid "TEXT_GDPR_NONE_COMPLIANT_COUNTRIES"
msgstr " - Länder utan adekvat nivå av dataskydd: {0}\r\n"
msgstr " Länder utan adekvat nivå av dataskydd: {0}\r\n"

msgid "TEXT_GDPR_NONE_COMPLIANT_COUNTRIES_REQUESTS"
msgstr " - {0}, {1} förfrågningar\r\n"
msgstr " {0}, {1} förfrågningar:\r\n"

msgid "TEXT_GDPR_PAGE_IN_SWEDEN"
msgstr " - Sidan skickades från Sverige: {0}\r\n"
msgstr " Sidan skickades från Sverige: {0}\r\n"

msgid "TEXT_GDPR_True"
msgstr "Ja"
Expand All @@ -121,7 +121,10 @@ msgid "TEXT_GDPR_False"
msgstr "Nej"

msgid "TEXT_GDPR_HAS_POINTS"
msgstr "- Dataskyddsförordningen och Schrems II-domen"
msgstr "##### Dataskyddsförordningen och Schrems II-domen"

msgid "TEXT_GDPR_NO_POINTS"
msgstr "- Dataskyddsförordningen och Schrems II-domen"
msgstr "##### Dataskyddsförordningen och Schrems II-domen"

msgid "TEXT_GDPR_MAX_SHOWED"
msgstr " - Fler än {0} förfrågningar hittade, döljer resten\r\n"
50 changes: 35 additions & 15 deletions tests/tracking_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,8 @@ def rate_gdpr_and_schrems(content, _local, _):
review = ''
countries = {}
countries_outside_eu_or_exception_list = {}
max_nof_requests_showed = 5
limit_message_index = max_nof_requests_showed + 1

json_content = ''
try:
Expand All @@ -428,6 +430,14 @@ def rate_gdpr_and_schrems(content, _local, _):

entries_index = 0
while entries_index < number_of_entries:
request_friendly_name = None
if 'request' in entries[entries_index]:
request = entries[entries_index]['request']
if 'url' in request:
url = request['url']
request_friendly_name = get_friendly_url_name(_,
url, entries_index + 1)

entry_country_code = ''

entry_ip_address = entries[entries_index]['serverIPAddress']
Expand All @@ -436,12 +446,14 @@ def rate_gdpr_and_schrems(content, _local, _):

if entry_country_code == '' or entry_country_code == '-':
entry_country_code = 'unknown'
if entry_country_code in countries:
countries[entry_country_code] = countries[entry_country_code] + 1
else:
countries[entry_country_code] = 1
if not is_country_code_in_eu_or_on_exception_list(entry_country_code):
countries_outside_eu_or_exception_list[entry_country_code] = 1
if entry_country_code not in countries:
countries[entry_country_code] = list()
countries[entry_country_code].append(request_friendly_name)

if not is_country_code_in_eu_or_on_exception_list(entry_country_code):
if entry_country_code not in countries_outside_eu_or_exception_list:
countries_outside_eu_or_exception_list[entry_country_code] = list()
countries_outside_eu_or_exception_list[entry_country_code].append(request_friendly_name)

entries_index += 1

Expand All @@ -454,6 +466,11 @@ def rate_gdpr_and_schrems(content, _local, _):
# review += ' - {0} (number of requests: {1})\r\n'.format(country_code,
# countries[country_code])

page_is_hosted_in_sweden = page_countrycode == 'SE'
# '-- Page hosted in Sweden: {0}\r\n'
review += _local('TEXT_GDPR_PAGE_IN_SWEDEN').format(
_local('TEXT_GDPR_{0}'.format(page_is_hosted_in_sweden)))

number_of_countries_outside_eu = len(
countries_outside_eu_or_exception_list)
if number_of_countries_outside_eu > 0:
Expand All @@ -463,15 +480,18 @@ def rate_gdpr_and_schrems(content, _local, _):
number_of_countries_outside_eu)
for country_code in countries_outside_eu_or_exception_list:
review += _local('TEXT_GDPR_NONE_COMPLIANT_COUNTRIES_REQUESTS').format(country_code,
countries[country_code])
len(countries[country_code]))

request_index = 1
for req_url in countries[country_code]:
if request_index <= max_nof_requests_showed:
review += ' - {0}\r\n'.format(req_url)
elif request_index == limit_message_index:
review += _local('TEXT_GDPR_MAX_SHOWED').format(max_nof_requests_showed)
request_index += 1

points = 1.0

page_is_hosted_in_sweden = page_countrycode == 'SE'
# '-- Page hosted in Sweden: {0}\r\n'
review += _local('TEXT_GDPR_PAGE_IN_SWEDEN').format(
_local('TEXT_GDPR_{0}'.format(page_is_hosted_in_sweden)))

if points > 0.0:
rating.set_integrity_and_security(points, _local('TEXT_GDPR_HAS_POINTS').format(
0.0, ''))
Expand Down Expand Up @@ -531,7 +551,7 @@ def rate_tracking(website_urls, _local, _):

resource_analytics_used = dict()
resource_analytics_used.update(
get_analytics(_local, website_url, website_url_content, request_index, analytics_rules))
get_analytics(_, _local, website_url, website_url_content, request_index, analytics_rules))

if len(resource_analytics_used):
if not url_is_tracker:
Expand Down Expand Up @@ -575,7 +595,7 @@ def rate_tracking(website_urls, _local, _):
review_analytics += _local('TEXT_VISITOR_ANALYTICS_USED')
analytics_used_items = analytics_used.items()
for analytics_name, analytics_should_count in analytics_used_items:
review_analytics += ' - {0}\r\n'.format(analytics_name)
review_analytics += ' - {0}\r\n'.format(analytics_name)

integrity_and_security_review = rating.integrity_and_security_review

Expand Down Expand Up @@ -821,7 +841,7 @@ def run_test(_, langCode, url):
return (rating, result_dict)


def get_analytics(_local, url, content, request_index, analytics_rules):
def get_analytics(_, _local, url, content, request_index, analytics_rules):
analytics = {}

request_friendly_name = get_friendly_url_name(_,
Expand Down

0 comments on commit 30c5341

Please sign in to comment.