Skip to content

Commit

Permalink
Update error checking (#160)
Browse files Browse the repository at this point in the history
In the master branch, instances which return an error message and nothing else are erroneously seen as valid. This is fixed.

For "Search response time":
* Some instances return results only from the wiki* engines: this commit expects at least one result from a generic engine (i.e. not a wiki* engine).
* 5 requests are sent instead of 3

The sort algorithm was not pushing back the instances with errors (no result). This is fixed.
  • Loading branch information
dalf authored Sep 23, 2023
1 parent 061d06b commit 7b94622
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 29 deletions.
26 changes: 19 additions & 7 deletions html/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,18 @@ const COMMON_ERROR_MESSAGE = {
'Tor Error: ': 'Tor Error'
};

// Column keys, in priority order, used to sort the instance list.
// Keys map to entries of CompareFunctionCriterias.
// NOTE(review): the scraped diff interleaved the old and new declarations
// (duplicate `const SORT_CRITERIAS`, a SyntaxError); this is the
// post-commit version, which adds 'timing.search_go.error' so instances
// with search errors are pushed to the back.
const SORT_CRITERIAS = [
    'http.status_code',
    'error',
    'timing.search.error',
    'timing.search_go.error',
    'version',
    'tls.grade',
    'http.grade',
    'html.grade',
    'timing.search.all',
    'url'
];

const HTML_GRADE_MAPPING = {
'V': 3,
Expand Down Expand Up @@ -212,25 +222,27 @@ function getTime(timing) {
}

// Return an error weight for a timing entry (used as a sort key by
// CompareFunctionCriterias, larger = worse) instead of the old boolean:
//   0   = no error,
//   100 = hard error (no success percentage, explicit error message),
//   otherwise the percentage of failed requests.
// NOTE(review): the scraped diff interleaved the old boolean-returning body
// with the new one (unbalanced braces); this is the post-commit version.
function isError(timing) {
    if (timing.success_percentage < 100) {
        // partial failure: weight by how many requests failed
        return 100 - timing.success_percentage;
    }
    if (timing.error !== undefined) {
        return 100;
    }
    return 0;
}

// Comparator table: maps a sort-criteria key (see SORT_CRITERIAS) to a
// (a, b) compare function over two instance records. Negated results
// (`-compareTool(...)`) invert the natural order of compareTool so that
// "better" instances sort first for those criteria.
// NOTE(review): this span comes from a diff whose +/- markers were lost;
// 'error_wp' may be a line deleted by the commit rather than kept — confirm
// against the repository before relying on it.
const CompareFunctionCriterias = {
    'http.status_code': (a, b) => -compareTool(a, b, null, 'http', 'status_code'),
    'error': (a, b) => -compareTool(a, b, null, 'error'),
    'error_wp': (a, b) => compareTool(a, b, null, 'timing', 'search_wp', 'error'),
    'network.asn_privacy': (a, b) => compareTool(a, b, null, 'network', 'asn_privacy'),
    'version': (a, b) => compareVersion(a.version, b.version),
    // grades are normalized to numbers before comparison
    'tls.grade': (a, b) => compareTool(a, b, normalizeGrade, 'tls', 'grade'),
    'html.grade': (a, b) => compareTool(a, b, normalizeHtmlGrade, 'html', 'grade'),
    'http.grade': (a, b) => compareTool(a, b, normalizeGrade, 'http', 'grade'),
    'timing.initial.all': (a, b) => -compareTool(a, b, getTime, 'timing', 'initial', 'all'),
    // isError returns a 0..100 weight, pushing failing instances back
    'timing.search.error': (a, b) => -compareTool(a, b, isError, 'timing', 'search'),
    'timing.search_go.error': (a, b) => -compareTool(a, b, isError, 'timing', 'search_go'),
    'timing.search.all': (a, b) => -compareTool(a, b, getTime, 'timing', 'search', 'all'),
    'timing.search_wp.server': (a, b) => -compareTool(a, b, getTime, 'timing', 'search_wp', 'server'),
    'timing.search_wp.all': (a, b) => -compareTool(a, b, getTime, 'timing', 'search_wp', 'all'),
    'url': (a, b) => -compareTool(a, b, null, 'url'),
};
Expand Down
67 changes: 45 additions & 22 deletions searxstats/fetcher/timing.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,44 @@ class CheckResult:
# There are results, error on the side (above infoboxes)
alter_danger_side: etree.XPath

@staticmethod
def is_wikiengine(engine_name):
return engine_name.startswith('wiki')

async def _check_html_result_page(self, engine_name, response):
    """Check a result page of a search restricted to one engine.

    Every meaningful result (i.e. not coming only from wiki* engines)
    must come from *engine_name*, and there must be at least one.

    Returns ``(True, None)`` on success, ``(False, error_message)``
    otherwise.

    NOTE(review): this span was a diff with lost +/- markers (old and new
    bodies interleaved, unreachable statements); this is the reconstructed
    post-commit version.
    """
    result_count = 0
    document = await html_fromstring(response.text)
    for _result_element, engine_names in self._iter_meaningful_results(document):
        result_count += 1
        if engine_name in engine_names:
            continue
        # a meaningful result was produced by some other engine
        return False, f'A result is not from the {engine_name}'
    if result_count == 0:
        return False, 'No result'
    return True, None

def _get_meaningful_result_count(self, document):
return len(list(self._iter_meaningful_results(document)))

def _iter_meaningful_results(self, document):
    """Iterate over the meaningful results of *document*.

    A result is meaningful when it comes from at least one generic
    engine, i.e. when not every engine of the result is a wiki* engine.
    Yields ``(result_element, engine_names)`` tuples.
    """
    for result_element in self.results(document):
        engine_names = [
            extract_text(engine_element)
            for engine_element in self.engines(result_element)
        ]
        if not all(self.is_wikiengine(name) for name in engine_names):
            yield result_element, engine_names

async def check_google_result(self, response):
    """Check the result page of a search restricted to the google engine."""
    checked = await self._check_html_result_page('google', response)
    return checked

Expand All @@ -45,35 +71,32 @@ async def check_wikipedia_result(self, response):

async def check_search_result(self, response):
    """Check a result page of a search using the default engines.

    Returns ``(True, message)`` when the page is valid (*message* may
    carry a non-fatal side error), ``(False, error_message)`` otherwise.

    NOTE(review): this span was a diff with lost +/- markers (duplicate,
    contradictory return statements); this is the reconstructed
    post-commit version, where a main error message now marks the
    instance as failing instead of valid.
    """
    document = await html_fromstring(response.text)
    message = None
    result_element_list = self.results(document)
    alert_danger_list = self.alert_danger_main(document)
    if len(alert_danger_list) > 0:
        # an error message in the main area: the search failed
        return False, extract_text(alert_danger_list)
    alert_danger_side_list = self.alter_danger_side(document)
    if len(alert_danger_side_list) > 0:
        # side errors (above the infoboxes) are reported but not fatal
        message = extract_text(alert_danger_side_list)
    if len(result_element_list) == 0:
        return False, 'No result'
    if self._get_meaningful_result_count(document) == 0:
        return False, 'Only result(s) from wiki* engines'
    if len(result_element_list) == 1:
        return False, 'Only one result'
    if len(result_element_list) == 2:
        return False, 'Only two results'
    return True, message


# XPath expressions per searx theme, used to extract the results, the
# per-result engine names and the error messages from a result page.
# NOTE(review): the scraped diff left both the old ("//div...") and new
# (".//div...") `engines=` lines for the 'simple' theme — a duplicate
# keyword argument (SyntaxError). Kept the new relative expression, which
# selects the engines of the current result element only. The 'oscar'
# entry appears once in the diff and is presumed kept — confirm against
# the repository.
CheckResultByTheme = {
    'simple': CheckResult(
        results=etree.XPath("//div[@id='urls']//article"),
        engines=etree.XPath(".//div[contains(@class, 'engines')]/span"),
        alert_danger_main=etree.XPath("//div[@id='urls']/div[contains(@class, 'dialog-error')]"),
        alter_danger_side=etree.XPath("//div[@id='sidebar']/div[contains(@class, 'dialog-error')]"),
    ),
    'oscar': CheckResult(
        results=etree.XPath("//div[@id='main_results']/div[contains(@class,'result-default')]"),
        engines=etree.XPath("//span[contains(@class, 'label label-default')]"),
        alert_danger_main=etree.XPath("//div[contains(@class, 'alert-danger')]/p[2]"),
        alter_danger_side=etree.XPath("//div[contains(@class, 'alert-danger')]/text()"),
    ),
}


Expand Down Expand Up @@ -157,16 +180,16 @@ async def fetch_one(instance_url: str, detail) -> dict:

# /search instead of / : https://github.com/searx/searx/pull/1681
search_url = urljoin(instance_url, 'search')
theme = 'simple' if detail['generator'] == 'searxng' else 'oscar'
theme = 'simple'
print(search_url, '(', theme, ')')
check_result = CheckResultByTheme[theme]
default_params = {'theme': theme}
default_params = {'theme': 'simple'}

# check the default engines
print('🔎 ' + instance_url)
await request_stat_with_log(search_url, timing, 'search',
client, instance_url,
3, 120, 160, check_result.check_search_result,
5, 120, 160, check_result.check_search_result,
params={'q': 'time', **default_params},
cookies=cookies, headers=DEFAULT_HEADERS)

Expand Down

0 comments on commit 7b94622

Please sign in to comment.