Skip to content

Commit

Permalink
Hotfix: extract only 'q' element from query string
Browse files Browse the repository at this point in the history
Occasionally the search results will contain links with arguments such
as 'dq', which were being erroneously used in attempts to extract the 'q'
element from query strings. This enforces that only links with '?q=' or
'&q=' (i.e. links with a standalone 'q' arg) will have the element
extracted.

I also refactored the naming of this element once extracted to be just
'q'. Although this seems counterintuitive, it makes a little more sense
since this element is the one we're extracting. It's a vague url arg
name, but it is what it is.

Bump version to 0.5.2 for hotfix release
  • Loading branch information
benbusby committed May 29, 2021
1 parent e1e6e84 commit cbe32a0
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 11 deletions.
2 changes: 1 addition & 1 deletion app/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
app.no_cookie_ips = []
app.config['SECRET_KEY'] = os.urandom(32)
app.config['SESSION_TYPE'] = 'filesystem'
app.config['VERSION_NUMBER'] = '0.5.1'
app.config['VERSION_NUMBER'] = '0.5.2'
app.config['APP_ROOT'] = os.getenv(
'APP_ROOT',
os.path.dirname(os.path.abspath(__file__)))
Expand Down
31 changes: 22 additions & 9 deletions app/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,21 @@ def strip_blocked_sites(query: str) -> str:
return query[:query.find('-site:')] if '-site:' in query else query


def extract_q(q_str: str, href: str) -> str:
    """Extract the standalone 'q' element from a result link.

    The value is typically either the link to a result's website, or a
    search string.

    Args:
        q_str: The query-string portion of the result link to parse.
        href: The full url, checked first for a standalone 'q' argument
            ('?q=' or '&q=') so that look-alike arguments such as 'dq'
            are never mistaken for it.

    Returns:
        str: The first 'q' value of the link, or an empty string when the
            link has no standalone 'q' argument or its value is blank.
    """
    if '&q=' not in href and '?q=' not in href:
        return ''

    # parse_qs omits keys whose value is blank (e.g. '/search?q='), so a
    # direct ['q'] lookup could still raise KeyError after the check above.
    return parse_qs(q_str).get('q', [''])[0]


class Filter:
def __init__(self, user_key: str, mobile=False, config=None) -> None:
if config is None:
Expand Down Expand Up @@ -223,20 +238,18 @@ def update_link(self, link: Tag) -> None:
link['target'] = '_blank'

result_link = urlparse.urlparse(href)
query = parse_qs(
result_link.query
)['q'][0] if 'q=' in href else ''
q = extract_q(result_link.query, href)

if query.startswith('/'):
if q.startswith('/'):
# Internal google links (i.e. mail, maps, etc) should still
# be forwarded to Google
link['href'] = 'https://google.com' + query
link['href'] = 'https://google.com' + q
elif '/search?q=' in href:
# "li:1" implies the query should be interpreted verbatim,
# which is accomplished by wrapping the query in double quotes
if 'li:1' in href:
query = '"' + query + '"'
new_search = 'search?q=' + self.encrypt_path(query)
q = '"' + q + '"'
new_search = 'search?q=' + self.encrypt_path(q)

query_params = parse_qs(urlparse.urlparse(href).query)
for param in VALID_PARAMS:
Expand All @@ -247,15 +260,15 @@ def update_link(self, link: Tag) -> None:
link['href'] = new_search
elif 'url?q=' in href:
# Strip unneeded arguments
link['href'] = filter_link_args(query)
link['href'] = filter_link_args(q)

# Add no-js option
if self.nojs:
append_nojs(link)
else:
if href.startswith(MAPS_URL):
# Maps links don't work if a site filter is applied
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(query)
link['href'] = MAPS_URL + "?q=" + strip_blocked_sites(q)
else:
link['href'] = href

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
author='Ben Busby',
author_email='benbusby@protonmail.com',
name='whoogle-search',
version='0.5.1',
version='0.5.2',
include_package_data=True,
install_requires=requirements,
description='Self-hosted, ad-free, privacy-respecting metasearch engine',
Expand Down

0 comments on commit cbe32a0

Please sign in to comment.