Skip to content

Commit

Permalink
Merge pull request #5950 from dojutsu-user/fix--invalid-highlight-search
Browse files (browse the repository at this point in the history)
Hotfix: Return empty dict when no highlight dict is present
  • Loading branch information
ericholscher authored Jul 17, 2019
2 parents fda7fc3 + c0a5fd9 commit 2888d2e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 61 deletions.
30 changes: 7 additions & 23 deletions readthedocs/search/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import itertools
import logging
from operator import attrgetter
from pprint import pformat

from rest_framework import generics, serializers
from rest_framework.exceptions import ValidationError
Expand Down Expand Up @@ -38,8 +36,8 @@ def get_link(self, obj):
def get_highlight(self, obj):
highlight = getattr(obj.meta, 'highlight', None)
if highlight:
ret = utils._remove_newlines_from_dict(highlight.to_dict())
log.debug('API Search highlight [Page title]: %s', pformat(ret))
ret = highlight.to_dict()
log.debug('API Search highlight [Page title]: %s', ret)
return ret

def get_inner_hits(self, obj):
Expand All @@ -49,27 +47,13 @@ def get_inner_hits(self, obj):
domains = inner_hits.domains or []
all_results = itertools.chain(sections, domains)

sorted_results = [
{
'type': hit._nested.field,
'_source': hit._source.to_dict(),
'highlight': self._get_inner_hits_highlights(hit),
}
for hit in sorted(all_results, key=attrgetter('_score'), reverse=True)
]

return sorted_results

def _get_inner_hits_highlights(self, hit):
"""Remove newlines from the highlight and log it."""
if hasattr(hit, 'highlight'):
highlight_dict = utils._remove_newlines_from_dict(
hit.highlight.to_dict()
sorted_results = utils._get_sorted_results(
results=all_results,
source_key='_source',
)

log.debug('API Search highlight: %s', pformat(highlight_dict))
return highlight_dict
return {}
log.debug('[API] Sorted Results: %s', sorted_results)
return sorted_results


class PageSearchAPIView(generics.ListAPIView):
Expand Down
32 changes: 12 additions & 20 deletions readthedocs/search/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Utilities related to reading and generating indexable search content."""

import logging
from operator import attrgetter

from django.shortcuts import get_object_or_404
from django_elasticsearch_dsl.apps import DEDConfig
Expand Down Expand Up @@ -153,24 +154,15 @@ def _indexing_helper(html_objs_qs, wipe=False):
delete_objects_in_es.delay(**kwargs)


def _remove_newlines_from_dict(highlight):
"""
Recursively change results to turn newlines into periods.
def _get_sorted_results(results, source_key='_source'):
"""Sort results according to their score and return them as a list."""
sorted_results = [
{
'type': hit._nested.field,
source_key: hit._source.to_dict(),
'highlight': hit.highlight.to_dict() if hasattr(hit, 'highlight') else {}
}
for hit in sorted(results, key=attrgetter('_score'), reverse=True)
]

See: https://github.com/rtfd/readthedocs.org/issues/5168
:param highlight: highlight dict whose contents are to be edited.
:type highlight: dict
:returns: dict with all the newlines changed to periods.
:rtype: dict
"""
for k, v in highlight.items():
if isinstance(v, dict):
highlight[k] = _remove_newlines_from_dict(v)
else:
# elastic returns the contents of the
# highlighted field in a list.
if isinstance(v, list):
v_new_list = [res.replace('\n', '. ') for res in v]
highlight[k] = v_new_list

return highlight
return sorted_results
24 changes: 6 additions & 18 deletions readthedocs/search/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import itertools
import logging
from operator import attrgetter
from pprint import pformat

from django.shortcuts import get_object_or_404, render

Expand Down Expand Up @@ -117,28 +116,17 @@ def elastic_search(request, project_slug=None):
domains = inner_hits.domains or []
all_results = itertools.chain(sections, domains)

sorted_results = [
{
'type': hit._nested.field,

# here _source term is not used because
# django gives error if the names of the
# variables start with underscore
'source': hit._source.to_dict(),

'highlight': utils._remove_newlines_from_dict(
hit.highlight.to_dict()
),
}
for hit in sorted(all_results, key=attrgetter('_score'), reverse=True)
]
sorted_results = utils._get_sorted_results(
results=all_results,
source_key='source',
)

result.meta.inner_hits = sorted_results
except Exception:
log.exception('Error while sorting the results (inner_hits).')

log.debug('Search results: %s', pformat(results.to_dict()))
log.debug('Search facets: %s', pformat(results.facets.to_dict()))
log.debug('Search results: %s', results.to_dict())
log.debug('Search facets: %s', results.facets.to_dict())

template_vars = user_input._asdict()
template_vars.update({
Expand Down

0 comments on commit 2888d2e

Please sign in to comment.