From 4857a80c7e37802f0a4b3c0d802cb63d2508daa0 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 17 Jul 2019 17:05:13 +0530 Subject: [PATCH 1/3] return empty dict when no highlight --- readthedocs/search/api.py | 25 +++++-------------------- readthedocs/search/utils.py | 30 ++++++++++++++++++++++++++++++ readthedocs/search/views.py | 20 +++++--------------- 3 files changed, 40 insertions(+), 35 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 3ccd6d18190..ca8e199b7b6 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -1,6 +1,5 @@ import itertools import logging -from operator import attrgetter from pprint import pformat from rest_framework import generics, serializers @@ -48,27 +47,13 @@ def get_inner_hits(self, obj): sections = inner_hits.sections or [] domains = inner_hits.domains or [] all_results = itertools.chain(sections, domains) - - sorted_results = [ - { - 'type': hit._nested.field, - '_source': hit._source.to_dict(), - 'highlight': self._get_inner_hits_highlights(hit), - } - for hit in sorted(all_results, key=attrgetter('_score'), reverse=True) - ] - + sorted_results = list(utils._get_sorted_results( + results=all_results, + source_key='_source', + logging=True, + )) return sorted_results - def _get_inner_hits_highlights(self, hit): - """Removes new lines from highlight and log it.""" - highlight_dict = utils._remove_newlines_from_dict( - hit.highlight.to_dict() - ) - - log.debug('API Search highlight: %s', pformat(highlight_dict)) - return highlight_dict - class PageSearchAPIView(generics.ListAPIView): diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index cf1f0fb73aa..a06b649bee7 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -1,6 +1,8 @@ """Utilities related to reading and generating indexable search content.""" import logging +from pprint import pformat +from operator import attrgetter from django.shortcuts import get_object_or_404 from django_elasticsearch_dsl.apps import DEDConfig @@ -174,3 +176,31 @@ def _remove_newlines_from_dict(highlight): highlight[k] = v_new_list return highlight + + +def _get_inner_hits_highlights(hit, logging=False): + """Removes new lines from highlight dict""" + if hasattr(hit, 'highlight'): + highlight_dict = _remove_newlines_from_dict( + hit.highlight.to_dict() + ) + + if logging: + log.debug('API Search highlight: %s', pformat(highlight_dict)) + + return highlight_dict + return {} + + +def _get_sorted_results(results, source_key='_source', logging=False): + """Sort results according to their score and return a generator expression.""" + sorted_results = ( + { + 'type': hit._nested.field, + source_key: hit._source.to_dict(), + 'highlight': _get_inner_hits_highlights(hit, logging) + } + for hit in sorted(results, key=attrgetter('_score'), reverse=True) + ) + + return sorted_results diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index af5f97446ff..99f79ba77c0 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -117,21 +117,11 @@ def elastic_search(request, project_slug=None): domains = inner_hits.domains or [] all_results = itertools.chain(sections, domains) - sorted_results = [ - { - 'type': hit._nested.field, - - # here _source term is not used because - # django gives error if the names of the - # variables start with underscore - 'source': hit._source.to_dict(), - - 'highlight': utils._remove_newlines_from_dict( - hit.highlight.to_dict() - ), - } - for hit in sorted(all_results, key=attrgetter('_score'), reverse=True) - ] + sorted_results = list(utils._get_sorted_results( + results=all_results, + source_key='source', + logging=False, + )) result.meta.inner_hits = sorted_results except Exception: From a9acda280940ff47c8fe049fc6eb8fc737fb4436 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 17 Jul 2019 19:19:19 +0530 Subject: [PATCH 2/3] remove logic for removing new line --- readthedocs/search/api.py | 2 +- readthedocs/search/utils.py | 29 ++--------------------------- 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index ca8e199b7b6..8f68b8ea2fe 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -37,7 +37,7 @@ def get_link(self, obj): def get_highlight(self, obj): highlight = getattr(obj.meta, 'highlight', None) if highlight: - ret = utils._remove_newlines_from_dict(highlight.to_dict()) + ret = highlight.to_dict() log.debug('API Search highlight [Page title]: %s', pformat(ret)) return ret diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index a06b649bee7..bb69d4fce1c 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -155,35 +155,10 @@ def _indexing_helper(html_objs_qs, wipe=False): delete_objects_in_es.delay(**kwargs) -def _remove_newlines_from_dict(highlight): - """ - Recursively change results to turn newlines into periods. - - See: https://github.com/rtfd/readthedocs.org/issues/5168 - :param highlight: highlight dict whose contents are to be edited. - :type highlight: dict - :returns: dict with all the newlines changed to periods. - :rtype: dict - """ - for k, v in highlight.items(): - if isinstance(v, dict): - highlight[k] = _remove_newlines_from_dict(v) - else: - # elastic returns the contents of the - # highlighted field in a list. - if isinstance(v, list): - v_new_list = [res.replace('\n', '. ') for res in v] - highlight[k] = v_new_list - - return highlight - - def _get_inner_hits_highlights(hit, logging=False): - """Removes new lines from highlight dict""" + """Returns highlight dict and does conditional logging of the same.""" if hasattr(hit, 'highlight'): - highlight_dict = _remove_newlines_from_dict( - hit.highlight.to_dict() - ) + highlight_dict = hit.highlight.to_dict() if logging: log.debug('API Search highlight: %s', pformat(highlight_dict)) From a1a6666bb9a46a3ddb57acccf4d01016e3b5fd89 Mon Sep 17 00:00:00 2001 From: dojutsu-user Date: Wed, 17 Jul 2019 20:46:41 +0530 Subject: [PATCH 3/3] simplify logic --- readthedocs/search/api.py | 11 ++++++----- readthedocs/search/utils.py | 23 +++++------------------ readthedocs/search/views.py | 10 ++++------ 3 files changed, 15 insertions(+), 29 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 8f68b8ea2fe..02884cec00a 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -1,6 +1,5 @@ import itertools import logging -from pprint import pformat from rest_framework import generics, serializers from rest_framework.exceptions import ValidationError @@ -38,7 +37,7 @@ def get_highlight(self, obj): highlight = getattr(obj.meta, 'highlight', None) if highlight: ret = highlight.to_dict() - log.debug('API Search highlight [Page title]: %s', pformat(ret)) + log.debug('API Search highlight [Page title]: %s', ret) return ret def get_inner_hits(self, obj): @@ -47,11 +46,13 @@ def get_inner_hits(self, obj): sections = inner_hits.sections or [] domains = inner_hits.domains or [] all_results = itertools.chain(sections, domains) - sorted_results = list(utils._get_sorted_results( + + sorted_results = utils._get_sorted_results( results=all_results, source_key='_source', - logging=True, - )) + ) + + log.debug('[API] Sorted Results: %s', sorted_results) return sorted_results diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py index bb69d4fce1c..25e5f95c0bd 100644 --- a/readthedocs/search/utils.py +++ b/readthedocs/search/utils.py @@ -1,7 +1,6 @@ """Utilities related to reading and generating indexable search content.""" import logging -from pprint import pformat from operator import attrgetter from django.shortcuts import get_object_or_404 @@ -155,27 +154,15 @@ def _indexing_helper(html_objs_qs, wipe=False): delete_objects_in_es.delay(**kwargs) -def _get_inner_hits_highlights(hit, logging=False): - """Returns highlight dict and does conditional logging of the same.""" - if hasattr(hit, 'highlight'): - highlight_dict = hit.highlight.to_dict() - - if logging: - log.debug('API Search highlight: %s', pformat(highlight_dict)) - - return highlight_dict - return {} - - -def _get_sorted_results(results, source_key='_source', logging=False): - """Sort results according to their score and return a generator expression.""" - sorted_results = ( +def _get_sorted_results(results, source_key='_source'): + """Sort results according to their score and returns results as list.""" + sorted_results = [ { 'type': hit._nested.field, source_key: hit._source.to_dict(), - 'highlight': _get_inner_hits_highlights(hit, logging) + 'highlight': hit.highlight.to_dict() if hasattr(hit, 'highlight') else {} } for hit in sorted(results, key=attrgetter('_score'), reverse=True) - ) + ] return sorted_results diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 99f79ba77c0..94a63c43bee 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -3,7 +3,6 @@ import itertools import logging from operator import attrgetter -from pprint import pformat from django.shortcuts import get_object_or_404, render @@ -117,18 +116,17 @@ def elastic_search(request, project_slug=None): domains = inner_hits.domains or [] all_results = itertools.chain(sections, domains) - sorted_results = list(utils._get_sorted_results( + sorted_results = utils._get_sorted_results( results=all_results, source_key='source', - logging=False, - )) + ) result.meta.inner_hits = sorted_results except Exception: log.exception('Error while sorting the results (inner_hits).') - log.debug('Search results: %s', pformat(results.to_dict())) - log.debug('Search facets: %s', pformat(results.facets.to_dict())) + log.debug('Search results: %s', results.to_dict()) + log.debug('Search facets: %s', results.facets.to_dict()) template_vars = user_input._asdict() template_vars.update({