Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search analytics #6019

Merged
merged 52 commits into from
Aug 7, 2019
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
7477591
make model
dojutsu-user Jul 31, 2019
68c7f23
save searchqueries to databse
dojutsu-user Jul 31, 2019
c161dcd
update template
dojutsu-user Jul 31, 2019
c463226
add form
dojutsu-user Jul 31, 2019
8d65c70
update views
dojutsu-user Jul 31, 2019
43aa613
update meta options
dojutsu-user Jul 31, 2019
483d07b
add help_text
dojutsu-user Jul 31, 2019
d1e3f3c
add new line
dojutsu-user Jul 31, 2019
6406871
add h2
dojutsu-user Jul 31, 2019
55611bf
add cron job
dojutsu-user Jul 31, 2019
5d2a947
fix lint
dojutsu-user Jul 31, 2019
af37565
integrate chartsjs
dojutsu-user Jul 31, 2019
9cdef59
fix task
dojutsu-user Jul 31, 2019
dc12a22
change msgs
dojutsu-user Jul 31, 2019
5ad4752
enable only integers for chart
dojutsu-user Jul 31, 2019
cc8ac82
add doughnut part
dojutsu-user Jul 31, 2019
f4e68ee
add doughnut chart
dojutsu-user Jul 31, 2019
9ab6af5
record query in a celery task
dojutsu-user Aug 1, 2019
4e17e7b
generate doughnut chart data from the classmethod
dojutsu-user Aug 1, 2019
a26cd44
add logger
dojutsu-user Aug 1, 2019
a20a01b
update cronjob schedule
dojutsu-user Aug 1, 2019
f25c8e6
Update searchquery model to remove count and fix graph generating fun…
dojutsu-user Aug 1, 2019
ef8acfd
create a search obj every day
dojutsu-user Aug 1, 2019
8b9eafd
update admin.py
dojutsu-user Aug 1, 2019
52bce57
update search-analytics view func
dojutsu-user Aug 1, 2019
7a8edc1
template improvements
dojutsu-user Aug 1, 2019
c7c9336
fix lint
dojutsu-user Aug 1, 2019
3097be0
Update colors
dojutsu-user Aug 1, 2019
9e3df32
small improvements
dojutsu-user Aug 1, 2019
306c47c
update template
dojutsu-user Aug 1, 2019
75bfd13
fix typo
dojutsu-user Aug 1, 2019
199867c
fix typo
dojutsu-user Aug 1, 2019
3880f4b
add option for last 3 months
dojutsu-user Aug 1, 2019
53777ba
Merge branch 'search-analytics' of https://github.com/dojutsu-user/re…
dojutsu-user Aug 1, 2019
94310f9
Merge branch 'master' into search-analytics
dojutsu-user Aug 1, 2019
6b8ad1f
query optimization
dojutsu-user Aug 1, 2019
fbc14d5
improve in utils.py
dojutsu-user Aug 2, 2019
8670741
add test fixture
dojutsu-user Aug 2, 2019
53c9a67
add form tests
dojutsu-user Aug 2, 2019
b8e786b
add views test
dojutsu-user Aug 2, 2019
a358bf0
add test_search_tasks.py
dojutsu-user Aug 2, 2019
343bdc2
Merge branch 'master' into search-analytics
dojutsu-user Aug 2, 2019
0d83ba6
add feature flag
dojutsu-user Aug 3, 2019
3cce53f
fix test
dojutsu-user Aug 3, 2019
4c720ca
add download-data button
dojutsu-user Aug 3, 2019
3f06d8d
fix line
dojutsu-user Aug 3, 2019
372f1ac
add test for generated csv data
dojutsu-user Aug 4, 2019
324ff36
refactoring and simplify form
dojutsu-user Aug 7, 2019
13a6205
fix tests
dojutsu-user Aug 7, 2019
13b6880
remove form completely
dojutsu-user Aug 7, 2019
c8fea2c
fix tests
dojutsu-user Aug 7, 2019
862ab30
remove unnecessary import
dojutsu-user Aug 7, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions readthedocs/projects/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,3 +355,19 @@
)

GITHUB_PR_PULL_PATTERN = 'pull/{id}/head:external-{id}'

# Allowed values for the search analytics filters, as ``(value, label)`` pairs.
SEARCH_ANALYTICS_PARAMS = {
    # Each label describes the look-back window named by its key.
    'period': (
        ('recent', 'Recent Queries'),
        ('last-24-hrs', 'Top Queries of Last 24 Hours'),
        ('last-48-hrs', 'Top Queries of Last 48 Hours'),
        ('last-7-days', 'Top Queries of Last 7 Days'),
        ('last-1-month', 'Top Queries of Last 1 Month'),
    ),
    # Maximum number of queries to list.
    'size': (
        (5, 5),
        (10, 10),
        (50, 50),
        (100, 100),
    ),
}
28 changes: 28 additions & 0 deletions readthedocs/projects/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.integrations.models import Integration
from readthedocs.oauth.models import RemoteRepository
from readthedocs.projects.constants import SEARCH_ANALYTICS_PARAMS
from readthedocs.projects.exceptions import ProjectSpamError
from readthedocs.projects.models import (
Domain,
Expand Down Expand Up @@ -806,3 +807,30 @@ def clean_name(self):
_('Only letters, numbers and underscore are allowed'),
)
return name


class SearchAnalyticsForm(forms.Form):

    """
    Filter form for the search analytics page.

    Expects a ``project`` keyword argument; the ``version`` choices are built
    from that project's active versions.
    """

    version = forms.ChoiceField(required=False)
    period = forms.ChoiceField(
        choices=SEARCH_ANALYTICS_PARAMS['period'],
        required=False,
        help_text=_('Choose the time-period for the results.'),
    )
    size = forms.ChoiceField(
        choices=SEARCH_ANALYTICS_PARAMS['size'],
        required=False,
    )

    def __init__(self, *args, **kwargs):
        # ``project`` is not a Form argument, so take it out before delegating.
        self.project = kwargs.pop('project', None)
        super().__init__(*args, **kwargs)

        active_versions = self.project.versions(manager=INTERNAL).filter(
            project=self.project,
            active=True,
        )

        version_choices = []
        for item in sort_version_aware(active_versions):
            version_choices.append((item.slug, item.verbose_name))

        self.fields['version'].choices = version_choices
4 changes: 4 additions & 0 deletions readthedocs/projects/urls/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@
r'^(?P<project_slug>[-\w]+)/advertising/$',
ProjectAdvertisingUpdate.as_view(), name='projects_advertising',
),
url(
r'^(?P<project_slug>[-\w]+)/search-analytics/$',
private.search_analytics_view, name='projects_search_analytics',
),
]

domain_urls = [
Expand Down
84 changes: 84 additions & 0 deletions readthedocs/projects/views/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from django.middleware.csrf import get_token
from django.shortcuts import get_object_or_404, render
from django.urls import reverse
from django.utils import timezone
from django.utils.safestring import mark_safe
from django.utils.translation import ugettext_lazy as _
from django.views.generic import ListView, TemplateView, View
Expand All @@ -42,6 +43,7 @@
ProjectExtraForm,
ProjectRelationshipForm,
RedirectForm,
SearchAnalyticsForm,
TranslationForm,
UpdateProjectForm,
UserForm,
Expand All @@ -58,6 +60,7 @@
from readthedocs.projects.notifications import EmailConfirmNotification
from readthedocs.projects.views.base import ProjectAdminMixin, ProjectSpamMixin
from readthedocs.projects.views.mixins import ProjectImportMixin
from readthedocs.search.models import SearchQuery

from ..tasks import retry_domain_verification

Expand Down Expand Up @@ -899,3 +902,84 @@ class EnvironmentVariableDelete(EnvironmentVariableMixin, DeleteView):
# This removes the delete confirmation
def get(self, request, *args, **kwargs):
return self.http_method_not_allowed(request, *args, **kwargs)


@login_required
def search_analytics_view(request, project_slug):
    """
    Render the recorded search queries of a project.

    The project is looked up among ``Project.objects.for_admin_user`` for the
    requesting user; a 404 is returned when it is not found there.

    Query-string parameters (all optional):

    - ``version``: slug of the version to inspect, defaults to
      ``project.default_version``.
    - ``period``: ``recent`` (default) orders queries by ``modified``
      descending; the ``last-*`` values keep the queries modified inside that
      window, ordered by ``count`` descending. Any other value gives no
      results.
    - ``size``: maximum number of queries to show. Falls back to 5 when it is
      missing, not an integer, or negative.
    """
    default_size = 5
    # Look-back window, in days, of every "top queries" period.
    period_days = {
        'last-24-hrs': 1,
        'last-48-hrs': 2,
        'last-7-days': 7,
        'last-1-month': 30,
    }

    project = get_object_or_404(
        Project.objects.for_admin_user(request.user),
        slug=project_slug,
    )

    version_slug = request.GET.get('version', project.default_version)
    period = request.GET.get('period', 'recent')

    try:
        size = int(request.GET.get('size', default_size))
    except ValueError:
        size = default_size
    if size < 0:
        # Querysets do not support negative slicing.
        size = default_size

    form = SearchAnalyticsForm(
        data={
            'version': version_slug,
            'period': period,
            'size': size,
        },
        project=project,
    )

    # Bound up front so the template always receives ``queries``, even when
    # the version is empty/unknown or the period is not recognised.
    queries = []

    version = None
    if version_slug:
        # ``first()`` returns ``None`` when nothing matches, so a separate
        # ``exists()`` query is not needed.
        version = Version.objects.filter(
            project=project,
            slug=version_slug,
        ).first()

    if version is not None:
        search_queries = SearchQuery.objects.filter(
            project=project,
            version=version,
        )

        if period == 'recent':
            queries = search_queries.order_by('-modified')
        elif period in period_days:
            now = timezone.now()
            since = now - timezone.timedelta(days=period_days[period])
            queries = search_queries.filter(
                modified__gte=since,
                modified__lte=now,
            ).order_by('-count')

    queries = queries[:size]

    return render(
        request,
        'projects/projects_search_analytics.html',
        {'form': form, 'project': project, 'queries': queries},
    )
16 changes: 16 additions & 0 deletions readthedocs/search/admin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""SearchQuery Admin classes."""

from django.contrib import admin

from .models import SearchQuery


class SearchQueryAdmin(admin.ModelAdmin):

    """Admin options for ``SearchQuery`` objects."""

    # Change-list columns and sidebar filter.
    list_display = ('__str__', 'count', 'created', 'modified')
    list_filter = ('created',)
    # Relations fetched together with the change-list rows.
    list_select_related = ('project', 'version', 'version__project')

    # The timestamps are shown but cannot be edited.
    readonly_fields = ('created', 'modified')
    search_fields = ('project__slug', 'version__slug', 'query')


admin.site.register(SearchQuery, SearchQueryAdmin)
26 changes: 26 additions & 0 deletions readthedocs/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,29 @@ def get_all_projects_url(self):
for project in all_projects:
projects_url[project.slug] = project.get_docs_url(version_slug=version_slug)
return projects_url

def list(self, request, *args, **kwargs):
    """
    Overriding ``list`` method to record query in database.

    The search response is built first and is always returned; a failure
    while recording the query is logged and otherwise ignored.
    """
    response = super().list(request, *args, **kwargs)

    params = self.request.query_params
    project_slug = params.get('project', None)
    version_slug = params.get('version', None)
    query = params.get('q', '')
    total_results = response.data.get('count', 0)

    try:
        utils.record_search_query(
            project_slug,
            version_slug,
            query,
            total_results,
        )
    except Exception:
        # Recording is best-effort and must never break the search itself.
        # Values are passed as logging arguments so the message is only
        # formatted when the record is actually emitted.
        log.exception(
            '[%s] [%s] [Query: %s] Error recording search query in database.',
            project_slug,
            version_slug,
            query,
        )

    return response
36 changes: 36 additions & 0 deletions readthedocs/search/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.22 on 2019-07-31 14:25
from __future__ import unicode_literals

from django.db import migrations, models
import django.db.models.deletion
import django_extensions.db.fields


class Migration(migrations.Migration):

    """Create the ``SearchQuery`` model."""

    initial = True

    dependencies = [
        ('projects', '0044_auto_20190703_1300'),
        ('builds', '0009_added_external_version_type'),
    ]

    operations = [
        migrations.CreateModel(
            name='SearchQuery',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'created',
                    django_extensions.db.fields.CreationDateTimeField(
                        auto_now_add=True,
                        verbose_name='created',
                    ),
                ),
                (
                    'modified',
                    django_extensions.db.fields.ModificationDateTimeField(
                        auto_now=True,
                        verbose_name='modified',
                    ),
                ),
                (
                    'query',
                    models.CharField(max_length=4092, verbose_name='Query'),
                ),
                (
                    'count',
                    models.PositiveIntegerField(
                        default=1,
                        verbose_name='No. of times this query was searched',
                    ),
                ),
                (
                    'project',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='search_queries',
                        to='projects.Project',
                    ),
                ),
                (
                    'version',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='search_queries',
                        to='builds.Version',
                        verbose_name='Version',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Search query',
                'verbose_name_plural': 'Search queries',
            },
        ),
    ]
Empty file.
43 changes: 43 additions & 0 deletions readthedocs/search/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Search Queries."""

from django.db import models
from django.utils.translation import ugettext_lazy as _

from django_extensions.db.models import TimeStampedModel

from readthedocs.builds.models import Version
from readthedocs.projects.models import Project
from readthedocs.projects.querysets import RelatedProjectQuerySet


class SearchQuery(TimeStampedModel):

"""Information about the search queries."""

project = models.ForeignKey(
Project,
related_name='search_queries',
on_delete=models.CASCADE,
)
version = models.ForeignKey(
Version,
verbose_name=_('Version'),
related_name='search_queries',
on_delete=models.CASCADE,
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if we really even want to cascade these deletes. Is there a reason we don't want to store Version here as a string, so we can keep them forever even if a version is deleted?

query = models.CharField(
_('Query'),
max_length=4092,
)
count = models.PositiveIntegerField(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if we want more data here. Should we be storing an object each time a search happens? That way we can show the frequency of a search over time. Currently, this only tells us how many times a search has happened.

I think if we plan to delete the data every 3 months, we can probably store every search query with its own timestamp. I'm fine with shipping this initially though, before we start storing a lot more data.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure what you meant by more data.

Storing a search object every time a search is made is a better idea. I realised that the graphs were wrong before, and going this way makes them correct and easier.

That way we can show the frequency of a search over time.

Can you expand on this more?

  • Do we want this to be selected by user? Like the user can select a date and we show him the frequency of searches made vs time for that day.
  • Or we just show this for today/yesterday?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just that we will be able to see when a specific search was done each time it was done. The current modeling only shows the number of times a search was done, but no time data about each search.

_('No. of times this query was searched'),
default=1,
)
objects = RelatedProjectQuerySet.as_manager()

class Meta:
verbose_name = 'Search query'
verbose_name_plural = 'Search queries'

def __str__(self):
return f'[{self.project.slug}:{self.version.slug}]: {self.query}'
14 changes: 14 additions & 0 deletions readthedocs/search/tasks.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging

from django.apps import apps
from django.utils import timezone
from django_elasticsearch_dsl.registries import registry

from readthedocs.worker import app
from .models import SearchQuery
from .utils import _get_index, _get_document

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -118,3 +120,15 @@ def index_missing_objects(app_label, model_name, document_class, index_generatio
log.info("Indexed %s missing objects from model: %s'", queryset.count(), model.__name__)

# TODO: Figure out how to remove the objects from ES index that has been deleted


@app.task(queue='web')
def delete_old_search_queries_from_db():
    """
    Delete the ``SearchQuery`` objects created more than 90 days ago.

    The number of objects is logged before they are removed; nothing is
    logged or deleted when there are none.
    """
    # The ``created`` lookup needs a point in time, not a bare duration.
    cutoff = timezone.now() - timezone.timedelta(days=90)
    search_queries_qs = SearchQuery.objects.filter(created__lte=cutoff)

    total = search_queries_qs.count()
    if total:
        log.info('Deleting search queries older than 3 months. Total: %s', total)
        search_queries_qs.delete()
36 changes: 36 additions & 0 deletions readthedocs/search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from readthedocs.builds.models import Version
from readthedocs.projects.models import HTMLFile, Project
from readthedocs.search.documents import PageDocument
from readthedocs.search.models import SearchQuery


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -168,3 +169,38 @@ def _get_sorted_results(results, source_key='_source'):
]

return sorted_results


def record_search_query(project_slug, version_slug, query, total_results):
    """
    Record search query in database.

    Nothing is recorded when any argument is empty (this includes searches
    that returned no results) or when the project or the version cannot be
    found. An already recorded query for the same project and version gets
    its ``count`` incremented; otherwise a new ``SearchQuery`` is created.

    :param project_slug: slug of the project that was searched
    :param version_slug: slug of the version that was searched
    :param query: text the user searched for
    :param total_results: number of results the search returned
    """
    if not (project_slug and version_slug and query and total_results):
        return

    # Local import so this function does not depend on the module's import block.
    from django.db.models import F

    # ``first()`` returns ``None`` when nothing matches, which saves the extra
    # ``exists()`` query per lookup.
    project = Project.objects.filter(slug=project_slug).first()
    if project is None:
        return

    version = Version.objects.filter(project=project, slug=version_slug).first()
    if version is None:
        return

    search_query_obj = SearchQuery.objects.filter(
        project=project,
        version=version,
        query=query,
    ).first()

    if search_query_obj is None:
        SearchQuery.objects.create(
            project=project,
            version=version,
            query=query,
        )
        return

    # Increment in the database rather than in Python, so that concurrent
    # searches for the same query do not overwrite each other's update.
    # ``save()`` (not ``QuerySet.update()``) is kept so ``modified`` is refreshed.
    search_query_obj.count = F('count') + 1
    search_query_obj.save()
10 changes: 10 additions & 0 deletions readthedocs/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,16 @@ def USE_PROMOS(self): # noqa
'schedule': crontab(minute=0, hour='*/3'),
'options': {'queue': 'web'},
},
'every-three-month-delete-old-search-queries': {
'task': 'readthedocs.search.tasks.delete_old_search_queries_from_db',
'schedule': crontab(
minute=0,
hour=0,
day_of_month=1,
month_of_year='*/4'
dojutsu-user marked this conversation as resolved.
Show resolved Hide resolved
),
'options': {'queue': 'web'},
}
}
MULTIPLE_APP_SERVERS = [CELERY_DEFAULT_QUEUE]
MULTIPLE_BUILD_SERVERS = [CELERY_DEFAULT_QUEUE]
Expand Down
Loading