Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

User session tracking #1114

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions galaxy/api/access.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,22 @@ def can_delete(self, obj):
return False


class SessionEventAccess(BaseAccess):
model = models.SessionEvent

def can_read(self, obj):
return True
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really want to allow anybody access to our internal session tracking information?
I would say it should be push-only.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if we change SessionEvent to store session hashes, does it matter if people can read the hashes?


def can_add(self, data):
return True

def can_change(self, obj, data):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe make it so that you can only change an object if the session ID's match

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree

return True

def can_delete(self, obj):
return False


register_access(User, UserAccess)
register_access(EmailAddress, EmailAddressAccess)
register_access(Token, UserTokenAccess)
Expand All @@ -411,3 +427,4 @@ def can_delete(self, obj):
register_access(models.ContentType, ContentTypeAccess)
register_access(models.CloudPlatform, CloudPlatformsAccess)
register_access(models.CommunitySurvey, CommunitySurveyAccess)
register_access(models.SessionEvent, SessionEventAccess)
1 change: 1 addition & 0 deletions galaxy/api/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .content_block import * # noqa
from .content_type import * # noqa
from .email import * # noqa
from .event import * # noqa
from .namespace import * # noqa
from .notification import * # noqa
from .provider import * # noqa
Expand Down
124 changes: 124 additions & 0 deletions galaxy/api/serializers/event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# (c) 2012-2018, Ansible by Red Hat
#
# This file is part of Ansible Galaxy
#
# Ansible Galaxy is free software: you can redistribute it and/or modify
# it under the terms of the Apache License as published by
# the Apache Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# Ansible Galaxy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Apache License for more details.
#
# You should have received a copy of the Apache License
# along with Galaxy. If not, see <http://www.apache.org/licenses/>.

import copy
import logging

from galaxy import constants
from galaxy.main import models

from rest_framework import serializers as drf_serializers

from django.core.urlresolvers import reverse

from . import serializers


__all__ = (
'EventSerializer',
)

logger = logging.getLogger(__name__)


class ValidSearchType(object):
choices = []

def __init__(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you initialize static class field in the __init__ method?

class ValidSearchType(object):
    choices = [tp.value for tp in constances.EventType]

for choice in constants.EventType.choices():
self.choices.append(choice[0])

def __call__(self, value):
if value not in self.choices:
message = '%s is not a valid choice.' % value
raise drf_serializers.ValidationError(message)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually you could replace this whole class with simple function:

def valid_search_type(value):
    try:
        constants.EnumType(value)
   except ValueError:
       raise drf_serializers.ValidationError(...)



def clean_search_data(data):
return {
'search_params': data.get('search_params', {}),
'search_results': data.get('search_results', 0),
'next_page_clicks': data.get('next_page_clicks', 0),
'prev_page_clicks': data.get('prev_page_clicks', 0),
'results_clicked': data.get('results_clicked', []),
'repositories_clicked': data.get('repositories_clicked', []),
'content_items_clicked': data.get('content_items_clicked', []),
'last_page_size': data.get('last_page_size', 0)
}


def append_list_to_list(source_data, target_data, label):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's unclear what this function does. Docstring or some explanation is required.
Function name indicates common used util function, but instead it implements some context specific business logic.

if source_data.get(label):
for item in source_data[label]:
if item not in target_data[label]:
target_data[label].append(item)


class EventSerializer(serializers.BaseSerializer):
event_data = drf_serializers.JSONField()
event_type = drf_serializers.CharField(validators=[ValidSearchType()])

class Meta:
model = models.SessionEvent
fields = (
'id',
'url',
'related',
'summary_fields',
'created',
'modified',
'session_id',
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer it if we didn't make user's actual session IDs available to the public via the API. It seems like these could be used to potentially link searches back to individual users, or make it easier for attackers to spoof other people's sessions.

I propose storing SHA hashes of the session IDs in the database instead. That way we can still aggregate events by a unique session identifier, but we can't look up session IDs that are linked to a specific user's session without knowing the real session ID, which should only be available to the individual user.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Completely agree. Considering current implementation of ACL (SessionEventAccess class) it's a huge security issue.

'event_type',
'event_data'
)

def get_url(self, instance):
if instance is None:
return ''
return reverse('api:event_detail', args=(instance.pk,))

def get_releated(self, instance):
return{}

def get_summary_fields(self, instance):
return {}

def update(self, instance, validated_data):
event_data = clean_search_data(copy.copy(instance.event_data))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why copy is needed here?

validated_event_data = copy.copy(validated_data.get('event_data'))
if validated_data.get('event_type') == \
constants.EventType.SEARCH.value:
if validated_event_data.get('next_page_clicks'):
event_data['next_page_clicks'] += 1
if validated_event_data.get('prev_page_clicks'):
event_data['prev_page_clicks'] += 1
if validated_event_data.get('last_page_size'):
event_data['last_page_size'] = \
validated_event_data['last_page_size']
for label in ('results_clicked', 'repositories_clicked',
'content_items_clicked'):
append_list_to_list(
validated_event_data, event_data, label)
validated_data['event_data'] = event_data
return super(EventSerializer, self).update(instance, validated_data)

def create(self, validated_data):
if validated_data.get('event_type') == \
constants.EventType.SEARCH.value:
event_data = validated_data.get('event_data', {})
validated_data['event_data'] = clean_search_data(event_data)
return super(EventSerializer, self).create(validated_data)
10 changes: 7 additions & 3 deletions galaxy/api/serializers/users.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
from collections import OrderedDict

from allauth.account.models import EmailAddress

from django.contrib.auth import get_user_model
from django.contrib.auth.models import AnonymousUser
from django.core.urlresolvers import reverse

from rest_framework import serializers

from .serializers import BaseSerializer
Expand Down Expand Up @@ -47,9 +50,10 @@ def get_authenticated(self, obj):
return obj.is_authenticated()

def get_primary_email(self, obj):
emails = EmailAddress.objects.filter(user=obj, primary=True)
if emails:
return emails[0].email
if obj and not isinstance(obj, AnonymousUser):
emails = EmailAddress.objects.filter(user=obj, primary=True)
if emails:
return emails[0].email
return None


Expand Down
11 changes: 11 additions & 0 deletions galaxy/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,16 @@
name='community_survey_detail'),
]

event_tracking_urls = [
url(r'^$',
views.EventList.as_view(),
name='event_list'),

url(r'^(?P<pk>[0-9]+)/$',
views.EventDetail.as_view(),
name='event_detail'),
]

v1_urls = [
url(r'^$', views.ApiV1RootView.as_view(), name='api_v1_root_view'),
url(r'^account/', include(account_urls)),
Expand All @@ -265,6 +275,7 @@
url(r'^content_blocks/', include(content_block_urls)),
url(r'^content_types/', include(content_type_urls)),
url(r'^community_surveys/', include(community_survey_urls)),
url(r'^events/', include(event_tracking_urls)),
]

urlpatterns = [
Expand Down
1 change: 1 addition & 0 deletions galaxy/api/views/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .content_block import * # noqa
from .content_type import * # noqa
from .email import * # noqa
from .event import * # noqa
from .namespace import * # noqa
from .notification import * # noqa
from .repository import * # noqa
Expand Down
66 changes: 66 additions & 0 deletions galaxy/api/views/event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# (c) 2012-2018, Ansible by Red Hat
#
# This file is part of Ansible Galaxy
#
# Ansible Galaxy is free software: you can redistribute it and/or modify
# it under the terms of the Apache License as published by
# the Apache Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# Ansible Galaxy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# Apache License for more details.
#
# You should have received a copy of the Apache License
# along with Galaxy. If not, see <http://www.apache.org/licenses/>.

import logging

from rest_framework import serializers as drf_serializers
from galaxy.main import models
from galaxy.api import serializers

from . import base_views

from rest_framework.response import Response

logger = logging.getLogger(__name__)

__all__ = [
'EventList',
'EventDetail'
]


class EventList(base_views.ListCreateAPIView):
model = models.SessionEvent
serializer_class = serializers.EventSerializer

def post(self, request, *args, **kwargs):
if not request.session.get('session_id'):
request.session['session_id'] = str(
models.SessionIdentifier.objects.create())
request.data['session_id'] = request.session['session_id']
serializer = self.get_serializer(data=request.data)
serializer.is_valid(raise_exception=True)
serializer.save()
headers = self.get_success_headers(serializer.data)
return Response(serializer.data, headers=headers)


class EventDetail(base_views.RetrieveUpdateAPIView):
model = models.SessionEvent
serializer_class = serializers.EventSerializer

def update(self, request, *args, **kwargs):
if not request.session.get('session_id'):
request.session['session_id'] = str(
models.SessionIdentifier.objects.create())
request.data['session_id'] = request.session['session_id']
instance = self.get_object()
if str(instance.session_id.session_id) != request.data['session_id']:
message = ('Request session_id %s does not match '
'existing event object.' % request.data['session_id'])
raise drf_serializers.ValidationError(message)
return super(EventDetail, self).update(request, *args, **kwargs)
9 changes: 9 additions & 0 deletions galaxy/api/views/users.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging

from rest_framework import exceptions
from rest_framework.response import Response

from django.core.exceptions import ObjectDoesNotExist
from django.contrib.auth.models import AnonymousUser
Expand Down Expand Up @@ -59,6 +60,14 @@ def get_object(self):
obj = AnonymousUser()
return obj

def retrieve(self, request, *args, **kwargs):
if not request.session.get('session_id'):
request.session['session_id'] = str(
models.SessionIdentifier.objects.create())
instance = self.get_object()
serializer = self.get_serializer(instance)
return Response(serializer.data)


class UserRepositoriesList(base_views.SubListAPIView):
model = models.Repository
Expand Down
1 change: 1 addition & 0 deletions galaxy/api/views/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ def get(self, request, format=None):
data['users'] = reverse('api:user_list')
data['surveys'] = reverse('api:community_survey_list')
data['emails'] = reverse('api:email_list')
data['events'] = reverse('api:event_list')
return Response(data)


Expand Down
10 changes: 10 additions & 0 deletions galaxy/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,13 @@ def choices(cls):
(cls.APB.value, 'Ansible Playbook Bundle'),
(cls.MULTI.value, 'Multi-content'),
]


class EventType(enum.Enum):
SEARCH = 'search'

@classmethod
def choices(cls):
return [
(cls.SEARCH.value, 'Search'),
]
60 changes: 60 additions & 0 deletions galaxy/main/migrations/0114_auto_20180829_1035.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.15 on 2018-08-29 14:35
from __future__ import unicode_literals

import django.contrib.postgres.fields.jsonb
from django.db import migrations, models
import django.db.models.deletion
import galaxy.main.mixins
import uuid


class Migration(migrations.Migration):

dependencies = [
('main', '0113_repository_community_score'),
]

operations = [
migrations.CreateModel(
name='SessionEvent',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True,
serialize=False, verbose_name='ID')),
('created', models.DateTimeField(auto_now_add=True)),
('modified', models.DateTimeField(auto_now=True)),
('event_type', models.CharField(
choices=[('search', 'Search')], max_length=30)),
('event_data',
django.contrib.postgres.fields.jsonb.JSONField(default={})),
],
bases=(models.Model, galaxy.main.mixins.DirtyMixin),
),
migrations.CreateModel(
name='SessionIdentifier',
fields=[
('created', models.DateTimeField(auto_now_add=True)),
('modified', models.DateTimeField(auto_now=True)),
('session_id', models.UUIDField(default=uuid.uuid4,
editable=False,
primary_key=True,
serialize=False)),
],
options={
'abstract': False,
},
bases=(models.Model, galaxy.main.mixins.DirtyMixin),
),
migrations.AddField(
model_name='sessionevent',
name='session_id',
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name='events',
to='main.SessionIdentifier'),
),
migrations.AlterUniqueTogether(
name='sessionevent',
unique_together=set([('session_id', 'created', 'event_type')]),
),
]
Loading