-
Notifications
You must be signed in to change notification settings - Fork 331
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
User session tracking #1114
User session tracking #1114
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -391,6 +391,22 @@ def can_delete(self, obj): | |
return False | ||
|
||
|
||
class SessionEventAccess(BaseAccess): | ||
model = models.SessionEvent | ||
|
||
def can_read(self, obj): | ||
return True | ||
|
||
def can_add(self, data): | ||
return True | ||
|
||
def can_change(self, obj, data): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe make it so that you can only change an object if the session ID's match There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree |
||
return True | ||
|
||
def can_delete(self, obj): | ||
return False | ||
|
||
|
||
register_access(User, UserAccess) | ||
register_access(EmailAddress, EmailAddressAccess) | ||
register_access(Token, UserTokenAccess) | ||
|
@@ -411,3 +427,4 @@ def can_delete(self, obj): | |
register_access(models.ContentType, ContentTypeAccess) | ||
register_access(models.CloudPlatform, CloudPlatformsAccess) | ||
register_access(models.CommunitySurvey, CommunitySurveyAccess) | ||
register_access(models.SessionEvent, SessionEventAccess) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
# (c) 2012-2018, Ansible by Red Hat | ||
# | ||
# This file is part of Ansible Galaxy | ||
# | ||
# Ansible Galaxy is free software: you can redistribute it and/or modify | ||
# it under the terms of the Apache License as published by | ||
# the Apache Software Foundation, either version 2 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# Ansible Galaxy is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# Apache License for more details. | ||
# | ||
# You should have received a copy of the Apache License | ||
# along with Galaxy. If not, see <http://www.apache.org/licenses/>. | ||
|
||
import copy | ||
import logging | ||
|
||
from galaxy import constants | ||
from galaxy.main import models | ||
|
||
from rest_framework import serializers as drf_serializers | ||
|
||
from django.core.urlresolvers import reverse | ||
|
||
from . import serializers | ||
|
||
|
||
__all__ = ( | ||
'EventSerializer', | ||
) | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class ValidSearchType(object): | ||
choices = [] | ||
|
||
def __init__(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you initialize static class field in the
|
||
for choice in constants.EventType.choices(): | ||
self.choices.append(choice[0]) | ||
|
||
def __call__(self, value): | ||
if value not in self.choices: | ||
message = '%s is not a valid choice.' % value | ||
raise drf_serializers.ValidationError(message) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually you could replace this whole class with simple function:
|
||
|
||
|
||
def clean_search_data(data): | ||
return { | ||
'search_params': data.get('search_params', {}), | ||
'search_results': data.get('search_results', 0), | ||
'next_page_clicks': data.get('next_page_clicks', 0), | ||
'prev_page_clicks': data.get('prev_page_clicks', 0), | ||
'results_clicked': data.get('results_clicked', []), | ||
'repositories_clicked': data.get('repositories_clicked', []), | ||
'content_items_clicked': data.get('content_items_clicked', []), | ||
'last_page_size': data.get('last_page_size', 0) | ||
} | ||
|
||
|
||
def append_list_to_list(source_data, target_data, label): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's unclear what this function does. Docstring or some explanation is required. |
||
if source_data.get(label): | ||
for item in source_data[label]: | ||
if item not in target_data[label]: | ||
target_data[label].append(item) | ||
|
||
|
||
class EventSerializer(serializers.BaseSerializer): | ||
event_data = drf_serializers.JSONField() | ||
event_type = drf_serializers.CharField(validators=[ValidSearchType()]) | ||
|
||
class Meta: | ||
model = models.SessionEvent | ||
fields = ( | ||
'id', | ||
'url', | ||
'related', | ||
'summary_fields', | ||
'created', | ||
'modified', | ||
'session_id', | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer it if we didn't make user's actual session IDs available to the public via the API. It seems like these could be used to potentially link searches back to individual users, or make it easier for attackers to spoof other people's sessions. I propose storing SHA hashes of the session IDs in the database instead. That way we can still aggregate events by a unique session identifier, but we can't look up session IDs that are linked to a specific user's session without knowing the real session ID, which should only be available to the individual user. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Completely agree. Considering current implementation of ACL ( |
||
'event_type', | ||
'event_data' | ||
) | ||
|
||
def get_url(self, instance): | ||
if instance is None: | ||
return '' | ||
return reverse('api:event_detail', args=(instance.pk,)) | ||
|
||
def get_releated(self, instance): | ||
return{} | ||
|
||
def get_summary_fields(self, instance): | ||
return {} | ||
|
||
def update(self, instance, validated_data): | ||
event_data = clean_search_data(copy.copy(instance.event_data)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why copy is needed here? |
||
validated_event_data = copy.copy(validated_data.get('event_data')) | ||
if validated_data.get('event_type') == \ | ||
constants.EventType.SEARCH.value: | ||
if validated_event_data.get('next_page_clicks'): | ||
event_data['next_page_clicks'] += 1 | ||
if validated_event_data.get('prev_page_clicks'): | ||
event_data['prev_page_clicks'] += 1 | ||
if validated_event_data.get('last_page_size'): | ||
event_data['last_page_size'] = \ | ||
validated_event_data['last_page_size'] | ||
for label in ('results_clicked', 'repositories_clicked', | ||
'content_items_clicked'): | ||
append_list_to_list( | ||
validated_event_data, event_data, label) | ||
validated_data['event_data'] = event_data | ||
return super(EventSerializer, self).update(instance, validated_data) | ||
|
||
def create(self, validated_data): | ||
if validated_data.get('event_type') == \ | ||
constants.EventType.SEARCH.value: | ||
event_data = validated_data.get('event_data', {}) | ||
validated_data['event_data'] = clean_search_data(event_data) | ||
return super(EventSerializer, self).create(validated_data) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
# (c) 2012-2018, Ansible by Red Hat | ||
# | ||
# This file is part of Ansible Galaxy | ||
# | ||
# Ansible Galaxy is free software: you can redistribute it and/or modify | ||
# it under the terms of the Apache License as published by | ||
# the Apache Software Foundation, either version 2 of the License, or | ||
# (at your option) any later version. | ||
# | ||
# Ansible Galaxy is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
# Apache License for more details. | ||
# | ||
# You should have received a copy of the Apache License | ||
# along with Galaxy. If not, see <http://www.apache.org/licenses/>. | ||
|
||
import logging | ||
|
||
from rest_framework import serializers as drf_serializers | ||
from galaxy.main import models | ||
from galaxy.api import serializers | ||
|
||
from . import base_views | ||
|
||
from rest_framework.response import Response | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
__all__ = [ | ||
'EventList', | ||
'EventDetail' | ||
] | ||
|
||
|
||
class EventList(base_views.ListCreateAPIView): | ||
model = models.SessionEvent | ||
serializer_class = serializers.EventSerializer | ||
|
||
def post(self, request, *args, **kwargs): | ||
if not request.session.get('session_id'): | ||
request.session['session_id'] = str( | ||
models.SessionIdentifier.objects.create()) | ||
request.data['session_id'] = request.session['session_id'] | ||
serializer = self.get_serializer(data=request.data) | ||
serializer.is_valid(raise_exception=True) | ||
serializer.save() | ||
headers = self.get_success_headers(serializer.data) | ||
return Response(serializer.data, headers=headers) | ||
|
||
|
||
class EventDetail(base_views.RetrieveUpdateAPIView): | ||
model = models.SessionEvent | ||
serializer_class = serializers.EventSerializer | ||
|
||
def update(self, request, *args, **kwargs): | ||
if not request.session.get('session_id'): | ||
request.session['session_id'] = str( | ||
models.SessionIdentifier.objects.create()) | ||
request.data['session_id'] = request.session['session_id'] | ||
instance = self.get_object() | ||
if str(instance.session_id.session_id) != request.data['session_id']: | ||
message = ('Request session_id %s does not match ' | ||
'existing event object.' % request.data['session_id']) | ||
raise drf_serializers.ValidationError(message) | ||
return super(EventDetail, self).update(request, *args, **kwargs) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# -*- coding: utf-8 -*- | ||
# Generated by Django 1.11.15 on 2018-08-29 14:35 | ||
from __future__ import unicode_literals | ||
|
||
import django.contrib.postgres.fields.jsonb | ||
from django.db import migrations, models | ||
import django.db.models.deletion | ||
import galaxy.main.mixins | ||
import uuid | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
('main', '0113_repository_community_score'), | ||
] | ||
|
||
operations = [ | ||
migrations.CreateModel( | ||
name='SessionEvent', | ||
fields=[ | ||
('id', models.AutoField(auto_created=True, primary_key=True, | ||
serialize=False, verbose_name='ID')), | ||
('created', models.DateTimeField(auto_now_add=True)), | ||
('modified', models.DateTimeField(auto_now=True)), | ||
('event_type', models.CharField( | ||
choices=[('search', 'Search')], max_length=30)), | ||
('event_data', | ||
django.contrib.postgres.fields.jsonb.JSONField(default={})), | ||
], | ||
bases=(models.Model, galaxy.main.mixins.DirtyMixin), | ||
), | ||
migrations.CreateModel( | ||
name='SessionIdentifier', | ||
fields=[ | ||
('created', models.DateTimeField(auto_now_add=True)), | ||
('modified', models.DateTimeField(auto_now=True)), | ||
('session_id', models.UUIDField(default=uuid.uuid4, | ||
editable=False, | ||
primary_key=True, | ||
serialize=False)), | ||
], | ||
options={ | ||
'abstract': False, | ||
}, | ||
bases=(models.Model, galaxy.main.mixins.DirtyMixin), | ||
), | ||
migrations.AddField( | ||
model_name='sessionevent', | ||
name='session_id', | ||
field=models.ForeignKey( | ||
on_delete=django.db.models.deletion.CASCADE, | ||
related_name='events', | ||
to='main.SessionIdentifier'), | ||
), | ||
migrations.AlterUniqueTogether( | ||
name='sessionevent', | ||
unique_together=set([('session_id', 'created', 'event_type')]), | ||
), | ||
] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really want to allow anybody access to our internal session tracking information?
I would say it should be push-only.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if we change
SessionEvent
to store session hashes, does it matter if people can read the hashes?