Skip to content
This repository has been archived by the owner on Jun 19, 2019. It is now read-only.

Commit

Permalink
rework lookup response caching
Browse files Browse the repository at this point in the history
IMPORTANT: this commit removes the previous LOOKUP_SELF setting and
replaces it with LOOKUP_ROOT which points to the root of the lookupproxy
API server. Deployments will need to be re-configured.

Previously we (ab)used the token passed to us from the user to retrieve
their lookup profile. The token is only conveniently present at
authorisation time and so the authorisation logic was combined with
fetching and caching the lookup resource for the current user.

Aside from complication the authorisation flow, it slightly violated the
spirit of OAuth2 tokens in that we were not accessing lookup *on behalf*
of the user. We were, in fact, accessing lookup on behalf of the IAR
backend application.

We also encoded the lookup scheme and identifier in the username which,
if not exactly brittle, was ugly.

Re-work the lookup caching logic to be separate from the authentication
logic:

1. Explicitly record the mapping between lookup scheme/identifier pairs
   and Django users by way of a new UserLookup model.

2. Separate the caching logic out into a new accessor function,
   assets.lookup.get_person_for_user(), which takes a Django user and
   returns the lookup person resource for that user. The resource is
   fetched from an in-memory cache if present or fetched from lookup if
   not. When fetching data from lookup, the IAR backend obtains its own
   token with a lookup:anonymous scope and uses that token to talk to
   lookup. The avoids the need to keep the user's token around.

None of the existing permissions logic has been changed but now we
explicitly ask for a lookup resource for a user *at the point we need
it* rather than relying on it having been cached as part of the
authentication flow.

Since the user token no longer requires a lookup:anonymous scope, remove
it from the Swagger UI.

The default cached person resource lifetime is configurable through a
setting.

We update the scripts/create-client.sh script to create a backend client
capable of requesting hydra.introspect and lookup:anonymous scopes and a
client used for testing the UI.
  • Loading branch information
rjw57 committed Mar 6, 2018
1 parent ec90a5f commit e978edd
Show file tree
Hide file tree
Showing 19 changed files with 330 additions and 83 deletions.
74 changes: 28 additions & 46 deletions assets/authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
"""
import datetime
import logging
import requests
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
from rest_framework.authentication import BaseAuthentication
import requests.exceptions

from .models import UserLookup
from .oauth2client import AuthenticatedSession


Expand All @@ -21,9 +19,6 @@
INTROSPECT_SESSION = AuthenticatedSession(scopes=settings.ASSETS_OAUTH2_INTROSPECT_SCOPES)


_request.__session = None


def _utc_now():
"""Return a UNIX-style timestamp representing "now" in UTC."""
return (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)).total_seconds()
Expand Down Expand Up @@ -59,38 +54,7 @@ def authenticate(self, request):
subject = token.get('sub', '')

if subject != '':
# Our subjects are of the form '<scheme>:<identifier>'. Form a valid Django username
# from these values.
scheme, identifier = subject.split(':')
username = '{}+{}'.format(scheme, identifier)

# This is not quite the same as the default get_or_create() behaviour because we make
# use of the create_user() helper here. This ensures the user is created and that
# set_unusable_password() is also called on it.
try:
user = get_user_model().objects.get(username=username)
except ObjectDoesNotExist:
user = get_user_model().objects.create_user(username=username)

if cache.get("{user.username}:lookup".format(user=user)) is None:
# Adding 10 extra seconds to the expiry so that if the API requests takes long
# the cache doesn't get expired between the authentication and the response
lookup_response = requests.get(
settings.LOOKUP_SELF + "?fetch=all_insts,all_groups",
headers={"Authorization": "Bearer %s" % bearer})

try:
# Ensure the response succeeded
lookup_response.raise_for_status()

# Cache the response body as parsed JSON
cache.set("{user.username}:lookup".format(user=user), lookup_response.json(),
datetime.timedelta(token['exp'] - _utc_now()).seconds+10)
except requests.exceptions.HTTPError as error:
LOG.error(
('HTTP Error {error} retrieving institutions for user "{user.username}" '
'with subject {subject}').format(error=error, user=user, subject=subject))
LOG.error('Payload was: {}'.format(lookup_response.content))
user = user_from_subject(subject)
else:
user = None

Expand Down Expand Up @@ -124,14 +88,6 @@ def validate_token(self, token):
token['exp'], now)
return None

# HACK: lookup:anonymous is required for the moment since we make use of the token/self
# lookupproxy endpoint *and* we do so using the bearer token provided to the backend by the
# user. TODO: refactor this to use the lookupproxy as the backend client.
if 'lookup:anonymous' not in token.get('scope', '').split(' '):
LOG.warning(
'Presented bearer token with no lookup:anonymous scope. Permissions checking '
'will be broken.')

return token

def authenticate_header(self, request):
Expand All @@ -140,3 +96,29 @@ def authenticate_header(self, request):
"""
return 'Bearer'


def user_from_subject(subject):
"""
Return a Django user object given a token subject.
"""
# Our subjects are of the form '<scheme>:<identifier>'. Form a valid Django username
# from these values.
scheme, identifier = subject.split(':')
username = '{}+{}'.format(scheme, identifier)

# This is not quite the same as the default get_or_create() behaviour because we make
# use of the create_user() helper here. This ensures the user is created and that
# set_unusable_password() is also called on it.
try:
user = get_user_model().objects.get(username=username)
except ObjectDoesNotExist:
user = get_user_model().objects.create_user(username=username)

# Record this association of user, subject and lookup identity in the DB. Since the user is
# marked as the primary key field, this will throw a database error if there is an existing
# record with differing scheme and identifier.
UserLookup.objects.get_or_create(user=user, scheme=scheme, identifier=identifier)

This comment has been minimized.

Copy link
@abrahammartin

abrahammartin Mar 9, 2018

Member

I'm starting to wonder if it is now the time to create an APIUser that extends User and we put all the extra bits in there, like scheme, identifier, etc and use that as our default User model when using API authentication and use the normal User model when authenticating via Raven.

This comment has been minimized.

Copy link
@rjw57

rjw57 Mar 9, 2018

Author Member

Agree. I created #64 to track this.


return user
17 changes: 15 additions & 2 deletions assets/defaultsettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,22 @@
"""

ASSETS_OAUTH2_LOOKUP_SCOPES = ['lookup:anonymous']
"""
List of OAuth2 scopes the API server will request for the token it will use with lookup.
"""

LOOKUP_ROOT = None
"""
URL of the lookup proxy's API root.
"""

LOOKUP_SELF = None
"""
URL of the LookupProxy endpoint that the authentication uses to get data about the user logged in.
Responses to the people endpoint of lookupproxy are cached to increase performance. We assume that
lookup details on people change rarely. This setting specifies the lifetime of a single cached
lookup resource for a person in seconds.
"""
LOOKUP_PEOPLE_CACHE_LIFETIME = 1800
75 changes: 75 additions & 0 deletions assets/lookup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
Module providing lookup API-related functionality.
"""
from urllib.parse import urljoin
import logging
from django.conf import settings
from django.core.cache import cache

from .models import UserLookup
from .oauth2client import AuthenticatedSession


LOG = logging.getLogger(__name__)


#: An authenticated session which can access the lookup API
LOOKUP_SESSION = AuthenticatedSession(scopes=settings.ASSETS_OAUTH2_LOOKUP_SCOPES)


class LookupError(RuntimeError):
"""
Error raised if :py:func:`~.get_person_for_user` encounters a problem.
"""
pass


def get_person_for_user(user):
"""
Return the resource from Lookup associated with the specified user. A requests package
:py:class:`HTTPError` is raised if the request fails.
The result of this function call is cached based on the username so it is safe to call this
multiple times.
If user is the anonymous user (user.is_anonymous is True), :py:class:`~.UserIsAnonymousError`
is raised.
"""
# check that the user is not anonymous
if user.is_anonymous:
raise LookupError('User is anonymous')

# check the user has an associated lookup identity
if not UserLookup.objects.filter(user=user).exists():
raise LookupError('User has no lookup identity')

# return a cached response if we have it
cached_resource = cache.get("{user.username}:lookup".format(user=user))
if cached_resource is not None:
return cached_resource

# Extract the scheme and identifier for the token
scheme = user.lookup.scheme
identifier = user.lookup.identifier

# Ask lookup about this person
lookup_response = LOOKUP_SESSION.request(
method='GET', url=urljoin(
settings.LOOKUP_ROOT,
'people/{scheme}/{identifier}?fetch=all_insts,all_groups'.format(
scheme=scheme, identifier=identifier
)
)
)

# Raise if there was an error
lookup_response.raise_for_status()

# save cached value
cache.set("{user.username}:lookup".format(user=user), lookup_response.json(),
settings.LOOKUP_PEOPLE_CACHE_LIFETIME)

# recurse, which should now retrieve the value from the cache
return get_person_for_user(user)
24 changes: 24 additions & 0 deletions assets/migrations/0009_userlookup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 2.0.2 on 2018-03-06 12:31

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):

dependencies = [
('auth', '0009_alter_user_last_name_max_length'),
('assets', '0008_auto_20180227_1035'),
]

operations = [
migrations.CreateModel(
name='UserLookup',
fields=[
('user', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='lookup', serialize=False, to=settings.AUTH_USER_MODEL)),
('scheme', models.CharField(max_length=255)),
('identifier', models.CharField(max_length=255)),
],
),
]
18 changes: 18 additions & 0 deletions assets/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import uuid

from automationcommon.models import ModelChangeMixin
from django.conf import settings
from django.db import models
from django.db.models import Case, When, Q, BooleanField, Value
from multiselectfield import MultiSelectField
Expand Down Expand Up @@ -184,3 +185,20 @@ class Asset(ModelChangeMixin, models.Model):
created_at = models.DateTimeField(auto_now_add=True, db_index=True)
updated_at = models.DateTimeField(auto_now=True, db_index=True)
deleted_at = models.DateTimeField(default=None, blank=True, null=True)


class UserLookup(models.Model):
"""
A mapping from Django users to lookup schemes and identifiers.
"""
#: The corresponding user. Since each use only has one token identity, this is a OneToOneField.
user = models.OneToOneField(
settings.AUTH_USER_MODEL, on_delete=models.CASCADE, primary_key=True,
related_name='lookup')

#: The lookup identifier scheme property for the user
scheme = models.CharField(max_length=255)

#: The lookup identifier identifier property for the user
identifier = models.CharField(max_length=255)
9 changes: 5 additions & 4 deletions assets/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
"""
import logging

from django.core.cache import cache
from rest_framework import permissions
from rest_framework.exceptions import ValidationError

from .lookup import get_person_for_user

LOG = logging.getLogger(__name__)


Expand Down Expand Up @@ -75,11 +76,11 @@ def _validate_asset_user_institution(user, department):
"""Validates that the user is member of the department that the asset belongs to
(asset_department)."""

lookup_response = cache.get("{user.username}:lookup".format(user=user))
if lookup_response is None:
LOG.error('No cached lookup response for user %s', user.username)
if user is None or user.is_anonymous or department is None:
return False

lookup_response = get_person_for_user(user)

institutions = lookup_response.get('institutions')
if institutions is None:
LOG.error('No institutions in cached lookup response for user %s', user.username)
Expand Down
2 changes: 1 addition & 1 deletion assets/systemchecks.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def api_credentials_check(app_configs, **kwargs):
'ASSETS_OAUTH2_CLIENT_ID',
'ASSETS_OAUTH2_CLIENT_SECRET',
'ASSETS_OAUTH2_INTROSPECT_SCOPES',
'LOOKUP_SELF',
'LOOKUP_ROOT',
]
for idx, name in enumerate(required_settings):
value = getattr(settings, name, None)
Expand Down
13 changes: 13 additions & 0 deletions assets/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from django.core.cache import cache


def clear_cached_person_for_user(user):
"""Explicitly clear the cached lookup response for a user."""
if not user.is_anonymous:
cache.delete("{user.username}:lookup".format(user=user))


def set_cached_person_for_user(user, person):
"""Explicitly set the cached lookup response for a user."""
if not user.is_anonymous:
cache.set("{user.username}:lookup".format(user=user), person)
6 changes: 4 additions & 2 deletions assets/tests/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,17 @@
from django.test import TestCase
from django.urls import reverse

from assets.models import Asset
from assets.models import Asset, UserLookup


class AdminViewsTests(TestCase):
def setUp(self):
super().setUp()
self.asset = Asset.objects.create(name='foo')
self.superuser = get_user_model().objects.create_user(
username='testing', is_staff=True, is_superuser=True)
username='test0001', is_staff=True, is_superuser=True)
self.superuser_lookup = UserLookup.objects.create(
user=self.superuser, scheme='mock', identifier=self.superuser.username)

def test_index(self):
"""Viewing the assets index in admin should be possible."""
Expand Down
2 changes: 1 addition & 1 deletion assets/tests/test_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class RequiredSettings(TestCase):
'ASSETS_OAUTH2_CLIENT_ID',
'ASSETS_OAUTH2_CLIENT_SECRET',
'ASSETS_OAUTH2_INTROSPECT_SCOPES',
'LOOKUP_SELF'
'LOOKUP_ROOT'
]

def test_checks_pass(self):
Expand Down
Loading

0 comments on commit e978edd

Please sign in to comment.