Skip to content

Commit

Permalink
Handle paginated responses for followers and following.
Browse files Browse the repository at this point in the history
Make a few refinements to how we are populating followers and following.
  • Loading branch information
matthewcburke committed Jul 27, 2015
1 parent dd8502c commit 8ebfe50
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 63 deletions.
45 changes: 27 additions & 18 deletions github_users/github_users/github_user_api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
"""
TODO:
- Handle paginated responses
"""
import logging
import requests
import signal
Expand Down Expand Up @@ -52,7 +48,7 @@ def _populate_headers(self, endpoint, etag=None):

if etag is None:
etag = cache.get('e_tag-%s' % endpoint)
if etag is not None:
if etag is not None and etag is not False:
headers['If-None-Match'] = '%s' % etag

return headers
Expand All @@ -68,7 +64,7 @@ def _cache_etag(response, endpoint):
cache.set(cache_key,
response.headers[header_key].lstrip('W/'))

def _get(self, endpoint, headers):
def _get(self, endpoint, headers, absolute_url=False):
log.debug("== Get '%s'" % endpoint)
if self.rate_limit is not None and self.rate_remaining == 0:
sec_to_reset = self.rate_reset - int(time.time())
Expand All @@ -79,8 +75,13 @@ def _get(self, endpoint, headers):
time.sleep(sec_to_reset + 1)
signal.signal(signal.SIGINT, prev_handler)

if absolute_url:
url = endpoint
else:
url = ''.join([self.HOST, endpoint])

try:
response = requests.get(''.join([self.HOST, endpoint]), headers=headers)
response = requests.get(url, headers=headers, timeout=19)
except requests.exceptions.RequestException as e:
log.exception("Exception while getting '%s': %s" % (endpoint, e))
else:
Expand Down Expand Up @@ -112,6 +113,8 @@ def _repackage_response(response):
'etag': response.headers.get('etag', '').lstrip('W/'),
'json': json_data
}
if 'next' in response.links:
resp_dict['next'] = response.links['next']['url']
else:
resp_dict = {'status': None, 'etag': None, 'json': []}

Expand All @@ -125,17 +128,23 @@ def get_user(self, username, etag=None):
self._cache_etag(response, user_endpoint)
return self._repackage_response(response)

def get_user_followers(self, username, follower_etag=None, follower_url=None):
    """
    Fetch one page of the followers of ``username``.

    When ``follower_url`` is omitted, the first page is requested from the
    relative endpoint ``/users/<username>/followers?per_page=100``. To
    continue a paginated listing, pass the absolute ``next`` URL of the
    previous response as ``follower_url``; it is then requested as-is
    instead of being appended to ``self.HOST``.

    ``follower_etag`` is forwarded to ``_populate_headers`` together with
    the URL being requested.

    Returns the value produced by ``_repackage_response`` for the response.
    """
    # A caller-supplied URL comes from a pagination link and is already absolute.
    absolute_url = follower_url is not None
    if not absolute_url:
        follower_url = '/users/%s/followers?per_page=100' % username

    headers = self._populate_headers(follower_url, follower_etag)
    response = self._get(follower_url, headers, absolute_url)
    self._cache_etag(response, follower_url)
    return self._repackage_response(response)

def get_user_following(self, username, following_etag=None, following_url=None):
    """
    Fetch one page of the users that ``username`` is following.

    When ``following_url`` is omitted, the first page is requested from the
    relative endpoint ``/users/<username>/following?per_page=100``. To
    continue a paginated listing, pass the absolute ``next`` URL of the
    previous response as ``following_url``; it is then requested as-is
    instead of being appended to ``self.HOST``.

    ``following_etag`` is forwarded to ``_populate_headers`` together with
    the URL being requested.

    Returns the value produced by ``_repackage_response`` for the response.
    """
    # A caller-supplied URL comes from a pagination link and is already absolute.
    absolute_url = following_url is not None
    if not absolute_url:
        following_url = '/users/%s/following?per_page=100' % username

    headers = self._populate_headers(following_url, following_etag)
    response = self._get(following_url, headers, absolute_url)
    self._cache_etag(response, following_url)
    return self._repackage_response(response)
114 changes: 69 additions & 45 deletions github_users/github_users/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,9 @@ def __init__(self, *args, **kwargs):
def __unicode__(self):
    """Return ``self.login`` as the text representation of this user."""
    return self.login

def populate_from_github(self, save=True):
api_resp = self.api.get_user(self.login, self.e_tag)
def populate_from_github(self, save=True, force=False):
etag = False if force else self.e_tag
api_resp = self.api.get_user(self.login, etag)
now = datetime.datetime.now(tz=pytz.UTC)
if api_resp['status'] == requests.codes.ok:
self.github_id = api_resp['json']['id']
Expand All @@ -95,77 +96,100 @@ def populate_from_github(self, save=True):
self.save()
return self

def _add_followers(self, data):
    """
    Attach the users described by ``data`` to ``self.followers``.

    ``data`` is an iterable of dicts providing at least the ``id`` and
    ``login`` keys. Each entry is fetched, or created when missing, through
    ``GitHubUser.objects.get_or_create``; newly created rows get
    ``last_retrieved`` and ``last_checked`` set to the current UTC time.
    Users that are not yet in ``self.followers`` are added with a single
    ``add`` call.
    """
    # A set gives constant-time membership tests and also prevents the same
    # user from being queued twice when it appears more than once in ``data``.
    known = set(self.followers.all())
    to_add = []
    for user in data:
        now = datetime.datetime.now(tz=pytz.UTC)
        follower, _created = GitHubUser.objects.get_or_create(
            github_id=user['id'],
            login=user['login'],
            defaults={'last_retrieved': now, 'last_checked': now}
        )
        if follower not in known:
            known.add(follower)
            to_add.append(follower)
    if to_add:
        self.followers.add(*to_add)

def populate_followers(self, force=False):
    """
    Sync ``self.followers`` with the follower listing returned by the API.

    Nothing is requested when ``num_followers`` is 0, unless ``force`` is
    set. With ``force`` the existing follower relations are cleared first
    and ``False`` is passed as the ETag instead of ``followers_etag``, so
    the request is made without the stored ETag.

    When the first page comes back with status 200, its ETag is stored in
    ``followers_etag`` and saved, the page is handed to ``_add_followers``,
    and every further page is fetched by following the ``next`` URL of the
    preceding response.
    """
    if force:
        # Reset the relation this method rebuilds. (This used to clear
        # ``self.following``, wiping the wrong side of the graph.)
        self.followers.clear()
    elif self.num_followers == 0:
        return

    etag = False if force else self.followers_etag
    api_resp = self.api.get_user_followers(self.login, etag)
    if api_resp['status'] != requests.codes.ok:
        return

    self.followers_etag = api_resp['etag']
    self.save()
    self._add_followers(api_resp['json'])
    # Follow pagination links until a response no longer advertises one.
    while 'next' in api_resp:
        api_resp = self.api.get_user_followers(self.login, follower_url=api_resp['next'])
        self._add_followers(api_resp['json'])

def _add_following(self, data):
    """
    Attach the users described by ``data`` to ``self.following``.

    ``data`` is an iterable of dicts providing at least the ``id`` and
    ``login`` keys. Each entry is fetched, or created when missing, through
    ``GitHubUser.objects.get_or_create``; newly created rows get
    ``last_retrieved`` and ``last_checked`` set to the current UTC time.
    Users that are not yet in ``self.following`` are added with a single
    ``add`` call.
    """
    # A set gives constant-time membership tests and also prevents the same
    # user from being queued twice when it appears more than once in ``data``.
    known = set(self.following.all())
    to_add = []
    for user in data:
        now = datetime.datetime.now(tz=pytz.UTC)
        gh_user, _created = GitHubUser.objects.get_or_create(
            github_id=user['id'],
            login=user['login'],
            defaults={'last_retrieved': now, 'last_checked': now}
        )
        if gh_user not in known:
            known.add(gh_user)
            to_add.append(gh_user)
    if to_add:
        self.following.add(*to_add)

def populate_following(self, force=False):
    """
    Sync ``self.following`` with the following listing returned by the API.

    Nothing is requested when ``num_following`` is 0, unless ``force`` is
    set. With ``force`` the existing following relations are cleared first
    and ``False`` is passed as the ETag instead of ``following_etag``, so
    the request is made without the stored ETag.

    When the first page comes back with status 200, its ETag is stored in
    ``following_etag`` and saved, the page is handed to ``_add_following``,
    and every further page is fetched by following the ``next`` URL of the
    preceding response.
    """
    if force:
        self.following.clear()
    elif self.num_following == 0:
        return

    etag = False if force else self.following_etag
    api_resp = self.api.get_user_following(self.login, etag)
    if api_resp['status'] != requests.codes.ok:
        return

    self.following_etag = api_resp['etag']
    self.save()
    self._add_following(api_resp['json'])
    # Follow pagination links until a response no longer advertises one.
    while 'next' in api_resp:
        api_resp = self.api.get_user_following(self.login, following_url=api_resp['next'])
        self._add_following(api_resp['json'])

def fill_follow_graph(self, depth=3, parents=None, force=False):
    """
    Get followers and followees recursively to the given depth.

    ``depth`` counts levels including this user: a value of 0 or less does
    nothing, 1 populates only this user's followers and following, and
    larger values recurse into each of those users with ``depth - 1``.

    ``parents`` is used by the recursive calls to avoid following graph
    cycles: users already on the path from the starting user are skipped.
    The list passed in is never modified.

    ``force`` is forwarded to ``populate_followers``, ``populate_following``
    and ``populate_from_github``.
    """
    if depth <= 0:
        return

    # Build a private ancestor list so sibling branches (and the caller's
    # own list) never pick up each other's entries.
    lineage = list(parents) if parents else []
    lineage.append(self)

    self.populate_followers(force=force)
    self.populate_following(force=force)

    if depth <= 1:
        return

    # Followers are walked first, then followees; ``all()`` is evaluated
    # only when each relation's turn comes.
    for relation in (self.followers, self.following):
        for user in relation.all():
            if user in lineage:
                continue
            # Only fully populate the user if there will be multiple levels.
            user = user.populate_from_github(force=force)
            user.fill_follow_graph(depth=depth - 1, parents=lineage, force=force)

def users_at_distance(self, distance):
"""
Expand Down
6 changes: 6 additions & 0 deletions github_users/github_users/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,11 @@
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
'formatters': {
'verbose': {
'format': '%(levelname)s %(asctime)s %(module)s %(process)d %(thread)d %(message)s'
},
},
'handlers': {
'console': {
'class': 'logging.StreamHandler',
Expand All @@ -115,6 +120,7 @@
'level': 'DEBUG',
'class': 'logging.FileHandler',
'filename': '/Users/mburke/debug.log',
'formatter': 'verbose',
},
},
'loggers': {
Expand Down

0 comments on commit 8ebfe50

Please sign in to comment.