Skip to content

Commit

Permalink
1.0.0 Improved Postgres Sharded ID fields (#47)
Browse files Browse the repository at this point in the history
* adding a BasePostgresShardGeneratedIDField to support more fields

* add showmigrations database default as all

* whoops wrong shards

* WIP

* WIP

* WIP

* WIP

* use pre_save and add decorator for user

* whoops

* bump version to 1.0.0
  • Loading branch information
JBKahn authored Oct 17, 2016
1 parent a3de57b commit 6ffc60e
Show file tree
Hide file tree
Showing 19 changed files with 323 additions and 89 deletions.
18 changes: 17 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,23 @@
Changelog
=========

0.1.0 (Oct 7th 2015)
1.0.0 (Oct 16th 2016)
------------------

### Django 1.10 compatibility and some additional library features!

- Added decorator for shard storage.
- Renamed `PostgresShardGeneratedIDField` to `PostgresShardGeneratedIDAutoField`.
- Added non-autoid `PostgresShardGeneratedIDField` that makes a separate call to
the database prior to saving. Good for statement based replication. Now you can
have more than one of these fields on a model.
- Fix `TableShardedIDField` to take a table name rather than model class so that
it doesn't give errors when reading the migrations file after deleting the table.
- Fix `showmigrations` to use the same database param as `migrate` and act on
all by default.


0.1.0 (Oct 7th 2016)
------------------

### Django 1.10 compatibility and some additional library features!
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class ShardedCarIDs(TableStrategyModel):

@model_config(sharded=True)
class Car(models.Model):
id = TableShardedIDField(primary_key=True, source_table=ShardeCarIDs)
id = TableShardedIDField(primary_key=True, source_table_name='app.ShardedCarIDs')
ignition_type = models.CharField(max_length=120)
company = models.ForeignKey('companies.Company')

Expand Down
5 changes: 5 additions & 0 deletions django_sharding/management/commands/showmigrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from django_sharding_library.management.commands.showmigrations import Command as ShowMigrationsCommand


class Command(ShowMigrationsCommand):
    """
    Re-exports the `django_sharding_library` showmigrations command from
    this app's management commands package, with no behavior of its own.
    """
26 changes: 18 additions & 8 deletions django_sharding_library/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,21 @@

from django_sharding_library.exceptions import NonExistentDatabaseException, ShardedModelInitializationException
from django_sharding_library.manager import ShardManager
from django_sharding_library.fields import ShardedIDFieldMixin, PostgresShardGeneratedIDField
from django_sharding_library.fields import ShardedIDFieldMixin, BasePostgresShardGeneratedIDField
from django_sharding_library.utils import register_migration_signal_for_model_receiver

PRE_MIGRATION_DISPATCH_UID = "PRE_MIGRATE_FOR_MODEL_%s"


def shard_storage_config(shard_group='default', shared_field='shard'):
    """
    A decorator for marking a class as one that stores a shard.

    The returned decorator records the given `shard_group` and
    `shared_field` on the class as `django_sharding__shard_group` and
    `django_sharding__shard_field`, sets `django_sharding__stores_shard`
    to True, and hands back the same class.
    """
    attributes_to_set = (
        ('django_sharding__shard_group', shard_group),
        ('django_sharding__shard_field', shared_field),
        ('django_sharding__stores_shard', True),
    )

    def configure(cls):
        for attribute_name, attribute_value in attributes_to_set:
            setattr(cls, attribute_name, attribute_value)
        return cls

    return configure


def model_config(shard_group=None, database=None, sharded_by_field=None):
"""
A decorator for marking a model as being either sharded or stored on a
Expand All @@ -33,7 +42,7 @@ def configure(cls):
)
setattr(cls, 'django_sharding__database', database)

postgres_shard_id_fields = list(filter(lambda field: issubclass(type(field), PostgresShardGeneratedIDField), cls._meta.fields))
postgres_shard_id_fields = list(filter(lambda field: issubclass(type(field), BasePostgresShardGeneratedIDField), cls._meta.fields))
if postgres_shard_id_fields:
database_dicts = [settings.DATABASES[database]] if database else [db_settings for db, db_settings in
iteritems(settings.DATABASES) if
Expand All @@ -42,19 +51,20 @@ def configure(cls):
raise ShardedModelInitializationException(
'You cannot use a PostgresShardGeneratedIDField on a non-Postgres database.')

register_migration_signal_for_model_receiver(apps.get_app_config(cls._meta.app_label),
PostgresShardGeneratedIDField.migration_receiver,
dispatch_uid=PRE_MIGRATION_DISPATCH_UID % cls._meta.app_label)
for field in postgres_shard_id_fields:
register_migration_signal_for_model_receiver(apps.get_app_config(cls._meta.app_label),
field.migration_receiver,
dispatch_uid=PRE_MIGRATION_DISPATCH_UID % cls._meta.app_label)

if shard_group:
sharded_fields = list(filter(lambda field: issubclass(type(field), ShardedIDFieldMixin), cls._meta.fields))
if not sharded_fields and not postgres_shard_id_fields:
raise ShardedModelInitializationException('All sharded models require a ShardedIDFieldMixin or a '
'PostgresShardGeneratedIDField.')

if not list(filter(lambda field: field == cls._meta.pk, sharded_fields)) and not postgres_shard_id_fields:
raise ShardedModelInitializationException('All sharded models require the ShardedAutoIDField or '
'PostgresShardGeneratedIDFieldto be the primary key. Set '
if not list(filter(lambda field: field == cls._meta.pk, sharded_fields + postgres_shard_id_fields)):
raise ShardedModelInitializationException('All sharded models require a ShardedAutoIDField or '
'PostgresShardGeneratedIDField to be the primary key. Set '
'primary_key=True on the field.')

if not callable(getattr(cls, 'get_shard', None)):
Expand Down
4 changes: 4 additions & 0 deletions django_sharding_library/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,9 @@ class InvalidMigrationException(DjangoShardingException):
pass


class InvalidShowMigrationsException(DjangoShardingException):
    """
    Raised by the `showmigrations` command when the requested database is
    not one it is able to act on.
    """


class NonExistentDatabaseException(DjangoShardingException):
    """
    Sharding error for a database that does not exist.

    NOTE(review): inferred from the class name; the raise sites are not
    visible here, confirm against the callers.
    """
78 changes: 56 additions & 22 deletions django_sharding_library/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.db.models import AutoField, CharField, ForeignKey, BigIntegerField, OneToOneField

from django_sharding_library.constants import Backends
from django_sharding_library.utils import create_postgres_global_sequence, create_postgres_shard_id_function
from django_sharding_library.utils import create_postgres_global_sequence, create_postgres_shard_id_function, get_next_sharded_id

try:
from django.db.backends.postgresql.base import DatabaseWrapper as PostgresDatabaseWrapper
Expand Down Expand Up @@ -59,19 +59,19 @@ def get_pk_value_on_save(self, instance):

class TableShardedIDField(ShardedIDFieldMixin, BigAutoField):
"""
An autoincrimenting field which takes a `source_table` as an argument in
order to generate unqiue ids for the sharded model.
An autoincrementing field which takes a `source_table_name` as an argument in
order to generate unique ids for the sharded model. i.e. `app.model_name`.
"""
def __init__(self, *args, **kwargs):
from django_sharding_library.id_generation_strategies import TableStrategy
kwargs['strategy'] = TableStrategy(backing_model=kwargs['source_table'])
setattr(self, 'source_table', kwargs['source_table'])
del kwargs['source_table']
kwargs['strategy'] = TableStrategy(backing_model_name=kwargs['source_table_name'])
setattr(self, 'source_table_name', kwargs['source_table_name'])
del kwargs['source_table_name']
return super(TableShardedIDField, self).__init__(*args, **kwargs)

def deconstruct(self):
name, path, args, kwargs = super(TableShardedIDField, self).deconstruct()
kwargs['source_table'] = getattr(self, 'source_table')
kwargs['source_table_name'] = getattr(self, 'source_table_name')
return name, path, args, kwargs


Expand Down Expand Up @@ -169,25 +169,14 @@ class ShardForeignKeyStorageField(ShardForeignKeyStorageFieldMixin, ForeignKey):
pass


class PostgresShardGeneratedIDField(AutoField):
"""
A field that uses a Postgres stored procedure to return an ID generated on the database.
"""
def db_type(self, connection, *args, **kwargs):
class BasePostgresShardGeneratedIDField(object):

def __init__(self, *args, **kwargs):

if not hasattr(settings, 'SHARD_EPOCH'):
raise ValueError("PostgresShardGeneratedIDField requires a SHARD_EPOCH to be defined in your settings file.")

if connection.vendor == PostgresDatabaseWrapper.vendor:
return "bigint DEFAULT next_sharded_id()"
else:
return super(PostgresShardGeneratedIDField, self).db_type(connection)

def get_internal_type(self):
return 'BigIntegerField'

def rel_db_type(self, connection):
return BigIntegerField().db_type(connection=connection)
return super(BasePostgresShardGeneratedIDField, self).__init__(*args, **kwargs)

@staticmethod
def migration_receiver(*args, **kwargs):
Expand All @@ -202,6 +191,51 @@ def migration_receiver(*args, **kwargs):
create_postgres_shard_id_function(sequence_name, db_alias, shard_id)


class PostgresShardGeneratedIDAutoField(BasePostgresShardGeneratedIDField, BigAutoField):
    """
    An auto field whose Postgres column defaults to the `next_sharded_id()`
    stored procedure, so the ID is generated on the database.
    """
    def db_type(self, connection, *args, **kwargs):
        # Any non-Postgres backend keeps the column type of the parent field.
        on_postgres = connection.vendor == PostgresDatabaseWrapper.vendor
        if not on_postgres:
            return super(PostgresShardGeneratedIDAutoField, self).db_type(connection)
        return "bigint DEFAULT next_sharded_id()"


class PostgresShardGeneratedIDField(BasePostgresShardGeneratedIDField, BigIntegerField):
    """
    A field that uses a Postgres stored procedure to return an ID generated on the database.
    Generates them prior to save with a separate call to the DB.
    """

    def get_shard_from_id(self, instance_id):
        """
        Return the alias of the database whose `SHARD_GROUP` and `SHARD_ID`
        settings match the shard id embedded in `instance_id`, or None when
        no configured database matches.
        """
        # NOTE(review): the group is read from the field itself, not from the
        # model the field is attached to -- confirm this is where it is set.
        group = getattr(self, 'django_sharding__shard_group', None)

        # The shard id is the 13 bits sitting above the lowest 10 bits of the
        # ID. Shifting and masking gives the same value as slicing the binary
        # string for large IDs, and does not raise for IDs too short to slice.
        shard_id_to_find = (instance_id >> 10) & 0x1FFF

        # We can check the shard id from the PK against the shard ID in the databases config
        for alias, db_settings in settings.DATABASES.items():
            if "SHARD_GROUP" not in db_settings:
                # Databases without shard settings can never be a match.
                continue
            if db_settings["SHARD_GROUP"] == group and db_settings.get("SHARD_ID") == shard_id_to_find:
                return alias

        return None  # Return None if we could not determine the shard so we can fall through to the next shard grab attempt

    def get_pk_value_on_save(self, instance):
        """Generate a new ID for `instance` when this field is its primary key."""
        return self.generate_id(instance)

    def pre_save(self, model_instance, add):
        """
        Keep a value that is already set on the instance; otherwise generate
        a new ID, store it on the instance and return it.
        """
        if getattr(model_instance, self.attname, None) is not None:
            return super(PostgresShardGeneratedIDField, self).pre_save(model_instance, add)
        value = self.generate_id(model_instance)
        setattr(model_instance, self.attname, value)
        return value

    @staticmethod
    def generate_id(instance):
        """
        Fetch the next sharded ID from the database the instance is bound to,
        falling back to `instance.get_shard()` when it is not bound to one.
        """
        shard = instance._state.db or instance.get_shard()
        return get_next_sharded_id(shard)


class PostgresShardForeignKey(ForeignKey):
def db_type(self, connection):
# The database column type of a ForeignKey is the column type
Expand Down
22 changes: 14 additions & 8 deletions django_sharding_library/id_generation_strategies.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import uuid

from django.apps import apps
from django.db import connections, transaction
from django.utils.deconstruct import deconstructible

Expand All @@ -24,33 +25,38 @@ class TableStrategy(BaseIDGenerationStrategy):
Uses an autoincrement field, on a TableStrategyModel model `backing_model`
to generate unique IDs.
"""
def __init__(self, backing_model):
if not issubclass(backing_model, TableStrategyModel):
raise ValueError("Unsupported model used for generating IDs")
self.backing_model = backing_model
def __init__(self, backing_model_name):
self.backing_model_name = backing_model_name

def get_next_id(self, database=None):
"""
Returns a new unique integer identifier for an object using an
auto-incrementing field in the database.
"""
app_label = self.backing_model_name.split('.')[0]
app = apps.get_app_config(app_label)
backing_model = app.get_model(self.backing_model_name[len(app_label) + 1:])

if not issubclass(backing_model, TableStrategyModel):
raise ValueError("Unsupported model used for generating IDs")

from django.conf import settings
backing_table_db = getattr(self.backing_model, 'database', 'default')
backing_table_db = getattr(backing_model, 'database', 'default')
if settings.DATABASES[backing_table_db]['ENGINE'] in Backends.MYSQL:
with transaction.atomic(backing_table_db):
cursor = connections[backing_table_db].cursor()
sql = "REPLACE INTO `{0}` (`stub`) VALUES ({1})".format(
self.backing_model._meta.db_table, True
backing_model._meta.db_table, True
)
cursor.execute(sql)

if getattr(cursor.cursor.cursor, 'lastrowid', None):
id = cursor.cursor.cursor.lastrowid
else:
id = self.backing_model.objects.get(stub=True).id
id = backing_model.objects.get(stub=True).id
else:
with transaction.atomic(backing_table_db):
id = self.backing_model.objects.create(stub=None).id
id = backing_model.objects.create(stub=None).id
return id


Expand Down
33 changes: 33 additions & 0 deletions django_sharding_library/management/commands/showmigrations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from django.conf import settings
from django.core.management.commands.showmigrations import Command as ShowMigrationsCommand

from django_sharding_library.exceptions import InvalidShowMigrationsException


class Command(ShowMigrationsCommand):
    """
    A `showmigrations` command that takes the same `--database` parameter
    style as `migrate` and acts on every non-replica database by default.
    """

    def handle(self, *args, **options):
        """
        Run the stock `showmigrations` once per selected database.

        With no `--database`, or with `--database all`, every database
        returned by `get_all_but_replica_dbs` is used. Raises
        `InvalidShowMigrationsException` when the requested database is not
        in that list.
        """
        available_databases = self.get_all_but_replica_dbs()
        requested_database = options['database']

        if not requested_database or requested_database == 'all':
            databases = available_databases
        elif requested_database not in available_databases:
            raise InvalidShowMigrationsException(
                'You must use showmigrations on an existing non-replica DB.'
            )
        else:
            databases = [requested_database]

        # Fall back through the available style names, ending with a no-op.
        heading_style = getattr(self.style, "MIGRATE_SUCCESS", getattr(self.style, "SUCCESS", lambda a: a))

        for database in databases:
            options['database'] = database
            # Written in green text to stand out from the surrounding headings
            if options['verbosity'] >= 1:
                self.stdout.write(heading_style("\nDatabase: {}\n".format(database)))
            super(Command, self).handle(*args, **options)

    def get_all_but_replica_dbs(self):
        """Return the aliases of all databases that have no truthy `PRIMARY` setting."""
        return [
            alias for alias, db_settings in settings.DATABASES.items()
            if not db_settings.get('PRIMARY', None)
        ]

    def add_arguments(self, parser):
        """
        Rework the inherited `--database` option: no default, an `all`
        choice, and choices limited to the non-replica databases.
        """
        super(Command, self).add_arguments(parser)
        # argparse has no public way to edit an option that is already
        # registered, so the parent's action is modified in place.
        database_action = parser._option_string_actions['--database']
        database_action.default = None
        database_action.help = u'Nominates a database to synchronize. Defaults to all databases.'
        database_action.choices = ['all'] + self.get_all_but_replica_dbs()
4 changes: 2 additions & 2 deletions django_sharding_library/router.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from django.apps import apps
from django.conf import settings

from django_sharding_library.fields import PostgresShardGeneratedIDField
from django_sharding_library.fields import BasePostgresShardGeneratedIDField
from django_sharding_library.exceptions import DjangoShardingException, InvalidMigrationException
from django_sharding_library.utils import (
is_model_class_on_database,
Expand Down Expand Up @@ -69,7 +69,7 @@ def _get_shard(self, model, **hints):
if sharded_by_field_id:
shard = self.get_shard_for_id_field(model, sharded_by_field_id)

is_pk_postgres_generated_id_field = isinstance(getattr(model._meta, 'pk'), PostgresShardGeneratedIDField)
is_pk_postgres_generated_id_field = issubclass(type(getattr(model._meta, 'pk')), BasePostgresShardGeneratedIDField)
lookup_pk = hints.get('exact_lookups', {}).get('pk') or hints.get('exact_lookups', {}).get('id')

if shard is None and is_pk_postgres_generated_id_field and lookup_pk is not None:
Expand Down
10 changes: 9 additions & 1 deletion django_sharding_library/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from django_sharding_library.exceptions import DjangoShardingException



def create_postgres_global_sequence(sequence_name, db_alias, reset_sequence=False):
cursor = connections[db_alias].cursor()
sid = transaction.savepoint(db_alias)
Expand Down Expand Up @@ -110,3 +109,12 @@ def get_database_for_model_instance(instance):
return instance.get_shard()

raise DjangoShardingException("Unable to deduce datbase for model instance")


def get_next_sharded_id(shard):
    """
    Ask the database behind `shard` for a newly generated ID.

    Runs `SELECT next_sharded_id();` on the connection registered under the
    `shard` alias and returns the first column of the row that comes back.
    """
    cursor = connections[shard].cursor()
    try:
        cursor.execute("SELECT next_sharded_id();")
        generated_id = cursor.fetchone()
    finally:
        # Release the cursor even when the query raises.
        cursor.close()

    return generated_id[0]
10 changes: 5 additions & 5 deletions docs/components/OtherComponents.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,19 +106,19 @@ As an example using the above mixin, one of the included fields uses a secondary

class TableShardedIDField(ShardedIDFieldMixin, BigAutoField):
"""
An autoincrimenting field which takes a `source_table` as an argument in
An autoincrementing field which takes a `source_table_name` as an argument in
order to generate unique ids for the sharded model.
"""
def __init__(self, *args, **kwargs):
from django_sharding_library.id_generation_strategies import TableStrategy
kwargs['strategy'] = TableStrategy(backing_model=kwargs['source_table'])
setattr(self, 'source_table', kwargs['source_table'])
del kwargs['source_table']
kwargs['strategy'] = TableStrategy(backing_model_name=kwargs['source_table_name'])
setattr(self, 'source_table_name', kwargs['source_table_name'])
del kwargs['source_table_name']
return super(TableShardedIDField, self).__init__(*args, **kwargs)

def deconstruct(self):
name, path, args, kwargs = super(TableShardedIDField, self).deconstruct()
kwargs['source_table'] = getattr(self, 'source_table')
kwargs['source_table_name'] = getattr(self, 'source_table_name')
return name, path, args, kwargs
```

Expand Down
Loading

0 comments on commit 6ffc60e

Please sign in to comment.