Skip to content

Commit

Permalink
Database schema: set {} as default for DbNode.repository_metadata (#4898)
Browse files Browse the repository at this point in the history

Initially, when the column `repository_metadata` was added to the
`DbNode` table, it was set to be nullable since a significant number of
nodes will not actually have any files and therefore this value would be
empty. However, to prevent clients having to deal with a null-value, the
front-end ORM `Node.repository_metadata` property would return an empty
dictionary in this case, such that the return type is always a
dictionary. The main argument for making the column nullable was that it
would prevent unnecessary bytes from being stored in the database.

However, a bug surfaced where some code expected a dictionary for the
`repository_metadata` but got `None`. This particular instance was in
the import code, which circumvents the ORM and goes straight to the
database. This is of course undesirable, but it also happens through the
`QueryBuilder` that doesn't transform the returned attributes of
entities through the ORM interface. Given that there are a number of
layers from the ORM to the database, making sure that the typing across
all layers is consistent would be tricky and prone to more bugs. The
most secure solution is to simply set an empty dict as the default on
the database level. The added cost to the database size should still be
minimal and so is an acceptable downside to the increased stability of
the code.

Note that the column in the model is declared both with a server default
as well as a default on the ORM level. The reason is that the server
default is required for the migration. If the column were to be added
without the default, existing rows would violate the non-nullable
clause. For consistency, the server default is also added to the table
column declaration. The ORM default is necessary to guarantee that an
empty dictionary is set on a new `DbNode` instance when it is created.
SQLAlchemy does not apply the server default on the client side and so would
leave it as `None`, but we require that even for unstored instances, the value
defaults to an empty dictionary.
  • Loading branch information
sphuber authored Apr 30, 2021
1 parent fa8d05f commit 4a347b6
Show file tree
Hide file tree
Showing 10 changed files with 20 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Migration(migrations.Migration):
migrations.AddField(
model_name='dbnode',
name='repository_metadata',
field=django.contrib.postgres.fields.jsonb.JSONField(null=True),
field=django.contrib.postgres.fields.jsonb.JSONField(default=dict, null=True),
),
upgrade_schema_version(REVISION, DOWN_REVISION),
]
2 changes: 1 addition & 1 deletion aiida/backends/djsite/db/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ class DbNode(m.Model):
attributes = JSONField(default=dict, null=True)
# JSON Extras
extras = JSONField(default=dict, null=True)
repository_metadata = JSONField(null=True)
repository_metadata = JSONField(default=dict, null=True)

objects = m.Manager()
# Return aiida Node instances or their subclasses instead of DbNode instances
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@

def upgrade():
"""Upgrade: Add the extras column to the 'db_dbgroup' table"""
# We add the column with a `server_default` because otherwise the migration would fail since existing rows will not
# have a value and violate the not-nullable clause. However, the model doesn't use a server default but a default
# on the ORM level, so we remove the server default from the column directly after.
op.add_column(
'db_dbgroup', sa.Column('extras', postgresql.JSONB(astext_type=sa.Text()), nullable=False, server_default='{}')
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@

def upgrade():
"""Migrations for the upgrade."""
op.add_column('db_dbnode', sa.Column('repository_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
# We add the column with a `server_default` because otherwise the migration would fail since existing rows will not
# have a value and violate the not-nullable clause.
op.add_column(
'db_dbnode',
sa.Column('repository_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=False, server_default='{}')
)


def downgrade():
Expand Down
2 changes: 1 addition & 1 deletion aiida/backends/sqlalchemy/models/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class DbNode(Base):
mtime = Column(DateTime(timezone=True), default=timezone.now, onupdate=timezone.now)
attributes = Column(JSONB)
extras = Column(JSONB)
repository_metadata = Column(JSONB)
repository_metadata = Column(JSONB, nullable=False, default=dict, server_default='{}')

dbcomputer_id = Column(
Integer,
Expand Down
3 changes: 1 addition & 2 deletions aiida/orm/nodes/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
from .repository import NodeRepositoryMixin

if TYPE_CHECKING:
from aiida.repository import File
from ..implementation import Backend
from ..implementation.nodes import BackendNode

Expand Down Expand Up @@ -286,7 +285,7 @@ def repository_metadata(self) -> typing.Dict:
:return: the repository metadata
"""
return self.backend_entity.repository_metadata or {}
return self.backend_entity.repository_metadata

@repository_metadata.setter
def repository_metadata(self, value):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def setUpBeforeMigration(self):
dbnode.save()
self.node_pk = dbnode.pk

def test_group_string_update(self):
def test_migration(self):
"""Test that the column is added and defaults to an empty dictionary."""
DbNode = self.apps.get_model('db', 'DbNode')
node = DbNode.objects.get(pk=self.node_pk)
assert hasattr(node, 'repository_metadata')
assert node.repository_metadata is None
assert node.repository_metadata == {}
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_migration(self):
}
}
}
assert node_03.repository_metadata is None
assert node_03.repository_metadata == {}

for hashkey, content in (
(node_01.repository_metadata['o']['sub']['o']['path']['o']['file_b.txt']['k'], b'b'),
Expand Down
4 changes: 2 additions & 2 deletions tests/backends/aiida_sqlalchemy/test_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1786,7 +1786,7 @@ def test_add_node_repository_metadata(self):
try:
node = session.query(DbNode).filter(DbNode.id == self.node_id).one()
assert hasattr(node, 'repository_metadata')
assert node.repository_metadata is None
assert node.repository_metadata == {}
finally:
session.close()

Expand Down Expand Up @@ -1884,7 +1884,7 @@ def test_migration(self):
}
}
}
assert node_03.repository_metadata is None
assert node_03.repository_metadata == {}

for hashkey, content in (
(node_01.repository_metadata['o']['sub']['o']['path']['o']['file_b.txt']['k'], b'b'),
Expand Down
4 changes: 2 additions & 2 deletions tests/orm/implementation/test_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_creation(self):
self.assertIsNone(node.process_type)
self.assertEqual(node.attributes, dict())
self.assertEqual(node.extras, dict())
self.assertEqual(node.repository_metadata, None)
self.assertEqual(node.repository_metadata, {})
self.assertEqual(node.node_type, self.node_type)
self.assertEqual(node.label, self.node_label)
self.assertEqual(node.description, self.node_description)
Expand Down Expand Up @@ -87,7 +87,7 @@ def test_creation(self):
self.assertIsNone(node.process_type)
self.assertEqual(node.attributes, dict())
self.assertEqual(node.extras, dict())
self.assertEqual(node.repository_metadata, None)
self.assertEqual(node.repository_metadata, {})
self.assertEqual(node.node_type, self.node_type)
self.assertEqual(node.label, self.node_label)
self.assertEqual(node.description, self.node_description)
Expand Down

0 comments on commit 4a347b6

Please sign in to comment.