Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add json data type #1051

Merged
merged 40 commits into from
Feb 10, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
8657cfe
Add json type.
guzman-raphael Sep 10, 2022
10b0281
Add support.
guzman-raphael Sep 10, 2022
a7c2bd7
Fix styling.
guzman-raphael Sep 10, 2022
df4825a
Fix preview.
guzman-raphael Sep 10, 2022
0ec3a94
Replace with JSON_VALUE for greater functionality.
guzman-raphael Sep 13, 2022
12b27ab
Reduce tests and update changelog.
guzman-raphael Sep 13, 2022
26f703c
Disable 3.7 python test temporarily.
guzman-raphael Sep 13, 2022
5e25617
Fix projections.
guzman-raphael Sep 15, 2022
15686d6
Clean up logs and tests.
guzman-raphael Sep 15, 2022
2b3409b
Enable MySQL 5.7 tests, fix logging errors, verify json tests only ru…
guzman-raphael Sep 15, 2022
7c04f1f
Add missing space.
guzman-raphael Sep 15, 2022
b97ec62
Rename multi restriction function.
guzman-raphael Sep 16, 2022
eb77d77
Remove unneeded import.
guzman-raphael Sep 16, 2022
0dd5ceb
Update healthchecks to facilitate testing, verify unused imports, all…
guzman-raphael Sep 17, 2022
5ef3de9
Fix merge conflicts.
guzman-raphael Sep 17, 2022
d46462d
Fix unused imports.
guzman-raphael Sep 17, 2022
065a346
Allow comments in requirements.txt.
guzman-raphael Sep 17, 2022
72da4dd
Fix merge conflicts.
guzman-raphael Sep 21, 2022
405171d
Remove backslash from f-string.
guzman-raphael Sep 21, 2022
1e16e7e
Fix merge conflicts.
guzman-raphael Sep 21, 2022
cfb6469
Merge branch 'docs-styling' into json
guzman-raphael Sep 28, 2022
8008a7a
Initialize jupyter tutorial for use of json type.
guzman-raphael Sep 28, 2022
3778b9f
Merge branch 'docs-styling' into json
guzman-raphael Sep 29, 2022
6ad07ea
Merge branch 'master' of github.com:datajoint/datajoint-python into json
guzman-raphael Sep 30, 2022
0a4f193
Fix merge conflicts.
guzman-raphael Feb 2, 2023
ca235ae
Fix grammar.
guzman-raphael Feb 2, 2023
a27a176
Fix merge conflicts.
guzman-raphael Feb 7, 2023
946f65b
Fix merge conflicts.
guzman-raphael Feb 7, 2023
9126614
Update codespace doc and include default debug setting.
guzman-raphael Feb 8, 2023
713c497
Add some projection examples to json docs.
guzman-raphael Feb 8, 2023
35e0fd3
Update docs and remove deprecated features.
guzman-raphael Feb 9, 2023
661533f
Remove deprecated features in tests.
guzman-raphael Feb 9, 2023
3df217b
Fix merge conflicts.
guzman-raphael Feb 9, 2023
18ef064
Merge branch 'remove-deprecated-features' into json
guzman-raphael Feb 9, 2023
edd2f6b
Merge branch 'master' of https://github.com/datajoint/datajoint-pytho…
guzman-raphael Feb 9, 2023
317c1cf
Reduce parenthesis.
guzman-raphael Feb 9, 2023
2769d56
Simplify boolean logic.
guzman-raphael Feb 10, 2023
ce3e1d9
Apply styling to if-block.
guzman-raphael Feb 10, 2023
f81dc67
Rename restrictions -> conditions.
guzman-raphael Feb 10, 2023
477d270
Add json comment.
guzman-raphael Feb 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
## Release notes

### 0.14.0 -- TBA
* Add `json` data type
- Fix lingering prints by replacing with logs
- `table.progress()` defaults to no stdout
- `table.describe()` defaults to no stdout

### 0.13.7 -- Jul 13, 2022
* Bugfix - Fix networkx incompatible change by version pinning to 2.6.3 PR #1036 (#1035)
* Add - Support for serializing numpy datetime64 types PR #1036 (#1022)
Expand Down
4 changes: 2 additions & 2 deletions LNX-docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# docker-compose -f LNX-docker-compose.yml --env-file LNX.env up --exit-code-from app --build
# docker compose -f LNX-docker-compose.yml --env-file LNX.env up --exit-code-from app --build
version: '2.2'
x-net: &net
networks:
Expand Down Expand Up @@ -32,7 +32,7 @@ services:
interval: 1s
fakeservices.datajoint.io:
<<: *net
image: datajoint/nginx:v0.2.1
image: datajoint/nginx:v0.2.3
environment:
- ADD_db_TYPE=DATABASE
- ADD_db_ENDPOINT=db:3306
Expand Down
9 changes: 6 additions & 3 deletions datajoint/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from .connection import conn
from .settings import config
from .utils import user_choice
import logging

logger = logging.getLogger(__name__.split(".")[0])


def set_password(
Expand All @@ -13,10 +16,10 @@ def set_password(
new_password = getpass("New password: ")
confirm_password = getpass("Confirm password: ")
if new_password != confirm_password:
print("Failed to confirm the password! Aborting password change.")
logger.warn("Failed to confirm the password! Aborting password change.")
return
connection.query("SET PASSWORD = PASSWORD('%s')" % new_password)
print("Password updated.")
logger.info("Password updated.")

if update_config or (
update_config is None and user_choice("Update local setting?") == "yes"
Expand Down Expand Up @@ -81,7 +84,7 @@ def kill(restriction=None, connection=None, order_by=None): # pragma: no cover
try:
connection.query("kill %d" % pid)
except pymysql.err.InternalError:
print("Process not found")
logger.warn("Process not found")


def kill_quick(restriction=None, connection=None):
Expand Down
9 changes: 4 additions & 5 deletions datajoint/autopopulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def _populate1(
finally:
self.__class__._allow_insert = False

def progress(self, *restrictions, display=True):
def progress(self, *restrictions, display=False):
"""
Report the progress of populating the table.
:return: (remaining, total) -- numbers of tuples to be populated
Expand All @@ -323,9 +323,9 @@ def progress(self, *restrictions, display=True):
total = len(todo)
remaining = len(todo - self.target)
if display:
guzman-raphael marked this conversation as resolved.
Show resolved Hide resolved
print(
"%-20s" % self.__class__.__name__,
"Completed %d of %d (%2.1f%%) %s"
logger.info(
"%-20s" % self.__class__.__name__
+ " Completed %d of %d (%2.1f%%) %s"
% (
total - remaining,
total,
Expand All @@ -334,6 +334,5 @@ def progress(self, *restrictions, display=True):
datetime.datetime.now(), "%Y-%m-%d %H:%M:%S"
),
),
flush=True,
)
return remaining, total
100 changes: 68 additions & 32 deletions datajoint/condition.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,27 @@
import decimal
import numpy
import pandas
import json
from .errors import DataJointError

# Matches `attr`, `attr.json.path`, or `attr.json.path:type` attribute references.
json_pattern = re.compile(r"^(?P<attr>\w+)(\.(?P<path>[\w.*\[\]]+))?(:(?P<type>\w+))?$")


def translate_attribute(key):
    """
    Parse an attribute reference, optionally carrying a JSON path, into SQL.

    :param key: attribute name, optionally followed by a dotted JSON path and
        an explicit return type, e.g. ``attr``, ``attr.field.sub``, or
        ``attr.field:decimal``.
    :return: tuple ``(match, sql)`` where ``match`` is ``None`` when *key* does
        not parse (``sql`` is then the unmodified key), or the pattern's
        groupdict; ``sql`` is the bare attribute name when no JSON path was
        given, otherwise a MySQL ``JSON_VALUE`` expression extracting the path.
    """
    match = json_pattern.match(key)
    if match is None:
        # Not a recognizable attribute reference; pass the key through as-is.
        return None, key
    match = match.groupdict()
    if match["path"] is None:
        # Plain attribute with no JSON path component.
        return match, match["attr"]
    # JSON-path access: extract via JSON_VALUE, adding a RETURNING clause
    # only when an explicit type was requested.
    returning = f" RETURNING {match['type']}" if match["type"] else ""
    return match, f"JSON_VALUE(`{match['attr']}`, '$.{match['path']}'{returning})"


class PromiscuousOperand:
"""
Expand Down Expand Up @@ -94,35 +113,59 @@ def make_condition(query_expression, condition, columns):
from .expression import QueryExpression, Aggregation, U

def prep_value(k, v):
"""prepare value v for inclusion as a string in an SQL condition"""
if query_expression.heading[k].uuid:
"""prepare SQL condition"""
key_match, k = translate_attribute(k)
if key_match["path"] is None:
k = f"`{k}`"
if (
query_expression.heading[key_match["attr"]].json
and key_match["path"] is not None
and isinstance(v, dict)
):
return f"{k}='{json.dumps(v)}'"
if v is None:
return f"{k} IS NULL"
if query_expression.heading[key_match["attr"]].uuid:
if not isinstance(v, uuid.UUID):
try:
v = uuid.UUID(v)
except (AttributeError, ValueError):
raise DataJointError(
"Badly formed UUID {v} in restriction by `{k}`".format(k=k, v=v)
)
return "X'%s'" % v.bytes.hex()
return f"{k}=X'{v.bytes.hex()}'"
if isinstance(
v, (datetime.date, datetime.datetime, datetime.time, decimal.Decimal)
v,
(
datetime.date,
datetime.datetime,
datetime.time,
decimal.Decimal,
list,
),
):
return '"%s"' % v
return f'{k}="{v}"'
if isinstance(v, str):
return '"%s"' % v.replace("%", "%%")
return "%r" % v
return f'{k}="{v.replace("%", "%%")}"'
return f"{k}={v}"

def template(restrictions, operator=None):
guzman-raphael marked this conversation as resolved.
Show resolved Hide resolved
return ("NOT (%s)" if negate else "%s") % (
guzman-raphael marked this conversation as resolved.
Show resolved Hide resolved
restrictions[0]
if len(restrictions) == 1
else f"({f') {operator} ('.join(restrictions)})"
guzman-raphael marked this conversation as resolved.
Show resolved Hide resolved
)

negate = False
while isinstance(condition, Not):
negate = not negate
condition = condition.restriction
template = "NOT (%s)" if negate else "%s"

# restrict by string
if isinstance(condition, str):
columns.update(extract_column_names(condition))
return template % condition.strip().replace(
"%", "%%"
return template(
restrictions=[condition.strip().replace("%", "%%")]
) # escape %, see issue #376

# restrict by AndList
Expand All @@ -139,7 +182,7 @@ def prep_value(k, v):
return negate # if any item is False, the whole thing is False
if not items:
return not negate # and empty AndList is True
return template % ("(" + ") AND (".join(items) + ")")
return template(restrictions=items, operator="AND")

# restriction by dj.U evaluates to True
if isinstance(condition, U):
Expand All @@ -151,23 +194,19 @@ def prep_value(k, v):

# restrict by a mapping/dict -- convert to an AndList of string equality conditions
if isinstance(condition, collections.abc.Mapping):
common_attributes = set(condition).intersection(query_expression.heading.names)
common_attributes = set(c.split(".", 1)[0] for c in condition).intersection(
query_expression.heading.names
)
if not common_attributes:
return not negate # no matching attributes -> evaluates to True
columns.update(common_attributes)
return template % (
"("
+ ") AND (".join(
"`%s`%s"
% (
k,
" IS NULL"
if condition[k] is None
else f"={prep_value(k, condition[k])}",
)
for k in common_attributes
)
+ ")"
return template(
restrictions=[
prep_value(k, v)
for k, v in condition.items()
dimitri-yatsenko marked this conversation as resolved.
Show resolved Hide resolved
if k.split(".", 1)[0] in common_attributes
],
operator="AND",
)

# restrict by a numpy record -- convert to an AndList of string equality conditions
Expand All @@ -178,12 +217,9 @@ def prep_value(k, v):
if not common_attributes:
return not negate # no matching attributes -> evaluate to True
columns.update(common_attributes)
return template % (
"("
+ ") AND (".join(
"`%s`=%s" % (k, prep_value(k, condition[k])) for k in common_attributes
)
+ ")"
return template(
restrictions=[prep_value(k, condition[k]) for k in common_attributes],
operator="AND",
)

# restrict by a QueryExpression subclass -- trigger instantiation and move on
Expand Down Expand Up @@ -231,7 +267,7 @@ def prep_value(k, v):
] # ignore False conditions
if any(item is True for item in or_list): # if any item is True, entirely True
return not negate
return template % ("(%s)" % " OR ".join(or_list)) if or_list else negate
return template(restrictions=or_list, operator="OR") if or_list else negate


def extract_column_names(sql_expression):
Expand Down
1 change: 1 addition & 0 deletions datajoint/declare.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
DECIMAL=r"(decimal|numeric)(\s*\(.+\))?(\s+unsigned)?$",
FLOAT=r"(double|float|real)(\s*\(.+\))?(\s+unsigned)?$",
STRING=r"(var)?char\s*\(.+\)$",
JSON=r"json$",
ENUM=r"enum\s*\(.+\)$",
BOOL=r"bool(ean)?$", # aliased to tinyint(1)
TEMPORAL=r"(date|datetime|time|timestamp|year)(\s*\(.+\))?$",
Expand Down
4 changes: 1 addition & 3 deletions datajoint/diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,7 @@ def make_dot(self):
if name.split(".")[0] in self.context:
cls = eval(name, self.context)
assert issubclass(cls, Table)
description = (
cls().describe(context=self.context, printout=False).split("\n")
)
description = cls().describe(context=self.context).split("\n")
description = (
"-" * 30
if q.startswith("---")
Expand Down
5 changes: 5 additions & 0 deletions datajoint/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
assert_join_compatibility,
extract_column_names,
PromiscuousOperand,
json_pattern,
translate_attribute,
)
from .declare import CONSTANT_LITERALS

Expand Down Expand Up @@ -342,6 +344,9 @@ def proj(self, *attributes, **named_attributes):
from other attributes available before the projection.
Each attribute name can only be used once.
"""
named_attributes = {
k: translate_attribute(v)[1] for k, v in named_attributes.items()
}
# new attributes in parentheses are included again with the new name without removing original
duplication_pattern = re.compile(
rf'^\s*\(\s*(?!{"|".join(CONSTANT_LITERALS)})(?P<name>[a-zA-Z_]\w*)\s*\)\s*$'
Expand Down
4 changes: 3 additions & 1 deletion datajoint/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas
import itertools
import re
import json
import numpy as np
import uuid
import numbers
Expand Down Expand Up @@ -47,6 +48,8 @@ def _get(connection, attr, data, squeeze, download_path):
"""
if data is None:
return
if attr.json:
return json.loads(data)

extern = (
connection.schemas[attr.database].external[attr.store]
Expand All @@ -59,7 +62,6 @@ def _get(connection, attr, data, squeeze, download_path):

if attr.is_filepath:
return adapt(extern.download_filepath(uuid.UUID(bytes=data))[0])

if attr.is_attachment:
# Steps:
# 1. get the attachment filename
Expand Down
13 changes: 10 additions & 3 deletions datajoint/heading.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
numeric=None,
string=None,
uuid=False,
json=None,
is_blob=False,
is_attachment=False,
is_filepath=False,
Expand Down Expand Up @@ -142,7 +143,7 @@ def non_blobs(self):
return [
k
for k, v in self.attributes.items()
if not v.is_blob and not v.is_attachment and not v.is_filepath
if not (v.is_blob or v.is_attachment or v.is_filepath or v.json)
]

@property
Expand Down Expand Up @@ -291,6 +292,7 @@ def _init_from_database(self):
),
is_blob=bool(TYPE_PATTERN["INTERNAL_BLOB"].match(attr["type"])),
uuid=False,
json=bool(TYPE_PATTERN["JSON"].match(attr["type"])),
is_attachment=False,
is_filepath=False,
adapter=None,
Expand Down Expand Up @@ -376,10 +378,15 @@ def _init_from_database(self):
)

if attr["in_key"] and any(
(attr["is_blob"], attr["is_attachment"], attr["is_filepath"])
(
attr["is_blob"],
attr["is_attachment"],
attr["is_filepath"],
attr["json"],
)
):
raise DataJointError(
"Blob, attachment, or filepath attributes are not allowed in the primary key"
"Json, Blob, attachment, or filepath attributes are not allowed in the primary key"
)

if (
Expand Down
10 changes: 6 additions & 4 deletions datajoint/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
from pathlib import Path
import re
from .utils import user_choice
import logging

logger = logging.getLogger(__name__.split(".")[0])


def migrate_dj011_external_blob_storage_to_dj012(migration_schema, store):
Expand Down Expand Up @@ -34,13 +37,13 @@ def migrate_dj011_external_blob_storage_to_dj012(migration_schema, store):
)
if do_migration:
_migrate_dj011_blob(dj.Schema(migration_schema), store)
print(
logger.info(
"Migration completed for schema: {}, store: {}.".format(
migration_schema, store
)
)
return
print("No migration performed.")
logger.warn("No migration performed.")


def _migrate_dj011_blob(schema, default_store):
Expand Down Expand Up @@ -114,8 +117,7 @@ def _migrate_dj011_blob(schema, default_store):
)
)
except:
print("Column already added")
pass
logger.warn("Column already added")

for _hash, size in zip(*legacy_external.fetch("hash", "size")):
if _hash in hashes:
Expand Down
Loading