From e7adabd8392848934f09ba33a22083c5ac799134 Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Fri, 9 Dec 2022 15:53:17 -0600 Subject: [PATCH 1/5] Add support for insert Path obj to csv --- README.md | 20 ++++++++++---------- datajoint/table.py | 19 ++++++++++++++----- tests/test_university.py | 6 ++---- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 6c818865d..3c3143485 100644 --- a/README.md +++ b/README.md @@ -112,15 +112,15 @@ important DataJoint schema or records. ### API docs -The API documentation can be built using sphinx by running +The API documentation can be built with mkdocs using the docker compose file in +`docs/` with the following command: ``` bash -pip install sphinx sphinx_rtd_theme -(cd docs-api/sphinx && make html) +MODE="LIVE" PACKAGE=datajoint UPSTREAM_REPO=https://github.com/datajoint/datajoint-python.git HOST_UID=$(id -u) docker compose -f docs/docker-compose.yaml up --build ``` -Generated docs are written to `docs-api/docs/html/index.html`. -More details in [docs-api/README.md](docs-api/README.md). +The site will then be available at `http://localhost/`. When finished, be sure to run +the same command as above, but replace `up --build` with `down`. ## Running Tests Locally
@@ -141,11 +141,11 @@ HOST_GID=1000 * Add entry in `/etc/hosts` for `127.0.0.1 fakeservices.datajoint.io` * Run desired tests. Some examples are as follows: -| Use Case | Shell Code | -| ---------------------------- | ------------------------------------------------------------------------------ | -| Run all tests | `nosetests -vsw tests --with-coverage --cover-package=datajoint` | -| Run one specific class test | `nosetests -vs --tests=tests.test_fetch:TestFetch.test_getattribute_for_fetch1` | -| Run one specific basic test | `nosetests -vs --tests=tests.test_external_class:test_insert_and_fetch` | +| Use Case | Shell Code | +| ---------------------------- | ------------------------------------------------------------------------------ | +| Run all tests | `nosetests -vsw tests --with-coverage --cover-package=datajoint` | +| Run one specific class test | `nosetests -vs --tests=tests.test_fetch:TestFetch.test_getattribute_for_fetch1` | +| Run one specific basic test | `nosetests -vs --tests=tests.test_external_class:test_insert_and_fetch` | ### Launch Docker Terminal diff --git a/datajoint/table.py b/datajoint/table.py index f56d35922..56f4db99c 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -6,6 +6,7 @@ import pandas import logging import uuid +import csv import re from pathlib import Path from .settings import config @@ -345,13 +346,17 @@ def insert( """ Insert a collection of rows. - :param rows: An iterable where an element is a numpy record, a dict-like object, a - pandas.DataFrame, a sequence, or a query expression with the same heading as self. + :param rows: Either (a) an iterable where an element is a numpy record, a + dict-like object, a pandas.DataFrame, a sequence, or a query expression with + the same heading as self, or (b) a pathlib.Path object specifying a path + relative to the current directory with a CSV file, the contents of which + will be inserted. :param replace: If True, replaces the existing tuple. 
:param skip_duplicates: If True, silently skip duplicate inserts. - :param ignore_extra_fields: If False, fields that are not in the heading raise error. - :param allow_direct_insert: applies only in auto-populated tables. If False (default), - insert are allowed only from inside the make callback. + :param ignore_extra_fields: If False, fields that are not in the heading raise + error. + :param allow_direct_insert: applies only in auto-populated tables. If False + (default), insert are allowed only from inside the make callback. Example: @@ -366,6 +371,10 @@ def insert( drop=len(rows.index.names) == 1 and not rows.index.names[0] ).to_records(index=False) + if isinstance(rows, Path): + with open(rows, newline="") as data_file: + rows = list(csv.DictReader(data_file, delimiter=",")) + # prohibit direct inserts into auto-populated tables if not allow_direct_insert and not getattr(self, "_allow_insert", True): raise DataJointError( diff --git a/tests/test_university.py b/tests/test_university.py index f0575e41e..2d87e3f86 100644 --- a/tests/test_university.py +++ b/tests/test_university.py @@ -33,11 +33,9 @@ def test_activate(): Enroll, Grade, ): - import csv + from pathlib import Path - with open("./data/" + table.__name__ + ".csv") as f: - reader = csv.DictReader(f) - table().insert(reader) + table().insert(Path("./data/" + table.__name__ + ".csv")) def test_fill(): From 7e4a7ab7b72aacb63d77b36fcc6d8d436adafe01 Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Fri, 9 Dec 2022 15:59:52 -0600 Subject: [PATCH 2/5] Adjust insert docstring --- datajoint/table.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/datajoint/table.py b/datajoint/table.py index 56f4db99c..12eea9759 100644 --- a/datajoint/table.py +++ b/datajoint/table.py @@ -353,10 +353,9 @@ def insert( will be inserted. :param replace: If True, replaces the existing tuple. :param skip_duplicates: If True, silently skip duplicate inserts. 
- :param ignore_extra_fields: If False, fields that are not in the heading raise - error. - :param allow_direct_insert: applies only in auto-populated tables. If False - (default), insert are allowed only from inside the make callback. + :param ignore_extra_fields: If False, fields that are not in the heading raise error. + :param allow_direct_insert: Only applies in auto-populated tables. If False (default), + insert may only be called from inside the make callback. Example: From 17ad3e5c1847bba4227ed33624742765a8072c3b Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Tue, 13 Dec 2022 09:16:18 -0600 Subject: [PATCH 3/5] Update nginx version --- LNX-docker-compose.yml | 2 +- local-docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LNX-docker-compose.yml b/LNX-docker-compose.yml index c55d6f4f1..92d2aee51 100644 --- a/LNX-docker-compose.yml +++ b/LNX-docker-compose.yml @@ -32,7 +32,7 @@ services: interval: 1s fakeservices.datajoint.io: <<: *net - image: datajoint/nginx:v0.2.3 + image: datajoint/nginx:v0.2.4 environment: - ADD_db_TYPE=DATABASE - ADD_db_ENDPOINT=db:3306 diff --git a/local-docker-compose.yml b/local-docker-compose.yml index f61f9e5d4..a6baedc3e 100644 --- a/local-docker-compose.yml +++ b/local-docker-compose.yml @@ -34,7 +34,7 @@ services: interval: 1s fakeservices.datajoint.io: <<: *net - image: datajoint/nginx:v0.2.3 + image: datajoint/nginx:v0.2.4 environment: - ADD_db_TYPE=DATABASE - ADD_db_ENDPOINT=db:3306 From ccf7f7df7801edde2f53e1414e21942a4573b5ba Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Tue, 13 Dec 2022 09:54:02 -0600 Subject: [PATCH 4/5] Document new insert feature --- docs/src/query-lang/common-commands.md | 85 ++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 11 deletions(-) diff --git a/docs/src/query-lang/common-commands.md b/docs/src/query-lang/common-commands.md index e30863c70..83f70bc3e 100644 --- a/docs/src/query-lang/common-commands.md +++ b/docs/src/query-lang/common-commands.md @@ 
-1,6 +1,70 @@ - - +## Insert + +Data entry is as easy as providing the appropriate data structure to a permitted table. +Given the following table definition, we can insert data as tuples, dicts, pandas +dataframes, or pathlib `Path` relative paths to local CSV files. + +```text + mouse_id: int # unique mouse id + --- + dob: date # mouse date of birth + sex: enum('M', 'F', 'U') # sex of mouse - Male, Female, or Unknown +``` + +=== "Tuple" + + ```python + mouse.insert1( (0, '2017-03-01', 'M') ) # Single entry + data = [ + (1, '2016-11-19', 'M'), + (2, '2016-11-20', 'U'), + (5, '2016-12-25', 'F') + ] + mouse.insert(data) # Multi-entry + ``` + +=== "Dict" + + ```python + mouse.insert1( dict(mouse_id=0, dob='2017-03-01', sex='M') ) # Single entry + data = [ + {'mouse_id':1, 'dob':'2016-11-19', 'sex':'M'}, + {'mouse_id':2, 'dob':'2016-11-20', 'sex':'U'}, + {'mouse_id':5, 'dob':'2016-12-25', 'sex':'F'} + ] + mouse.insert(data) # Multi-entry + ``` + +=== "Pandas" + + ```python + import pandas as pd + data = pd.DataFrame( + [[1, "2016-11-19", "M"], [2, "2016-11-20", "U"], [5, "2016-12-25", "F"]], + columns=["mouse_id", "dob", "sex"], + ) + mouse.insert(data) + ``` + +=== "CSV" + + Given the following CSV in the current working directory as `mice.csv` + + ```console + mouse_id,dob,sex + 1,2016-11-19,M + 2,2016-11-20,U + 5,2016-12-25,F + ``` + + We can import as follows: + + ```python + from pathlib import Path + mouse.insert(Path('./mice.csv')) + ``` + ## Make See the article on [`make` methods](../../reproduce/make-method/) @@ -31,8 +95,8 @@ data = query.fetch(as_dict=True) # (2) ### Separate variables ``` python -name, img = query.fetch1('name', 'image') # when query has exactly one entity -name, img = query.fetch('name', 'image') # [name, ...] [image, ...] +mouse_id, dob = query.fetch1('mouse_id', 'dob') # when query has exactly one entity +mouse_id, dob = query.fetch('mouse_id', 'dob') # [mouse_id, ...] [dob, ...] ``` ### Primary key values @@ -51,11 +115,10 @@ primary keys. 
To sort the result, use the `order_by` keyword argument. ``` python -data = query.fetch(order_by='name') # ascending order -data = query.fetch(order_by='name desc') # descending order -data = query.fetch(order_by=('name desc', 'year')) # by name first, year second -data = query.fetch(order_by='KEY') # sort by the primary key -data = query.fetch(order_by=('name', 'KEY desc')) # sort by name but for same names order by primary key +data = query.fetch(order_by='mouse_id') # ascending order +data = query.fetch(order_by='mouse_id desc') # descending order +data = query.fetch(order_by=('mouse_id', 'dob')) # by ID first, dob second +data = query.fetch(order_by='KEY') # sort by the primary key ``` The `order_by` argument can be a string specifying the attribute to sort by. By default @@ -63,7 +126,7 @@ the sort is in ascending order. Use `'attr desc'` to sort in descending order by attribute `attr`. The value can also be a sequence of strings, in which case, the sort performed on all the attributes jointly in the order specified. -The special attribute name `'KEY'` represents the primary key attributes in order that +The special attribute named `'KEY'` represents the primary key attributes in order that they appear in the index. Otherwise, this name can be used as any other argument. If an attribute happens to be a SQL reserved word, it needs to be enclosed in @@ -82,7 +145,7 @@ Similar to sorting, the `limit` and `offset` arguments can be used to limit the to a subset of entities. 
``` python -data = query.fetch(order_by='name', limit=10, offset=5) +data = query.fetch(order_by='mouse_id', limit=10, offset=5) ``` Note that an `offset` cannot be used without specifying a `limit` as From 7692f3db19a88932c99056077e13bba3088b01a9 Mon Sep 17 00:00:00 2001 From: CBroz1 Date: Tue, 13 Dec 2022 10:56:13 -0600 Subject: [PATCH 5/5] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index efaa2bce1..3f7922862 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### 0.14.0 -- TBA * Bugfix - Activating a schema requires all tables to exist even if `create_tables=False` PR [#1058](https://github.com/datajoint/datajoint-python/pull/1058) * Update - Populate call with `reserve_jobs=True` to exclude `error` and `ignore` keys - PR [#1062](https://github.com/datajoint/datajoint-python/pull/1062) +* Add - Support for inserting data with CSV files - PR [#1067](https://github.com/datajoint/datajoint-python/pull/1067) ### 0.13.8 -- Sep 21, 2022 * Add - New documentation structure based on markdown PR [#1052](https://github.com/datajoint/datajoint-python/pull/1052)