From c5f1bf7e476ce6538726db5fa84441c9ddde9957 Mon Sep 17 00:00:00 2001 From: Oliver Ruebel Date: Wed, 21 Jul 2021 11:27:26 -0700 Subject: [PATCH] Add functionality for linked DynamicTables (#645) Simplify interacting with DynamicTables that reference other tables via DynamicTableRegion, creating a collection of linked tables. In the ICEphys case this is a "simple" linear hierarchy of tables, but in principle a table may contain any number of DynamicTableRegion columns. This PR adds several functions to simplify introspection of linked DynamicTables and conversion to pandas DataFrames. - [X] Fix #646 by adding ``AlignedDynamicTable.get`` - [X] Fix #651 by updating ``AlignedDynamicTable.get`` to support slicing with ``[int, (str, str)]``, ``[int, str, str]``, and ``[int, str]`` to select a single cell or row of a category table, respectively - [X] Add ``AlignedDynamicTable.get_colnames(...)`` function to allow us to keep compliance of the ``colnames`` property with ``DynamicTable`` while providing an easy way to get the full list of column names. 
- [X] Set name of DataFrame in ``DynamicTable.to_dataframe()`` and ``DynamicTable.get`` - [X] Add helper functions to ``DynamicTable`` to deal with foreign columns: - [X] ``DynamicTable.get_foreign_columns`` to identify which columns are ``DynamicTableRegion`` columns - [X] ``DynamicTable.has_foreign_columns`` to identify if the table contains ``DynamicTableRegion`` columns - [X] ``DynamicTable.get_linked_tables`` to retrieve all tables linked to either directly or indirectly from the current table via ``DynamicTableRegion`` - [x] Implement the same helper functions also for ``AlignedDynamicTable`` - [x] ``DynamicTable.get_foreign_columns`` to identify which columns are ``DynamicTableRegion`` columns - [X] ``DynamicTable.has_foreign_columns`` to identify if the table contains ``DynamicTableRegion`` columns - [x] ``DynamicTable.get_linked_tables`` to retrieve all tables linked to either directly or indirectly from the current table via ``DynamicTableRegion`` - [X] Add new module ``hdmf.common.hierarchicaltable`` with helper functions to facilitate conversion of linked tables to a single Pandas dataframe. - [X] ``to_hierarchical_dataframe`` to merge linked tables into a single consolidated pandas DataFrame. - [X] ``drop_id_columns`` to remove "id" columns from a DataFrame. - [X] ``flatten_column_index`` to replace a ``pandas.MultiIndex`` with a regular ``pandas.Index`` - [x] Add test for DynamicTableRegion pointing to AlignedDynamicTable to check that all columns are used - [x] Add tests for hierarchicaltable.py for - [X] to_hierarchical_dataframe - [x] drop_id_columns - [x] flatten_column_index functions - [X] File issue tickets for open TODO items for future PRs - [X] ``to_hierarchical_dataframe`` should be updated to support resolution of more than one DynamicTableRegion column. See https://github.com/hdmf-dev/hdmf/issues/649 - [x] Add tutorial for DynamicTableRegion and how to use it for linking to tables and for creating linked tables. 
See https://github.com/hdmf-dev/hdmf/issues/648 --- CHANGELOG.md | 24 + src/hdmf/common/alignedtable.py | 182 ++++++- src/hdmf/common/hierarchicaltable.py | 248 +++++++++ src/hdmf/common/table.py | 76 +++ src/hdmf/container.py | 6 + src/hdmf/data_utils.py | 5 + src/hdmf/utils.py | 2 + tests/unit/common/test_alignedtable.py | 93 +++- tests/unit/common/test_linkedtables.py | 716 +++++++++++++++++++++++++ 9 files changed, 1336 insertions(+), 16 deletions(-) create mode 100644 src/hdmf/common/hierarchicaltable.py create mode 100644 tests/unit/common/test_linkedtables.py diff --git a/CHANGELOG.md b/CHANGELOG.md index d4fcae78c..8b26c8654 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,32 @@ ## Upcoming (TBD) +### New features +- Added several features to simplify interaction with ``DynamicTable`` objects that link to other tables via + ``DynamicTableRegion`` columns. @oruebel (#645) + - Added ``DynamicTable.get_foreign_columns`` to find all columns in a table that are a ``DynamicTableRegion`` + - Added ``DynamicTable.has_foreign_columns`` to identify if a ``DynamicTable`` contains ``DynamicTableRegion`` columns + - Added ``DynamicTable.get_linked_tables`` to retrieve all tables linked to either directly or indirectly from + the current table via ``DynamicTableRegion`` + - Implemented the new ``get_foreign_columns``, ``has_foreign_columns``, and ``get_linked_tables`` also for + ``AlignedDynamicTable`` + - Added new module ``hdmf.common.hierarchicaltable`` with helper functions to facilitate conversion of + hierarchically nested ``DynamicTable`` objects via the following new functions: + - ``to_hierarchical_dataframe`` to merge linked tables into a single consolidated pandas DataFrame. + - ``drop_id_columns`` to remove "id" columns from a DataFrame. + - ``flatten_column_index`` to replace a ``pandas.MultiIndex`` with a regular ``pandas.Index`` + ### Bug fixes - Do not build wheels compatible with Python 2 because HDMF requires Python 3.7. 
@rly (#642) +- ``AlignedDynamicTable`` did not override its ``get`` function. When using ``DynamicTableRegion`` to reference ``AlignedDynamicTable`` this led to cases where the columns of the category subtables were omitted during data access (e.g., conversion to pandas.DataFrame). This fix adds the ``AlignedDynamicTable.get`` based on the existing ``AlignedDynamicTable.__getitem__``. @oruebel (#645) +- Fixed #651 to support selection of cells in an ``AlignedDynamicTable`` via slicing with ``[int, (str, str)]`` (and ``[int, str, str]``) to select a single cell, and ``[int, str]`` to select a single row of a category table. @oruebel (#645) + +### Minor improvements +- Updated ``DynamicTable.to_dataframe()`` and ``DynamicTable.get`` functions to set the ``.name`` attribute + on generated pandas DataFrame objects. @oruebel (#645) +- Added ``AlignedDynamicTable.get_colnames(...)`` to support look-up of the full list of columns as the + ``AlignedDynamicTable.colnames`` property only includes the columns of the main table for compliance with + ``DynamicTable`` @oruebel (#645) - Fix documentation for `DynamicTable.get` and `DynamicTableRegion.get`. @rly (#650) ## HDMF 3.0.1 (July 7, 2021) diff --git a/src/hdmf/common/alignedtable.py b/src/hdmf/common/alignedtable.py index d051f3830..e711e1da6 100644 --- a/src/hdmf/common/alignedtable.py +++ b/src/hdmf/common/alignedtable.py @@ -20,6 +20,10 @@ class AlignedDynamicTable(DynamicTable): defines a 2-level table in which the main data is stored in the main table implemented by this type and additional columns of the table are grouped into categories, with each category being' represented by a separate DynamicTable stored within the group. + + NOTE: To remain compatible with DynamicTable, the attribute colnames represents only the + columns of the main table (not including the category tables). To get the full list of + column names, use the get_colnames() function instead. 
""" __fields__ = ({'name': 'category_tables', 'child': True}, ) @@ -209,6 +213,28 @@ def add_row(self, **kwargs): for category, values in category_data.items(): self.category_tables[category].add_row(**values) + @docval({'name': 'include_category_tables', 'type': bool, + 'doc': "Ignore sub-category tables and just look at the main table", 'default': False}, + {'name': 'ignore_category_ids', 'type': bool, + 'doc': "Ignore id columns of sub-category tables", 'default': False}) + def get_colnames(self, **kwargs): + """Get the full list of names of columns for this table + + :returns: List of tuples (str, str) where the first string is the name of the DynamicTable + that contains the column and the second string is the name of the column. If + include_category_tables is False, then a list of column names is returned. + """ + if not getargs('include_category_tables', kwargs): + return self.colnames + else: + ignore_category_ids = getargs('ignore_category_ids', kwargs) + columns = [(self.name, c) for c in self.colnames] + for category in self.category_tables.values(): + if not ignore_category_ids: + columns += [(category.name, 'id'), ] + columns += [(category.name, c) for c in category.colnames] + return columns + @docval({'name': 'ignore_category_ids', 'type': bool, 'doc': "Ignore id columns of sub-category tables", 'default': False}) def to_dataframe(self, **kwargs): @@ -225,21 +251,62 @@ def to_dataframe(self, **kwargs): def __getitem__(self, item): """ - :param item: Selection defining the items of interest. This may be a + Called to implement standard array slicing syntax. - * **int, list, array, slice** : Return one or multiple row of the table as a DataFrame - * **string** : Return a single category table as a DynamicTable or a single column of the - primary table as a - * **tuple**: Get a column, row, or cell from a particular category. 
The tuple is expected to consist - of (category, selection) where category may be a string with the name of the sub-category - or None (or the name of this AlignedDynamicTable) if we want to slice into the main table. + Same as ``self.get(item)``. See :py:meth:`~hdmf.common.alignedtable.AlignedDynamicTable.get` for details. + """ + return self.get(item) - :returns: DataFrame when retrieving a row or category. Returns scalar when selecting a cell. - Returns a VectorData/VectorIndex when retrieving a single column. + def get(self, item, **kwargs): + """ + Access elements (rows, columns, category tables etc.) from the table. Instead of calling + this function directly, the class also implements standard array slicing syntax + via :py:meth:`~hdmf.common.alignedtable.AlignedDynamicTable.__getitem__` + (which calls this function). For example, instead of calling + ``self.get(item=slice(2,5))`` we may use the often more convenient form of ``self[2:5]`` instead. + + :param item: Selection defining the items of interest. This may be either a: + + * **int, list, array, slice** : Return one or multiple row of the table as a pandas.DataFrame. For example: + * ``self[0]`` : Select the first row of the table + * ``self[[0,3]]`` : Select the first and fourth row of the table + * ``self[1:4]`` : Select the rows with index 1,2,3 from the table + + * **string** : Return a column from the main table or a category table. For example: + * ``self['column']`` : Return the column from the main table. + * ``self['my_category']`` : Returns a DataFrame of the ``my_category`` category table. + This is a shorthand for ``self.get_category('my_category').to_dataframe()``. + + * **tuple**: Get a column, row, or cell from a particular category table. + The tuple is expected to consist of the following elements: + + * ``category``: string with the name of the category. To select from the main + table use ``self.name`` or ``None``. 
+ * ``column``: string with the name of the column, and + * ``row``: integer index of the row. + + The tuple itself then may take the following forms: + + * Select a single column from a table via: + * ``self[category, column]`` + * Select a single full row of a given category table via: + * ``self[row, category]`` (recommended, for consistency with DynamicTable) + * ``self[category, row]`` + * Select a single cell via: + * ``self[row, (category, column)]`` (recommended, for consistency with DynamicTable) + * ``self[row, category, column]`` + * ``self[category, column, row]`` + + :returns: Depending on the type of selection the function returns a: + + * **pandas.DataFrame**: when retrieving a row or category table + * **array** : when retrieving a single column + * **single value** : when retrieving a single cell. The data type and shape will depend on the + data type and shape of the cell/column. """ if isinstance(item, (int, list, np.ndarray, slice)): # get a single full row from all tables - dfs = ([super().__getitem__(item).reset_index(), ] + + dfs = ([super().get(item, **kwargs).reset_index(), ] + [category[item].reset_index() for category in self.category_tables.values()]) names = [self.name, ] + list(self.category_tables.keys()) res = pd.concat(dfs, axis=1, keys=names) @@ -248,14 +315,101 @@ def __getitem__(self, item): elif isinstance(item, str) or item is None: if item in self.colnames: # get a specific column - return super().__getitem__(item) + return super().get(item, **kwargs) else: # get a single category return self.get_category(item).to_dataframe() elif isinstance(item, tuple): if len(item) == 2: - return self.get_category(item[0])[item[1]] + # DynamicTable allows selection of cells via the syntax [int, str], i.e,. [row_index, columnname] + # We support this syntax here as well with the additional caveat that in AlignedDynamicTable + # columns are identified by tuples of strings. 
As such [int, str] refers not to a cell but + # a single row in a particular category table (i.e., [row_index, category]). To select a cell + # the second part of the item then is a tuple of strings, i.e., [row_index, (category, column)] + if isinstance(item[0], (int, np.integer)): + # Select a single cell or row of a sub-table based on row-index(item[0]) + # and the category (if item[1] is a string) or column (if item[1] is a tuple of (category, column) + re = self[item[0]][item[1]] + # re is a pandas.Series or pandas.Dataframe. If we selected a single cell + # (i.e., item[2] was a tuple defining a particular column) then return the value of the cell + if re.size == 1: + re = re.values[0] + # If we selected a single cell from a ragged column then we need to change the list to a tuple + if isinstance(re, list): + re = tuple(re) + # We selected a row of a whole table (i.e., item[2] identified only the category table, + # but not a particular column). + # Change the result from a pandas.Series to a pandas.DataFrame for consistency with DynamicTable + if isinstance(re, pd.Series): + re = re.to_frame() + return re + else: + return self.get_category(item[0])[item[1]] elif len(item) == 3: - return self.get_category(item[0])[item[1]][item[2]] + if isinstance(item[0], (int, np.integer)): + return self.get_category(item[1])[item[2]][item[0]] + else: + return self.get_category(item[0])[item[1]][item[2]] else: - raise ValueError("Expected tuple of length 2 or 3 with (category, column, row) as value.") + raise ValueError("Expected tuple of length 2 of the form [category, column], [row, category], " + "[row, (category, column)] or a tuple of length 3 of the form " + "[category, column, row], [row, category, column]") + + @docval({'name': 'ignore_category_tables', 'type': bool, + 'doc': "Ignore the category tables and only check in the main table columns", 'default': False}, + allow_extra=False) + def has_foreign_columns(self, **kwargs): + """ + Does the table contain 
DynamicTableRegion columns + + :returns: True if the table or any of the category tables contains a DynamicTableRegion column, else False + """ + ignore_category_tables = getargs('ignore_category_tables', kwargs) + if super().has_foreign_columns(): + return True + if not ignore_category_tables: + for table in self.category_tables.values(): + if table.has_foreign_columns(): + return True + return False + + @docval({'name': 'ignore_category_tables', 'type': bool, + 'doc': "Ignore the category tables and only check in the main table columns", 'default': False}, + allow_extra=False) + def get_foreign_columns(self, **kwargs): + """ + Determine the names of all columns that link to another DynamicTable, i.e., + find all DynamicTableRegion type columns. Similar to a foreign key in a + database, a DynamicTableRegion column references elements in another table. + + :returns: List of tuples (str, str) where the first string is the name of the + category table (or None if the column is in the main table) and the + second string is the column name. 
+ """ + ignore_category_tables = getargs('ignore_category_tables', kwargs) + col_names = [(None, col_name) for col_name in super().get_foreign_columns()] + if not ignore_category_tables: + for table in self.category_tables.values(): + col_names += [(table.name, col_name) for col_name in table.get_foreign_columns()] + return col_names + + @docval(*get_docval(DynamicTable.get_linked_tables), + {'name': 'ignore_category_tables', 'type': bool, + 'doc': "Ignore the category tables and only check in the main table columns", 'default': False}, + allow_extra=False) + def get_linked_tables(self, **kwargs): + """ + Get a list of the full list of all tables that are being linked to directly or indirectly + from this table via foreign DynamicTableColumns included in this table or in any table that + can be reached through DynamicTableRegion columns + + + Returns: List of dicts with the following keys: + * 'source_table' : The source table containing the DynamicTableRegion column + * 'source_column' : The relevant DynamicTableRegion column in the 'source_table' + * 'target_table' : The target DynamicTable; same as source_column.table. + + """ + ignore_category_tables = getargs('ignore_category_tables', kwargs) + other_tables = None if ignore_category_tables else list(self.category_tables.values()) + return super().get_linked_tables(other_tables=other_tables) diff --git a/src/hdmf/common/hierarchicaltable.py b/src/hdmf/common/hierarchicaltable.py new file mode 100644 index 000000000..fbcca4517 --- /dev/null +++ b/src/hdmf/common/hierarchicaltable.py @@ -0,0 +1,248 @@ +""" +Module providing additional functionality for dealing with hierarchically nested tables, i.e., +tables containing DynamicTableRegion references. 
+""" +import pandas as pd +import numpy as np +from hdmf.common.table import DynamicTable, DynamicTableRegion, VectorIndex +from hdmf.common.alignedtable import AlignedDynamicTable +from hdmf.utils import docval, getargs + + +@docval({'name': 'dynamic_table', 'type': DynamicTable, + 'doc': 'DynamicTable object to be converted to a hierarchical pandas.Dataframe'}, + returns="Hierarchical pandas.DataFrame with usually a pandas.MultiIndex on both the index and columns.", + rtype='pandas.DataFrame', + is_method=False) +def to_hierarchical_dataframe(dynamic_table): + """ + Create a hierarchical pandas.DataFrame that represents all data from a collection of linked DynamicTables. + + **LIMITATIONS:** Currently this function only supports DynamicTables with a single DynamicTableRegion column. + If a table has more than one DynamicTableRegion column then the function will expand only the + first DynamicTableRegion column found for each table. Any additional DynamicTableRegion columns will remain + nested. 
+ + **NOTE:** Some useful functions for further processing of the generated + DataFrame include: + + * pandas.DataFrame.reset_index to turn the data from the pandas.MultiIndex into columns + * :py:meth:`~hdmf.common.hierarchicaltable.drop_id_columns` to remove all 'id' columns + * :py:meth:`~hdmf.common.hierarchicaltable.flatten_column_index` to flatten the column index + """ + # TODO: Need to deal with the case where we have more than one DynamicTableRegion column in a given table + # Get the references column + foreign_columns = dynamic_table.get_foreign_columns() + # if table does not contain any DynamicTableRegion columns then we can just convert it to a dataframe + if len(foreign_columns) == 0: + return dynamic_table.to_dataframe() + hcol_name = foreign_columns[0] # We only denormalize the first foreign column for now + hcol = dynamic_table[hcol_name] # Either a VectorIndex pointing to a DynamicTableRegion or a DynamicTableRegion + # Get the target DynamicTable that hcol is pointing to. If hcol is a VectorIndex then we first need + # to get the target of it before we look up the table. + hcol_target = hcol.table if isinstance(hcol, DynamicTableRegion) else hcol.target.table + + # Create the data variables we need to collect the data for our output dataframe and associated index + index = [] + data = [] + columns = None + index_names = None + + # First we here get a list of DataFrames, one for each row of the column we need to process. + # If hcol is a VectorIndex (i.e., our column is a ragged array of row indices), then simply loading + # the data from the VectorIndex will do the trick. 
If we have a regular DynamicTableRegion column, + # then we need to load the elements ourselves (using slice syntax to make sure we get DataFrames) + # one-row-at-a-time + if isinstance(hcol, VectorIndex): + rows = hcol.get(slice(None), index=False, df=True) + else: + rows = [hcol[i:(i+1)] for i in range(len(hcol))] + # Retrieve the columns we need to iterate over from our input table. For AlignedDynamicTable we need to + # use the get_colnames function instead of the colnames property to ensure we get all columns not just + # the columns from the main table + dynamic_table_colnames = (dynamic_table.get_colnames(include_category_tables=True, ignore_category_ids=False) + if isinstance(dynamic_table, AlignedDynamicTable) + else dynamic_table.colnames) + + # Case 1: Our DynamicTableRegion column points to a DynamicTable that itself does not contain + # any DynamicTableRegion references (i.e., we have reached the end of our table hierarchy). + # If this is the case than we need to de-normalize the data and flatten the hierarchy + if not hcol_target.has_foreign_columns(): + # Iterate over all rows, where each row is described by a DataFrame with one-or-more rows + for row_index, row_df in enumerate(rows): + # Since each row contains a pandas.DataFrame (with possible multiple rows), we + # next need to iterate over all rows in that table to denormalize our data + for row in row_df.itertuples(index=True): + # Determine the column data for our row. 
Each selected row from our target table + # becomes a row in our flattened table + data.append(row) + # Determine the multi-index tuple for our row, consisting of: i) id of the row in this + # table, ii) all columns (except the hierarchical column we are flattening), and + # iii) the index (i.e., id) from our target row + index_data = ([dynamic_table.id[row_index], ] + + [dynamic_table[row_index, colname] + for colname in dynamic_table_colnames if colname != hcol_name]) + index.append(tuple(index_data)) + + # Determine the names for our index and columns of our output table + # We need to do this even if our table was empty (i.e. even is len(rows)==0) + # NOTE: While for a regular DynamicTable the "colnames" property will give us the full list of column names, + # for AlignedDynamicTable we need to use the get_colnames() function instead to make sure we include + # the category table columns as well. + index_names = ([(dynamic_table.name, 'id')] + + [(dynamic_table.name, colname) + for colname in dynamic_table_colnames if colname != hcol_name]) + # Determine the name of our columns + hcol_iter_columns = (hcol_target.get_colnames(include_category_tables=True, ignore_category_ids=False) + if isinstance(hcol_target, AlignedDynamicTable) + else hcol_target.colnames) + columns = pd.MultiIndex.from_tuples([(hcol_target.name, 'id'), ] + + [(hcol_target.name, c) for c in hcol_iter_columns], + names=('source_table', 'label')) + + # Case 2: Our DynamicTableRegion columns points to another table with a DynamicTableRegion, i.e., + # we need to recursively resolve more levels of the table hieararchy + else: + # First we need to recursively flatten the hierarchy by calling 'to_hierarchical_dataframe()' + # (i.e., this function) on the target of our hierarchical column + hcol_hdf = to_hierarchical_dataframe(hcol_target) + # Iterate over all rows, where each row is described by a DataFrame with one-or-more rows + for row_index, row_df_level1 in enumerate(rows): + # Since each row 
contains a pandas.DataFrame (with possible multiple rows), we + # next need to iterate over all rows in that table to denormalize our data + for row_df_level2 in row_df_level1.itertuples(index=True): + # Since our target is itself a a DynamicTable with a DynamicTableRegion columns, + # each target row itself may expand into multiple rows in the flattened hcol_hdf. + # So we now need to look up the rows in hcol_hdf that correspond to the rows in + # row_df_level2. + # NOTE: In this look-up we assume that the ids (and hence the index) of + # each row in the table are in fact unique. + for row_tuple_level3 in hcol_hdf.loc[[row_df_level2[0]]].itertuples(index=True): + # Determine the column data for our row. + data.append(row_tuple_level3[1:]) + # Determine the multi-index tuple for our row, + index_data = ([dynamic_table.id[row_index], ] + + [dynamic_table[row_index, colname] + for colname in dynamic_table_colnames if colname != hcol_name] + + list(row_tuple_level3[0])) + index.append(tuple(index_data)) + # Determine the names for our index and columns of our output table + # We need to do this even if our table was empty (i.e. even is len(rows)==0) + index_names = ([(dynamic_table.name, "id")] + + [(dynamic_table.name, colname) + for colname in dynamic_table_colnames if colname != hcol_name] + + hcol_hdf.index.names) + columns = hcol_hdf.columns + + # Construct the pandas dataframe with the hierarchical multi-index + multi_index = pd.MultiIndex.from_tuples(index, names=index_names) + out_df = pd.DataFrame(data=data, index=multi_index, columns=columns) + return out_df + + +def __get_col_name(col): + """ + Internal helper function to get the actual name of a pandas DataFrame column from a + column name that may consists of an arbitrary sequence of tuples. The function + will return the last value of the innermost tuple. 
+ """ + curr_val = col + while isinstance(curr_val, tuple): + curr_val = curr_val[-1] + return curr_val + + +def __flatten_column_name(col): + """ + Internal helper function used to iteratively flatten a nested tuple + + :param col: Column name to flatten + :type col: Tuple or String + + :returns: If col is a tuple then the result is a flat tuple otherwise col is returned as is + """ + if isinstance(col, tuple): + re = col + while np.any([isinstance(v, tuple) for v in re]): + temp = [] + for v in re: + if isinstance(v, tuple): + temp += list(v) + else: + temp += [v, ] + re = temp + return tuple(re) + else: + return col + + +@docval({'name': 'dataframe', 'type': pd.DataFrame, + 'doc': 'Pandas dataframe to update (usually generated by the to_hierarchical_dataframe function)'}, + {'name': 'inplace', 'type': (bool, np.bool), 'doc': 'Update the dataframe inplace or return a modified copy', + 'default': False}, + returns="pandas.DataFrame with the id columns removed", + rtype='pandas.DataFrame', + is_method=False) +def drop_id_columns(**kwargs): + """ + Drop all columns named 'id' from the table. + + In case a column name is a tuple the function will drop any column for which + the inner-most name is 'id'. The 'id' columns of DynamicTable is in many cases + not necessary for analysis or display. This function allow us to easily filter + all those columns. + + :raises TypeError: In case that dataframe parameter is not a pandas.Dataframe. 
+ """ + dataframe, inplace = getargs('dataframe', 'inplace', kwargs) + col_name = 'id' + drop_labels = [] + for col in dataframe.columns: + if __get_col_name(col) == col_name: + drop_labels.append(col) + re = dataframe.drop(labels=drop_labels, axis=1, inplace=inplace) + return dataframe if inplace else re + + +@docval({'name': 'dataframe', 'type': pd.DataFrame, + 'doc': 'Pandas dataframe to update (usually generated by the to_hierarchical_dataframe function)'}, + {'name': 'max_levels', 'type': (int, np.integer), + 'doc': 'Maximum number of levels to use in the resulting column Index. NOTE: When ' + 'limiting the number of levels the function simply removes levels from the ' + 'beginning. As such, removing levels may result in columns with duplicate names.' + 'Value must be >0.', + 'default': None}, + {'name': 'inplace', 'type': (bool, np.bool), 'doc': 'Update the dataframe inplace or return a modified copy', + 'default': False}, + returns="pandas.DataFrame with a regular pandas.Index columns rather and a pandas.MultiIndex", + rtype='pandas.DataFrame', + is_method=False) +def flatten_column_index(**kwargs): + """ + Flatten the column index of a pandas DataFrame. + + The functions changes the dataframe.columns from a pandas.MultiIndex to a normal Index, + with each column usually being identified by a tuple of strings. This function is + typically used in conjunction with DataFrames generated + by :py:meth:`~hdmf.common.hierarchicaltable.to_hierarchical_dataframe` + + :raises ValueError: In case the num_levels is not >0 + :raises TypeError: In case that dataframe parameter is not a pandas.Dataframe. + """ + dataframe, max_levels, inplace = getargs('dataframe', 'max_levels', 'inplace', kwargs) + if max_levels is not None and max_levels <= 0: + raise ValueError('max_levels must be greater than 0') + # Compute the new column names + col_names = [__flatten_column_name(col) for col in dataframe.columns.values] + # Apply the max_levels filter. 
Make sure to do this only for columns that are actually tuples + # in order not to accidentally shorten the actual string name of columns + if max_levels is None: + select_levels = slice(None) + elif max_levels == 1: + select_levels = -1 + else: # max_levels > 1 + select_levels = slice(-max_levels, None) + col_names = [col[select_levels] if isinstance(col, tuple) else col for col in col_names] + re = dataframe if inplace else dataframe.copy() + re.columns = col_names + return re diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index b1169a77a..b090c14fc 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -47,9 +47,21 @@ def add_row(self, **kwargs): self.append(val) def get(self, key, **kwargs): + """ + Retrieve elements from this VectorData + + :param key: Selection of the elements + :param **kwargs: Ignored + """ return super().get(key) def extend(self, ar, **kwargs): + """Add all elements of the iterable arg to the end of this VectorData. + + Each subclass of VectorData should have its own extend method to ensure functionality and efficiency. + + :param arg: The iterable to add to the end of this VectorData + """ ################################################################################# # Each subclass of VectorData should have its own extend method to ensure # functionality AND efficiency of the extend operation. 
However, because currently @@ -927,6 +939,7 @@ def __get_selection_as_df_single_row(self, coldata): else: # scalar, don't wrap df_input[k] = coldata[k] ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index)) + ret.name = self.name return ret def __get_selection_as_df(self, coldata): @@ -951,6 +964,7 @@ def __get_selection_as_df(self, coldata): else: df_input[k] = coldata[k] ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index)) + ret.name = self.name return ret def __contains__(self, val): @@ -959,6 +973,68 @@ def __contains__(self, val): """ return val in self.__colids or val in self.__indices + def get_foreign_columns(self): + """ + Determine the names of all columns that link to another DynamicTable, i.e., + find all DynamicTableRegion type columns. Similar to a foreign key in a + database, a DynamicTableRegion column references elements in another table. + + :returns: List of strings with the column names + """ + col_names = [] + for col_index, col in enumerate(self.columns): + if isinstance(col, DynamicTableRegion): + col_names.append(col.name) + return col_names + + def has_foreign_columns(self): + """ + Does the table contain DynamicTableRegion columns + + :returns: True if the table contains a DynamicTableRegion column, else False + """ + for col_index, col in enumerate(self.columns): + if isinstance(col, DynamicTableRegion): + return True + return False + + @docval({'name': 'other_tables', 'type': (list, tuple, set), + 'doc': "List of additional tables to consider in the search. 
Usually this " + "parameter is used for internal purposes, e.g., when we need to " + "consider AlignedDynamicTable", 'default': None}, + allow_extra=False) + def get_linked_tables(self, **kwargs): + """ + Get a list of the full list of all tables that are being linked to directly or indirectly + from this table via foreign DynamicTableColumns included in this table or in any table that + can be reached through DynamicTableRegion columns + + Returns: List of dicts with the following keys: + * 'source_table' : The source table containing the DynamicTableRegion column + * 'source_column' : The relevant DynamicTableRegion column in the 'source_table' + * 'target_table' : The target DynamicTable; same as source_column.table. + """ + curr_tables = [self, ] # Set of tables + other_tables = getargs('other_tables', kwargs) + if other_tables is not None: + curr_tables += other_tables + curr_index = 0 + foreign_cols = [] + while curr_index < len(curr_tables): + for col_index, col in enumerate(curr_tables[curr_index].columns): + if isinstance(col, DynamicTableRegion): + foreign_cols.append({'source_table': curr_tables[curr_index], + 'source_column': col, + 'target_table': col.table}) + curr_table_visited = False + for t in curr_tables: + if t is col.table: + curr_table_visited = True + if not curr_table_visited: + curr_tables.append(col.table) + curr_index += 1 + return foreign_cols + @docval({'name': 'exclude', 'type': set, 'doc': 'Set of column names to exclude from the dataframe', 'default': None}, {'name': 'index', 'type': bool, diff --git a/src/hdmf/container.py b/src/hdmf/container.py index a7d56dd64..67a5fbf4a 100644 --- a/src/hdmf/container.py +++ b/src/hdmf/container.py @@ -550,6 +550,12 @@ def append(self, arg): self.__data = append_data(self.__data, arg) def extend(self, arg): + """ + The extend_data method adds all the elements of the iterable arg to the + end of the data of this Data container. 
+ + :param arg: The iterable to add to the end of this VectorData + """ self.__data = extend_data(self.__data, arg) diff --git a/src/hdmf/data_utils.py b/src/hdmf/data_utils.py index 93856a040..5c99687e4 100644 --- a/src/hdmf/data_utils.py +++ b/src/hdmf/data_utils.py @@ -27,6 +27,11 @@ def append_data(data, arg): def extend_data(data, arg): + """Add all the elements of the iterable arg to the end of data. + + :param data: The array to extend + :type data: list, DataIO, np.ndarray, h5py.Dataset + """ if isinstance(data, (list, DataIO)): data.extend(arg) return data diff --git a/src/hdmf/utils.py b/src/hdmf/utils.py index 771b0f248..fe85d73f0 100644 --- a/src/hdmf/utils.py +++ b/src/hdmf/utils.py @@ -492,6 +492,8 @@ def foo(self, **kwargs): :param is_method: True if this is decorating an instance or class method, False otherwise (Default=True) :param enforce_shape: Enforce the dimensions of input arrays (Default=True) :param validator: :py:func:`dict` objects specifying the method parameters + :param allow_extra: Allow extra arguments (Default=False) + :param allow_positional: Allow positional arguments (Default=True) :param options: additional options for documenting and validating method parameters ''' enforce_type = options.pop('enforce_type', True) diff --git a/tests/unit/common/test_alignedtable.py b/tests/unit/common/test_alignedtable.py index 0d83ac684..74e436d5b 100644 --- a/tests/unit/common/test_alignedtable.py +++ b/tests/unit/common/test_alignedtable.py @@ -3,13 +3,19 @@ import warnings from hdmf.backends.hdf5 import HDF5IO -from hdmf.common import DynamicTable, VectorData, get_manager, AlignedDynamicTable +from hdmf.common import DynamicTable, VectorData, get_manager, AlignedDynamicTable, DynamicTableRegion from hdmf.testing import TestCase, remove_test_file class TestAlignedDynamicTableContainer(TestCase): """ Test the AlignedDynamicTable Container class. 
+ + NOTE: Functions specific to linked tables, specifically the: + * has_foreign_columns + * get_foreign_columns + * get_linked_tables + methods are tested in the test_linkedtables.TestLinkedAlignedDynamicTables class instead of here. """ def setUp(self): warnings.simplefilter("always") # Trigger all warnings @@ -410,10 +416,22 @@ def test_get_item(self): self.assertListEqual(temp['test1', 'c1'][:].tolist(), (np.arange(num_rows) + 3).tolist()) # Test getting a specific cell self.assertEqual(temp[None, 'main_c1', 1], 3) + self.assertEqual(temp[1, None, 'main_c1'], 3) # Test bad selection tuple with self.assertRaisesWith(ValueError, - "Expected tuple of length 2 or 3 with (category, column, row) as value."): + "Expected tuple of length 2 of the form [category, column], [row, category], " + "[row, (category, column)] or a tuple of length 3 of the form " + "[category, column, row], [row, category, column]"): temp[('main_c1',)] + # Test selecting a single cell or row of a category table by having a + # [int, str] or [int, (str, str)] type selection + # Select row 0 from category 'test1' + re = temp[0, 'test1'] + self.assertListEqual(re.columns.to_list(), ['id', 'c1', 'c2']) + self.assertListEqual(re.index.names, [('test_aligned_table', 'id')]) + self.assertListEqual(re.values.tolist()[0], [0, 3, 4]) + # Select a single cell from a columm + self.assertEqual(temp[1, ('test_aligned_table', 'main_c1')], 3) def test_to_dataframe(self): """Test that the to_dataframe method works""" @@ -497,3 +515,74 @@ def test_nested_aligned_dynamic_table_not_allowed(self): msg = "Category is an AlignedDynamicTable. Nesting of AlignedDynamicTable is currently not supported." with self.assertRaisesWith(ValueError, msg): adt.add_category(adt_category) + + def test_dynamictable_region_to_aligneddynamictable(self): + """ + Test to ensure data is being retrieved correctly when pointing to an AlignedDynamicTable. 
+ In particular, make sure that all columns are being used, including those of the + category tables, not just the ones from the main table. + """ + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', data=np.arange(4))]) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + VectorData(name='a2', description='c1', data=np.arange(4))]) + dtr = DynamicTableRegion(name='test', description='test', data=np.arange(4), table=temp_aligned_table) + dtr_df = dtr[:] + # Full number of rows + self.assertEqual(len(dtr_df), 4) + # Test num columns: 2 columns from the main table, 2 columns from the category, 1 id columns from the category + self.assertEqual(len(dtr_df.columns), 5) + # Test that the data is correct + for i, v in enumerate([('my_aligned_table', 'a1'), ('my_aligned_table', 'a2'), + ('t1', 'id'), ('t1', 'c1'), ('t1', 'c2')]): + self.assertTupleEqual(dtr_df.columns[i], v) + # Test the column data + for c in dtr_df.columns: + self.assertListEqual(dtr_df[c].to_list(), list(range(4))) + + def test_get_colnames(self): + """ + Test the AlignedDynamicTable.get_colnames function + """ + category_names = ['test1', 'test2', 'test3'] + num_rows = 10 + categories = [DynamicTable(name=val, + description=val+" description", + columns=[VectorData(name=t, + description=val+t+' description', + data=np.arange(num_rows)) for t in ['c1', 'c2', 'c3']] + ) for val in category_names] + adt = AlignedDynamicTable( + name='test_aligned_table', + description='Test aligned container', + category_tables=categories, + columns=[VectorData(name='main_' + t, + description='main_'+t+'_description', + data=np.arange(num_rows)) for t in ['c1', 'c2', 'c3']]) + # Default, only get the colnames 
of the main table. Same as adt.colnames property + expected_colnames = ('main_c1', 'main_c2', 'main_c3') + self.assertTupleEqual(adt.get_colnames(), expected_colnames) + # Same as default because if we don't include the categories then ignore_category_ids has no effect + self.assertTupleEqual(adt.get_colnames(include_category_tables=False, ignore_category_ids=True), + expected_colnames) + # Full set of columns + expected_colnames = [('test_aligned_table', 'main_c1'), ('test_aligned_table', 'main_c2'), + ('test_aligned_table', 'main_c3'), ('test1', 'id'), ('test1', 'c1'), + ('test1', 'c2'), ('test1', 'c3'), ('test2', 'id'), ('test2', 'c1'), + ('test2', 'c2'), ('test2', 'c3'), ('test3', 'id'), ('test3', 'c1'), + ('test3', 'c2'), ('test3', 'c3')] + self.assertListEqual(adt.get_colnames(include_category_tables=True, ignore_category_ids=False), + expected_colnames) + # All columns without the id columns of the category tables + expected_colnames = [('test_aligned_table', 'main_c1'), ('test_aligned_table', 'main_c2'), + ('test_aligned_table', 'main_c3'), ('test1', 'c1'), ('test1', 'c2'), + ('test1', 'c3'), ('test2', 'c1'), ('test2', 'c2'), ('test2', 'c3'), + ('test3', 'c1'), ('test3', 'c2'), ('test3', 'c3')] + self.assertListEqual(adt.get_colnames(include_category_tables=True, ignore_category_ids=True), + expected_colnames) diff --git a/tests/unit/common/test_linkedtables.py b/tests/unit/common/test_linkedtables.py new file mode 100644 index 000000000..14db1c471 --- /dev/null +++ b/tests/unit/common/test_linkedtables.py @@ -0,0 +1,716 @@ +""" +Module for testing functions specific to tables containing DynamicTableRegion columns +""" + +import numpy as np +from hdmf.common import DynamicTable, AlignedDynamicTable, VectorData, DynamicTableRegion, VectorIndex +from hdmf.testing import TestCase +from hdmf.utils import docval, popargs, get_docval, call_docval_func +from hdmf.common.hierarchicaltable import to_hierarchical_dataframe, drop_id_columns, flatten_column_index +from 
pandas.testing import assert_frame_equal + + +class DynamicTableSingleDTR(DynamicTable): + """Test table class that references a single foreign table""" + __columns__ = ( + {'name': 'child_table_ref1', + 'description': 'Column with a references to the next level in the hierarchy', + 'required': True, + 'index': True, + 'table': True}, + ) + + @docval({'name': 'name', 'type': str, 'doc': 'The name of the table'}, + {'name': 'child_table1', + 'type': DynamicTable, + 'doc': 'the child DynamicTable this DynamicTableSingleDTR point to.'}, + *get_docval(DynamicTable.__init__, 'id', 'columns', 'colnames')) + def __init__(self, **kwargs): + # Define default name and description settings + kwargs['description'] = (kwargs['name'] + " DynamicTableSingleDTR") + # Initialize the DynamicTable + call_docval_func(super(DynamicTableSingleDTR, self).__init__, kwargs) + if self['child_table_ref1'].target.table is None: + self['child_table_ref1'].target.table = popargs('child_table1', kwargs) + + +class DynamicTableMultiDTR(DynamicTable): + """Test table class that references multiple related tables""" + __columns__ = ( + {'name': 'child_table_ref1', + 'description': 'Column with a references to the next level in the hierarchy', + 'required': True, + 'index': True, + 'table': True}, + {'name': 'child_table_ref2', + 'description': 'Column with a references to the next level in the hierarchy', + 'required': True, + 'index': True, + 'table': True}, + ) + + @docval({'name': 'name', 'type': str, 'doc': 'The name of the table'}, + {'name': 'child_table1', + 'type': DynamicTable, + 'doc': 'the child DynamicTable this DynamicTableSingleDTR point to.'}, + {'name': 'child_table2', + 'type': DynamicTable, + 'doc': 'the child DynamicTable this DynamicTableSingleDTR point to.'}, + *get_docval(DynamicTable.__init__, 'id', 'columns', 'colnames')) + def __init__(self, **kwargs): + # Define default name and description settings + kwargs['description'] = (kwargs['name'] + " DynamicTableSingleDTR") + # 
Initialize the DynamicTable + call_docval_func(super(DynamicTableMultiDTR, self).__init__, kwargs) + if self['child_table_ref1'].target.table is None: + self['child_table_ref1'].target.table = popargs('child_table1', kwargs) + if self['child_table_ref2'].target.table is None: + self['child_table_ref2'].target.table = popargs('child_table2', kwargs) + + +class TestLinkedAlignedDynamicTables(TestCase): + """ + Test functionality specific to AlignedDynamicTables containing DynamicTableRegion columns. + + Since these functions only implement front-end convenience functions for DynamicTable + we do not need to worry about I/O here (that is tested elsewhere), but it is sufficient if + we test with container class. The only time I/O becomes relevant is on read in case that, e.g., a + h5py.Dataset may behave differently than a numpy array. + """ + def setUp(self): + """ + Create basic set of linked tables consisting of + + aligned_table + | + +--> category0 ---> table_level_0_0 + | + +--> category1 ---> table_level_0_1 + """ + # Level 0 0 table. I.e., first table on level 0 + self.table_level0_0 = DynamicTable(name='level0_0', description="level0_0 DynamicTable") + self.table_level0_0.add_row(id=10) + self.table_level0_0.add_row(id=11) + self.table_level0_0.add_row(id=12) + self.table_level0_0.add_row(id=13) + self.table_level0_0.add_column(data=['tag1', 'tag2', 'tag2', 'tag1', 'tag3', 'tag4', 'tag5'], + name='tags', + description='custom tags', + index=[1, 2, 4, 7]) + self.table_level0_0.add_column(data=np.arange(4), + name='myid', + description='custom ids', + index=False) + # Level 0 1 table. 
I.e., second table on level 0 + self.table_level0_1 = DynamicTable(name='level0_1', description="level0_1 DynamicTable") + self.table_level0_1.add_row(id=14) + self.table_level0_1.add_row(id=15) + self.table_level0_1.add_row(id=16) + self.table_level0_1.add_row(id=17) + self.table_level0_1.add_column(data=['tag1', 'tag1', 'tag2', 'tag2', 'tag3', 'tag3', 'tag4'], + name='tags', + description='custom tags', + index=[2, 4, 6, 7]) + self.table_level0_1.add_column(data=np.arange(4), + name='myid', + description='custom ids', + index=False) + + # category 0 table + self.category0 = DynamicTableSingleDTR(name='category0', child_table1=self.table_level0_0) + self.category0.add_row(id=0, child_table_ref1=[0, ]) + self.category0.add_row(id=1, child_table_ref1=[1, 2]) + self.category0.add_row(id=1, child_table_ref1=[3, ]) + self.category0.add_column(data=[10, 11, 12], + name='filter', + description='filter value', + index=False) + + # category 1 table + self.category1 = DynamicTableSingleDTR(name='category1', child_table1=self.table_level0_1) + self.category1.add_row(id=0, child_table_ref1=[0, 1]) + self.category1.add_row(id=1, child_table_ref1=[2, 3]) + self.category1.add_row(id=1, child_table_ref1=[1, 3]) + self.category1.add_column(data=[1, 2, 3], + name='filter', + description='filter value', + index=False) + # Aligned table + self.aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + columns=[VectorData(name='a1', description='a1', data=np.arange(3)), ], + colnames=['a1', ], + category_tables=[self.category0, self.category1]) + + def tearDown(self): + del self.table_level0_0 + del self.table_level0_1 + del self.category0 + del self.category1 + del self.aligned_table + + def test_to_hierarchical_dataframe(self): + """Test that converting an AlignedDynamicTable with links works""" + hier_df = to_hierarchical_dataframe(self.aligned_table) + self.assertListEqual(hier_df.columns.to_list(), + [('level0_0', 'id'), ('level0_0', 'tags'), 
('level0_0', 'myid')]) + self.assertListEqual(hier_df.index.names, + [('my_aligned_table', 'id'), ('my_aligned_table', ('my_aligned_table', 'a1')), + ('my_aligned_table', ('category0', 'id')), ('my_aligned_table', ('category0', 'filter')), + ('my_aligned_table', ('category1', 'id')), + ('my_aligned_table', ('category1', 'child_table_ref1')), + ('my_aligned_table', ('category1', 'filter'))]) + self.assertListEqual(hier_df.index.to_list(), + [(0, 0, 0, 10, 0, (0, 1), 1), + (1, 1, 1, 11, 1, (2, 3), 2), + (1, 1, 1, 11, 1, (2, 3), 2), + (2, 2, 1, 12, 1, (1, 3), 3)]) + self.assertListEqual(hier_df[('level0_0', 'tags')].values.tolist(), + [['tag1'], ['tag2'], ['tag2', 'tag1'], ['tag3', 'tag4', 'tag5']]) + + def test_has_foreign_columns_in_category_tables(self): + """Test confirming working order for DynamicTableRegions in subtables""" + self.assertTrue(self.aligned_table.has_foreign_columns()) + self.assertFalse(self.aligned_table.has_foreign_columns(ignore_category_tables=True)) + + def test_has_foreign_columns_false(self): + """Test false if there are no DynamicTableRegionColumns""" + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', data=np.arange(4))]) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + VectorData(name='a2', description='c2', data=np.arange(4))]) + self.assertFalse(temp_aligned_table.has_foreign_columns()) + self.assertFalse(temp_aligned_table.has_foreign_columns(ignore_category_tables=True)) + + def test_has_foreign_column_in_main_table(self): + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', 
data=np.arange(4))]) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table)]) + self.assertTrue(temp_aligned_table.has_foreign_columns()) + self.assertTrue(temp_aligned_table.has_foreign_columns(ignore_category_tables=True)) + + def test_get_foreign_columns(self): + # check without subcategories + foreign_cols = self.aligned_table.get_foreign_columns(ignore_category_tables=True) + self.assertListEqual(foreign_cols, []) + # check with subcategories + foreign_cols = self.aligned_table.get_foreign_columns() + self.assertEqual(len(foreign_cols), 2) + for i, v in enumerate([('category0', 'child_table_ref1'), ('category1', 'child_table_ref1')]): + self.assertTupleEqual(foreign_cols[i], v) + + def test_get_foreign_columns_none(self): + """Test false if there are no DynamicTableRegionColumns""" + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', data=np.arange(4))]) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + VectorData(name='a2', description='c2', data=np.arange(4))]) + self.assertListEqual(temp_aligned_table.get_foreign_columns(), []) + self.assertListEqual(temp_aligned_table.get_foreign_columns(ignore_category_tables=True), []) + + def test_get_foreign_column_in_main_and_category_table(self): + temp_table0 = DynamicTable(name='t0', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', 
data=np.arange(4))]) + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='c2', description='c2', + data=np.arange(4), table=temp_table0)]) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table)]) + # We should get both the DynamicTableRegion from the main table and the category 't1' + self.assertListEqual(temp_aligned_table.get_foreign_columns(), [(None, 'a2'), ('t1', 'c2')]) + # We should only get the column from the main table + self.assertListEqual(temp_aligned_table.get_foreign_columns(ignore_category_tables=True), [(None, 'a2')]) + + def test_get_linked_tables(self): + # check without subcategories + linked_table = self.aligned_table.get_linked_tables(ignore_category_tables=True) + self.assertListEqual(linked_table, []) + # check with subcategories + linked_tables = self.aligned_table.get_linked_tables() + self.assertEqual(len(linked_tables), 2) + self.assertTupleEqual((linked_tables[0]['source_table'].name, + linked_tables[0]['source_column'].name, + linked_tables[0]['target_table'].name), + ('category0', 'child_table_ref1', 'level0_0')) + self.assertTupleEqual((linked_tables[1]['source_table'].name, + linked_tables[1]['source_column'].name, + linked_tables[1]['target_table'].name), + ('category1', 'child_table_ref1', 'level0_1')) + + def test_get_linked_tables_none(self): + """Test false if there are no DynamicTableRegionColumns""" + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', data=np.arange(4))]) + temp_aligned_table = 
AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + VectorData(name='a2', description='c2', data=np.arange(4))]) + self.assertListEqual(temp_aligned_table.get_linked_tables(), []) + self.assertListEqual(temp_aligned_table.get_linked_tables(ignore_category_tables=True), []) + + def test_get_linked_tables_complex_link(self): + temp_table0 = DynamicTable(name='t0', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', data=np.arange(4))]) + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='c2', description='c2', + data=np.arange(4), table=temp_table0)]) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table)]) + # NOTE: in this example templ_aligned_table both points to temp_table and at the + # same time contains temp_table as a category. 
This could lead to temp_table + # visited multiple times and we want to make sure this doesn't happen + # We should get both the DynamicTableRegion from the main table and the category 't1' + linked_tables = temp_aligned_table.get_linked_tables() + self.assertEqual(len(linked_tables), 2) + for i, v in enumerate([('my_aligned_table', 'a2', 't1'), ('t1', 'c2', 't0')]): + self.assertTupleEqual((linked_tables[i]['source_table'].name, + linked_tables[i]['source_column'].name, + linked_tables[i]['target_table'].name), v) + # Now, since our main table links to the category table the result should remain the same + # even if we ignore the category table + linked_tables = temp_aligned_table.get_linked_tables(ignore_category_tables=True) + self.assertEqual(len(linked_tables), 2) + for i, v in enumerate([('my_aligned_table', 'a2', 't1'), ('t1', 'c2', 't0')]): + self.assertTupleEqual((linked_tables[i]['source_table'].name, + linked_tables[i]['source_column'].name, + linked_tables[i]['target_table'].name), v) + + def test_get_linked_tables_simple_link(self): + temp_table0 = DynamicTable(name='t0', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + VectorData(name='c2', description='c2', data=np.arange(4))]) + temp_table = DynamicTable(name='t1', description='t1', + colnames=['c1', 'c2'], + columns=[VectorData(name='c1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='c2', description='c2', + data=np.arange(4), table=temp_table0)]) + temp_aligned_table = AlignedDynamicTable(name='my_aligned_table', + description='my test table', + category_tables=[temp_table], + colnames=['a1', 'a2'], + columns=[VectorData(name='a1', description='c1', data=np.arange(4)), + DynamicTableRegion(name='a2', description='c2', + data=np.arange(4), table=temp_table0)]) + # NOTE: in this example temp_aligned_table and temp_table both point to temp_table0 + # We should get both the DynamicTableRegion from the main table and 
the category 't1' + linked_tables = temp_aligned_table.get_linked_tables() + self.assertEqual(len(linked_tables), 2) + for i, v in enumerate([('my_aligned_table', 'a2', 't0'), ('t1', 'c2', 't0')]): + self.assertTupleEqual((linked_tables[i]['source_table'].name, + linked_tables[i]['source_column'].name, + linked_tables[i]['target_table'].name), v) + # Since no table ever link to our category temp_table we should only get the link from our + # main table here, in contrast to what happens in the test_get_linked_tables_complex_link case + linked_tables = temp_aligned_table.get_linked_tables() + self.assertEqual(len(linked_tables), 2) + for i, v in enumerate([('my_aligned_table', 'a2', 't0'), ]): + self.assertTupleEqual((linked_tables[i]['source_table'].name, + linked_tables[i]['source_column'].name, + linked_tables[i]['target_table'].name), v) + + +class TestHierarchicalTable(TestCase): + + def setUp(self): + """ + Create basic set of linked tables consisting of + + super_parent_table ---> parent_table ---> aligned_table + | + +--> category0 + """ + # Level 0 0 table. 
I.e., first table on level 0 + self.category0 = DynamicTable(name='level0_0', description="level0_0 DynamicTable") + self.category0.add_row(id=10) + self.category0.add_row(id=11) + self.category0.add_row(id=12) + self.category0.add_row(id=13) + self.category0.add_column(data=['tag1', 'tag2', 'tag2', 'tag1', 'tag3', 'tag4', 'tag5'], + name='tags', + description='custom tags', + index=[1, 2, 4, 7]) + self.category0.add_column(data=np.arange(4), + name='myid', + description='custom ids', + index=False) + + # Aligned table + self.aligned_table = AlignedDynamicTable(name='aligned_table', + description='parent_table', + columns=[VectorData(name='a1', description='a1', data=np.arange(4)), ], + colnames=['a1', ], + category_tables=[self.category0, ]) + + # Parent table + self.parent_table = DynamicTable(name='parent_table', + description='parent_table', + columns=[VectorData(name='p1', description='p1', data=np.arange(4)), + DynamicTableRegion(name='l1', description='l1', + data=np.arange(4), table=self.aligned_table)]) + # Super-parent table + dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=self.parent_table) + vi_dtr_sp = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_sp) + self.super_parent_table = DynamicTable(name='super_parent_table', + description='super_parent_table', + columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)), + dtr_sp, vi_dtr_sp]) + + def tearDown(self): + del self.category0 + del self.aligned_table + del self.parent_table + + def test_to_hierarchical_dataframe_no_dtr_on_top_level(self): + # Cover the case where our top dtr is flat (i.e., without a VectorIndex) + dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=self.parent_table) + spttable = DynamicTable(name='super_parent_table', + description='super_parent_table', + columns=[VectorData(name='sp1', description='sp1', data=np.arange(4)), dtr_sp]) + hier_df = 
to_hierarchical_dataframe(spttable).reset_index() + expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'), + ('parent_table', 'id'), ('parent_table', 'p1'), + ('aligned_table', 'id'), + ('aligned_table', ('aligned_table', 'a1')), ('aligned_table', ('level0_0', 'id')), + ('aligned_table', ('level0_0', 'tags')), ('aligned_table', ('level0_0', 'myid'))] + self.assertListEqual(hier_df.columns.to_list(), expected_columns) + + def test_to_hierarchical_dataframe_indexed_dtr_on_last_level(self): + # Parent table + dtr_p1 = DynamicTableRegion(name='l1', description='l1', data=np.arange(4), table=self.aligned_table) + vi_dtr_p1 = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_p1) + p1 = DynamicTable(name='parent_table', description='parent_table', + columns=[VectorData(name='p1', description='p1', data=np.arange(3)), dtr_p1, vi_dtr_p1]) + # Super-parent table + dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=p1) + vi_dtr_sp = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_sp) + spt = DynamicTable(name='super_parent_table', description='super_parent_table', + columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)), dtr_sp, vi_dtr_sp]) + hier_df = to_hierarchical_dataframe(spt).reset_index() + expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'), + ('parent_table', 'id'), ('parent_table', 'p1'), + ('aligned_table', 'id'), + ('aligned_table', ('aligned_table', 'a1')), ('aligned_table', ('level0_0', 'id')), + ('aligned_table', ('level0_0', 'tags')), ('aligned_table', ('level0_0', 'myid'))] + self.assertListEqual(hier_df.columns.to_list(), expected_columns) # make sure we have the right columns + self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list(), + [['tag1'], ['tag2'], ['tag2', 'tag1']]) + + def test_to_hierarchical_dataframe_empty_tables(self): + # Setup empty tables with the following hierarchy + # super_parent_table ---> 
parent_table ---> child_table + a1 = DynamicTable(name='level0_0', description="level0_0 DynamicTable", + columns=[VectorData(name='l0', description='l0', data=[])]) + p1 = DynamicTable(name='parent_table', description='parent_table', + columns=[DynamicTableRegion(name='l1', description='l1', data=[], table=a1), + VectorData(name='p1c', description='l0', data=[])]) + dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=p1) + vi_dtr_sp = VectorIndex(name='sl1_index', data=[], target=dtr_sp) + spt = DynamicTable(name='super_parent_table', description='super_parent_table', + columns=[dtr_sp, vi_dtr_sp, VectorData(name='sptc', description='l0', data=[])]) + # Convert to hierarchical dataframe and make sure we get the right columns + hier_df = to_hierarchical_dataframe(spt).reset_index() + expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sptc'), + ('parent_table', 'id'), ('parent_table', 'p1c'), + ('level0_0', 'id'), ('level0_0', 'l0')] + self.assertListEqual(hier_df.columns.to_list(), expected_columns) + + def test_to_hierarchical_dataframe_multilevel(self): + hier_df = to_hierarchical_dataframe(self.super_parent_table).reset_index() + expected_cols = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'), + ('parent_table', 'id'), ('parent_table', 'p1'), + ('aligned_table', 'id'), + ('aligned_table', ('aligned_table', 'a1')), + ('aligned_table', ('level0_0', 'id')), + ('aligned_table', ('level0_0', 'tags')), + ('aligned_table', ('level0_0', 'myid'))] + # Check that we have all the columns + self.assertListEqual(hier_df.columns.to_list(), expected_cols) + # Spot-check the data in two columns + self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list(), + [['tag1'], ['tag2'], ['tag2', 'tag1']]) + self.assertListEqual(hier_df[('aligned_table', ('aligned_table', 'a1'))].to_list(), list(range(3))) + + def test_to_hierarchical_dataframe(self): + hier_df = 
to_hierarchical_dataframe(self.parent_table) + self.assertEqual(len(hier_df), 4) + self.assertEqual(len(hier_df.columns), 5) + self.assertEqual(len(hier_df.index.names), 2) + columns = [('aligned_table', 'id'), + ('aligned_table', ('aligned_table', 'a1')), + ('aligned_table', ('level0_0', 'id')), + ('aligned_table', ('level0_0', 'tags')), + ('aligned_table', ('level0_0', 'myid'))] + for i, c in enumerate(hier_df.columns): + self.assertTupleEqual(c, columns[i]) + index_names = [('parent_table', 'id'), ('parent_table', 'p1')] + self.assertListEqual(hier_df.index.names, index_names) + self.assertListEqual(hier_df.index.to_list(), [(i, i) for i in range(4)]) + self.assertListEqual(hier_df[('aligned_table', ('aligned_table', 'a1'))].to_list(), list(range(4))) + self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'id'))].to_list(), list(range(10, 14))) + self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'myid'))].to_list(), list(range(4))) + tags = [['tag1'], ['tag2'], ['tag2', 'tag1'], ['tag3', 'tag4', 'tag5']] + for i, v in enumerate(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list()): + self.assertListEqual(v, tags[i]) + + def test_to_hierarchical_dataframe_flat_table(self): + hier_df = to_hierarchical_dataframe(self.category0) + assert_frame_equal(hier_df, self.category0.to_dataframe()) + hier_df = to_hierarchical_dataframe(self.aligned_table) + assert_frame_equal(hier_df, self.aligned_table.to_dataframe()) + + def test_drop_id_columns(self): + hier_df = to_hierarchical_dataframe(self.parent_table) + cols = hier_df.columns.to_list() + mod_df = drop_id_columns(hier_df, inplace=False) + expected_cols = [('aligned_table', ('aligned_table', 'a1')), + ('aligned_table', ('level0_0', 'tags')), + ('aligned_table', ('level0_0', 'myid'))] + self.assertListEqual(hier_df.columns.to_list(), cols) # Test that no columns are dropped with inplace=False + self.assertListEqual(mod_df.columns.to_list(), expected_cols) # Assert that we got back a modified 
dataframe + drop_id_columns(hier_df, inplace=True) + self.assertListEqual(hier_df.columns.to_list(), + expected_cols) + flat_df = to_hierarchical_dataframe(self.parent_table).reset_index(inplace=False) + drop_id_columns(flat_df, inplace=True) + self.assertListEqual(flat_df.columns.to_list(), + [('parent_table', 'p1'), + ('aligned_table', ('aligned_table', 'a1')), + ('aligned_table', ('level0_0', 'tags')), + ('aligned_table', ('level0_0', 'myid'))]) + + def test_flatten_column_index(self): + hier_df = to_hierarchical_dataframe(self.parent_table).reset_index() + cols = hier_df.columns.to_list() + expexted_cols = [('parent_table', 'id'), + ('parent_table', 'p1'), + ('aligned_table', 'id'), + ('aligned_table', 'aligned_table', 'a1'), + ('aligned_table', 'level0_0', 'id'), + ('aligned_table', 'level0_0', 'tags'), + ('aligned_table', 'level0_0', 'myid')] + df = flatten_column_index(hier_df, inplace=False) + # Test that our columns have not changed with inplace=False + self.assertListEqual(hier_df.columns.to_list(), cols) + self.assertListEqual(df.columns.to_list(), expexted_cols) # make sure we got back a modified dataframe + flatten_column_index(hier_df, inplace=True) # make sure we can also directly flatten inplace + self.assertListEqual(hier_df.columns.to_list(), expexted_cols) + # Test that we can apply flatten_column_index again on our already modified dataframe to reduce the levels + flatten_column_index(hier_df, inplace=True, max_levels=2) + expexted_cols = [('parent_table', 'id'), ('parent_table', 'p1'), ('aligned_table', 'id'), + ('aligned_table', 'a1'), ('level0_0', 'id'), ('level0_0', 'tags'), ('level0_0', 'myid')] + self.assertListEqual(hier_df.columns.to_list(), expexted_cols) + # Test that we can directly reduce the max_levels to just 1 + hier_df = to_hierarchical_dataframe(self.parent_table).reset_index() + flatten_column_index(hier_df, inplace=True, max_levels=1) + expexted_cols = ['id', 'p1', 'id', 'a1', 'id', 'tags', 'myid'] + 
self.assertListEqual(hier_df.columns.to_list(), expexted_cols) + + def test_flatten_column_index_already_flat_index(self): + hier_df = to_hierarchical_dataframe(self.parent_table).reset_index() + flatten_column_index(hier_df, inplace=True, max_levels=1) + expexted_cols = ['id', 'p1', 'id', 'a1', 'id', 'tags', 'myid'] + self.assertListEqual(hier_df.columns.to_list(), expexted_cols) + # Now try to flatten the already flat columns again to make sure nothing changes + flatten_column_index(hier_df, inplace=True, max_levels=1) + self.assertListEqual(hier_df.columns.to_list(), expexted_cols) + + def test_flatten_column_index_bad_maxlevels(self): + hier_df = to_hierarchical_dataframe(self.parent_table) + with self.assertRaisesWith(ValueError, 'max_levels must be greater than 0'): + flatten_column_index(dataframe=hier_df, inplace=True, max_levels=-1) + with self.assertRaisesWith(ValueError, 'max_levels must be greater than 0'): + flatten_column_index(dataframe=hier_df, inplace=True, max_levels=0) + + +class TestLinkedDynamicTables(TestCase): + """ + Test functionality specific to DynamicTables containing DynamicTableRegion columns. + + Since these functions only implement front-end convenience functions for DynamicTable + we do not need to worry about I/O here (that is tested elsewhere), but it is sufficient if + we test with the container class. The only time I/O becomes relevant is on read in case that, e.g., a + h5py.Dataset may behave differently than a numpy array. 
+ """ + def setUp(self): + """ + Create a basic set of linked tables consisting of + + table_level2 ---> table_level1 ----> table_level_0_0 + \ + ------> table_level_0_1 + + """ + self.table_level0_0 = DynamicTable(name='level0_0', description="level0_0 DynamicTable") + self.table_level0_1 = DynamicTable(name='level0_1', description="level0_1 DynamicTable") + self.table_level1 = DynamicTableMultiDTR(name='level1', + child_table1=self.table_level0_0, + child_table2=self.table_level0_1) + self.table_level2 = DynamicTableSingleDTR(name='level2', child_table1=self.table_level1) + + def tearDown(self): + del self.table_level0_0 + del self.table_level0_1 + del self.table_level1 + del self.table_level2 + + def popolate_tables(self): + """Helper function to populate our tables generated in setUp with some simple data""" + # Level 0 0 table. I.e., first table on level 0 + self.table_level0_0.add_row(id=10) + self.table_level0_0.add_row(id=11) + self.table_level0_0.add_row(id=12) + self.table_level0_0.add_row(id=13) + self.table_level0_0.add_column(data=['tag1', 'tag2', 'tag2', 'tag1', 'tag3', 'tag4', 'tag5'], + name='tags', + description='custom tags', + index=[1, 2, 4, 7]) + self.table_level0_0.add_column(data=np.arange(4), + name='myid', + description='custom ids', + index=False) + # Level 0 1 table. 
I.e., second table on level 0 + self.table_level0_1.add_row(id=14) + self.table_level0_1.add_row(id=15) + self.table_level0_1.add_row(id=16) + self.table_level0_1.add_row(id=17) + self.table_level0_1.add_column(data=['tag1', 'tag1', 'tag2', 'tag2', 'tag3', 'tag3', 'tag4'], + name='tags', + description='custom tags', + index=[2, 4, 6, 7]) + self.table_level0_1.add_column(data=np.arange(4), + name='myid', + description='custom ids', + index=False) + # Level 1 table + self.table_level1.add_row(id=0, child_table_ref1=[0, 1], child_table_ref2=[0]) + self.table_level1.add_row(id=1, child_table_ref1=[2], child_table_ref2=[1, 2]) + self.table_level1.add_row(id=2, child_table_ref1=[3], child_table_ref2=[3]) + self.table_level1.add_column(data=['tag1', 'tag2', 'tag2'], + name='tag', + description='custom tag', + index=False) + self.table_level1.add_column(data=['tag1', 'tag2', 'tag2', 'tag3', 'tag3', 'tag4', 'tag5'], + name='tags', + description='custom tags', + index=[2, 4, 7]) + # Level 2 data + self.table_level2.add_row(id=0, child_table_ref1=[0, ]) + self.table_level2.add_row(id=1, child_table_ref1=[1, 2]) + self.table_level2.add_column(data=[10, 12], + name='filter', + description='filter value', + index=False) + + def test_populate_table_hierarchy(self): + """Test that just checks that populating the tables with data works correctly""" + self.popolate_tables() + # Check level0 0 data + self.assertListEqual(self.table_level0_0.id[:], np.arange(10, 14, 1).tolist()) + self.assertListEqual(self.table_level0_0['tags'][:], + [['tag1'], ['tag2'], ['tag2', 'tag1'], ['tag3', 'tag4', 'tag5']]) + self.assertListEqual(self.table_level0_0['myid'][:].tolist(), np.arange(0, 4, 1).tolist()) + # Check level0 1 data + self.assertListEqual(self.table_level0_1.id[:], np.arange(14, 18, 1).tolist()) + self.assertListEqual(self.table_level0_1['tags'][:], + [['tag1', 'tag1'], ['tag2', 'tag2'], ['tag3', 'tag3'], ['tag4']]) + self.assertListEqual(self.table_level0_1['myid'][:].tolist(), 
np.arange(0, 4, 1).tolist()) + # Check level1 data + self.assertListEqual(self.table_level1.id[:], np.arange(0, 3, 1).tolist()) + self.assertListEqual(self.table_level1['tag'][:], ['tag1', 'tag2', 'tag2']) + self.assertTrue(self.table_level1['child_table_ref1'].target.table is self.table_level0_0) + self.assertTrue(self.table_level1['child_table_ref2'].target.table is self.table_level0_1) + self.assertEqual(len(self.table_level1['child_table_ref1'].target.table), 4) + self.assertEqual(len(self.table_level1['child_table_ref2'].target.table), 4) + # Check level2 data + self.assertListEqual(self.table_level2.id[:], np.arange(0, 2, 1).tolist()) + self.assertListEqual(self.table_level2['filter'][:], [10, 12]) + self.assertTrue(self.table_level2['child_table_ref1'].target.table is self.table_level1) + self.assertEqual(len(self.table_level2['child_table_ref1'].target.table), 3) + + def test_get_foreign_columns(self): + """Test DynamicTable.get_foreign_columns""" + self.popolate_tables() + self.assertListEqual(self.table_level0_0.get_foreign_columns(), []) + self.assertListEqual(self.table_level0_1.get_foreign_columns(), []) + self.assertListEqual(self.table_level1.get_foreign_columns(), ['child_table_ref1', 'child_table_ref2']) + self.assertListEqual(self.table_level2.get_foreign_columns(), ['child_table_ref1']) + + def test_has_foreign_columns(self): + """Test DynamicTable.has_foreign_columns""" + self.popolate_tables() + self.assertFalse(self.table_level0_0.has_foreign_columns()) + self.assertFalse(self.table_level0_1.has_foreign_columns()) + self.assertTrue(self.table_level1.has_foreign_columns()) + self.assertTrue(self.table_level2.has_foreign_columns()) + + def test_get_linked_tables(self): + """Test DynamicTable.get_linked_tables""" + self.popolate_tables() + # check level0_0 + self.assertListEqual(self.table_level0_0.get_linked_tables(), []) + # check level0_1 + self.assertListEqual(self.table_level0_1.get_linked_tables(), []) + # check level1 + temp = 
self.table_level1.get_linked_tables() + self.assertEqual(len(temp), 2) + self.assertEqual(temp[0]['source_table'].name, self.table_level1.name) + self.assertEqual(temp[0]['source_column'].name, 'child_table_ref1') + self.assertEqual(temp[0]['target_table'].name, self.table_level0_0.name) + self.assertEqual(temp[1]['source_table'].name, self.table_level1.name) + self.assertEqual(temp[1]['source_column'].name, 'child_table_ref2') + self.assertEqual(temp[1]['target_table'].name, self.table_level0_1.name) + # check level2 + temp = self.table_level2.get_linked_tables() + self.assertEqual(len(temp), 3) + self.assertEqual(temp[0]['source_table'].name, self.table_level2.name) + self.assertEqual(temp[0]['source_column'].name, 'child_table_ref1') + self.assertEqual(temp[0]['target_table'].name, self.table_level1.name) + self.assertEqual(temp[1]['source_table'].name, self.table_level1.name) + self.assertEqual(temp[1]['source_column'].name, 'child_table_ref1') + self.assertEqual(temp[1]['target_table'].name, self.table_level0_0.name) + self.assertEqual(temp[2]['source_table'].name, self.table_level1.name) + self.assertEqual(temp[2]['source_column'].name, 'child_table_ref2') + self.assertEqual(temp[2]['target_table'].name, self.table_level0_1.name)