Skip to content

Commit

Permalink
Added ability to combine models with different table columns.
Browse files Browse the repository at this point in the history
  • Loading branch information
smartin71 committed Apr 28, 2022
1 parent de210ce commit 41984e8
Show file tree
Hide file tree
Showing 7 changed files with 160 additions and 43 deletions.
1 change: 1 addition & 0 deletions docker/compose/slycat-compose/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ sklearn
npTDMS
pandas
pysmb
natsort
79 changes: 63 additions & 16 deletions web-server/plugins/slycat-dac/dac-model.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,6 +782,9 @@ def check_compatible_models(database, model, verb, type, command, **kwargs):
# intersect time flag
intersect_time = kwargs["intersect_time"]

# combine tables flag
combine_tables = kwargs["combine_tables"]

# get models to compare from database
model_ids_selected = kwargs["models_selected"][1:]
models_selected = []
Expand Down Expand Up @@ -852,20 +855,22 @@ def check_compatible_models(database, model, verb, type, command, **kwargs):
new_metadata = slycat.web.server.get_model_arrayset_metadata(database, models_selected[i],
"dac-datapoints-meta")[0]["attributes"]

# same number of columns?
if len(origin_metadata) != len(new_metadata):
return json.dumps(["Error", 'model "' +
model_names[i] + '" table columns do not match existing model ("' +
model["name"] + '").'])

# compare type and name for metadata tables
for j in range(len(origin_metadata)):
if (origin_metadata[j]["type"] != new_metadata[j]["type"]) or \
(origin_metadata[j]["name"] != new_metadata[j]["name"]):
# same number of columns? (only check if user does not want to combine tables)
if not combine_tables:

if len(origin_metadata) != len(new_metadata):
return json.dumps(["Error", 'model "' +
model_names[i] + '" table columns do not match existing model ("' +
model["name"] + '").'])
model_names[i] + '" table columns do not match existing model ("' +
model["name"] + '").'])

# compare type and name for metadata tables
for j in range(len(origin_metadata)):
if (origin_metadata[j]["type"] != new_metadata[j]["type"]) or \
(origin_metadata[j]["name"] != new_metadata[j]["name"]):

return json.dumps(["Error", 'model "' +
model_names[i] + '" table columns do not match existing model ("' +
model["name"] + '").'])

return json.dumps(["Success", 1])

Expand All @@ -881,6 +886,9 @@ def combine_models(database, model, verb, type, command, **kwargs):
# do we need to intersect time points?
intersect_time = kwargs["intersect_time"]

# do we want to combine tables?
combine_tables = kwargs["combine_tables"]

models_selected = []
model_names = []
for i in range(len(model_ids_selected)):
Expand All @@ -891,14 +899,15 @@ def combine_models(database, model, verb, type, command, **kwargs):
stop_event = threading.Event()
thread = threading.Thread(target=combine_models_thread,
args=(database, model, models_selected,
new_model_type, model_names, intersect_time, stop_event))
new_model_type, model_names, intersect_time,
combine_tables, stop_event))
thread.start()

return json.dumps(["Success", 1])

# thread that does actual work for combine by recomputing
def combine_models_thread(database, model, models_selected, new_model_type,
model_names, intersect_time, stop_event):
model_names, intersect_time, combine_tables, stop_event):

# put entire thread into a try-except block in order
# to log and report loading errors
Expand All @@ -917,19 +926,45 @@ def combine_models_thread(database, model, models_selected, new_model_type,

# get metadata column names/types
meta_column_names = []
meta_column_types = []
for i in range(len(metadata)):
meta_column_names.append(metadata[i]["name"])
meta_column_types.append(metadata[i]["type"])

# if we are combining non-matching tables, add additional columns
num_models = len(models_selected)
if combine_tables:

# record combination in log.
parse_error_log = dac_error.update_parse_log(database, model, parse_error_log, "Progress",
'Combined tables columns. Values not present will be marked "Not Present" or given ' +
'nan (not a number) values.')

# go through each model
for i in range(num_models):

metadata = slycat.web.server.get_model_arrayset_metadata(database,
models_selected[i], "dac-datapoints-meta")[0]["attributes"]

# if new column is found, add to end of list
for j in range(len(metadata)):
if metadata[j]["name"] not in meta_column_names:
meta_column_names.append(metadata[j]["name"])
meta_column_types.append(metadata[j]["type"])

# keep track of model origin
from_model = []

# merge tables into new table
num_cols = len(meta_column_names)
num_models = len(models_selected)
num_rows_per_model = []
meta_rows = []
for i in range(num_models):

# get table metadata
metadata = slycat.web.server.get_model_arrayset_metadata(database,
models_selected[i], "dac-datapoints-meta")[0]["attributes"]

# get table data
meta_table = slycat.web.server.get_model_arrayset_data(database,
models_selected[i], "dac-datapoints-meta", "0/.../...")
Expand All @@ -946,9 +981,21 @@ def combine_models_thread(database, model, models_selected, new_model_type,
num_rows = len(meta_table[0])
num_rows_per_model.append(num_rows)
for j in range(num_rows):

# initialize empty row to "Not Present", or NaN values
meta_row_j = []
for k in range(num_cols):
meta_row_j.append(meta_table[k][j])
if meta_column_types[k] == 'string' or \
meta_column_types[k] == b'string':
meta_row_j.append("Not Present")
else:
meta_row_j.append(float('nan'))

for k in range(len(metadata)):

# find where column header exists in master list
col_ind = meta_column_names.index(metadata[k]["name"])
meta_row_j[col_ind] = meta_table[k][j]

# use existing model origin, if available
if len(model_origin) > 0:
Expand Down
4 changes: 4 additions & 0 deletions web-server/plugins/slycat-dac/html/dac-add-data-wizard.html
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ <h3 class="modal-title">Dial-A-Cluster Combined Model</h3>
<input type="checkbox" name="dac-intersect-time" value="True" data-bind="checked: dac_intersect_time">
Intersect time steps if mismatches are found during model combination.
</div>
<div style="margin-top: 10px">
<input type="checkbox" name="dac-combine-tables" value="True" data-bind="checked: dac_combine_tables">
Combine tables even when table headers do not match.
</div>
</div>


Expand Down
9 changes: 7 additions & 2 deletions web-server/plugins/slycat-dac/js/dac-add-data-wizard.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ function constructor(params)
// dac time intersection flag for combining models
component.dac_intersect_time = ko.observable(false);

// dac combine tables flag
component.dac_combine_tables = ko.observable(false);

// creates a model of type "DAC"
component.create_model = function() {

Expand Down Expand Up @@ -251,7 +254,8 @@ function constructor(params)
type: "DAC",
command: "check_compatible_models",
parameters: {models_selected: models_selected,
intersect_time: component.dac_intersect_time()},
intersect_time: component.dac_intersect_time(),
combine_tables: component.dac_combine_tables()},
success: function (result)
{
// convert result to variable
Expand Down Expand Up @@ -334,7 +338,8 @@ function constructor(params)
command: "combine_models",
parameters: {models_selected: models_selected,
model_type: component.dac_model_type(),
intersect_time: component.dac_intersect_time()},
intersect_time: component.dac_intersect_time(),
combine_tables: component.dac_combine_tables()},
success: function (result)
{

Expand Down
37 changes: 31 additions & 6 deletions web-server/plugins/slycat-dac/js/dac-scatter-buttons.js
Original file line number Diff line number Diff line change
Expand Up @@ -617,16 +617,41 @@ function update_color_by_col_data (data, data_col, select_col)
// check for string data
if (color_by_type[color_by_cols.indexOf(select_col) - 1] == "string") {

// use alphabetical order by number to color

// get string data
var color_by_string_data = data["data"][data_col];

// attempt to convert strings to numbers
let num_array = [];
let any_nums = false;
for (let i = 0; i < color_by_string_data.length; i++) {
num_array.push(Number(color_by_string_data[i]));
if (!Number.isNaN(num_array[i])) {
any_nums = true;
}
}

// if there are any numbers, use those + NaNs for the remaining strings
if (any_nums) {

curr_color_by_col = num_array;

} else {

// use alphabetical order by number to color

// natural sort compare for strings
var comparer = function (a,b) {
return a.localeCompare(b, navigator.languages[0] || navigator.language,
{numeric: true, ignorePunctuation: true});
}

// get unique sorted string data
var unique_sorted_string_data = Array.from(new Set(color_by_string_data)).sort();
// get unique sorted string data
var unique_sorted_string_data = Array.from(new Set(color_by_string_data)).sort(comparer);

// set curr_color_by_col
set_curr_color_by_col (color_by_string_data, unique_sorted_string_data);

// set curr_color_by_col
set_curr_color_by_col (color_by_string_data, unique_sorted_string_data);
}

// for categorical data, use given order of categories
} else if (color_by_type[color_by_cols.indexOf(select_col) - 1] == "categorical") {
Expand Down
54 changes: 36 additions & 18 deletions web-server/plugins/slycat-dac/py/dac_compute_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from scipy import spatial
from sklearn.decomposition import PCA

from natsort import natsorted

import cherrypy

# cmdscale translation from Matlab by Francis Song
Expand Down Expand Up @@ -536,7 +538,7 @@ def compute_alpha_clusters_PCA (var_dist, meta_columns, meta_column_types):
# values starting at 0)

# sort potential values in string metadata
uniq_sorted_columns = sorted(set(meta_columns[i]))
uniq_sorted_columns = natsorted(set(meta_columns[i]))

# use alphabetical order to make a vector of numbers
meta_column_num = np.asarray([uniq_sorted_columns.index(str_meta)
Expand All @@ -549,7 +551,7 @@ def compute_alpha_clusters_PCA (var_dist, meta_columns, meta_column_types):
prop_vec = 0

# scale property vector data
prop_scale = np.amax(np.absolute(prop_vec))
prop_scale = np.nanmax(np.absolute(prop_vec))
if prop_scale < np.finfo(float).eps:
prop_scale = 1.0
prop_vec = prop_vec / prop_scale
Expand All @@ -564,14 +566,22 @@ def compute_alpha_clusters_PCA (var_dist, meta_columns, meta_column_types):
if (meta_column_types[i] == "float64") or \
(meta_column_types[i] == "string"):

beta_i = scipy.optimize.nnls(X, prop_vecs[i])
alpha_i = np.sqrt(beta_i[0])
# remove NaNs
nan_mask = np.logical_not(np.isnan(prop_vecs[i]))
nan_prop = prop_vecs[i][nan_mask]
nan_X = X[nan_mask, :]

# if nothing left then return zero
if nan_X.shape[0] > 1:

beta_i = scipy.optimize.nnls(nan_X, nan_prop)
alpha_i = np.sqrt(beta_i[0])

# again don't divide by zero
alpha_max_i = np.amax(alpha_i)
if alpha_max_i <= np.finfo(float).eps:
alpha_max_i = 1
alpha_cluster_mat[i, :] = alpha_i / alpha_max_i
# again don't divide by zero
alpha_max_i = np.amax(alpha_i)
if alpha_max_i <= np.finfo(float).eps:
alpha_max_i = 1
alpha_cluster_mat[i, :] = alpha_i / alpha_max_i

return alpha_cluster_mat

Expand Down Expand Up @@ -636,7 +646,7 @@ def compute_alpha_clusters (var_dist, meta_columns, meta_column_types,
# values starting at 0)

# sort potential values in string metadata
uniq_sorted_columns = sorted(set(meta_columns[i]))
uniq_sorted_columns = natsorted(set(meta_columns[i]))

# use alphabetical order to make a vector of numbers
meta_column_num = np.asarray([uniq_sorted_columns.index(str_meta)
Expand All @@ -656,14 +666,22 @@ def compute_alpha_clusters (var_dist, meta_columns, meta_column_types,
if (meta_column_types[i] == "float64") or \
(meta_column_types[i] == "string"):

beta_i = scipy.optimize.nnls(all_dist_mat, prop_dist_mats[i])
alpha_i = np.sqrt(beta_i[0])
# remove NaNs
nan_mask = np.logical_not(np.isnan(prop_dist_mats[i]))
nan_prop = prop_dist_mats[i][nan_mask]
nan_dist_mat = all_dist_mat[nan_mask, :]

# if nothing left then return zero
if nan_dist_mat.shape[0] > 1:

beta_i = scipy.optimize.nnls(nan_dist_mat, nan_prop)
alpha_i = np.sqrt(beta_i[0])

# again don't divide by zero
alpha_max_i = np.amax(alpha_i)
if alpha_max_i <= np.finfo(float).eps:
alpha_max_i = 1
alpha_cluster_mat[i, :] = alpha_i / alpha_max_i
# again don't divide by zero
alpha_max_i = np.amax(alpha_i)
if alpha_max_i <= np.finfo(float).eps:
alpha_max_i = 1
alpha_cluster_mat[i, :] = alpha_i / alpha_max_i

return alpha_cluster_mat

Expand All @@ -678,7 +696,7 @@ def compute_prop_dist_vec(prop_vec, vec_length):
prop_dist_vec = np.squeeze(np.reshape(prop_dist_mat, (vec_length * vec_length, 1)))

# make sure we don't divide by 0
prop_dist_vec_max = np.amax(prop_dist_vec)
prop_dist_vec_max = np.nanmax(prop_dist_vec)
if prop_dist_vec_max <= np.finfo(float).eps:
prop_dist_vec_max = 1.0

Expand Down
19 changes: 18 additions & 1 deletion web-server/plugins/slycat-dac/py/dac_upload_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,36 @@ def init_upload_model (database, model, dac_error, parse_error_log, meta_column_
# convert from meta data from row-oriented to column-oriented data,
# and convert to numeric columns where possible.
meta_column_types = ["string" for name in meta_column_names]
alpha_column_types = ["string" for name in meta_column_names]
meta_columns = list(zip(*meta_rows))
alpha_columns = list(zip(*meta_rows))
for index in range(len(meta_columns)):
try:

meta_columns[index] = numpy.array(meta_columns[index], dtype="float64")
alpha_columns[index] = numpy.array(meta_columns[index], dtype="float64")

# if there are any nan values using strings instead
if numpy.any(numpy.isnan(meta_columns[index])):

# for alpha columns keep with float64
alpha_column_types[index] = "float64"

# for meta_columns use strings
meta_columns[index] = numpy.array(meta_columns[index], dtype="str")

# replace "nan" with better looking "NaN"
for i in range(len(meta_columns[index])):
if meta_columns[index][i] == "nan":
meta_columns[index][i] = "NaN"

else:
meta_column_types[index] = "float64"
alpha_column_types[index] = "float64"

except:
meta_columns[index] = numpy.array(meta_columns[index], dtype="str")
alpha_columns[index] = meta_columns[index]

# convert variable meta data from row-oriented to column-oriented data
meta_var_cols = list(zip(*meta_vars))
Expand All @@ -68,7 +85,7 @@ def init_upload_model (database, model, dac_error, parse_error_log, meta_column_
landmarks=landmarks, use_coordinates=use_coordinates)

# finally compute alpha cluster values
alpha_cluster_mat = dac.compute_alpha_clusters(var_dist, meta_columns, meta_column_types,
alpha_cluster_mat = dac.compute_alpha_clusters(var_dist, alpha_columns, alpha_column_types,
landmarks=landmarks, use_coordinates=use_coordinates)

dac_error.log_dac_msg("Pushing data to database.")
Expand Down

0 comments on commit 41984e8

Please sign in to comment.