Store the relative path to the object, not the full path.
If the object lies at the root of the versioned asset, its relative path
would technically be '.', but we just set it to NULL instead.
LTLA committed Dec 17, 2023
1 parent 9faaf14 commit 78c63b5
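
As a quick illustration of the convention described in the commit message, here is a minimal sketch using hypothetical project, asset, and version names; the real logic lives in `scripts/indexVersion.js` and `src/converterFromSchema.js` below.

```js
// Sketch only: how a full object key maps to the stored relative path.
const prefix = "my-project/my-asset/v1"; // project + "/" + asset + "/" + version

function relativePath(key) {
    // An object at the root of the versioned asset would have a relative path
    // of '.', so we store null instead.
    return key == prefix ? null : key.slice(prefix.length + 1);
}

console.log(relativePath("my-project/my-asset/v1/subdir/my-object")); // "subdir/my-object"
console.log(relativePath("my-project/my-asset/v1"));                  // null
```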
Showing 4 changed files with 59 additions and 25 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -33,6 +33,7 @@ The table will have at least the following fields, all of which have the `TEXT`
- `_asset`: the name of the asset.
- `_version`: the name of the version.
- `_path`: the path to the object inside this versioned asset.
This is only supplied if the object is located inside a subdirectory of the asset; otherwise, it is set to `null`.
- `_object`: the object type.

The SQLite file may contain a `free_text` virtual FTS5 table where each row corresponds to an indexed **gypsum** object.
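
To show how a consumer might deal with the nullable `_path`, here is a hypothetical query sketch; it assumes the `core` table name and the better-sqlite3-style `Database` API used in this repository's tests, and the database file name is invented.

```js
import Database from "better-sqlite3"; // same API style as the tests below

const db = new Database("bioconductor.sqlite3"); // hypothetical index file

// Rebuild each object's key; a NULL _path means the object sits at the root
// of its versioned asset, so no path component is appended.
const rows = db.prepare("SELECT _project, _asset, _version, _path, _object FROM core").all();
for (const row of rows) {
    let key = row._project + "/" + row._asset + "/" + row._version;
    if (row._path !== null) {
        key += "/" + row._path;
    }
    console.log(key + " => " + row._object);
}

// Objects stored at the root of their versioned asset can also be found directly:
const at_root = db.prepare("SELECT COUNT(*) AS n FROM core WHERE _path IS NULL").get();
console.log(at_root.n + " objects sit at the root of their asset version");
```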
49 changes: 27 additions & 22 deletions scripts/indexVersion.js
@@ -5,7 +5,8 @@ export async function indexVersion(project, asset, version, validators, converte
let output = {};

// BIOCONDUCTOR indexing section:
let bioc_paths = [];
let bioc_full_paths = [];
let bioc_relative_paths = [];
let bioc_meta = [];
let bioc_objects = [];

@@ -32,15 +33,24 @@
}
}

const prefix = project + "/" + asset + "/" + version;
let meta_contents = [];
let obj_contents = [];
for (const [k, v] of Object.entries(candidates)) {
if (!(v.bioconductor) || !(v.object)) {
continue;
}
bioc_paths.push(k);

bioc_full_paths.push(k);
meta_contents.push(gbi.fetchJson(k + "/_bioconductor.json"));
obj_contents.push(gbi.fetchJson(k + "/OBJECT"));

// Stripping the path down to its relative component from the prefix.
let relative_path = null;
if (k != prefix) {
relative_path = k.slice(prefix.length + 1);
}
bioc_relative_paths.push(relative_path);
}

bioc_meta = await Promise.all(meta_contents);
@@ -51,7 +61,8 @@
let validator = validators["bioconductor"];

for (var i = 0; i < bioc_meta.length; i++) {
let bp = bioc_paths[i];
let bfp = bioc_full_paths[i];
let brp = bioc_relative_paths[i];
let bm = bioc_meta[i];

try {
@@ -60,7 +71,7 @@
            throw new Error("failed to validate metadata at '" + bfp + "'; " + e.message);
}

let converted = converter(project, asset, version, bp, bioc_objects[i].type, bm);
let converted = converter(project, asset, version, brp, bioc_objects[i].type, bm);
for (const x of converted) {
statements.push(x);
}
@@ -71,23 +82,15 @@

// scRNAseq indexing section:
if (project == "scRNAseq") {
let se_paths = [];
let nrows = [];
let ncols = [];
let used = [];
let altexp_names = [];
let reddim_names = [];
let assay_names = [];

for (var i = 0; i < bioc_meta.length; i++) {
let curobj = bioc_objects[i];
let obj_type = curobj.type;

if (obj_type.endsWith("_experiment")) {
let bpath = bioc_paths[i];
se_paths.push(bpath);
let dim = curobj.summarized_experiment.dimensions;
nrows.push(dim[0]);
ncols.push(dim[1]);
if (bioc_objects[i].type.endsWith("_experiment")) {
used.push(i);
let bpath = bioc_full_paths[i];
reddim_names.push(gbi.fetchJson(bpath + "/reduced_dimensions/names.json", { mustWork: false }));
altexp_names.push(gbi.fetchJson(bpath + "/alternative_experiments/names.json", { mustWork: false }));
assay_names.push(gbi.fetchJson(bpath + "/assays/names.json", { mustWork: false }));
@@ -100,11 +103,12 @@

let converter = converters["scRNAseq"];
let statements = [];
for (var i = 0; i < se_paths.length; i++) {
let meta = {
nrow: nrows[i],
ncol: ncols[i]
};
for (var i = 0; i < used.length; i++) {
let u = used[i];
            let curobj = bioc_objects[u];
let dim = curobj.summarized_experiment.dimensions;
let meta = { nrow: dim[0], ncol: dim[1] };

if (assay_names[i] !== null) {
meta.assay_names = assay_names[i];
}
@@ -114,7 +118,8 @@
if (altexp_names[i] !== null) {
meta.alternative_experiment_names = altexp_names[i];
}
let converted = converter(project, asset, version, bioc_paths[i], bioc_objects[i].type, meta);

let converted = converter(project, asset, version, bioc_relative_paths[u], curobj.type, meta);
for (const x of converted) {
statements.push(x);
}
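
For readability, here is a compressed sketch of the metadata assembled by the reworked scRNAseq branch above for one `*_experiment` object; the property names mirror the diff, while the object type and sample values are invented.

```js
// Hypothetical OBJECT payload for one entry whose type ends with "_experiment".
const curobj = {
    type: "single_cell_experiment",
    summarized_experiment: { dimensions: [20000, 3500] } // [nrow, ncol]
};

// Name listings fetched with { mustWork: false } come back as null when absent.
const assay_names = ["counts", "logcounts"];
const altexp_names = null;

const dim = curobj.summarized_experiment.dimensions;
const meta = { nrow: dim[0], ncol: dim[1] };
if (assay_names !== null) {
    meta.assay_names = assay_names;
}
if (altexp_names !== null) {
    meta.alternative_experiment_names = altexp_names;
}
console.log(meta); // { nrow: 20000, ncol: 3500, assay_names: [ 'counts', 'logcounts' ] }
// meta is then handed to the scRNAseq converter along with the object's relative path.
```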
14 changes: 11 additions & 3 deletions src/converterFromSchema.js
@@ -26,12 +26,20 @@ export function converterFromSchema(schema) {
}

return (project, asset, version, path, object, metadata) => {
let key = project + '/' + asset + '/' + version + '/' + path;
let key = project + '/' + asset + '/' + version;
if (path !== null) {
key += '/' + path;
}
const statements = [];

{
let current_values = [key, project, asset, version, path, object];
let current_columns = ["_key", "_project", "_asset", "_version", "_path", "_object"];
let current_values = [key, project, asset, version, object];
let current_columns = ["_key", "_project", "_asset", "_version", "_object"];
if (path !== null) {
current_values.push(path);
current_columns.push("_path");
}

for (const y of core_inserts) {
if (y in metadata) {
current_columns.push(y);
20 changes: 20 additions & 0 deletions tests/converterFromSchema.test.js
@@ -14,6 +14,11 @@ test("converterFromSchema works as expected", () => {
let statements = converter("foo", "bar", "v1", "asd/asd", "summarized_experiment", examples[0]);
runSqlStatements(db, statements);

// Checking that the key is correctly assembled.
let info = db.prepare("SELECT * FROM core LIMIT 1").all();
expect(info[0]["_key"]).toEqual("foo/bar/v1/asd/asd");
expect(info[0]["_path"]).toEqual("asd/asd");

// Checking that we have entries in each of the tables.
let res = db.prepare("SELECT COUNT(*) FROM multi_genome").all();
expect(res[0]["COUNT(*)"]).toBe(3);
@@ -27,3 +32,18 @@
res = db.prepare("SELECT COUNT(*) FROM free_text").all();
expect(res[0]["COUNT(*)"]).toBe(1);
})

test("converterFromSchema works with a NULL path", () => {
const initialized = init.initializeFromSchema(schema);
const db = new Database(':memory:');
db.exec(initialized);

const converter = convert.converterFromSchema(schema);
let statements = converter("foo", "bar", "v1", null, "summarized_experiment", examples[0]);
runSqlStatements(db, statements);

// Checking that the key is correctly assembled.
let res = db.prepare("SELECT * FROM core LIMIT 1").all();
expect(res[0]["_key"]).toEqual("foo/bar/v1");
expect(res[0]["_path"]).toBeNull();
})
