add nobrainer.dataset namespace (#92)
kaczmarj authored Jan 12, 2020
1 parent 305d804 commit b35fdfd
Showing 11 changed files with 313 additions and 355 deletions.
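
This commit moves the dataset-construction helpers from nobrainer.volume into a new nobrainer.dataset namespace; the notebook diffs below are mostly call-site renames. As a minimal sketch of the API after the change (argument names are taken from the notebooks below; the concrete values are illustrative):

import nobrainer

# Build a tf.data.Dataset from TFRecord shards (formerly nobrainer.volume.get_dataset).
dataset = nobrainer.dataset.get_dataset(
    file_pattern="data/data_shard-*.tfrec",  # glob matching the shard files
    n_classes=1,                             # 1 = binary (target vs. background)
    batch_size=2,
    volume_shape=(256, 256, 256),
    block_shape=(128, 128, 128),
    n_epochs=1,
)

# Steps per epoch for Keras fit() (formerly nobrainer.volume.get_steps_per_epoch).
steps_per_epoch = nobrainer.dataset.get_steps_per_epoch(
    n_volumes=10,
    volume_shape=(256, 256, 256),
    block_shape=(128, 128, 128),
    batch_size=2,
)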
19 changes: 6 additions & 13 deletions guide/02-preparing_training_data.ipynb
@@ -85,7 +85,7 @@
"metadata": {},
"outputs": [],
"source": [
"!mkdir -p tfrecords"
"!mkdir -p data"
]
},
{
@@ -96,8 +96,8 @@
"source": [
"!nobrainer convert \\\n",
" --csv='/tmp/nobrainer-data/filepaths.csv' \\\n",
" --tfrecords-template='tfrecords/data_shard-{shard:03d}.tfrec' \\\n",
" --examples-per-shard=2 \\\n",
" --tfrecords-template='data/data_shard-{shard:03d}.tfrec' \\\n",
" --examples-per-shard=3 \\\n",
" --volume-shape 256 256 256 \\\n",
" --verbose"
]
@@ -128,7 +128,7 @@
"outputs": [],
"source": [
"# A glob pattern to match the files we want to train on.\n",
"file_pattern = 'tfrecords/data_shard-*.tfrec'\n",
"file_pattern = 'data/data_shard-*.tfrec'\n",
"\n",
"# The number of classes the model predicts. A value of 1 means the model performs\n",
"# binary classification (i.e., target vs background).\n",
@@ -177,7 +177,7 @@
},
"outputs": [],
"source": [
"dataset = nobrainer.volume.get_dataset(\n",
"dataset = nobrainer.dataset.get_dataset(\n",
" file_pattern=file_pattern,\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -191,13 +191,6 @@
"dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
@@ -272,7 +265,7 @@
"metadata": {},
"outputs": [],
"source": [
"steps_per_epoch = nobrainer.volume.get_steps_per_epoch(\n",
"steps_per_epoch = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=10, \n",
" volume_shape=volume_shape, \n",
" block_shape=block_shape, \n",
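
The last hunk above computes steps per epoch for 10 volumes. Presumably get_steps_per_epoch counts the non-overlapping blocks per volume and divides by the batch size; a hand-rolled equivalent under that assumption:

# Sketch only: assumes steps = ceil(n_volumes * blocks_per_volume / batch_size).
# Defer to nobrainer.dataset.get_steps_per_epoch for the authoritative behavior.
import math

def steps_per_epoch(n_volumes, volume_shape, block_shape, batch_size):
    blocks_per_volume = 1
    for v, b in zip(volume_shape, block_shape):
        blocks_per_volume *= v // b  # non-overlapping blocks along each axis
    return math.ceil(n_volumes * blocks_per_volume / batch_size)

# 10 volumes of 256**3 cut into 128**3 blocks -> 8 blocks each, 80 total;
# with batch_size=2 that is 40 steps.
print(steps_per_epoch(10, (256, 256, 256), (128, 128, 128), 2))  # 40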
100 changes: 24 additions & 76 deletions guide/train_binary_classification.ipynb
@@ -159,72 +159,27 @@
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"def get_dataset(file_pattern, shuffle=True):\n",
" d = tf.data.Dataset.list_files(file_pattern, shuffle=shuffle)\n",
" d = d.interleave(\n",
" map_func=lambda x: tf.data.TFRecordDataset(x, compression_type=\"GZIP\"),\n",
" cycle_length=num_parallel_calls,\n",
" num_parallel_calls=num_parallel_calls)\n",
"\n",
" # Parse each example in each TFRecords file as a tensor of features and a\n",
" # tensor of labels.\n",
" parse_fn = nobrainer.tfrecord.parse_example_fn(\n",
" volume_shape=volume_shape, scalar_label=True)\n",
" d = d.map(map_func=parse_fn)\n",
" return d\n",
"dataset_train = nobrainer.dataset.get_dataset(\n",
" file_pattern=\"data/data-train_shard*.tfrec\",\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
" volume_shape=volume_shape,\n",
" scalar_label=True,\n",
" block_shape=block_shape,\n",
" n_epochs=n_epochs,\n",
" num_parallel_calls=num_parallel_calls,\n",
")\n",
"\n",
"def _preprocess(x, y):\n",
" x = nobrainer.volume.standardize(x)\n",
" x = nobrainer.volume.to_blocks(x, block_shape=block_shape)\n",
" n_blocks = tf.shape(x)[0]\n",
" # Add grayscale channel.\n",
" return tf.expand_dims(x, axis=-1), tf.repeat(y, n_blocks)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_train = get_dataset(\"data/data-train*.tfrec\")\n",
"dataset_train = dataset_train.map(_preprocess)\n",
"# We need to unbatch because separating into blocks added a dimension.\n",
"dataset_train = dataset_train.unbatch()\n",
"dataset_train = dataset_train.batch(batch_size, drop_remainder=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_evaluate = get_dataset(\"data/data-evaluate*.tfrec\", shuffle=False)\n",
"dataset_evaluate = dataset_evaluate.map(_preprocess)\n",
"# We need to unbatch because separating into blocks added a dimension.\n",
"dataset_evaluate = dataset_evaluate.unbatch()\n",
"dataset_evaluate = dataset_evaluate.batch(batch_size, drop_remainder=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_train"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset_evaluate"
"dataset_evaluate = nobrainer.dataset.get_dataset(\n",
" file_pattern=\"data/data-evaluate_shard-*.tfrec\",\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
" volume_shape=volume_shape,\n",
" scalar_label=True,\n",
" block_shape=block_shape,\n",
" n_epochs=1,\n",
" num_parallel_calls=num_parallel_calls,\n",
")"
]
},
{
@@ -240,6 +195,8 @@
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf\n",
"\n",
"model = tf.keras.Sequential([\n",
" tf.keras.layers.Conv3D(filters=12, kernel_size=3, padding=\"same\", \n",
" strides=2, activation=\"relu\", input_shape=(*block_shape, 1)),\n",
@@ -267,15 +224,6 @@
"# Choose a loss function and metrics"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import tensorflow as tf"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -308,7 +256,7 @@
"metadata": {},
"outputs": [],
"source": [
"steps_per_epoch = nobrainer.volume.get_steps_per_epoch(\n",
"steps_per_epoch = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(train_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
@@ -323,7 +271,7 @@
"metadata": {},
"outputs": [],
"source": [
"validation_steps = nobrainer.volume.get_steps_per_epoch(\n",
"validation_steps = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(evaluate_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
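
The deleted cells in this notebook spell out what the single nobrainer.dataset.get_dataset call now encapsulates: list and interleave the TFRecord shards, parse each example, standardize the volume, split it into blocks, add a grayscale channel, repeat the scalar label per block, then unbatch and rebatch. The old per-example preprocessing step, reproduced from the removed code for comparison (block_shape is defined earlier in the notebook):

import tensorflow as tf
import nobrainer

def _preprocess(x, y):
    x = nobrainer.volume.standardize(x)                         # zero mean, unit variance
    x = nobrainer.volume.to_blocks(x, block_shape=block_shape)  # volume -> blocks
    n_blocks = tf.shape(x)[0]
    # Add a grayscale channel and give every block its volume's scalar label.
    return tf.expand_dims(x, axis=-1), tf.repeat(y, n_blocks)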
10 changes: 5 additions & 5 deletions guide/train_binary_segmentation.ipynb
@@ -86,7 +86,7 @@
"metadata": {},
"outputs": [],
"source": [
"!mkdir data"
"!mkdir -p data"
]
},
{
@@ -148,7 +148,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset_train = nobrainer.volume.get_dataset(\n",
"dataset_train = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-train_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -160,7 +160,7 @@
" num_parallel_calls=num_parallel_calls,\n",
")\n",
"\n",
"dataset_evaluate = nobrainer.volume.get_dataset(\n",
"dataset_evaluate = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-evaluate_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -294,7 +294,7 @@
"metadata": {},
"outputs": [],
"source": [
"steps_per_epoch = nobrainer.volume.get_steps_per_epoch(\n",
"steps_per_epoch = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(train_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
@@ -309,7 +309,7 @@
"metadata": {},
"outputs": [],
"source": [
"validation_steps = nobrainer.volume.get_steps_per_epoch(\n",
"validation_steps = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(evaluate_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
10 changes: 5 additions & 5 deletions guide/train_on_multiple_gpus.ipynb
@@ -86,7 +86,7 @@
"metadata": {},
"outputs": [],
"source": [
"!mkdir data"
"!mkdir -p data"
]
},
{
@@ -148,7 +148,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset_train = nobrainer.volume.get_dataset(\n",
"dataset_train = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-train_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -160,7 +160,7 @@
" num_parallel_calls=num_parallel_calls,\n",
")\n",
"\n",
"dataset_evaluate = nobrainer.volume.get_dataset(\n",
"dataset_evaluate = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-evaluate_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -198,13 +198,13 @@
"outputs": [],
"source": [
"# Get the steps for an epoch of training and an epoch of validation.\n",
"steps_per_epoch = nobrainer.volume.get_steps_per_epoch(\n",
"steps_per_epoch = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(train_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
" batch_size=batch_size)\n",
"\n",
"validation_steps = nobrainer.volume.get_steps_per_epoch(\n",
"validation_steps = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(evaluate_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
8 changes: 4 additions & 4 deletions guide/train_on_tpu.ipynb
@@ -148,7 +148,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset_train = nobrainer.volume.get_dataset(\n",
"dataset_train = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-train_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -160,7 +160,7 @@
" num_parallel_calls=num_parallel_calls,\n",
")\n",
"\n",
"dataset_evaluate = nobrainer.volume.get_dataset(\n",
"dataset_evaluate = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-evaluate_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -198,13 +198,13 @@
"outputs": [],
"source": [
"# Get the steps for an epoch of training and an epoch of validation.\n",
"steps_per_epoch = nobrainer.volume.get_steps_per_epoch(\n",
"steps_per_epoch = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(train_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
" batch_size=batch_size)\n",
"\n",
"validation_steps = nobrainer.volume.get_steps_per_epoch(\n",
"validation_steps = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(evaluate_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
10 changes: 5 additions & 5 deletions guide/transfer_learning.ipynb
@@ -88,7 +88,7 @@
"metadata": {},
"outputs": [],
"source": [
"!mkdir data"
"!mkdir -p data"
]
},
{
@@ -148,7 +148,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset_train = nobrainer.volume.get_dataset(\n",
"dataset_train = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-train_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -160,7 +160,7 @@
" num_parallel_calls=num_parallel_calls,\n",
")\n",
"\n",
"dataset_evaluate = nobrainer.volume.get_dataset(\n",
"dataset_evaluate = nobrainer.dataset.get_dataset(\n",
" file_pattern='data/data-evaluate_shard-*.tfrec',\n",
" n_classes=n_classes,\n",
" batch_size=batch_size,\n",
@@ -282,7 +282,7 @@
"metadata": {},
"outputs": [],
"source": [
"steps_per_epoch = nobrainer.volume.get_steps_per_epoch(\n",
"steps_per_epoch = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(train_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
@@ -297,7 +297,7 @@
"metadata": {},
"outputs": [],
"source": [
"validation_steps = nobrainer.volume.get_steps_per_epoch(\n",
"validation_steps = nobrainer.dataset.get_steps_per_epoch(\n",
" n_volumes=len(evaluate_paths),\n",
" volume_shape=volume_shape,\n",
" block_shape=block_shape,\n",
1 change: 1 addition & 0 deletions nobrainer/__init__.py
@@ -3,6 +3,7 @@
import tensorflow as tf

from nobrainer._version import get_versions
import nobrainer.dataset
import nobrainer.io
import nobrainer.layers
import nobrainer.losses
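
Because nobrainer/__init__.py now imports the submodule eagerly, a plain "import nobrainer" is enough to reach the new namespace in user code:

import nobrainer

# nobrainer.dataset is loaded by the package __init__ (see the diff above),
# so no separate "import nobrainer.dataset" is required here.
ds_fn = nobrainer.dataset.get_dataset
steps_fn = nobrainer.dataset.get_steps_per_epoch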