Skip to content

Commit

Permalink
Replace preprocessing test data
Browse files: browse the repository at this point in the history
  • Loading branch information
sumanthratna committed Mar 23, 2020
1 parent 468fbec commit 64be5bf
Show file tree
Hide file tree
Showing 8 changed files with 131,505 additions and 41 deletions.
5 changes: 2 additions & 3 deletions .gitignore
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,9 +2,8 @@

tests/TCGA-MR-A520-01Z-00-DX1.2F323BAC-56C9-4A0C-9C1B-2B4F776056B4.svs

tests/inputs/21_5_mask.pkl
tests/inputs/21_5.zarr
tests/patch_information.db
tests/inputs/TCGA-18-5592-01Z-00-DX1.zarr
tests/inputs/TCGA-18-5592-01Z-00-DX1_mask.pkl

tests/output.pkl
tests/output_zarr.zarr
Expand Down
25 changes: 16 additions & 9 deletions pathflowai/cli_preprocessing.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,7 +11,8 @@
import dask
import time

CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"], "max_content_width": 90}
CONTEXT_SETTINGS = {"help_option_names": [
"-h", "--help"], "max_content_width": 90}


@click.group(context_settings=CONTEXT_SETTINGS)
Expand All @@ -34,7 +35,6 @@ def output_if_exists(filename):
Filename.
"""

if os.path.exists(filename):
return filename
return None
Expand Down Expand Up @@ -200,15 +200,17 @@ def preprocess_pipeline(
".png",
".h5",
]:
svs_file = output_if_exists(join(input_dir, "{}{}".format(basename, ext)))
svs_file = output_if_exists(
join(input_dir, "{}{}".format(basename, ext)))
if svs_file is not None:
break

if img2npy and not svs_file.endswith(".npy"):
svs_file = img2npy_(input_dir, basename, svs_file)

xml_file = output_if_exists(join(input_dir, "{}.xml".format(basename)))
npy_mask = output_if_exists(join(input_dir, "{}_mask.npy".format(basename)))
npy_mask = output_if_exists(
join(input_dir, "{}_mask.npy".format(basename)))
out_zarr = join(input_dir, "{}.zarr".format(basename))
out_pkl = join(input_dir, "{}_mask.pkl".format(basename))
adj_npy = ""
Expand All @@ -230,7 +232,8 @@ def preprocess_pipeline(
npy_mask = join(input_dir, "{}_mask.npz".format(basename))
target_segmentation_class = 1
generate_finetune_segmentation = True
create_zero_mask(npy_mask, out_zarr if not no_zarr else svs_file, out_pkl)
create_zero_mask(
npy_mask, out_zarr if not no_zarr else svs_file, out_pkl)

preprocess_point = time.time()
print("Data dump took {}".format(preprocess_point - start))
Expand Down Expand Up @@ -363,7 +366,8 @@ def remove_basename_from_db(input_patch_db, output_patch_db, basename, patch_siz
conn.close()
df = df.loc[df["ID"] != basename]
conn = sqlite3.connect(output_patch_db)
df.set_index("index").to_sql(str(patch_size), con=conn, if_exists="replace")
df.set_index("index").to_sql(
str(patch_size), con=conn, if_exists="replace")
conn.close()


Expand Down Expand Up @@ -439,20 +443,23 @@ def collapse_annotations(
conn.close()
from_to = zip(from_annotations, to_annotations)
if remove_background_annotation:
df = df.loc[df[remove_background_annotation] <= (1.0 - max_background_area)]
df = df.loc[df[remove_background_annotation]
<= (1.0 - max_background_area)]
for fr, to in from_to:
df.loc[:, to] += df[fr]
df = df[[col for col in list(df) if col not in from_annotations]]
annotations = list(df.iloc[:, 6:])
df = df.rename(columns={annot: str(i) for i, annot in enumerate(annotations)})
df = df.rename(columns={annot: str(i)
for i, annot in enumerate(annotations)})
annotations = list(df.iloc[:, 6:])
df.loc[:, "annotation"] = np.vectorize(
lambda i: annotations[df.iloc[i, 6:].values.argmax()]
)(np.arange(df.shape[0]))
df.loc[:, "index"] = np.arange(df.shape[0])
conn = sqlite3.connect(output_patch_db)
# print(df)
df.set_index("index").to_sql(str(patch_size), con=conn, if_exists="replace")
df.set_index("index").to_sql(
str(patch_size), con=conn, if_exists="replace")
conn.close()


Expand Down
Binary file removed tests/inputs/21_5.npy
Binary file not shown.
Binary file removed tests/inputs/21_5_mask.npy
Binary file not shown.
Binary file added tests/inputs/TCGA-18-5592-01Z-00-DX1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 64be5bf

Please sign in to comment.