Update Click, add .gitignore, format code, and use Poetry #8
base: master
@@ -2,58 +2,35 @@
from numpy import array_equal


# def test_svs2dask_array():
# from .utils import download_svs
# from PIL import Image
# from numpy import array as to_npa
#
# # from os import remove
#
# id = "2e4f6316-588b-4629-adf0-7aeac358a0e2"
# file = "TCGA-MR-A520-01Z-00-DX1.2F323BAC-56C9-4A0C-9C1B-2B4F776056B4.svs"
# download_location = download_svs(id, file)
#
# Image.MAX_IMAGE_PIXELS = None # SECURITY RISK!
# ground_truth = to_npa(Image.open(download_location))
#
# test = utils.svs2dask_array(download_location).compute()
# crop_height, crop_width, _ = test.shape
#
# # remove(download_location)
#
# assert array_equal(ground_truth[:crop_height, :crop_width, :], test)
def test_svs2dask_array():
    from .utils import download_svs
    from PIL import Image
    from numpy import array as to_npa

    # from os import remove

    id = "2e4f6316-588b-4629-adf0-7aeac358a0e2"
    file = "TCGA-MR-A520-01Z-00-DX1.2F323BAC-56C9-4A0C-9C1B-2B4F776056B4.svs"
    download_location = download_svs(id, file)

    Image.MAX_IMAGE_PIXELS = None # SECURITY RISK!
    ground_truth = to_npa(Image.open(download_location))

    test = utils.svs2dask_array(download_location).compute()
    crop_height, crop_width, _ = test.shape

    # remove(download_location)

    assert array_equal(ground_truth[:crop_height, :crop_width, :], test)


def test_preprocessing_pipeline():
    from .utils import get_tests_dir, image_to_numpy
    from .utils import get_tests_dir
    from os.path import join, exists

    tests_dir = get_tests_dir()
    basename = "TCGA-18-5592-01Z-00-DX1"
    input_dir = join(tests_dir, "inputs")
    png_file = join(input_dir, basename + ".png")
    xml_file = join(input_dir, basename + ".xml")
    out_zarr = join(tests_dir, "output_zarr.zarr")
    out_pkl = join(tests_dir, "output.pkl")

    # convert a TCGA XML to a binary mask with the following:
    # Image.fromarray(
    # viewmask.utils.xml_to_image(
    # ET.parse('./tests/inputs/TCGA-18-5592-01Z-00-DX1.xml')
    # )
    # ).save('/Users/suman/Downloads/bruh.png')

    utils.run_preprocessing_pipeline(
        png_file, xml_file=xml_file, out_zarr=out_zarr, out_pkl=out_pkl
    )
    assert exists(out_zarr)
    assert exists(out_pkl)

    from zarr import open as open_zarr
    from dask.array import from_zarr as zarr_to_da

    img = zarr_to_da(open_zarr(out_zarr)).compute()
    assert array_equal(img, image_to_numpy(png_file))

    def capture(command):
        from subprocess import Popen, PIPE

@@ -65,22 +42,75 @@ def capture(command):
        out, err = proc.communicate()
        return out, err, proc.returncode

    odb = join(tests_dir, "patch_information.db")
    command = [
        "poetry", "run", "pathflowai-preprocess",
        "preprocess-pipeline",
        "-odb", odb,
        "--preprocess",
        "--patches",
        "--basename", basename,
        "--input_dir", input_dir,
        "--patch_size", "256",
        "--intensity_threshold", "45.",
        "-tc", "7",
        "-t", "0.05"
    ]
    out, err, exitcode = capture(command)
    assert exists(out_zarr)
    assert exists(out_pkl)
    assert exists(odb)
    assert exitcode == 0

    def test_segmentation():
        npy_file = join(input_dir, basename + ".npy")
        npy_mask = join(input_dir, basename + "_mask.npy")
        out_zarr = join(tests_dir, "output_zarr.zarr")
        out_pkl = join(tests_dir, "output.pkl")

        # convert TCGA annotations (XML) to a binary mask (npy) with the following:
        #
        # import numpy as np
        # import viewmask
        # import xml.etree.ElementTree as ET
        # np.save(
        # './tests/inputs/TCGA-18-5592-01Z-00-DX1_mask.npy',
        # viewmask.utils.xml_to_image(
        # ET.parse('./tests/inputs/TCGA-18-5592-01Z-00-DX1.xml')
        # )
        # )
        #
        #
        # convert TCGA input (PNG) to a numpy array (npy) with the following:
        #
        # import numpy as np
        # from PIL import Image
        # np.save(
        # './tests/inputs/TCGA-18-5592-01Z-00-DX1.npy',
        # np.array(
        # Image.open('./tests/inputs/TCGA-18-5592-01Z-00-DX1.png')
        # )
        # )

        utils.run_preprocessing_pipeline(
            npy_file, npy_mask=npy_mask, out_zarr=out_zarr, out_pkl=out_pkl
        )
        assert exists(out_zarr)
        assert exists(out_pkl)

        from numpy import load as npy_to_npa
        from zarr import open as open_zarr
        from dask.array import from_zarr as zarr_to_da

        img = zarr_to_da(open_zarr(out_zarr)).compute()
        assert array_equal(img, npy_to_npa(npy_file))

        odb = join(tests_dir, "patch_information.db")
        command = [
            "poetry", "run", "pathflowai-preprocess",
            "preprocess-pipeline",
            "-odb", odb,
            "--preprocess",
            "--patches",
            "--basename", basename,
            "--input_dir", input_dir,
            "--patch_size", "256",
            "--intensity_threshold", "45.",
            "-tc", "7",
            "-t", "0.05"
        ]
        out, err, exitcode = capture(command)
        assert exists(out_zarr)
        assert exists(out_pkl)
        assert exists(odb)
        assert exitcode == 0

        from sqlite3 import connect as sql_connect
        connection = sql_connect(odb)
        cursor = connection.execute('SELECT * FROM "256";')
        names = [description[0] for description in cursor.description]
        cursor.close()
        true_headers = ['index', 'ID', 'x', 'y', 'patch_size',
                        'annotation', '0', '1', '2', '3', '4', '5', '6']
        assert names == true_headers

    test_segmentation()
Review comments:

Technically, you could test both classification and segmentation on the same dataset; I'm not sure if this is what you were going for here.

Or even regression from the patch-level labels featured in the SQL. I'll try to get a new dataset over soon.

I was planning on using TCGA-18-5592-01Z-00-DX1 for testing both segmentation and classification, like you suggested. The reason I'm splitting the test into two different methods is that some of the parameter names change (such as ...). I'm also planning on adding support for TCGA annotations in PathFlow, so a new dataset shouldn't be necessary.
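For reference, here is a rough sketch (not code from this PR) of how the XML-mask and NPY-mask variants could share a single parametrized test under pytest if keeping two separate methods ever becomes a burden. The suffix/keyword pairs and the `pathflowai.utils` import are assumptions based on the names already used in this test module.

```python
# Hedged sketch only: parametrize over the mask keyword instead of keeping two
# separate test functions. Assumes pytest is the runner and that `utils` refers
# to pathflowai's utils module, as the rest of this test file appears to use.
import pytest

from pathflowai import utils  # assumed import; adjust to the module's real import


@pytest.mark.parametrize(
    "input_suffix, mask_kwarg, mask_suffix",
    [
        (".png", "xml_file", ".xml"),       # annotation supplied as TCGA XML
        (".npy", "npy_mask", "_mask.npy"),  # annotation supplied as a binary NPY mask
    ],
)
def test_run_preprocessing_pipeline(input_suffix, mask_kwarg, mask_suffix):
    from .utils import get_tests_dir
    from os.path import join, exists

    tests_dir = get_tests_dir()
    basename = "TCGA-18-5592-01Z-00-DX1"
    input_dir = join(tests_dir, "inputs")
    out_zarr = join(tests_dir, "output_zarr.zarr")
    out_pkl = join(tests_dir, "output.pkl")

    # Only the name of the mask keyword argument differs between the two cases.
    utils.run_preprocessing_pipeline(
        join(input_dir, basename + input_suffix),
        out_zarr=out_zarr,
        out_pkl=out_pkl,
        **{mask_kwarg: join(input_dir, basename + mask_suffix)},
    )
    assert exists(out_zarr)
    assert exists(out_pkl)
```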
One potential option here is to limit the number of patches before testing the classification and segmentation pipelines.
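One low-tech way to do that, without relying on any preprocessing flags, would be to crop the test image and mask to a small tile before running the pipeline, so only a handful of 256-px patches are generated. The helper below is a hedged sketch; the function name and the `_small` file names are hypothetical and not part of pathflowai or this PR.

```python
# Hypothetical helper: shrink the NPY image and mask used by test_segmentation
# to one small corner so patch extraction only produces a few 256-px patches.
import numpy as np


def make_small_inputs(npy_file, npy_mask, size=512):
    # Keep just the top-left `size` x `size` region of the image and its mask.
    img = np.load(npy_file)
    mask = np.load(npy_mask)

    small_file = npy_file.replace(".npy", "_small.npy")
    small_mask = npy_mask.replace(".npy", "_small.npy")
    np.save(small_file, img[:size, :size])
    np.save(small_mask, mask[:size, :size])
    return small_file, small_mask
```

The test could then run the same pipeline and CLI invocation against the shrunken files, which should cut the runtime considerably.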