-
Notifications
You must be signed in to change notification settings - Fork 9
/
io.py
466 lines (392 loc) · 15.1 KB
/
io.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
import pathlib
import mrcfile
import argparse
import logging
import numpy.typing as npt
import numpy as np
from contextlib import contextmanager
from operator import attrgetter
from typing import Optional, Union
class ParseLogging(argparse.Action):
"""argparse.Action subclass to parse logging parameter from input scripts. Users can
set these to info/debug."""
def __call__(
self, parser, namespace, values: str, option_string: Optional[str] = None
):
if values.upper() not in ["INFO", "DEBUG"]:
parser.error(
f"{option_string} log got an invalid option, "
"set either to `info` or `debug` "
)
else:
numeric_level = getattr(logging, values.upper(), None)
setattr(namespace, self.dest, numeric_level)
class CheckDirExists(argparse.Action):
"""argparse.Action subclass to check if an expected input directory exists."""
def __call__(
self,
parser,
namespace,
values: pathlib.Path,
option_string: Optional[str] = None,
):
if not values.is_dir():
parser.error(
"{0} got a file path that does not exist ".format(option_string)
)
setattr(namespace, self.dest, values)
class CheckFileExists(argparse.Action):
"""argparse.Action subclass to check if an expected input file exists."""
def __call__(
self,
parser,
namespace,
values: pathlib.Path,
option_string: Optional[str] = None,
):
if not values.exists():
parser.error(
"{0} got a file path that does not exist ".format(option_string)
)
setattr(namespace, self.dest, values)
class LargerThanZero(argparse.Action):
"""argparse.Action subclass to constrain an input value to larger than zero only."""
def __call__(
self,
parser,
namespace,
values: Union[int, float],
option_string: Optional[str] = None,
):
if values <= 0.0:
parser.error("{0} must be larger than 0".format(option_string))
setattr(namespace, self.dest, values)
class BetweenZeroAndOne(argparse.Action):
"""argparse.Action subclass to constrain an input value to a fraction, i.e. between
0 and 1."""
def __call__(
self, parser, namespace, values: float, option_string: Optional[str] = None
):
if 1.0 <= values <= 0.0:
parser.error(
"{0} is a fraction and can only range between 0 and 1".format(
option_string
)
)
setattr(namespace, self.dest, values)
class ParseSearch(argparse.Action):
"""argparse.Action subclass to restrict the search area of tomogram to these indices
along an axis. Checks that these value are larger than zero and that the second
value is larger than the first."""
def __call__(
self,
parser,
namespace,
values: list[int, int],
option_string: Optional[str] = None,
):
if not (0 <= values[0] < values[1]):
parser.error(
f"{option_string} start and end indices must be larger than 0 and end "
"must be larger than start"
)
setattr(namespace, self.dest, values)
class ParseTiltAngles(argparse.Action):
"""argparse.Action subclass to parse tilt_angle info. The input can either be two
floats that specify the tilt range for a continous wedge model. Alternatively can be
a .tlt/.rawtlt file that specifies all the the tilt angles of the tilt-series to use
for more refined wedge models."""
def __call__(
self,
parser,
namespace,
values: Union[list[str, str], str],
option_string: Optional[str] = None,
):
if len(values) == 2: # two wedge angles provided the min and max
try:
values = sorted(list(map(float, values))) # make them floats
setattr(namespace, self.dest, values)
except ValueError:
parser.error(
f"{option_string} the two arguments provided could not be parsed "
"to floats"
)
elif len(values) == 1:
values = pathlib.Path(values[0])
if not values.exists() or values.suffix not in [".tlt", ".rawtlt"]:
parser.error(
f"{option_string} provided tilt angle file does not exist or does "
"not have the right format"
)
setattr(namespace, self.dest, read_tlt_file(values))
else:
parser.error("{0} can only take one or two arguments".format(option_string))
class ParseDoseFile(argparse.Action):
"""argparse.Action subclass to parse a txt file contain information on accumulated
dose per tilt."""
def __call__(
self, parser, namespace, values: str, option_string: Optional[str] = None
):
file_path = pathlib.Path(values)
if not file_path.exists():
parser.error(
"{0} provided dose accumulation file does not exist".format(
option_string
)
)
allowed_suffixes = [".txt"]
if file_path.suffix not in allowed_suffixes:
parser.error(
"{0} provided dose accumulation file does not have the right suffix, "
"allowed are: {1}".format(option_string, ", ".join(allowed_suffixes))
)
setattr(namespace, self.dest, read_dose_file(file_path))
class ParseDefocus(argparse.Action):
"""argparse.Action subclass to read a defocus file, either from IMOD which adheres
to their file format, or a txt file containing per line the defocus of each tilt."""
def __call__(
self, parser, namespace, values: str, option_string: Optional[str] = None
):
if values.endswith((".defocus", ".txt")):
file_path = pathlib.Path(values)
if not file_path.exists():
parser.error(f"{option_string} provided defocus file does not exist")
# in case of a file the parser attribute becomes a list of defocii
setattr(namespace, self.dest, read_defocus_file(file_path))
else:
try:
defocus = float(values)
except ValueError:
parser.error(f"{option_string} not possible to read defocus as float")
# pass back as list so defocus always returns a list
setattr(namespace, self.dest, [defocus])
class UnequalSpacingError(Exception):
"""Exception for an mrc file that has unequal spacing along the xyz dimensions
annotated in its voxel size metadata."""
pass
def write_angle_list(
data: npt.NDArray[float],
file_name: pathlib.Path,
order: tuple[int, int, int] = (0, 2, 1),
):
"""Helper function to write angular search list from old PyTom to current module.
Order had to be changed as old PyTom always stored it as Z1, Z2, X, and here it's
Z1, X, Z2.
@todo remove function
"""
with open(file_name, "w") as fstream:
for i in range(data.shape[1]):
fstream.write(
" ".join([str(x) for x in [data[j, i] for j in order]]) + "\n"
)
@contextmanager
def _wrap_mrcfile_readers(func, *args, **kwargs):
"""Try to autorecover broken mrcfiles, assumes 'permissive' is a kwarg and not an
arg"""
try:
mrc = func(*args, **kwargs)
except ValueError as err:
# see if permissive can safe this
logging.debug(f"mrcfile raised the following error: {err}, will try to recover")
kwargs["permissive"] = True
mrc = func(*args, **kwargs)
if mrc.data is not None:
logging.warning(
f"Loading {args[0]} in strict mode gave an error. "
"However, loading with 'permissive=True' did generate data, make sure "
"this is correct!"
)
else:
logging.debug("Could not reasonably recover")
raise ValueError(
f"{args[0]} header or data is too corrupt to recover, please fix the "
"header or data"
) from err
yield mrc
# this should only be called after the context exists
mrc.close()
def read_mrc_meta_data(file_name: pathlib.Path) -> dict:
"""Read the metadata of provided MRC file path (using mrcfile) and return as dict.
If the voxel size along the x, y, and z dimensions differs a lot (not within 3
decimals) the function will raise an UnequalSpacingError as it could mean template
matching on these volumes might not be consistent.
Parameters
----------
file_name: pathlib.Path
path to an MRC file
Returns
-------
metadata: dict
a dictionary of the mrc metadata with key 'shape' containing the x,y,z
dimensions of the file and key 'voxel_size' containing the voxel size along
x, y, and z and dimensions in Å units
"""
meta_data = {}
with _wrap_mrcfile_readers(mrcfile.mmap, file_name) as mrc:
meta_data["shape"] = tuple(map(int, attrgetter("nx", "ny", "nz")(mrc.header)))
# allow small numerical inconsistencies in voxel size of MRC headers, sometimes
# seen in Warp
if not all(
[
np.round(mrc.voxel_size.x, 3) == np.round(s, 3)
for s in attrgetter("y", "z")(mrc.voxel_size)
]
):
raise UnequalSpacingError(
"Input volume voxel spacing is not identical in each dimension!"
)
else:
if not all(
[mrc.voxel_size.x == s for s in attrgetter("y", "z")(mrc.voxel_size)]
):
logging.warning(
"Voxel size annotation in MRC is slightly different between "
f"dimensions, namely {mrc.voxel_size}. It might be a tiny "
"numerical inaccuracy, but please ensure this is not problematic."
)
meta_data["voxel_size"] = float(mrc.voxel_size.x)
return meta_data
def write_mrc(
file_name: pathlib.Path,
data: npt.NDArray[float],
voxel_size: float,
overwrite: bool = True,
transpose: bool = True,
) -> None:
"""Write data to an MRC file. Data is transposed before writing as pytom internally
uses xyz ordering and MRCs use zyx.
Parameters
----------
file_name: pathlib.Path
path on disk to write the file to
data: npt.NDArray[float]
numpy array to write as MRC
voxel_size: float
voxel size of array to annotate in MRC header
overwrite: bool, default True
True (default) will overwrite current MRC on path, setting to False will error
when writing to existing file
transpose: bool, default True
True (default) transpose array before writing, setting to False prevents this
Returns
-------
"""
if data.dtype != np.float32:
logging.warning(
"data for mrc writing is not np.float32 will convert to np.float32"
)
data = data.astype(np.float32)
mrcfile.write(
file_name,
data.T if transpose else data,
voxel_size=voxel_size,
overwrite=overwrite,
)
def read_mrc(file_name: pathlib.Path, transpose: bool = True) -> npt.NDArray[float]:
"""Read an MRC file from disk. Data is transposed after reading as pytom internally
uses xyz ordering and MRCs use zyx.
Parameters
----------
file_name: pathlib.Path
path to file on disk
transpose: bool, default True
True (default) transposes the volume after reading, setting to False prevents
transpose but probably not a good idea when using the functions from this module
Returns
-------
data: npt.NDArray[float]
returns the MRC data as a numpy array
"""
with _wrap_mrcfile_readers(mrcfile.open, file_name) as mrc:
data = np.ascontiguousarray(mrc.data.T) if transpose else mrc.data
return data
def read_txt_file(file_name: pathlib.Path) -> list[float, ...]:
"""Read a txt file from disk with on each line a single float value.
Parameters
----------
file_name: pathlib.Path
file on disk to read
Returns
-------
output: list[float, ...]
list of floats
"""
with open(file_name, "r") as fstream:
lines = fstream.readlines()
return list(map(float, [x.strip() for x in lines if not x.isspace()]))
def read_tlt_file(file_name: pathlib.Path) -> list[float, ...]:
"""Read a txt file from disk using read_txt_file(). File is expected to have tilt
angles in degrees.
Parameters
----------
file_name: pathlib.Path
file on disk to read
Returns
-------
output: list[float, ...]
list of floats with tilt angles
"""
return read_txt_file(file_name)
def read_dose_file(file_name: pathlib.Path) -> list[float, ...]:
"""Read a txt file from disk using read_txt_file(). File is expected to have dose
accumulation in e-/(Å^2).
Parameters
----------
file_name: pathlib.Path
file on disk to read
Returns
-------
output: list[float, ...]
list of floats with accumulated dose
"""
return read_txt_file(file_name)
def read_imod_defocus_file(file_name: pathlib.Path) -> list[float, ...]:
"""Read an IMOD style defocus file. This function can read version 2 and 3 defocus
files. For format specification see:
https://bio3d.colorado.edu/imod/doc/man/ctfphaseflip.html
(section: Defocus File Format).
Parameters
----------
file_name: pathlib.Path
file on disk to read
Returns
-------
output: list[float, ...]
list of floats with defocus (in μm)
"""
with open(file_name, "r") as fstream:
lines = fstream.readlines()
imod_defocus_version = float(lines[0].strip().split()[5])
# imod defocus files have the values specified in nm:
if imod_defocus_version == 2: # file with one defocus value; data starts on line 0
return [float(x.strip().split()[4]) * 1e-3 for x in lines]
elif (
imod_defocus_version == 3
): # file with astigmatism; line 0 contains metadata that we do not need
return [
(float(x.strip().split()[4]) + float(x.strip().split()[5])) / 2 * 1e-3
for x in lines[1:]
]
else:
raise ValueError("Invalid IMOD defocus file inversion, can only be 2 or 3.")
def read_defocus_file(file_name: pathlib.Path) -> list[float, ...]:
"""Read a defocus file with values in nm. Output returns defocus in μm.
Depending on file suffix the function calls:
- read_imod_defocus_file() for .defocus suffix
- read_txt_file for .txt suffix
Parameters
----------
file_name: pathlib.Path
file on disk to read
Returns
-------
output: list[float, ...]
list of floats with defocus (in μm)
"""
if file_name.suffix == ".defocus":
return read_imod_defocus_file(file_name)
elif file_name.suffix == ".txt":
return read_txt_file(file_name)
else:
raise ValueError("Defocus file needs to have format .defocus or .txt")