Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Reader that does not need open file handles (#239) #926

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 16 additions & 36 deletions package/MDAnalysis/coordinates/XYZ.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def write_next_timestep(self, ts=None):
"".format(atom, x, y, z))


class XYZReader(base.Reader):
class XYZReader(base.NewReader):
"""Reads from an XYZ file

:Data:
Expand Down Expand Up @@ -299,7 +299,7 @@ def __init__(self, filename, **kwargs):
# coordinates::core.py so the last file extension will tell us if it is
# bzipped or not
root, ext = os.path.splitext(self.filename)
self.xyzfile = util.anyopen(self.filename, "r")

self.compression = ext[1:] if ext[1:] != "xyz" else None
self._cache = dict()

Expand All @@ -308,7 +308,7 @@ def __init__(self, filename, **kwargs):
# etc.
# (Also cannot just use seek() or reset() because that would break
# with urllib2.urlopen() streams)
self._read_next_timestep()
self.next()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For python 3 compatibility this should be next(self).


@property
@cached('n_atoms')
Expand All @@ -320,12 +320,13 @@ def n_atoms(self):
return int(n)

@property
@cached('n_frames')
def n_frames(self):
try:
return self._read_xyz_n_frames()
except IOError:
return 0
return len(self._offsets)

@property
@cached('offsets')
def _offsets(self):
return self._read_xyz_n_frames()

def _read_xyz_n_frames(self):
# the number of lines in the XYZ file will be 2 greater than the
Expand All @@ -341,14 +342,12 @@ def _read_xyz_n_frames(self):
offsets.append(f.tell())
line = f.readline()
counter += 1
offsets.pop() # last offset is end of file

# need to check this is an integer!
n_frames = int(counter / linesPerFrame)
self._offsets = offsets
return n_frames
return offsets

def _read_frame(self, frame):
self.xyzfile.seek(self._offsets[frame])
self._file.seek(self._offsets[frame])
self.ts.frame = frame - 1 # gets +1'd in next
return self._read_next_timestep()

Expand All @@ -357,7 +356,7 @@ def _read_next_timestep(self, ts=None):
if ts is None:
ts = self.ts

f = self.xyzfile
f = self._file

try:
# we assume that there are only two header lines per frame
Expand All @@ -371,21 +370,9 @@ def _read_next_timestep(self, ts=None):
raise EOFError(err)

def _reopen(self):
self.close()
self.open_trajectory()

def open_trajectory(self):
if self.xyzfile is not None:
raise IOError(
errno.EALREADY, 'XYZ file already opened', self.filename)

self.xyzfile = util.anyopen(self.filename, "r")

# reset ts
ts = self.ts
ts.frame = -1

return self.xyzfile
"""Reposition (the virtual fh) to just before first frame"""
self._last_fh_pos = 0
self.ts.frame = -1

def Writer(self, filename, n_atoms=None, **kwargs):
"""Returns a XYZWriter for *filename* with the same parameters as this
Expand All @@ -412,10 +399,3 @@ def Writer(self, filename, n_atoms=None, **kwargs):
if n_atoms is None:
n_atoms = self.n_atoms
return XYZWriter(filename, n_atoms=n_atoms, **kwargs)

def close(self):
"""Close xyz trajectory file if it was open."""
if self.xyzfile is None:
return
self.xyzfile.close()
self.xyzfile = None
104 changes: 100 additions & 4 deletions package/MDAnalysis/coordinates/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,12 @@

from six.moves import range
import six

import warnings

import numpy as np
from contextlib import contextmanager
import copy
import itertools
import numpy as np
import os.path
import warnings
import weakref

from .. import (
Expand All @@ -141,6 +142,7 @@
from ..core import flags
from .. import units
from ..lib.util import asiterable, Namespace
from ..lib import util
from . import core
from .. import NoDataError

Expand Down Expand Up @@ -1630,6 +1632,100 @@ def __del__(self):
self.close()


class NewReader(ProtoReader):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the plan to have this class completely replace Reader? If so, is it suppose to take over the Reader name? If not, having "new" in the name is not a smart move, at some point it will become the old one...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's just a temporary thing while I try and see what's possible... hopefully it will be the new Reader class across everything

"""Super top secret magical class that fixes all known (and unknown) bugs"""
def __init__(self, filename, convert_units=None, **kwargs):
self.filename = filename
self._last_fh_pos = 0 # last known position of file handle

if convert_units is None:
convert_units = flags['convert_lengths']
self.convert_units = convert_units

self._auxs = {}
ts_kwargs = {}
for att in ('dt', 'time_offset'):
try:
val = kwargs[att]
except KeyError:
pass
else:
ts_kwargs[att] = val

self._ts_kwargs = ts_kwargs

def _full_iter(self):
self._reopen()
with util.openany(self.filename, 'r') as self._file:
while True:
try:
ts = self._read_next_timestep()
for auxname in self.aux_list:
ts = self._auxs[auxname].update_ts(ts)
yield ts
except (EOFError, IOError):
# goto start of file
self._read_frame_with_aux(0)
self._last_fh_pos = self._file.tell()
raise StopIteration

def _sliced_iter(self, start, stop, step):
with util.openany(self.filename, 'r') as self._file:
for f in range(start, stop, step):
yield self._read_frame_with_aux(f)

self._read_frame_with_aux(0)
self._last_fh_pos = self._file.tell()
raise StopIteration

def _goto_frame(self, i):
with util.openany(self.filename, 'r') as self._file:
ts = self._read_frame_with_aux(i)
return ts

def __iter__(self):
return self._full_iter()

def __getitem__(self, item):
def apply_limits(frame):
if frame < 0:
frame += len(self)
if frame < 0 or frame >= len(self):
raise IndexError("Index {} exceeds length of trajectory ({})."
"".format(frame, len(self)))
return frame

if isinstance(item, int):
return self._goto_frame(apply_limits(item))
elif isinstance(item, (list, np.ndarray)):
return self._sliced_iter(item)
elif isinstance(item, slice): # TODO Fix me!
start, stop, step = self.check_slice_indices(
item.start, item.stop, item.step)

return self._sliced_iter(start, stop, step)

def __next__(self):
with util.openany(self.filename, 'r') as self._file:
try:
self._file.seek(self._last_fh_pos)
ts = self._read_next_timestep()
except (EOFError, IOError):
self.rewind()
raise StopIteration
else:
self._last_fh_pos = self._file.tell()
for auxname in self.aux_list:
ts = self._auxs[auxname].update_ts(ts)
return ts

next = __next__

def close(self):
# "close" by moving last position to start
self._last_fh_pos = 0


class _Writermeta(type):
# Auto register upon class creation
def __init__(cls, name, bases, classdict):
Expand Down
11 changes: 9 additions & 2 deletions package/MDAnalysis/lib/formats/libmdaxdr.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -122,20 +122,23 @@ cdef class _XDRFile:
This class can't be initialized use one of the subclasses XTCFile, TRRFile
"""
cdef readonly int n_atoms
cdef int is_open
cdef readonly int is_open
cdef int reached_eof
cdef XDRFILE *xfp
cdef readonly fname
cdef int current_frame
cdef str _mode
cdef str mode
cdef np.ndarray box
cdef np.ndarray _offsets
cdef int _has_offsets

def __cinit__(self, fname, mode='r'):
self.fname = fname.encode('utf-8')
self._mode = mode
self.is_open = False
self.open(self.fname, mode)
self.open() # to read natoms
self.close()

def __dealloc__(self):
self.close()
Expand Down Expand Up @@ -210,6 +213,7 @@ cdef class _XDRFile:

def __enter__(self):
"""Support context manager"""
self.open(self.fname, self._mode)
return self

def __exit__(self, exc_type, exc_val, exc_tb):
Expand Down Expand Up @@ -318,7 +322,10 @@ cdef class _XDRFile:
set_offsets
"""
if not self._has_offsets:
# wrap this in try/except to ensure it gets closed?
self.open(self.fname, self._mode)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not

with self:
    ...

that might work and should close the file when an IOError is thrown

self._offsets = self.calc_offsets()
self.close()
self._has_offsets = True
return self._offsets

Expand Down
27 changes: 26 additions & 1 deletion testsuite/MDAnalysisTests/coordinates/test_xyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
assert_array_equal,
assert_,
)
import mock

from MDAnalysis.lib.util import openany
from MDAnalysis.coordinates.XYZ import XYZWriter

from MDAnalysisTests.datafiles import COORDINATES_XYZ, COORDINATES_XYZ_BZ2
from MDAnalysisTests.datafiles import XYZ, COORDINATES_XYZ, COORDINATES_XYZ_BZ2
from MDAnalysisTests.coordinates.base import (BaseReaderTest, BaseReference,
BaseWriterTest)
from MDAnalysisTests.core.groupbase import make_Universe, FakeReader
Expand Down Expand Up @@ -148,3 +150,26 @@ def test_list_names(self):

u2 = mda.Universe(self.outfile)
assert_(all(u2.atoms.names == names))


class TestXYZReaderFH(object):
# tests for the new filehandle behaviour (issue #239)
def setUp(self):
self.u = mda.Universe(XYZ)

def tearDown(self):
del self.u

#@mock.patch('MDAnalysis.lib.util.openany', wraps=openany)
def test_iteration(self):
# check filehandle is closed
assert_(self.u.trajectory._file.closed)

with mock.patch('MDAnalysis.lib.util.openany', wraps=openany) as mockopen:
for ts in self.u.trajectory:
print ts.frame

assert_(mockopen.call_count == 1)

# check filehandle is closed
assert_(self.u.trajectory._file.closed)