Improved dataframe cache - see apache#3302
rhunwicks committed Dec 5, 2017
1 parent 15d91bb commit 0f98a55
Showing 1 changed file with 33 additions and 26 deletions.
59 changes: 33 additions & 26 deletions contrib/cache/dataframe.py
@@ -16,7 +16,6 @@
 import pandas as pd
 from six import u
 from werkzeug.contrib.cache import FileSystemCache
-from werkzeug.posixemulation import rename


 class DataFrameCache(FileSystemCache):
@@ -115,32 +114,40 @@ def set(self, key, value, timeout=None):
         filename = self._get_filename(key)
         cname = filename + self._fs_cache_suffix
         mname = filename + self._fs_metadata_suffix
+        suffix = self._fs_transaction_suffix
         self._prune()
-        try:
-            fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
-                                       dir=self._path)
-            with os.fdopen(fd, 'wb') as f:
-                try:
-                    value.to_feather(f)
-                    metadata['format'] = 'feather'
-                except ValueError:
-                    try:
-                        value.to_hdf(tmp, 'df')
-                        metadata['format'] = 'hdf'
-                        metadata['read_args'] = {'key': 'df'}
-                    except Exception:
-                        # PyTables is not installed, so fallback to pickle
-                        pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
-                        metadata['format'] = 'pickle'
-            rename(tmp, cname)
-            os.chmod(cname, self._mode)
-            with open(mname, 'w', encoding='utf-8') as f:
-                f.write(u(json.dumps(metadata)))
-            os.chmod(mname, self._mode)
-        except (IOError, OSError):
-            return False
-        else:
-            return True
+
+        def to_feather(filename, dataframe, metadata):
+            with tempfile.NamedTemporaryFile(dir=self._path, suffix=suffix) as f:
+                dataframe.to_feather(f)
+                metadata['format'] = 'feather'
+                os.link(f.name, cname)
+
+        def to_hdf(filename, dataframe, metadata):
+            with tempfile.NamedTemporaryFile(dir=self._path, suffix=suffix) as f:
+                dataframe.to_hdf(f.name, 'df')
+                metadata['format'] = 'hdf'
+                metadata['read_args'] = {'key': 'df'}
+                os.link(f.name, cname)
+
+        def to_pickle(filename, dataframe, metadata):
+            with tempfile.NamedTemporaryFile(dir=self._path, suffix=suffix) as f:
+                pickle.dump(dataframe, f, pickle.HIGHEST_PROTOCOL)
+                metadata['format'] = 'pickle'
+                os.link(f.name, cname)
+
+        for serializer in [to_feather, to_hdf, to_pickle]:
+            try:
+                serializer(cname, value, metadata)
+                with open(mname, 'w', encoding='utf-8') as f:
+                    f.write(u(json.dumps(metadata)))
+                return True
+            except Exception:
+                # Try the next serializer
+                pass
+
+        # We didn't successfully save the data
+        return False

     def delete(self, key):
         filename = self._get_filename(key)
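A note on the pattern the new code relies on (a minimal sketch, not part of the commit): each serializer writes into its own tempfile.NamedTemporaryFile and, only after serialization has succeeded, hard-links the finished file to the final cache name with os.link. A reader can therefore never observe a half-written cache entry, and the temporary name is unlinked automatically when the with-block exits while the hard link keeps the data alive. The helper name below is hypothetical.

import os
import tempfile

def publish_atomically(payload, target_path, tmp_dir):
    # Hypothetical helper illustrating the write-then-link pattern above.
    with tempfile.NamedTemporaryFile(dir=tmp_dir, suffix='.tmp') as f:
        f.write(payload)
        f.flush()  # push buffered bytes into the temp file before linking
        # Hard-link the temp file's inode to the final name; the data
        # becomes visible under target_path in a single step.
        os.link(f.name, target_path)
    # Exiting the with-block unlinks only the temporary name;
    # target_path still references the same inode.

One difference from the old rename-based code is worth noting: rename overwrites an existing destination, whereas os.link raises OSError if the destination already exists.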

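For context, a usage sketch. It assumes DataFrameCache keeps FileSystemCache's constructor signature (cache_dir, threshold, default_timeout, mode) and that get() reverses set() using the stored metadata; the key and paths are illustrative.

import pandas as pd

from contrib.cache.dataframe import DataFrameCache

cache = DataFrameCache('/tmp/superset-df-cache', default_timeout=300)

df = pd.DataFrame({'region': ['north', 'south'], 'sales': [42, 17]})
if cache.set('query-1234', df):         # returns True when a serializer succeeded
    restored = cache.get('query-1234')  # round-trips the DataFrame

set() tries feather first, then HDF, then pickle, recording the winning format in the entry's metadata file so it can be read back correctly.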