Skip to content

Commit

Permalink
Merge pull request #675 from abhi-glitchhg/pyzenodo
Browse files Browse the repository at this point in the history
Merging to check whether CI is happy and green.
  • Loading branch information
abhi-glitchhg authored Jun 29, 2023
2 parents a07670b + d9a14f3 commit ace365f
Show file tree
Hide file tree
Showing 11 changed files with 581 additions and 3 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
*.py[cod]
__pycache__

# vim swap files
*.swp

# packaging
*.egg-info/
dist/
Expand Down
2 changes: 1 addition & 1 deletion tvb_library/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
LIBRARY_TEAM = "Marmaduke Woodman, Jan Fousek, Stuart Knock, Paula Sanz Leon, Viktor Jirsa"

LIBRARY_REQUIRED_PACKAGES = ["autopep8", "Deprecated", "docutils", "ipywidgets", "lxml", "mako>=1.1.4", "matplotlib",
"networkx", "numba", "numexpr", "numpy", "pylems", "scipy", "six"]
"networkx", "numba", "numexpr", "numpy", "pooch","pylems", "scipy", "six"]

LIBRARY_REQUIRED_EXTRA = ["h5py", "pytest", "pytest-benchmark", "pytest-xdist", "tvb-gdist", "tvb-data"]

Expand Down
4 changes: 4 additions & 0 deletions tvb_library/tvb/basic/config/profile_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ def __init__(self):
# The number of logs in a message batch that are sent to the server
self.ELASTICSEARCH_BUFFER_THRESHOLD = self.manager.get_attribute(stored.KEY_ELASTICSEARCH_BUFFER_THRESHOLD, 1000000, int)

# Directory where all the datasets will be extracted/unzipped.
self.DATASETS_FOLDER = os.path.join(self.TVB_STORAGE, "DATASETS")


@property
def BIN_FOLDER(self):
"""
Expand Down
2 changes: 2 additions & 0 deletions tvb_library/tvb/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .zenodo import Record, Zenodo
from .tvb_data import TVBZenodoDataset
58 changes: 58 additions & 0 deletions tvb_library/tvb/datasets/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# -*- coding: utf-8 -*-
#
#
# TheVirtualBrain-Scientific Package. This package holds all simulators, and
# analysers necessary to run brain-simulations. You can use it stand alone or
# in conjunction with TheVirtualBrain-Framework Package. See content of the
# documentation-folder for more details. See also http://www.thevirtualbrain.org
#
# (c) 2012-2023, Baycrest Centre for Geriatric Care ("Baycrest") and others
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program. If not, see <http://www.gnu.org/licenses/>.
#
#
# CITATION:
# When using The Virtual Brain for scientific publications, please cite it as explained here:
# https://www.thevirtualbrain.org/tvb/zwei/neuroscience-publications
#
#

"""
.. moduleauthor:: Abhijit Deo <[email protected]>
"""



from tvb.basic.logger.builder import get_logger
from tvb.basic.profile import TvbProfile
from pathlib import Path

class BaseDataset:
    """
    Common base for TVB datasets.

    Stores the requested dataset version and resolves the directory in which
    dataset files are extracted. ``download`` and ``fetch_data`` are no-op
    hooks in this class; concrete datasets override them.
    """

    def __init__(self, version, extract_dir=None):
        """
        parameters
        -----------
        version:
            - Version identifier of the dataset.
        extract_dir: str or path-like, optional
            - Directory where the dataset is extracted. When None, the
              DATASETS_FOLDER of the current TVB profile is used.
        """
        self.log = get_logger(self.__class__.__module__)
        self.cached_files = None
        self.version = version

        # Identity check: None is a singleton, and `==` may be overridden by
        # whatever object the caller passes in.
        if extract_dir is None:
            extract_dir = TvbProfile.current.DATASETS_FOLDER

        # A leading "~" is expanded to the user's home directory.
        self.extract_dir = Path(extract_dir).expanduser()

    def download(self):
        """Download hook; does nothing in the base class."""
        pass

    def fetch_data(self):
        """Data access hook; does nothing in the base class."""
        pass

    def get_version(self):
        """Return the dataset version given to the constructor."""
        return self.version
188 changes: 188 additions & 0 deletions tvb_library/tvb/datasets/tvb_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
# -*- coding: utf-8 -*-
#
#
# TheVirtualBrain-Scientific Package. This package holds all simulators, and
# analysers necessary to run brain-simulations. You can use it stand alone or
# in conjunction with TheVirtualBrain-Framework Package. See content of the
# documentation-folder for more details. See also http://www.thevirtualbrain.org
#
# (c) 2012-2023, Baycrest Centre for Geriatric Care ("Baycrest") and others
#
# This program is free software: you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software Foundation,
# either version 3 of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
# PARTICULAR PURPOSE. See the GNU General Public License for more details.
# You should have received a copy of the GNU General Public License along with this
# program. If not, see <http://www.gnu.org/licenses/>.
#
#
# CITATION:
# When using The Virtual Brain for scientific publications, please cite it as explained here:
# https://www.thevirtualbrain.org/tvb/zwei/neuroscience-publications
#
#

"""
.. moduleauthor:: Abhijit Deo <[email protected]>
"""

import os
import requests
import json
import pooch
from pathlib import Path
from zipfile import ZipFile
import shutil

from .base import BaseDataset
from .zenodo import Zenodo, Record, BASE_URL

class TVBZenodoDataset(BaseDataset):
    """
    Access to the TVB dataset published on Zenodo.

    The Zenodo metadata of every version of the dataset is cached as JSON in
    ``<extract_dir>/.cache/tvb_cached_responses.txt``. The archive itself is
    downloaded on demand and single files are extracted from it.
    """

    # Zenodo concept record id; used to query all versions of the dataset.
    CONCEPTID = "3417206"

    def __init__(self, version="2.7", extract_dir=None):
        """
        Constructor for TVB_Data class
        parameters
        -----------
        version: str
            - Version number of the dataset, Default value is 2.7
        extract_dir: str or path-like, optional
            - Directory where files are extracted; forwarded unchanged to
              BaseDataset.
        """
        super().__init__(version, extract_dir)
        self.cached_dir = self.extract_dir / ".cache"
        self.cached_file = self.cached_dir / "tvb_cached_responses.txt"

        self.cached_dir.mkdir(parents=True, exist_ok=True)

        try:
            self.recid = self.read_cached_response()[version]['conceptrecid']
        except (OSError, ValueError, KeyError, TypeError):
            # Cache file missing/unreadable (OSError), not valid JSON
            # (ValueError), lacking this version (KeyError) or not shaped as a
            # mapping of mappings (TypeError): rebuild it from Zenodo.
            self.log.warning("Failed to read data from cached response.")
            # NOTE(review): the cached path stores the 'conceptrecid' field
            # while this path stores whatever get_versions_info() maps the
            # version to - confirm both yield the same kind of id.
            self.recid = Zenodo().get_versions_info(self.CONCEPTID)[version]
            self.update_cached_response()

        self.rec = Record(self.read_cached_response()[self.version])

    def download(self, path=None):
        """
        Downloads the dataset to `path`
        """
        self.rec.download(path)

    def fetch_data(self, file_name):
        """
        Fetches the data
        parameters:
        -----------
        file_name: str
            - Name of the file from the downloaded zip file to fetch.
              Surrounding whitespace is stripped before the lookup.
        returns: Pathlib.Path
            path of the file which was extracted (inside `self.extract_dir`)
        raises: KeyError
            when no member of the archive has the name `file_name`
        """
        extract_dir = self.extract_dir
        download_dir = self.cached_dir / "TVB_Data"

        try:
            file_path = self.rec.file_loc['tvb_data.zip']
        except (AttributeError, KeyError):
            # The archive location is not known yet, download it first.
            self.download(path=download_dir)
            file_path = self.rec.file_loc['tvb_data.zip']

        file_name = file_name.strip()

        # A single archive handle, closed by the context manager.
        with ZipFile(file_path) as zf:
            # Map the bare file name to its full path inside the archive; for
            # duplicated names the last member wins.
            members_by_name = {Path(member).name: member for member in zf.namelist()}
            try:
                member = members_by_name[file_name]
            except KeyError:
                raise KeyError(f"{file_name!r} was not found in {file_path}") from None
            zf.extract(member, path=extract_dir)

        if extract_dir.is_absolute():
            return extract_dir / member
        return Path.cwd() / extract_dir / member

    def delete_data(self):
        """
        Removes the "tvb_data" folder, with all its content, from the
        extraction directory.
        """
        shutil.rmtree(self.extract_dir / "tvb_data")

    def update_cached_response(self):
        """
        gets responses from zenodo server and saves them to cache file.
        """
        url = f"{BASE_URL}records?q=conceptrecid:{self.CONCEPTID}&all_versions=true"
        hits = requests.get(url).json()['hits']['hits']

        # One entry per dataset version, holding the complete hit.
        responses = {hit['metadata']['version']: hit for hit in hits}

        # Mode "w" creates the file when missing and truncates it otherwise.
        with open(self.cached_file, "w") as fp:
            json.dump(responses, fp)
        self.log.warning("Updated the cache response file")

    def read_cached_response(self):
        """
        reads responses from the cache file.
        returns: dict
            the cached responses, keyed by dataset version
        """
        with open(self.cached_file) as fp:
            return dict(json.load(fp))

    def describe(self):
        """Returns the description provided by the underlying record."""
        return self.rec.describe()

    def get_record(self):
        """Returns the record id resolved by the constructor."""
        return self.recid

    def __eq__(self, other):
        """
        Two datasets are equal when their records are equal; any object that
        is not a TVBZenodoDataset compares unequal.
        """
        # NOTE: __hash__ is not defined next to __eq__, so instances are
        # unhashable.
        if isinstance(other, TVBZenodoDataset):
            return self.rec == other.rec
        return False

Loading

0 comments on commit ace365f

Please sign in to comment.