Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add singularity inspect metalad extractor #200

Merged
merged 16 commits into from
Mar 30, 2023
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
.*.swp
docs/build
docs/source/generated
# manpage
build
5 changes: 5 additions & 0 deletions changelog.d/20230307_164111_austin_add_metalad_extractor.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### 🚀 Enhancements and New Features

- Add metalad extractor using `singularity inspect`.
asmacdo marked this conversation as resolved.
Show resolved Hide resolved
Fixes https://github.com/datalad/datalad-container/issues/198 via
https://github.com/datalad/datalad-container/pull/200 (by @asmacdo )
3 changes: 3 additions & 0 deletions datalad_container/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

from .version import __version__

# Imported to set singularity/apptainer version commands at init
import datalad_container.extractors._load_singularity_versions # noqa

# defines a datalad command suite
# this symbol must be identified as a setuptools entrypoint
# to be found by datalad
Expand Down
2 changes: 2 additions & 0 deletions datalad_container/conftest.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from datalad.conftest import setup_package

from .tests.fixtures import * # noqa: F401, F403 # lgtm [py/polluting-import]
1 change: 1 addition & 0 deletions datalad_container/extractors/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

28 changes: 28 additions & 0 deletions datalad_container/extractors/_load_singularity_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""
Importing this file extends datalad.support.external_versions:

Adds:
- external_versions["cmd:apptainer"]
- external_versions["cmd:singularity"]
"""
import subprocess

from datalad.cmd import (
StdOutCapture,
WitlessRunner,
)
from datalad.support.external_versions import external_versions


def __get_apptainer_version():
    """Return the version string printed by ``apptainer --version``.

    The stripped stdout is split on the literal prefix
    ``"apptainer version "`` and the part following it is returned.
    Any failure (command missing, unexpected output) propagates to the caller.
    """
    runner = WitlessRunner()
    captured = runner.run("apptainer --version", protocol=StdOutCapture)
    banner = captured['stdout'].strip()
    # Everything after the fixed prefix is the version proper.
    return banner.split("apptainer version ")[1]


def __get_singularity_version():
    """Return the stripped stdout of ``singularity version``."""
    captured = WitlessRunner().run("singularity version", protocol=StdOutCapture)
    return captured['stdout'].strip()


# Register the version probes with external_versions under the keys
# "cmd:apptainer" and "cmd:singularity"
for _key, _probe in (
    ("cmd:apptainer", __get_apptainer_version),
    ("cmd:singularity", __get_singularity_version),
):
    external_versions.add(_key, func=_probe)
# Do not leave the loop helpers behind in the module namespace.
del _key, _probe
76 changes: 76 additions & 0 deletions datalad_container/extractors/metalad_container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
# See COPYING file distributed along with the datalad package for the
# copyright and license terms.
asmacdo marked this conversation as resolved.
Show resolved Hide resolved
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Metadata extractors for Container Images stored in Datalad's own core storage"""
import json
import logging
import subprocess
import time
from uuid import UUID

from datalad.support.external_versions import external_versions, UnknownVersion
from datalad_metalad.extractors.base import DataOutputCategory, ExtractorResult, FileMetadataExtractor
from datalad_metalad import get_file_id

from datalad_container.utils import get_container_command


CURRENT_VERSION = "0.0.1"

lgr = logging.getLogger('datalad.metadata.extractors.metalad_container')


class MetaladContainerInspect(FileMetadataExtractor):
    """
    Populates metadata singularity/apptainer version and `inspect` output.

    File-level extractor: for the file described by ``self.file_info`` it
    records which container command was found, that command's version, and
    the parsed JSON printed by ``<command> inspect --json <path>``.
    """

    def get_data_output_category(self) -> DataOutputCategory:
        """Return ``DataOutputCategory.IMMEDIATE``.

        ``extract`` hands its metadata back via ``immediate_data``.
        """
        return DataOutputCategory.IMMEDIATE

    def is_content_required(self) -> bool:
        """Declare that file content is required.

        ``extract`` passes the file path to the container command's
        ``inspect`` subcommand.
        """
        return True

    def get_id(self) -> UUID:
        """Return the fixed UUID identifying this extractor."""
        # Nothing special, made this up - asmacdo
        return UUID('3a28cca6-b7a1-11ed-b106-fc3497650c92')

    @staticmethod
    def get_version() -> str:
        """Return the extractor version (module constant ``CURRENT_VERSION``)."""
        return CURRENT_VERSION

    def extract(self, _=None) -> ExtractorResult:
        """Run ``inspect`` on the file and wrap the outcome in an ExtractorResult.

        The single positional argument is accepted but not used.

        Raises
        ------
        RuntimeError
            Propagated from ``get_container_command`` when neither
            apptainer nor singularity is found.
        subprocess.CalledProcessError
            Propagated from ``_container_inspect`` when the ``inspect``
            call exits with a non-zero status.
        """
        container_command = get_container_command()
        # Version probes are registered in external_versions under
        # "cmd:<command>" (the same key get_container_command queries),
        # so the lookup must use the prefixed key, not the bare command name.
        version_key = f"cmd:{container_command}"
        return ExtractorResult(
            extractor_version=self.get_version(),
            extraction_parameter=self.parameter or {},
            extraction_success=True,
            datalad_result_dict={
                "type": "container",
                "status": "ok"
            },
            immediate_data={
                "@id": get_file_id(dict(
                    path=self.file_info.path,
                    type=self.file_info.type)),
                "type": self.file_info.type,
                "path": self.file_info.intra_dataset_path,
                "content_byte_size": self.file_info.byte_size,
                "comment": f"SingularityInspect extractor executed at {time.time()}",
                "container_system": container_command,
                "container_system_version": str(external_versions[version_key]),
                "container_inspect": self._container_inspect(container_command, self.file_info.path),
            })

    @staticmethod
    def _container_inspect(command, path) -> dict:
        """Return the parsed JSON output of ``<command> inspect --json <path>``.

        Parameters
        ----------
        command
            Name of the container executable to invoke.
        path
            Path of the image file handed to ``inspect``.

        Raises
        ------
        subprocess.CalledProcessError
            If the command exits with a non-zero status (``check=True``).
        """
        completed = subprocess.run(
            [command, "inspect", "--json", path],
            check=True,
            stdout=subprocess.PIPE)
        return json.loads(completed.stdout.decode())
82 changes: 82 additions & 0 deletions datalad_container/extractors/tests/test_metalad_container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os.path as op
import pytest
import subprocess
import sys
from pathlib import Path
from shutil import which
from unittest.mock import patch

from datalad.api import (
clone,
Dataset,
meta_extract,
)
from datalad.cmd import (
StdOutCapture,
WitlessRunner,
)
from datalad.support.exceptions import CommandError
from datalad.support.external_versions import external_versions, UnknownVersion
from datalad.tests.utils_pytest import (
SkipTest,
assert_in,
assert_raises,
eq_,
ok_exists,
with_tempfile,
with_tree,
)

if not external_versions["datalad_metalad"]:
raise SkipTest("skipping metalad tests")

from datalad_container.utils import get_container_command

try:
container_command = get_container_command()
except RuntimeError:
raise SkipTest("skipping singularity/apptainer tests")

# Must come after skiptest or imports will not work
from datalad_container.extractors.metalad_container import MetaladContainerInspect


@with_tempfile
def test__container_inspect_nofile(path=None):
    """Inspecting a path that does not exist raises CalledProcessError."""
    with pytest.raises(subprocess.CalledProcessError):
        # Only the raised exception matters; the return value is not kept.
        MetaladContainerInspect._container_inspect(container_command, path)

def test__container_inspect_valid(singularity_test_image):
    """Inspect the fixture image and compare against its known labels."""
    expected_labels = {
        'org.label-schema.build-date': 'Sat,_19_May_2018_07:06:48_+0000',
        'org.label-schema.build-size': '62MB',
        'org.label-schema.schema-version': '1.0',
        'org.label-schema.usage.singularity.deffile': 'Singularity.testhelper',
        'org.label-schema.usage.singularity.deffile.bootstrap': 'docker',
        'org.label-schema.usage.singularity.deffile.from': 'debian:stable-slim',
        'org.label-schema.usage.singularity.version':
            '2.5.0-feature-squashbuild-secbuild-2.5.0.gddf62fb5',
    }
    expected_result = {
        'data': {'attributes': {'labels': expected_labels}},
        'type': 'container',
    }

    image_path = singularity_test_image["img_path"]
    result = MetaladContainerInspect._container_inspect(container_command, image_path)

    assert result == expected_result

def test_extract(singularity_test_image):
    """Run the ``container_inspect`` extractor through ``meta_extract``."""
    results = meta_extract(
        dataset=singularity_test_image["ds"],
        extractorname="container_inspect",
        path=singularity_test_image["img_path"],
    )
    assert len(results) == 1

    record = results[0]["metadata_record"]
    assert "extracted_metadata" in record
    assert record["extractor_name"] == 'container_inspect'
    assert record["extractor_version"] == MetaladContainerInspect.get_version()
1 change: 1 addition & 0 deletions datalad_container/tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .singularity_image import singularity_test_image
29 changes: 29 additions & 0 deletions datalad_container/tests/fixtures/singularity_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest
from pathlib import Path

from datalad.api import Dataset
from datalad.tests.utils_pytest import with_tempfile

from datalad_container.utils import get_container_command
from datalad_container.tests.utils import add_pyscript_image

TEST_IMG_URL = 'shub://datalad/datalad-container:testhelper'

@pytest.fixture(scope="session")
def singularity_test_image(tmp_path_factory: pytest.TempPathFactory) -> dict:
    """Session-scoped fixture providing a dataset with a test container image.

    Creates a dataset in a fresh temporary directory, adds the image from
    ``TEST_IMG_URL`` as container ``mycontainer``, and gets the image file.

    Returns
    -------
    dict
        ``{"ds": <Dataset>, "img_path": <path of the image inside the dataset>}``
    """
    fixture_file_name = "fixture.sing"
    ds = Dataset(tmp_path_factory.mktemp("singularity_image"))
    ds.create(force=True)
    ds.containers_add(
        'mycontainer',
        url=TEST_IMG_URL,
        image=fixture_file_name,
    )
    img_path = ds.pathobj / fixture_file_name
    ds.get(img_path)
    return {"ds": ds, "img_path": img_path}

@pytest.fixture(scope="session")
def container_command():
    """Session-scoped wrapper around ``get_container_command()``."""
    # NOTE(review): get_container_command could instead have been made
    # "lazy", e.g. via @functools.cache, but this is ok for now.
    command = get_container_command()
    return command
11 changes: 11 additions & 0 deletions datalad_container/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from datalad.support.external_versions import external_versions

def get_container_command():
    """Return the name of the first container command that is available.

    ``apptainer`` is checked before ``singularity``; a command counts as
    available when ``external_versions["cmd:<name>"]`` is truthy.

    Raises
    ------
    RuntimeError
        If neither command yields a version.
    """
    for candidate in ("apptainer", "singularity"):
        if external_versions[f"cmd:{candidate}"]:
            return candidate
    raise RuntimeError("Did not find apptainer or singularity")


1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Documentation

changelog
acknowledgements
metadata-extraction


API Reference
Expand Down
62 changes: 62 additions & 0 deletions docs/source/metadata-extraction.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
Metadata Extraction
*******************

If the `datalad-metalad`_ extension is installed, `datalad-container` can
extract metadata from Singularity container images.

(It is recommended to use a tool like `jq` if you would like to read the
output yourself.)

Singularity Inspect
-------------------

Adds metadata gathered from `singularity inspect` and the version of
`singularity` or `apptainer`.

For example:

(From the ReproNim/containers repository)

`datalad meta-extract -d . container_inspect images/bids/bids-pymvpa--1.0.2.sing | jq`

.. code-block::

{
"type": "file",
"dataset_id": "b02e63c2-62c1-11e9-82b0-52540040489c",
"dataset_version": "9ed0a39406e518f0309bb665a99b64dec719fb08",
"path": "images/bids/bids-pymvpa--1.0.2.sing",
"extractor_name": "container_inspect",
"extractor_version": "0.0.1",
"extraction_parameter": {},
"extraction_time": 1680097317.7093463,
"agent_name": "Austin Macdonald",
"agent_email": "[email protected]",
"extracted_metadata": {
"@id": "datalad:SHA1-s993116191--cc7ac6e6a31e9ac131035a88f699dfcca785b844",
"type": "file",
"path": "images/bids/bids-pymvpa--1.0.2.sing",
"content_byte_size": 0,
"comment": "SingularityInspect extractor executed at 1680097317.6012993",
"container_system": "apptainer",
"container_system_version": "1.1.6-1.fc37",
"container_inspect": {
"data": {
"attributes": {
"labels": {
"org.label-schema.build-date": "Thu,_19_Dec_2019_14:58:41_+0000",
"org.label-schema.build-size": "2442MB",
"org.label-schema.schema-version": "1.0",
"org.label-schema.usage.singularity.deffile": "Singularity.bids-pymvpa--1.0.2",
"org.label-schema.usage.singularity.deffile.bootstrap": "docker",
"org.label-schema.usage.singularity.deffile.from": "bids/pymvpa:v1.0.2",
"org.label-schema.usage.singularity.version": "2.5.2-feature-squashbuild-secbuild-2.5.6e68f9725"
}
}
},
"type": "container"
}
}
}

.. _datalad-metalad: http://docs.datalad.org/projects/metalad/en/latest/
1 change: 1 addition & 0 deletions requirements-devel.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# requirements for a development environment
-e .[devel]
datalad-metalad
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ datalad.extensions =
# valid datalad interface specification (see demo in this extensions)
container = datalad_container:command_suite

datalad.metadata.extractors =
container_inspect = datalad_container.extractors.metalad_container:MetaladContainerInspect

[versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
Expand Down