From 3a1c637672b332c78b23fd18287e51a3932a1c3c Mon Sep 17 00:00:00 2001 From: "Jakub Janaszewski (DOC)" Date: Fri, 26 Apr 2024 11:02:34 +0200 Subject: [PATCH 1/5] Versioning support --- eark_validator/cli/app.py | 11 + .../profiles/{ => V2.0.4}/E-ARK-CSIP.xml | 0 .../profiles/{ => V2.0.4}/E-ARK-DIP.xml | 0 .../profiles/{ => V2.0.4}/E-ARK-SIP.xml | 0 .../resources/profiles/V2.1.0/E-ARK-CSIP.xml | 1729 +++++++++++++++++ .../resources/profiles/V2.1.0/E-ARK-DIP.xml | 408 ++++ .../resources/profiles/V2.1.0/E-ARK-SIP.xml | 832 ++++++++ .../{ => V2.0.4}/CSIP/mets_amdSec_rules.xml | 0 .../CSIP/mets_behaviorSec_rules.xml | 0 .../{ => V2.0.4}/CSIP/mets_dmdSec_rules.xml | 0 .../{ => V2.0.4}/CSIP/mets_fileSec_rules.xml | 0 .../{ => V2.0.4}/CSIP/mets_metsHdr_rules.xml | 0 .../CSIP/mets_metsRootElement_rules.xml | 0 .../CSIP/mets_structLink_rules.xml | 0 .../CSIP/mets_structMap_rules.xml | 0 .../{ => V2.0.4}/DIP/mets_amdSec_rules.xml | 0 .../DIP/mets_behaviorSec_rules.xml | 0 .../{ => V2.0.4}/DIP/mets_dmdSec_rules.xml | 0 .../{ => V2.0.4}/DIP/mets_fileSec_rules.xml | 0 .../{ => V2.0.4}/DIP/mets_metsHdr_rules.xml | 0 .../DIP/mets_metsRootElement_rules.xml | 0 .../DIP/mets_structLink_rules.xml | 0 .../{ => V2.0.4}/DIP/mets_structMap_rules.xml | 0 .../{ => V2.0.4}/SIP/mets_amdSec_rules.xml | 0 .../SIP/mets_behaviorSec_rules.xml | 0 .../{ => V2.0.4}/SIP/mets_dmdSec_rules.xml | 0 .../{ => V2.0.4}/SIP/mets_fileSec_rules.xml | 0 .../{ => V2.0.4}/SIP/mets_metsHdr_rules.xml | 0 .../SIP/mets_metsRootElement_rules.xml | 0 .../SIP/mets_structLink_rules.xml | 0 .../{ => V2.0.4}/SIP/mets_structMap_rules.xml | 0 .../V2.1.0/CSIP/mets_amdSec_rules.xml | 44 + .../V2.1.0/CSIP/mets_behaviorSec_rules.xml | 7 + .../V2.1.0/CSIP/mets_dmdSec_rules.xml | 25 + .../V2.1.0/CSIP/mets_fileSec_rules.xml | 39 + .../V2.1.0/CSIP/mets_metsHdr_rules.xml | 21 + .../CSIP/mets_metsRootElement_rules.xml | 23 + .../V2.1.0/CSIP/mets_structLink_rules.xml | 7 + .../V2.1.0/CSIP/mets_structMap_rules.xml | 61 + 
.../V2.1.0/DIP/mets_amdSec_rules.xml | 8 + .../V2.1.0/DIP/mets_behaviorSec_rules.xml | 7 + .../V2.1.0/DIP/mets_dmdSec_rules.xml | 8 + .../V2.1.0/DIP/mets_fileSec_rules.xml | 9 + .../V2.1.0/DIP/mets_metsHdr_rules.xml | 7 + .../V2.1.0/DIP/mets_metsRootElement_rules.xml | 8 + .../V2.1.0/DIP/mets_structLink_rules.xml | 7 + .../V2.1.0/DIP/mets_structMap_rules.xml | 7 + .../V2.1.0/SIP/mets_amdSec_rules.xml | 8 + .../V2.1.0/SIP/mets_behaviorSec_rules.xml | 7 + .../V2.1.0/SIP/mets_dmdSec_rules.xml | 8 + .../V2.1.0/SIP/mets_fileSec_rules.xml | 15 + .../V2.1.0/SIP/mets_metsHdr_rules.xml | 31 + .../V2.1.0/SIP/mets_metsRootElement_rules.xml | 12 + .../V2.1.0/SIP/mets_structLink_rules.xml | 7 + .../V2.1.0/SIP/mets_structMap_rules.xml | 7 + eark_validator/ipxml/schematron.py | 4 +- eark_validator/packages.py | 89 +- eark_validator/rules.py | 21 +- .../specifications/specification.py | 65 +- 59 files changed, 3454 insertions(+), 88 deletions(-) rename eark_validator/ipxml/resources/profiles/{ => V2.0.4}/E-ARK-CSIP.xml (100%) rename eark_validator/ipxml/resources/profiles/{ => V2.0.4}/E-ARK-DIP.xml (100%) rename eark_validator/ipxml/resources/profiles/{ => V2.0.4}/E-ARK-SIP.xml (100%) create mode 100644 eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-CSIP.xml create mode 100644 eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-DIP.xml create mode 100644 eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-SIP.xml rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/CSIP/mets_amdSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/CSIP/mets_behaviorSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/CSIP/mets_dmdSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/CSIP/mets_fileSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/CSIP/mets_metsHdr_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => 
V2.0.4}/CSIP/mets_metsRootElement_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/CSIP/mets_structLink_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/CSIP/mets_structMap_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_amdSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_behaviorSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_dmdSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_fileSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_metsHdr_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_metsRootElement_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_structLink_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/DIP/mets_structMap_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_amdSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_behaviorSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_dmdSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_fileSec_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_metsHdr_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_metsRootElement_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_structLink_rules.xml (100%) rename eark_validator/ipxml/resources/schematron/{ => V2.0.4}/SIP/mets_structMap_rules.xml (100%) create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml create mode 100644 
eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_behaviorSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsHdr_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structLink_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_amdSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_behaviorSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_fileSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structLink_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structMap_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_amdSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_behaviorSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_dmdSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_fileSec_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsHdr_rules.xml create mode 100644 
eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsRootElement_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structLink_rules.xml create mode 100644 eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structMap_rules.xml diff --git a/eark_validator/cli/app.py b/eark_validator/cli/app.py index c51142e..10cca79 100644 --- a/eark_validator/cli/app.py +++ b/eark_validator/cli/app.py @@ -37,6 +37,7 @@ from eark_validator.model import ValidationReport import eark_validator.packages as PACKAGES from eark_validator.infopacks.package_handler import PackageHandler +from eark_validator.specifications.specification import SpecificationVersion __version__ = importlib.metadata.version('eark_validator') @@ -81,6 +82,13 @@ def parse_command_line(): dest='outputVerboseFlag', default=False, help='Verbose reporting for selected output options.') + PARSER.add_argument('-s', '--specification_version', + nargs='?', + dest='specification_version', + default=SpecificationVersion.V2_1_0, + type=SpecificationVersion, + choices=list(SpecificationVersion), + help='Specification version used for validation. 
Default is %(default)s.') PARSER.add_argument('--version', action='version', version=__version__) @@ -101,6 +109,9 @@ def main(): # Get input from command line args = parse_command_line() # If no target files or folders specified then print usage and exit + print('Version: ' + args.specification_version) + print(type(args.specification_version)) + if not args.files: PARSER.print_help() diff --git a/eark_validator/ipxml/resources/profiles/E-ARK-CSIP.xml b/eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-CSIP.xml similarity index 100% rename from eark_validator/ipxml/resources/profiles/E-ARK-CSIP.xml rename to eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-CSIP.xml diff --git a/eark_validator/ipxml/resources/profiles/E-ARK-DIP.xml b/eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-DIP.xml similarity index 100% rename from eark_validator/ipxml/resources/profiles/E-ARK-DIP.xml rename to eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-DIP.xml diff --git a/eark_validator/ipxml/resources/profiles/E-ARK-SIP.xml b/eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-SIP.xml similarity index 100% rename from eark_validator/ipxml/resources/profiles/E-ARK-SIP.xml rename to eark_validator/ipxml/resources/profiles/V2.0.4/E-ARK-SIP.xml diff --git a/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-CSIP.xml b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-CSIP.xml new file mode 100644 index 0000000..5cdf14a --- /dev/null +++ b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-CSIP.xml @@ -0,0 +1,1729 @@ + + + + + https://earkcsip.dilcis.eu/profile/E-ARK-CSIP.xml + E-ARK CSIP METS Profile + This base profile describes the Common Specification for Information Packages (CSIP) and the implementation of METS for packaging OAIS conformant Information Packages. The profile is accompanied with a text document explaning the details of use of this profile. 
+ This will enable repository interoperability and assist in the management of the preservation of digital content. + This profile is a base profile which is extended with E-ARK implementation of SIP, AIP and DIP. + The profile can be used as is, but it is recommended that the supplied extending implementation are used. Alternatively, an own extension fulfilling the extending needs of the implementer can be created. + 2021-10-01T09:00:00 + + DILCIS Board +
http://dilcis.eu/
+ info@dilcis.eu +
+ This profile has no related profiles + + + Principles for a package conforming to the Common Specification for Information Packages (CSIP) +

CSIP Principles

+
+
+ + E-ARK CSIP METS Extension + http://earkcsip.dilcis.eu/schema/DILCISExtensionMETS.xsd + XML-schema for the attributes added by CSIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is identified using the namespace prefix csip.

+
+
+ + PREMIS + http://www.loc.gov/standards/premis/ + Used for preservation metadata + +

A rule set for use with this profile is under development.

+
+
+ +

The filepath must be decoded consistently throughout all file references within the information package.

+
+ + + Content information type specification + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyContentInformationType.xml + Values for `@csip:CONTENTINFORMATIONTYPE` + +

Lists the names of specific E-ARK content information type specifications supported or maintained in this METS profile.

+
+
+ + Content Category + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyContentCategory.xml + Values for `mets/@type` + +

Declares the categorical classification of package content.

+
+
+ + OAIS Package type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyOAISPackageType.xml + Values for `@csip:OAISPACKAGETYPE` + +

Describes the OAIS type the package belongs to in the OAIS reference model.

+
+
+ + Note type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyNoteType.xml + Values for `@csip:NOTETYPE` + +

Provides values for the type of a note for an agent.

+
+
+ + Other agent type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyAgentOtherType.xml + Values for `metsHdr/agent/@OTHERTYPE` + +

Describes the other agent types supported by the profile.

+
+
+ + Identifier type + Library of Congress + http://id.loc.gov/vocabulary/identifiers.html + Values for `metsHdr/altRecordID/@TYPE` + +

Describes the type of the identifier.

+
+
+ + dmdSec status + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStatus.xml + Values for `dmdSec/@STATUS` + +

Describes the status of the descriptive metadata section (dmdSec) which is supported by the profile.

+
+
+ + IANA media types + IANAs + https://www.iana.org/assignments/media-types/media-types.xhtml + Values for `@MIMETYPE` + +

Valid values for the mime types of referenced files.

+
+
+ + File group names + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyFileGrpAndStructMapDivisionLabel.xml + Values for `fileGrp/@USE` + +

Describes the uses of the file group `<fileGrp>` that are supported by the profile.

+

Own names should be placed in an own extending vocabulary.

+
+
+ + Structural map typing + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStructMapType.xml + Values for `structMap/@TYPE` + +

Describes the type of the structural map `<structMap>` that is supported by the profile.

+

Own types should be placed in an own extending vocabulary.

+
+
+ + Structural map label + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStructMapLabel.xml + Values for `structMap/@LABEL` + +

Describes the label of the structural map that is supported by the profile.

+

Own labels should be placed in an own extending vocabulary.

+
+
+
+ + + + + Package Identifier +

The `mets/@OBJID` attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder.

+

For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder.

+
+
METS XPath
mets/@OBJID
+
Cardinality
1..1
+
+
+
+ + + Content Category +

The `mets/@TYPE` attribute MUST be used to declare the category of the content held in the package, e.g. "Datasets", "Websites", "Mixed", "Other", etc. Legal values are defined in a fixed vocabulary. When the content category used falls outside of the defined vocabulary the `mets/@TYPE` value must be set to "OTHER" and the specific value declared in `mets/@csip:OTHERTYPE`. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced.

+
+
METS XPath
mets/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Other Content Category +

When the `mets/@TYPE` attribute has the value "OTHER" the `mets/@csip:OTHERTYPE` attribute MUST be used to declare the content category of the package/representation. The value can either be "OTHER" or any other string that is not present in the vocabulary used in the `mets/@TYPE` attribute.

+
+
METS XPath
mets[@TYPE='OTHER']/@csip:OTHERTYPE
+
Cardinality
0..1
+
+
+
+ + + Content Information Type Specification +

Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents. The vocabulary will evolve under the care of the DILCIS Board as additional Content Information Type Specifications are developed.

+
+
METS XPath
mets/@csip:CONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + Other Content Information Type Specification +

When the `mets/@csip:CONTENTINFORMATIONTYPE` has the value "OTHER" the `mets/@csip:OTHERCONTENTINFORMATIONTYPE` must state the content information type.

+
+
METS XPath
mets[@csip:CONTENTINFORMATIONTYPE='OTHER']/@csip:OTHERCONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + METS Profile +

The URL of the METS profile that the information package conforms with.

+
+
METS XPath
mets/@PROFILE
+
Cardinality
1..1
+
+
+
+
+ + + + Package header +

General element for describing the package.

+
+
METS XPath
mets/metsHdr
+
Cardinality
1..1
+
+
+
+ + + Package creation datetime +

`mets/metsHdr/@CREATEDATE` records the date and time the package was created.

+
+
METS XPath
mets/metsHdr/@CREATEDATE
+
Cardinality
1..1
+
+
+
+ + + Package last modification datetime +

`mets/metsHdr/@LASTMODDATE` records the date and time the package was modified and is mandatory when the package has been modified.

+
+
METS XPath
mets/metsHdr/@LASTMODDATE
+
Cardinality
0..1
+
+
+
+ + + OAIS Package type information +

`mets/metsHdr/@csip:OAISPACKAGETYPE` is an additional CSIP attribute that declares the type of the IP.

+
+
METS XPath
mets/metsHdr/@csip:OAISPACKAGETYPE
+
Cardinality
1..1
+
+
+
+ + + Agent +

A mandatory agent element records the software used to create the package. Other uses of agents may be described in any local implementations that extend the profile.

+
+
METS XPath
mets/metsHdr/agent
+
Cardinality
1..n
+
+
+
+ + + Agent role +

The mandatory agent element MUST have a `@ROLE` attribute with the value “CREATOR”.

+
+
METS XPath
mets/metsHdr/agent[@ROLE='CREATOR']
+
Cardinality
1..1
+
+
+
+ + + Agent type +

The mandatory agent element MUST have a `@TYPE` attribute with the value “OTHER”.

+
+
METS XPath
mets/metsHdr/agent[@TYPE='OTHER']
+
Cardinality
1..1
+
+
+
+ + + Agent other type +

The mandatory agent element MUST have a `@OTHERTYPE` attribute with the value “SOFTWARE”.

+
+
METS XPath
mets/metsHdr/agent[@OTHERTYPE='SOFTWARE']
+
Cardinality
1..1
+
+
+
+ + + Agent name +

The mandatory agent's name element records the name of the software tool used to create the IP.

+
+
METS XPath
mets/metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Agent additional information +

The mandatory agent's note element records the version of the tool used to create the IP.

+
+
METS XPath
mets/metsHdr/agent/note
+
Cardinality
1..1
+
+
+
+ + + Classification of the agent additional information +

The mandatory agent element's note child has a `@csip:NOTETYPE` attribute with a fixed value of "SOFTWARE VERSION".

+
+
METS XPath
mets/metsHdr/agent/note[@csip:NOTETYPE='SOFTWARE VERSION']
+
Cardinality
1..1
+
+
+
+
+ + + + Descriptive metadata +

Must be used if descriptive metadata for the package content is available. Each descriptive metadata section (`<dmdSec>`) contains a single description and must be repeated for multiple descriptions, when available.

+

It is possible to transfer metadata in a package using just the descriptive metadata section and/or administrative metadata section.

+
+
METS XPath
mets/dmdSec
+
Cardinality
0..n
+
+
+
+ + + Descriptive metadata identifier +

An `xml:id` identifier for the descriptive metadata section (`<dmdSec>`) used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/dmdSec/@ID
+
Cardinality
1..1
+
+
+
+ + + Descriptive metadata creation datetime +

Creation date and time of the descriptive metadata in this section.

+
+
METS XPath
mets/dmdSec/@CREATED
+
Cardinality
1..1
+
+
+
+ + + Status of the descriptive metadata +

Indicates the status of the package using a fixed vocabulary.

+
+
METS XPath
mets/dmdSec/@STATUS
+
Cardinality
0..1
+
+
+
+ + + Reference to the document with the descriptive metadata +

Reference to the descriptive metadata file located in the “metadata” section of the IP.

+
+
METS XPath
mets/dmdSec/mdRef
+
Cardinality
0..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/dmdSec/mdRef[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/dmdSec/mdRef[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. This specification recommends recording a URL type filepath in this attribute.

+
+
METS XPath
mets/dmdSec/mdRef/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of metadata +

Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS.

+
+
METS XPath
mets/dmdSec/mdRef/@MDTYPE
+
Cardinality
1..1
+
+
+
+ + + File mime type +

The IANA mime type of the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/dmdSec/mdRef/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

The creation date and time of the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/dmdSec/mdRef/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+
+ + + + Administrative metadata +

If administrative / preservation metadata is available, it must be described using the administrative metadata section (`<amdSec>`) element.

+

All administrative metadata is present in a single `<amdSec>` element.

+

It is possible to transfer metadata in a package using just the descriptive metadata section and/or administrative metadata section.

+
+
METS XPath
mets/amdSec
+
Cardinality
0..1
+
+
+
+ + + Digital provenance metadata +

For recording information about preservation the standard PREMIS is used. It is mandatory to include one `<digiprovMD>` element for each piece of PREMIS metadata.

+

The use of PREMIS in METS follows the recommendations in the 2017 version of the PREMIS in METS Guidelines.

+
+
METS XPath
mets/amdSec/digiprovMD
+
Cardinality
0..n
+
+
+
+ + + Digital provenance metadata identifier +

An `xml:id` identifier for the digital provenance metadata section `mets/amdSec/digiprovMD` used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/amdSec/digiprovMD/@ID
+
Cardinality
1..1
+
+
+
+ + + Status of the digital provenance metadata +

Indicates the status of the package using a fixed vocabulary.

+
+
METS XPath
mets/amdSec/digiprovMD/@STATUS
+
Cardinality
0..1
+
+
+
+ + + Reference to the document with the digital provenance metadata +

Reference to the digital provenance metadata file stored in the “metadata” section of the IP.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef
+
Cardinality
0..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. This specification recommends recording a URL type filepath within this attribute.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of metadata +

Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@MDTYPE
+
Cardinality
1..1
+
+
+
+ + + File mime type +

The IANA mime type for the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

Creation date and time of the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/amdSec/digiprovMD/mdRef/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+ + + Rights metadata +

A simple rights statement may be used to describe general permissions for the package. Individual representations should state their specific rights in their representation METS file.

+

Available standards include RightsStatements.org, Europeana rights statements info, METS Rights Schema created and maintained by the METS Board, the rights part of PREMIS as well as own local rights statements in use.

+
+
METS XPath
mets/amdSec/rightsMD
+
Cardinality
0..n
+
+
+
+ + + Rights metadata identifier +

An `xml:id` identifier for the rights metadata section (`<rightsMD>`) used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/amdSec/rightsMD/@ID
+
Cardinality
1..1
+
+
+
+ + + Status of the rights metadata +

Indicates the status of the package using a fixed vocabulary.

+
+
METS XPath
mets/amdSec/rightsMD/@STATUS
+
Cardinality
0..1
+
+
+
+ + + Reference to the document with the rights metadata +

Reference to the rights metadata file stored in the “metadata” section of the IP.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef
+
Cardinality
0..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. We recommend recording a URL type filepath within this attribute.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of metadata +

Specifies the type of metadata in the referenced file. Value is taken from the list provided by the METS.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@MDTYPE
+
Cardinality
1..1
+
+
+
+ + + File mime type +

The IANA mime type for the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

Creation date and time of the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/amdSec/rightsMD/mdRef/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+
+ + + + File section +

References to all transferred content SHOULD be placed in the file section in the different file group elements, described in other requirements.

+

Only a single file section (`<fileSec>`) element should be present.

+

In the case that a package only contains metadata updates, i.e. exclusively metadata files, then no file references need to be added to this section.

+
+
METS XPath
mets/fileSec
+
Cardinality
0..1
+
+
+
+ + + File section identifier +

An `xml:id` identifier for the file section used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/fileSec/@ID
+
Cardinality
1..1
+
+
+
+ + + Documentation file group +

All documentation pertaining to the transferred content is placed in one or more file group elements with `mets/fileSec/fileGrp/@USE` attribute value "Documentation".

+
+
METS XPath
mets/fileSec/fileGrp[@USE='Documentation']
+
Cardinality
1..n
+
+
+
+ + + Schema file group +

All XML schemas used in the information package must be referenced from one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value "Schemas".

+
+
METS XPath
mets/fileSec/fileGrp[@USE='Schemas']
+
Cardinality
1..n
+
+
+
+ + + Representations file group +

A pointer to the METS document describing the representation or pointers to the content being transferred must be present in one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value starting with "Representations" followed by the path to the folder where the representation level METS document is placed. For example "Representations/submission" and "Representations/ingest".

+
+
METS XPath
mets/fileSec/fileGrp[@USE=[starts-with('Representations')]]
+
Cardinality
1..n
+
+
+
+ + + Reference to administrative metadata +

If administrative metadata has been provided at file group `mets/fileSec/fileGrp` level this attribute refers to its administrative metadata section by ID.

+
+
METS XPath
mets/fileSec/fileGrp/@ADMID
+
Cardinality
0..1
+
+
+
+ + + Content Information Type Specification +

An added attribute which states the name of the content information type specification used to create the package.

+

The vocabulary will evolve under the curation of the DILCIS Board as additional content information type specifications are developed.

+

When the element "Content Information Type Specification" (CSIP4) has the value "MIXED" or the file group describes a representation, then this element states the content information type specification used for the file group.

+

When the file group is a "Representations file group" (CSIP114), i.e. it describes a representation and its `mets/fileSec/fileGrp/@USE` attribute value starts with "Representations", then this element must state the content information type specification used for the representation.

+
+
METS XPath
mets/@csip:CONTENTINFORMATIONTYPE="MIXED"|mets/fileSec/fileGrp[@USE=[starts-with('Representations')]]/@csip:CONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + Other Content Information Type Specification +

When the `mets/fileSec/fileGrp/@csip:CONTENTINFORMATIONTYPE` attribute has the value "OTHER" the `mets/fileSec/fileGrp/@csip:OTHERCONTENTINFORMATIONTYPE` must state a value for the Content Information Type Specification used.

+
+
METS XPath
mets/fileSec/fileGrp[@csip:CONTENTINFORMATIONTYPE='OTHER']/@csip:OTHERCONTENTINFORMATIONTYPE
+
Cardinality
0..1
+
+
+
+ + + Description of the use of the file group +

The value in the `mets/fileSec/fileGrp/@USE` is the name of the whole folder structure to the data, e.g. "Documentation", "Schemas", "Representations/preingest" or "Representations/submission/data".

+
+
METS XPath
mets/fileSec/fileGrp/@USE
+
Cardinality
1..1
+
+
+
+ + + File group identifier +

An `xml:id` identifier for the file group used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/fileSec/fileGrp/@ID
+
Cardinality
1..1
+
+
+
+ + + File +

The file group (`<fileGrp>`) contains the file elements which describe the file objects.

+
+
METS XPath
mets/fileSec/fileGrp/file
+
Cardinality
1..n
+
+
+
+ + + File identifier +

A unique `xml:id` identifier for this file across the package.

+
+
METS XPath
mets/fileSec/fileGrp/file/@ID
+
Cardinality
1..1
+
+
+
+ + + File mimetype +

The IANA mime type for the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@MIMETYPE
+
Cardinality
1..1
+
+
+
+ + + File size +

Size of the referenced file in bytes.

+
+
METS XPath
mets/fileSec/fileGrp/file/@SIZE
+
Cardinality
1..1
+
+
+
+ + + File creation datetime +

Creation date and time of the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@CREATED
+
Cardinality
1..1
+
+
+
+ + + File checksum +

The checksum of the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@CHECKSUM
+
Cardinality
1..1
+
+
+
+ + + File checksum type +

A value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file.

+
+
METS XPath
mets/fileSec/fileGrp/file/@CHECKSUMTYPE
+
Cardinality
1..1
+
+
+
+ + + File original identification +

If an identifier for the file was supplied by the owner it can be recorded in this attribute.

+
+
METS XPath
mets/fileSec/fileGrp/file/@OWNERID
+
Cardinality
0..1
+
+
+
+ + + File reference to administrative metadata +

If administrative metadata has been provided for the file this attribute refers to the file's administrative metadata by ID.

+
+
METS XPath
mets/fileSec/fileGrp/file/@ADMID
+
Cardinality
0..1
+
+
+
+ + + File reference to descriptive metadata +

If descriptive metadata has been provided per file this attribute refers to the file's descriptive metadata by ID.

+
+
METS XPath
mets/fileSec/fileGrp/file/@DMDID
+
Cardinality
0..1
+
+
+
+ + + File locator reference +

The location of each external file must be defined by the file location `<FLocat>` element using the same rules as references for metadata files. All references to files should be made using the XLink href attribute and the file protocol using the relative location of the file.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat
+
Cardinality
1..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. We recommend recording a URL type filepath within this attribute.

+
+
METS XPath
mets/fileSec/fileGrp/file/FLocat/@xlink:href
+
Cardinality
1..1
+
+
+
+
+ + + + Structural description of the package +

The structural map `<structMap>` element is the only mandatory element in the METS.

+

The `<structMap>` in the CSIP describes the highest logical structure of the IP.

+

Every CSIP compliant METS file must include ONE structural map `<structMap>` element used exactly as described in this section of requirements.

+

Institutions can add their own additional custom structural maps as separate `<structMap>` sections following their own requirements.

+
+
METS XPath
mets/structMap
+
Cardinality
1..n
+
+
+
+ + + Type of structural description +

The `mets/structMap/@TYPE` attribute must take the value “PHYSICAL” from the vocabulary.

+
+
METS XPath
mets/structMap[@TYPE='PHYSICAL']
+
Cardinality
1..1
+
+
+
+ + + Name of the structural description +

The `mets/structMap/@LABEL` attribute value is set to “CSIP” from the vocabulary.

+

This requirement identifies the CSIP compliant structural map `<structMap>` element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']
+
Cardinality
1..1
+
+
+
+ + + Structural description identifier +

An `xml:id` identifier for the structural description (structMap) used for internal package references. It must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/@ID
+
Cardinality
1..1
+
+
+
+ + + Main structural division +

The structural map comprises a single division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div
+
Cardinality
1..1
+
+
+
+ + + Main structural division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/@ID
+
Cardinality
1..1
+
+
+
+ + + Metadata division +

The metadata referenced in the administrative and/or descriptive metadata section is described in the structural map with one sub division.

+

When the transfer consists of only administrative and/or descriptive metadata this is the only sub division that occurs.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']
+
Cardinality
1..1
+
+
+
+ + + Metadata division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']/@ID
+
Cardinality
1..1
+
+
+
+ + + Metadata division label +

The metadata division `<div>` element's `@LABEL` attribute value must be "Metadata".

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']
+
Cardinality
1..1
+
+
+
+ + + Metadata division references administrative metadata +

The administrative metadata division should reference all current administrative metadata sections.

+

All `<amdSec>`s with `@STATUS='CURRENT'` SHOULD be referenced by their identifier, @ID.

+

The current `<amdSec>` @IDs are recorded in the `div[@LABEL='Metadata']/@ADMID` attribute in a space delimited list.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']/@ADMID
+
Cardinality
0..1
+
+
+
+ + + Metadata division references descriptive metadata +

The descriptive metadata division should reference all current descriptive metadata sections.

+

All `<dmdSec>`s with `@STATUS='CURRENT'` SHOULD be referenced by their identifier, @ID.

+

The current `<dmdSec>` @IDs are recorded in the `div[@LABEL='Metadata']/@DMDID` attribute in a space delimited list.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Metadata']/@DMDID
+
Cardinality
0..1
+
+
+
+ + + Documentation division +

The documentation referenced in the file section file groups is described in the structural map with one sub division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']
+
Cardinality
0..1
+
+
+
+ + + Documentation division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']/@ID
+
Cardinality
1..1
+
+
+
+ + + Documentation division label +

The documentation division `<div>` element in the package uses the value "Documentation" from the vocabulary as the value for the `@LABEL` attribute.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']
+
Cardinality
1..1
+
+
+
+ + + Documentation file references +

All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per `<fptr>` element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']/fptr
+
Cardinality
0..n
+
+
+
+ + + Documentation file group reference pointer +

A reference, by ID, to the "Documentation" file group.

+

Related to the requirements CSIP60 which describes the "Documentation" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Documentation']/fptr/@FILEID
+
Cardinality
1..1
+
+
+
+ + + Schema division +

The schemas referenced in the file section file groups are described in the structural map within a single sub-division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']
+
Cardinality
0..1
+
+
+
+ + + Schema division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']/@ID
+
Cardinality
1..1
+
+
+
+ + + Schema division label +

The schema division `<div>` element's `@LABEL` attribute has the value "Schemas" from the vocabulary.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']
+
Cardinality
1..1
+
+
+
+ + + Schema file reference +

All file groups containing schemas described in the package are referenced via the relevant file group identifiers. One file group reference per fptr-element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']/fptr
+
Cardinality
0..n
+
+
+
+ + + Schema file group reference +

The pointer to the identifier for the "Schema" file group.

+

Related to the requirements CSIP113 which describes the "Schema" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Schemas']/fptr/@FILEID
+
Cardinality
1..1
+
+
+
+ + + Content division +

When no representations are present the content referenced in the file section file group with `@USE` attribute value "Representations" is described in the structural map as a single sub division.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']
+
Cardinality
0..1
+
+
+
+ + + Content division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']/@ID
+
Cardinality
1..1
+
+
+
+ + + Content division label +

The package's content division `<div>` element must have the `@LABEL` attribute value "Representations", taken from the vocabulary.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']
+
Cardinality
1..1
+
+
+
+ + + Content division file references +

All file groups containing content described in the package are referenced via the relevant file group identifiers. One file group reference per fptr-element.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']/fptr
+
Cardinality
0..n
+
+
+
+ + + Content division file group references +

The pointer to the identifier for the "Representations" file group.

+

Related to the requirements CSIP114 which describes the "Representations" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div[@LABEL='Representations']/fptr/@FILEID
+
Cardinality
1..1
+
+
+
+ + + Representation division +

When a package consists of multiple representations, each described by a representation level METS.xml document, there should be a discrete representation div element for each representation.

+

Each representation div references the representation level METS.xml document, documenting the structure of the package and its constituent representations.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div
+
Cardinality
0..n
+
+
+
+ + + Representations division identifier +

Mandatory, `xml:id` identifier must be unique within the package.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/@ID
+
Cardinality
1..1
+
+
+
+ + + Representations division label +

The package's representation division `<div>` element `@LABEL` attribute value must be the path to the representation level METS document starting with the value "Representations" followed by the main folder name for example "Representations/submission" and "Representations/ingest".

+

This requirement gives the same value to be used as the requirement named "Description of the use of the file group" (CSIP64)

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/@LABEL
+
Cardinality
1..1
+
+
+
+ + + Representations division file references +

The file group containing the files described in the package is referenced via the relevant file group identifier.

+

Related to the requirements CSIP114 which describes the "Representations" file group and CSIP65 which describes the file group identifier.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/mptr/@xlink:title
+
Cardinality
1..1
+
+
+
+ + + Representation METS pointer +

The division `<div>` of the specific representation includes one occurrence of the METS pointer `<mptr>` element, pointing to the appropriate representation METS file.

+
+
METS XPath
mets/structMap[@LABEL='CSIP']/div/div/mptr
+
Cardinality
1..1
+
+
+
+ + + Resource location +

The actual location of the resource. We recommend recording a URL type filepath within this attribute.

+
+
METS XPath
mets/structMap/div/div/mptr/@xlink:href
+
Cardinality
1..1
+
+
+
+ + + Type of link +

Attribute used with the value “simple”. Value list is maintained by the xlink standard.

+
+
METS XPath
mets/structMap/div/div/mptr[@xlink:type='simple']
+
Cardinality
1..1
+
+
+
+ + + Type of locator +

The locator type is always used with the value "URL" from the vocabulary in the attribute.

+
+
METS XPath
mets/structMap/div/div/mptr[@LOCTYPE='URL']
+
Cardinality
1..1
+
+
+
+
+ + + + structLink +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the structural links is found in the METS Primer

+
+
+
+ + + + behaviorSec +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the behaviour section is found in the METS Primer

+
+
+
+
+ + + + +

Requirements not stated in CSIP

+
+
+
+ + + +

Requirements not stated in CSIP

+
+
+
+ + + +

Requirements not stated in CSIP

+
+
+
+
+ + ESSArch (ETP, ETA, EPP, ECORE) + https://github.com/ESSolutions + +

A suite of tools for e-archiving and digital preservation. The tools provide functionality for producers to archive digital information, for archives to preserve digital information and for consumers to access archived information.

+
+ +

ES Solutions - www.essolutions.se

+
+
+ + RODA + http://github.com/keeps/roda + +

RODA is a digital repository solution that delivers functionality for all the main units of the OAIS reference model. RODA is capable of ingesting, managing and providing access to the various types of digital objects produced by large corporations or public bodies. RODA is based on open-source technologies and is supported by existing standards such as the Open Archival Information System (OAIS), Metadata Encoding and Transmission Standard (METS), Encoded Archival Description (EAD), Dublin Core (DC) and PREMIS (Preservation Metadata).

+
+ +

RODA is licensed under LGPLv3 for all source-code including interoperability libraries like SIP manipulation libraries.

+
+
+ + RODA-in + https://rodain.roda-community.org + +

RODA-in is a tool specially designed for producers and archivists to create Submission Information Packages (SIP) ready to be submitted to an Open Archival Information System (OAIS). The tool creates SIPs from files and folders available on the local file system.

+

+ RODA-in supports several Submission Information Package formats such as BagIt, E-ARK SIP and the Hungarian SIP format. +

+
+ +

RODA-in is licensed under LGPLv3.

+
+
+ + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-DIP.xml b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-DIP.xml new file mode 100644 index 0000000..40a411f --- /dev/null +++ b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-DIP.xml @@ -0,0 +1,408 @@ + + + + + https://earkdip.dilcis.eu/profile/E-ARK-DIP.xml + E-ARK DIP METS Profile + This is the extension of the E-ARK CSIP profile for creation of an E-ARK DIP. The profile describes the Dissemination Information Package (DIP) specification and the implementation of METS for packaging OAIS conformant Information Packages. The profile is accompanied by a textual document explaining the details of use of this profile. + This will enable repository interoperability and assist in the management of the preservation of digital content. + 2021-10-15T09:00:00 + + DILCIS Board +
http://dilcis.eu/
+ info@dilcis.eu +
+ E-ARK CSIP METS Profile 2.1 + + + E-ARK DIP profile +

This profile together with the E-ARK SIP document describes a DIP conforming to the E-ARK SIP.

+ Principles for a package conforming to the Common Specification for Information Packages (CSIP) +

CSIP Principles

+
+
+ + E-ARK SIP METS Extension + http://earksip.dilcis.eu/schema/DILCISExtensionSIPMETS.xsd + XML-schema for the attributes added by SIP and reused in the DIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is used with a namespace prefix of sip.

+
+
+ +

The filepath must be decoded consistently throughout all file references within the information package.

+
+ + + OAIS Package type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyOAISPackageType.xml + Values for `@csip:OAISPACKAGETYPE` + +

Describes the OAIS type the package belongs to in the OAIS reference model.

+
+
+ + dmdSec status + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyStatus.xml + Values for `dmdSec/@STATUS` + +

Describes the status of the descriptive metadata section (dmdSec) which is supported by the profile.

+
+
+
+ + + + + Package Identifier +

Note that the value of the `mets/@OBJID` attribute for the DIP is expected to be different from the SIP and AIP to reflect the creation of a new package.

+
+
METS XPath
mets/@OBJID
+
Cardinality
1..1
+
+
+
+ + + METS Profile +

The value is set to "https://earkdip.dilcis.eu/profile/E-ARK-DIP.xml".

+
+
METS XPath
mets/@PROFILE
+
Cardinality
1..1
+
+
+
+
+ + + + OAIS Package type information +

The attribute `@csip:OAISPACKAGETYPE`, added in CSIP, is used with the value "DIP".

+
+
METS XPath
metsHdr[@csip:OAISPACKAGETYPE=`DIP`]
+
Cardinality
1..1
+
+
+
+
+ + + + Status of the descriptive metadata +

Indicates the status of the package using a fixed vocabulary. The status SHOULD in a DIP be set to "CURRENT".

+
+
METS XPath
dmdSec/@STATUS
+
Cardinality
0..1
+
+
+
+
+ + + + Administrative metadata +

The DIP <amdSec> element should comply with amdSec requirements in the CSIP profile.

+
+
+
+ + + + File section +

The DIP fileSec element should comply with fileSec requirements in the CSIP profile.

+
+
+
+ + + + Structural description of the package +

The DIP structMap element should comply with structMap requirements in the CSIP profile.

+
+
+
+ + + + structLink +

Section not defined or used in CSIP or DIP, additional own uses may occur.

+

Information regarding the structural links is found in the METS Primer

+
+
+ + + + behaviorSec +

Section not defined or used in CSIP or DIP, additional own uses may occur.

+

Information regarding the behavior section is found in the METS Primer

+
+
+
+
+ + + + +

Requirements not stated in CSIP or DIP

+
+
+
+ + + +

Requirements not stated in CSIP or DIP

+
+
+
+ + + +

Requirements not stated in CSIP or DIP

+
+
+
+
+ + ESSArch (ETP, ETA, EPP, ECORE) + https://github.com/ESSolutions + +

A suite of tools for e-archiving and digital preservation. The tools provide functionality for producers to archive digital information, for archives to preserve digital information and for consumers to access archived information.

+
+ +

ES Solutions - www.essolutions.se

+
+
+ + RODA + http://github.com/keeps/roda + +

RODA is a digital repository solution that delivers functionality for all the main units of the OAIS reference model. RODA is capable of ingesting, managing and providing access to the various types of digital objects produced by large corporations or public bodies. RODA is based on open-source technologies and is supported by existing standards such as the Open Archival Information System (OAIS), Metadata Encoding and Transmission Standard (METS), Encoded Archival Description (EAD), Dublin Core (DC) and PREMIS (Preservation Metadata).

+
+ +

RODA is licensed under LGPLv3 for all source-code including interoperability libraries like SIP manipulation libraries.

+
+
+ + RODA-in + https://rodain.roda-community.org + +

RODA-in is a tool specially designed for producers and archivists to create Submission Information Packages (SIP) ready to be submitted to an Open Archival Information System (OAIS). The tool creates SIPs from files and folders available on the local file system.

+

+ RODA-in supports several Submission Information Package formats such as BagIt, E-ARK SIP and the Hungarian SIP format. +

+
+ +

RODA-in is licensed under LGPLv3.

+
+
+ + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-SIP.xml b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-SIP.xml new file mode 100644 index 0000000..e8e3b2d --- /dev/null +++ b/eark_validator/ipxml/resources/profiles/V2.1.0/E-ARK-SIP.xml @@ -0,0 +1,832 @@ + + + + + https://earksip.dilcis.eu/profile/E-ARK-SIP.xml + E-ARK SIP METS Profile 2.1 + This is the extension of the E-ARK CSIP profile for creation of an E-ARK SIP. + 2021-10-15T09:00:00 + + DILCIS Board +
http://dilcis.eu/
+ info@dilcis.eu +
+ E-ARK CSIP METS Profile 2.1 + + + E-ARK SIP profile +

This profile together with the E-ARK SIP document describes an SIP conforming to the E-ARK SIP.

+
+
+ + E-ARK SIP METS Extension + https://earksip.dilcis.eu/schema/DILCISExtensionSIPMETS.xsd + XML-schema for the attributes added by SIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is used with a namespace prefix of sip.

+
+
+ + E-ARK CSIP METS Extension + http://earkcsip.dilcis.eu/schema/DILCISExtensionMETS.xsd + XML-schema for the attributes added by CSIP + +

An extension schema with the added attributes for use in this profile.

+

The schema is identified using the namespace prefix csip.

+
+
+ +

The filepath must be decoded consistently throughout all file references within the information package.

+
+ + + Package status + DILCIS Board + http://earksip.dilcis.eu/schema/SIPVocabularyRecordStatus.xml + Used in `@RECORDSTATUS` + +

Describes the status of the package.

+
+
+ + Alternative record ID's + DILCIS Board + http://earksip.dilcis.eu/schema/SIPVocabularyRecordIDType.xml + Used in `altrecordID/@TYPE` + +

Describes the type of the alternative record ID.

+
+
+ + Note type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyNoteType.xml + Used in `@csip:NOTETYPE` + +

Describes the type of a note for an agent.

+
+
+ + OAIS Package type + DILCIS Board + http://earkcsip.dilcis.eu/schema/CSIPVocabularyOAISPackageType.xml + Used in `@csip:OAISPACKAGETYPE` + +

Describes the OAIS type the package belongs to in the OAIS reference model.

+
+
+
+ + + + + Package name +

An optional short text describing the contents of the package, e.g. "Accounting records of 2017".

+
+
METS XPath
mets/@LABEL
+
Cardinality
0..1
+
+
+
+ + + METS Profile +

The value is set to "https://earksip.dilcis.eu/profile/E-ARK-SIP.xml".

+
+
METS XPath
mets/@PROFILE
+
Cardinality
1..1
+
+
+
+
+ + + + Package status +

A way of indicating the status of the package and to instruct the OAIS on how to properly handle the package. If not set, the expected behaviour is equal to NEW.

+
+
METS XPath
metsHdr/@RECORDSTATUS
+
Cardinality
0..1
+
+
+
+ + + OAIS Package type information +

`@csip:OAISPACKAGETYPE` is used with the value "SIP".

+
+
METS XPath
metsHdr/@csip:OAISPACKAGETYPE
+
Cardinality
1..1
+
+
+
+ + + Submission agreement +

A reference to the Submission Agreement associated with the package.

+

`@TYPE` is used with the value "SUBMISSIONAGREEMENT".

+

Example: RA 13-2011/5329; 2012-04-12

+

Example: http://submissionagreement.kb.se/dnr331-1144-2011/20120711/

+

Note: It is recommended to use a machine-readable format for a better description of a submission agreement.

+

For example, the submission agreement developed by Docuteam GmbH http://www.loc.gov/standards/mets/profiles/00000041.xml

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..1
+
+
+
+ + + Previous Submission agreement +

An optional reference to previous submission agreement(s) to which the information may have belonged.

+

`@TYPE` is used with the value "PREVIOUSSUBMISSIONAGREEMENT".

+

Example: FM 12-2387/12726, 2007-09-19

+

Example: http://submissionagreement.kb.se/dnr331-1144-2011/20120711/

+

Note: It is recommended to use a machine-readable format for a better description of a submission agreement.

+

For example, the submission agreement developed by Docuteam GmbH http://www.loc.gov/standards/mets/profiles/00000041.xml

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..*
+
+
+
+ + + Archival reference code +

An optional reference code indicating where in the archival hierarchy the package shall be placed in the OAIS.

+

`@TYPE` is used with the value "REFERENCECODE".

+

Example: FM 12-2387/12726, 2007-09-19

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..1
+
+
+
+ + + Previous archival reference code +

In cases where the SIP originates from other institutions maintaining a reference code structure, this element can be used to record these reference codes and therefore support the provenance of the package when a whole archival description is not submitted with the submission.

+

`@TYPE` is used with the value "PREVIOUSREFERENCECODE".

+

Example: SE/FM/123/123.1/123.1.3

+
+
METS XPath
metsHdr/altRecordID
+
Cardinality
0..*
+
+
+
+ + + Archival creator agent +

A wrapper element that enables encoding the name of the organisation or person that originally created the data being transferred. Please note that this might be different from the organisation which has been charged with preparing and sending the SIP to the archives.

+
+
METS XPath
metsHdr/agent
+
Cardinality
0..1
+
+
+
+ + + Archival creator agent role +

The role of the person(s) or institution(s) responsible for the document/collection.

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Archival creator agent type +

The type of the archival creator agent is "ORGANIZATION" or "INDIVIDUAL".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Archival creator agent name +

The name of the organisation(s) that originally created the data being transferred.

+

Please note that this might be different from the organisation which has been charged with preparing and sending the SIP to the archives.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
0..*
+
+
+
+ + + Archival creator agent additional information +

The archival creator agent has a note providing a unique identification code for the archival creator.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..1
+
+
+
+ + + Classification of the archival creator agent additional information +

The archival creator agent note is typed with the value of "IDENTIFICATIONCODE".

+
+
METS XPath
metsHdr/agent/note/@csip:NOTETYPE
+
Cardinality
1..1
+
+
+
+ + + Submitting agent +

The name of the organisation or person submitting the package to the archive.

+
+
METS XPath
metsHdr/agent
+
Cardinality
1..1
+
+
+
+ + + Submitting agent role +

The role of the person(s) or institution(s) responsible for creating and/or submitting the package.

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Submitting agent type +

The type of the submitting agent is "ORGANIZATION" or "INDIVIDUAL".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Submitting agent name +

Name of the organisation submitting the package to the archive.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Submitting agent additional information +

The submitting agent has a note providing a unique identification code for the submitting agent.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..1
+
+
+
+ + + Classification of the submitting agent additional information +

The submitting agent note is typed with the value of "IDENTIFICATIONCODE".

+
+
METS XPath
metsHdr/agent/note/@csip:NOTETYPE
+
Cardinality
1..1
+
+
+
+ + + Contact person agent +

Contact person for the submission.

+
+
METS XPath
metsHdr/agent
+
Cardinality
0..*
+
+
+
+ + + Contact person agent role +

The role of the contact person is "CREATOR".

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Contact person agent type +

The type of the contact person agent is "INDIVIDUAL".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Contact person agent name +

Name of the contact person.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Contact person agent additional information +

The contact person agent has one or more notes giving the contact information.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..*
+
+
+
+ + + Preservation agent +

The organisation or person that preserves the package.

+
+
METS XPath
metsHdr/agent
+
Cardinality
0..1
+
+
+
+ + + Preservation agent role +

The role of the preservation agent is "PRESERVATION".

+
+
METS XPath
metsHdr/agent/@ROLE
+
Cardinality
1..1
+
+
+
+ + + Preservation agent type +

The type of the preservation agent is "ORGANIZATION".

+
+
METS XPath
metsHdr/agent/@TYPE
+
Cardinality
1..1
+
+
+
+ + + Preservation agent name +

Name of the organisation preserving the package.

+
+
METS XPath
metsHdr/agent/name
+
Cardinality
1..1
+
+
+
+ + + Preservation agent additional information +

The preservation agent has a note providing a unique identification code for the preservation agent.

+
+
METS XPath
metsHdr/agent/note
+
Cardinality
0..1
+
+
+
+ + + Classification of the preservation agent additional information +

The preservation agent note is typed with the value of "IDENTIFICATIONCODE".

+
+
METS XPath
metsHdr/agent/note/@csip:NOTETYPE
+
Cardinality
1..1
+
+
+
+
+ + + + Descriptive metadata +

The SIP dmdSec element should comply with dmdSec requirements in the CSIP profile.

+
+
+
+ + + + Administrative metadata +

The SIP amdSec element should comply with amdSec requirements in the CSIP profile.

+
+
+
+ + + + File format name +

An optional attribute may be used if the MIMETYPE is not sufficient for the purposes of processing the information package.

+

Example: "Extensible Markup Language"

+

Example: "PDF/A"

+

Example: "ISO/IEC 26300:2006"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATNAME
+
Cardinality
0..1
+
+
+
+ + + File format version +

The version of the file format when the use of PREMIS has not been agreed upon in the submission agreement.

+

Example: "1.0"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATVERSION
+
Cardinality
0..1
+
+
+
+ + + File format registry +

The name of the format registry used to identify the file format when the use of PREMIS has not been agreed upon in the submission agreement.

+

Example: "PRONOM"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATREGISTRY
+
Cardinality
0..1
+
+
+
+ + + File format registry key +

Key of the file format in the registry when use of PREMIS has not been agreed upon in the submission agreement.

+

Example: "fmt/101"

+
+
METS XPath
fileSec/fileGrp/file/@sip:FILEFORMATKEY
+
Cardinality
0..1
+
+
+
+
+ + + + Structural description of the package +

The SIP structMap element should comply with structMap requirements in the CSIP profile.

+
+
+
+ + + + structLink +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the structural links is found in the METS Primer

+
+
+
+ + + + behaviorSec +

Section not defined or used in CSIP, additional own uses may occur.

+

Information regarding the behavior section is found in the METS Primer.

+
+
+
+
+ + + + +

Requirements not stated in CSIP or SIP

+
+
+
+ + + +

Requirements not stated in CSIP or SIP

+
+
+
+ + + +

Requirements not stated in CSIP or SIP

+
+
+
+
+ + ESSArch (ETP, ETA, EPP, ECORE) + https://github.com/ESSolutions + +

A suite of tools for e-archiving and digital preservation. The tools provide functionality for producers to archive digital information, for archives to preserve digital information and for consumers to access archived information.

+
+ +

ES Solutions - www.essolutions.se

+
+
+ + RODA + http://github.com/keeps/roda + +

RODA is a digital repository solution that delivers functionality for all the main units of the OAIS reference model. RODA is capable of ingesting, managing and providing access to the various types of digital objects produced by large corporations or public bodies. RODA is based on open-source technologies and is supported by existing standards such as the Open Archival Information System (OAIS), Metadata Encoding and Transmission Standard (METS), Encoded Archival Description (EAD), Dublin Core (DC) and PREMIS (Preservation Metadata).

+
+ +

RODA is licensed under LGPLv3 for all source-code including interoperability libraries like SIP manipulation libraries.

+
+
+ + RODA-in + https://rodain.roda-community.org + +

RODA-in is a tool specially designed for producers and archivists to create Submission Information Packages (SIP) ready to be submitted to an Open Archival Information System (OAIS). The tool creates SIPs from files and folders available on the local file system.

+

+ RODA-in supports several Submission Information Package formats such as BagIt, E-ARK SIP and the Hungarian SIP format. +

+
+ +

RODA-in is licensed under LGPLv3.

+
+
+ + + + + + + + The Swedish health agency + VAT:SE201345098701 + + + The agency, Personnel + VAT:SE2098109810-AF87 + + + Sven Svensson + Phone: 08-123456, Email: sven.svensson@mail.mail + + + The archives + ID:1234567 + + http://submissionagreement.kb.se/dnr331-1144-2011/20120711/ + FM 12-2387/12726, 2007-09-19 + SE/RA/123456/24/P + SE/FM/123/123.1/123.1.3 + + + + + The Swedish health agency + VAT:SE201345098701 + + + + + The agency, Personnel + VAT:SE2098109810-AF87 + + + + + Sven Svensson + Phone: 08-123456, Email: sven.svensson@mail.mail + + + + + The archives + ID:1234567 + + + + + + + + + + + + RODA-in + 2.1.0-beta.7 + + + The Swedish health agency + VAT:SE201345098701 + + + The agency, Personnel + VAT:SE2098109810-AF87 + + + Sven Svensson + Phone: 08-123456, Email: sven.svensson@mail.mail + + + The archives + ID:1234567 + + + http://submissionagreement.kb.se/dnr331-1144-2011/20120711/ + + + FM 12-2387/12726, 2007-09-19 + + + SE/RA/123456/24/P + + + SE/FM/123/123.1/123.1.3 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_amdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_amdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_amdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_behaviorSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_behaviorSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_behaviorSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_dmdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_dmdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_dmdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_fileSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_fileSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_fileSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsHdr_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_metsHdr_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsHdr_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsRootElement_rules.xml similarity index 100% rename from 
eark_validator/ipxml/resources/schematron/CSIP/mets_metsRootElement_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_metsRootElement_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structLink_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_structLink_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structLink_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/CSIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structMap_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/CSIP/mets_structMap_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/CSIP/mets_structMap_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_amdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_amdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_amdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_behaviorSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_behaviorSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_behaviorSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_dmdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_dmdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_dmdSec_rules.xml diff --git 
a/eark_validator/ipxml/resources/schematron/DIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_fileSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_fileSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_fileSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsHdr_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_metsHdr_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsHdr_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsRootElement_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_metsRootElement_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_metsRootElement_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structLink_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_structLink_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structLink_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/DIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structMap_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/DIP/mets_structMap_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/DIP/mets_structMap_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_amdSec_rules.xml similarity index 100% rename from 
eark_validator/ipxml/resources/schematron/SIP/mets_amdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_amdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_behaviorSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_behaviorSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_behaviorSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_dmdSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_dmdSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_dmdSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_fileSec_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_fileSec_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_fileSec_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsHdr_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_metsHdr_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsHdr_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsRootElement_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_metsRootElement_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_metsRootElement_rules.xml diff --git 
a/eark_validator/ipxml/resources/schematron/SIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structLink_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_structLink_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structLink_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/SIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structMap_rules.xml similarity index 100% rename from eark_validator/ipxml/resources/schematron/SIP/mets_structMap_rules.xml rename to eark_validator/ipxml/resources/schematron/V2.0.4/SIP/mets_structMap_rules.xml diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml new file mode 100644 index 0000000..16cf669 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml @@ -0,0 +1,44 @@ + + + + + + Use of the METS administrative metadata section. + + Should be used to record information about preservation; the standard PREMIS is used. + A simple rights statement may be used to describe general permissions for the package. Individual representations should state their specific rights in their representation METS file. + + + Mandatory, unique id for the digital provenance. + Should be used to indicate the status of the package. + Should provide a reference to the digital provenance metadata file stored in the “metadata” section of the IP. + + + Mandatory, locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard. + MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute. + MUST record the type of metadata at the referenced location. 
+ MUST record the MIME type of the referenced file. + MUST record the size in bytes of the referenced file. + MUST record the date the referenced file was created. + MUST record the checksum of the referenced file. + MUST record the checksum type of the referenced file. + + + Mandatory, unique id for the rights metadata. + Should be used to indicate the status of the package. + Should provide a reference to the digital provenance metadata file stored in the “metadata” section of the IP. + + + Mandatory, locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard. + MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute. + MUST record the type of metadata at the referenced location. + MUST record the MIME type of the referenced file. + MUST record the size in bytes of the referenced file. + MUST record the date the referenced file was created. + MUST record the checksum of the referenced file. + MUST record the checksum type of the referenced file. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_behaviorSec_rules.xml new file mode 100644 index 0000000..4816461 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_behaviorSec_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml new file mode 100644 index 0000000..2e97653 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml @@ -0,0 +1,25 @@ + + + + + + Use of the METS descriptive metadata section. + + Mandatory, identifier must be unique within the package. 
+ Mandatory, creation date of the descriptive metadata in this section. + SHOULD be used to indicate the status of the package. + SHOULD provide a reference to the descriptive metadata file located in the “metadata” section of the IP. + + + The locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute used with the value “simple”. Value list is maintained by the xlink standard. + The actual location of the resource. This specification recommends recording a URL type filepath in this attribute. + Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS. + MUST hold the IANA mime type of the referenced file. + MUST hold the size of the referenced file in bytes. + MUST hold the creation date of the referenced file. + MUST hold the checksum of the referenced file. + MUST hold the algorithm type of checksum of the referenced file. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml new file mode 100644 index 0000000..7d37037 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml @@ -0,0 +1,39 @@ + + + + + + Use of the METS file section. + + An xml:id identifier for the file section used for internal package references. + All documentation pertaining to the transferred content is placed in one or more file group elements with mets/fileSec/fileGrp/@USE attribute value “Documentation”. + All XML schemas used in the information package should be referenced from one or more file groups with mets/fileSec/fileGrp/@USE attribute value “Schemas”. + A pointer to the METS document describing the representation or pointers to the content being transferred must be present in one or more file groups with mets/fileSec/fileGrp/@USE attribute value “Representations”. + + + ADMID attribute used. 
+ This attribute is mandatory when the mets/fileSec/fileGrp/@USE attribute value is “Representations”. + This attribute is mandatory when the mets/fileSec/fileGrp/@USE attribute value is “Representations”. + This attribute is mandatory. + This attribute is mandatory. An xml:id identifier for the file group used for internal package references. It must be unique within the package. + The file group contains the file elements which describe the file objects. + + + This attribute is mandatory. An xml:id identifier for the file group used for internal package references. It must be unique within the package. + MUST record the MIME type of the referenced file. + MUST record the size in bytes of the referenced file. + MUST record the date the referenced file was created. + MUST record the checksum of the referenced file. + MUST record the checksum type of the referenced file. + A file element has an OWNERID attribute. + A file element has an ADMID attribute. + A file element has an DMDID attribute. + The location of each external file must be defined by the file location FLocat element. + + + Mandatory, locator type is always used with the value “URL” from the vocabulary in the attribute. + Attribute MUST be used with the value “simple”. Value list is maintained by the xlink standard. + MUST record the actual location of the resource. This specification recommends recording a URL type filepath within this attribute. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsHdr_rules.xml new file mode 100644 index 0000000..d0a5534 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsHdr_rules.xml @@ -0,0 +1,21 @@ + + + + + Use of the METS header + + The metsHdr element MUST have a CREATEDATE attribute. + The metsHdr element SHOULD have a LASTMODDATE attribute. + The metsHdr element MUST have a csip:OAISPACKAGETYPE attribute. 
+ The metsHdr element MUST contain an agent element that records the software used to create the package. + The agent element MUST have a ROLE attribute with the value "CREATOR". + + + The agent element MUST have a TYPE attribute with the value "OTHER". + The agent element MUST have an OTHERTYPE attribute with the value "SOFTWARE". + The agent element MUST have a child name element that records the name of the software tool used to create the IP. + The agent element MUST have a child note element that records the version of the tool used to create the IP. + The mandatory agent element’s note child has a @csip:NOTETYPE attribute with a fixed value of “SOFTWARE VERSION”. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml new file mode 100644 index 0000000..42502f8 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml @@ -0,0 +1,23 @@ + + + + + Validate METS root element. + + The mets root element is mandatory. + + + The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder. + The mets/@TYPE attribute MUST be used to declare the category of the content held in the package, e.g. book, journal, stereograph, video, etc. Legal values are defined in a fixed vocabulary. + When the content category used falls outside of the defined vocabulary the mets/@TYPE value must be set to “OTHER” and the specific value declared in mets/@csip:OTHERTYPE. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced. 
+ Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents. + When the mets/@csip:CONTENTINFORMATIONTYPE has the value “OTHER” the mets/@csip:OTHERCONTENTINFORMATIONTYPE must state the content information type. + The PROFILE attribute MUST contain the URL of the METS profile that the information package conforms with. + Must be used if descriptive metadata about the package content is available. NOTE: According to official METS documentation each metadata section must describe one and only one set of metadata. As such, if implementers want to include multiple occurrences of descriptive metadata into the package this must be done by repeating the whole dmdSec element for each individual metadata. + If administrative / preservation metadata is available, it must be described using the administrative metadata section (amdSec) element. All administrative metadata is present in a single amdSec element. + The transferred content is placed in the file section in different file group elements, described in other requirements. + Each METS file must include ONE structural map structMap element used exactly as described here. + There MUST be a general element that describes the package. 
+ + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structLink_rules.xml new file mode 100644 index 0000000..eae2368 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structLink_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml new file mode 100644 index 0000000..80a3e8d --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml @@ -0,0 +1,61 @@ + + + + + + Use of the METS structural map. + + The mets/structMap/@TYPE attribute must take the value “PHYSICAL” from the vocabulary. + The mets/structMap/@LABEL attribute value is set to “CSIP” from the vocabulary. + + + An xml:id identifier for the structural description (structMap) used for internal package references. It must be unique within the package. + The structural map MUST comprise a single division. + The package’s top-level structural division div element’s @LABEL attribute value must be identical to the package identifier, i.e. the same value as the mets/@OBJID attribute. + + + An xml:id identifier must be unique within the package. + The metadata referenced in the administrative and/or descriptive metadata section is described in the structural map with one sub division. + The documentation referenced in the file section file groups is described in the structural map with one sub division. + The schemas referenced in the file section file groups are described in the structural map within a single sub-division. + When no representations are present the content referenced in the file section file group with @USE attribute value “Representations” is described in the structural map as a single sub division. + + + An xml:id identifier must be unique within the package. 
+ When there is administrative metadata and the amdSec is present, all administrative metadata MUST be referenced via the administrative sections different identifiers. + When there are descriptive metadata and one or more dmdSec is present, all descriptive metadata MUST be referenced via the descriptive section identifiers. + + + An xml:id identifier must be unique within the package. + All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. + + + A reference, by ID, to the “Documentation” file group. + + + An xml:id identifier must be unique within the package. + All file groups containing schemas described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. + + + The pointer to the identifier for the “Schema” file group. + + + An xml:id identifier must be unique within the package. + All file groups containing documentation described in the package are referenced via the relevant file group identifiers. There MUST be one file group reference per fptr element. + + + The pointer to the identifier for the Representations file group. + + + Mandatory, xml:id identifier must be unique within the package. + The package’s representation division div element @LABEL attribute value must be the path to the representation level METS document. + The division div of the specific representation includes one occurrence of the METS pointer mptr element, pointing to the appropriate representation METS file. + + + The file group containing the files described in the package are referenced via the relevant file group identifier. + MUST point to the actual location of the resource. We recommend recording a URL type filepath within this attribute. + Attribute used with the value “simple”. Value list is maintained by the xlink standard. 
+ The locator type is always used with the value “URL” from the vocabulary in the attribute. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_amdSec_rules.xml new file mode 100644 index 0000000..4aaf161 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_amdSec_rules.xml @@ -0,0 +1,8 @@ + + + + + + Use of the METS administrative metadata section. + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_behaviorSec_rules.xml new file mode 100644 index 0000000..4816461 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_behaviorSec_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml new file mode 100644 index 0000000..e58cf52 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml @@ -0,0 +1,8 @@ + + + + + + Use of the METS descriptive metadata section. + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_fileSec_rules.xml new file mode 100644 index 0000000..9d92416 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_fileSec_rules.xml @@ -0,0 +1,9 @@ + + + + + + + Use of the METS file section. 
+ + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml new file mode 100644 index 0000000..43c3715 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml @@ -0,0 +1,7 @@ + + + + + Use of the METS header + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml new file mode 100644 index 0000000..c4c7fc4 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml @@ -0,0 +1,8 @@ + + + + + + Validate METS root element. + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structLink_rules.xml new file mode 100644 index 0000000..eae2368 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structLink_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structMap_rules.xml new file mode 100644 index 0000000..6c44f94 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_structMap_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_amdSec_rules.xml new file mode 100644 index 0000000..4aaf161 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_amdSec_rules.xml @@ -0,0 +1,8 @@ + + + + + + Use of the METS administrative metadata section. 
+ + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_behaviorSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_behaviorSec_rules.xml new file mode 100644 index 0000000..4816461 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_behaviorSec_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_dmdSec_rules.xml new file mode 100644 index 0000000..e58cf52 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_dmdSec_rules.xml @@ -0,0 +1,8 @@ + + + + + + Use of the METS descriptive metadata section. + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_fileSec_rules.xml new file mode 100644 index 0000000..bbef1c4 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_fileSec_rules.xml @@ -0,0 +1,15 @@ + + + + + + + Use of the METS file section. + + A file element has an FILEFORMATNAME attribute. + A file element has an FILEFORMATVERSION attribute. + A file element has an FILEFORMATREGISTRY attribute. + A file element has an FILEFORMATKEY attribute. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsHdr_rules.xml new file mode 100644 index 0000000..531404d --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsHdr_rules.xml @@ -0,0 +1,31 @@ + + + + + Use of the METS header + + Optional @metsHdr:RECORDSTATUS attribute used to indicate package status. + The metsHdr element MUST have a csip:OAISPACKAGETYPE attribute. + + + Optional altRecordID element identifying submission agreement. + Optional altRecordID element identifying previous submission agreement. 
+ Optional altRecordID element identifying reference code. + Optional altRecordID element identifying previous reference code. + + + Optional METS agent element found. + + + The agent element MUST have a role attribute. + + + The agent element MUST have a role attribute. + Optional METS name element used. + Optional METS note element used. + + + The creator agent element MUST have a NOTETYPE attribute of value IDENTIFICATIONCODE. + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsRootElement_rules.xml new file mode 100644 index 0000000..90c6744 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_metsRootElement_rules.xml @@ -0,0 +1,12 @@ + + + + + + Validate METS root element. + + Optional LABEL attribute used as short text package name. + The PROFILE attribute MUST contain the URL of the METS profile, for a SIP: https://earksip.dilcis.eu/profile/E-ARK-SIP.xml. 
+ + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structLink_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structLink_rules.xml new file mode 100644 index 0000000..eae2368 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structLink_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structMap_rules.xml new file mode 100644 index 0000000..6c44f94 --- /dev/null +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/SIP/mets_structMap_rules.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/eark_validator/ipxml/schematron.py b/eark_validator/ipxml/schematron.py index a7f32c7..9039d0c 100644 --- a/eark_validator/ipxml/schematron.py +++ b/eark_validator/ipxml/schematron.py @@ -84,5 +84,5 @@ def validate(self, to_validate: str) -> ET.Element: self.schematron.validate(xml_file) return self.schematron.validation_report -def get_schematron_path(spec_id: str, section: str) -> str: - return str(files(SCHEMATRON).joinpath(spec_id).joinpath(f'mets_{section}_rules.xml')) +def get_schematron_path(version: str, spec_id: str, section: str) -> str: + return str(files(SCHEMATRON).joinpath(version).joinpath(spec_id).joinpath(f'mets_{section}_rules.xml')) diff --git a/eark_validator/packages.py b/eark_validator/packages.py index ae0c224..c8c403f 100644 --- a/eark_validator/packages.py +++ b/eark_validator/packages.py @@ -36,54 +36,19 @@ from eark_validator.model import ValidationReport from eark_validator.model.package_details import InformationPackage from eark_validator.model.validation_report import MetatdataResults -from eark_validator.specifications.specification import EarkSpecifications +from eark_validator.specifications.specification import SpecificationType, SpecificationVersion METS: str = 'METS.xml' -def validate(to_validate: Path) -> ValidationReport: - 
"""Returns the validation report that results from validating the path - to_validate as a folder. The method does not validate archive files.""" - is_struct_valid, struct_results = structure.validate(to_validate) - if not is_struct_valid: - return ValidationReport.model_validate({'structure': struct_results}) - validator = MetsValidator(str(to_validate)) - validator.validate_mets(METS) - if not validator.is_valid: - metadata: MetatdataResults = MetatdataResults.model_validate({ - 'schema_results': validator.validation_errors - }) - return ValidationReport.model_validate({ - 'structure': struct_results, - 'metadata': metadata - }) - - csip_profile = SC.ValidationProfile.from_specification(EarkSpecifications.CSIP.specification) - csip_profile.validate(to_validate.joinpath(METS)) - results = csip_profile.get_all_results() - - package: InformationPackage = InformationPackages.from_path(to_validate) - if package.package.oaispackagetype in ['SIP', 'DIP']: - profile = SC.ValidationProfile.from_specification(package.package.oaispackagetype) - profile.validate(to_validate.joinpath(METS)) - results.extend(profile.get_all_results()) - - metadata: MetatdataResults = MetatdataResults.model_validate({ - 'schema_results': validator.validation_errors, - 'schematron_results': results - }) - return ValidationReport.model_validate({ - 'structure': struct_results, - 'package': package, - 'metadata': metadata - }) - class PackageValidator(): """Class for performing full package validation.""" _package_handler = PackageHandler() - def __init__(self, package_path: Path): + def __init__(self, package_path: Path, version: SpecificationVersion = SpecificationVersion.V2_1_0): self._path : Path = package_path self._name: str = os.path.basename(package_path) self._report: ValidationReport = None + self._version: SpecificationVersion = version + if os.path.isdir(package_path): # If a directory or archive get the path to process self._to_proc = self._path.absolute() @@ -97,7 +62,8 @@ def 
__init__(self, package_path: Path): # If not an archive we can't process self._report = _report_from_bad_path(package_path) return - self._report = validate(self._to_proc) + + self._report = self.validate(self._version, self._to_proc) @property def original_path(self) -> Path: @@ -113,6 +79,49 @@ def name(self) -> str: def validation_report(self) -> ValidationReport: """Returns the valdiation report for the package.""" return self._report + + @property + def version(self) -> SpecificationVersion: + """Returns the specification version used for validation.""" + return self._version + + @classmethod + def validate(self, version: SpecificationVersion, to_validate: Path) -> ValidationReport: + """Returns the validation report that results from validating the path + to_validate as a folder. The method does not validate archive files.""" + is_struct_valid, struct_results = structure.validate(to_validate) + if not is_struct_valid: + return ValidationReport.model_validate({'structure': struct_results}) + validator = MetsValidator(str(to_validate)) + validator.validate_mets(METS) + if not validator.is_valid: + metadata: MetatdataResults = MetatdataResults.model_validate({ + 'schema_results': validator.validation_errors + }) + return ValidationReport.model_validate({ + 'structure': struct_results, + 'metadata': metadata + }) + + csip_profile = SC.ValidationProfile(SpecificationType.CSIP, version) + csip_profile.validate(to_validate.joinpath(METS)) + results = csip_profile.get_all_results() + + package: InformationPackage = InformationPackages.from_path(to_validate) + if package.package.oaispackagetype in ['SIP', 'DIP']: + profile = SC.ValidationProfile(SpecificationType.from_string(package.package.oaispackagetype), version) + profile.validate(to_validate.joinpath(METS)) + results.extend(profile.get_all_results()) + + metadata: MetatdataResults = MetatdataResults.model_validate({ + 'schema_results': validator.validation_errors, + 'schematron_results': results + }) + return
ValidationReport.model_validate({ + 'structure': struct_results, + 'package': package, + 'metadata': metadata + }) def _report_from_bad_path(package_path: Path) -> ValidationReport: struct_results = structure.get_bad_path_results(package_path) diff --git a/eark_validator/rules.py b/eark_validator/rules.py index 46e6cb4..18e0e59 100644 --- a/eark_validator/rules.py +++ b/eark_validator/rules.py @@ -30,13 +30,15 @@ from eark_validator.ipxml.schematron import SchematronRuleset, SVRL_NS, get_schematron_path from eark_validator.model.validation_report import Location, Result -from eark_validator.specifications.specification import EarkSpecifications, Specification +from eark_validator.specifications.specification import EarkSpecification, Specification, SpecificationType, SpecificationVersion from eark_validator.const import NO_PATH, NOT_FILE from eark_validator.model import Severity class ValidationProfile(): """ A complete set of Schematron rule sets that comprise a complete validation profile.""" - def __init__(self, specification: Specification): + def __init__(self, type: SpecificationType, version: SpecificationVersion): + specification: Specification = EarkSpecification(type, version).specification + self._rulesets: Dict[str, SchematronRuleset] = {} self._specification: Specification = specification self.is_valid: bool = False @@ -44,8 +46,7 @@ def __init__(self, specification: Specification): self.results: Dict[str, List[Result]] = {} self.messages: List[str] = [] for section in specification.sections: - self.rulesets[section] = SchematronRuleset(get_schematron_path(specification.id, - section)) + self.rulesets[section] = SchematronRuleset(get_schematron_path(version, specification.id, section)) @property def specification(self) -> Specification: @@ -98,18 +99,6 @@ def get_result(self, name: str) -> List[Result]: """Return only the results for element name.""" return self.results.get(name) - @classmethod - def from_specification(cls, specification: str | 
EarkSpecifications | Specification) -> 'ValidationProfile': - """Create a validation profile from a specification.""" - if isinstance(specification, str): - specification = EarkSpecifications.from_id(specification) - if isinstance(specification, EarkSpecifications): - specification = specification.specification - if not isinstance(specification, Specification): - raise ValueError('Specification must be a instance or valid specification ID.') - return cls(specification) - - class TestResults(): @staticmethod def from_element(rule: ET.Element, failed_assert: ET.Element) -> Result: diff --git a/eark_validator/specifications/specification.py b/eark_validator/specifications/specification.py index 667961d..c05a38d 100644 --- a/eark_validator/specifications/specification.py +++ b/eark_validator/specifications/specification.py @@ -49,18 +49,16 @@ def _from_xml_file(cls, xml_file: str) -> Specification: if not os.path.isfile(xml_file): raise ValueError(NOT_FILE.format(xml_file)) tree = ET.parse(xml_file, parser=cls._parser()) - return cls._from_xml(tree) + return cls._from_xml(tree) @classmethod def _parser(cls) -> ET.XMLParser: """Create a parser for the specification.""" - parser = ET.XMLParser(schema=METS_PROF_SCHEMA, resolve_entities=False, no_network=True) - return parser + return ET.XMLParser(schema=METS_PROF_SCHEMA, resolve_entities=False, no_network=True) @classmethod def _from_xml(cls, tree: ET.ElementTree) -> Specification: - spec = cls.from_element(tree.getroot()) - return spec + return cls.from_element(tree.getroot()) @classmethod def from_element(cls, spec_ele: ET.Element) -> Specification: @@ -151,45 +149,52 @@ def get_requirements() -> list[StructuralRequirement]: @unique -class EarkSpecifications(str, Enum): - """Enumeration of E-ARK specifications.""" +class SpecificationVersion(str, Enum): + V2_0_4 = 'V2.0.4' + V2_1_0 = 'V2.1.0' + + def __str__(self): + return self.value + +@unique +class SpecificationType(str, Enum): CSIP = 'E-ARK-CSIP' SIP = 'E-ARK-SIP' 
DIP = 'E-ARK-DIP' + + @classmethod + def from_string(cls, type: str) -> Optional['SpecificationType']: + """Get the enum from the value.""" + for spec in cls: + if type in [spec.name, spec.value]: + return spec + return None + + +class EarkSpecification: + def __init__(self, type: SpecificationType, version: SpecificationVersion): + self._type: SpecificationType = type + self._version: SpecificationVersion = version - def __init__(self, value: str): - self._path = str(files(profiles).joinpath(value + '.xml')) + self._path = str(files(profiles).joinpath(version).joinpath(type + '.xml')) self._specfication = Specifications._from_xml_file(self.path) @property - def spec_id(self) -> str: - """Get the specification id.""" - return self.name + def version(self) -> SpecificationVersion: + """Get the specification version.""" + return self._version + + @property + def type(self) -> SpecificationType: + """Get the specification type.""" + return self._type @property def path(self) -> str: """Get the path to the specification file.""" return self._path - @property - def title(self) -> str: - """Get the specification title.""" - return self.value - @property def specification(self) -> Specification: """Get the specification.""" return self._specfication - - @property - def profile(self) -> str: - """Get the specification profile url.""" - return f'https://eark{self.name.lower()}.dilcis.eu/profile/{self.value}.xml' - - @classmethod - def from_id(cls, spec_id: str) -> Optional['EarkSpecifications']: - """Get the enum from the value.""" - for spec in cls: - if spec_id in [ spec.spec_id, spec.value, spec.profile ]: - return spec - return None From 831bb4e9e33ae1002478d405da64ecf9d6bfba10 Mon Sep 17 00:00:00 2001 From: "Jakub Janaszewski (DOC)" Date: Wed, 22 May 2024 16:56:01 +0200 Subject: [PATCH 2/5] 2.1.0 schematron rules --- eark_validator/cli/app.py | 9 +++------ .../schematron/V2.1.0/CSIP/mets_amdSec_rules.xml | 8 ++++---- .../schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml | 6 
+++--- .../schematron/V2.1.0/CSIP/mets_fileSec_rules.xml | 14 +++++++------- .../V2.1.0/CSIP/mets_metsRootElement_rules.xml | 8 ++++---- .../V2.1.0/CSIP/mets_structMap_rules.xml | 12 ++++++++---- .../schematron/V2.1.0/DIP/mets_dmdSec_rules.xml | 3 +++ .../schematron/V2.1.0/DIP/mets_metsHdr_rules.xml | 3 +++ .../V2.1.0/DIP/mets_metsRootElement_rules.xml | 4 ++++ 9 files changed, 39 insertions(+), 28 deletions(-) diff --git a/eark_validator/cli/app.py b/eark_validator/cli/app.py index 10cca79..c760aa2 100644 --- a/eark_validator/cli/app.py +++ b/eark_validator/cli/app.py @@ -109,23 +109,20 @@ def main(): # Get input from command line args = parse_command_line() # If no target files or folders specified then print usage and exit - print('Version: ' + args.specification_version) - print(type(args.specification_version)) - if not args.files: PARSER.print_help() # Iterate the file arguments for file_arg in args.files: - _loop_exit, _ = _validate_ip(file_arg) + _loop_exit, _ = _validate_ip(file_arg, args.specification_version) _exit = _loop_exit if (_loop_exit > 0) else _exit sys.exit(_exit) -def _validate_ip(path: str) -> Tuple[int, Optional[ValidationReport]]: +def _validate_ip(path: str, version: SpecificationVersion) -> Tuple[int, Optional[ValidationReport]]: ret_stat, checked_path = _check_path(path) if ret_stat > 0: return ret_stat, None - report = PACKAGES.PackageValidator(checked_path).validation_report + report = PACKAGES.PackageValidator(checked_path, version).validation_report print(f'Path {checked_path}, struct result is: {report.structure.status.value}') # for message in report.structure.messages: print(report.model_dump_json()) diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml index 16cf669..c0e3266 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml +++ 
b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_amdSec_rules.xml @@ -20,9 +20,9 @@ MUST record the type of metadata at the referenced location. MUST record the MIME type of the referenced file. MUST record the size in bytes of the referenced file. - MUST record the date the referenced file was created. + MUST record the date and time the referenced file was created. MUST record the checksum of the referenced file. - MUST record the checksum type of the referenced file. + MUST record a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. Mandatory, unique id for the rights metadata. @@ -36,9 +36,9 @@ MUST record the type of metadata at the referenced location. MUST record the MIME type of the referenced file. MUST record the size in bytes of the referenced file. - MUST record the date the referenced file was created. + MUST record the date and time the referenced file was created. MUST record the checksum of the referenced file. - MUST record the checksum type of the referenced file. + MUST record a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml index 2e97653..7ce9143 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_dmdSec_rules.xml @@ -6,7 +6,7 @@ Use of the METS descriptive metadata section. Mandatory, identifier must be unique within the package. - Mandatory, creation date of the descriptive metadata in this section. + Mandatory, creation date and time of the descriptive metadata in this section. SHOULD be used to indicated the status of the package. SHOULD provide a reference to the descriptive metadata file located in the “metadata” section of the IP.. 
@@ -17,9 +17,9 @@ Specifies the type of metadata in the referenced file. Values are taken from the list provided by the METS. MUST hold the IANA mime type of the referenced file. MUST hold the size of the referenced file in bytes. - MUST hold the creation date of the referenced file. + MUST hold the creation date and time of the referenced file. MUST hold the checksum of the referenced file. - MUST hold the algorithm type of checksum of the referenced file. + MUST hold a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml index 7d37037..7d3aaa7 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_fileSec_rules.xml @@ -7,13 +7,13 @@ An xml:id identifier for the file section used for internal package references. All documentation pertaining to the transferred content is placed in one or more file group elements with mets/fileSec/fileGrp/@USE attribute value “Documentation”. - All XML schemas used in the information package should be referenced from one or more file groups with mets/fileSec/fileGrp/@USE attribute value “Schemas”. - A pointer to the METS document describing the representation or pointers to the content being transferred must be present in one or more file groups with mets/fileSec/fileGrp/@USE attribute value “Representations”. + All XML schemas used in the information package must be referenced from one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value "Schemas". 
+ A pointer to the METS document describing the representation or pointers to the content being transferred must be present in one or more file groups with `mets/fileSec/fileGrp/@USE` attribute value starting with "Representations" followed by the path to the folder where the representation level METS document is placed. For example "Representation/submission" and "Representation/ingest". ADMID attribute used. - This attribute is mandatory when the mets/fileSec/fileGrp/@USE attribute value is “Representations”. - This attribute is mandatory when the mets/fileSec/fileGrp/@USE attribute value is “Representations”. + When the element "Content Information Type Specification" (CSIP4) has the value "MIXED" or the file group describes a representation, then this element states the content information type specification used for the file group. When the element "Representations file group" (CSIP114), the file group describes a representation with the `mets/fileSec/fileGrp/@USE` attribute value is starting with "Representations", then this element must state the content information type specification used for the representation. + When the `mets/fileSec/fileGrp/@csip:CONTENTINFORMATIONTYPE` attribute has the value "OTHER" the `mets/fileSec/fileGrp/@csip:OTHERCONTENTINFORMATIONTYPE` must state a value for the Content Information Type Specification used. This attribute is mandatory. This attribute is mandatory. An xml:id identifier for the file group used for internal package references. It must be unique within the package. The file group contains the file elements which describe the file objects. @@ -22,13 +22,13 @@ This attribute is mandatory. An xml:id identifier for the file group used for internal package references. It must be unique within the package. MUST record the MIME type of the referenced file. MUST record the size in bytes of the referenced file. - MUST record the date the referenced file was created. 
+ MUST record the date and time the referenced file was created. MUST record the checksum of the referenced file. - MUST record the checksum type of the referenced file. + MUST record a value from the METS-standard which identifies the algorithm used to calculate the checksum for the referenced file. A file element has an OWNERID attribute. A file element has an ADMID attribute. A file element has an DMDID attribute. - The location of each external file must be defined by the file location FLocat element. + The location of each external file must be defined by the file location `FLocat` element using the same rules as references for metadata files. All references to files should be made using the XLink href attribute and the file protocol using the relative location of the file. Mandatory, locator type is always used with the value “URL” from the vocabulary in the attribute. diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml index 42502f8..679d798 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_metsRootElement_rules.xml @@ -8,15 +8,15 @@ The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. For the package METS document, this should be the name/ID of the package, i.e. the name of the package root folder. For a representation level METS document this value records the name/ID of the representation, i.e. the name of the top-level representation folder. - The mets/@TYPE attibute MUST be used to declare the category of the content held in the package, e.g. book, journal, stereograph, video, etc.. Legal values are defined in a fixed vocabulary. 
- When the content category used falls outside of the defined vocabulary the mets/@TYPE value must be set to “OTHER” and the specific value declared in mets/@csip:OTHERTYPE. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced. + The `mets/@TYPE` attribute MUST be used to declare the category of the content held in the package, e.g. "Datasets", "Websites", "Mixed", "Other", etc. Legal values are defined in a fixed vocabulary. When the content category used falls outside of the defined vocabulary the `mets/@TYPE` value must be set to "OTHER" and the specific value declared in `mets/@csip:OTHERTYPE`. The vocabulary will develop under the curation of the DILCIS Board as additional content information type specifications are produced. + When the `mets/@TYPE` attribute has the value "OTHER" the `mets/@csip:OTHERTYPE` attribute MUST be used to declare the content category of the package/representation. The value can either be "OTHER" or any other string that is not present in the vocabulary used in the `mets/@TYPE` attribute. Used to declare the Content Information Type Specification used when creating the package. Legal values are defined in a fixed vocabulary. The attribute is mandatory for representation level METS documents. When the mets/@csip:CONTENTINFORMATIONTYPE has the value “OTHER” the mets/@csip:OTHERCONTENTINFORMATIONTYPE must state the content information type. The PROFILE attribute MUST contain the URL of the METS profile that the information package conforms with. Must be used if descriptive metadata about the package content is available. NOTE: According to official METS documentation each metadata section must describe one and only one set of metadata. As such, if implementers want to include multiple occurrences of descriptive metadata into the package this must be done by repeating the whole dmdSec element for each individual metadata.
If administrative / preservation metadata is available, it must be described using the administrative metadata section (amdSec) element. All administrative metadata is present in a single amdSec element. - The transferred content is placed in the file section in different file group elements, described in other requirements. - Each METS file must include ONE structural map structMap element used exactly as described here. + References to all transferred content SHOULD be placed in the file section in the different file group elements, described in other requirements. + Every CSIP compliant METS file must include ONE structural map `structMap` element used exactly as described in this section of requirements. There MUST be a general element that describes the package. diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml index 80a3e8d..d6e007f 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/CSIP/mets_structMap_rules.xml @@ -11,19 +11,23 @@ An xml:id identifier for the structural description (structMap) used for internal package references. It must be unique within the package. The structural map MUST comprises a single division. - The package’s top-level structural division div element’s @LABEL attribute value must be identical to the package identifier, i.e. the same value as the mets/@OBJID attribute. An xml:id identifier must be unique within the package. The metadata referenced in the administrative and/or descriptive metadata section is described in the structural map with one sub division. + The metadata division div element's @LABEL attribute value must be "Metadata". The documentation referenced in the file section file groups is described in the structural map with one sub division. 
+ The documentation division div element in the package uses the value "Documentation" from the vocabulary as the value for the @LABEL attribute. The schemas referenced in the file section file groups are described in the structural map within a single sub-division. + The schema division div element's @LABEL attribute has the value Schemas from the vocabulary. When no representations are present the content referenced in the file section file group with @USE attribute value “Representations” is described in the structural map as a single sub division. + The package's content division div element must have the @LABEL attribute value "Representations", taken from the vocabulary. + When a package consists of multiple representations, each described by a representation level METS.xml document, there should be a discrete representation div element for each representation. An xml:id identifier must be unique within the package. - When there is administrative metadata and the amdSec is present, all administrative metadata MUST be referenced via the administrative sections different identifiers. - When there are descriptive metadata and one or more dmdSec is present, all descriptive metadata MUST be referenced via the descriptive section identifiers. + The administrative metadata division should reference all current administrative metadata sections. + The descriptive metadata division should reference all current descriptive metadata sections. An xml:id identifier must be unique within the package. @@ -48,7 +52,7 @@ Mandatory, xml:id identifier must be unique within the package. - The package’s representation division div element @LABEL attribute value must be the path to the representation level METS document.
+ The package's representation division div element @LABEL attribute value must be the path to the representation level METS document starting with the value "Representations" followed by the main folder name for example "Representations/submission" and "Representations/ingest". The division div of the specific representation includes one occurrence of the METS pointer mptr element, pointing to the appropriate representation METS file. diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml index e58cf52..2d41696 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_dmdSec_rules.xml @@ -4,5 +4,8 @@ Use of the METS descriptive metadata section. + + Indicates the status of the package using a fixed vocabulary. The status SHOULD in a DIP be set to "CURRENT". + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml index 43c3715..b85006b 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsHdr_rules.xml @@ -3,5 +3,8 @@ Use of the METS header + + The in CSIP added attribute `@csip:OAISPACKAGETYPE` is used with the value "DIP". + diff --git a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml index c4c7fc4..c56c1ec 100644 --- a/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml +++ b/eark_validator/ipxml/resources/schematron/V2.1.0/DIP/mets_metsRootElement_rules.xml @@ -4,5 +4,9 @@ Validate METS root element. + + The mets/@OBJID attribute is mandatory, its value is a string identifier for the METS document. 
Note that the value of the `mets/@OBJID attribute` for the DIP is expected to be different from the SIP and AIP to reflect the creation of a new package. + The mets/@PROFILE attribute MUST contain the URL of the METS profile that the information package conforms with. + From 79911b561250a6c11c0dc067e1ded118320251ce Mon Sep 17 00:00:00 2001 From: "Jakub Janaszewski (DOC)" Date: Wed, 22 May 2024 17:47:32 +0200 Subject: [PATCH 3/5] Updated tests --- .../specifications/specification.py | 3 +- tests/rules_test.py | 57 ++++---- tests/specification_test.py | 124 +++++------------- 3 files changed, 57 insertions(+), 127 deletions(-) diff --git a/eark_validator/specifications/specification.py b/eark_validator/specifications/specification.py index c05a38d..f9a71e2 100644 --- a/eark_validator/specifications/specification.py +++ b/eark_validator/specifications/specification.py @@ -168,8 +168,7 @@ def from_string(cls, type: str) -> Optional['SpecificationType']: for spec in cls: if type in [spec.name, spec.value]: return spec - return None - + raise ValueError("{type} does not exists") class EarkSpecification: def __init__(self, type: SpecificationType, version: SpecificationVersion): diff --git a/tests/rules_test.py b/tests/rules_test.py index 7023329..cb0392e 100644 --- a/tests/rules_test.py +++ b/tests/rules_test.py @@ -32,7 +32,7 @@ from eark_validator import rules as SC from eark_validator.model.validation_report import Severity, Result -from eark_validator.specifications.specification import EarkSpecifications +from eark_validator.specifications.specification import SpecificationType, SpecificationVersion import tests.resources.schematron as SCHEMATRON import tests.resources.xml as XML @@ -145,91 +145,82 @@ def test_mets_structmap(self): class ValidationProfileTest(unittest.TestCase): def test_load_by_str(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) 
self.assertEqual(profile.specification.url, CSIP_PROF) - profile = SC.ValidationProfile.from_specification('SIP') + profile = SC.ValidationProfile(SpecificationType.from_string('SIP'), SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, SIP_PROF) - profile = SC.ValidationProfile.from_specification('DIP') + profile = SC.ValidationProfile(SpecificationType.from_string('DIP'), SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, DIP_PROF) def test_load_by_eark_spec(self): - profile = SC.ValidationProfile.from_specification(EarkSpecifications.CSIP) + profile = SC.ValidationProfile(SpecificationType.CSIP, SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, CSIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.SIP) + profile = SC.ValidationProfile(SpecificationType.SIP, SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, SIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.DIP) - self.assertEqual(profile.specification.url, DIP_PROF) - - def test_load_by_spec(self): - profile = SC.ValidationProfile.from_specification(EarkSpecifications.CSIP.specification) - self.assertEqual(profile.specification.url, CSIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.SIP.specification) - self.assertEqual(profile.specification.url, SIP_PROF) - profile = SC.ValidationProfile.from_specification(EarkSpecifications.DIP.specification) + profile = SC.ValidationProfile(SpecificationType.DIP, SpecificationVersion.V2_0_4) self.assertEqual(profile.specification.url, DIP_PROF) def test_bad_value(self): with self.assertRaises(ValueError): - SC.ValidationProfile.from_specification('BAD') - + SC.ValidationProfile(SpecificationType.from_string('BAD'), SpecificationVersion.V2_0_4) def test_unimplemented_specifications(self): with self.assertRaises(ValueError): - SC.ValidationProfile.from_specification('AIP') + 
SC.ValidationProfile(SpecificationType.from_string('AIP'), SpecificationVersion.V2_0_4) with self.assertRaises(ValueError): - SC.ValidationProfile.from_specification('AIU') + SC.ValidationProfile(SpecificationType.from_string('AIU'), SpecificationVersion.V2_0_4) with self.assertRaises(ValueError): - SC.ValidationProfile.from_specification('AIC') + SC.ValidationProfile(SpecificationType.from_string('AIC'), SpecificationVersion.V2_0_4) def test_valid(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) self.assertTrue(profile.is_valid) def test_invalid(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath('METS-no-hdr.xml'))) self.assertFalse(profile.is_valid) def test_validate_file_not_found(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) with self.assertRaises(FileNotFoundError): profile.validate(str(files(SCHEMATRON).joinpath('not-found.xml'))) def test_validate_dir_value_err(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) with self.assertRaises(ValueError): profile.validate(str(files(SCHEMATRON))) def test_validate_empty_file(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES).joinpath('empty.file'))) self.assertFalse(profile.is_valid) def test_validate_not_mets(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = 
SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath('person.xml'))) self.assertFalse(profile.is_valid) def test_validate_json(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES).joinpath('aip.json'))) self.assertFalse(profile.is_valid) def test_get_results(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) self.assertTrue(profile.is_valid) self.assertEqual(len(profile.get_results()), 8) def test_get_result(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) result = profile.get_result('metsHdr') self.assertTrue(profile.is_valid) self.assertEqual(len(list(filter(lambda a: a.severity == Severity.WARNING, result))), 1) def test_get_bad_key(self): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.from_string('CSIP'), SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) result = profile.get_result('badkey') self.assertIsNone(result) @@ -240,7 +231,7 @@ class SeverityTest(str, Enum): class ResultTest(unittest.TestCase): @classmethod def setUpClass(cls): - profile = SC.ValidationProfile.from_specification('CSIP') + profile = SC.ValidationProfile(SpecificationType.CSIP, SpecificationVersion.V2_0_4) profile.validate(str(files(TEST_RES_XML).joinpath(METS_VALID))) cls._result = profile.get_result('metsHdr')[0] @@ -252,7 +243,7 @@ def test_bad_sev_att(self): Result.model_validate({ 
'severity': SeverityTest.NOT_SEV }) def _test_validation(name, to_validate): - rules = SC.SchematronRuleset(SC.get_schematron_path('CSIP', name)) + rules = SC.SchematronRuleset(SC.get_schematron_path(SpecificationVersion.V2_0_4, 'CSIP', name)) rules.validate(str(files(XML).joinpath(to_validate))) results: List[Result] = SC.TestResults.from_validation_report(rules._schematron.validation_report) errors = warnings = infos = 0 @@ -266,6 +257,6 @@ def _test_validation(name, to_validate): return errors < 1, errors, warnings, infos def _full_validation(name, to_validate): - rules = SC.SchematronRuleset(SC.get_schematron_path('CSIP', name)) + rules = SC.SchematronRuleset(SC.get_schematron_path(SpecificationVersion.V2_0_4, 'CSIP', name)) rules.validate(str(files(XML).joinpath(to_validate))) return SC.TestResults.from_validation_report(rules._schematron.validation_report) diff --git a/tests/specification_test.py b/tests/specification_test.py index 21bdaca..a943cc6 100644 --- a/tests/specification_test.py +++ b/tests/specification_test.py @@ -23,7 +23,6 @@ # under the License. 
# -from typing import Optional import unittest from lxml import etree as ET @@ -31,7 +30,7 @@ from importlib_resources import files from eark_validator.model.specifications import Specification, StructuralRequirement -from eark_validator.specifications.specification import EarkSpecifications, Specifications, StructuralRequirements +from eark_validator.specifications.specification import EarkSpecification, Specifications, StructuralRequirements, SpecificationType, SpecificationVersion import tests.resources.xml as XML from eark_validator.ipxml.resources import profiles @@ -54,8 +53,8 @@ def test_invalid_xml(self): Specifications._from_xml_file(str(files('tests.resources.xml').joinpath('person.xml'))) def test_valid_xml(self): - specification: Specification = Specifications._from_xml_file(str(files(profiles).joinpath('E-ARK-CSIP' + '.xml'))) - self.assertEqual(EarkSpecifications.CSIP.specification, specification) + specification: Specification = Specifications._from_xml_file(str(files(profiles).joinpath('V2.0.4', 'E-ARK-CSIP' + '.xml'))) + self.assertEqual(EarkSpecification(SpecificationType.CSIP, SpecificationVersion.V2_0_4).specification, specification) class StructuralRequirementsTest(unittest.TestCase): @@ -71,92 +70,33 @@ def test_from_rule_no(self): req: StructuralRequirement = StructuralRequirements.from_rule_no(1) self.assertEqual(req.id, 'CSIPSTR1') -class EarkSpecificationsTest(unittest.TestCase): +class SpecificationTypeTest(unittest.TestCase): + def test_value(self): + type = SpecificationType.CSIP + self.assertEqual(type.value, 'E-ARK-CSIP') + type = SpecificationType.SIP + self.assertEqual(type.value, 'E-ARK-SIP') + type = SpecificationType.DIP + self.assertEqual(type.value, 'E-ARK-DIP') + +class SpecificationVersionTest(unittest.TestCase): + def test_value(self): + version = SpecificationVersion.V2_0_4 + self.assertEqual(version.value, 'V2.0.4') + version = SpecificationVersion.V2_1_0 + self.assertEqual(version.value, 'V2.1.0') - def test_title(self): 
- spec = EarkSpecifications.CSIP - self.assertEqual(spec.title, 'E-ARK-CSIP') - spec = EarkSpecifications.SIP - self.assertEqual(spec.title, 'E-ARK-SIP') - spec = EarkSpecifications.DIP - self.assertEqual(spec.title, 'E-ARK-DIP') - - def test_spec_title(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.title, 'E-ARK CSIP METS Profile') - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.title, 'E-ARK SIP METS Profile 2.0') - spec = EarkSpecifications.DIP.specification - self.assertEqual(spec.title, 'E-ARK DIP METS Profile') - - def test_path(self): - path = str(files(profiles).joinpath('E-ARK-CSIP' + '.xml')) - spec = EarkSpecifications.CSIP - self.assertEqual(spec.path, path) - path = str(files(profiles).joinpath('E-ARK-SIP' + '.xml')) - spec = EarkSpecifications.SIP - self.assertEqual(spec.path, path) - path = str(files(profiles).joinpath('E-ARK-DIP' + '.xml')) - spec = EarkSpecifications.DIP - self.assertEqual(spec.path, path) - - def test_url(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.url, 'https://earkcsip.dilcis.eu/profile/E-ARK-CSIP.xml') - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.url, 'https://earksip.dilcis.eu/profile/E-ARK-SIP.xml') - spec = EarkSpecifications.DIP.specification - self.assertEqual(spec.url, 'https://earkdip.dilcis.eu/profile/E-ARK-DIP.xml') - - def test_version(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.version, 'V2.0.4') - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.version, 'SIPV2.0.4') - spec = EarkSpecifications.DIP.specification - self.assertEqual(spec.version, 'DIPV2.0.4') - - def test_date(self): - spec_date = '2020-06-12T09:00:00' - spec = EarkSpecifications.CSIP.specification - self.assertEqual(spec.date, spec_date) - spec = EarkSpecifications.SIP.specification - self.assertEqual(spec.date, spec_date) - spec = EarkSpecifications.DIP.specification - 
self.assertEqual(spec.date, spec_date) - - def test_requirements(self): - spec = EarkSpecifications.CSIP.specification - self.assertEqual(_count_reqs(spec), spec.requirement_count) - spec = EarkSpecifications.SIP.specification - self.assertEqual(_count_reqs(spec), spec.requirement_count) - spec = EarkSpecifications.DIP.specification - self.assertEqual(_count_reqs(spec), spec.requirement_count) - - def test_get_requirement(self): - spec = EarkSpecifications.CSIP.specification - rule_1 = spec.get_requirement_by_id('CSIP1') - rule_1_by_sect = spec.get_requirement_by_sect('CSIP1', 'metsRootElement') - self.assertEqual(rule_1, rule_1_by_sect) - self.assertIsNone(spec.get_requirement_by_id('CSIP999')) - - def test_sections(self): - spec = EarkSpecifications.CSIP.specification - self.assertGreater(len(spec.sections), 0) - self.assertEqual(_count_reqs(spec), spec.requirement_count) - self.assertEqual(len(spec.section_requirements()), spec.requirement_count) - spec = EarkSpecifications.SIP.specification - self.assertGreater(len(spec.sections), 0) - self.assertEqual(_count_reqs(spec), spec.requirement_count) - self.assertEqual(len(spec.section_requirements()), spec.requirement_count) - spec = EarkSpecifications.DIP.specification - self.assertGreater(len(spec.sections), 0) - self.assertEqual(_count_reqs(spec), spec.requirement_count) - self.assertEqual(len(spec.section_requirements()), spec.requirement_count) - -def _count_reqs(spec): - req_count = 0 - for section in spec.sections: - for _ in spec.section_requirements(section): - req_count += 1 - return req_count +class EarkSpecificationsTest(unittest.TestCase): + def test_specifiction_type(self): + eark_specification = EarkSpecification(SpecificationType.CSIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.type, SpecificationType.CSIP) + eark_specification = EarkSpecification(SpecificationType.SIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.type, SpecificationType.SIP) + 
eark_specification = EarkSpecification(SpecificationType.DIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.type, SpecificationType.DIP) + + def test_specifiction_version(self): + eark_specification = EarkSpecification(SpecificationType.CSIP, SpecificationVersion.V2_0_4) + self.assertEqual(eark_specification.version, SpecificationVersion.V2_0_4) + eark_specification = EarkSpecification(SpecificationType.SIP, SpecificationVersion.V2_1_0) + self.assertEqual(eark_specification.version, SpecificationVersion.V2_1_0) From a60e8050b1dfe12dd8762b004f5c5479d36976e6 Mon Sep 17 00:00:00 2001 From: "Jakub Janaszewski (DOC)" Date: Wed, 29 May 2024 12:51:50 +0200 Subject: [PATCH 4/5] Fixed pylint errors --- eark_validator/mets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eark_validator/mets.py b/eark_validator/mets.py index 99131f7..12c94c3 100644 --- a/eark_validator/mets.py +++ b/eark_validator/mets.py @@ -68,7 +68,7 @@ def from_file(mets_file: Path | str) -> MetsFile: raise ValueError(NOT_FILE.format(mets_file)) ns: dict[str, str] = {} entries: list[FileEntry] = [] - othertype = contentinformationtype = oaispackagetype = '' + othertype = contentinformationtype = oaispackagetype = mets_root = '' try: parsed_mets = etree.iterparse(mets_file, events=[START_ELE, START_NS]) for event, element in parsed_mets: From 79d91f380914b11ef7f2b7b795edde535bd41032 Mon Sep 17 00:00:00 2001 From: "Jakub Janaszewski (DOC)" Date: Wed, 29 May 2024 13:19:36 +0200 Subject: [PATCH 5/5] Fixed whitespaces --- eark_validator/packages.py | 4 +- .../specifications/specification.py | 394 +++++++++--------- 2 files changed, 199 insertions(+), 199 deletions(-) diff --git a/eark_validator/packages.py b/eark_validator/packages.py index c8c403f..012b4c4 100644 --- a/eark_validator/packages.py +++ b/eark_validator/packages.py @@ -79,12 +79,12 @@ def name(self) -> str: def validation_report(self) -> ValidationReport: """Returns the valdiation report for the 
package.""" return self._report - + @property def version(self) -> SpecificationVersion: """Returns the specifiation version used for validation.""" return self._version - + @classmethod def validate(self, version: SpecificationVersion, to_validate: Path) -> ValidationReport: """Returns the validation report that results from validating the path diff --git a/eark_validator/specifications/specification.py b/eark_validator/specifications/specification.py index 6c8bbb7..3f7a828 100644 --- a/eark_validator/specifications/specification.py +++ b/eark_validator/specifications/specification.py @@ -1,198 +1,198 @@ -#!/usr/bin/env python +#!/usr/bin/env python # -*- coding: utf-8 -*- -# -# E-ARK Validation -# Copyright (C) 2019 -# All rights reserved. -# -# Licensed to the E-ARK project under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The E-ARK project licenses -# this file to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -"""Module covering information package structure validation and navigation.""" -import os -from enum import Enum, unique -from typing import Optional - -from importlib_resources import files -from lxml import etree as ET - -from eark_validator.const import NO_PATH, NOT_FILE -from eark_validator.ipxml.namespaces import Namespaces -from eark_validator.ipxml.resources import profiles -from eark_validator.ipxml.schema import METS_PROF_SCHEMA -from eark_validator.model.specifications import Requirement, Specification -from eark_validator.specifications.struct_reqs import REQUIREMENTS -from eark_validator.specifications.struct_reqs import Level - - -class Specifications: - - @classmethod - def _from_xml_file(cls, xml_file: str) -> Specification: - """Create a Specification from an XML file.""" - if not os.path.exists(xml_file): - raise FileNotFoundError(NO_PATH.format(xml_file)) - if not os.path.isfile(xml_file): - raise ValueError(NOT_FILE.format(xml_file)) - tree = ET.parse(xml_file, parser=cls._parser()) - return cls._from_xml(tree) - - @classmethod - def _parser(cls) -> ET.XMLParser: - """Create a parser for the specification.""" - return ET.XMLParser(schema=METS_PROF_SCHEMA, resolve_entities=False, no_network=True) - - @classmethod - def _from_xml(cls, tree: ET.ElementTree) -> Specification: - return cls.from_element(tree.getroot()) - - @classmethod - def from_element(cls, spec_ele: ET.Element) -> Specification: - """Create a Specification from an XML element.""" - version = spec_ele.get('ID') - title = date = '' - requirements: dict[str, Requirement] = {} - profile = '' - # Loop through the child eles - for child in spec_ele: - if child.tag == Namespaces.PROFILE.qualify('title'): - # Process the title element - title = child.text - elif child.tag == Namespaces.PROFILE.qualify('date'): - # Grab the requirement text value - date = child.text - elif child.tag == Namespaces.PROFILE.qualify('structural_requirements'): - requirements = 
cls._processs_requirements(child) - elif child.tag in [Namespaces.PROFILE.qualify('URI'), 'URI']: - profile = child.text - # Add the structural requirements - struct_reqs = StructuralRequirements.get_requirements() - # Return the Specification - return Specification.model_validate({ - 'title': title, - 'url': profile, - 'version': version, - 'date': date, - 'requirements': requirements, - 'structural_requirements': struct_reqs - }) - - @classmethod - def _processs_requirements(cls, req_root: ET.Element) -> dict[str, 'Requirement']: - requirements = {} - for sect_ele in req_root: - section = sect_ele.tag.replace(Namespaces.PROFILE.qualifier, '') - reqs = [] - for req_ele in sect_ele: - requirement = Requirements.from_element(req_ele) - if not requirement.id.startswith('REF_'): - reqs.append(requirement) - requirements[section] = reqs - return requirements - -class Requirements(): - @staticmethod - def from_element(req_ele: ET.Element) -> Requirement: - """Return a Requirement instance from an XML element.""" - req_id = req_ele.get('ID') - level: Level = Level.from_string(req_ele.get('REQLEVEL')) - name = '' - for child in req_ele: - if child.tag == Namespaces.PROFILE.qualify('description'): - for req_child in child: - if req_child.tag == Namespaces.PROFILE.qualify('head'): - name = req_child.text - return Requirement.model_validate({ - 'id': req_id, - 'name': name, - 'level': level - }) - -class StructuralRequirements(): - @staticmethod - def from_rule_no(rule_no: int) -> Requirement: - """Create an StructuralRequirement from a numerical rule id and a sub_message.""" - item = REQUIREMENTS.get(rule_no) - if not item: - raise ValueError(f'No rule with number {rule_no}') - return StructuralRequirements.from_dictionary(item) - - @staticmethod - def from_dictionary(item: dict[str, str]) -> Requirement: - """Create an StructuralRequirement from dictionary item and a sub_message.""" - return Requirement.model_validate({ - 'id': item.get('id'), - 'level': item.get('level'), 
- 'message': item.get('message') - }) - - @staticmethod - def get_requirements() -> list[Requirement]: - reqs = [] - for req in REQUIREMENTS.values(): - reqs.append(Requirement.model_validate(req)) - return reqs - -@unique -class SpecificationVersion(str, Enum): - V2_0_4 = 'V2.0.4' - V2_1_0 = 'V2.1.0' - - def __str__(self): - return self.value - -@unique -class SpecificationType(str, Enum): - CSIP = 'E-ARK-CSIP' - SIP = 'E-ARK-SIP' - DIP = 'E-ARK-DIP' - - @classmethod - def from_string(cls, type: str) -> Optional['SpecificationType']: - """Get the enum from the value.""" - for spec in cls: - if type in [spec.name, spec.value]: - return spec - raise ValueError("{type} does not exists") - -class EarkSpecification: - def __init__(self, type: SpecificationType, version: SpecificationVersion): - self._type: SpecificationType = type - self._version: SpecificationVersion = version - - self._path = str(files(profiles).joinpath(version).joinpath(type + '.xml')) - self._specfication = Specifications._from_xml_file(self.path) - - @property - def version(self) -> SpecificationVersion: - """Get the specification version.""" - return self._version - - @property - def type(self) -> SpecificationType: - """Get the specification type.""" - return self._type - - @property - def path(self) -> str: - """Get the path to the specification file.""" - return self._path - - @property - def specification(self) -> Specification: - """Get the specification.""" - return self._specfication +# +# E-ARK Validation +# Copyright (C) 2019 +# All rights reserved. +# +# Licensed to the E-ARK project under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The E-ARK project licenses +# this file to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +"""Module covering information package structure validation and navigation.""" +import os +from enum import Enum, unique +from typing import Optional + +from importlib_resources import files +from lxml import etree as ET + +from eark_validator.const import NO_PATH, NOT_FILE +from eark_validator.ipxml.namespaces import Namespaces +from eark_validator.ipxml.resources import profiles +from eark_validator.ipxml.schema import METS_PROF_SCHEMA +from eark_validator.model.specifications import Requirement, Specification +from eark_validator.specifications.struct_reqs import REQUIREMENTS +from eark_validator.specifications.struct_reqs import Level + + +class Specifications: + + @classmethod + def _from_xml_file(cls, xml_file: str) -> Specification: + """Create a Specification from an XML file.""" + if not os.path.exists(xml_file): + raise FileNotFoundError(NO_PATH.format(xml_file)) + if not os.path.isfile(xml_file): + raise ValueError(NOT_FILE.format(xml_file)) + tree = ET.parse(xml_file, parser=cls._parser()) + return cls._from_xml(tree) + + @classmethod + def _parser(cls) -> ET.XMLParser: + """Create a parser for the specification.""" + return ET.XMLParser(schema=METS_PROF_SCHEMA, resolve_entities=False, no_network=True) + + @classmethod + def _from_xml(cls, tree: ET.ElementTree) -> Specification: + return cls.from_element(tree.getroot()) + + @classmethod + def from_element(cls, spec_ele: ET.Element) -> Specification: + """Create a Specification from an XML element.""" + version = spec_ele.get('ID') + title = date = '' + requirements: dict[str, Requirement] = 
{} + profile = '' + # Loop through the child eles + for child in spec_ele: + if child.tag == Namespaces.PROFILE.qualify('title'): + # Process the title element + title = child.text + elif child.tag == Namespaces.PROFILE.qualify('date'): + # Grab the requirement text value + date = child.text + elif child.tag == Namespaces.PROFILE.qualify('structural_requirements'): + requirements = cls._processs_requirements(child) + elif child.tag in [Namespaces.PROFILE.qualify('URI'), 'URI']: + profile = child.text + # Add the structural requirements + struct_reqs = StructuralRequirements.get_requirements() + # Return the Specification + return Specification.model_validate({ + 'title': title, + 'url': profile, + 'version': version, + 'date': date, + 'requirements': requirements, + 'structural_requirements': struct_reqs + }) + + @classmethod + def _processs_requirements(cls, req_root: ET.Element) -> dict[str, 'Requirement']: + requirements = {} + for sect_ele in req_root: + section = sect_ele.tag.replace(Namespaces.PROFILE.qualifier, '') + reqs = [] + for req_ele in sect_ele: + requirement = Requirements.from_element(req_ele) + if not requirement.id.startswith('REF_'): + reqs.append(requirement) + requirements[section] = reqs + return requirements + +class Requirements(): + @staticmethod + def from_element(req_ele: ET.Element) -> Requirement: + """Return a Requirement instance from an XML element.""" + req_id = req_ele.get('ID') + level: Level = Level.from_string(req_ele.get('REQLEVEL')) + name = '' + for child in req_ele: + if child.tag == Namespaces.PROFILE.qualify('description'): + for req_child in child: + if req_child.tag == Namespaces.PROFILE.qualify('head'): + name = req_child.text + return Requirement.model_validate({ + 'id': req_id, + 'name': name, + 'level': level + }) + +class StructuralRequirements(): + @staticmethod + def from_rule_no(rule_no: int) -> Requirement: + """Create an StructuralRequirement from a numerical rule id and a sub_message.""" + item = 
REQUIREMENTS.get(rule_no) + if not item: + raise ValueError(f'No rule with number {rule_no}') + return StructuralRequirements.from_dictionary(item) + + @staticmethod + def from_dictionary(item: dict[str, str]) -> Requirement: + """Create an StructuralRequirement from dictionary item and a sub_message.""" + return Requirement.model_validate({ + 'id': item.get('id'), + 'level': item.get('level'), + 'message': item.get('message') + }) + + @staticmethod + def get_requirements() -> list[Requirement]: + reqs = [] + for req in REQUIREMENTS.values(): + reqs.append(Requirement.model_validate(req)) + return reqs + +@unique +class SpecificationVersion(str, Enum): + V2_0_4 = 'V2.0.4' + V2_1_0 = 'V2.1.0' + + def __str__(self): + return self.value + +@unique +class SpecificationType(str, Enum): + CSIP = 'E-ARK-CSIP' + SIP = 'E-ARK-SIP' + DIP = 'E-ARK-DIP' + + @classmethod + def from_string(cls, type: str) -> Optional['SpecificationType']: + """Get the enum from the value.""" + for spec in cls: + if type in [spec.name, spec.value]: + return spec + raise ValueError('{type} does not exists') + +class EarkSpecification: + def __init__(self, type: SpecificationType, version: SpecificationVersion): + self._type: SpecificationType = type + self._version: SpecificationVersion = version + + self._path = str(files(profiles).joinpath(version).joinpath(type + '.xml')) + self._specfication = Specifications._from_xml_file(self.path) + + @property + def version(self) -> SpecificationVersion: + """Get the specification version.""" + return self._version + + @property + def type(self) -> SpecificationType: + """Get the specification type.""" + return self._type + + @property + def path(self) -> str: + """Get the path to the specification file.""" + return self._path + + @property + def specification(self) -> Specification: + """Get the specification.""" + return self._specfication