Skip to content

Commit

Permalink
Merge pull request #183 from kba/mets-agent
Browse files Browse the repository at this point in the history
mets: record creation date and core version on creating METS, fix #147
  • Loading branch information
kba authored Sep 20, 2018
2 parents b0af088 + e36680d commit 6e6c97a
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 27 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Changed:

* Creating METS from scratch will set creator agent and creation date, #147

## [0.8.3] - 2018-09-20

Changed:
Expand Down
35 changes: 17 additions & 18 deletions ocrd/model/mets-empty.xml
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
<?xml version="1.0" encoding="UTF-8"?>
<mets:mets xmlns:mets="http://www.loc.gov/METS/" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="info:lc/xmlns/premis-v2 http://www.loc.gov/standards/premis/v2/premis-v2-0.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-6.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/version17/mets.v1-7.xsd http://www.loc.gov/mix/v10 http://www.loc.gov/standards/mix/mix10/mix10.xsd">
<mets:metsHdr CREATEDATE="2017-11-30T16:18:26">
<mets:agent OTHERTYPE="SOFTWARE" ROLE="CREATOR" TYPE="OTHER">
<mets:name>DFG-Koordinierungsprojekt zur Weiterentwicklung von Verfahren der Optical Character Recognition (OCR-D)</mets:name>
<mets:note>OCR-D</mets:note>
</mets:agent>
</mets:metsHdr>
<mets:dmdSec ID="DMDLOG_0001">
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
</mods:mods>
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
<mets:amdSec ID="AMD">
</mets:amdSec>
<mets:fileSec>
</mets:fileSec>
<mets:metsHdr CREATEDATE="{{ NOW }}">
<mets:agent TYPE="OTHER" OTHERTYPE="SOFTWARE" ROLE="CREATOR">
<mets:name>ocrd/core v{{ VERSION }}</mets:name>
</mets:agent>
</mets:metsHdr>
<mets:dmdSec ID="DMDLOG_0001">
<mets:mdWrap MDTYPE="MODS">
<mets:xmlData>
<mods:mods xmlns:mods="http://www.loc.gov/mods/v3">
</mods:mods>
</mets:xmlData>
</mets:mdWrap>
</mets:dmdSec>
<mets:amdSec ID="AMD">
</mets:amdSec>
<mets:fileSec>
</mets:fileSec>
</mets:mets>

19 changes: 18 additions & 1 deletion ocrd/model/ocrd_mets.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,27 @@
from ocrd.constants import NAMESPACES as NS, TAG_METS_FILE, TAG_METS_FILEGRP, IDENTIFIER_PRIORITY, TAG_MODS_IDENTIFIER
from datetime import datetime

from ocrd.constants import (
NAMESPACES as NS,
TAG_METS_FILE,
TAG_METS_FILEGRP,
IDENTIFIER_PRIORITY,
TAG_MODS_IDENTIFIER,
METS_XML_EMPTY,
VERSION
)

from .ocrd_xml_base import OcrdXmlDocument, ET
from .ocrd_file import OcrdFile

class OcrdMets(OcrdXmlDocument):

@staticmethod
def empty_mets():
    """
    Create an ``OcrdMets`` from the bundled empty METS template.

    The template placeholders are filled in before parsing:

    * ``{{ VERSION }}`` is replaced with the ``VERSION`` constant.
    * ``{{ NOW }}`` is replaced with the current local time in ISO 8601
      form (``YYYY-MM-DDTHH:MM:SS.ffffff``).

    Returns:
        OcrdMets: a new document built from the filled-in template.
    """
    # The template is decoded from UTF-8 so placeholders can be replaced
    # as text, then re-encoded for the OcrdMets constructor.
    tpl = METS_XML_EMPTY.decode('utf-8')
    tpl = tpl.replace('{{ VERSION }}', VERSION)
    # The METS schema types CREATEDATE as xsd:dateTime, which requires a
    # 'T' between date and time. str(datetime) uses a space instead, so
    # use isoformat() to emit a schema-valid value.
    tpl = tpl.replace('{{ NOW }}', datetime.now().isoformat())
    return OcrdMets(content=tpl.encode('utf-8'))

def __init__(self, file_by_id=None, **kwargs):
super(OcrdMets, self).__init__(**kwargs)
if file_by_id is None:
Expand Down
6 changes: 3 additions & 3 deletions ocrd/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import tempfile
import requests

from ocrd.constants import METS_XML_EMPTY, TMP_PREFIX, EXT_TO_MIME
from ocrd.constants import TMP_PREFIX, EXT_TO_MIME
from ocrd.utils import getLogger, safe_filename
from ocrd.workspace import Workspace
from ocrd.model import OcrdMets
Expand Down Expand Up @@ -221,7 +221,7 @@ def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_me
mets_fpath = os.path.join(directory, mets_basename)
if not clobber_mets and os.path.exists(mets_fpath):
raise Exception("Not clobbering existing mets.xml in '%s'." % directory)
mets = OcrdMets(content=METS_XML_EMPTY)
mets = OcrdMets.empty_mets()
with open(mets_fpath, 'wb') as fmets:
log.info("Writing %s", mets_fpath)
fmets.write(mets.to_xml(xmllint=True))
Expand All @@ -244,7 +244,7 @@ def workspace_from_folder(self, directory, return_mets=False, clobber_mets=False
if not clobber_mets and os.path.exists(os.path.join(directory, 'mets.xml')):
raise Exception("Not clobbering existing mets.xml in '%s'." % directory)

mets = OcrdMets(content=METS_XML_EMPTY)
mets = OcrdMets.empty_mets()

if not os.path.exists(directory):
os.makedirs(directory)
Expand Down
13 changes: 8 additions & 5 deletions test/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from test.base import TestCase, main, assets

from ocrd.constants import MIMETYPE_PAGE, METS_XML_EMPTY
from ocrd.constants import MIMETYPE_PAGE, VERSION
from ocrd.model import OcrdMets

class TestOcrdMets(TestCase):
Expand All @@ -14,10 +14,13 @@ def test_unique_identifier(self):
self.assertEqual(self.mets.unique_identifier, 'foo', 'Right identifier after change')

def test_unique_identifier_from_nothing(self):
    """
    A METS created from scratch has no identifier until one is set, and
    its serialization records the creator agent and a creation date.
    """
    import re

    mets = OcrdMets.empty_mets()
    self.assertEqual(mets.unique_identifier, None, 'no identifier')
    mets.unique_identifier = 'foo'
    self.assertEqual(mets.unique_identifier, 'foo', 'Right identifier after change')
    as_string = mets.to_xml().decode('utf-8')
    self.assertIn('ocrd/core v%s' % VERSION, as_string)
    # Match the shape of the date rather than a literal year, so the test
    # does not start failing when the calendar year changes.
    self.assertTrue(
        re.search(r'CREATEDATE="\d{4}-\d{2}-\d{2}', as_string),
        'CREATEDATE starts with a YYYY-MM-DD date')

def test_file_groups(self):
self.assertEqual(len(self.mets.file_groups), 17, '17 file groups')
Expand Down

0 comments on commit 6e6c97a

Please sign in to comment.