diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b6fe6702..95a454baa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Changed: + + * Creating METS from scratch will set creator agent and creation date, #147 + ## [0.8.3] - 2018-09-20 Changed: diff --git a/ocrd/model/mets-empty.xml b/ocrd/model/mets-empty.xml index 37bdc4900..dfa207fb8 100644 --- a/ocrd/model/mets-empty.xml +++ b/ocrd/model/mets-empty.xml @@ -1,22 +1,21 @@ - - - DFG-Koordinierungsprojekt zur Weiterentwicklung von Verfahren der Optical Character Recognition (OCR-D) - OCR-D - - - - - - - - - - - - - - + + + ocrd/core v{{ VERSION }} + + + + + + + + + + + + + + diff --git a/ocrd/model/ocrd_mets.py b/ocrd/model/ocrd_mets.py index 1c72aa6d3..9fdf46c7c 100644 --- a/ocrd/model/ocrd_mets.py +++ b/ocrd/model/ocrd_mets.py @@ -1,10 +1,28 @@ -from ocrd.constants import NAMESPACES as NS, TAG_METS_FILE, TAG_METS_FILEGRP, IDENTIFIER_PRIORITY, TAG_MODS_IDENTIFIER +from datetime import datetime + +from ocrd.constants import ( + NAMESPACES as NS, + TAG_METS_FILE, + TAG_METS_FILEGRP, + IDENTIFIER_PRIORITY, + TAG_MODS_IDENTIFIER, + METS_XML_EMPTY, + VERSION +) from .ocrd_xml_base import OcrdXmlDocument, ET from .ocrd_file import OcrdFile class OcrdMets(OcrdXmlDocument): + @staticmethod + def empty_mets(): + """Return a new OcrdMets built from METS_XML_EMPTY with VERSION and an ISO 8601 creation timestamp filled in.""" + tpl = METS_XML_EMPTY.decode('utf-8') + tpl = tpl.replace('{{ VERSION }}', VERSION) + tpl = tpl.replace('{{ NOW }}', datetime.now().isoformat()) + return OcrdMets(content=tpl.encode('utf-8')) + def __init__(self, file_by_id=None, **kwargs): super(OcrdMets, self).__init__(**kwargs) if file_by_id is None: diff --git a/ocrd/resolver.py b/ocrd/resolver.py index 35ba222ed..349d11b19 100644 --- a/ocrd/resolver.py +++ b/ocrd/resolver.py @@ -4,7 +4,7 @@ import tempfile import requests -from ocrd.constants import METS_XML_EMPTY, TMP_PREFIX, EXT_TO_MIME +from ocrd.constants import TMP_PREFIX, EXT_TO_MIME from ocrd.utils import getLogger, 
safe_filename from ocrd.workspace import Workspace from ocrd.model import OcrdMets @@ -221,7 +221,7 @@ def workspace_from_nothing(self, directory, mets_basename='mets.xml', clobber_me mets_fpath = os.path.join(directory, mets_basename) if not clobber_mets and os.path.exists(mets_fpath): raise Exception("Not clobbering existing mets.xml in '%s'." % directory) - mets = OcrdMets(content=METS_XML_EMPTY) + mets = OcrdMets.empty_mets() with open(mets_fpath, 'wb') as fmets: log.info("Writing %s", mets_fpath) fmets.write(mets.to_xml(xmllint=True)) @@ -244,7 +244,7 @@ def workspace_from_folder(self, directory, return_mets=False, clobber_mets=False if not clobber_mets and os.path.exists(os.path.join(directory, 'mets.xml')): raise Exception("Not clobbering existing mets.xml in '%s'." % directory) - mets = OcrdMets(content=METS_XML_EMPTY) + mets = OcrdMets.empty_mets() if not os.path.exists(directory): os.makedirs(directory) diff --git a/test/model/test_ocrd_mets.py b/test/model/test_ocrd_mets.py index d3ab3df8d..b91fe2ee4 100644 --- a/test/model/test_ocrd_mets.py +++ b/test/model/test_ocrd_mets.py @@ -1,6 +1,6 @@ from test.base import TestCase, main, assets -from ocrd.constants import MIMETYPE_PAGE, METS_XML_EMPTY +from ocrd.constants import MIMETYPE_PAGE, VERSION from ocrd.model import OcrdMets class TestOcrdMets(TestCase): @@ -14,10 +14,15 @@ def test_unique_identifier(self): self.assertEqual(self.mets.unique_identifier, 'foo', 'Right identifier after change') def test_unique_identifier_from_nothing(self): - self.mets = OcrdMets(content=METS_XML_EMPTY) - self.assertEqual(self.mets.unique_identifier, None, 'no identifier') - self.mets.unique_identifier = 'foo' - self.assertEqual(self.mets.unique_identifier, 'foo', 'Right identifier after change') + mets = OcrdMets.empty_mets() + self.assertEqual(mets.unique_identifier, None, 'no identifier') + mets.unique_identifier = 'foo' + self.assertEqual(mets.unique_identifier, 'foo', 'Right identifier after change') + as_string = 
mets.to_xml().decode('utf-8') + self.assertIn('ocrd/core v%s' % VERSION, as_string) + # The creation date is "now", so check it was filled in without pinning a year. + self.assertIn('CREATEDATE="', as_string) + self.assertNotIn('{{ NOW }}', as_string) def test_file_groups(self): self.assertEqual(len(self.mets.file_groups), 17, '17 file groups')