From 381f8587c21bddba6600be0b47133a4ed806458a Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Fri, 21 Oct 2022 12:55:54 +0200 Subject: [PATCH 1/7] Pass the force parameter when merging --- ocrd_models/ocrd_models/ocrd_mets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/ocrd_models/ocrd_models/ocrd_mets.py index 0e4a3e2dd..cd9c467af 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/ocrd_models/ocrd_models/ocrd_mets.py @@ -509,7 +509,8 @@ def merge(self, other_mets, fileGrp_mapping=None, fileId_mapping=None, pageId_ma mimetype=f_src.mimetype, url=f_src.url, ID=fileId_mapping.get(f_src.ID, f_src.ID), - pageId=pageId_mapping.get(f_src.pageId, f_src.pageId)) + pageId=pageId_mapping.get(f_src.pageId, f_src.pageId), + force=True) # FIXME: merge metsHdr, amdSec, dmdSec as well # FIXME: merge structMap logical and structLink as well if after_add_cb: From ef5ba4e8df159e702fda69cd7be878af823b240d Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Fri, 21 Oct 2022 14:04:10 +0200 Subject: [PATCH 2/7] Accept overwrite parameter instead of passing just True --- ocrd/ocrd/cli/workspace.py | 2 +- ocrd_models/ocrd_models/ocrd_mets.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py index d956d5845..51afda425 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/ocrd/ocrd/cli/workspace.py @@ -615,7 +615,7 @@ def merge(ctx, overwrite, copy_files, filegrp_mapping, fileid_mapping, pageid_ma file_grp=file_grp, file_id=file_id, page_id=page_id, - mimetype=mimetype, + mimetype=mimetype ) workspace.save_mets() diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/ocrd_models/ocrd_models/ocrd_mets.py index cd9c467af..f7410c1be 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/ocrd_models/ocrd_models/ocrd_mets.py @@ -485,7 +485,7 @@ def remove_physical_page_fptr(self, fileId): mets_div.remove(mets_fptr) return ret - def merge(self, other_mets, fileGrp_mapping=None, fileId_mapping=None, pageId_mapping=None, after_add_cb=None, **kwargs): + def merge(self, other_mets, overwrite=True, fileGrp_mapping=None, fileId_mapping=None, pageId_mapping=None, after_add_cb=None, **kwargs): """ Add all files from other_mets. @@ -510,7 +510,7 @@ def merge(self, other_mets, fileGrp_mapping=None, fileId_mapping=None, pageId_ma url=f_src.url, ID=fileId_mapping.get(f_src.ID, f_src.ID), pageId=pageId_mapping.get(f_src.pageId, f_src.pageId), - force=True) + force=overwrite) # FIXME: merge metsHdr, amdSec, dmdSec as well # FIXME: merge structMap logical and structLink as well if after_add_cb: From 6c934d91023d3aa4ae158719b51622bd043fe446 Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Fri, 21 Oct 2022 16:11:03 +0200 Subject: [PATCH 3/7] Keep consistency --- ocrd/ocrd/cli/workspace.py | 2 +- ocrd_models/ocrd_models/ocrd_mets.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py index 51afda425..fe95bcaae 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/ocrd/ocrd/cli/workspace.py @@ -607,7 +607,7 @@ def merge(ctx, overwrite, copy_files, filegrp_mapping, fileid_mapping, pageid_ma other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name)) workspace.merge( other_workspace, - overwrite=overwrite, + overwrite=True, # This does not work as well! copy_files=copy_files, fileGrp_mapping=filegrp_mapping, fileId_mapping=fileid_mapping, diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/ocrd_models/ocrd_models/ocrd_mets.py index f7410c1be..d35bc1bf7 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/ocrd_models/ocrd_models/ocrd_mets.py @@ -485,7 +485,7 @@ def remove_physical_page_fptr(self, fileId): mets_div.remove(mets_fptr) return ret - def merge(self, other_mets, overwrite=True, fileGrp_mapping=None, fileId_mapping=None, pageId_mapping=None, after_add_cb=None, **kwargs): + def merge(self, other_mets, force=False, fileGrp_mapping=None, fileId_mapping=None, pageId_mapping=None, after_add_cb=None, **kwargs): """ Add all files from other_mets. @@ -510,7 +510,7 @@ def merge(self, other_mets, overwrite=True, fileGrp_mapping=None, fileId_mapping url=f_src.url, ID=fileId_mapping.get(f_src.ID, f_src.ID), pageId=pageId_mapping.get(f_src.pageId, f_src.pageId), - force=overwrite) + force=force) # FIXME: merge metsHdr, amdSec, dmdSec as well # FIXME: merge structMap logical and structLink as well if after_add_cb: From 59221079e2a5bf77efe715943b9dbc1ca255c45a Mon Sep 17 00:00:00 2001 From: mehmedGIT Date: Fri, 21 Oct 2022 16:15:43 +0200 Subject: [PATCH 4/7] Use the cli overwrite --- ocrd/ocrd/cli/workspace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py index fe95bcaae..b4859c5d5 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/ocrd/ocrd/cli/workspace.py @@ -607,7 +607,7 @@ def merge(ctx, overwrite, copy_files, filegrp_mapping, fileid_mapping, pageid_ma other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name)) workspace.merge( other_workspace, - overwrite=True, # This does not work as well! + force=overwrite, copy_files=copy_files, fileGrp_mapping=filegrp_mapping, fileId_mapping=fileid_mapping, From 56f711e0aef7d7eeb8aaca43a4baefb97de5c7c5 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 21 Oct 2022 16:28:02 +0200 Subject: [PATCH 5/7] ocrd workspace merge: support both --force and --overwrite, test --- ocrd/ocrd/cli/workspace.py | 8 +++++--- tests/test_workspace.py | 23 +++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/ocrd/ocrd/cli/workspace.py b/ocrd/ocrd/cli/workspace.py index b4859c5d5..871d1e26b 100644 --- a/ocrd/ocrd/cli/workspace.py +++ b/ocrd/ocrd/cli/workspace.py @@ -582,14 +582,15 @@ def _handle_json_option(ctx, param, value): @workspace_cli.command('merge') @click.argument('METS_PATH') -@click.option('--overwrite/--no-overwrite', is_flag=True, default=False, help="Overwrite in case of file name conflicts with data from METS_PATH") +@click.option('--overwrite/--no-overwrite', is_flag=True, default=False, help="Overwrite on-disk file in case of file name conflicts with data from METS_PATH") +@click.option('--force/--no-force', is_flag=True, default=False, help="Overwrite mets:file from --mets with mets:file from METS_PATH if IDs clash") @click.option('--copy-files/--no-copy-files', is_flag=True, help="Copy files as well", default=True, show_default=True) @click.option('--fileGrp-mapping', help="JSON object mapping src to dest fileGrp", callback=_handle_json_option) @click.option('--fileId-mapping', help="JSON object mapping src to dest file ID", callback=_handle_json_option) @click.option('--pageId-mapping', help="JSON object mapping src to dest page ID", callback=_handle_json_option) @mets_find_options @pass_workspace -def merge(ctx, overwrite, copy_files, filegrp_mapping, fileid_mapping, pageid_mapping, file_grp, file_id, page_id, mimetype, mets_path): # pylint: disable=redefined-builtin +def merge(ctx, overwrite, force, copy_files, filegrp_mapping, fileid_mapping, pageid_mapping, file_grp, file_id, page_id, mimetype, mets_path): # pylint: disable=redefined-builtin """ Merges this workspace with the workspace that contains ``METS_PATH`` @@ -607,7 +608,8 @@ def merge(ctx, overwrite, copy_files, filegrp_mapping, fileid_mapping, pageid_ma other_workspace = Workspace(ctx.resolver, directory=str(mets_path.parent), mets_basename=str(mets_path.name)) workspace.merge( other_workspace, - force=overwrite, + force=force, + overwrite=overwrite, copy_files=copy_files, fileGrp_mapping=filegrp_mapping, fileId_mapping=fileid_mapping, diff --git a/tests/test_workspace.py b/tests/test_workspace.py index 6b049f0c2..755f5ff6f 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -679,6 +679,29 @@ def test_merge_with_filter(plain_workspace, tmp_path): assert f.pageId in [page_id1, page_id2] assert f.ID in [file_id1, file_id2] +def test_merge_force(plain_workspace, tmp_path): + resolver = Resolver() + + # target ws + page_id1, file_id1, file_grp1 = 'page1', 'ID1', 'GRP1' + plain_workspace.add_file(file_grp1, file_id=file_id1, mimetype='image/tiff', page_id=page_id1) + + # source ws + dst_path2 = tmp_path / 'foo' + ws2 = resolver.workspace_from_nothing(directory=dst_path2) + page_id2, file_id2, file_grp2 = 'page2', 'ID1', 'GRP2' + ws2.add_file(file_grp2, file_id=file_id2, mimetype='image/tiff', page_id=page_id2, url='bar') + + # fails because force is false + with pytest.raises(Exception) as fn_exc: + plain_workspace.merge(ws2, force=False) + + # works because force overrides ID clash + plain_workspace.merge(ws2, force=True) + + files = list(plain_workspace.find_files()) + assert len(files) == 1 + if __name__ == '__main__': main(__file__) From db049227a12bf959c41989eda3d937094ab9c9ef Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 21 Oct 2022 17:35:01 +0200 Subject: [PATCH 6/7] fix kwarg conflict inm merge --- ocrd_models/ocrd_models/ocrd_mets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/ocrd_models/ocrd_models/ocrd_mets.py b/ocrd_models/ocrd_models/ocrd_mets.py index 50b3136b9..4a6a80e81 100644 --- a/ocrd_models/ocrd_models/ocrd_mets.py +++ b/ocrd_models/ocrd_models/ocrd_mets.py @@ -509,7 +509,6 @@ def merge(self, other_mets, force=False, fileGrp_mapping=None, fileId_mapping=No fileGrp_mapping.get(f_src.fileGrp, f_src.fileGrp), mimetype=f_src.mimetype, url=f_src.url, - force=force, ID=fileId_mapping.get(f_src.ID, f_src.ID), pageId=pageId_mapping.get(f_src.pageId, f_src.pageId), force=force) From bc9d150e3a0ddb43e29d8a13dafe8a602cff592c Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 21 Oct 2022 17:49:19 +0200 Subject: [PATCH 7/7] test_merge_force: adapt to stricter ID clash check in add_file after #861 --- tests/test_workspace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_workspace.py b/tests/test_workspace.py index 43fec3547..7b19b7d34 100644 --- a/tests/test_workspace.py +++ b/tests/test_workspace.py @@ -716,7 +716,7 @@ def test_merge_force(plain_workspace, tmp_path): # source ws dst_path2 = tmp_path / 'foo' ws2 = resolver.workspace_from_nothing(directory=dst_path2) - page_id2, file_id2, file_grp2 = 'page2', 'ID1', 'GRP2' + page_id2, file_id2, file_grp2 = 'page1', 'ID1', 'GRP1' ws2.add_file(file_grp2, file_id=file_id2, mimetype='image/tiff', page_id=page_id2, url='bar') # fails because force is false