From ffbb9cd2c1e95627e3f64f886d140510446b3775 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 7 Nov 2023 18:12:59 +0100 Subject: [PATCH 1/6] Fix iterjobs() without database --- pyiron_base/project/generic.py | 43 +++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) diff --git a/pyiron_base/project/generic.py b/pyiron_base/project/generic.py index 276d32989..c43efb4b4 100644 --- a/pyiron_base/project/generic.py +++ b/pyiron_base/project/generic.py @@ -581,14 +581,41 @@ def iter_jobs( case, you may seriously wish to consider setting `convert_to_object=False` and access only the HDF5/JobCore representation of the jobs instead. """ - job_id_lst = self.job_table(recursive=recursive, **kwargs)["id"] - if progress: - job_id_lst = tqdm(job_id_lst) - for job_id in job_id_lst: - if path is not None: - yield self.inspect(job_id)[path] - else: # Backwards compatibility - in future the option convert_to_object should be removed - yield self.load(job_id, convert_to_object=convert_to_object) + if not isinstance(self.db, FileTable): + job_id_lst = self.job_table(recursive=recursive, **kwargs)["id"] + if progress: + job_id_lst = tqdm(job_id_lst) + for job_id in job_id_lst: + if path is not None: + yield self.inspect(job_id)[path] + else: # Backwards compatibility - in future the option convert_to_object should be removed + yield self.load(job_id, convert_to_object=convert_to_object) + else: + db_entry_lst = [row[1].to_dict() for row in self.job_table().iterrows()] + table_columns = [ + "job", + "subjob", + "projectpath", + "project", + "status", + "hamilton", + "hamversion", + ] + if progress: + db_entry_lst = tqdm(db_entry_lst) + for db_entry in db_entry_lst: + if path is not None: + yield self.load_from_jobpath( + job_id=None, + db_entry={column: db_entry[column] for column in table_columns}, + convert_to_object=False, + )[path] + else: + yield self.load_from_jobpath( + job_id=None, + db_entry={column: db_entry[column] for column in 
table_columns}, + convert_to_object=True, + ) def iter_output(self, recursive=True): """ From a449c018b7dda24669b4b84b83ef084ed89c499b Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 8 Nov 2023 08:37:27 +0100 Subject: [PATCH 2/6] refactor --- pyiron_base/project/generic.py | 50 ++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/pyiron_base/project/generic.py b/pyiron_base/project/generic.py index c43efb4b4..aa63a449b 100644 --- a/pyiron_base/project/generic.py +++ b/pyiron_base/project/generic.py @@ -582,16 +582,11 @@ def iter_jobs( representation of the jobs instead. """ if not isinstance(self.db, FileTable): - job_id_lst = self.job_table(recursive=recursive, **kwargs)["id"] - if progress: - job_id_lst = tqdm(job_id_lst) - for job_id in job_id_lst: - if path is not None: - yield self.inspect(job_id)[path] - else: # Backwards compatibility - in future the option convert_to_object should be removed - yield self.load(job_id, convert_to_object=convert_to_object) + job_lst = [ + [job_id, None] + for job_id in self.job_table(recursive=recursive, **kwargs)["id"] + ] else: - db_entry_lst = [row[1].to_dict() for row in self.job_table().iterrows()] table_columns = [ "job", "subjob", @@ -601,21 +596,28 @@ def iter_jobs( "hamilton", "hamversion", ] - if progress: - db_entry_lst = tqdm(db_entry_lst) - for db_entry in db_entry_lst: - if path is not None: - yield self.load_from_jobpath( - job_id=None, - db_entry={column: db_entry[column] for column in table_columns}, - convert_to_object=False, - )[path] - else: - yield self.load_from_jobpath( - job_id=None, - db_entry={column: db_entry[column] for column in table_columns}, - convert_to_object=True, - ) + job_lst = [ + [None, {column: db_entry[column] for column in table_columns}] + for db_entry in [ + row[1].to_dict() for row in self.job_table().iterrows() + ] + ] + + if progress: + job_lst = tqdm(job_lst) + for job_id, db_entry in job_lst: + if path is not None: + yield 
self.load_from_jobpath( + job_id=job_id, + db_entry=db_entry, + convert_to_object=False, + )[path] + else: # Backwards compatibility - in future the option convert_to_object should be removed + yield self.load_from_jobpath( + job_id=job_id, + db_entry=db_entry, + convert_to_object=True, + ) def iter_output(self, recursive=True): """ From 1620bf8f574076a41d3a2e07f34b506063d1e409 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 8 Nov 2023 08:40:36 +0100 Subject: [PATCH 3/6] call job_table() only once --- pyiron_base/project/generic.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pyiron_base/project/generic.py b/pyiron_base/project/generic.py index aa63a449b..155431044 100644 --- a/pyiron_base/project/generic.py +++ b/pyiron_base/project/generic.py @@ -581,11 +581,9 @@ def iter_jobs( case, you may seriously wish to consider setting `convert_to_object=False` and access only the HDF5/JobCore representation of the jobs instead. """ + job_table = self.job_table(recursive=recursive, **kwargs) if not isinstance(self.db, FileTable): - job_lst = [ - [job_id, None] - for job_id in self.job_table(recursive=recursive, **kwargs)["id"] - ] + job_lst = [[job_id, None] for job_id in job_table["id"]] else: table_columns = [ "job", @@ -598,9 +596,7 @@ def iter_jobs( ] job_lst = [ [None, {column: db_entry[column] for column in table_columns}] - for db_entry in [ - row[1].to_dict() for row in self.job_table().iterrows() - ] + for db_entry in [row[1].to_dict() for row in job_table.iterrows()] ] if progress: From fe830f8284d95f3e85e2c1c4d6efcff0f27b8e5f Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 21 Nov 2023 18:32:05 +0100 Subject: [PATCH 4/6] Add comment about database columns --- pyiron_base/project/generic.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyiron_base/project/generic.py b/pyiron_base/project/generic.py index 155431044..b154c81d1 100644 --- a/pyiron_base/project/generic.py +++ b/pyiron_base/project/generic.py @@ 
-585,6 +585,10 @@ def iter_jobs( if not isinstance(self.db, FileTable): job_lst = [[job_id, None] for job_id in job_table["id"]] else: + # From all the possible database columns, the following ones are removed: + # ["id", "chemicalformula", "timestart", "computer", "parentid", + # "username", "timestop", "totalcputime", "masterid"] + # because those are not used when running without database and can lead to errors. table_columns = [ "job", "subjob", From 6967a040d86a56a10fed0fcf5d179ee2b22e158f Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Wed, 22 Nov 2023 07:03:42 +0100 Subject: [PATCH 5/6] Update pyiron_base/project/generic.py --- pyiron_base/project/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/project/generic.py b/pyiron_base/project/generic.py index b154c81d1..af14c40db 100644 --- a/pyiron_base/project/generic.py +++ b/pyiron_base/project/generic.py @@ -616,7 +616,7 @@ def iter_jobs( yield self.load_from_jobpath( job_id=job_id, db_entry=db_entry, - convert_to_object=True, + convert_to_object=convert_to_object, ) def iter_output(self, recursive=True): From d0b4fe7c9e2b8838bf6f829e708911f3b633a9e1 Mon Sep 17 00:00:00 2001 From: liamhuber Date: Tue, 21 Nov 2023 11:02:11 -0800 Subject: [PATCH 6/6] Add the test from the issue --- tests/project/test_project.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/project/test_project.py b/tests/project/test_project.py index 3402fe18c..399d57772 100644 --- a/tests/project/test_project.py +++ b/tests/project/test_project.py @@ -207,6 +207,33 @@ def test_get_iter_jobs(self): self.assertIsInstance([val for val in self.project.iter_jobs(recursive=True, status="suspended", convert_to_object=True)][0], ToyJob) + def test_iter_jobs_without_database(self): + pr = Project('test_iter_jobs_without_database') + database_disabled = pr.state.settings.configuration["disable_database"] + pr.state.update({"disable_database": True}) + + pr_2 = pr.open("sub_1") +
job_1 = pr_2.create_job(ToyJob, "toy1") + job_1.run() + + pr_3 = pr.open("sub_2") + job_1 = pr_3.create_job(ToyJob, "toy2") + job_1.run() + + try: + job_names = [] + for job in pr.iter_jobs(status="finished"): + job_names.append(job.job_name) + self.assertListEqual( + ["toy1", "toy2"], + job_names, + msg="Expected to iterate among nested projects even without database" + ) + finally: + pr.remove_jobs(recursive=True, silently=True) + pr.remove(enable=True) + pr.state.update({"disable_database": database_disabled}) + def test_maintenance_get_repository_status(self): df = self.project.maintenance.get_repository_status() self.assertIn('pyiron_base', df.Module.values)