From 0f2c2df613a1a4849b49d95f97fe0e9ee399a920 Mon Sep 17 00:00:00 2001 From: afinit Date: Sat, 8 Jul 2023 13:30:40 -0500 Subject: [PATCH 1/3] Use with to open file in save_results --- plantcv/plantcv/classes.py | 68 +++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/plantcv/plantcv/classes.py b/plantcv/plantcv/classes.py index 4bde9fd90..3fc5f2b66 100644 --- a/plantcv/plantcv/classes.py +++ b/plantcv/plantcv/classes.py @@ -150,40 +150,40 @@ def save_results(self, filename, outformat="json"): json.dump(hierarchical_data, f) elif outformat.upper() == "CSV": # Open output CSV file - csv_table = open(filename, "w") - # Write the header - csv_table.write(",".join(map(str, ["sample", "trait", "value", "label"])) + "\n") - # Iterate over data samples - for sample in self.observations: - # Iterate over traits for each sample - for var in self.observations[sample]: - val = self.observations[sample][var]["value"] - # If the data type is a list or tuple we need to unpack the data - if isinstance(val, (list, tuple)): - # Combine each value with its label - for value, label in zip(self.observations[sample][var]["value"], - self.observations[sample][var]["label"]): - # Skip list of tuple data types - if not isinstance(value, tuple): - # Save one row per value-label - row = [sample, var, value, label] - csv_table.write(",".join(map(str, row)) + "\n") - # If the data type is Boolean, store as a numeric 1/0 instead of True/False - elif isinstance(val, bool): - row = [sample, - var, - int(self.observations[sample][var]["value"]), - self.observations[sample][var]["label"]] - csv_table.write(",".join(map(str, row)) + "\n") - # For all other supported data types, save one row per trait - # Assumes no unusual data types are present (possibly a bad assumption) - else: - row = [sample, - var, - self.observations[sample][var]["value"], - self.observations[sample][var]["label"] - ] - csv_table.write(",".join(map(str, row)) + "\n") + with open(filename, "w") as csv_table: + # Write the header + csv_table.write(",".join(map(str, ["sample", "trait", "value", "label"])) + "\n") + # Iterate over data samples + for sample in self.observations: + # Iterate over traits for each sample + for var in self.observations[sample]: + val = self.observations[sample][var]["value"] + # If the data type is a list or tuple we need to unpack the data + if isinstance(val, (list, tuple)): + # Combine each value with its label + for value, label in zip(self.observations[sample][var]["value"], + self.observations[sample][var]["label"]): + # Skip list of tuple data types + if not isinstance(value, tuple): + # Save one row per value-label + row = [sample, var, value, label] + csv_table.write(",".join(map(str, row)) + "\n") + # If the data type is Boolean, store as a numeric 1/0 instead of True/False + elif isinstance(val, bool): + row = [sample, + var, + int(self.observations[sample][var]["value"]), + self.observations[sample][var]["label"]] + csv_table.write(",".join(map(str, row)) + "\n") + # For all other supported data types, save one row per trait + # Assumes no unusual data types are present (possibly a bad assumption) + else: + row = [sample, + var, + self.observations[sample][var]["value"], + self.observations[sample][var]["label"] + ] + csv_table.write(",".join(map(str, row)) + "\n") def plot_dists(self, variable): """Plot a distribution of data. From 4f9fe8cb5a25222677cabd043843061a3833f2bf Mon Sep 17 00:00:00 2001 From: afinit Date: Sat, 8 Jul 2023 13:36:18 -0500 Subject: [PATCH 2/3] add error handling for invalid outformat in outputs.save_results --- plantcv/plantcv/classes.py | 2 ++ tests/plantcv/test_outputs.py | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/plantcv/plantcv/classes.py b/plantcv/plantcv/classes.py index 3fc5f2b66..4dc64cb13 100644 --- a/plantcv/plantcv/classes.py +++ b/plantcv/plantcv/classes.py @@ -184,6 +184,8 @@ def save_results(self, filename, outformat="json"): self.observations[sample][var]["label"] ] csv_table.write(",".join(map(str, row)) + "\n") + else: + raise ValueError("outformat must be one of (case insensitive): ['json', 'csv']") def plot_dists(self, variable): """Plot a distribution of data. diff --git a/tests/plantcv/test_outputs.py b/tests/plantcv/test_outputs.py index 040ecb3cb..f720bdc39 100644 --- a/tests/plantcv/test_outputs.py +++ b/tests/plantcv/test_outputs.py @@ -81,6 +81,14 @@ def test_save_results_csv(test_data, tmpdir): assert results == test_results +def test_save_results_invalid_outformat(): + """Test for PlantCV.""" + # Create output instance + outputs = Outputs() + with pytest.raises(ValueError): + outputs.save_results(filename="filename does not matter for this test", outformat="INVALIDOUTFORMAT") + + def test_clear_outputs(): """Test for PlantCV.""" # Create output instance From 8f02f2a3d5398bd69e6397fd9b7443c306f7fd02 Mon Sep 17 00:00:00 2001 From: afinit Date: Sat, 8 Jul 2023 15:54:21 -0500 Subject: [PATCH 3/3] Add ability to append to existing files with save_result. make append the default --- docs/jupyter.md | 2 +- docs/outputs.md | 11 +++++--- docs/updating.md | 1 + plantcv/plantcv/classes.py | 45 ++++++++++++++++++------------- tests/plantcv/test_outputs.py | 50 +++++++++++++++++++++++++++++++---- 5 files changed, 81 insertions(+), 28 deletions(-) diff --git a/docs/jupyter.md b/docs/jupyter.md index d099ae57c..e8a0b45be 100644 --- a/docs/jupyter.md +++ b/docs/jupyter.md @@ -132,7 +132,7 @@ img, imgpath, imgname = pcv.readimage(filename=args.image) # Jupyter here # Print data that gets collected into the Outputs -pcv.outputs.save_results(filename=args.result, outformat="json") +pcv.outputs.save_results(filename=args.result, outformat="json", append=True) ``` diff --git a/docs/outputs.md b/docs/outputs.md index eeeccb04b..295927dca 100644 --- a/docs/outputs.md +++ b/docs/outputs.md @@ -68,12 +68,15 @@ Methods are accessed as plantcv.outputs.*method*. * label: The label for each value, which will be useful when the data is a frequency table (e.g. hues). -**save_results**(*filename, outformat="json"*): Save results to a file +**save_results**(*filename, outformat="json", append=True*): Save results to a file * filename: Path and name of the output file * outformat: Output file format (default = "json"). Supports "json" and "csv" formats +* append: When true, **save_results** will append to existing files. In a json file, this overwrites existing observations with the same label. +In a csv file, this duplicates the same label. When this is false, **save_results** will overwrite existing files. + **Example use:** - [Use In VIS/NIR Tutorial](tutorials/vis_nir_tutorial.md) @@ -91,7 +94,7 @@ shape_img = pcv.analyze.size(img=img, labeled_mask=mask, n_labels=1, label="defa plant_area = pcv.outputs.observations['default1']['pixel_area']['value'] # Write shape data to results file -pcv.outputs.save_results(filename=args.result, outformat="json") +pcv.outputs.save_results(filename=args.result, outformat="json", append=True) # Will will print out results again, so clear the outputs before running NIR analysis pcv.outputs.clear() @@ -102,7 +105,7 @@ nir_hist = pcv.analyze.grayscale(gray_img=nir2, labeled_mask=nir_combinedmask, n shape_img = pcv.analyze.size(img=nir2, labeled_mask=nir_combinedmask, n_labels=1, label="default") # Write the NIR and shape data to a file -pcv.outputs.save_results(filename=args.coresult, outformat="json") +pcv.outputs.save_results(filename=args.coresult, outformat="json", append=True) ``` @@ -125,7 +128,7 @@ pcv.outputs.add_observation(sample='default', variable='percent_diseased', value=percent_diseased, label='percent') # Write custom data to results file -pcv.outputs.save_results(filename=args.result, outformat="json") +pcv.outputs.save_results(filename=args.result, outformat="json", append=True) ``` diff --git a/docs/updating.md b/docs/updating.md index 960cc70cd..27e6609f7 100644 --- a/docs/updating.md +++ b/docs/updating.md @@ -684,6 +684,7 @@ pages for more details on the input and output variable types. * pre v3.12: NA * post v3.12: **plantcv.outputs.save_results**(*filename, outformat="json"*) +* post v4.0: **plantcv.outputs.save_results**(*filename, outformat="json", append=True*) #### plantcv.photosynthesis.analyze_fvfm diff --git a/plantcv/plantcv/classes.py b/plantcv/plantcv/classes.py index 4dc64cb13..87a18b5a3 100644 --- a/plantcv/plantcv/classes.py +++ b/plantcv/plantcv/classes.py @@ -128,7 +128,7 @@ def add_observation(self, sample, variable, trait, method, scale, datatype, valu } # Method to save observations to a file - def save_results(self, filename, outformat="json"): + def save_results(self, filename, outformat="json", append=True): """Save results to a file. Keyword arguments/parameters: @@ -142,17 +142,28 @@ def save_results(self, filename, outformat="json"): if os.path.isfile(filename): with open(filename, 'r') as f: hierarchical_data = json.load(f) - hierarchical_data["observations"] = self.observations + if append: + observations = hierarchical_data["observations"] | self.observations + else: + observations = self.observations + + hierarchical_data["observations"] = observations else: hierarchical_data = {"metadata": {}, "observations": self.observations} with open(filename, mode='w') as f: json.dump(hierarchical_data, f) elif outformat.upper() == "CSV": + if append and os.path.isfile(filename): + mode = "a" + else: + mode = "w" # Open output CSV file - with open(filename, "w") as csv_table: - # Write the header - csv_table.write(",".join(map(str, ["sample", "trait", "value", "label"])) + "\n") + with open(filename, mode) as csv_table: + # Write the header if not appending + if mode == "w": + csv_table.write(",".join(map(str, ["sample", "trait", "value", "label"])) + "\n") + # Iterate over data samples for sample in self.observations: # Iterate over traits for each sample @@ -168,21 +179,19 @@ def save_results(self, filename, outformat="json"): # Save one row per value-label row = [sample, var, value, label] csv_table.write(",".join(map(str, row)) + "\n") - # If the data type is Boolean, store as a numeric 1/0 instead of True/False - elif isinstance(val, bool): - row = [sample, - var, - int(self.observations[sample][var]["value"]), - self.observations[sample][var]["label"]] - csv_table.write(",".join(map(str, row)) + "\n") - # For all other supported data types, save one row per trait - # Assumes no unusual data types are present (possibly a bad assumption) else: + # If the data type is Boolean, store as a numeric 1/0 instead of True/False + if isinstance(val, bool): + outputValue = int(val) + # For all other supported data types, save one row per trait + # Assumes no unusual data types are present (possibly a bad assumption) + else: + outputValue = val + row = [sample, - var, - self.observations[sample][var]["value"], - self.observations[sample][var]["label"] - ] + var, + outputValue, + self.observations[sample][var]["label"]] csv_table.write(",".join(map(str, row)) + "\n") else: raise ValueError("outformat must be one of (case insensitive): ['json', 'csv']") diff --git a/tests/plantcv/test_outputs.py b/tests/plantcv/test_outputs.py index f720bdc39..1289e8106 100644 --- a/tests/plantcv/test_outputs.py +++ b/tests/plantcv/test_outputs.py @@ -35,11 +35,24 @@ def test_save_results_json_newfile(tmpdir): outputs = Outputs() outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none', datatype=str, value="test", label="none") - outputs.save_results(filename=outfile, outformat="json") + outputs.save_results(filename=outfile, outformat="json", append=False) with open(outfile, "r") as fp: results = json.load(fp) assert results["observations"]["default"]["test"]["value"] == "test" +def test_save_results_json_newfile_append(tmpdir): + """Test for PlantCV.""" + # Create a test tmp directory + cache_dir = tmpdir.mkdir("cache") + outfile = os.path.join(cache_dir, "results.json") + # Create output instance + outputs = Outputs() + outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none', + datatype=str, value="test", label="none") + outputs.save_results(filename=outfile, outformat="json", append=True) + with open(outfile, "r") as fp: + results = json.load(fp) + assert results["observations"]["default"]["test"]["value"] == "test" def test_save_results_json_existing_file(test_data, tmpdir): """Test for PlantCV.""" @@ -51,11 +64,34 @@ def test_save_results_json_existing_file(test_data, tmpdir): outputs = Outputs() outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none', datatype=str, value="test", label="none") - outputs.save_results(filename=outfile, outformat="json") + outputs.save_results(filename=outfile, outformat="json", append=False) with open(outfile, "r") as fp: results = json.load(fp) assert results["observations"]["default"]["test"]["value"] == "test" +def test_save_results_json_existing_file_append(test_data, tmpdir): + """Test for PlantCV.""" + # Create a test tmp directory + cache_dir = tmpdir.mkdir("cache") + outfile = os.path.join(cache_dir, os.path.basename(test_data.outputs_results_json)) + copyfile(test_data.outputs_results_json, outfile) + # Create output instance + outputs = Outputs() + outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none', + datatype=str, value="test", label="none") + outputs.save_results(filename=outfile, outformat="json", append=True) + + outputs2 = Outputs() + outputs2.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none', + datatype=str, value="this data overwrites the original value for this", label="none") + outputs2.add_observation(sample='newdatas', variable='test', trait='test variable', method='test', scale='none', + datatype=str, value="some data here", label="none") + outputs2.save_results(filename=outfile, outformat="json", append=True) + + with open(outfile, "r") as fp: + results = json.load(fp) + assert results["observations"]["default"]["test"]["value"] == "this data overwrites the original value for this" + assert results["observations"]["newdatas"]["test"]["value"] == "some data here" def test_save_results_csv(test_data, tmpdir): """Test for PlantCV.""" @@ -69,11 +105,15 @@ def test_save_results_csv(test_data, tmpdir): scale='none', datatype=bool, value=True, label="none") outputs.add_observation(sample='default', variable='list', trait='list variable', method='list', scale='none', datatype=list, value=[1, 2, 3], label=[1, 2, 3]) - outputs.add_observation(sample='default', variable='tuple', trait='tuple variable', method='tuple', + outputs.save_results(filename=outfile, outformat="csv", append=True) + + outputs2 = Outputs() + outputs2.add_observation(sample='default', variable='tuple', trait='tuple variable', method='tuple', scale='none', datatype=tuple, value=(1, 2), label=(1, 2)) - outputs.add_observation(sample='default', variable='tuple_list', trait='list of tuples variable', + outputs2.add_observation(sample='default', variable='tuple_list', trait='list of tuples variable', method='tuple_list', scale='none', datatype=list, value=[(1, 2), (3, 4)], label=[1, 2]) - outputs.save_results(filename=outfile, outformat="csv") + outputs2.save_results(filename=outfile, outformat="csv", append=True) + with open(outfile, "r") as fp: results = fp.read() with open(test_data.outputs_results_csv, "r") as fp: