From 699c1abc8728a4d145445aaaca215010866d1b23 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:02:00 +0100 Subject: [PATCH 01/33] Used the same kwargs method as in the CNV function --- malariagen_data/anoph/g123.py | 18 ++++++++++++++---- malariagen_data/anoph/h12.py | 18 ++++++++++++++---- malariagen_data/anoph/h1x.py | 18 ++++++++++++++---- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/malariagen_data/anoph/g123.py b/malariagen_data/anoph/g123.py index 0e6da1ea2..639058021 100644 --- a/malariagen_data/anoph/g123.py +++ b/malariagen_data/anoph/g123.py @@ -319,6 +319,7 @@ def plot_g123_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, + circle_kwargs: Optional[gplt_params.circle_kwargs] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -375,15 +376,22 @@ def plot_g123_gwss_track( output_backend=output_backend, ) + circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} + circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) + circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) + circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( + "line_color", "black" + ) + circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( + "fill_color", None + ) + # plot G123 fig.scatter( x=x, y=g123, - size=3, marker="circle", - line_width=1, - line_color="black", - fill_color=None, + **circle_kwargs_mutable, ) # tidy up the plot @@ -420,6 +428,7 @@ def plot_g123_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 170, + circle_kwargs: Optional[gplt_params.circle_kwargs] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -441,6 +450,7 @@ def plot_g123_gwss( sizing_mode=sizing_mode, width=width, height=track_height, + circle_kwargs=circle_kwargs, show=False, output_backend=output_backend, inline_array=inline_array, diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index e01510b8b..91f5b7484 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -325,6 +325,7 @@ def plot_h12_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, + circle_kwargs: Optional[gplt_params.circle_kwargs] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -377,15 +378,22 @@ def plot_h12_gwss_track( output_backend=output_backend, ) + circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} + circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) + circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) + circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( + "line_color", "black" + ) + circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( + "fill_color", None + ) + # Plot H12. fig.scatter( x=x, y=h12, marker="circle", - size=3, - line_width=1, - line_color="black", - fill_color=None, + **circle_kwargs_mutable, ) # Tidy up the plot. @@ -422,6 +430,7 @@ def plot_h12_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 170, + circle_kwargs: Optional[gplt_params.circle_kwargs] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -441,6 +450,7 @@ def plot_h12_gwss( sizing_mode=sizing_mode, width=width, height=track_height, + circle_kwargs=circle_kwargs, show=False, output_backend=output_backend, ) diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 732a07636..22ba797ea 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -166,6 +166,7 @@ def plot_h1x_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, + circle_kwargs: Optional[gplt_params.circle_kwargs] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -219,15 +220,22 @@ def plot_h1x_gwss_track( output_backend=output_backend, ) + circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} + circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) + circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) + circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( + "line_color", "black" + ) + circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( + "fill_color", None + ) + # Plot H1X. fig.scatter( x=x, y=h1x, marker="circle", - size=3, - line_width=1, - line_color="black", - fill_color=None, + **circle_kwargs_mutable, ) # Tidy up the plot. @@ -268,6 +276,7 @@ def plot_h1x_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 190, + circle_kwargs: Optional[gplt_params.circle_kwargs] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -288,6 +297,7 @@ def plot_h1x_gwss( sizing_mode=sizing_mode, width=width, height=track_height, + circle_kwargs=circle_kwargs, show=False, output_backend=output_backend, ) From e7e6b71ab1bba414bfef5255ddcef62a61ad36a9 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:19:18 +0100 Subject: [PATCH 02/33] Color depending on contig. Still imperfect. --- malariagen_data/anoph/h12.py | 113 +++++++++++++++++++++++++++++++++-- 1 file changed, 107 insertions(+), 6 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 91f5b7484..1a0694d6a 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -208,7 +208,7 @@ def plot_h12_calibration( else: return fig - def _h12_gwss( + def _h12_gwss_contig( self, contig, analysis, @@ -242,11 +242,97 @@ def _h12_gwss( # Compute window midpoints. pos = ds_haps["variant_position"].values x = allel.moving_statistic(pos, statistic=np.mean, size=window_size) + contigs = allel.moving_statistic( + ds_haps["variant_contig"].values, statistic=np.median, size=window_size + ) - results = dict(x=x, h12=h12) + results = dict(x=x, h12=h12, contigs=contigs) return results + def _h12_gwss( + self, + contig, + analysis, + window_size, + sample_sets, + sample_query, + cohort_size, + min_cohort_size, + max_cohort_size, + random_seed, + ): + results_tmp = self._h12_gwss_contig( + contig=contig, + analysis=analysis, + window_size=window_size, + sample_query=sample_query, + sample_sets=sample_sets, + cohort_size=cohort_size, + min_cohort_size=min_cohort_size, + max_cohort_size=max_cohort_size, + random_seed=random_seed, + ) + results = dict(x=results_tmp["x"], h12=results_tmp["h12"]) + + return results + + @check_types + @doc( + summary="Run h12 genome-wide selection scan.", + returns=dict( + x="An array containing the window centre point genomic positions.", + h12="An array with h12 statistic values for each window.", + ), + ) + def h12_gwss_contig( + self, + contig: base_params.contig, + window_size: h12_params.window_size, + analysis: hap_params.analysis = base_params.DEFAULT, + sample_query: Optional[base_params.sample_query] = None, + sample_sets: Optional[base_params.sample_sets] = None, + cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default, + min_cohort_size: Optional[ + base_params.min_cohort_size + ] = h12_params.min_cohort_size_default, + max_cohort_size: Optional[ + base_params.max_cohort_size + ] = h12_params.max_cohort_size_default, + random_seed: base_params.random_seed = 42, + ) -> Tuple[np.ndarray, np.ndarray]: + # Change this name if you ever change the behaviour of this function, to + # invalidate any previously cached data. + name = "h12_gwss_v1" + + params = dict( + contig=contig, + analysis=self._prep_phasing_analysis_param(analysis=analysis), + window_size=window_size, + sample_sets=self._prep_sample_sets_param(sample_sets=sample_sets), + # N.B., do not be tempted to convert this sample query into integer + # indices using _prep_sample_selection_params, because the indices + # are different in the haplotype data. + sample_query=sample_query, + cohort_size=cohort_size, + min_cohort_size=min_cohort_size, + max_cohort_size=max_cohort_size, + random_seed=random_seed, + ) + + # try: + # results = self.results_cache_get(name=name, params=params) + + # except CacheMiss: + results = self._h12_gwss_contig(**params) + self.results_cache_set(name=name, params=params, results=results) + + x = results["x"] + h12 = results["h12"] + contigs = results["contigs"] + + return x, h12, contigs + @check_types @doc( summary="Run h12 genome-wide selection scan.", @@ -331,7 +417,7 @@ def plot_h12_gwss_track( output_backend: gplt_params.output_backend = gplt_params.output_backend_default, ) -> gplt_params.figure: # Compute H12. - x, h12 = self.h12_gwss( + x, h12, contigs = self.h12_gwss_contig( contig=contig, analysis=analysis, window_size=window_size, @@ -343,6 +429,9 @@ def plot_h12_gwss_track( random_seed=random_seed, ) + # Hard coded contig colors: not good + color_dict = {0: "red", 1: "blue", 2: "orange", 3: "green", 4: "purple"} + # Determine X axis range. x_min = x[0] x_max = x[-1] @@ -381,9 +470,9 @@ def plot_h12_gwss_track( circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) - circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( - "line_color", "black" - ) + # circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( + # "line_color", "black" + # ) circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( "fill_color", None ) @@ -396,6 +485,18 @@ def plot_h12_gwss_track( **circle_kwargs_mutable, ) + # Plot H12. + for s in set(contigs): + color = color_dict[s] + idxs = contigs == s + fig.scatter( + x=x[idxs], + y=h12[idxs], + marker="circle", + line_color=color, + **circle_kwargs_mutable, + ) + # Tidy up the plot. fig.yaxis.axis_label = "H12" fig.yaxis.ticker = [0, 1] From 787045a2bbfdfe7b6f29f5b71a6105fb170d2902 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:23:12 +0100 Subject: [PATCH 03/33] Corrected wrong type. --- malariagen_data/anoph/h12.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 1a0694d6a..47379730e 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -300,7 +300,7 @@ def h12_gwss_contig( base_params.max_cohort_size ] = h12_params.max_cohort_size_default, random_seed: base_params.random_seed = 42, - ) -> Tuple[np.ndarray, np.ndarray]: + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: # Change this name if you ever change the behaviour of this function, to # invalidate any previously cached data. name = "h12_gwss_v1" From 2ae87be0fa0db12927b1af3d32e51ceb8c441a23 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:27:10 +0100 Subject: [PATCH 04/33] Corrected the doc. --- malariagen_data/anoph/h12.py | 1 + 1 file changed, 1 insertion(+) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 47379730e..f2e22ec49 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -283,6 +283,7 @@ def _h12_gwss( returns=dict( x="An array containing the window centre point genomic positions.", h12="An array with h12 statistic values for each window.", + contigs="An array with the contig for each window. The median is chosen for windows overlapping a change of contig.", ), ) def h12_gwss_contig( From c3834bd2f861eda2198031ed0ff38a7fea5ca6c2 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:40:06 +0100 Subject: [PATCH 05/33] Something weird changed the test while they were being performed. --- malariagen_data/anoph/h12.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 3c85d11c4..ab93d6ced 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -277,6 +277,8 @@ def _h12_gwss( min_cohort_size, max_cohort_size, random_seed, + chunks, + inline_array, ): results_tmp = self._h12_gwss_contig( contig=contig, @@ -288,6 +290,8 @@ def _h12_gwss( min_cohort_size=min_cohort_size, max_cohort_size=max_cohort_size, random_seed=random_seed, + chunks=chunks, + inline_array=inline_array, ) results = dict(x=results_tmp["x"], h12=results_tmp["h12"]) From df0a0c5a7aad14e63e38634118cc3c57510f8da5 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:52:43 +0100 Subject: [PATCH 06/33] Still a lot of weird stuff. --- malariagen_data/anoph/h12.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index ab93d6ced..f5f21a971 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -321,6 +321,8 @@ def h12_gwss_contig( base_params.max_cohort_size ] = h12_params.max_cohort_size_default, random_seed: base_params.random_seed = 42, + chunks: base_params.chunks = base_params.native_chunks, + inline_array: base_params.inline_array = base_params.inline_array_default, ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: # Change this name if you ever change the behaviour of this function, to # invalidate any previously cached data. @@ -339,14 +341,18 @@ def h12_gwss_contig( min_cohort_size=min_cohort_size, max_cohort_size=max_cohort_size, random_seed=random_seed, + chunks=chunks, + inline_array=inline_array, ) - # try: - # results = self.results_cache_get(name=name, params=params) + try: + results = self.results_cache_get(name=name, params=params) - # except CacheMiss: - results = self._h12_gwss_contig(**params) - self.results_cache_set(name=name, params=params, results=results) + except CacheMiss: + results = self._h12_gwss_contig( + chunks=chunks, inline_array=inline_array, **params + ) + self.results_cache_set(name=name, params=params, results=results) x = results["x"] h12 = results["h12"] From 52acfef6aeb367b38aaa5effde328e44dcfa392c Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:04:44 +0100 Subject: [PATCH 07/33] Still more weird stuff. --- malariagen_data/anoph/h12.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index f5f21a971..c02e89598 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -349,9 +349,7 @@ def h12_gwss_contig( results = self.results_cache_get(name=name, params=params) except CacheMiss: - results = self._h12_gwss_contig( - chunks=chunks, inline_array=inline_array, **params - ) + results = self._h12_gwss_contig(**params) self.results_cache_set(name=name, params=params, results=results) x = results["x"] From 212b164aad96a35dd39c91c9d1dbfc442a8fd2b2 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 17:07:18 +0100 Subject: [PATCH 08/33] Even more weird stuff. --- malariagen_data/anoph/h12.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index c02e89598..9c9330c9e 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -341,15 +341,15 @@ def h12_gwss_contig( min_cohort_size=min_cohort_size, max_cohort_size=max_cohort_size, random_seed=random_seed, - chunks=chunks, - inline_array=inline_array, ) try: results = self.results_cache_get(name=name, params=params) except CacheMiss: - results = self._h12_gwss_contig(**params) + results = self._h12_gwss_contig( + chunks=chunks, inline_array=inline_array, **params + ) self.results_cache_set(name=name, params=params, results=results) x = results["x"] From bda3efee22cb828137767e4fc4cfa07ff311047f Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 30 Sep 2024 18:02:35 +0100 Subject: [PATCH 09/33] The solution was obvious. --- malariagen_data/anoph/h12.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 9c9330c9e..19d44a442 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -326,7 +326,7 @@ def h12_gwss_contig( ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: # Change this name if you ever change the behaviour of this function, to # invalidate any previously cached data. - name = "h12_gwss_v1" + name = "h12_gwss_contig_v1" params = dict( contig=contig, From c63b572c85c2d9170ef72d4ecad032a21db9f9e8 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Tue, 1 Oct 2024 12:11:15 +0100 Subject: [PATCH 10/33] Time for a detour. --- malariagen_data/anoph/gplt_params.py | 10 +++++++++- malariagen_data/anoph/h12.py | 27 +++++++-------------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index 6612e981d..c0f521ecc 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -1,7 +1,7 @@ """Parameters for genome plotting functions. N.B., genome plots are always plotted with bokeh.""" -from typing import Literal, Mapping, Optional, Union +from typing import Literal, Mapping, Optional, Union, Final import bokeh.models from typing_extensions import Annotated, TypeAlias @@ -103,3 +103,11 @@ Mapping, "Passed through to bokeh line() function.", ] + +circle_kwargs_dict: Final[dict[int, circle_kwargs]] = { + 0: {"line_color": "red", "size": 3, "line_width": 1, "fill_color": None}, + 1: {"line_color": "blue", "size": 3, "line_width": 1, "fill_color": None}, + 2: {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, + 3: {"line_color": "green", "size": 3, "line_width": 1, "fill_color": None}, + 4: {"line_color": "purple", "size": 3, "line_width": 1, "fill_color": None}, +} diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 19d44a442..5a5248d79 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -460,9 +460,6 @@ def plot_h12_gwss_track( inline_array=inline_array, ) - # Hard coded contig colors: not good - color_dict = {0: "red", 1: "blue", 2: "orange", 3: "green", 4: "purple"} - # Determine X axis range. x_min = x[0] x_max = x[-1] @@ -498,34 +495,24 @@ def plot_h12_gwss_track( output_backend=output_backend, ) - circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} - circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) - circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) + # circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} + # circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) + # circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) # circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( # "line_color", "black" # ) - circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( - "fill_color", None - ) - - # Plot H12. - fig.scatter( - x=x, - y=h12, - marker="circle", - **circle_kwargs_mutable, - ) + # circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( + # "fill_color", None + # ) # Plot H12. for s in set(contigs): - color = color_dict[s] idxs = contigs == s fig.scatter( x=x[idxs], y=h12[idxs], marker="circle", - line_color=color, - **circle_kwargs_mutable, + **gplt_params.circle_kwargs_dict[s], ) # Tidy up the plot. From 7fd9c4b60e0ef2fd0a47bb0a0f9c13d4f42abd2a Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:27:07 +0100 Subject: [PATCH 11/33] Uniformised the basic and user-provided option, somewhat. --- malariagen_data/anoph/h12.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 5a5248d79..2edd92d99 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -508,11 +508,24 @@ def plot_h12_gwss_track( # Plot H12. for s in set(contigs): idxs = contigs == s + circle_kwargs_mutable = dict(circle_kwargs[s]) if circle_kwargs else {} + circle_kwargs_mutable["size"] = circle_kwargs_mutable.get( + "size", gplt_params.circle_kwargs_dict[s]["size"] + ) + circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get( + "line_width", gplt_params.circle_kwargs_dict[s]["line_width"] + ) + circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( + "line_color", gplt_params.circle_kwargs_dict[s]["line_color"] + ) + circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( + "fill_color", gplt_params.circle_kwargs_dict[s]["fill_color"] + ) fig.scatter( x=x[idxs], y=h12[idxs], marker="circle", - **gplt_params.circle_kwargs_dict[s], + **circle_kwargs_mutable, ) # Tidy up the plot. From 5a79a11db347e6863d125a380776d65b5ea999ea Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:35:54 +0100 Subject: [PATCH 12/33] A bit of clean-up. --- malariagen_data/anoph/h12.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 2edd92d99..d0bcee7cc 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -495,16 +495,6 @@ def plot_h12_gwss_track( output_backend=output_backend, ) - # circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} - # circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) - # circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) - # circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( - # "line_color", "black" - # ) - # circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( - # "fill_color", None - # ) - # Plot H12. for s in set(contigs): idxs = contigs == s From 87e27f2a1a4d4f776772e83811df286bc545bd47 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:49:55 +0100 Subject: [PATCH 13/33] Corrected a wrong type. --- malariagen_data/anoph/h12.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index d0bcee7cc..e96ab7a65 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -438,7 +438,7 @@ def plot_h12_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - circle_kwargs: Optional[gplt_params.circle_kwargs] = None, + circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -498,18 +498,20 @@ def plot_h12_gwss_track( # Plot H12. for s in set(contigs): idxs = contigs == s - circle_kwargs_mutable = dict(circle_kwargs[s]) if circle_kwargs else {} + circle_kwargs_mutable = ( + dict(circle_kwargs_dict[s]) if circle_kwargs_dict else {} + ) circle_kwargs_mutable["size"] = circle_kwargs_mutable.get( - "size", gplt_params.circle_kwargs_dict[s]["size"] + "size", gplt_params.default_circle_kwargs_dict[s]["size"] ) circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get( - "line_width", gplt_params.circle_kwargs_dict[s]["line_width"] + "line_width", gplt_params.default_circle_kwargs_dict[s]["line_width"] ) circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( - "line_color", gplt_params.circle_kwargs_dict[s]["line_color"] + "line_color", gplt_params.default_circle_kwargs_dict[s]["line_color"] ) circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( - "fill_color", gplt_params.circle_kwargs_dict[s]["fill_color"] + "fill_color", gplt_params.default_circle_kwargs_dict[s]["fill_color"] ) fig.scatter( x=x[idxs], From 948f86bba4e429126b96fecd721ab81cd979c549 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:56:24 +0100 Subject: [PATCH 14/33] Added the wrong params file. --- malariagen_data/anoph/gplt_params.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index c0f521ecc..90d93ac97 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -104,7 +104,12 @@ "Passed through to bokeh line() function.", ] -circle_kwargs_dict: Final[dict[int, circle_kwargs]] = { +circle_kwargs_dict: TypeAlias = Annotated[ + dict[int, circle_kwargs], + "A dictionary of arguments passed through to bokeh scatter() function with marker = 'circle' with a value per contig.", +] + +default_circle_kwargs_dict: Final[dict[int, circle_kwargs]] = { 0: {"line_color": "red", "size": 3, "line_width": 1, "fill_color": None}, 1: {"line_color": "blue", "size": 3, "line_width": 1, "fill_color": None}, 2: {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, From e49755fe2d9383ca5cf5979b8a84213d7fa10f45 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Tue, 1 Oct 2024 15:06:44 +0100 Subject: [PATCH 15/33] Forgot to forward a change. --- malariagen_data/anoph/h12.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index e96ab7a65..2f71f80b5 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -554,7 +554,7 @@ def plot_h12_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 170, - circle_kwargs: Optional[gplt_params.circle_kwargs] = None, + circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -576,7 +576,7 @@ def plot_h12_gwss( sizing_mode=sizing_mode, width=width, height=track_height, - circle_kwargs=circle_kwargs, + circle_kwargs_dict=circle_kwargs_dict, show=False, output_backend=output_backend, chunks=chunks, From 01ad5e34160c778eb1d454eb199ca883479b80db Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Wed, 2 Oct 2024 09:52:30 +0100 Subject: [PATCH 16/33] Updated the tests a bit. --- tests/anoph/test_h12.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/anoph/test_h12.py b/tests/anoph/test_h12.py index d0a2d9980..b516b88eb 100644 --- a/tests/anoph/test_h12.py +++ b/tests/anoph/test_h12.py @@ -142,10 +142,22 @@ def check_h12_gwss(*, api, h12_params): assert np.all(h12 >= 0) assert np.all(h12 <= 1) + circle_kwargs_dict = { + 0: {"line_color": "black", "size": 5, "line_width": 1, "fill_color": None}, + 1: {"line_color": "green", "size": 4, "line_width": 2, "fill_color": "black"}, + 2: {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, + 3: {"line_color": "green", "size": 2, "line_width": 2, "fill_color": "black"}, + 4: {"line_color": "purple", "size": 1, "line_width": 1, "fill_color": None}, + } + # Check plotting functions. - fig = api.plot_h12_gwss_track(**h12_params, show=False) + fig = api.plot_h12_gwss_track( + **h12_params, circle_kwargs_dict=circle_kwargs_dict, show=False + ) assert isinstance(fig, bokeh.models.Plot) - fig = api.plot_h12_gwss(**h12_params, show=False) + fig = api.plot_h12_gwss( + **h12_params, circle_kwargs_dict=circle_kwargs_dict, show=False + ) assert isinstance(fig, bokeh.models.GridPlot) From b2d9b0ae572e0b51f0d46ea98b7a7aa14c58ce18 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Wed, 2 Oct 2024 16:31:58 +0100 Subject: [PATCH 17/33] Dealt with h1x. --- malariagen_data/anoph/h1x.py | 154 ++++++++++++++++++++++++++++++----- tests/anoph/test_h1x.py | 12 ++- 2 files changed, 143 insertions(+), 23 deletions(-) diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 2f0276853..994728b10 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -24,7 +24,7 @@ def __init__( # to the superclass constructor. super().__init__(**kwargs) - def _h1x_gwss( + def _h1x_gwss_contig( self, contig, analysis, @@ -80,11 +80,114 @@ def _h1x_gwss( # Compute window midpoints. pos = ds1["variant_position"].values x = allel.moving_statistic(pos, statistic=np.mean, size=window_size) + contigs = allel.moving_statistic( + ds1["variant_contig"].values, statistic=np.median, size=window_size + ) - results = dict(x=x, h1x=h1x) + results = dict(x=x, h1x=h1x, contigs=contigs) return results + def _h1x_gwss( + self, + contig, + analysis, + window_size, + sample_sets, + cohort1_query, + cohort2_query, + cohort_size, + min_cohort_size, + max_cohort_size, + random_seed, + chunks, + inline_array, + ): + results_tmp = self._h1x_gwss_contig( + contig=contig, + analysis=analysis, + window_size=window_size, + cohort1_query=cohort1_query, + cohort2_query=cohort2_query, + sample_sets=sample_sets, + cohort_size=cohort_size, + min_cohort_size=min_cohort_size, + max_cohort_size=max_cohort_size, + random_seed=random_seed, + chunks=chunks, + inline_array=inline_array, + ) + + results = dict(x=results_tmp["x"], h1x=results_tmp["h1x"]) + + return results + + @check_types + @doc( + summary=""" + Run a H1X genome-wide scan to detect genome regions with + shared selective sweeps between two cohorts. + """, + returns=dict( + x="An array containing the window centre point genomic positions.", + h1x="An array with H1X statistic values for each window.", + contigs="An array with the contig for each window. The median is chosen for windows overlapping a change of contig.", + ), + ) + def h1x_gwss_contig( + self, + contig: base_params.contig, + window_size: h12_params.window_size, + cohort1_query: base_params.sample_query, + cohort2_query: base_params.sample_query, + analysis: hap_params.analysis = base_params.DEFAULT, + sample_sets: Optional[base_params.sample_sets] = None, + cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default, + min_cohort_size: Optional[ + base_params.min_cohort_size + ] = h12_params.min_cohort_size_default, + max_cohort_size: Optional[ + base_params.max_cohort_size + ] = h12_params.max_cohort_size_default, + random_seed: base_params.random_seed = 42, + chunks: base_params.chunks = base_params.native_chunks, + inline_array: base_params.inline_array = base_params.inline_array_default, + ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: + # Change this name if you ever change the behaviour of this function, to + # invalidate any previously cached data. + name = "h1x_gwss_contig_v1" + + params = dict( + contig=contig, + analysis=self._prep_phasing_analysis_param(analysis=analysis), + window_size=window_size, + # N.B., do not be tempted to convert these sample queries into integer + # indices using _prep_sample_selection_params, because the indices + # are different in the haplotype data. + cohort1_query=cohort1_query, + cohort2_query=cohort2_query, + sample_sets=self._prep_sample_sets_param(sample_sets=sample_sets), + cohort_size=cohort_size, + min_cohort_size=min_cohort_size, + max_cohort_size=max_cohort_size, + random_seed=random_seed, + ) + + try: + results = self.results_cache_get(name=name, params=params) + + except CacheMiss: + results = self._h1x_gwss_contig( + chunks=chunks, inline_array=inline_array, **params + ) + self.results_cache_set(name=name, params=params, results=results) + + x = results["x"] + h1x = results["h1x"] + contigs = results["contigs"] + + return x, h1x, contigs + @check_types @doc( summary=""" @@ -174,7 +277,7 @@ def plot_h1x_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - circle_kwargs: Optional[gplt_params.circle_kwargs] = None, + circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -182,7 +285,7 @@ def plot_h1x_gwss_track( inline_array: base_params.inline_array = base_params.inline_array_default, ) -> gplt_params.figure: # Compute H1X. - x, h1x = self.h1x_gwss( + x, h1x, contigs = self.h1x_gwss_contig( contig=contig, analysis=analysis, window_size=window_size, @@ -232,23 +335,30 @@ def plot_h1x_gwss_track( output_backend=output_backend, ) - circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} - circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) - circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) - circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( - "line_color", "black" - ) - circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( - "fill_color", None - ) - # Plot H1X. - fig.scatter( - x=x, - y=h1x, - marker="circle", - **circle_kwargs_mutable, - ) + for s in set(contigs): + idxs = contigs == s + circle_kwargs_mutable = ( + dict(circle_kwargs_dict[s]) if circle_kwargs_dict else {} + ) + circle_kwargs_mutable["size"] = circle_kwargs_mutable.get( + "size", gplt_params.default_circle_kwargs_dict[s]["size"] + ) + circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get( + "line_width", gplt_params.default_circle_kwargs_dict[s]["line_width"] + ) + circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( + "line_color", gplt_params.default_circle_kwargs_dict[s]["line_color"] + ) + circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( + "fill_color", gplt_params.default_circle_kwargs_dict[s]["fill_color"] + ) + fig.scatter( + x=x[idxs], + y=h1x[idxs], + marker="circle", + **circle_kwargs_mutable, + ) # Tidy up the plot. fig.yaxis.axis_label = "H1X" @@ -288,7 +398,7 @@ def plot_h1x_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 190, - circle_kwargs: Optional[gplt_params.circle_kwargs] = None, + circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -311,7 +421,7 @@ def plot_h1x_gwss( sizing_mode=sizing_mode, width=width, height=track_height, - circle_kwargs=circle_kwargs, + circle_kwargs_dict=circle_kwargs_dict, show=False, output_backend=output_backend, chunks=chunks, diff --git a/tests/anoph/test_h1x.py b/tests/anoph/test_h1x.py index 47720a76b..b1e982036 100644 --- a/tests/anoph/test_h1x.py +++ b/tests/anoph/test_h1x.py @@ -123,10 +123,20 @@ def check_h1x_gwss(*, api, h1x_params): assert np.all(h1x >= 0) assert np.all(h1x <= 1) + circle_kwargs_dict = { + 0: {"line_color": "black", "size": 5, "line_width": 1, "fill_color": None}, + 1: {"line_color": "green", "size": 4, "line_width": 2, "fill_color": "black"}, + 2: {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, + 3: {"line_color": "green", "size": 2, "line_width": 2, "fill_color": "black"}, + 4: {"line_color": "purple", "size": 1, "line_width": 1, "fill_color": None}, + } + # Check plotting functions. fig = api.plot_h1x_gwss_track(**h1x_params, show=False) assert isinstance(fig, bokeh.models.Plot) - fig = api.plot_h1x_gwss(**h1x_params, show=False) + fig = api.plot_h1x_gwss( + **h1x_params, circle_kwargs_dict=circle_kwargs_dict, show=False + ) assert isinstance(fig, bokeh.models.GridPlot) From 20947b3c647691e49b2bb249997789823134ebde Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:00:35 +0100 Subject: [PATCH 18/33] Gave the users more options. --- malariagen_data/anoph/gplt_params.py | 10 +++ malariagen_data/anoph/h12.py | 113 ++++++++++++++++++++++----- malariagen_data/anoph/h1x.py | 113 ++++++++++++++++++++++----- tests/anoph/test_h12.py | 4 +- tests/anoph/test_h1x.py | 2 +- 5 files changed, 203 insertions(+), 39 deletions(-) diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index 90d93ac97..5a3f7009c 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -116,3 +116,13 @@ 3: {"line_color": "green", "size": 3, "line_width": 1, "fill_color": None}, 4: {"line_color": "purple", "size": 3, "line_width": 1, "fill_color": None}, } + +circle_kwargs_list: TypeAlias = Annotated[ + list[circle_kwargs], + "A list of arguments passed through to bokeh scatter() function with marker = 'circle' with a value per contig.", +] + +circle_kwargs_param: TypeAlias = Annotated[ + Union[circle_kwargs, circle_kwargs_dict, circle_kwargs_list], + "A set of arguments passed through to bokeh scatter() function with marker = 'circle'.", +] diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 2f71f80b5..4aba93744 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -438,7 +438,7 @@ def plot_h12_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, + circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -460,6 +460,97 @@ def plot_h12_gwss_track( inline_array=inline_array, ) + if circle_kwargs_param is None: + circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict + elif isinstance(circle_kwargs_param, list): + if len(circle_kwargs_param) >= 5: + circle_kwargs_param_dict = { + i: circle_kwargs_param[i] for i in range(0, 5) + } + else: + circle_kwargs_param_dict = { + i: circle_kwargs_param[i] + for i in range(0, len(circle_kwargs_param)) + } + circle_kwargs_param_dict.update( + { + i: gplt_params.default_circle_kwargs_dict[i] + for i in range(len(circle_kwargs_param), 5) + } + ) + elif isinstance(circle_kwargs_param, dict): + if isinstance(list(circle_kwargs_param.keys())[0], str): + if list(circle_kwargs_param.keys())[0] in [ + "2L", + "2R", + "3L", + "3R", + "X", + "2RL", + "3RL", + ]: + circle_kwargs_param_dict = {} + for i in range(0, 5): + if i == 0: + if "2L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["2L"] + elif "2RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["2RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 1: + if "2R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] + elif "3RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 2: + if "3L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] + elif "X" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 3: + if "3R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 4: + if ( + "X" in circle_kwargs_param.keys() + ): # Ag3. Will also get a value for Af1 but it will be ignored. + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + else: + print("circle_kwargs") + circle_kwargs_param_dict = { + i: circle_kwargs_param for i in range(0, 5) + } + elif isinstance(list(circle_kwargs_param.keys())[0], int): + circle_kwargs_param_dict = {} + for i in range(0, 5): + if i in list(circle_kwargs_param.keys()): + circle_kwargs_param_dict[i] = circle_kwargs_param[i] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + else: + circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict + # Determine X axis range. x_min = x[0] x_max = x[-1] @@ -498,21 +589,7 @@ def plot_h12_gwss_track( # Plot H12. for s in set(contigs): idxs = contigs == s - circle_kwargs_mutable = ( - dict(circle_kwargs_dict[s]) if circle_kwargs_dict else {} - ) - circle_kwargs_mutable["size"] = circle_kwargs_mutable.get( - "size", gplt_params.default_circle_kwargs_dict[s]["size"] - ) - circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get( - "line_width", gplt_params.default_circle_kwargs_dict[s]["line_width"] - ) - circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( - "line_color", gplt_params.default_circle_kwargs_dict[s]["line_color"] - ) - circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( - "fill_color", gplt_params.default_circle_kwargs_dict[s]["fill_color"] - ) + circle_kwargs_mutable = circle_kwargs_param_dict[s] fig.scatter( x=x[idxs], y=h12[idxs], @@ -554,7 +631,7 @@ def plot_h12_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 170, - circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, + circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -576,7 +653,7 @@ def plot_h12_gwss( sizing_mode=sizing_mode, width=width, height=track_height, - circle_kwargs_dict=circle_kwargs_dict, + circle_kwargs_param=circle_kwargs_param, show=False, output_backend=output_backend, chunks=chunks, diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 994728b10..86c41b307 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -277,7 +277,7 @@ def plot_h1x_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, + circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -300,6 +300,97 @@ def plot_h1x_gwss_track( inline_array=inline_array, ) + if circle_kwargs_param is None: + circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict + elif isinstance(circle_kwargs_param, list): + if len(circle_kwargs_param) >= 5: + circle_kwargs_param_dict = { + i: circle_kwargs_param[i] for i in range(0, 5) + } + else: + circle_kwargs_param_dict = { + i: circle_kwargs_param[i] + for i in range(0, len(circle_kwargs_param)) + } + circle_kwargs_param_dict.update( + { + i: gplt_params.default_circle_kwargs_dict[i] + for i in range(len(circle_kwargs_param), 5) + } + ) + elif isinstance(circle_kwargs_param, dict): + if isinstance(list(circle_kwargs_param.keys())[0], str): + if list(circle_kwargs_param.keys())[0] in [ + "2L", + "2R", + "3L", + "3R", + "X", + "2RL", + "3RL", + ]: + circle_kwargs_param_dict = {} + for i in range(0, 5): + if i == 0: + if "2L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["2L"] + elif "2RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["2RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 1: + if "2R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] + elif "3RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 2: + if "3L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] + elif "X" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 3: + if "3R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 4: + if ( + "X" in circle_kwargs_param.keys() + ): # Ag3. Will also get a value for Af1 but it will be ignored. + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + else: + print("circle_kwargs") + circle_kwargs_param_dict = { + i: circle_kwargs_param for i in range(0, 5) + } + elif isinstance(list(circle_kwargs_param.keys())[0], int): + circle_kwargs_param_dict = {} + for i in range(0, 5): + if i in list(circle_kwargs_param.keys()): + circle_kwargs_param_dict[i] = circle_kwargs_param[i] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + else: + circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict + # Determine X axis range. x_min = x[0] x_max = x[-1] @@ -338,21 +429,7 @@ def plot_h1x_gwss_track( # Plot H1X. for s in set(contigs): idxs = contigs == s - circle_kwargs_mutable = ( - dict(circle_kwargs_dict[s]) if circle_kwargs_dict else {} - ) - circle_kwargs_mutable["size"] = circle_kwargs_mutable.get( - "size", gplt_params.default_circle_kwargs_dict[s]["size"] - ) - circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get( - "line_width", gplt_params.default_circle_kwargs_dict[s]["line_width"] - ) - circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( - "line_color", gplt_params.default_circle_kwargs_dict[s]["line_color"] - ) - circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( - "fill_color", gplt_params.default_circle_kwargs_dict[s]["fill_color"] - ) + circle_kwargs_mutable = circle_kwargs_param_dict[s] fig.scatter( x=x[idxs], y=h1x[idxs], @@ -398,7 +475,7 @@ def plot_h1x_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 190, - circle_kwargs_dict: Optional[gplt_params.circle_kwargs_dict] = None, + circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -421,7 +498,7 @@ def plot_h1x_gwss( sizing_mode=sizing_mode, width=width, height=track_height, - circle_kwargs_dict=circle_kwargs_dict, + circle_kwargs_param=circle_kwargs_param, show=False, output_backend=output_backend, chunks=chunks, diff --git a/tests/anoph/test_h12.py b/tests/anoph/test_h12.py index b516b88eb..7e7490b97 100644 --- a/tests/anoph/test_h12.py +++ b/tests/anoph/test_h12.py @@ -152,11 +152,11 @@ def check_h12_gwss(*, api, h12_params): # Check plotting functions. fig = api.plot_h12_gwss_track( - **h12_params, circle_kwargs_dict=circle_kwargs_dict, show=False + **h12_params, circle_kwargs_param=circle_kwargs_dict, show=False ) assert isinstance(fig, bokeh.models.Plot) fig = api.plot_h12_gwss( - **h12_params, circle_kwargs_dict=circle_kwargs_dict, show=False + **h12_params, circle_kwargs_param=circle_kwargs_dict[0], show=False ) assert isinstance(fig, bokeh.models.GridPlot) diff --git a/tests/anoph/test_h1x.py b/tests/anoph/test_h1x.py index b1e982036..3f759a13c 100644 --- a/tests/anoph/test_h1x.py +++ b/tests/anoph/test_h1x.py @@ -135,7 +135,7 @@ def check_h1x_gwss(*, api, h1x_params): fig = api.plot_h1x_gwss_track(**h1x_params, show=False) assert isinstance(fig, bokeh.models.Plot) fig = api.plot_h1x_gwss( - **h1x_params, circle_kwargs_dict=circle_kwargs_dict, show=False + **h1x_params, circle_kwargs_param=circle_kwargs_dict, show=False ) assert isinstance(fig, bokeh.models.GridPlot) From 5f84b9d70c692a350629e5b475276b612ba01ec4 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:36:30 +0100 Subject: [PATCH 19/33] Refining a type to defeat linting. --- malariagen_data/anoph/gplt_params.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index 5a3f7009c..9e8ac9bcf 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -105,7 +105,7 @@ ] circle_kwargs_dict: TypeAlias = Annotated[ - dict[int, circle_kwargs], + Union[dict[int, circle_kwargs], dict[str, circle_kwargs]], "A dictionary of arguments passed through to bokeh scatter() function with marker = 'circle' with a value per contig.", ] From 90c5982532500058fa332544e29822265d82cc4c Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:48:51 +0100 Subject: [PATCH 20/33] Same problem, different solution. --- malariagen_data/anoph/h12.py | 2 +- malariagen_data/anoph/h1x.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 4aba93744..70761f4ef 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -489,7 +489,7 @@ def plot_h12_gwss_track( "2RL", "3RL", ]: - circle_kwargs_param_dict = {} + circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} for i in range(0, 5): if i == 0: if "2L" in circle_kwargs_param.keys(): # Ag3 diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 86c41b307..8689112bd 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -329,7 +329,7 @@ def plot_h1x_gwss_track( "2RL", "3RL", ]: - circle_kwargs_param_dict = {} + circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} for i in range(0, 5): if i == 0: if "2L" in circle_kwargs_param.keys(): # Ag3 From 528d3ec91b5119b94ec053822398f6b5f25a106b Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:54:57 +0100 Subject: [PATCH 21/33] Same problem, not s different solution. --- malariagen_data/anoph/h12.py | 2 +- malariagen_data/anoph/h1x.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 70761f4ef..b5f413515 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -460,6 +460,7 @@ def plot_h12_gwss_track( inline_array=inline_array, ) + circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} if circle_kwargs_param is None: circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict elif isinstance(circle_kwargs_param, list): @@ -489,7 +490,6 @@ def plot_h12_gwss_track( "2RL", "3RL", ]: - circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} for i in range(0, 5): if i == 0: if "2L" in circle_kwargs_param.keys(): # Ag3 diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 8689112bd..2a89fbbd1 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -300,6 +300,7 @@ def plot_h1x_gwss_track( inline_array=inline_array, ) + circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} if circle_kwargs_param is None: circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict elif isinstance(circle_kwargs_param, list): @@ -329,7 +330,6 @@ def plot_h1x_gwss_track( "2RL", "3RL", ]: - circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} for i in range(0, 5): if i == 0: if "2L" in circle_kwargs_param.keys(): # Ag3 From fdef7ebfa93933a8a55d048627ae25edd928d4a5 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:58:49 +0100 Subject: [PATCH 22/33] Same problem, not a solution. --- malariagen_data/anoph/h12.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index b5f413515..d99fb24c8 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -493,9 +493,17 @@ def plot_h12_gwss_track( for i in range(0, 5): if i == 0: if "2L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["2L"] + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[ + i + ] # circle_kwargs_param["2L"] elif "2RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["2RL"] + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[ + i + ] # circle_kwargs_param["2RL"] else: circle_kwargs_param_dict[ i From f141953a0e9ba5fc39e92de45089f1dab8fcb36d Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:03:31 +0100 Subject: [PATCH 23/33] Trying a new idea. --- malariagen_data/anoph/h12.py | 147 +++++++++++++++++------------------ 1 file changed, 71 insertions(+), 76 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index d99fb24c8..bf744af42 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -479,83 +479,78 @@ def plot_h12_gwss_track( for i in range(len(circle_kwargs_param), 5) } ) - elif isinstance(circle_kwargs_param, dict): - if isinstance(list(circle_kwargs_param.keys())[0], str): - if list(circle_kwargs_param.keys())[0] in [ - "2L", - "2R", - "3L", - "3R", - "X", - "2RL", - "3RL", - ]: - for i in range(0, 5): - if i == 0: - if "2L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[ - i - ] # circle_kwargs_param["2L"] - elif "2RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[ - i - ] # circle_kwargs_param["2RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 1: - if "2R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] - elif "3RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 2: - if "3L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] - elif "X" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 3: - if "3R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 4: - if ( - "X" in circle_kwargs_param.keys() - ): # Ag3. Will also get a value for Af1 but it will be ignored. - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - else: - print("circle_kwargs") - circle_kwargs_param_dict = { - i: circle_kwargs_param for i in range(0, 5) - } - elif isinstance(list(circle_kwargs_param.keys())[0], int): - circle_kwargs_param_dict = {} + elif isinstance(circle_kwargs_param, dict[str, dict]): + if list(circle_kwargs_param.keys())[0] in [ + "2L", + "2R", + "3L", + "3R", + "X", + "2RL", + "3RL", + ]: for i in range(0, 5): - if i in list(circle_kwargs_param.keys()): - circle_kwargs_param_dict[i] = circle_kwargs_param[i] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] + if i == 0: + if "2L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[ + i + ] # circle_kwargs_param["2L"] + elif "2RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[ + i + ] # circle_kwargs_param["2RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 1: + if "2R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] + elif "3RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 2: + if "3L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] + elif "X" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 3: + if "3R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 4: + if ( + "X" in circle_kwargs_param.keys() + ): # Ag3. Will also get a value for Af1 but it will be ignored. + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + else: + circle_kwargs_param_dict = {i: circle_kwargs_param for i in range(0, 5)} + elif isinstance(circle_kwargs_param, dict[int, dict]): + for i in range(0, 5): + if i in list(circle_kwargs_param.keys()): + circle_kwargs_param_dict[i] = circle_kwargs_param[i] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] else: circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict From 2e34ff15abe5ed0c1eabbeef8bac6a48ead3c42c Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 15:10:28 +0100 Subject: [PATCH 24/33] Going the opposite way. --- malariagen_data/anoph/h12.py | 142 +++++++++++++++++------------------ 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index bf744af42..dc0a22efc 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -479,78 +479,78 @@ def plot_h12_gwss_track( for i in range(len(circle_kwargs_param), 5) } ) - elif isinstance(circle_kwargs_param, dict[str, dict]): - if list(circle_kwargs_param.keys())[0] in [ - "2L", - "2R", - "3L", - "3R", - "X", - "2RL", - "3RL", - ]: - for i in range(0, 5): - if i == 0: - if "2L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[ - i - ] # circle_kwargs_param["2L"] - elif "2RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[ - i - ] # circle_kwargs_param["2RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 1: - if "2R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] - elif "3RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 2: - if "3L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] - elif "X" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 3: - if "3R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 4: - if ( - "X" in circle_kwargs_param.keys() - ): # Ag3. Will also get a value for Af1 but it will be ignored. - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - else: - circle_kwargs_param_dict = {i: circle_kwargs_param for i in range(0, 5)} - elif isinstance(circle_kwargs_param, dict[int, dict]): - for i in range(0, 5): - if i in list(circle_kwargs_param.keys()): - circle_kwargs_param_dict[i] = circle_kwargs_param[i] + elif isinstance(circle_kwargs_param, dict): + if isinstance(list(circle_kwargs_param.keys())[0], str): + if list(circle_kwargs_param.keys())[0] in [ + "2L", + "2R", + "3L", + "3R", + "X", + "2RL", + "3RL", + ]: + for i in range(0, 5): + if i == 0: + ck = circle_kwargs_param["2L"] + if "2L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = ck + elif "2RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[ + i + ] # circle_kwargs_param["2RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 1: + if "2R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] + elif "3RL" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 2: + if "3L" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] + elif "X" in circle_kwargs_param.keys(): # Af1 + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 3: + if "3R" in circle_kwargs_param.keys(): # Ag3 + circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] + elif i == 4: + if ( + "X" in circle_kwargs_param.keys() + ): # Ag3. Will also get a value for Af1 but it will be ignored. + circle_kwargs_param_dict[i] = circle_kwargs_param["X"] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] + circle_kwargs_param_dict = { + i: circle_kwargs_param for i in range(0, 5) + } + elif isinstance(list(circle_kwargs_param.keys())[0], int): + for i in range(0, 5): + if i in list(circle_kwargs_param.keys()): + circle_kwargs_param_dict[i] = circle_kwargs_param[i] + else: + circle_kwargs_param_dict[ + i + ] = gplt_params.default_circle_kwargs_dict[i] else: circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict From ff544057908e4aa6fe2c11895ae948f6b663e804 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Thu, 3 Oct 2024 16:27:36 +0100 Subject: [PATCH 25/33] More tests. --- tests/anoph/test_h1x.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/anoph/test_h1x.py b/tests/anoph/test_h1x.py index 3f759a13c..020c46ae1 100644 --- a/tests/anoph/test_h1x.py +++ b/tests/anoph/test_h1x.py @@ -124,11 +124,21 @@ def check_h1x_gwss(*, api, h1x_params): assert np.all(h1x <= 1) circle_kwargs_dict = { - 0: {"line_color": "black", "size": 5, "line_width": 1, "fill_color": None}, - 1: {"line_color": "green", "size": 4, "line_width": 2, "fill_color": "black"}, - 2: {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, - 3: {"line_color": "green", "size": 2, "line_width": 2, "fill_color": "black"}, - 4: {"line_color": "purple", "size": 1, "line_width": 1, "fill_color": None}, + "2L": {"line_color": "black", "size": 5, "line_width": 1, "fill_color": None}, + "2R": { + "line_color": "green", + "size": 4, + "line_width": 2, + "fill_color": "black", + }, + "3L": {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, + "3R": { + "line_color": "green", + "size": 2, + "line_width": 2, + "fill_color": "black", + }, + "X": {"line_color": "purple", "size": 1, "line_width": 1, "fill_color": None}, } # Check plotting functions. From f98d6017fcd0e1ed14a939a63e4c765ad081abb4 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:08:44 +0100 Subject: [PATCH 26/33] Simplified everything --- malariagen_data/anoph/gplt_params.py | 21 ++--- malariagen_data/anoph/h12.py | 116 ++++----------------------- malariagen_data/anoph/h1x.py | 113 ++++---------------------- tests/anoph/test_h12.py | 31 ++++--- tests/anoph/test_h1x.py | 37 ++++----- 5 files changed, 70 insertions(+), 248 deletions(-) diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index 9e8ac9bcf..9221dd4d9 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -1,7 +1,7 @@ """Parameters for genome plotting functions. N.B., genome plots are always plotted with bokeh.""" -from typing import Literal, Mapping, Optional, Union, Final +from typing import Literal, Mapping, Optional, Union, Final, Sequence import bokeh.models from typing_extensions import Annotated, TypeAlias @@ -109,20 +109,9 @@ "A dictionary of arguments passed through to bokeh scatter() function with marker = 'circle' with a value per contig.", ] -default_circle_kwargs_dict: Final[dict[int, circle_kwargs]] = { - 0: {"line_color": "red", "size": 3, "line_width": 1, "fill_color": None}, - 1: {"line_color": "blue", "size": 3, "line_width": 1, "fill_color": None}, - 2: {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, - 3: {"line_color": "green", "size": 3, "line_width": 1, "fill_color": None}, - 4: {"line_color": "purple", "size": 3, "line_width": 1, "fill_color": None}, -} - -circle_kwargs_list: TypeAlias = Annotated[ - list[circle_kwargs], - "A list of arguments passed through to bokeh scatter() function with marker = 'circle' with a value per contig.", +contig_colors: TypeAlias = Annotated[ + Sequence[str], + "A sequence of colors.", ] -circle_kwargs_param: TypeAlias = Annotated[ - Union[circle_kwargs, circle_kwargs_dict, circle_kwargs_list], - "A set of arguments passed through to bokeh scatter() function with marker = 'circle'.", -] +contig_colors_default: Final[contig_colors] = bokeh.palettes.d3["Category20b"][5] diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index dc0a22efc..241804e6c 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -258,8 +258,13 @@ def _h12_gwss_contig( # Compute window midpoints. pos = ds_haps["variant_position"].values x = allel.moving_statistic(pos, statistic=np.mean, size=window_size) - contigs = allel.moving_statistic( - ds_haps["variant_contig"].values, statistic=np.median, size=window_size + contigs = np.asarray( + allel.moving_statistic( + ds_haps["variant_contig"].values, + statistic=np.median, + size=window_size, + ), + dtype=int, ) results = dict(x=x, h12=h12, contigs=contigs) @@ -438,7 +443,9 @@ def plot_h12_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, + contig_colors: Optional[ + gplt_params.contig_colors + ] = gplt_params.contig_colors_default, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -460,100 +467,6 @@ def plot_h12_gwss_track( inline_array=inline_array, ) - circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} - if circle_kwargs_param is None: - circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict - elif isinstance(circle_kwargs_param, list): - if len(circle_kwargs_param) >= 5: - circle_kwargs_param_dict = { - i: circle_kwargs_param[i] for i in range(0, 5) - } - else: - circle_kwargs_param_dict = { - i: circle_kwargs_param[i] - for i in range(0, len(circle_kwargs_param)) - } - circle_kwargs_param_dict.update( - { - i: gplt_params.default_circle_kwargs_dict[i] - for i in range(len(circle_kwargs_param), 5) - } - ) - elif isinstance(circle_kwargs_param, dict): - if isinstance(list(circle_kwargs_param.keys())[0], str): - if list(circle_kwargs_param.keys())[0] in [ - "2L", - "2R", - "3L", - "3R", - "X", - "2RL", - "3RL", - ]: - for i in range(0, 5): - if i == 0: - ck = circle_kwargs_param["2L"] - if "2L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = ck - elif "2RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[ - i - ] # circle_kwargs_param["2RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 1: - if "2R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] - elif "3RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 2: - if "3L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] - elif "X" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 3: - if "3R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 4: - if ( - "X" in circle_kwargs_param.keys() - ): # Ag3. Will also get a value for Af1 but it will be ignored. - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - else: - circle_kwargs_param_dict = { - i: circle_kwargs_param for i in range(0, 5) - } - elif isinstance(list(circle_kwargs_param.keys())[0], int): - for i in range(0, 5): - if i in list(circle_kwargs_param.keys()): - circle_kwargs_param_dict[i] = circle_kwargs_param[i] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - else: - circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict - # Determine X axis range. x_min = x[0] x_max = x[-1] @@ -592,12 +505,11 @@ def plot_h12_gwss_track( # Plot H12. for s in set(contigs): idxs = contigs == s - circle_kwargs_mutable = circle_kwargs_param_dict[s] fig.scatter( x=x[idxs], y=h12[idxs], marker="circle", - **circle_kwargs_mutable, + color=contig_colors[s % len(contig_colors)], ) # Tidy up the plot. @@ -634,7 +546,9 @@ def plot_h12_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 170, - circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, + contig_colors: Optional[ + gplt_params.contig_colors + ] = gplt_params.contig_colors_default, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -656,7 +570,7 @@ def plot_h12_gwss( sizing_mode=sizing_mode, width=width, height=track_height, - circle_kwargs_param=circle_kwargs_param, + contig_colors=contig_colors, show=False, output_backend=output_backend, chunks=chunks, diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 2a89fbbd1..16dfc5bed 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -80,8 +80,13 @@ def _h1x_gwss_contig( # Compute window midpoints. pos = ds1["variant_position"].values x = allel.moving_statistic(pos, statistic=np.mean, size=window_size) - contigs = allel.moving_statistic( - ds1["variant_contig"].values, statistic=np.median, size=window_size + contigs = np.asarray( + allel.moving_statistic( + ds1["variant_contig"].values, + statistic=np.median, + size=window_size, + ), + dtype=int, ) results = dict(x=x, h1x=h1x, contigs=contigs) @@ -277,7 +282,9 @@ def plot_h1x_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, + contig_colors: Optional[ + gplt_params.contig_colors + ] = gplt_params.contig_colors_default, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -300,97 +307,6 @@ def plot_h1x_gwss_track( inline_array=inline_array, ) - circle_kwargs_param_dict: dict[int, gplt_params.circle_kwargs] = {} - if circle_kwargs_param is None: - circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict - elif isinstance(circle_kwargs_param, list): - if len(circle_kwargs_param) >= 5: - circle_kwargs_param_dict = { - i: circle_kwargs_param[i] for i in range(0, 5) - } - else: - circle_kwargs_param_dict = { - i: circle_kwargs_param[i] - for i in range(0, len(circle_kwargs_param)) - } - circle_kwargs_param_dict.update( - { - i: gplt_params.default_circle_kwargs_dict[i] - for i in range(len(circle_kwargs_param), 5) - } - ) - elif isinstance(circle_kwargs_param, dict): - if isinstance(list(circle_kwargs_param.keys())[0], str): - if list(circle_kwargs_param.keys())[0] in [ - "2L", - "2R", - "3L", - "3R", - "X", - "2RL", - "3RL", - ]: - for i in range(0, 5): - if i == 0: - if "2L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["2L"] - elif "2RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["2RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 1: - if "2R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["2R"] - elif "3RL" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["3RL"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 2: - if "3L" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3L"] - elif "X" in circle_kwargs_param.keys(): # Af1 - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 3: - if "3R" in circle_kwargs_param.keys(): # Ag3 - circle_kwargs_param_dict[i] = circle_kwargs_param["3R"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - elif i == 4: - if ( - "X" in circle_kwargs_param.keys() - ): # Ag3. Will also get a value for Af1 but it will be ignored. - circle_kwargs_param_dict[i] = circle_kwargs_param["X"] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - else: - print("circle_kwargs") - circle_kwargs_param_dict = { - i: circle_kwargs_param for i in range(0, 5) - } - elif isinstance(list(circle_kwargs_param.keys())[0], int): - circle_kwargs_param_dict = {} - for i in range(0, 5): - if i in list(circle_kwargs_param.keys()): - circle_kwargs_param_dict[i] = circle_kwargs_param[i] - else: - circle_kwargs_param_dict[ - i - ] = gplt_params.default_circle_kwargs_dict[i] - else: - circle_kwargs_param_dict = gplt_params.default_circle_kwargs_dict - # Determine X axis range. x_min = x[0] x_max = x[-1] @@ -429,12 +345,11 @@ def plot_h1x_gwss_track( # Plot H1X. for s in set(contigs): idxs = contigs == s - circle_kwargs_mutable = circle_kwargs_param_dict[s] fig.scatter( x=x[idxs], y=h1x[idxs], marker="circle", - **circle_kwargs_mutable, + color=contig_colors[s % len(contig_colors)], ) # Tidy up the plot. @@ -475,7 +390,9 @@ def plot_h1x_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 190, - circle_kwargs_param: Optional[gplt_params.circle_kwargs_param] = None, + contig_colors: Optional[ + gplt_params.contig_colors + ] = gplt_params.contig_colors_default, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -498,7 +415,7 @@ def plot_h1x_gwss( sizing_mode=sizing_mode, width=width, height=track_height, - circle_kwargs_param=circle_kwargs_param, + contig_colors=contig_colors, show=False, output_backend=output_backend, chunks=chunks, diff --git a/tests/anoph/test_h12.py b/tests/anoph/test_h12.py index 7e7490b97..aa0b906a0 100644 --- a/tests/anoph/test_h12.py +++ b/tests/anoph/test_h12.py @@ -142,22 +142,27 @@ def check_h12_gwss(*, api, h12_params): assert np.all(h12 >= 0) assert np.all(h12 <= 1) - circle_kwargs_dict = { - 0: {"line_color": "black", "size": 5, "line_width": 1, "fill_color": None}, - 1: {"line_color": "green", "size": 4, "line_width": 2, "fill_color": "black"}, - 2: {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, - 3: {"line_color": "green", "size": 2, "line_width": 2, "fill_color": "black"}, - 4: {"line_color": "purple", "size": 1, "line_width": 1, "fill_color": None}, - } + x, h12, contigs = api.h12_gwss_contig(**h12_params) + + # Check results. + assert isinstance(x, np.ndarray) + assert isinstance(h12, np.ndarray) + assert isinstance(contigs, np.ndarray) + assert x.ndim == 1 + assert h12.ndim == 1 + assert contigs.ndim == 1 + assert x.shape == h12.shape + assert x.shape == contigs.shape + assert x.dtype.kind == "f" + assert h12.dtype.kind == "f" + assert contigs.dtype.kind == "i" + assert np.all(h12 >= 0) + assert np.all(h12 <= 1) # Check plotting functions. - fig = api.plot_h12_gwss_track( - **h12_params, circle_kwargs_param=circle_kwargs_dict, show=False - ) + fig = api.plot_h12_gwss_track(**h12_params, show=False) assert isinstance(fig, bokeh.models.Plot) - fig = api.plot_h12_gwss( - **h12_params, circle_kwargs_param=circle_kwargs_dict[0], show=False - ) + fig = api.plot_h12_gwss(**h12_params, contig_colors=["black", "red"], show=False) assert isinstance(fig, bokeh.models.GridPlot) diff --git a/tests/anoph/test_h1x.py b/tests/anoph/test_h1x.py index 020c46ae1..6cbb0df9e 100644 --- a/tests/anoph/test_h1x.py +++ b/tests/anoph/test_h1x.py @@ -123,30 +123,27 @@ def check_h1x_gwss(*, api, h1x_params): assert np.all(h1x >= 0) assert np.all(h1x <= 1) - circle_kwargs_dict = { - "2L": {"line_color": "black", "size": 5, "line_width": 1, "fill_color": None}, - "2R": { - "line_color": "green", - "size": 4, - "line_width": 2, - "fill_color": "black", - }, - "3L": {"line_color": "orange", "size": 3, "line_width": 1, "fill_color": None}, - "3R": { - "line_color": "green", - "size": 2, - "line_width": 2, - "fill_color": "black", - }, - "X": {"line_color": "purple", "size": 1, "line_width": 1, "fill_color": None}, - } + x, h1x, contigs = api.h1x_gwss_contig(**h1x_params) + + # Check results. + assert isinstance(x, np.ndarray) + assert isinstance(h1x, np.ndarray) + assert isinstance(contigs, np.ndarray) + assert x.ndim == 1 + assert h1x.ndim == 1 + assert contigs.ndim == 1 + assert x.shape == h1x.shape + assert x.shape == contigs.shape + assert x.dtype.kind == "f" + assert h1x.dtype.kind == "f" + assert contigs.dtype.kind == "i" + assert np.all(h1x >= 0) + assert np.all(h1x <= 1) # Check plotting functions. fig = api.plot_h1x_gwss_track(**h1x_params, show=False) assert isinstance(fig, bokeh.models.Plot) - fig = api.plot_h1x_gwss( - **h1x_params, circle_kwargs_param=circle_kwargs_dict, show=False - ) + fig = api.plot_h1x_gwss(**h1x_params, contig_colors=["black", "red"], show=False) assert isinstance(fig, bokeh.models.GridPlot) From 311f5872fdd6667d3a49dfc9e5285193eda6df3b Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:15:42 +0100 Subject: [PATCH 27/33] Changed sequence to list --- malariagen_data/anoph/gplt_params.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index 9221dd4d9..795dfbfe6 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -1,7 +1,7 @@ """Parameters for genome plotting functions. N.B., genome plots are always plotted with bokeh.""" -from typing import Literal, Mapping, Optional, Union, Final, Sequence +from typing import Literal, Mapping, Optional, Union, Final import bokeh.models from typing_extensions import Annotated, TypeAlias @@ -104,14 +104,9 @@ "Passed through to bokeh line() function.", ] -circle_kwargs_dict: TypeAlias = Annotated[ - Union[dict[int, circle_kwargs], dict[str, circle_kwargs]], - "A dictionary of arguments passed through to bokeh scatter() function with marker = 'circle' with a value per contig.", -] - contig_colors: TypeAlias = Annotated[ - Sequence[str], + list[str], "A sequence of colors.", ] -contig_colors_default: Final[contig_colors] = bokeh.palettes.d3["Category20b"][5] +contig_colors_default: Final[contig_colors] = list(bokeh.palettes.d3["Category20b"][5]) From 986004ec6f56c2d5d68dbf1a94a97c27f5dc4c2b Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:22:43 +0100 Subject: [PATCH 28/33] Fighting with len --- malariagen_data/anoph/h12.py | 8 ++------ malariagen_data/anoph/h1x.py | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 241804e6c..5fc94ffc6 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -443,9 +443,7 @@ def plot_h12_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - contig_colors: Optional[ - gplt_params.contig_colors - ] = gplt_params.contig_colors_default, + contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -546,9 +544,7 @@ def plot_h12_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 170, - contig_colors: Optional[ - gplt_params.contig_colors - ] = gplt_params.contig_colors_default, + contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 16dfc5bed..8ce791c64 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -282,9 +282,7 @@ def plot_h1x_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - contig_colors: Optional[ - gplt_params.contig_colors - ] = gplt_params.contig_colors_default, + contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -390,9 +388,7 @@ def plot_h1x_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 190, - contig_colors: Optional[ - gplt_params.contig_colors - ] = gplt_params.contig_colors_default, + contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, From bceb950f1f3b6688272519912b84cc28be7302ed Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:59:30 +0100 Subject: [PATCH 29/33] Corrected the merge --- malariagen_data/anoph/h12.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 1ffaed292..5492b6ba8 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -286,6 +286,7 @@ def _h12_gwss( window_size, sample_sets, sample_query, + sample_query_options, cohort_size, min_cohort_size, max_cohort_size, @@ -299,6 +300,7 @@ def _h12_gwss( window_size=window_size, sample_query=sample_query, sample_sets=sample_sets, + sample_query_options=sample_query_options, cohort_size=cohort_size, min_cohort_size=min_cohort_size, max_cohort_size=max_cohort_size, @@ -325,6 +327,7 @@ def h12_gwss_contig( window_size: h12_params.window_size, analysis: hap_params.analysis = base_params.DEFAULT, sample_query: Optional[base_params.sample_query] = None, + sample_query_options: Optional[base_params.sample_query_options] = None, sample_sets: Optional[base_params.sample_sets] = None, cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default, min_cohort_size: Optional[ @@ -350,6 +353,7 @@ def h12_gwss_contig( # indices using _prep_sample_selection_params, because the indices # are different in the haplotype data. sample_query=sample_query, + sample_query_options=sample_query_options, cohort_size=cohort_size, min_cohort_size=min_cohort_size, max_cohort_size=max_cohort_size, From 48fb21e91625e75699c0847be70e7f69b83bda15 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 7 Oct 2024 17:23:42 +0100 Subject: [PATCH 30/33] Returned g123 to its normal state --- malariagen_data/anoph/g123.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/malariagen_data/anoph/g123.py b/malariagen_data/anoph/g123.py index 328defc8f..66fde99d0 100644 --- a/malariagen_data/anoph/g123.py +++ b/malariagen_data/anoph/g123.py @@ -330,7 +330,6 @@ def plot_g123_gwss_track( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, height: gplt_params.height = 200, - circle_kwargs: Optional[gplt_params.circle_kwargs] = None, show: gplt_params.show = True, x_range: Optional[gplt_params.x_range] = None, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -388,22 +387,15 @@ def plot_g123_gwss_track( output_backend=output_backend, ) - circle_kwargs_mutable = dict(circle_kwargs) if circle_kwargs else {} - circle_kwargs_mutable["size"] = circle_kwargs_mutable.get("size", 3) - circle_kwargs_mutable["line_width"] = circle_kwargs_mutable.get("line_width", 1) - circle_kwargs_mutable["line_color"] = circle_kwargs_mutable.get( - "line_color", "black" - ) - circle_kwargs_mutable["fill_color"] = circle_kwargs_mutable.get( - "fill_color", None - ) - # plot G123 fig.scatter( x=x, y=g123, + size=3, marker="circle", - **circle_kwargs_mutable, + line_width=1, + line_color="black", + fill_color=None, ) # tidy up the plot From 60780e13b358c93736c8a353e5dea0fb38966d14 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Mon, 7 Oct 2024 17:24:36 +0100 Subject: [PATCH 31/33] Forgot 2 lines --- malariagen_data/anoph/g123.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/malariagen_data/anoph/g123.py b/malariagen_data/anoph/g123.py index 66fde99d0..df6cdc492 100644 --- a/malariagen_data/anoph/g123.py +++ b/malariagen_data/anoph/g123.py @@ -433,7 +433,6 @@ def plot_g123_gwss( sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default, width: gplt_params.width = gplt_params.width_default, track_height: gplt_params.track_height = 170, - circle_kwargs: Optional[gplt_params.circle_kwargs] = None, genes_height: gplt_params.genes_height = gplt_params.genes_height_default, show: gplt_params.show = True, output_backend: gplt_params.output_backend = gplt_params.output_backend_default, @@ -456,7 +455,6 @@ def plot_g123_gwss( sizing_mode=sizing_mode, width=width, height=track_height, - circle_kwargs=circle_kwargs, show=False, output_backend=output_backend, inline_array=inline_array, From 05a0fcc84a21253db7cf4086bf04a25f185f5bf3 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Sat, 19 Oct 2024 14:56:21 +0100 Subject: [PATCH 32/33] Merged the _contig versions into the main versions --- malariagen_data/anoph/h12.py | 102 ++------------------------------- malariagen_data/anoph/h1x.py | 106 ++--------------------------------- tests/anoph/test_h12.py | 14 +---- tests/anoph/test_h1x.py | 15 +---- 4 files changed, 10 insertions(+), 227 deletions(-) diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index 5492b6ba8..059354997 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -226,7 +226,7 @@ def plot_h12_calibration( else: return fig - def _h12_gwss_contig( + def _h12_gwss( self, contig, analysis, @@ -279,39 +279,6 @@ def _h12_gwss_contig( return results - def _h12_gwss( - self, - contig, - analysis, - window_size, - sample_sets, - sample_query, - sample_query_options, - cohort_size, - min_cohort_size, - max_cohort_size, - random_seed, - chunks, - inline_array, - ): - results_tmp = self._h12_gwss_contig( - contig=contig, - analysis=analysis, - window_size=window_size, - sample_query=sample_query, - sample_sets=sample_sets, - sample_query_options=sample_query_options, - cohort_size=cohort_size, - min_cohort_size=min_cohort_size, - max_cohort_size=max_cohort_size, - random_seed=random_seed, - chunks=chunks, - inline_array=inline_array, - ) - results = dict(x=results_tmp["x"], h12=results_tmp["h12"]) - - return results - @check_types @doc( summary="Run h12 genome-wide selection scan.", @@ -321,7 +288,7 @@ def _h12_gwss( contigs="An array with the contig for each window. The median is chosen for windows overlapping a change of contig.", ), ) - def h12_gwss_contig( + def h12_gwss( self, contig: base_params.contig, window_size: h12_params.window_size, @@ -364,9 +331,7 @@ def h12_gwss_contig( results = self.results_cache_get(name=name, params=params) except CacheMiss: - results = self._h12_gwss_contig( - chunks=chunks, inline_array=inline_array, **params - ) + results = self._h12_gwss(chunks=chunks, inline_array=inline_array, **params) self.results_cache_set(name=name, params=params, results=results) x = results["x"] @@ -375,65 +340,6 @@ def h12_gwss_contig( return x, h12, contigs - @check_types - @doc( - summary="Run h12 genome-wide selection scan.", - returns=dict( - x="An array containing the window centre point genomic positions.", - h12="An array with h12 statistic values for each window.", - ), - ) - def h12_gwss( - self, - contig: base_params.contig, - window_size: h12_params.window_size, - analysis: hap_params.analysis = base_params.DEFAULT, - sample_query: Optional[base_params.sample_query] = None, - sample_query_options: Optional[base_params.sample_query_options] = None, - sample_sets: Optional[base_params.sample_sets] = None, - cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default, - min_cohort_size: Optional[ - base_params.min_cohort_size - ] = h12_params.min_cohort_size_default, - max_cohort_size: Optional[ - base_params.max_cohort_size - ] = h12_params.max_cohort_size_default, - random_seed: base_params.random_seed = 42, - chunks: base_params.chunks = base_params.native_chunks, - inline_array: base_params.inline_array = base_params.inline_array_default, - ) -> Tuple[np.ndarray, np.ndarray]: - # Change this name if you ever change the behaviour of this function, to - # invalidate any previously cached data. - name = "h12_gwss_v1" - - params = dict( - contig=contig, - analysis=self._prep_phasing_analysis_param(analysis=analysis), - window_size=window_size, - sample_sets=self._prep_sample_sets_param(sample_sets=sample_sets), - # N.B., do not be tempted to convert this sample query into integer - # indices using _prep_sample_selection_params, because the indices - # are different in the haplotype data. - sample_query=sample_query, - sample_query_options=sample_query_options, - cohort_size=cohort_size, - min_cohort_size=min_cohort_size, - max_cohort_size=max_cohort_size, - random_seed=random_seed, - ) - - try: - results = self.results_cache_get(name=name, params=params) - - except CacheMiss: - results = self._h12_gwss(chunks=chunks, inline_array=inline_array, **params) - self.results_cache_set(name=name, params=params, results=results) - - x = results["x"] - h12 = results["h12"] - - return x, h12 - @check_types @doc( summary="Plot h12 GWSS data.", @@ -466,7 +372,7 @@ def plot_h12_gwss_track( inline_array: base_params.inline_array = base_params.inline_array_default, ) -> gplt_params.figure: # Compute H12. - x, h12, contigs = self.h12_gwss_contig( + x, h12, contigs = self.h12_gwss( contig=contig, analysis=analysis, window_size=window_size, diff --git a/malariagen_data/anoph/h1x.py b/malariagen_data/anoph/h1x.py index 1fcbd6b0a..8ee29d373 100644 --- a/malariagen_data/anoph/h1x.py +++ b/malariagen_data/anoph/h1x.py @@ -24,7 +24,7 @@ def __init__( # to the superclass constructor. super().__init__(**kwargs) - def _h1x_gwss_contig( + def _h1x_gwss( self, contig, analysis, @@ -93,40 +93,6 @@ def _h1x_gwss_contig( return results - def _h1x_gwss( - self, - contig, - analysis, - window_size, - sample_sets, - cohort1_query, - cohort2_query, - cohort_size, - min_cohort_size, - max_cohort_size, - random_seed, - chunks, - inline_array, - ): - results_tmp = self._h1x_gwss_contig( - contig=contig, - analysis=analysis, - window_size=window_size, - cohort1_query=cohort1_query, - cohort2_query=cohort2_query, - sample_sets=sample_sets, - cohort_size=cohort_size, - min_cohort_size=min_cohort_size, - max_cohort_size=max_cohort_size, - random_seed=random_seed, - chunks=chunks, - inline_array=inline_array, - ) - - results = dict(x=results_tmp["x"], h1x=results_tmp["h1x"]) - - return results - @check_types @doc( summary=""" @@ -139,7 +105,7 @@ def _h1x_gwss( contigs="An array with the contig for each window. The median is chosen for windows overlapping a change of contig.", ), ) - def h1x_gwss_contig( + def h1x_gwss( self, contig: base_params.contig, window_size: h12_params.window_size, @@ -182,9 +148,7 @@ def h1x_gwss_contig( results = self.results_cache_get(name=name, params=params) except CacheMiss: - results = self._h1x_gwss_contig( - chunks=chunks, inline_array=inline_array, **params - ) + results = self._h1x_gwss(chunks=chunks, inline_array=inline_array, **params) self.results_cache_set(name=name, params=params, results=results) x = results["x"] @@ -193,68 +157,6 @@ def h1x_gwss_contig( return x, h1x, contigs - @check_types - @doc( - summary=""" - Run a H1X genome-wide scan to detect genome regions with - shared selective sweeps between two cohorts. - """, - returns=dict( - x="An array containing the window centre point genomic positions.", - h1x="An array with H1X statistic values for each window.", - ), - ) - def h1x_gwss( - self, - contig: base_params.contig, - window_size: h12_params.window_size, - cohort1_query: base_params.sample_query, - cohort2_query: base_params.sample_query, - analysis: hap_params.analysis = base_params.DEFAULT, - sample_sets: Optional[base_params.sample_sets] = None, - cohort_size: Optional[base_params.cohort_size] = h12_params.cohort_size_default, - min_cohort_size: Optional[ - base_params.min_cohort_size - ] = h12_params.min_cohort_size_default, - max_cohort_size: Optional[ - base_params.max_cohort_size - ] = h12_params.max_cohort_size_default, - random_seed: base_params.random_seed = 42, - chunks: base_params.chunks = base_params.native_chunks, - inline_array: base_params.inline_array = base_params.inline_array_default, - ) -> Tuple[np.ndarray, np.ndarray]: - # Change this name if you ever change the behaviour of this function, to - # invalidate any previously cached data. - name = "h1x_gwss_v1" - - params = dict( - contig=contig, - analysis=self._prep_phasing_analysis_param(analysis=analysis), - window_size=window_size, - # N.B., do not be tempted to convert these sample queries into integer - # indices using _prep_sample_selection_params, because the indices - # are different in the haplotype data. - cohort1_query=cohort1_query, - cohort2_query=cohort2_query, - sample_sets=self._prep_sample_sets_param(sample_sets=sample_sets), - cohort_size=cohort_size, - min_cohort_size=min_cohort_size, - max_cohort_size=max_cohort_size, - random_seed=random_seed, - ) - - try: - results = self.results_cache_get(name=name, params=params) - - except CacheMiss: - results = self._h1x_gwss(chunks=chunks, inline_array=inline_array, **params) - self.results_cache_set(name=name, params=params, results=results) - - x = results["x"] - h1x = results["h1x"] - - return x, h1x - @check_types @doc( summary=""" @@ -290,7 +192,7 @@ def plot_h1x_gwss_track( inline_array: base_params.inline_array = base_params.inline_array_default, ) -> gplt_params.figure: # Compute H1X. - x, h1x, contigs = self.h1x_gwss_contig( + x, h1x, contigs = self.h1x_gwss( contig=contig, analysis=analysis, window_size=window_size, diff --git a/tests/anoph/test_h12.py b/tests/anoph/test_h12.py index aa0b906a0..4a8e0db12 100644 --- a/tests/anoph/test_h12.py +++ b/tests/anoph/test_h12.py @@ -129,20 +129,8 @@ def test_h12_calibration(fixture, api: AnophelesH12Analysis): def check_h12_gwss(*, api, h12_params): # Run main gwss function under test. - x, h12 = api.h12_gwss(**h12_params) - # Check results. - assert isinstance(x, np.ndarray) - assert isinstance(h12, np.ndarray) - assert x.ndim == 1 - assert h12.ndim == 1 - assert x.shape == h12.shape - assert x.dtype.kind == "f" - assert h12.dtype.kind == "f" - assert np.all(h12 >= 0) - assert np.all(h12 <= 1) - - x, h12, contigs = api.h12_gwss_contig(**h12_params) + x, h12, contigs = api.h12_gwss(**h12_params) # Check results. assert isinstance(x, np.ndarray) diff --git a/tests/anoph/test_h1x.py b/tests/anoph/test_h1x.py index 6cbb0df9e..b7b5362d6 100644 --- a/tests/anoph/test_h1x.py +++ b/tests/anoph/test_h1x.py @@ -110,20 +110,7 @@ def test_haplotype_joint_frequencies(): def check_h1x_gwss(*, api, h1x_params): # Run main gwss function under test. - x, h1x = api.h1x_gwss(**h1x_params) - - # Check results. - assert isinstance(x, np.ndarray) - assert isinstance(h1x, np.ndarray) - assert x.ndim == 1 - assert h1x.ndim == 1 - assert x.shape == h1x.shape - assert x.dtype.kind == "f" - assert h1x.dtype.kind == "f" - assert np.all(h1x >= 0) - assert np.all(h1x <= 1) - - x, h1x, contigs = api.h1x_gwss_contig(**h1x_params) + x, h1x, contigs = api.h1x_gwss(**h1x_params) # Check results. assert isinstance(x, np.ndarray) From b25acb2a482b920ec3877aa60769039e10acc483 Mon Sep 17 00:00:00 2001 From: jonbrenas <51911846+jonbrenas@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:48:04 +0000 Subject: [PATCH 33/33] Added some examples in the notebook. --- malariagen_data/anoph/gplt_params.py | 4 +- malariagen_data/anoph/h12.py | 4 +- notebooks/plot_h12_h1x.ipynb | 67 +++++++++++++++++++++++----- 3 files changed, 61 insertions(+), 14 deletions(-) diff --git a/malariagen_data/anoph/gplt_params.py b/malariagen_data/anoph/gplt_params.py index 903a60267..838ff286c 100644 --- a/malariagen_data/anoph/gplt_params.py +++ b/malariagen_data/anoph/gplt_params.py @@ -1,7 +1,7 @@ """Parameters for genome plotting functions. N.B., genome plots are always plotted with bokeh.""" -from typing import Literal, Mapping, Optional, Union, Final +from typing import Literal, Mapping, Optional, Union, Final, Sequence import bokeh.models from typing_extensions import Annotated, TypeAlias @@ -118,3 +118,5 @@ ] contig_colors_default: Final[contig_colors] = list(bokeh.palettes.d3["Category20b"][5]) + +colors: TypeAlias = Annotated[Sequence[str], "List of colors."] diff --git a/malariagen_data/anoph/h12.py b/malariagen_data/anoph/h12.py index b18c75703..8f1ccf730 100644 --- a/malariagen_data/anoph/h12.py +++ b/malariagen_data/anoph/h12.py @@ -587,7 +587,7 @@ def plot_h12_gwss_multi_overlay_track( ) # Determine X axis range. - x, _ = res[list(cohort_queries.keys())[0]] + x, _, _ = res[list(cohort_queries.keys())[0]] x_min = x[0] x_max = x[-1] if x_range is None: @@ -622,7 +622,7 @@ def plot_h12_gwss_multi_overlay_track( ) # Plot H12. - for i, (cohort_label, (x, h12)) in enumerate(res.items()): + for i, (cohort_label, (x, h12, contig)) in enumerate(res.items()): fig.scatter( x=x, y=h12, diff --git a/notebooks/plot_h12_h1x.ipynb b/notebooks/plot_h12_h1x.ipynb index 3586907de..33e00f53a 100644 --- a/notebooks/plot_h12_h1x.ipynb +++ b/notebooks/plot_h12_h1x.ipynb @@ -75,7 +75,8 @@ "coh2 = \"ML-2_Kati_gamb_2014\"\n", "coh1_query = f\"cohort_admin2_year == '{coh1}'\"\n", "coh2_query = f\"cohort_admin2_year == '{coh2}'\"\n", - "contig = \"2L\"" + "contig = \"2L\"\n", + "contigs = \"2RL\"" ] }, { @@ -114,6 +115,23 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "4470d24c-8cf1-4d22-b774-0121b4560e27", + "metadata": {}, + "outputs": [], + "source": [ + "ag3.plot_h12_gwss(\n", + " contig=contigs,\n", + " analysis=\"gamb_colu\",\n", + " window_size=2000,\n", + " sample_query=coh1_query,\n", + " sample_sets=\"3.0\",\n", + " cohort_size=20,\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -173,6 +191,25 @@ ")" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4b7a8d2-95d0-48dc-a32f-3bc96aacfb9f", + "metadata": {}, + "outputs": [], + "source": [ + "ag3.plot_h1x_gwss(\n", + " contig=contigs,\n", + " window_size=2000,\n", + " cohort1_query=coh1_query,\n", + " cohort2_query=coh2_query,\n", + " sample_sets=\"3.0\",\n", + " analysis=\"gamb_colu\",\n", + " cohort_size=20,\n", + " contig_colors=[\"red\", \"green\"]\n", + ")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -261,6 +298,22 @@ "contig = \"2RL\"" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "1aaa0573-723c-43b1-baea-750172c4dabc", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ffc7dc06-6bdb-42d2-a1fb-878612d10dd1", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -364,14 +417,6 @@ " cohort_size=20,\n", ")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67e3bfcc", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -382,7 +427,7 @@ "uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m125" }, "kernelspec": { - "display_name": "malariagen-data-python", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -396,7 +441,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.15" + "version": "3.10.11" }, "vscode": { "interpreter": {