Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Colors in GWSS plots #591

Merged
merged 44 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from 43 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
699c1ab
Used the same kwargs method as in the CNV function
jonbrenas Aug 29, 2024
bfabef4
Merge branch 'master' into 583-GWSS-colors
leehart Sep 16, 2024
2e8b4c4
Merge branch 'master' into 583-GWSS-colors
leehart Sep 19, 2024
8db7a1c
Merge branch 'master' into 583-GWSS-colors
leehart Sep 19, 2024
e7e6b71
Color depending on contig. Still imperfect.
jonbrenas Sep 30, 2024
787045a
Corrected wrong type.
jonbrenas Sep 30, 2024
2ae87be
Corrected the doc.
jonbrenas Sep 30, 2024
80ae2a4
Merge branch 'master' into 583-GWSS-colors
jonbrenas Sep 30, 2024
c3834bd
Something weird changed the test while they were being performed.
jonbrenas Sep 30, 2024
df0a0c5
Still a lot of weird stuff.
jonbrenas Sep 30, 2024
52acfef
Still more weird stuff.
jonbrenas Sep 30, 2024
212b164
Even more weird stuff.
jonbrenas Sep 30, 2024
bda3efe
The solution was obvious.
jonbrenas Sep 30, 2024
c63b572
Time for a detour.
jonbrenas Oct 1, 2024
7fd9c4b
Uniformised the basic and user-provided option, somewhat.
jonbrenas Oct 1, 2024
5a79a11
A bit of clean-up.
jonbrenas Oct 1, 2024
87e27f2
Corrected a wrong type.
jonbrenas Oct 1, 2024
948f86b
Added the wrong params file.
jonbrenas Oct 1, 2024
e49755f
Forgot to forward a change.
jonbrenas Oct 1, 2024
9281b50
Merge branch 'master' into 583-GWSS-colors
jonbrenas Oct 1, 2024
01ad5e3
Updated the tests a bit.
jonbrenas Oct 2, 2024
b2d9b0a
Dealt with h1x.
jonbrenas Oct 2, 2024
20947b3
Gave the users more options.
jonbrenas Oct 3, 2024
5f84b9d
Refining a type to defeat linting.
jonbrenas Oct 3, 2024
90c5982
Same problem, different solution.
jonbrenas Oct 3, 2024
528d3ec
Same problem, not so different solution.
jonbrenas Oct 3, 2024
fdef7eb
Same problem, not a solution.
jonbrenas Oct 3, 2024
f141953
Trying a new idea.
jonbrenas Oct 3, 2024
2e34ff1
Going the opposite way.
jonbrenas Oct 3, 2024
ff54405
More tests.
jonbrenas Oct 3, 2024
f98d601
Simplified everything
jonbrenas Oct 7, 2024
311f587
Changed sequence to list
jonbrenas Oct 7, 2024
986004e
Fighting with len
jonbrenas Oct 7, 2024
609c6a2
Merge branch 'master' into 583-GWSS-colors
jonbrenas Oct 7, 2024
bceb950
Corrected the merge
jonbrenas Oct 7, 2024
48fb21e
Returned g123 to its normal state
jonbrenas Oct 7, 2024
60780e1
Forgot 2 lines
jonbrenas Oct 7, 2024
e5da8a3
Merge branch 'master' into 583-GWSS-colors
jonbrenas Oct 15, 2024
d8e7e97
Merge branch 'master' into 583-GWSS-colors
jonbrenas Oct 19, 2024
05a0fcc
Merged the _contig versions into the main versions
jonbrenas Oct 19, 2024
7b08e61
Merge branch 'master' into 583-GWSS-colors
jonbrenas Nov 5, 2024
b25acb2
Added some examples in the notebook.
jonbrenas Nov 5, 2024
af6bac1
Merge branch 'master' into 583-GWSS-colors
jonbrenas Nov 5, 2024
5a9c28a
Merge branch 'master' into 583-GWSS-colors
alimanfoo Nov 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion malariagen_data/anoph/gplt_params.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Parameters for genome plotting functions. N.B., genome plots are always
plotted with bokeh."""

from typing import Literal, Mapping, Optional, Union, Sequence
from typing import Literal, Mapping, Optional, Union, Final, Sequence

import bokeh.models
from typing_extensions import Annotated, TypeAlias
Expand Down Expand Up @@ -112,4 +112,11 @@
"Passed through to bokeh line() function.",
]

contig_colors: TypeAlias = Annotated[
list[str],
"A sequence of colors.",
]

contig_colors_default: Final[contig_colors] = list(bokeh.palettes.d3["Category20b"][5])

colors: TypeAlias = Annotated[Sequence[str], "List of colors."]
44 changes: 28 additions & 16 deletions malariagen_data/anoph/h12.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,8 +266,16 @@ def _h12_gwss(
# Compute window midpoints.
pos = ds_haps["variant_position"].values
x = allel.moving_statistic(pos, statistic=np.mean, size=window_size)
contigs = np.asarray(
allel.moving_statistic(
ds_haps["variant_contig"].values,
statistic=np.median,
size=window_size,
),
dtype=int,
)

results = dict(x=x, h12=h12)
results = dict(x=x, h12=h12, contigs=contigs)

return results

Expand All @@ -277,6 +285,7 @@ def _h12_gwss(
returns=dict(
x="An array containing the window centre point genomic positions.",
h12="An array with h12 statistic values for each window.",
contigs="An array with the contig for each window. The median is chosen for windows overlapping a change of contig.",
),
)
def h12_gwss(
Expand All @@ -297,10 +306,10 @@ def h12_gwss(
random_seed: base_params.random_seed = 42,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
) -> Tuple[np.ndarray, np.ndarray]:
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
# Change this name if you ever change the behaviour of this function, to
# invalidate any previously cached data.
name = "h12_gwss_v1"
name = "h12_gwss_contig_v1"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
name = "h12_gwss_contig_v1"
name = "h12_gwss_v2"

Nit: the convention is to use the function name followed by a version number.


params = dict(
contig=contig,
Expand All @@ -327,8 +336,9 @@ def h12_gwss(

x = results["x"]
h12 = results["h12"]
contigs = results["contigs"]

return x, h12
return x, h12, contigs

@check_types
@doc(
Expand All @@ -354,14 +364,15 @@ def plot_h12_gwss_track(
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
width: gplt_params.width = gplt_params.width_default,
height: gplt_params.height = 200,
contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default,
show: gplt_params.show = True,
x_range: Optional[gplt_params.x_range] = None,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
) -> gplt_params.figure:
# Compute H12.
x, h12 = self.h12_gwss(
x, h12, contigs = self.h12_gwss(
contig=contig,
analysis=analysis,
window_size=window_size,
Expand Down Expand Up @@ -412,15 +423,14 @@ def plot_h12_gwss_track(
)

# Plot H12.
fig.scatter(
x=x,
y=h12,
marker="circle",
size=3,
line_width=1,
line_color="black",
fill_color=None,
)
for s in set(contigs):
idxs = contigs == s
fig.scatter(
x=x[idxs],
y=h12[idxs],
marker="circle",
color=contig_colors[s % len(contig_colors)],
)

# Tidy up the plot.
fig.yaxis.axis_label = "H12"
Expand Down Expand Up @@ -457,6 +467,7 @@ def plot_h12_gwss(
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
width: gplt_params.width = gplt_params.width_default,
track_height: gplt_params.track_height = 170,
contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default,
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
Expand All @@ -479,6 +490,7 @@ def plot_h12_gwss(
sizing_mode=sizing_mode,
width=width,
height=track_height,
contig_colors=contig_colors,
show=False,
output_backend=output_backend,
chunks=chunks,
Expand Down Expand Up @@ -575,7 +587,7 @@ def plot_h12_gwss_multi_overlay_track(
)

# Determine X axis range.
x, _ = res[list(cohort_queries.keys())[0]]
x, _, _ = res[list(cohort_queries.keys())[0]]
x_min = x[0]
x_max = x[-1]
if x_range is None:
Expand Down Expand Up @@ -610,7 +622,7 @@ def plot_h12_gwss_multi_overlay_track(
)

# Plot H12.
for i, (cohort_label, (x, h12)) in enumerate(res.items()):
for i, (cohort_label, (x, h12, contig)) in enumerate(res.items()):
fig.scatter(
x=x,
y=h12,
Expand Down
40 changes: 26 additions & 14 deletions malariagen_data/anoph/h1x.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,16 @@ def _h1x_gwss(
# Compute window midpoints.
pos = ds1["variant_position"].values
x = allel.moving_statistic(pos, statistic=np.mean, size=window_size)
contigs = np.asarray(
allel.moving_statistic(
ds1["variant_contig"].values,
statistic=np.median,
size=window_size,
),
dtype=int,
)

results = dict(x=x, h1x=h1x)
results = dict(x=x, h1x=h1x, contigs=contigs)

return results

Expand All @@ -98,6 +106,7 @@ def _h1x_gwss(
returns=dict(
x="An array containing the window centre point genomic positions.",
h1x="An array with H1X statistic values for each window.",
contigs="An array with the contig for each window. The median is chosen for windows overlapping a change of contig.",
),
)
def h1x_gwss(
Expand All @@ -119,10 +128,10 @@ def h1x_gwss(
random_seed: base_params.random_seed = 42,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
) -> Tuple[np.ndarray, np.ndarray]:
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
# Change this name if you ever change the behaviour of this function, to
# invalidate any previously cached data.
name = "h1x_gwss_v1"
name = "h1x_gwss_contig_v1"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
name = "h1x_gwss_contig_v1"
name = "h1x_gwss_v2"


params = dict(
contig=contig,
Expand Down Expand Up @@ -150,8 +159,9 @@ def h1x_gwss(

x = results["x"]
h1x = results["h1x"]
contigs = results["contigs"]

return x, h1x
return x, h1x, contigs

@check_types
@doc(
Expand Down Expand Up @@ -181,14 +191,15 @@ def plot_h1x_gwss_track(
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
width: gplt_params.width = gplt_params.width_default,
height: gplt_params.height = 200,
contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default,
show: gplt_params.show = True,
x_range: Optional[gplt_params.x_range] = None,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
chunks: base_params.chunks = base_params.native_chunks,
inline_array: base_params.inline_array = base_params.inline_array_default,
) -> gplt_params.figure:
# Compute H1X.
x, h1x = self.h1x_gwss(
x, h1x, contigs = self.h1x_gwss(
contig=contig,
analysis=analysis,
window_size=window_size,
Expand Down Expand Up @@ -240,15 +251,14 @@ def plot_h1x_gwss_track(
)

# Plot H1X.
fig.scatter(
x=x,
y=h1x,
marker="circle",
size=3,
line_width=1,
line_color="black",
fill_color=None,
)
for s in set(contigs):
idxs = contigs == s
fig.scatter(
x=x[idxs],
y=h1x[idxs],
marker="circle",
color=contig_colors[s % len(contig_colors)],
)

# Tidy up the plot.
fig.yaxis.axis_label = "H1X"
Expand Down Expand Up @@ -289,6 +299,7 @@ def plot_h1x_gwss(
sizing_mode: gplt_params.sizing_mode = gplt_params.sizing_mode_default,
width: gplt_params.width = gplt_params.width_default,
track_height: gplt_params.track_height = 190,
contig_colors: gplt_params.contig_colors = gplt_params.contig_colors_default,
genes_height: gplt_params.genes_height = gplt_params.genes_height_default,
show: gplt_params.show = True,
output_backend: gplt_params.output_backend = gplt_params.output_backend_default,
Expand All @@ -312,6 +323,7 @@ def plot_h1x_gwss(
sizing_mode=sizing_mode,
width=width,
height=track_height,
contig_colors=contig_colors,
show=False,
output_backend=output_backend,
chunks=chunks,
Expand Down
67 changes: 56 additions & 11 deletions notebooks/plot_h12_h1x.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@
"coh2 = \"ML-2_Kati_gamb_2014\"\n",
"coh1_query = f\"cohort_admin2_year == '{coh1}'\"\n",
"coh2_query = f\"cohort_admin2_year == '{coh2}'\"\n",
"contig = \"2L\""
"contig = \"2L\"\n",
"contigs = \"2RL\""
]
},
{
Expand Down Expand Up @@ -114,6 +115,23 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4470d24c-8cf1-4d22-b774-0121b4560e27",
"metadata": {},
"outputs": [],
"source": [
"ag3.plot_h12_gwss(\n",
" contig=contigs,\n",
" analysis=\"gamb_colu\",\n",
" window_size=2000,\n",
" sample_query=coh1_query,\n",
" sample_sets=\"3.0\",\n",
" cohort_size=20,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -173,6 +191,25 @@
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b4b7a8d2-95d0-48dc-a32f-3bc96aacfb9f",
"metadata": {},
"outputs": [],
"source": [
"ag3.plot_h1x_gwss(\n",
" contig=contigs,\n",
" window_size=2000,\n",
" cohort1_query=coh1_query,\n",
" cohort2_query=coh2_query,\n",
" sample_sets=\"3.0\",\n",
" analysis=\"gamb_colu\",\n",
" cohort_size=20,\n",
" contig_colors=[\"red\", \"green\"]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -261,6 +298,22 @@
"contig = \"2RL\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1aaa0573-723c-43b1-baea-750172c4dabc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ffc7dc06-6bdb-42d2-a1fb-878612d10dd1",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -364,14 +417,6 @@
" cohort_size=20,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67e3bfcc",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand All @@ -382,7 +427,7 @@
"uri": "us-docker.pkg.dev/deeplearning-platform-release/gcr.io/workbench-notebooks:m125"
},
"kernelspec": {
"display_name": "malariagen-data-python",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -396,7 +441,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.15"
"version": "3.10.11"
},
"vscode": {
"interpreter": {
Expand Down
9 changes: 7 additions & 2 deletions tests/anoph/test_h12.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,23 +129,28 @@ def test_h12_calibration(fixture, api: AnophelesH12Analysis):

def check_h12_gwss(*, api, h12_params):
# Run main gwss function under test.
x, h12 = api.h12_gwss(**h12_params)

x, h12, contigs = api.h12_gwss(**h12_params)

# Check results.
assert isinstance(x, np.ndarray)
assert isinstance(h12, np.ndarray)
assert isinstance(contigs, np.ndarray)
assert x.ndim == 1
assert h12.ndim == 1
assert contigs.ndim == 1
assert x.shape == h12.shape
assert x.shape == contigs.shape
assert x.dtype.kind == "f"
assert h12.dtype.kind == "f"
assert contigs.dtype.kind == "i"
assert np.all(h12 >= 0)
assert np.all(h12 <= 1)

# Check plotting functions.
fig = api.plot_h12_gwss_track(**h12_params, show=False)
assert isinstance(fig, bokeh.models.Plot)
fig = api.plot_h12_gwss(**h12_params, show=False)
fig = api.plot_h12_gwss(**h12_params, contig_colors=["black", "red"], show=False)
assert isinstance(fig, bokeh.models.GridPlot)


Expand Down
Loading
Loading