Skip to content

Commit

Permalink
AnaPla: mallard
Browse files Browse the repository at this point in the history
  • Loading branch information
petrelharp committed Apr 9, 2021
1 parent 0f6824e commit b0cff18
Show file tree
Hide file tree
Showing 4 changed files with 380 additions and 0 deletions.
4 changes: 4 additions & 0 deletions stdpopsim/catalog/AnaPla/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""
Catalog definitions for AnaPla (Ensembl ID='anas_platyrhynchos')
"""
from . import species # noqa: F401
48 changes: 48 additions & 0 deletions stdpopsim/catalog/AnaPla/genome_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# File autogenerated from Ensembl REST API. Do not edit.
# Assembly identifiers plus one entry per chromosome, keyed by chromosome name.
# Each entry records the chromosome "length" (presumably in base pairs — confirm
# against the Ensembl source) and a list of alternative names ("synonyms"), which
# is empty for every chromosome here. Note that "Z" is listed between "30" and
# "31"; other modules that enumerate chromosomes follow this same order.
data = {
    "assembly_accession": "GCA_008746955.1",
    "assembly_name": "ASM874695v1",
    "chromosomes": {
        "1": {"length": 208326429, "synonyms": []},
        "2": {"length": 162939446, "synonyms": []},
        "3": {"length": 119723720, "synonyms": []},
        "4": {"length": 77626585, "synonyms": []},
        "5": {"length": 64988622, "synonyms": []},
        "6": {"length": 39543408, "synonyms": []},
        "7": {"length": 37812880, "synonyms": []},
        "8": {"length": 33348632, "synonyms": []},
        "9": {"length": 26742597, "synonyms": []},
        "10": {"length": 22933227, "synonyms": []},
        "11": {"length": 22193879, "synonyms": []},
        "12": {"length": 22338721, "synonyms": []},
        "13": {"length": 21714986, "synonyms": []},
        "14": {"length": 20320564, "synonyms": []},
        "15": {"length": 18227546, "synonyms": []},
        "16": {"length": 16053328, "synonyms": []},
        "17": {"length": 15319648, "synonyms": []},
        "18": {"length": 13333155, "synonyms": []},
        "19": {"length": 12198306, "synonyms": []},
        "20": {"length": 12091001, "synonyms": []},
        "21": {"length": 8553409, "synonyms": []},
        "22": {"length": 16160689, "synonyms": []},
        "23": {"length": 7977799, "synonyms": []},
        "24": {"length": 7737077, "synonyms": []},
        "25": {"length": 7574731, "synonyms": []},
        "26": {"length": 6918023, "synonyms": []},
        "27": {"length": 6270716, "synonyms": []},
        "28": {"length": 5960150, "synonyms": []},
        "29": {"length": 1456683, "synonyms": []},
        "30": {"length": 1872559, "synonyms": []},
        "Z": {"length": 81233375, "synonyms": []},
        "31": {"length": 2637124, "synonyms": []},
        "32": {"length": 3473573, "synonyms": []},
        "33": {"length": 2151773, "synonyms": []},
        "34": {"length": 7214884, "synonyms": []},
        "35": {"length": 5548691, "synonyms": []},
        "36": {"length": 3997205, "synonyms": []},
        "37": {"length": 3148754, "synonyms": []},
        "38": {"length": 2836164, "synonyms": []},
        "39": {"length": 2018729, "synonyms": []},
        "40": {"length": 1354177, "synonyms": []},
    },
}
193 changes: 193 additions & 0 deletions stdpopsim/catalog/AnaPla/species.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
import stdpopsim

from . import genome_data

# Source of the chromosome 1-7 map lengths behind the recombination rates below.
_HuangEtAl2006 = stdpopsim.Citation(
    author="Huang et al.",
    year=2006,
    # Written in the same "https://doi.org/<prefix>/<suffix>" form as the other
    # citations in this module: the legacy dx.doi.org host and the
    # percent-encoded slash ("%2F") are replaced by their plain equivalents.
    doi="https://doi.org/10.1534/genetics.105.053256",
    reasons={stdpopsim.CiteReason.REC_RATE},
)

# Cited for both the generation time and the mutation rate used in this module.
_LavretskyEtAl2019 = stdpopsim.Citation(
    author="Lavretsky et al.",
    year=2019,
    doi="https://doi.org/10.1111/mec.15343",
    reasons={stdpopsim.CiteReason.GEN_TIME, stdpopsim.CiteReason.MUT_RATE},
)

# Cited for the default population size.
_GuoEtAl2020 = stdpopsim.Citation(
    author="Guo et al.",
    year=2020,
    doi="https://doi.org/10.24272/j.issn.2095-8137.2020.133",
    reasons={stdpopsim.CiteReason.POP_SIZE},
)


# def _print_recomb_rates():
# """
# Code to produce the following rates, following Lavretsky et al 2019
# in taking the total map lengths of chromosomes 1-7 from a microsatellite study
# (Huang et al 2006) to give average rates. This assumes the map lengths of
# chromosomes 1-7 are as in Huang et al, and applies the average per-bp rate to
# the rest of the chromosomes.
# """
# from genome_data import data as mallard
#
# chroms = mallard["chromosomes"]
# # from Huang et al 2006
# huang_chroms = {
# "1": {"M": 3.17},
# "2": {"M": 2.26},
# "3": {"M": 1.12},
# "4": {"M": 0.93},
# "5": {"M": 0.79},
# "6": {"M": 1.20},
# "7": {"M": 0.98},
# }
# for c in huang_chroms:
# h = huang_chroms[c]
# h["bp"] = mallard["chromosomes"][c]["length"]
# h["rate"] = h["M"] / h["bp"]
# total_M = sum([x["M"] for x in huang_chroms.values()])
# total_bp = sum([x["bp"] for x in huang_chroms.values()])
# mean_rate = total_M / total_bp
# print(f"_default_recombination_rate = {mean_rate}")
# print("_recombination_rate = {")
# for c in chroms:
# if c in huang_chroms:
# print(f"\"{c}\": {huang_chroms[c]['rate']:.2e},")
# else:
# print(f'"{c}": _default_recombination_rate,')
# print("}")


# Average rate over chromosomes 1-7 (their total map length divided by their
# total physical length); stands in for every chromosome without its own estimate.
_default_recombination_rate = 1.47e-08


# Every chromosome starts at the default rate, in the same order as the genome
# data ("Z" sits between "30" and "31"); the chromosome-specific estimates for
# chromosomes 1-7 are then laid over the top. Overriding an existing key keeps
# its position, so the key order is unchanged.
_recombination_rate = {
    **dict.fromkeys(
        [*map(str, range(1, 31)), "Z", *map(str, range(31, 41))],
        _default_recombination_rate,
    ),
    "1": 1.52e-08,
    "2": 1.39e-08,
    "3": 9.35e-09,
    "4": 1.20e-08,
    "5": 1.22e-08,
    "6": 3.03e-08,
    "7": 2.59e-08,
}


# Rate used by Lavretsky et al 2019, who took it from the estimate for nuclear
# genes in other ducks by Peters, Zhuravlev, Fefelov, Humphries, & Omland, 2008.
_overall_mutation_rate = 4.83e-9  # per generation

# The same rate applies to every chromosome; names are generated in the same
# order as the genome data ("Z" sits between "30" and "31").
_mutation_rate = dict.fromkeys(
    [*map(str, range(1, 31)), "Z", *map(str, range(31, 41))],
    _overall_mutation_rate,
)

# Build the genome from the autogenerated assembly/chromosome table, attaching
# the per-chromosome recombination and mutation rate dictionaries defined above
# together with the citations those rates are based on.
_genome = stdpopsim.Genome.from_data(
    genome_data.data,
    recombination_rate=_recombination_rate,
    mutation_rate=_mutation_rate,
    citations=[
        _LavretskyEtAl2019,
        _HuangEtAl2006,
    ],
)

# Species definition for the mallard; the free-text description is currently
# commented out and therefore not passed to the constructor.
_species = stdpopsim.Species(
    id="AnaPla",
    ensembl_id="anas_platyrhynchos",
    name="Anas platyrhynchos",
    common_name="Mallard",
    # description="The 'mallard' species complex consists of 14 hybridizing and "
    # "recently diverged species living around the world, ranging from the holarctic "
    # "mallard with >15M individuals today in North America alone to "
    # "endangered endemics in Hawaii and New Zealand. The assembly, "
    # "recombination rates, and default Ne were estimated with wild Chinese "
    # "mallards.",
    genome=_genome,
    generation_time=4,
    # choosing Ne based on theta = 4 Ne u from Guo et al 2020
    # theta = 0.003 (Figure 1), u as above (the paper uses a rate from chicken)
    # NOTE(review): 0.003 / (4 * 4.83e-9) is about 155,280 — confirm the
    # rounding that gives 156000.
    population_size=156000,
    citations=[
        _LavretskyEtAl2019,
        _GuoEtAl2020,
    ],
)

# Add the species to the stdpopsim catalog at import time.
stdpopsim.register_species(_species)
135 changes: 135 additions & 0 deletions tests/test_AnaPla.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import pytest

import stdpopsim
from tests import test_species


class TestSpeciesData(test_species.SpeciesTestBase):
    """Checks of the AnaPla species definition.

    Runs on top of whatever the shared ``SpeciesTestBase`` provides, using the
    registered "AnaPla" species as the object under test.
    """

    species = stdpopsim.get_species("AnaPla")

    def test_ensembl_id(self):
        assert self.species.ensembl_id == "anas_platyrhynchos"

    def test_name(self):
        assert self.species.name == "Anas platyrhynchos"

    def test_common_name(self):
        assert self.species.common_name == "Mallard"

    # QC Tests. These tests are performed by another contributor
    # independently referring to the citations provided in the
    # species definition, filling in the appropriate values
    # and deleting the pytest "skip" annotations.
    # The -1 values below are placeholders until that QC is done.
    @pytest.mark.skip("Population size QC not done yet")
    def test_qc_population_size(self):
        assert self.species.population_size == -1

    @pytest.mark.skip("Generation time QC not done yet")
    def test_qc_generation_time(self):
        assert self.species.generation_time == -1


class TestGenomeData(test_species.GenomeTestBase):
    """QC checks of the per-chromosome rates in the AnaPla genome.

    Both tests are skipped until an independent contributor replaces the -1
    placeholders with values taken from the cited sources and removes the
    ``skip`` markers.
    """

    genome = stdpopsim.get_species("AnaPla").genome

    @pytest.mark.skip("Recombination rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "19": -1,
            "20": -1,
            "21": -1,
            "22": -1,
            "23": -1,
            "24": -1,
            "25": -1,
            "26": -1,
            "27": -1,
            "28": -1,
            "29": -1,
            "30": -1,
            "Z": -1,
            "31": -1,
            "32": -1,
            "33": -1,
            "34": -1,
            "35": -1,
            "36": -1,
            "37": -1,
            "38": -1,
            "39": -1,
            "40": -1,
        }.items(),
    )
    def test_recombination_rate(self, name, rate):
        """Compare the expected rate for chromosome ``name`` with the catalog value."""
        # The comparison must be written as ``expected == approx(actual)``.
        # ``assert pytest.approx(expected, actual)`` would pass the actual value
        # as the relative tolerance and assert on the approx object itself,
        # which is always truthy, so the test could never fail.
        assert rate == pytest.approx(self.genome.get_chromosome(name).recombination_rate)

    @pytest.mark.skip("Mutation rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        {
            "1": -1,
            "2": -1,
            "3": -1,
            "4": -1,
            "5": -1,
            "6": -1,
            "7": -1,
            "8": -1,
            "9": -1,
            "10": -1,
            "11": -1,
            "12": -1,
            "13": -1,
            "14": -1,
            "15": -1,
            "16": -1,
            "17": -1,
            "18": -1,
            "19": -1,
            "20": -1,
            "21": -1,
            "22": -1,
            "23": -1,
            "24": -1,
            "25": -1,
            "26": -1,
            "27": -1,
            "28": -1,
            "29": -1,
            "30": -1,
            "Z": -1,
            "31": -1,
            "32": -1,
            "33": -1,
            "34": -1,
            "35": -1,
            "36": -1,
            "37": -1,
            "38": -1,
            "39": -1,
            "40": -1,
        }.items(),
    )
    def test_mutation_rate(self, name, rate):
        """Compare the expected rate for chromosome ``name`` with the catalog value."""
        # Same ``expected == approx(actual)`` form as the recombination test, so
        # that a wrong value actually fails the assertion.
        assert rate == pytest.approx(self.genome.get_chromosome(name).mutation_rate)

0 comments on commit b0cff18

Please sign in to comment.