Skip to content

Commit

Permalink
Merge pull request #872 from jradrion/drosec
Browse files Browse the repository at this point in the history
added drosophila sechellia
  • Loading branch information
petrelharp authored Apr 8, 2021
2 parents a15ef75 + 82ae608 commit 0f6824e
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 0 deletions.
4 changes: 4 additions & 0 deletions stdpopsim/catalog/DroSec/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""
Catalog definitions for DroSec (Ensembl ID='drosophila_sechellia')
"""
from . import species # noqa: F401
13 changes: 13 additions & 0 deletions stdpopsim/catalog/DroSec/genome_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# File created manually from https://www.ncbi.nlm.nih.gov/assembly/GCF_004382195.1
data = {
"assembly_accession": "GCA_004382195.1",
"assembly_name": "ASM438219v1",
"chromosomes": {
"2L": {"length": 24956976, "synonyms": []},
"2R": {"length": 21536224, "synonyms": []},
"3L": {"length": 28131630, "synonyms": []},
"3R": {"length": 30464902, "synonyms": []},
"X": {"length": 22909512, "synonyms": []},
"4": {"length": 1277805, "synonyms": []},
},
}
88 changes: 88 additions & 0 deletions stdpopsim/catalog/DroSec/species.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import stdpopsim

from . import genome_data

_ChakrabortyEtAl = stdpopsim.Citation(
doi="https://doi.org/10.1101/gr.263442.120",
year=2021,
author="Chakraborty et al.",
reasons={stdpopsim.CiteReason.ASSEMBLY},
)

_ComeronEtAl = stdpopsim.Citation(
doi="https://doi.org/10.1371/journal.pgen.1002905",
year=2012,
author="Comeron et al.",
reasons={stdpopsim.CiteReason.REC_RATE},
)

_LegrandEtAl = stdpopsim.Citation(
doi="https://doi.org/10.1534/genetics.108.092080",
year=2009,
author="Legrand et al.",
reasons={
stdpopsim.CiteReason.GEN_TIME,
stdpopsim.CiteReason.MUT_RATE,
stdpopsim.CiteReason.POP_SIZE,
},
)

# We used recombination rates estimated in DroMel.
# Mean chromosomal rates, calculated from the Comeron 2012 map.
# Chromosome 4 isn't in this map, so the weighted mean of 2L, 2R, 3L and 3R
# was used instead.
_recombination_rate = {
"2L": 2.4125016027908946e-08,
"2R": 2.2366522822806982e-08,
"3L": 1.7985794693631893e-08,
"3R": 1.7165556232922828e-08,
"X": 2.9151053903465754e-08,
"4": 2.0085234464525437e-08,
}

# Mutation rate is set to that used by
# Legrand et al. in an ABC selection of
# demographic scenarios (page 1200).
# They state this is an estimate taken
# from D. simulans (Wall et al. 2002).
_overall_rate = 1.5e-9
_mutation_rate = {
"2L": _overall_rate,
"2R": _overall_rate,
"3L": _overall_rate,
"3R": _overall_rate,
"X": _overall_rate,
"4": _overall_rate,
}

# We could not auto-pull the genome data from ensemble
# so instead we used the most up-to-date assembly
# currently available from NCBI.
_genome = stdpopsim.Genome.from_data(
genome_data.data,
recombination_rate=_recombination_rate,
mutation_rate=_mutation_rate,
citations=[
_ChakrabortyEtAl,
_ComeronEtAl,
_LegrandEtAl,
],
)


# Generation time was set to that used by
# by Legrand et al. in an ABC selection of demographic
# scenarios (page 1200).
# Population size was estimated in the same paper (page 1202).
_species = stdpopsim.Species(
id="DroSec",
ensembl_id="drosophila_sechellia",
name="Drosophila sechellia",
common_name="Drosophila sechellia",
genome=_genome,
generation_time=0.05,
population_size=100000,
citations=[_LegrandEtAl],
)

stdpopsim.register_species(_species)
45 changes: 45 additions & 0 deletions tests/test_DroSec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pytest

import stdpopsim
from tests import test_species


class TestSpeciesData(test_species.SpeciesTestBase):

species = stdpopsim.get_species("DroSec")

def test_ensembl_id(self):
assert self.species.ensembl_id == "drosophila_sechellia"

def test_name(self):
assert self.species.name == "Drosophila sechellia"

def test_common_name(self):
assert self.species.common_name == "Drosophila sechellia"

# QC Tests. These tests are performed by another contributor
# independently referring to the citations provided in the
# species definition, filling in the appropriate values
# and deleting the pytest "skip" annotations.
@pytest.mark.skip("Population size QC not done yet")
def test_qc_population_size(self):
assert self.species.population_size == -1

@pytest.mark.skip("Generation time QC not done yet")
def test_qc_generation_time(self):
assert self.species.generation_time == -1


class TestGenomeData(test_species.GenomeTestBase):

genome = stdpopsim.get_species("DroSec").genome

@pytest.mark.skip("Recombination rate QC not done yet")
@pytest.mark.parametrize(["name", "rate"], {}.items())
def test_recombination_rate(self, name, rate):
assert pytest.approx(rate, self.genome.get_chromosome(name).recombination_rate)

@pytest.mark.skip("Mutation rate QC not done yet")
@pytest.mark.parametrize(["name", "rate"], {}.items())
def test_mutation_rate(self, name, rate):
assert pytest.approx(rate, self.genome.get_chromosome(name).mutation_rate)

0 comments on commit 0f6824e

Please sign in to comment.