Skip to content

Commit

Permalink
data: add microC chr17
Browse files Browse the repository at this point in the history
  • Loading branch information
js2264 committed Nov 7, 2023
1 parent 1f7e2b2 commit bf76a9e
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 3 deletions.
3 changes: 2 additions & 1 deletion R/globals.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ HiContactsDataFiles <- rbind(
c('yeast_eco1', 'mcool', 'S288C', 'Eco1-AID+IAA', 'multi-res .mcool file', 'EH7704'),
c('yeast_eco1', 'pairs.gz', 'S288C', 'Eco1-AID+IAA', 'only pairs from chrII are provided', 'EH7705'),
c('mESCs', 'mcool', 'mm10', 'mESCs', 'multi-res .mcool file', 'EH7706'),
c('mESCs', 'pairs.gz', 'mm10', 'mESCs', 'only pairs from chr13 are provided', 'EH7707')
c('mESCs', 'pairs.gz', 'mm10', 'mESCs', 'only pairs from chr13 are provided', 'EH7707'),
c('microC', 'mcool', 'GRCh38', 'HFFc6', 'multi-res .mcool file, only chr17 is provided', '')
)
colnames(HiContactsDataFiles) <- c(
'sample', 'format', 'genome', 'condition', 'notes', 'EHID'
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ Several files are available using this function, namely:
- From [Bonev et al., Cell 2017](https://doi.org/10.1016/j.cell.2017.09.043):
- mESCs.mcool (`sample`: `mESCs`, `format` = `mcool`)
- mESCs.pairs.gz for chr13 only (`sample`: `mESCs`, `format` = `pairs.gz`)

- From [Krietenstein et al., Mol. Cell 2020](https://doi.org/10.1016/j.molcel.2020.03.003):
- microC_HFFc6_chr17.mcool (`sample`: `microC`, `format` = `mcool`)
1 change: 1 addition & 0 deletions inst/extdata/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
"Eco1-AID yeast .pairs file","Hi-C performed on Eco1-AID mutant yeast strain upon IAA treatment processed with tinyMapper and represented as a filtered pairs file. Data representation derived from SRA run results SRR16250953.",3.16,"S288C","Zip","https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM5918405","Aug 17 2022","Saccharomyces cerevisiae",4932,"FALSE","Jacques Serizay","Jacques Serizay <[email protected]>","character","FilePath","HiContactsData/S288C_Eco1-AID.pairs.gz","HiCData"
"mESC .mcool contact matrix","Hi-C performed on mouse embryonic stem cells (mESCs) processed with tinyMapper and represented as a multi-resolution mcool file. Data representation derived from SRA run results SRR5339749.",3.16,"mm10","HDF5","https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM2533818","Aug 17 2022","Mus musculus",10090,"FALSE","Jacques Serizay","Jacques Serizay <[email protected]>","character","FilePath","HiContactsData/mESCs.mcool","HiCData"
"mESC .pairs file","Hi-C performed on mouse embryonic stem cells (mESCs) processed with tinyMapper and represented as a filtered pairs file. Data representation derived from SRA run results SRR5339749.",3.16,"mm10","TXT","https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM2533818","Aug 17 2022","Mus musculus",10090,"FALSE","Jacques Serizay","Jacques Serizay <[email protected]>","character","FilePath","HiContactsData/mESCs.pairs.gz","HiCData"
"HFFc6 .mcool contact matrix","Hi-C performed on Human foreskin fibroblast cell line (HFFc6). Data obtained from 4DN data portal (ID 4DNFI9FVHJZQ) and filtered to only retain chr17.",3.18,"GRCh38","HDF5","https://data.4dnucleome.org/files-processed/4DNFI9FVHJZQ/","Aug 17 2022","Homo sapiens",9606,"FALSE","Jacques Serizay","Jacques Serizay <[email protected]>","character","FilePath","HiContactsData/microC_HFFc6_chr17.mcool","HiCData"
9 changes: 8 additions & 1 deletion inst/scripts/make-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

# curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR533/009/SRR5339749/SRR5339749_1.fastq.gz -o mESCs_R1.fq.gz
# curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR533/009/SRR5339749/SRR5339749_2.fastq.gz -o mESCs_R2.fq.gz
# tinyMapper.sh --mode HiC --sample mESCs --genome ~/genomes/mm10/mm10 --output HiC --threads 16 --resolutions 10000,20000,40000,80000,160000,320000,640000,1280000,2560000
# tinyMapper.sh --mode HiC --sample mESCs --genome ~/genomes/mm10/mm10 --output HiC --threads 16 --resolutions 10000

### ---------- Subset and compress files

Expand Down Expand Up @@ -58,3 +58,10 @@
# --hicpro_maps \
# --max_cpus 18 \
# --max_memory '32.GB'

### ---------- Filter microC mcool

# cooler dump -t chroms /.cache/R/fourDNData/4d434d8538a0_4DNFI9FVHJZQ.mcool::/resolutions/250000 | grep -P 'chr17\t' > inst/extdata/chromsizes
# cooler dump -t pixels -r chr17 --join /.cache/R/fourDNData/4d434d8538a0_4DNFI9FVHJZQ.mcool::/resolutions/5000 > inst/extdata/chr17_5000.bg2
# cooler cload pairs inst/extdata/chromsizes:5000 inst/extdata/chr17_5000.bg2 -c1 1 -p1 2 -c2 4 -p2 5 -0 inst/extdata/chr17.cool
# cooler zoomify --resolutions 5000,100000,250000 --balance inst/extdata/chr17.cool -o inst/extdata/chr17.mcool
17 changes: 17 additions & 0 deletions inst/scripts/make-metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,23 @@ toydata <- list(
DispatchClass = "FilePath",
RDataPath = "HiContactsData/mESCs.pairs.gz",
Tags = "HiCData"
),
list(Title = "HFFc6 .mcool contact matrix",
Description = "Hi-C performed on Human foreskin fibroblast cell line (HFFc6). Data obtained from 4DN data portal (ID 4DNFI9FVHJZQ) and filtered to only retain chr17.",
BiocVersion = "3.16",
Genome = "GRCh38",
SourceType = "HDF5",
SourceUrl = "https://data.4dnucleome.org/files-processed/4DNFI9FVHJZQ/",
SourceVersion = "Aug 17 2022",
Species = "Homo sapiens",
TaxonomyId = "9606",
Coordinate_1_based = "FALSE",
DataProvider = "Jacques Serizay",
Maintainer = "Jacques Serizay <[email protected]>",
RDataClass = "character",
DispatchClass = "FilePath",
RDataPath = "HiContactsData/microC_HFFc6_chr17.mcool",
Tags = "HiCData"
)
)
toydata <- do.call(rbind, toydata) |> as.data.frame() |> apply(2, unlist) |> as.data.frame()
Expand Down
4 changes: 3 additions & 1 deletion vignettes/HiContactsData.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,11 @@ Several files are available using this function, namely:
- S288C_Eco1-AID.pairs.gz for chrII only (`sample`: `yeast_eco1`, `format` = `pairs.gz`)
- mESCs.mcool (`sample`: `mESCs`, `format` = `mcool`)
- mESCs.pairs.gz for chr13 only (`sample`: `mESCs`, `format` = `pairs.gz`)
- microC_HFFc6_chr17.mcool (`sample`: `microC`, `format` = `mcool`)

Yeast data comes from [Bastie, Chapard et al., Nature Structural & Molecular Biology 2022](https://doi.org/10.1038/s41594-022-00780-0)
and mouse ESC data comes from [Bonev et al., Cell 2017](https://doi.org/10.1016/j.cell.2017.09.043).
and mouse ESC data comes from [Bonev et al., Cell 2017](https://doi.org/10.1016/j.cell.2017.09.043).
Human HFFc6 micro-C data comes from [Krietenstein et al., Mol. Cell 2020](https://doi.org/10.1016/j.molcel.2020.03.003).

To download one of these files, one can specify a `sample` and a file `format`:

Expand Down

0 comments on commit bf76a9e

Please sign in to comment.