diff --git a/R/globals.R b/R/globals.R index 558f2a8..fcce0ed 100644 --- a/R/globals.R +++ b/R/globals.R @@ -12,7 +12,8 @@ HiContactsDataFiles <- rbind( c('yeast_eco1', 'mcool', 'S288C', 'Eco1-AID+IAA', 'multi-res .mcool file', 'EH7704'), c('yeast_eco1', 'pairs.gz', 'S288C', 'Eco1-AID+IAA', 'only pairs from chrII are provided', 'EH7705'), c('mESCs', 'mcool', 'mm10', 'mESCs', 'multi-res .mcool file', 'EH7706'), - c('mESCs', 'pairs.gz', 'mm10', 'mESCs', 'only pairs from chr13 are provided', 'EH7707') + c('mESCs', 'pairs.gz', 'mm10', 'mESCs', 'only pairs from chr13 are provided', 'EH7707'), + c('microC', 'mcool', 'GRCh38', 'HFFc6', 'multi-res .mcool file, only chr17 is provided', '') ) colnames(HiContactsDataFiles) <- c( 'sample', 'format', 'genome', 'condition', 'notes', 'EHID' diff --git a/README.md b/README.md index 549a7b6..5f27ee1 100644 --- a/README.md +++ b/README.md @@ -28,3 +28,6 @@ Several files are avaible using this function, namely: - From [Bonev et al., Cell 2017](https://doi.org/10.1016/j.cell.2017.09.043): - mESCs.mcool (`sample`: `mESCs`, `format` = `mcool`) - mESCs.pairs.gz for chr13 only (`sample`: `mESCs`, `format` = `pairs`) + +- From [Krietenstein et al., Mol. Cell 2020](https://doi.org/10.1016/j.molcel.2020.03.003): + - microC_HFFc6_chr17.mcool (`sample`: `microC`, `format` = `mcool`) diff --git a/inst/extdata/metadata.csv b/inst/extdata/metadata.csv index 250830d..d76d734 100644 --- a/inst/extdata/metadata.csv +++ b/inst/extdata/metadata.csv @@ -13,3 +13,4 @@ "Eco1-AID yeast .pairs file","Hi-C performed on Eco1-AID mutant yeast strain upon IAA treatment processed with tinyMapper and represented as a filtered pairs file. 
Data representation derived from SRA run results SRR16250953.",3.16,"S288C","Zip","https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM5918405","Aug 17 2022","Saccharomyces cerevisiae",4932,"FALSE","Jacques Serizay","Jacques Serizay ","character","FilePath","HiContactsData/S288C_Eco1-AID.pairs.gz","HiCData" "mESC .mcool contact matrix","Hi-C performed on mouse embryonic stem cells (mESCs) processed with tinyMapper and represented as a multi-resolution mcool file. Data representation derived from SRA run results SRR5339749.",3.16,"mm10","HDF5","https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM2533818","Aug 17 2022","Mus musculus",10090,"FALSE","Jacques Serizay","Jacques Serizay ","character","FilePath","HiContactsData/mESCs.mcool","HiCData" "mESC .pairs file","Hi-C performed on mouse embryonic stem cells (mESCs) processed with tinyMapper and represented as a filtered pairs file. Data representation derived from SRA run results SRR5339749.",3.16,"mm10","TXT","https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM2533818","Aug 17 2022","Mus musculus",10090,"FALSE","Jacques Serizay","Jacques Serizay ","character","FilePath","HiContactsData/mESCs.pairs.gz","HiCData" +"HFFc6 .mcool contact matrix","Hi-C performed on Human foreskin fibroblast cell line (HFFc6). 
Data obtained from 4DN data portal (ID 4DNFI9FVHJZQ) and filtered to only retain chr17.",3.18,"GRCh38","HDF5","https://data.4dnucleome.org/files-processed/4DNFI9FVHJZQ/","Aug 17 2022","Homo sapiens",9606,"FALSE","Jacques Serizay","Jacques Serizay ","character","FilePath","HiContactsData/microC_HFFc6_chr17.mcool","HiCData" diff --git a/inst/scripts/make-data.R b/inst/scripts/make-data.R index 4c031e2..e3ae910 100644 --- a/inst/scripts/make-data.R +++ b/inst/scripts/make-data.R @@ -15,7 +15,7 @@ # curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR533/009/SRR5339749/SRR5339749_1.fastq.gz -o mESCs_R1.fq.gz # curl -L ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR533/009/SRR5339749/SRR5339749_2.fastq.gz -o mESC_R2.fq.gz -# tinyMapper.sh --mode HiC --sample mESCs --genome ~/genomes/mm10/mm10 --output HiC --threads 16 --resolutions 10000,20000,40000,80000,160000,320000,640000,1280000,2560000 +# tinyMapper.sh --mode HiC --sample mESCs --genome ~/genomes/mm10/mm10 --output HiC --threads 16 --resolutions 10000 ### ---------- Subset and compress files @@ -58,3 +58,10 @@ # --hicpro_maps \ # --max_cpus 18 \ # --max_memory '32.GB' + +### ---------- Filter microC mcool + +# cooler dump -t chroms /.cache/R/fourDNData/4d434d8538a0_4DNFI9FVHJZQ.mcool::/resolutions/250000 | grep -P 'chr17\t' > inst/extdata/chromsizes +# cooler dump -t pixels -r chr17 --join /.cache/R/fourDNData/4d434d8538a0_4DNFI9FVHJZQ.mcool::/resolutions/5000 > inst/extdata/chr17_5000.bg2 +# cooler cload pairs inst/extdata/chromsizes:5000 inst/extdata/chr17_5000.bg2 -c1 1 -p1 2 -c2 4 -p2 5 -0 inst/extdata/chr17.cool +# cooler zoomify --resolutions 5000,100000,250000 --balance inst/extdata/chr17.cool -o inst/extdata/chr17.mcool diff --git a/inst/scripts/make-metadata.R b/inst/scripts/make-metadata.R index 0caed7c..f986746 100644 --- a/inst/scripts/make-metadata.R +++ b/inst/scripts/make-metadata.R @@ -237,6 +237,23 @@ toydata <- list( DispatchClass = "FilePath", RDataPath = "HiContactsData/mESCs.pairs.gz", Tags = "HiCData" + ), 
+ list(Title = "HFFc6 .mcool contact matrix", + Description = "Hi-C performed on Human foreskin fibroblast cell line (HFFc6). Data obtained from 4DN data portal (ID 4DNFI9FVHJZQ) and filtered to only retain chr17.", + BiocVersion = "3.16", + Genome = "GRCh38", + SourceType = "HDF5", + SourceUrl = "https://data.4dnucleome.org/files-processed/4DNFI9FVHJZQ/", + SourceVersion = "Aug 17 2022", + Species = "Homo sapiens", + TaxonomyId = "9606", + Coordinate_1_based = "FALSE", + DataProvider = "Jacques Serizay", + Maintainer = "Jacques Serizay ", + RDataClass = "character", + DispatchClass = "FilePath", + RDataPath = "HiContactsData/microC_HFFc6_chr17.mcool", + Tags = "HiCData" ) ) toydata <- do.call(rbind, toydata) |> as.data.frame() |> apply(2, unlist) |> as.data.frame() diff --git a/vignettes/HiContactsData.Rmd b/vignettes/HiContactsData.Rmd index c95f0ac..d911c36 100644 --- a/vignettes/HiContactsData.Rmd +++ b/vignettes/HiContactsData.Rmd @@ -45,9 +45,11 @@ Several files are avaible using this function, namely: - S288C_Eco1-AID.pairs.gz for chrII only (`sample`: `yeast_Eco1`, `format` = `pairs`) - mESCs.mcool (`sample`: `mESCs`, `format` = `mcool`) - mESCs.pairs.gz for chr13 only (`sample`: `mESCs`, `format` = `pairs`) +- microC_HFFc6_chr17.mcool (`sample`: `microC`, `format` = `mcool`) Yeast data comes from [Bastie, Chapard et al., Nature Structural & Molecular Biology 2022](https://doi.org/10.1038/s41594-022-00780-0) -and mouse ESC data comes from [Bonev et al., Cell 2017](https://doi.org/10.1016/j.cell.2017.09.043). +and mouse ESC data comes from [Bonev et al., Cell 2017](https://doi.org/10.1016/j.cell.2017.09.043). +Human HFFc6 micro-C data comes from [Krietenstein et al., Mol. Cell 2020](https://doi.org/10.1016/j.molcel.2020.03.003). To download one of these files, one can specify a `sample` and a file `format`: