Skip to content

Commit

Permalink
Create summary HTML files
Browse files Browse the repository at this point in the history
  • Loading branch information
olliecheng committed Nov 27, 2024
1 parent bd70d17 commit 94fefb9
Show file tree
Hide file tree
Showing 7 changed files with 267 additions and 16 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
/temp

.DS_Store
.idea

# testing files
/tests/samples/local
Expand Down
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ clap = { version = "4.5.7", features = ["derive"] }
crossbeam = "0.8.4"
csv = "1.3.0"
env_logger = "0.11.3"
handlebars = "6.2.0"
indexmap = "2.5.0"
log = "0.4.22"
needletail = { git = "https://github.com/olliecheng/needletail" }
Expand Down
4 changes: 4 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ pub enum Commands {
/// the index file
#[arg(long)]
index: String,

/// output file
#[arg(long, default_value = "summary.html")]
output: String,
},

/// Generate a consensus-called 'cleaned up' file
Expand Down
22 changes: 16 additions & 6 deletions src/duplicates.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::file::FastqFile;
use anyhow::{Context, Result};
use csv::ReaderBuilder;
use indexmap::IndexMap;
use serde::Serialize;
use std::collections::BTreeMap;

use anyhow::Result;
use indexmap::IndexMap;
use std::io::{BufRead, BufReader};

pub type DuplicateMap = IndexMap<RecordIdentifier, Vec<usize>>;

Expand Down Expand Up @@ -32,7 +33,7 @@ pub struct DuplicateStatistics {
pub distribution: BTreeMap<usize, usize>,
}

pub fn get_duplicates(index: &str) -> Result<(DuplicateMap, DuplicateStatistics)> {
pub fn get_duplicates(index: &str) -> Result<(DuplicateMap, DuplicateStatistics, FastqFile)> {
let mut map = IndexMap::<RecordIdentifier, Vec<usize>>::new();
let mut stats = DuplicateStatistics {
total_reads: 0,
Expand All @@ -42,10 +43,19 @@ pub fn get_duplicates(index: &str) -> Result<(DuplicateMap, DuplicateStatistics)
distribution: BTreeMap::new(),
};

let file = std::fs::File::open(index)?;
let mut file = BufReader::new(file);

let mut header = String::new();
file.read_line(&mut header).context("Could not read the first line")?;

assert!(header.starts_with('#'));
let info: FastqFile = serde_json::from_str(&header[1..])?;

let mut reader = ReaderBuilder::new()
.delimiter(b'\t')
.has_headers(true)
.from_path(index)?;
.from_reader(&mut file);

for read in reader.records() {
let record = read?;
Expand Down Expand Up @@ -95,5 +105,5 @@ pub fn get_duplicates(index: &str) -> Result<(DuplicateMap, DuplicateStatistics)

stats.proportion_duplicate = stats.duplicate_reads as f64 / stats.total_reads as f64;

Ok((map, stats))
Ok((map, stats, info))
}
15 changes: 5 additions & 10 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ mod generate_index;
mod cli;
mod preset;
mod file;
mod summary;

use cli::{Cli, Commands};
// mod ordered_rayon;
Expand All @@ -45,14 +46,8 @@ fn try_main() -> Result<()> {
println!("nailpolish v{}", cli::VERSION);

match &cli.command {
Commands::Summary { index } => {
info!("Summarising index at {index}");
let (_, statistics) = duplicates::get_duplicates(index)?;

println!(
"{}",
serde_json::to_string_pretty(&statistics).expect("Should be serialisable")
);
Commands::Summary { index, output } => {
summary::summarize(index, output)?;
}
Commands::Index {
file,
Expand Down Expand Up @@ -86,7 +81,7 @@ fn try_main() -> Result<()> {
report_original_reads,
} => {
info!("Collecting duplicates... {}", duplicates_only);
let (duplicates, _statistics) =
let (duplicates, _statistics, _) =
duplicates::get_duplicates(index).expect("Could not parse index.");
info!("Iterating through individual duplicates");

Expand Down Expand Up @@ -122,7 +117,7 @@ fn try_main() -> Result<()> {
);

info!("Collecting duplicates...");
let (duplicates, _statistics) =
let (duplicates, statistics, _) =
duplicates::get_duplicates(index).expect("Could not parse index.");
info!("Iterating through individual duplicates");

Expand Down
28 changes: 28 additions & 0 deletions src/summary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
use crate::duplicates;
use anyhow::{Context, Result};
use serde_json::json;
use std::collections::HashMap;

// include the template HTML file at compile time as a string literal;
// `include_str!` resolves the path relative to this source file,
// so the template lives alongside it at src/summary_template.html
const TEMPLATE_HTML: &str = include_str!("summary_template.html");

pub fn summarize(index: &str, output: &str) -> Result<()> {
info!("Summarising index at {index}");
let (_, statistics, info) = duplicates::get_duplicates(index)?;

let mut data = serde_json::to_value(info).context("Could not serialize info")?;

println!("{}", serde_json::to_string(&statistics)?);
data["stats"] = serde_json::Value::String(serde_json::to_string(&statistics)?);

println!(
"{}",
serde_json::to_string_pretty(&data).context("Should be serialisable")?
);

let file = std::fs::File::create(output)?;
let mut reg = handlebars::Handlebars::new();
reg.render_template_to_write(TEMPLATE_HTML, &data, file);

Ok(())
}
212 changes: 212 additions & 0 deletions src/summary_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta content="width=device-width, initial-scale=1.0" name="viewport">
    <title>nailpolish summary report</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            padding: 20px;
            margin: auto;
            max-width: 800px;
            min-height: 100%;
            background-color: white;
        }

        html {
            background-color: #beabc2;
        }

        td {
            vertical-align: top;
        }

        /* label column: fixed minimum width keeps the value column aligned */
        td:nth-child(1) {
            min-width: 150px;
        }

        /* value column: monospace suits file paths and numeric values */
        td:nth-child(2) {
            font-family: monospace;
            padding-left: 10px;
            font-size: 1.1em;
        }

        canvas {
            margin-top: 20px;
        }
    </style>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.3.0/chart.umd.min.js"></script>
    <script>
        // `stats` is injected by the summarize command as a raw JSON string;
        // triple braces stop handlebars from HTML-escaping the quotes
        let stats = {{{ stats }}};
        let data = stats.distribution;
    </script>
</head>
<body>
<h1>💅 nailpolish summary report</h1>
<h2>Summary table</h2>
<table>
    <tr>
        <td>
            nailpolish version
        </td>
        <td>
            {{ nailpolish_version }}
        </td>
    </tr>

    <tr>
        <td>
            file path
        </td>
        <td>
            {{ file_path }}
        </td>
    </tr>
    <tr>
        <td>
            dataset size
        </td>
        <td>
            {{ gb }}
        </td>
    </tr>
    <tr>
        <td>
            index date
        </td>
        <td>
            {{ index_date }}
        </td>
    </tr>

    <tr>
        <td>
            total read count
        </td>
        <td>
            {{ read_count }}
        </td>
    </tr>
    <tr>
        <td>
            matched reads
        </td>
        <td>
            {{ matched_read_count }}
        </td>
    </tr>
    <tr>
        <td>
            unmatched reads
        </td>
        <td>
            {{ unmatched_read_count }}
        </td>
    </tr>

    <tr>
        <td>
            average quality
        </td>
        <td>
            {{ avg_qual }}
        </td>
    </tr>
    <tr>
        <td>
            average length
        </td>
        <td>
            {{ avg_len }}
        </td>
    </tr>
</table>
<h2>
    By UMI group
</h2>

A 'UMI group' is a group of reads which all share the same barcode and UMI.

<canvas id="byUmi"></canvas>

<h2>
    By read
</h2>

Each read is classified by the number of reads in its corresponding UMI group.

<canvas id="byRead"></canvas>

<script>
    // distribution maps duplicate-count -> number of UMI groups with that count
    const umi_dup_data = {
        labels: Object.keys(data),
        datasets: [{
            label: "",
            data: Object.values(data)
        }]
    };

    const ctxUmi = document.getElementById('byUmi').getContext('2d');
    new Chart(ctxUmi, {
        type: 'bar',
        data: umi_dup_data,
        options: {
            plugins: {
                legend: {
                    display: false
                }
            },
            responsive: true,
            scales: {
                y: {
                    beginAtZero: true
                },
                x: {
                    // Chart.js v4 scale titles must be an object with
                    // display/text — a bare string is silently ignored
                    title: {
                        display: true,
                        text: "duplicate count"
                    }
                }
            }
        }
    });

    // weight each bucket by its duplicate count: a bucket of v groups with
    // k reads each accounts for k * v individual reads
    let read_data = Object.fromEntries(Object.entries(data).map(([k, v]) => ([k, v * k])));

    const read_dup_data = {
        labels: Object.keys(read_data),
        datasets: [{
            label: "",
            data: Object.values(read_data)
        }]
    };

    const ctxRead = document.getElementById('byRead').getContext('2d');
    new Chart(ctxRead, {
        type: 'bar',
        data: read_dup_data,
        options: {
            plugins: {
                legend: {
                    display: false
                }
            },
            responsive: true,
            scales: {
                y: {
                    beginAtZero: true
                },
                x: {
                    title: {
                        display: true,
                        text: "duplicate count"
                    }
                }
            }
        }
    });
</script>
</body>
</html>

0 comments on commit 94fefb9

Please sign in to comment.