diff --git a/src/main.rs b/src/main.rs
index 383a8e9..a484191 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -25,6 +25,7 @@ use chrono::Utc;
 use curl::easy::Easy;
 use fs2::FileExt;
 use github::{CreateTag, Github};
+use rayon::prelude::*;
 
 use crate::config::{Channel, Config};
 
@@ -172,6 +173,18 @@ impl Context {
         self.assert_all_components_present()?;
 
+        // Quickly produce gzip-compressed artifacts that are needed for successful
+        // manifest building.
+        let recompress = [
+            self.dl_dir()
+                .join("rust-nightly-x86_64-unknown-linux-gnu.tar.xz"),
+            self.dl_dir()
+                .join("cargo-nightly-x86_64-unknown-linux-gnu.tar.xz"),
+        ];
+        recompress.par_iter().try_for_each(|tarball| {
+            recompress::recompress_file(tarball, false, flate2::Compression::fast(), false)
+        })?;
+
         // Ok we've now determined that a release needs to be done.
 
         let mut signer = Signer::new(&self.config)?;
 
diff --git a/src/recompress.rs b/src/recompress.rs
index f42ecf0..693189e 100644
--- a/src/recompress.rs
+++ b/src/recompress.rs
@@ -18,6 +18,121 @@ use std::path::Path;
 use std::time::{Duration, Instant};
 use xz2::read::XzDecoder;
 
+pub(crate) fn recompress_file(
+    xz_path: &Path,
+    recompress_gz: bool,
+    gz_compression_level: flate2::Compression,
+    recompress_xz: bool,
+) -> anyhow::Result<()> {
+    println!("recompressing {}...", xz_path.display());
+    let file_start = Instant::now();
+    let gz_path = xz_path.with_extension("gz");
+
+    let mut destinations: Vec<(&str, Box<dyn Write>)> = Vec::new();
+
+    // Produce gzip if explicitly enabled or the destination file doesn't exist.
+    if recompress_gz || !gz_path.is_file() {
+        let gz = File::create(gz_path)?;
+        destinations.push((
+            "gz",
+            Box::new(flate2::write::GzEncoder::new(gz, gz_compression_level)),
+        ));
+    }
+
+    // xz recompression with more aggressive settings than we want to take the time
+    // for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
+    //
+    // Note that this is using a single-threaded compressor, as we're parallelizing
+    // via rayon already. In rust-lang/rust we were trying to use parallel
+    // compression, but the default block size for that is 3*dict_size, so we
+    // weren't actually using more than one core in most of the builders with
+    // <192MB uncompressed tarballs. In promote-release, since we're recompressing
+    // hundreds of tarballs, there's no need for each individual compression to be
+    // parallel.
+    let xz_recompressed = xz_path.with_extension("xz_recompressed");
+    if recompress_xz {
+        let mut filters = xz2::stream::Filters::new();
+        let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
+        // This sets the overall dictionary size, which is also how much memory (baseline)
+        // is needed for decompression.
+        lzma_ops.dict_size(64 * 1024 * 1024);
+        // Use the best match finder for compression ratio.
+        lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
+        lzma_ops.mode(xz2::stream::Mode::Normal);
+        // Set nice_len to the maximum for the best compression ratio.
+        lzma_ops.nice_len(273);
+        // Set depth to a reasonable value; 0 means auto, 1000 is somewhat high but gives
+        // good results.
+        lzma_ops.depth(1000);
+        // 2 is the default and does well for most files.
+        lzma_ops.position_bits(2);
+        // 0 is the default and does well for most files.
+        lzma_ops.literal_position_bits(0);
+        // 3 is the default and does well for most files.
+        lzma_ops.literal_context_bits(3);
+
+        filters.lzma2(&lzma_ops);
+
+        // FIXME: Do we want a checksum as part of compression?
+        let stream =
+            xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None).unwrap();
+        let xz_out = File::create(&xz_recompressed)?;
+        destinations.push((
+            "xz",
+            Box::new(xz2::write::XzEncoder::new_stream(
+                std::io::BufWriter::new(xz_out),
+                stream,
+            )),
+        ));
+    }
+
+    // We only decompress once and then write into each of the compressors before
+    // moving on.
+    //
+    // This code assumes that compression with `write_all` will never fail (i.e., we
+    // can take arbitrary amounts of data as input). That seems like a reasonable
+    // assumption though.
+    let mut decompressor = XzDecoder::new(File::open(xz_path)?);
+    let mut buffer = vec![0u8; 4 * 1024 * 1024];
+    let mut decompress_time = Duration::ZERO;
+    let mut time_by_dest = vec![Duration::ZERO; destinations.len()];
+    loop {
+        let start = Instant::now();
+        let length = decompressor.read(&mut buffer)?;
+        decompress_time += start.elapsed();
+        if length == 0 {
+            break;
+        }
+        for (idx, (_, destination)) in destinations.iter_mut().enumerate() {
+            let start = std::time::Instant::now();
+            destination.write_all(&buffer[..length])?;
+            time_by_dest[idx] += start.elapsed();
+        }
+    }
+
+    let mut compression_times = String::new();
+    for (idx, (name, _)) in destinations.iter().enumerate() {
+        write!(
+            compression_times,
+            ", {:.2?} {} compression",
+            time_by_dest[idx], name
+        )?;
+    }
+    println!(
+        "recompressed {}: {:.2?} total, {:.2?} decompression{}",
+        xz_path.display(),
+        file_start.elapsed(),
+        decompress_time,
+        compression_times
+    );
+
+    if recompress_xz {
+        fs::rename(&xz_recompressed, xz_path)?;
+    }
+
+    Ok(())
+}
+
 impl Context {
     pub fn recompress(&self, directory: &Path) -> anyhow::Result<()> {
         let mut to_recompress = Vec::new();
@@ -77,114 +192,7 @@ impl Context {
                 let path = to_recompress.lock().unwrap().pop();
                 path
             } {
-                println!("recompressing {}...", xz_path.display());
-                let file_start = Instant::now();
-                let gz_path = xz_path.with_extension("gz");
-
-                let mut destinations: Vec<(&str, Box<dyn Write>)> = Vec::new();
-
-                // Produce gzip if explicitly enabled or the destination file doesn't exist.
-                if recompress_gz || !gz_path.is_file() {
-                    let gz = File::create(gz_path)?;
-                    destinations.push((
-                        "gz",
-                        Box::new(flate2::write::GzEncoder::new(gz, compression_level)),
-                    ));
-                }
-
-                // xz recompression with more aggressive settings than we want to take the time
-                // for in rust-lang/rust CI. This cuts 5-15% off of the produced tarballs.
-                //
-                // Note that this is using a single-threaded compressor as we're parallelizing
-                // via rayon already. In rust-lang/rust we were trying to use parallel
-                // compression, but the default block size for that is 3*dict_size so we
-                // weren't actually using more than one core in most of the builders with
-                // <192MB uncompressed tarballs. In promote-release since we're recompressing
-                // 100s of tarballs there's no need for each individual compression to be
-                // parallel.
-                let xz_recompressed = xz_path.with_extension("xz_recompressed");
-                if recompress_xz {
-                    let mut filters = xz2::stream::Filters::new();
-                    let mut lzma_ops = xz2::stream::LzmaOptions::new_preset(9).unwrap();
-                    // This sets the overall dictionary size, which is also how much memory (baseline)
-                    // is needed for decompression.
-                    lzma_ops.dict_size(64 * 1024 * 1024);
-                    // Use the best match finder for compression ratio.
-                    lzma_ops.match_finder(xz2::stream::MatchFinder::BinaryTree4);
-                    lzma_ops.mode(xz2::stream::Mode::Normal);
-                    // Set nice len to the maximum for best compression ratio
-                    lzma_ops.nice_len(273);
-                    // Set depth to a reasonable value, 0 means auto, 1000 is somwhat high but gives
-                    // good results.
-                    lzma_ops.depth(1000);
-                    // 2 is the default and does well for most files
-                    lzma_ops.position_bits(2);
-                    // 0 is the default and does well for most files
-                    lzma_ops.literal_position_bits(0);
-                    // 3 is the default and does well for most files
-                    lzma_ops.literal_context_bits(3);
-
-                    filters.lzma2(&lzma_ops);
-
-                    // FIXME: Do we want a checksum as part of compression?
-                    let stream = xz2::stream::Stream::new_stream_encoder(
-                        &filters,
-                        xz2::stream::Check::None,
-                    )
-                    .unwrap();
-                    let xz_out = File::create(&xz_recompressed)?;
-                    destinations.push((
-                        "xz",
-                        Box::new(xz2::write::XzEncoder::new_stream(
-                            std::io::BufWriter::new(xz_out),
-                            stream,
-                        )),
-                    ));
-                }
-
-                // We only decompress once and then write into each of the compressors before
-                // moving on.
-                //
-                // This code assumes that compression with `write_all` will never fail (i.e., we
-                // can take arbitrary amounts of data as input). That seems like a reasonable
-                // assumption though.
-                let mut decompressor = XzDecoder::new(File::open(&xz_path)?);
-                let mut buffer = vec![0u8; 4 * 1024 * 1024];
-                let mut decompress_time = Duration::ZERO;
-                let mut time_by_dest = vec![Duration::ZERO; destinations.len()];
-                loop {
-                    let start = Instant::now();
-                    let length = decompressor.read(&mut buffer)?;
-                    decompress_time += start.elapsed();
-                    if length == 0 {
-                        break;
-                    }
-                    for (idx, (_, destination)) in destinations.iter_mut().enumerate() {
-                        let start = std::time::Instant::now();
-                        destination.write_all(&buffer[..length])?;
-                        time_by_dest[idx] += start.elapsed();
-                    }
-                }
-
-                let mut compression_times = String::new();
-                for (idx, (name, _)) in destinations.iter().enumerate() {
-                    write!(
-                        compression_times,
-                        ", {:.2?} {} compression",
-                        time_by_dest[idx], name
-                    )?;
-                }
-                println!(
-                    "recompressed {}: {:.2?} total, {:.2?} decompression{}",
-                    xz_path.display(),
-                    file_start.elapsed(),
-                    decompress_time,
-                    compression_times
-                );
-
-                if recompress_xz {
-                    fs::rename(&xz_recompressed, xz_path)?;
-                }
+                recompress_file(&xz_path, recompress_gz, compression_level, recompress_xz)?;
             }
 
             Ok::<_, anyhow::Error>(())
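
For reference, the extracted helper also makes recompression easy to drive from other call sites. Below is a minimal, hypothetical sketch (not part of this patch) that fans `recompress_file` out over every `.xz` tarball in a directory, reusing the same `par_iter().try_for_each` pattern `main.rs` now uses; `recompress_all` and its directory scan are illustrative only.

```rust
use std::path::{Path, PathBuf};

use rayon::prelude::*;

use crate::recompress::recompress_file;

/// Hypothetical driver: recompress every `.xz` tarball found in `dir`,
/// producing any missing `.gz` siblings. Parallelism stays at the file
/// level; each `recompress_file` call is single-threaded, matching the
/// rationale in the comments above.
fn recompress_all(dir: &Path) -> anyhow::Result<()> {
    let tarballs: Vec<PathBuf> = std::fs::read_dir(dir)?
        .filter_map(|entry| entry.ok().map(|e| e.path()))
        .filter(|path| path.extension().map_or(false, |ext| ext == "xz"))
        .collect();

    // `try_for_each` short-circuits on the first error, like the call in
    // `main.rs`. With both flags false, gzip output is only produced where
    // it's missing and the xz tarballs themselves are left untouched.
    tarballs.par_iter().try_for_each(|tarball| {
        recompress_file(tarball, false, flate2::Compression::fast(), false)
    })
}
```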