Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Faster cde rejection #255

Merged
merged 5 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ walkdir = "2.5.0"
time = { workspace = true, features = ["formatting", "macros"] }
anyhow = "1"
clap = { version = "=4.4.18", features = ["derive"] }
tempdir = "0.3.7"
tempfile = "3"

[features]
aes-crypto = ["aes", "constant_time_eq", "hmac", "pbkdf2", "sha1", "rand", "zeroize"]
Expand Down
20 changes: 18 additions & 2 deletions benches/read_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::io::{self, prelude::*, Cursor};

use bencher::Bencher;
use getrandom::getrandom;
use tempdir::TempDir;
use tempfile::TempDir;
use zip::write::SimpleFileOptions;
use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter};

Expand Down Expand Up @@ -102,7 +102,7 @@ fn parse_stream_archive(bench: &mut Bencher) {
let bytes = generate_random_archive(STREAM_ZIP_ENTRIES, STREAM_FILE_SIZE).unwrap();

/* Write to a temporary file path to incur some filesystem overhead from repeated reads */
let dir = TempDir::new("stream-bench").unwrap();
let dir = TempDir::with_prefix("stream-bench").unwrap();
let out = dir.path().join("bench-out.zip");
fs::write(&out, &bytes).unwrap();

Expand All @@ -116,11 +116,27 @@ fn parse_stream_archive(bench: &mut Bencher) {
bench.bytes = bytes.len() as u64;
}

fn parse_large_non_zip(bench: &mut Bencher) {
const FILE_SIZE: usize = 17_000_000;

// Create a large file that doesn't have a zip header (generating random data _might_ make a zip magic
// number somewhere which is _not_ what we're trying to test).
let dir = TempDir::with_prefix("large-non-zip-bench").unwrap();
let file = dir.path().join("zeros");
let buf = vec![0u8; FILE_SIZE];
fs::write(&file, &buf).unwrap();

bench.iter(|| {
assert!(zip::ZipArchive::new(std::fs::File::open(&file).unwrap()).is_err());
})
}

// Collect the benchmark functions defined in this file into the `benches` group, then pass
// that group to `benchmark_main!`. A benchmark must be listed here to be part of the group.
benchmark_group!(
benches,
read_metadata,
parse_archive_with_comment,
parse_zip64_archive_with_comment,
parse_stream_archive,
parse_large_non_zip,
);
benchmark_main!(benches);
4 changes: 2 additions & 2 deletions src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1554,7 +1554,7 @@
/// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this,
/// [`ZipFile::enclosed_name`] is the better option in most scenarios.
///
/// [`ParentDir`]: std::path::Component::ParentDir

Check warning on line 1557 in src/read.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--all-features)

unresolved link to `PathBuf::Component::ParentDir`

Check warning on line 1557 in src/read.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

unresolved link to `PathBuf::Component::ParentDir`

Check warning on line 1557 in src/read.rs

View workflow job for this annotation

GitHub Actions / style_and_docs

unresolved link to `PathBuf::Component::ParentDir`
pub fn mangled_name(&self) -> PathBuf {
    // Fetch the entry metadata first, then ask it for the sanitized file name.
    let metadata = self.get_metadata();
    metadata.file_name_sanitized()
}
Expand Down Expand Up @@ -1777,7 +1777,7 @@
use crate::CompressionMethod::Stored;
use crate::{ZipArchive, ZipWriter};
use std::io::{Cursor, Read, Write};
use tempdir::TempDir;
use tempfile::TempDir;

#[test]
fn invalid_offset() {
Expand Down Expand Up @@ -1979,7 +1979,7 @@
v.extend_from_slice(include_bytes!("../tests/data/symlink.zip"));
let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
assert!(reader.by_index(0).unwrap().is_symlink());
let tempdir = TempDir::new("test_is_symlink")?;
let tempdir = TempDir::with_prefix("test_is_symlink")?;
reader.extract(&tempdir).unwrap();
assert!(tempdir.path().join("bar").is_symlink());
Ok(())
Expand Down
13 changes: 10 additions & 3 deletions src/spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
/// The file size at which a ZIP64 record becomes necessary.
///
/// If a file larger than this threshold attempts to be written, compressed or uncompressed, and
/// [`FileOptions::large_file()`](crate::write::FileOptions) was not true, then
/// [`ZipWriter`](crate::write::ZipWriter) will

Check warning on line 93 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--all-features)

unresolved link to `ZipWriter`

Check warning on line 93 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

unresolved link to `ZipWriter`

Check warning on line 93 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs

unresolved link to `ZipWriter`
/// raise an [`io::Error`] with [`io::ErrorKind::Other`].
///
/// If the zip file itself is larger than this value, then a zip64 central directory record will be
Expand Down Expand Up @@ -135,7 +135,7 @@
/// The number of entries within a single zip necessary to allocate a zip64 central
/// directory record.
///
/// If more than this number of entries is written to a [`ZipWriter`](crate::write::ZipWriter),
/// then [`ZipWriter::finish()`](crate::write::ZipWriter::finish)

Check warning on line 138 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--all-features)

unresolved link to `ZipWriter`

Check warning on line 138 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--all-features)

unresolved link to `ZipWriter::finish`

Check warning on line 138 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

unresolved link to `ZipWriter`

Check warning on line 138 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

unresolved link to `ZipWriter::finish`

Check warning on line 138 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs

unresolved link to `ZipWriter`

Check warning on line 138 in src/spec.rs

View workflow job for this annotation

GitHub Actions / style_and_docs

unresolved link to `ZipWriter::finish`
/// will write out extra zip64 data to the end of the zip file.
pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize;

Expand Down Expand Up @@ -353,9 +353,16 @@
return Err(ZipError::InvalidArchive("Invalid zip header"));
}

let search_lower_bound = 0;

const END_WINDOW_SIZE: usize = 512;
// The End Of Central Directory Record should be the last thing in
// the file and so searching the last 65557 bytes of the file should
// be enough. However, not all zips are well-formed and other
// programs may consume zips with extra junk at the end without
// error, so we go back 128K to be compatible with them. 128K is
// arbitrary, but it matches what Info-Zip does.
const EOCDR_SEARCH_SIZE: u64 = 128 * 1024;
let search_lower_bound = file_length.saturating_sub(EOCDR_SEARCH_SIZE);

const END_WINDOW_SIZE: usize = 8192;
/* TODO: use static_assertions!() */
debug_assert!(END_WINDOW_SIZE > mem::size_of::<Magic>());

Expand Down
4 changes: 2 additions & 2 deletions tests/extract_symlink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
#[cfg(all(unix, feature = "_deflate-any"))]
fn extract_should_respect_links() {
use std::{fs, io, path::PathBuf, str::FromStr};
use tempdir::TempDir;
use tempfile::TempDir;
use zip::ZipArchive;

let mut v = Vec::new();
v.extend_from_slice(include_bytes!("data/pandoc_soft_links.zip"));
let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file");
let temp_dir = TempDir::new("pandoc_soft_links").unwrap();
let temp_dir = TempDir::with_prefix("pandoc_soft_links").unwrap();
archive.extract(&temp_dir).unwrap();

let symlink_path = temp_dir.path().join("pandoc-3.2-arm64/bin/pandoc-lua");
Expand Down
4 changes: 2 additions & 2 deletions tests/repro_old423.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
#[test]
fn repro_old423() -> zip::result::ZipResult<()> {
use std::io;
use tempdir::TempDir;
use tempfile::TempDir;
use zip::ZipArchive;

let mut v = Vec::new();
v.extend_from_slice(include_bytes!("data/lin-ub_iwd-v11.zip"));
let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file");
archive.extract(TempDir::new("repro_old423")?)
archive.extract(TempDir::with_prefix("repro_old423")?)
}
Loading