Skip to content

Commit

Permalink
fix: Some archives with over u16::MAX files were handled incorrectly …
Browse files Browse the repository at this point in the history
…or slowly (#189)
  • Loading branch information
Pr0methean committed Jun 22, 2024
1 parent 8bb3be0 commit c934c82
Showing 1 changed file with 119 additions and 56 deletions.
175 changes: 119 additions & 56 deletions src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ pub(crate) mod zip_archive {
pub(crate) files: Vec<super::ZipFileData>,
pub(super) offset: u64,
pub(super) dir_start: u64,
pub(super) dir_end: u64,
// This isn't yet used anywhere, but it is here for use cases in the future.
#[allow(dead_code)]
pub(super) config: super::Config,
Expand Down Expand Up @@ -405,9 +404,11 @@ pub(crate) fn make_reader(
pub(crate) struct CentralDirectoryInfo {
pub(crate) archive_offset: u64,
pub(crate) directory_start: u64,
pub(crate) cde_position: u64,
pub(crate) number_of_files: usize,
pub(crate) disk_number: u32,
pub(crate) disk_with_central_directory: u32,
pub(crate) is_zip64: bool,
}

impl<R> ZipArchive<R> {
Expand Down Expand Up @@ -560,6 +561,8 @@ impl<R: Read + Seek> ZipArchive<R> {
number_of_files,
disk_number: footer.disk_number as u32,
disk_with_central_directory: footer.disk_with_central_directory as u32,
cde_position: cde_start_pos,
is_zip64: false
})
}

Expand Down Expand Up @@ -662,6 +665,8 @@ impl<R: Read + Seek> ZipArchive<R> {
number_of_files: footer64.number_of_files as usize,
disk_number: footer64.disk_number,
disk_with_central_directory: footer64.disk_with_central_directory,
cde_position: cde_start_pos,
is_zip64: true,
})
}
}).collect();
Expand All @@ -674,56 +679,93 @@ impl<R: Read + Seek> ZipArchive<R> {
config: Config,
reader: &mut R,
) -> ZipResult<(Zip32CentralDirectoryEnd, Shared)> {
let mut invalid_errors = Vec::new();
let mut unsupported_errors = Vec::new();
let mut invalid_errors_32 = Vec::new();
let mut unsupported_errors_32 = Vec::new();
let mut invalid_errors_64 = Vec::new();
let mut unsupported_errors_64 = Vec::new();
let mut ok_results = Vec::new();
let cde_locations = spec::Zip32CentralDirectoryEnd::find_and_parse(reader)?;
cde_locations
.into_vec()
.into_iter()
.for_each(|(footer, cde_start_pos)| {
cde_locations.into_vec().into_iter().for_each(|(footer, cde_start_pos)| {
let zip32_result =
Self::get_directory_info_zip32(&config, reader, &footer, cde_start_pos);
Self::sort_result(
zip32_result
.and_then(|result| Self::read_central_header(result, config, reader)),
&mut invalid_errors,
&mut unsupported_errors,
zip32_result,
&mut invalid_errors_32,
&mut unsupported_errors_32,
&mut ok_results,
&footer,
);
let mut inner_results = Vec::with_capacity(1);
// Check if file has a zip64 footer
if let Ok(zip64_footers) =
Self::get_directory_info_zip64(&config, reader, &footer, cde_start_pos)
let zip64_vec_result =
Self::get_directory_info_zip64(&config, reader, &footer, cde_start_pos);
Self::sort_result(
zip64_vec_result,
&mut invalid_errors_64,
&mut unsupported_errors_64,
&mut inner_results,
&(),
);
inner_results.into_iter().for_each(|(_, results)| {
results.into_iter().for_each(|result| {
Self::sort_result(
result,
&mut invalid_errors_64,
&mut unsupported_errors_64,
&mut ok_results,
&footer,
);
});
});
}
);
ok_results.sort_by_key(|(_, result)| (
!result.is_zip64, // try ZIP64 first
u64::MAX - result.cde_position, // try the last one first
));
let mut best_result = None;
for (footer, result) in ok_results {
let mut inner_result = Vec::with_capacity(1);
let is_zip64 = result.is_zip64;
Self::sort_result(
Self::read_central_header(result, config, reader),
if is_zip64 {
&mut invalid_errors_64
} else {
&mut invalid_errors_32
},
if is_zip64 {
&mut unsupported_errors_64
} else {
&mut unsupported_errors_32
},
&mut inner_result,
&()
);
if let Some((_, shared)) = inner_result.into_iter().next() {
if shared.files.len() == footer.number_of_files as usize
|| (is_zip64 && footer.number_of_files == ZIP64_ENTRY_THR as u16)
{
zip64_footers
.into_iter()
.map(|result| {
result.and_then(|dir_info| {
Self::read_central_header(dir_info, config, reader)
})
})
.for_each(|result| {
Self::sort_result(
result,
&mut invalid_errors,
&mut unsupported_errors,
&mut ok_results,
&footer,
)
});
best_result = Some((footer, shared));
break;
} else {
if is_zip64 {
&mut invalid_errors_64
} else {
&mut invalid_errors_32
}.push(InvalidArchive("wrong number of files"))
}
});
if ok_results.is_empty() {
return Err(unsupported_errors
.into_iter()
.next()
.unwrap_or_else(|| invalid_errors.into_iter().next().unwrap()));
}
}
let (footer, shared) = ok_results
.into_iter()
.max_by_key(|(_, shared)| (shared.dir_end, u64::MAX - shared.dir_start))
.unwrap();
let Some((footer, shared)) = best_result else {
return Err(unsupported_errors_32.into_iter()
.chain(unsupported_errors_64.into_iter())

Check failure on line 762 in src/read.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

explicit call to `.into_iter()` in function argument accepting `IntoIterator`
.chain(invalid_errors_32.into_iter())

Check failure on line 763 in src/read.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

explicit call to `.into_iter()` in function argument accepting `IntoIterator`
.chain(invalid_errors_64.into_iter())

Check failure on line 764 in src/read.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

explicit call to `.into_iter()` in function argument accepting `IntoIterator`

.next()
.unwrap());
};
reader.seek(io::SeekFrom::Start(shared.dir_start))?;
Ok((Rc::try_unwrap(footer).unwrap(), shared.build()))
}
Expand All @@ -749,37 +791,27 @@ impl<R: Read + Seek> ZipArchive<R> {
let file = central_header_to_zip_file(reader, dir_info.archive_offset)?;
files.push(file);
}
let dir_end = reader.stream_position()?;
Ok(SharedBuilder {
files,
offset: dir_info.archive_offset,
dir_start: dir_info.directory_start,
dir_end,
config,
})
}

fn sort_result(
result: Result<SharedBuilder, ZipError>,
fn sort_result<T, U: Clone>(
result: ZipResult<T>,
invalid_errors: &mut Vec<ZipError>,
unsupported_errors: &mut Vec<ZipError>,
ok_results: &mut Vec<(Rc<Zip32CentralDirectoryEnd>, SharedBuilder)>,
footer: &Rc<Zip32CentralDirectoryEnd>,
ok_results: &mut Vec<(U, T)>,
footer: &U,
) {
match result {
Err(ZipError::UnsupportedArchive(e)) => {
unsupported_errors.push(ZipError::UnsupportedArchive(e))
}
Err(e) => invalid_errors.push(e),
Ok(o) => {
if o.files.len() == footer.number_of_files as usize
|| footer.number_of_files == ZIP64_ENTRY_THR as u16
{
ok_results.push((footer.clone(), o))
} else {
invalid_errors.push(InvalidArchive("wrong number of files"))
}
}
Ok(o) => ok_results.push((footer.clone(), o)),
}
}

Expand Down Expand Up @@ -1659,9 +1691,12 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult<Opt

#[cfg(test)]
mod test {
use crate::ZipArchive;
use std::io::Cursor;
use crate::{ZipArchive, ZipWriter};
use std::io::{Cursor, Read, Write};
use tempdir::TempDir;
use crate::CompressionMethod::Stored;
use crate::result::ZipResult;
use crate::write::SimpleFileOptions;

#[test]
fn invalid_offset() {
Expand Down Expand Up @@ -1893,4 +1928,32 @@ mod test {
let mut reader = ZipArchive::new(Cursor::new(v)).unwrap();
reader.by_name("你好.txt").unwrap();
}

#[test]
fn test_64k_files() -> ZipResult<()> {
let mut writer = ZipWriter::new(Cursor::new(Vec::new()));
let options = SimpleFileOptions {compression_method: Stored, ..Default::default()};
for i in 0..=u16::MAX {
let file_name = format!("{i}.txt");
writer.start_file(&*file_name, options)?;
writer.write_all(i.to_string().as_bytes())?;
}

let mut reader = ZipArchive::new(writer.finish()?)?;
for i in 0..=u16::MAX {
let expected_name = format!("{i}.txt");
let expected_contents = i.to_string();
let expected_contents = expected_contents.as_bytes();
let mut file = reader.by_name(&expected_name)?;
let mut contents = Vec::with_capacity(expected_contents.len());
file.read_to_end(&mut contents)?;
assert_eq!(contents, expected_contents);
drop(file);
contents.clear();
let mut file = reader.by_index(i as usize)?;
file.read_to_end(&mut contents)?;
assert_eq!(contents, expected_contents);
}
Ok(())
}
}

0 comments on commit c934c82

Please sign in to comment.