Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for searching within ZIP and gzip files #305

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ path = "tests/tests.rs"

[dependencies]
bytecount = "0.1.4"
byteorder = { version = "1.0.0", optional = true }
clap = "2.19.0"
crc = { version = "1.4.0", optional = true }
env_logger = { version = "0.3", default-features = false }
flate2 = { version = "0.2.14", optional = true }
grep = { version = "0.1.4", path = "grep" }
ignore = { version = "0.1.5", path = "ignore" }
lazy_static = "0.2"
Expand All @@ -48,8 +51,10 @@ clap = "2.18"
lazy_static = "0.2"

[features]
default = ["uncompress"]
avx-accel = ["bytecount/avx-accel"]
simd-accel = ["bytecount/simd-accel", "regex/simd-accel"]
uncompress = ["byteorder", "flate2", "crc"]

[profile.release]
debug = true
20 changes: 18 additions & 2 deletions src/app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
};
let flag = |name| arg(name).long(name);

App::new("ripgrep")
let mut app = App::new("ripgrep")
.author(crate_authors!())
.version(crate_version!())
.about(ABOUT)
Expand Down Expand Up @@ -168,7 +168,16 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
.multiple(true).number_of_values(1))
.arg(flag("type-clear")
.value_name("TYPE").takes_value(true)
.multiple(true).number_of_values(1))
.multiple(true).number_of_values(1));

if cfg!(feature = "uncompress") {
app = app
.arg(flag("uncompress"))
.arg(flag("uncompress-depth").value_name("NUM")
.takes_value(true).validator(validate_number));
}

app
}

struct Usage {
Expand Down Expand Up @@ -458,6 +467,13 @@ lazy_static! {
of ripgrep.\n\nNote that this MUST be passed to every \
invocation of ripgrep. Type settings are NOT persisted.");

if cfg!(feature = "uncompress") {
doc!(h, "uncompress",
"Attempt to search within compressed files.");
doc!(h, "uncompress-depth",
"Limit the depth of nested compressed files.");
}

h
};
}
Expand Down
5 changes: 5 additions & 0 deletions src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ pub struct Args {
threads: usize,
type_list: bool,
types: Types,
uncompress_depth: usize,
with_filename: bool,
}

Expand Down Expand Up @@ -218,6 +219,7 @@ impl Args {
.no_messages(self.no_messages)
.quiet(self.quiet)
.text(self.text)
.uncompress_depth(self.uncompress_depth)
.build()
}

Expand Down Expand Up @@ -342,6 +344,9 @@ impl<'a> ArgMatches<'a> {
threads: try!(self.threads()),
type_list: self.is_present("type-list"),
types: try!(self.types()),
uncompress_depth: try!(self.usize_of("uncompress-depth"))
.unwrap_or_else(
|| if self.is_present("uncompress") { 1 } else { 0 }),
with_filename: with_filename,
};
if args.mmap {
Expand Down
163 changes: 163 additions & 0 deletions src/magic.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
use std::cmp::min;
use std::io::{ErrorKind, Read, Result};

/// File format recognised from the first bytes (the "magic number") of a
/// stream.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Magic {
    /// None of the known signatures matched.
    Unknown,
    /// The data starts with the gzip signature `1f 8b`.
    Gzip,
    /// The data starts with one of the recognised ZIP signatures.
    Zip,
}

impl Magic {
    /// Classifies `buf` by the signature it starts with.
    ///
    /// Only the leading bytes are inspected. A buffer that is shorter than
    /// every signature, or that matches none of them, yields
    /// `Magic::Unknown`.
    pub fn from_slice(buf: &[u8]) -> Magic {
        // Known signatures, checked in order; the first prefix match wins.
        const SIGNATURES: &'static [(&'static [u8], Magic)] = &[
            (b"\x1f\x8b", Magic::Gzip),
            // Local file header
            (b"\x50\x4b\x03\x04", Magic::Zip),
            // End of central directory (empty ZIP file)
            (b"\x50\x4b\x05\x06", Magic::Zip),
        ];

        for &(signature, kind) in SIGNATURES {
            if buf.starts_with(signature) {
                return kind;
            }
        }
        Magic::Unknown
    }
}

/// Wrapper around `Read` which peeks the first few bytes for a magic number.
///
/// The peeked bytes are kept in `buf` so that `read()` can hand them back to
/// the caller before it continues with the inner reader.
#[derive(Debug)]
pub struct MagicReader<R> {
/// The inner reader.
inner: R,
/// Position of the first byte within `buf` which has not yet been returned
/// by `read()`. Once it reaches `lim`, reads go straight to `inner`.
pos: usize,
/// Number of valid bytes within `buf`. This is smaller than `buf.len()`
/// when the inner reader ended before the whole buffer could be filled.
lim: usize,
/// Copy of the first few bytes obtained from the inner reader.
buf: [u8; 4],
}

impl<R: Read> MagicReader<R> {
/// Creates a new `MagicReader`, reading and buffering the magic number.
pub fn new(mut inner: R) -> Result<MagicReader<R>> {
let mut buf = [0; 4];
let lim = try!(read_all(&mut inner, &mut buf));

Ok(MagicReader {
inner: inner,
pos: 0,
lim: lim,
buf: buf,
})
}

pub fn magic(&self) -> Magic {
Magic::from_slice(&self.buf[..self.lim])
}
}

impl<R: Read> Read for MagicReader<R> {
    /// Returns the buffered magic bytes first, then continues with the inner
    /// reader.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        // All peeked bytes have been replayed: behave like the inner reader.
        if self.pos >= self.lim {
            return self.inner.read(buf);
        }

        // Replay as many buffered bytes as fit into the caller's buffer.
        let count = {
            let pending = &self.buf[self.pos..self.lim];
            let n = min(pending.len(), buf.len());
            buf[..n].copy_from_slice(&pending[..n]);
            n
        };
        self.pos += count;

        if count == buf.len() {
            return Ok(count);
        }

        // Attempt to fill the whole buffer (is_binary depends on this).
        // Bytes have already been handed out, so a failure here cannot be
        // reported as an error; only the replayed bytes are returned.
        Ok(match self.inner.read(&mut buf[count..]) {
            Ok(extra) => count + extra,
            Err(_) => count,
        })
    }
}

/// Reads from `reader` until `buf` is full or the reader reports end of
/// input.
///
/// Returns the number of bytes stored at the front of `buf`; this is less
/// than the buffer length only when the input ended early. Reads failing with
/// `ErrorKind::Interrupted` are retried, any other error is returned as is.
fn read_all<R: Read>(reader: &mut R, mut buf: &mut [u8]) -> Result<usize> {
    let mut total = 0;
    loop {
        if buf.is_empty() {
            return Ok(total);
        }
        match reader.read(buf) {
            // End of input before the buffer was filled.
            Ok(0) => return Ok(total),
            Ok(n) => {
                total += n;
                // Move the slice out before narrowing it so the remainder
                // keeps the full lifetime of the original buffer.
                let whole = buf;
                buf = &mut whole[n..];
            }
            Err(e) => {
                if e.kind() != ErrorKind::Interrupted {
                    return Err(e);
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use std::io::Read;
    use super::{Magic, MagicReader};

    /// Asserts that `data` is classified as `expected`, both directly and
    /// through a `MagicReader`, and that a single `read()` through the reader
    /// returns the complete, unmodified input.
    fn check(data: &[u8], expected: Magic) {
        assert_eq!(Magic::from_slice(data), expected);

        let mut rdr = MagicReader::new(data).unwrap();
        assert_eq!(rdr.magic(), expected);

        let mut out = vec![0; data.len()];
        assert_eq!(rdr.read(&mut out).unwrap(), data.len());
        assert_eq!(&out[..], data);
    }

    #[test]
    fn empty() {
        check(&[], Magic::Unknown);
    }

    #[test]
    fn gzip() {
        check(
            &[
                0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
                0x02, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
                0x00, 0x00, 0x00, 0x00,
            ],
            Magic::Gzip,
        );
    }

    #[test]
    fn zip() {
        check(
            &[0x50, 0x4b, 0x03, 0x04, 0x0a, 0x00, 0x00, 0x00],
            Magic::Zip,
        );
    }

    #[test]
    fn zip_empty() {
        check(
            &[0x50, 0x4b, 0x05, 0x06, 0x00, 0x00, 0x00, 0x00],
            Magic::Zip,
        );
    }
}
9 changes: 9 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
extern crate bytecount;
#[cfg(feature = "uncompress")]
extern crate byteorder;
#[macro_use]
extern crate clap;
#[cfg(feature = "uncompress")]
extern crate crc;
extern crate env_logger;
#[cfg(feature = "uncompress")]
extern crate flate2;
extern crate grep;
extern crate ignore;
#[cfg(windows)]
Expand Down Expand Up @@ -46,12 +52,15 @@ macro_rules! eprintln {
mod app;
mod args;
mod atty;
mod magic;
mod pathutil;
mod printer;
mod search_buffer;
mod search_stream;
mod unescape;
mod worker;
#[cfg(feature = "uncompress")]
mod zip;

pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;

Expand Down
Loading