From 83b05a9b44df953fe97ee8eb67b0ff3577918a7d Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 1 Dec 2019 01:04:16 +0100 Subject: [PATCH 1/4] fix: safe slicing for macho fat binaries --- debuginfo/src/macho.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/debuginfo/src/macho.rs b/debuginfo/src/macho.rs index 1ab92b911..54fff132b 100644 --- a/debuginfo/src/macho.rs +++ b/debuginfo/src/macho.rs @@ -461,7 +461,11 @@ impl<'d, 'a> Iterator for FatMachObjectIterator<'d, 'a> { self.remaining -= 1; match self.iter.next() { - Some(Ok(arch)) => Some(MachObject::parse(arch.slice(self.data))), + Some(Ok(arch)) => { + let start = (arch.offset as usize).min(self.data.len()); + let end = ((arch.offset + arch.size) as usize).min(self.data.len()); + Some(MachObject::parse(&self.data[start..end])) + } Some(Err(error)) => Some(Err(MachError::BadObject(error))), None => None, } @@ -523,7 +527,9 @@ impl<'d> FatMachO<'d> { None => return Ok(None), }; - MachObject::parse(arch.slice(self.data)).map(Some) + let start = (arch.offset as usize).min(self.data.len()); + let end = ((arch.offset + arch.size) as usize).min(self.data.len()); + MachObject::parse(&self.data[start..end]).map(Some) } } From 76fb679d6ffb7223de7f55128176397e18614e17 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 1 Dec 2019 01:19:45 +0100 Subject: [PATCH 2/4] fix: Fix mis-detecting JAR files as macho fat archives --- debuginfo/src/object.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/debuginfo/src/object.rs b/debuginfo/src/object.rs index 5c2305078..187d72c15 100644 --- a/debuginfo/src/object.rs +++ b/debuginfo/src/object.rs @@ -110,7 +110,24 @@ pub fn peek(data: &[u8], archive: bool) -> FileFormat { match goblin::peek_bytes(&magic) { Ok(Hint::Elf(_)) => return FileFormat::Elf, Ok(Hint::Mach(_)) => return FileFormat::MachO, - Ok(Hint::MachFat(_)) if archive => return FileFormat::MachO, + Ok(Hint::MachFat(narchs)) if archive => { + 
// so this is kind of stupid but java class files share the same cutsey magic + // as a macho fat file (CAFEBABE). This means that we often claim that a java + // class file is actually a macho binary but it's not. The next 32 bytes encode + // the number of embedded architectures in a fat mach. In case of a JAR file + // we have 2 bytes for major version and 2 bytes for minor version of the class + // file format. + // + // The internet suggests the first public version of Java had the class version + // 45. Thus the logic applied here is that if the number is >= 45 we're more + // likely to have a java class file than a macho file with 45 architectures + // which should be very rare. + return if narchs >= 45 { + FileFormat::Unknown + } else { + FileFormat::MachO + }; + } Ok(Hint::PE) => return FileFormat::Pe, _ => (), } From 6c5ac8367fb7de879130048516ea58c32fc31514 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 1 Dec 2019 01:23:28 +0100 Subject: [PATCH 3/4] doc: Added reference to class file doc --- debuginfo/src/object.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/debuginfo/src/object.rs b/debuginfo/src/object.rs index 187d72c15..1e00ec850 100644 --- a/debuginfo/src/object.rs +++ b/debuginfo/src/object.rs @@ -115,13 +115,15 @@ pub fn peek(data: &[u8], archive: bool) -> FileFormat { // as a macho fat file (CAFEBABE). This means that we often claim that a java // class file is actually a macho binary but it's not. The next 32 bytes encode // the number of embedded architectures in a fat mach. In case of a JAR file - // we have 2 bytes for major version and 2 bytes for minor version of the class + // we have 2 bytes for minor version and 2 bytes for major version of the class // file format. // // The internet suggests the first public version of Java had the class version // 45. 
Thus the logic applied here is that if the number is >= 45 we're more // likely to have a java class file than a macho file with 45 architectures // which should be very rare. + // + // https://docs.oracle.com/javase/specs/jvms/se6/html/ClassFile.doc.html return if narchs >= 45 { FileFormat::Unknown } else { From 2e66919fc4d6035a5d320c1eda8287bfc491360d Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 1 Dec 2019 01:30:22 +0100 Subject: [PATCH 4/4] ref: Move jar disambiguation code into MachO::test --- debuginfo/src/macho.rs | 17 ++++++++++++++++- debuginfo/src/object.rs | 23 +++-------------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/debuginfo/src/macho.rs b/debuginfo/src/macho.rs index 54fff132b..1c40593b4 100644 --- a/debuginfo/src/macho.rs +++ b/debuginfo/src/macho.rs @@ -603,7 +603,22 @@ impl<'d> MachArchive<'d> { pub fn test(data: &[u8]) -> bool { match goblin::peek(&mut Cursor::new(data)) { Ok(goblin::Hint::Mach(_)) => true, - Ok(goblin::Hint::MachFat(_)) => true, + Ok(goblin::Hint::MachFat(narchs)) => { + // so this is kind of stupid but java class files share the same cutesy magic + // as a macho fat file (CAFEBABE). This means that we often claim that a java + // class file is actually a macho binary but it's not. The next 32 bits encode + // the number of embedded architectures in a fat mach. In case of a Java class file + // we have 2 bytes for minor version and 2 bytes for major version of the class + // file format. + // + // The internet suggests the first public version of Java had the class version + // 45. Thus the logic applied here is that if the number is >= 45 we're more + // likely to have a java class file than a macho file with 45 architectures + // which should be very rare.
+ // + // https://docs.oracle.com/javase/specs/jvms/se6/html/ClassFile.doc.html + narchs < 45 + } _ => false, } } diff --git a/debuginfo/src/object.rs b/debuginfo/src/object.rs index 1e00ec850..1efcc55c4 100644 --- a/debuginfo/src/object.rs +++ b/debuginfo/src/object.rs @@ -110,26 +110,9 @@ pub fn peek(data: &[u8], archive: bool) -> FileFormat { match goblin::peek_bytes(&magic) { Ok(Hint::Elf(_)) => return FileFormat::Elf, Ok(Hint::Mach(_)) => return FileFormat::MachO, - Ok(Hint::MachFat(narchs)) if archive => { - // so this is kind of stupid but java class files share the same cutsey magic - // as a macho fat file (CAFEBABE). This means that we often claim that a java - // class file is actually a macho binary but it's not. The next 32 bytes encode - // the number of embedded architectures in a fat mach. In case of a JAR file - // we have 2 bytes for minor version and 2 bytes for major version of the class - // file format. - // - // The internet suggests the first public version of Java had the class version - // 45. Thus the logic applied here is that if the number is >= 45 we're more - // likely to have a java class file than a macho file with 45 architectures - // which should be very rare. - // - // https://docs.oracle.com/javase/specs/jvms/se6/html/ClassFile.doc.html - return if narchs >= 45 { - FileFormat::Unknown - } else { - FileFormat::MachO - }; - } + // mach fat needs to be tested through `MachArchive::test` because of special + // handling that is required due to disambiguation with Java class files. + Ok(Hint::MachFat(_)) if archive && MachArchive::test(data) => return FileFormat::MachO, Ok(Hint::PE) => return FileFormat::Pe, _ => (), }