Skip to content

Commit

Permalink
Allow very big tils to occupy multiple ID0 entries
Browse files Browse the repository at this point in the history
  • Loading branch information
rbran committed Dec 4, 2024
1 parent b32991c commit 15b113f
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 102 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ clap = { version = "4.5", features = ["derive"] }
bincode = "1.3.3"
flate2 = "1.0.31"
serde = { version = "1.0", features = ["derive"] }
itertools = "0.13.0"

[[bin]]
name = "idb-tools"
Expand Down
2 changes: 1 addition & 1 deletion src/id0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ pub struct EntryPoint {
pub entry_type: Option<til::Type>,
}

fn parse_number(data: &[u8], big_endian: bool, is_64: bool) -> Option<u64> {
pub(crate) fn parse_number(data: &[u8], big_endian: bool, is_64: bool) -> Option<u64> {
Some(match (data.len(), is_64, big_endian) {
(8, true, true) => u64::from_be_bytes(data.try_into().unwrap()),
(8, true, false) => u64::from_le_bytes(data.try_into().unwrap()),
Expand Down
175 changes: 129 additions & 46 deletions src/id0/address_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use anyhow::{anyhow, Result};

use crate::til;

use super::parse_maybe_cstr;
use super::{parse_maybe_cstr, ID0Entry};

#[derive(Clone, Debug)]
pub enum AddressInfo<'a> {
Expand All @@ -12,19 +12,53 @@ pub enum AddressInfo<'a> {
Other { key: &'a [u8], value: &'a [u8] },
}

impl<'a> AddressInfo<'a> {
pub(crate) fn parse(key: &'a [u8], value: &'a [u8], is_64: bool) -> Result<Self> {
let [sub_type, id @ ..] = key else {
return Err(anyhow!("Missing SubType"));
};
let id_value = if is_64 {
<[u8; 8]>::try_from(id).ok().map(u64::from_be_bytes)
} else {
<[u8; 4]>::try_from(id)
.ok()
.map(u32::from_be_bytes)
.map(u64::from)
/// A comment attached to an address, tagged with the kind of comment it is.
///
/// Every variant borrows the raw comment bytes; no text decoding is done here.
#[derive(Clone, Debug)]
pub enum Comments<'a> {
    /// Regular comment.
    Comment(&'a [u8]),
    /// Repeatable comment.
    RepeatableComment(&'a [u8]),
    /// Pre comment.
    PreComment(&'a [u8]),
    /// Post comment.
    PostComment(&'a [u8]),
}

impl<'a> Comments<'a> {
    /// The raw bytes of the comment, whatever its kind.
    ///
    /// NOTE: IDA doesn't have a default character encoding, so the message is
    /// returned as bytes and not as a `str`.
    pub fn message(&self) -> &'a [u8] {
        match *self {
            Self::Comment(bytes) => bytes,
            Self::RepeatableComment(bytes) => bytes,
            Self::PreComment(bytes) => bytes,
            Self::PostComment(bytes) => bytes,
        }
    }
}

/// Iterator that decodes a slice of ID0 entries into address information.
pub(crate) struct AddressInfoIter<'a> {
    /// Entries that were not consumed yet.
    entries: &'a [ID0Entry],
    /// Selects the width of the address stored in each key: 8 bytes when
    /// `true`, 4 bytes otherwise.
    is_64: bool,
}

impl<'a> AddressInfoIter<'a> {
    /// Creates an iterator over `entries`, decoding keys as 64 bits wide when
    /// `is_64` is set and as 32 bits wide otherwise.
    pub fn new(entries: &'a [ID0Entry], is_64: bool) -> Self {
        AddressInfoIter { is_64, entries }
    }
}

impl<'a> Iterator for AddressInfoIter<'a> {
type Item = Result<(u64, AddressInfo<'a>)>;

fn next(&mut self) -> Option<Self::Item> {
let (current, rest) = self.entries.split_first()?;
self.entries = rest;
let value = &current.value[..];
// 1.. because it starts with '.'
let addr_len = if self.is_64 { 8 } else { 4 };
let key_start = addr_len + 1;
let address = super::parse_number(&current.key[1..key_start], true, self.is_64).unwrap();
let key = &current.key[key_start..];
let Some((sub_type, id_value)) = id_subkey_from_idx(key, self.is_64) else {
return Some(Err(anyhow!("Missing SubType")));
};

// Non UTF-8 comment: "C:\\Documents and Settings\\Administrator\\\xb9\xd9\xc5\xc1 \xc8\xad\xb8\xe9\ls"
// \xb9\xd9\xc5\xc1 \xc8\xad\xb8\xe9 = "바탕 화면" = "Desktop" in Korean encoded using Extended Unix Code
#[allow(clippy::wildcard_in_or_patterns)]
Expand All @@ -34,29 +68,82 @@ impl<'a> AddressInfo<'a> {
// pre comments start at index 1000
// post comments start at index 2000
// if you create more than 1000 pre/post comments, IDA starts acting strangely, BUG?
(b'S', Some(1000..=1999)) => Ok(Self::Comment(Comments::PreComment(parse_maybe_cstr(
value,
).ok_or_else(|| anyhow!("Pre-Comment is not valid CStr"))?))),
(b'S', Some(2000..=2999)) => Ok(Self::Comment(Comments::PostComment(parse_maybe_cstr(
value,
).ok_or_else(|| anyhow!("Post-Comment is not valid CStr"))?))),
(b'S', Some(0x0)) => Ok(Self::Comment(Comments::Comment(parse_maybe_cstr(
value,
).ok_or_else(|| anyhow!("Comment is not valid CStr"))?))),
(b'S', Some(1000..=1999)) => {
let Some(comment) = parse_maybe_cstr(value) else {
return Some(Err(anyhow!("Pre-Comment is not valid CStr")));
};
Some(Ok((address, AddressInfo::Comment(Comments::PreComment(comment)))))
},
(b'S', Some(2000..=2999)) => {
    let Some(comment) = parse_maybe_cstr(value) else {
        return Some(Err(anyhow!("Post-Comment is not valid CStr")));
    };
    // ids 2000..=2999 are post comments, so report them as `PostComment`
    Some(Ok((address, AddressInfo::Comment(Comments::PostComment(comment)))))
},
(b'S', Some(0x0)) => {
    let Some(comment) = parse_maybe_cstr(value) else {
        return Some(Err(anyhow!("Comment is not valid CStr")));
    };
    // id 0 is the regular (non repeatable) comment
    Some(Ok((address, AddressInfo::Comment(Comments::Comment(comment)))))
},
// Repeatable comment
(b'S', Some(0x1)) => Ok(Self::Comment(Comments::RepeatableComment(parse_maybe_cstr(
value,
).ok_or_else(|| anyhow!("Repeatable Comment is not valid CStr"))?))),
(b'S', Some(0x1)) => {
    let Some(comment) = parse_maybe_cstr(value) else {
        return Some(Err(anyhow!("Repeatable Comment is not valid CStr")));
    };
    // id 1 is the repeatable comment, so report it as `RepeatableComment`
    Some(Ok((address, AddressInfo::Comment(Comments::RepeatableComment(comment)))))
},

// Type at this address
(b'S', Some(0x3000)) => Ok(Self::TilType(til::Type::new_from_id0(value)?)),
// TODO followed by (b'S', Some(0x3001)) data with unknown meaning
(b'S', Some(0x3000)) => {
    // Only entries of this same address can extend the type, so the key
    // prefix ('.' followed by the address) needs to match the current entry.
    let addr_prefix = &current.key[..key_start];
    // take the field names (optional?) and the continuation (optional!)
    let last = rest
        .iter()
        .position(|entry| {
            if entry.key.get(..key_start) != Some(addr_prefix) {
                return true;
            }
            !matches!(
                id_subkey_from_idx(&entry.key[key_start..], self.is_64),
                Some((b'S', Some(0x3000..=0x3999)))
            )
        })
        // no entry ends the sequence: everything left belongs to this type
        .unwrap_or(rest.len());
    // TODO enforce sequential index for the id?
    // get the entry for field names and rest of data
    let (fields, continuation) = match &rest[..last] {
        [fields, tail @ ..]
            if matches!(
                // the sub-key starts after the '.' + address prefix; slicing is
                // safe because the scan above checked that prefix
                id_subkey_from_idx(&fields.key[key_start..], self.is_64),
                Some((b'S', Some(0x3001)))
            ) =>
        {
            // convert the value into fields
            let Some(fields) = crate::ida_reader::split_strings_from_array(&fields.value) else {
                return Some(Err(anyhow!("Invalid Fields for TIL Type")));
            };
            (Some(fields), tail)
        }
        all => (None, all),
    };
    // the entries merged into this type must not be yielded again
    self.entries = &rest[last..];

    // condense the data into a single buffer
    let buf: Vec<u8> = current
        .value
        .iter()
        .chain(continuation.iter().flat_map(|entry| &entry.value[..]))
        .copied()
        .collect();
    // create the raw type
    let til = match til::Type::new_from_id0(&buf[..], fields) {
        Ok(til) => til,
        Err(err) => return Some(Err(err)),
    };
    Some(Ok((address, AddressInfo::TilType(til))))
},
// field names and continuation from the previous til type [citation needed]
(b'S', Some(0x3001..=0x3999)) => {
Some(Err(anyhow!("ID0 Til type info without a previous TIL type")))
},

// Name, aka a label to this memory address
(b'N', None) => {
let label_raw = parse_maybe_cstr(value).ok_or_else(|| anyhow!("Label is not a valid CStr"))?;
let label = core::str::from_utf8(label_raw).map_err(|_| anyhow!("Label is not valid UTF-8"))?;
Ok(Self::Label(label))
let Some(label_raw) = parse_maybe_cstr(value) else {
return Some(Err(anyhow!("Label is not a valid CStr")));
};
let Some(label) = core::str::from_utf8(label_raw).ok() else {
return Some(Err(anyhow!("Label is not valid UTF-8")))
};
Some(Ok((address, AddressInfo::Label(label))))
},

// Seems related to datatype, maybe cstr, align and stuff like that
Expand All @@ -74,27 +161,23 @@ impl<'a> AddressInfo<'a> {
// The opposite of 'D': a memory location that points to another one
(b'd', Some(_)) |
// other unknown values
_ => Ok(Self::Other { key, value }),
_ => Some(Ok((address, AddressInfo::Other { key, value }))),
}
}
}

#[derive(Clone, Debug)]
pub enum Comments<'a> {
Comment(&'a [u8]),
RepeatableComment(&'a [u8]),
PreComment(&'a [u8]),
PostComment(&'a [u8]),
/// Splits a sub-key into its first byte (the sub-type) and the id decoded from
/// the remaining bytes.
///
/// Returns `None` when `key` is empty. The id is `None` when the remaining
/// bytes don't have the exact width selected by `is_64`.
fn id_subkey_from_idx(key: &[u8], is_64: bool) -> Option<(u8, Option<u64>)> {
    match key {
        [] => None,
        [sub_type, id @ ..] => Some((*sub_type, id_from_key(id, is_64))),
    }
}

impl<'a> Comments<'a> {
/// The message on the comment, NOTE that IDA don't have a default character encoding
pub fn message(&self) -> &'a [u8] {
match self {
Comments::Comment(x)
| Comments::RepeatableComment(x)
| Comments::PreComment(x)
| Comments::PostComment(x) => x,
}
/// Decodes a big-endian id from `key`.
///
/// `key` must be exactly 8 bytes long when `is_64` is set and exactly 4 bytes
/// long otherwise; any other length results in `None`.
fn id_from_key(key: &[u8], is_64: bool) -> Option<u64> {
    if !is_64 {
        let narrow = <[u8; 4]>::try_from(key).ok()?;
        return Some(u64::from(u32::from_be_bytes(narrow)));
    }
    let wide = <[u8; 8]>::try_from(key).ok()?;
    Some(u64::from_be_bytes(wide))
}
52 changes: 21 additions & 31 deletions src/id0/btree.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{ffi::CStr, io::Read};

use anyhow::Result;
use itertools::Itertools;

use crate::ida_reader::{IdaGenericBufUnpack, IdaGenericUnpack};

Expand Down Expand Up @@ -594,7 +595,7 @@ impl ID0Section {
let key = parse_number(key, true, self.is_64).unwrap();
// TODO handle other values for the key
if key == key_find {
return til::Type::new_from_id0(&entry.value)
return til::Type::new_from_id0(&entry.value, None)
.map(Option::Some)
.map_err(|e| {
todo!("Error parsing {:#04x?}: {e:?}", &entry.value);
Expand All @@ -609,27 +610,21 @@ impl ID0Section {
&self,
version: u16,
) -> Result<impl Iterator<Item = Result<(u64, AddressInfo)>>> {
let regions = self.file_regions(version)?;
// TODO remove the Vec/for-loop here if you want to use `itertools::flatten_ok` or implement it yourself
let mut info = vec![];
for region in regions {
let region = region?;
let start_key: Vec<u8> = key_from_address(region.start, self.is_64).collect();
let end_key: Vec<u8> = key_from_address(region.end, self.is_64).collect();
let start = self.binary_search(&start_key).unwrap_or_else(|start| start);
let end = self.binary_search(&end_key).unwrap_or_else(|end| end);

let entries = &self.entries[start..end];
info.extend(entries.iter().map(|entry| {
let key = &entry.key[start_key.len()..];
// 1.. because it starts with '.'
let address =
parse_number(&entry.key[1..start_key.len()], true, self.is_64).unwrap();
let info = address_info::AddressInfo::parse(key, &entry.value, self.is_64)?;
Ok((address, info))
}));
}
Ok(info.into_iter())
Ok(self
.file_regions(version)?
.map(|region| {
let region = region?;
let start_key: Vec<u8> = key_from_address(region.start, self.is_64).collect();
let end_key: Vec<u8> = key_from_address(region.end, self.is_64).collect();
let start = self.binary_search(&start_key).unwrap_or_else(|start| start);
let end = self.binary_search(&end_key).unwrap_or_else(|end| end);

let entries = &self.entries[start..end];
Ok(AddressInfoIter::new(entries, self.is_64))
})
.flatten_ok()
.flatten_ok())
//Ok(info.into_iter())
}

/// read the address information for the address
Expand All @@ -643,15 +638,10 @@ impl ID0Section {
let end = self.binary_search_end(&key).unwrap_or_else(|end| end);

let entries = &self.entries[start..end];
let key_len = key.len();
Ok(entries.iter().map(move |entry| {
let key = &entry.key[key_len..];
// 1.. because it starts with '.'
let key_address = parse_number(&entry.key[1..key_len], true, self.is_64).unwrap();
assert_eq!(key_address, address);
let info = address_info::AddressInfo::parse(key, &entry.value, self.is_64)?;
Ok(info)
}))
// ignore the address, it will always be the same, the one request
let iter = AddressInfoIter::new(entries, self.is_64)
.map(|value| value.map(|(_addr, value)| value));
Ok(iter)
}

/// read the label set at address, if any
Expand Down
47 changes: 27 additions & 20 deletions src/ida_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,26 +186,8 @@ pub trait IdaGenericBufUnpack: IdaGenericUnpack + BufRead {
// TODO rename this
fn read_c_string_vec(&mut self) -> Result<Vec<Vec<u8>>> {
let buf = self.read_c_string_raw()?;
if buf.is_empty() {
return Ok(vec![]);
}

let mut result = vec![];
// NOTE never 0 because this came from a CStr
let mut len = buf[0] - 1;
// NOTE zero len (buf[0] == 1) string is allowed
let mut current = &buf[1..];
loop {
ensure!(current.len() >= len.into(), "Invalid len on Vec of CStr");
let (value, rest) = current.split_at(len.into());
result.push(value.to_owned());
if rest.is_empty() {
break;
}
len = rest[0] - 1;
current = &rest[1..];
}
Ok(result)
split_strings_from_array(&buf)
.ok_or_else(|| anyhow!("Invalid len on Vec of CStr {buf:02x?}"))
}

fn peek_u8(&mut self) -> Result<Option<u8>> {
Expand Down Expand Up @@ -483,3 +465,28 @@ pub trait IdaGenericUnpack: Read {
}

impl<R: Read> IdaGenericUnpack for R {}

/// Splits a buffer of length-prefixed strings into the individual strings.
///
/// Each string is stored as one length byte holding `len + 1`, followed by
/// `len` bytes of data. A length byte of 1 is a valid empty string.
///
/// Returns `None` if a length byte is 0 or if a string claims more bytes than
/// are left in the buffer. An empty buffer results in an empty list.
pub fn split_strings_from_array(buf: &[u8]) -> Option<Vec<Vec<u8>>> {
    let mut result = Vec::new();
    let mut remaining = buf;
    while let Some((&len_byte, tail)) = remaining.split_first() {
        // A zero length byte can't happen if the buffer came from a CStr, but
        // this function is also called with other buffers: reject it instead
        // of underflowing.
        let len = usize::from(len_byte.checked_sub(1)?);
        if tail.len() < len {
            return None;
        }
        let (value, rest) = tail.split_at(len);
        result.push(value.to_vec());
        remaining = rest;
    }
    Some(result)
}
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ mod test {
0x3d, 0x08, 0x48, 0x4d, 0x4f, 0x44, 0x55, 0x4c, 0x45, 0x3d, 0x06, 0x44, 0x57, 0x4f,
0x52, 0x44, 0x00,
];
let _til = til::Type::new_from_id0(&function).unwrap();
let _til = til::Type::new_from_id0(&function, None).unwrap();
}

#[test]
Expand All @@ -565,7 +565,7 @@ mod test {
0x82, 0x54, // ???? the 0x94 value?
0x00, // the final value always present
];
let _til = til::Type::new_from_id0(&function).unwrap();
let _til = til::Type::new_from_id0(&function, None).unwrap();
}

#[test]
Expand Down
Loading

0 comments on commit 15b113f

Please sign in to comment.