use std::io::{BufRead, Cursor, Read};
use byteorder::{BigEndian, ByteOrder, LittleEndian, ReadBytesExt};
use failure::{format_err, Error, ResultExt};
use crate::{
error,
header::Header,
types::{
read::{
AnnotationsDirectoryOffsets, ClassData, ClassDefData, CodeItem, DebugInfo, FieldIdData,
MethodIdData, PrototypeIdData,
},
Annotation, AnnotationsDirectory, Array, Class, FieldAnnotations, MethodAnnotations,
ParameterAnnotations, Prototype, ShortyDescriptor, Type,
},
};
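/// Reader for `.dex` files.
///
/// It keeps the raw file bytes in a cursor, along with the decoded header and
/// all the structures that have been read from the file so far.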
#[derive(Debug)]
pub struct DexReader {
pub(crate) file_cursor: Cursor<Box<[u8]>>,
pub(crate) header: Header,
pub(crate) strings: Vec<String>,
pub(crate) types: Vec<Type>,
pub(crate) prototypes: Vec<Prototype>,
pub(crate) field_ids: Vec<FieldIdData>,
pub(crate) method_ids: Vec<MethodIdData>,
pub(crate) classes: Vec<Class>,
pub(crate) annotation_set_ref_list: Vec<Box<[u32]>>,
pub(crate) annotation_sets: Vec<Box<[u32]>>,
pub(crate) code_segments: Vec<(u32, CodeItem)>,
pub(crate) debug_info: Vec<(u32, DebugInfo)>,
pub(crate) arrays: Vec<(u32, Array)>,
pub(crate) annotations_directories: Vec<(u32, AnnotationsDirectory)>,
}
impl DexReader {
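    /// Creates a reader from anything implementing `Read`.
    ///
    /// The whole file is loaded into memory (using `size`, if provided, as the
    /// initial buffer capacity) and its header is parsed.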
pub fn from_read<R, S>(mut file: R, size: S) -> Result<Self, Error>
where
R: Read + ReadBytesExt,
S: Into<Option<usize>>,
{
let mut file_contents = if let Some(size) = size.into() {
Vec::with_capacity(size)
} else {
Vec::new()
};
let _ = file
.read_to_end(&mut file_contents)
.context("could not read dex file contents")?;
let mut file_cursor = Cursor::new(file_contents.into_boxed_slice());
let header =
Header::from_reader(&mut file_cursor).context("could not read dex file header")?;
let strings = Vec::with_capacity(header.get_string_ids_size() as usize);
let types = Vec::with_capacity(header.get_type_ids_size() as usize);
let prototypes = Vec::with_capacity(header.get_prototype_ids_size() as usize);
let field_ids = Vec::with_capacity(header.get_field_ids_size() as usize);
let method_ids = Vec::with_capacity(header.get_method_ids_size() as usize);
Ok(Self {
file_cursor,
header,
strings,
types,
prototypes,
field_ids,
method_ids,
classes: Vec::new(),
annotation_set_ref_list: Vec::new(),
annotation_sets: Vec::new(),
code_segments: Vec::new(),
debug_info: Vec::new(),
arrays: Vec::new(),
annotations_directories: Vec::new(),
})
}
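    /// Reads the file data, dispatching on the endianness declared in the
    /// header.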
pub fn read_data(&mut self) -> Result<(), Error> {
if self.header.is_little_endian() {
self.read_endian_data::<LittleEndian>()
} else {
self.read_endian_data::<BigEndian>()
}
}
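    /// Reads all the indexed sections (strings, types, prototypes, field IDs,
    /// method IDs and class definitions) using the given byte order.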
fn read_endian_data<B>(&mut self) -> Result<(), Error>
where
B: ByteOrder,
{
if let Some(offset) = self.header.get_string_ids_offset() {
self.file_cursor.set_position(u64::from(offset));
self.read_string_list::<B>()
.context("could not read string list")?;
}
if let Some(offset) = self.header.get_type_ids_offset() {
self.file_cursor.set_position(u64::from(offset));
self.read_all_types::<B>()
.context("could not read type list")?;
}
if let Some(offset) = self.header.get_prototype_ids_offset() {
self.file_cursor.set_position(u64::from(offset));
self.read_prototype_list::<B>()
.context("could not read prototype list")?;
}
if let Some(offset) = self.header.get_field_ids_offset() {
self.file_cursor.set_position(u64::from(offset));
self.read_field_id_list::<B>()
.context("could not read field ID list")?;
}
if let Some(offset) = self.header.get_method_ids_offset() {
self.file_cursor.set_position(u64::from(offset));
self.read_method_id_list::<B>()
.context("could not read method ID list")?;
}
if let Some(offset) = self.header.get_class_defs_offset() {
self.file_cursor.set_position(u64::from(offset));
self.read_class_list::<B>()
.context("could not read class list")?;
}
Ok(())
}
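    /// Reads the string ID list, following each offset to decode the string it
    /// points to.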
fn read_string_list<B>(&mut self) -> Result<(), Error>
where
B: ByteOrder,
{
for _ in 0..self.header.get_string_ids_size() {
let current_offset = self.file_cursor.position();
let offset = self.file_cursor.read_u32::<B>().context(format_err!(
"could not read string offset from string ID at offset {:#010x}",
current_offset
))?;
let current_offset = self.file_cursor.position();
self.file_cursor.set_position(u64::from(offset));
let str_data = self.read_string()?;
self.strings.push(str_data);
self.file_cursor.set_position(current_offset);
}
Ok(())
}
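    /// Reads a string data item at the current position: a ULEB128 character
    /// count followed by NUL-terminated UTF-8 data, checking that the decoded
    /// character count matches the declared size.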
fn read_string(&mut self) -> Result<String, Error> {
let (size, _) = uleb128(&mut self.file_cursor).context("could not read string size")?;
let mut data = Vec::with_capacity(size as usize);
if size > 0 {
let _ = self.file_cursor.read_until(0, &mut data)?;
let _ = data.pop();
}
let string = String::from_utf8(data).context("error decoding UTF-8 from string data")?;
let char_count = string.chars().count();
if char_count == size as usize {
Ok(string)
} else {
Err(error::Parse::StringSizeMismatch {
expected_size: size,
actual_size: char_count,
}
.into())
}
}
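    /// Reads the type ID list, parsing each referenced string as a type
    /// descriptor.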
fn read_all_types<B>(&mut self) -> Result<(), Error>
where
B: ByteOrder,
{
for _ in 0..self.header.get_type_ids_size() {
let current_offset = self.file_cursor.position();
let index = self.file_cursor.read_u32::<B>().context(format_err!(
"could not read type ID at offset {:#010x}",
current_offset
))?;
let type_str = self
.strings
.get(index as usize)
.ok_or_else(|| error::Parse::UnknownStringIndex { index })?;
self.types
.push(type_str.parse::<Type>().context(format_err!(
"could not read type descriptor from string at index {} (`{}`)",
index,
type_str
))?);
}
Ok(())
}
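    /// Reads the prototype ID list, resolving the shorty descriptor, return
    /// type and optional parameter list of each prototype.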
fn read_prototype_list<B>(&mut self) -> Result<(), Error>
where
B: ByteOrder,
{
for _ in 0..self.header.get_prototype_ids_size() {
let current_offset = self.file_cursor.position();
let prototype_id = PrototypeIdData::from_reader::<_, B>(&mut self.file_cursor)
.context(format_err!(
"could not read prototype ID at offset {:#010x}",
current_offset
))?;
let parameters = if let Some(off) = prototype_id.parameters_offset() {
let current_offset = self.file_cursor.position();
self.file_cursor.set_position(u64::from(off));
let parameters = self
.read_type_list::<B>()
.context("could not read parameter list")?;
self.file_cursor.set_position(current_offset);
Some(parameters)
} else {
None
};
let shorty_str = self
.strings
.get(prototype_id.shorty_index() as usize)
.ok_or_else(|| error::Parse::UnknownStringIndex {
index: prototype_id.shorty_index(),
})?;
let shorty_descriptor = shorty_str.parse::<ShortyDescriptor>().context(format_err!(
"could not read shorty descriptor from string at index {} (`{}`)",
prototype_id.shorty_index(),
shorty_str
))?;
let return_type = self
.types
.get(prototype_id.return_type_index() as usize)
.ok_or_else(|| error::Parse::UnknownTypeIndex {
index: prototype_id.return_type_index(),
})?
.clone();
self.prototypes
.push(Prototype::new(shorty_descriptor, return_type, parameters));
}
Ok(())
}
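    /// Reads a type list at the current position: a `u32` size followed by
    /// that many `u16` type indexes.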
fn read_type_list<B>(&mut self) -> Result<Box<[Type]>, Error>
where
B: ByteOrder,
{
let current_offset = self.file_cursor.position();
let size = self.file_cursor.read_u32::<B>().context(format_err!(
"error reading the size of the type list at offset {:#010x}",
current_offset
))?;
let mut type_list = Vec::with_capacity(size as usize);
for _ in 0..size {
let current_offset = self.file_cursor.position();
let index = self.file_cursor.read_u16::<B>().context(format_err!(
"error reading type index for type list item at offset {:#010x}",
current_offset
))?;
type_list.push(
self.types
.get(index as usize)
.ok_or_else(|| error::Parse::UnknownTypeIndex {
index: u32::from(index),
})?
.clone(),
);
}
Ok(type_list.into_boxed_slice())
}
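    /// Reads the field ID list.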
fn read_field_id_list<B>(&mut self) -> Result<(), Error>
where
B: ByteOrder,
{
for _ in 0..self.header.get_field_ids_size() {
let current_offset = self.file_cursor.position();
self.field_ids.push(
FieldIdData::from_reader::<_, B>(&mut self.file_cursor).context(format_err!(
"could not read field ID at offset {:#010x}",
current_offset
))?,
);
}
Ok(())
}
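    /// Reads the method ID list.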
fn read_method_id_list<B>(&mut self) -> Result<(), Error>
where
B: ByteOrder,
{
for _ in 0..self.header.get_method_ids_size() {
let current_offset = self.file_cursor.position();
self.method_ids.push(
MethodIdData::from_reader::<_, B>(&mut self.file_cursor).context(format_err!(
"could not read method ID at offset {:#010x}",
current_offset
))?,
);
}
Ok(())
}
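    /// Reads the class definition list, following the offsets to each class's
    /// interfaces, annotations directory, class data and static values.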
fn read_class_list<B>(&mut self) -> Result<(), Error>
where
B: ByteOrder,
{
for _ in 0..self.header.get_class_defs_size() {
let class_offset = self.file_cursor.position();
let class_def =
ClassDefData::from_reader::<_, B>(&mut self.file_cursor).context(format_err!(
"could not read class definition data at offset {:#010x}",
class_offset
))?;
let new_offset = self.file_cursor.position();
let interfaces = if let Some(offset) = class_def.interfaces_offset() {
self.file_cursor.set_position(u64::from(offset));
self.read_type_list::<B>().context(format_err!(
"could not read interfaces list at offset {:#010x} for class at offset {:#010x}",
offset,
class_offset
))?
} else {
Vec::new().into_boxed_slice()
};
let annotations = if let Some(offset) = class_def.annotations_offset() {
self.file_cursor.set_position(u64::from(offset));
Some(self.read_annotations_directory::<B>().context(format_err!(
"could not read annotation list at offset {:#010x} for class at offset {:#010x}",
offset,
class_offset
))?)
} else {
None
};
let class_data = if let Some(offset) = class_def.class_data_offset() {
self.file_cursor.set_position(u64::from(offset));
Some(
ClassData::from_reader(&mut self.file_cursor).context(format_err!(
"could not read class data at offset {:#010x} for class at offset {:#010x}",
offset,
class_offset
))?,
)
} else {
None
};
let static_values = if let Some(offset) = class_def.static_values_offset() {
self.file_cursor.set_position(u64::from(offset));
Some(
Array::from_reader(&mut self.file_cursor).context(format_err!(
"could not read encoded array at offset {:#010x}",
offset
))?,
)
} else {
None
};
self.file_cursor.set_position(new_offset);
self.classes.push(Class::new(
class_def.class_index(),
class_def.access_flags(),
class_def.superclass_index(),
interfaces,
class_def.source_file_index(),
annotations,
class_data,
static_values,
));
}
Ok(())
}
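    /// Reads an annotations directory at the current position, along with the
    /// class, field, method and parameter annotation sets it points to.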
fn read_annotations_directory<B: ByteOrder>(&mut self) -> Result<AnnotationsDirectory, Error> {
let current_offset = self.file_cursor.position();
let read = AnnotationsDirectoryOffsets::from_reader::<_, B>(&mut self.file_cursor)
.context(format_err!(
"could not read annotation directory at offset {:#010x}",
current_offset
))?;
let class_annotations = if let Some(off) = read.class_annotations_offset() {
self.file_cursor.set_position(u64::from(off));
self.read_annotation_set::<B>()
.context("could not read class annotations set")?
} else {
Vec::new().into_boxed_slice()
};
let mut field_annotations = Vec::with_capacity(read.field_annotations().len());
for fa_off in read.field_annotations() {
self.file_cursor.set_position(u64::from(fa_off.offset()));
field_annotations.push(FieldAnnotations::new(
fa_off.field_index(),
self.read_annotation_set::<B>()
.context("could not read field annotations set")?,
));
}
let mut method_annotations = Vec::with_capacity(read.method_annotations().len());
for ma_off in read.method_annotations() {
self.file_cursor.set_position(u64::from(ma_off.offset()));
method_annotations.push(MethodAnnotations::new(
ma_off.method_index(),
self.read_annotation_set::<B>()
.context("could not read method annotations set")?,
));
}
let mut parameter_annotations = Vec::with_capacity(read.parameter_annotations().len());
for pa_off in read.parameter_annotations() {
self.file_cursor.set_position(u64::from(pa_off.offset()));
parameter_annotations.push(ParameterAnnotations::new(
pa_off.method_index(),
self.read_annotation_set::<B>()
.context("could not read parameter annotations set")?,
));
}
Ok(AnnotationsDirectory::new(
class_annotations,
field_annotations.into_boxed_slice(),
method_annotations.into_boxed_slice(),
parameter_annotations.into_boxed_slice(),
))
}
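    /// Reads an annotation set at the current position: a `u32` size followed
    /// by that many `u32` annotation offsets, each of which is followed to
    /// read the annotation itself.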
fn read_annotation_set<B>(&mut self) -> Result<Box<[Annotation]>, Error>
where
B: ByteOrder,
{
let current_offset = self.file_cursor.position();
let size = self.file_cursor.read_u32::<B>().context(format_err!(
"error reading annotation set size at offset {:#010x}",
current_offset
))?;
let mut annotation_set = Vec::with_capacity(size as usize);
for _ in 0..size {
let current_offset = self.file_cursor.position();
let annotation_offset = self.file_cursor.read_u32::<B>().context(format_err!(
"error reading annotation offset at offset {:#010x}",
current_offset
))?;
let current_offset = self.file_cursor.position();
self.file_cursor.set_position(u64::from(annotation_offset));
annotation_set.push(self.read_annotation()?);
self.file_cursor.set_position(current_offset);
}
Ok(annotation_set.into_boxed_slice())
}
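    /// Reads a single encoded annotation at the current position.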
fn read_annotation(&mut self) -> Result<Annotation, Error> {
let current_offset = self.file_cursor.position();
let annotation = Annotation::from_reader(&mut self.file_cursor).context(format_err!(
"could not read annotation at offset {:#010x}",
current_offset
))?;
Ok(annotation)
}
}
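/// Reads an unsigned LEB128 value (at most 5 bytes for a `u32`) from the
/// reader, returning the decoded value and the number of bytes consumed.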
pub fn uleb128<R>(reader: &mut R) -> Result<(u32, u32), Error>
where
    R: Read,
{
    let mut result = 0;
    let mut read = 0;
    for (i, byte) in reader.bytes().enumerate() {
        let byte = byte.context(format_err!("could not read byte {}", i))?;
        let payload = u32::from(byte & 0b0111_1111);
        match i {
            // A `u32` needs at most 5 ULEB128 bytes.
            0..=4 => result |= payload << (i * 7),
            _ => return Err(error::Parse::InvalidLeb128.into()),
        }
        // A clear continuation bit marks the last byte of the encoding.
        if byte & 0b1000_0000 == 0x00 {
            read = i + 1;
            break;
        }
    }
    if read == 0 {
        // The reader ran out of bytes before the continuation bit was cleared.
        return Err(error::Parse::InvalidLeb128.into());
    }
    Ok((result, read as u32))
}
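/// Value decoded from a ULEB128p1: either `-1` or an unsigned 32-bit integer.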
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum U32p1 {
MinusOne,
U32(u32),
}
impl From<U32p1> for Option<u32> {
    fn from(value: U32p1) -> Self {
        if let U32p1::U32(n) = value {
            Some(n)
        } else {
            None
        }
    }
}
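/// Reads a ULEB128p1 value (a ULEB128 encoding of the value plus one), so that
/// an encoded `0` decodes to `-1`.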
pub fn uleb128p1<R>(reader: &mut R) -> Result<(U32p1, u32), Error>
where
R: Read,
{
let (uleb128, read) = uleb128(reader)?;
let res = if uleb128 == 0 {
U32p1::MinusOne
} else {
U32p1::U32(uleb128.wrapping_sub(1))
};
Ok((res, read))
}
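/// Reads a signed LEB128 value by decoding a ULEB128 and sign-extending it to
/// 32 bits, returning the value and the number of bytes consumed.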
pub fn sleb128<R>(reader: &mut R) -> Result<(i32, u32), Error>
where
    R: Read,
{
    let (uleb128, read) = uleb128(reader)?;
    let s_bits = read * 7;
    let mut signed = uleb128 as i32;
    // Sign-extend only when fewer than 32 payload bits were read: a 5-byte
    // encoding already fills the whole `i32`, and shifting by 32 or more bits
    // would overflow.
    if s_bits < 32 && (signed & (1 << (s_bits - 1))) != 0 {
        signed |= -1 << s_bits;
    }
    Ok((signed, read))
}
#[cfg(test)]
mod tests {
use super::{sleb128, uleb128, uleb128p1, U32p1};
use std::io::Cursor;
#[test]
fn ut_sleb128() {
assert_eq!(sleb128(&mut Cursor::new(&[0x00_u8])).unwrap().0, 0);
assert_eq!(sleb128(&mut Cursor::new(&[0x01_u8])).unwrap().0, 1);
assert_eq!(sleb128(&mut Cursor::new(&[0x7f_u8])).unwrap().0, -1);
assert_eq!(
sleb128(&mut Cursor::new(&[0x80_u8, 0x7f_u8])).unwrap().0,
-128
);
}
#[test]
fn ut_uleb128() {
assert_eq!(uleb128(&mut Cursor::new(&[0x00_u8])).unwrap().0, 0);
assert_eq!(uleb128(&mut Cursor::new(&[0x01_u8])).unwrap().0, 1);
assert_eq!(uleb128(&mut Cursor::new(&[0x7f_u8])).unwrap().0, 127);
assert_eq!(
uleb128(&mut Cursor::new(&[0x80_u8, 0x7f_u8])).unwrap().0,
16256
);
}
#[test]
fn ut_uleb128p1() {
assert_eq!(
uleb128p1(&mut Cursor::new(&[0x00_u8])).unwrap().0,
U32p1::MinusOne
);
assert_eq!(
uleb128p1(&mut Cursor::new(&[0x01_u8])).unwrap().0,
U32p1::U32(0)
);
assert_eq!(
uleb128p1(&mut Cursor::new(&[0x7f_u8])).unwrap().0,
U32p1::U32(126)
);
assert_eq!(
uleb128p1(&mut Cursor::new(&[0x80_u8, 0x7f_u8])).unwrap().0,
U32p1::U32(16255)
);
}
}