diff options
author | pommicket <pommicket@gmail.com> | 2022-11-05 11:43:27 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2022-11-05 11:43:27 -0400 |
commit | bae6a477b04f7a767678c88089e28dd9e5c5b879 (patch) | |
tree | eeb29337826fc89d5ad0179598fb99719faf5405 /src | |
parent | bd54f4f57a56de5b2d8b80949acfd499a62a0bbf (diff) |
cleaned up code a bit
Diffstat (limited to 'src')
-rw-r--r-- | src/elf.rs | 15 | ||||
-rw-r--r-- | src/main.rs | 267 | ||||
-rw-r--r-- | src/util.rs | 3 |
3 files changed, 141 insertions, 144 deletions
@@ -31,7 +31,7 @@ pub const DT_SYMENT: u32 = 11; pub const DT_REL: u32 = 17; pub const DT_RELSZ: u32 = 18; pub const DT_RELENT: u32 = 19; - + pub const PT_DYNAMIC: u32 = 2; pub const PT_INTERP: u32 = 3; @@ -123,7 +123,7 @@ impl Ehdr32 { pub fn size_of() -> usize { mem::size_of::<Self>() } - + pub fn section_offset(&self, ndx: u16) -> u64 { ndx as u64 * self.shentsize as u64 + self.shoff as u64 } @@ -205,20 +205,19 @@ pub struct Rel32 { } macro_rules! impl_bytes { - ($r#type: ident, $n: literal) => { - impl FromBytes<$n> for $r#type { + ($type: ident, $n: literal) => { + impl FromBytes<$n> for $type { fn from_bytes(bytes: [u8; $n]) -> Self { unsafe { mem::transmute(bytes) } } } - - impl ToBytes<$n> for $r#type { + + impl ToBytes<$n> for $type { fn to_bytes(self) -> [u8; $n] { unsafe { mem::transmute(self) } } } - - } + }; } impl_bytes!(Ehdr32, 0x34); diff --git a/src/main.rs b/src/main.rs index 4717932..6006840 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,10 @@ use std::{fmt, fs, io, mem, ptr}; compile_error! {"WHY do you have a big endian machine???? it's the 21st century, buddy. this program won't work fuck you"} mod elf; +mod util; + use elf::{FromBytes, ToBytes}; +use util::u32_from_le_slice; pub enum LinkError { IO(io::Error), @@ -170,7 +173,7 @@ impl SymbolNames { #[allow(dead_code)] fn get(&self, name: &str) -> Option<SymbolName> { - self.by_string.get(name).map(|r| *r) + self.by_string.get(name).copied() } } @@ -264,13 +267,13 @@ impl Symbols { .get(&(source, name)) .or_else(|| self.global.get(&name)) .or_else(|| self.weak.get(&name)) - .map(|r| *r) + .copied() } fn get_location_from_id(&self, id: SymbolId) -> Option<(SourceId, SymbolName)> { - self.locations.get(&id).map(|r| *r) + self.locations.get(&id).copied() } - + fn count(&self) -> usize { self.info.len() } @@ -291,7 +294,7 @@ impl RelocationType { _ => return Err(ElfError::UnsupportedRelocation(id)), }) } - + fn to_x86_u8(self) -> u8 { use RelocationType::*; match self { @@ -317,11 +320,12 @@ struct Linker { symbols: Symbols, symbol_names: SymbolNames, relocations: Vec<Relocation>, + undefined_relocations: Vec<Relocation>, // library relocations sections: Vec<elf::Shdr32>, sources: Vec<String>, - bss_size: u64, // output bss size - bss_addr: u64, // output bss address - data_addr: u64, // output data address + bss_size: u64, // output bss size + bss_addr: u64, // output bss address + data_addr: u64, // output data address symbol_data_offsets: HashMap<SymbolId, u64>, // for symbols with data, this holds the offsets into the data segment. warn: fn(LinkWarning), } @@ -363,8 +367,6 @@ impl AddrMap { // this is needed so we don't emit anything for unused symbols. type SymbolGraph = HashMap<SymbolId, Vec<SymbolId>>; -const MAX_REL_SIZE: usize = 8; // this seems reasonable - struct Executable { interp: Vec<u8>, load_addr: u64, @@ -387,32 +389,31 @@ impl Executable { strtab: vec![0], } } - + pub fn set_bss(&mut self, addr: u64, size: u64) { self.bss = Some((addr, size)); } - + pub fn set_interp(&mut self, interp: &str) { self.interp = interp.as_bytes().into(); self.interp.push(b'\0'); } - - + fn add_string(&mut self, s: &str) -> u64 { let ret = self.strtab.len() as u64; self.strtab.extend(s.as_bytes()); self.strtab.push(b'\0'); ret } - + pub fn add_lib(&mut self, lib: &str) { let s = self.add_string(lib); self.lib_strtab_offsets.push(s); } - + pub fn add_relocation(&mut self, symbol_names: &SymbolNames, rel: &Relocation, addr: u64) { let name = rel.sym; - + if self.symbol_strtab_offsets.get(&name).is_none() { let s = symbol_names.get_str(name).unwrap(); let offset = self.add_string(s); @@ -420,7 +421,7 @@ impl Executable { } self.relocations.push((rel.clone(), addr)); } - + fn segment_count(&self) -> u16 { let mut count = 1 /*data*/; if !self.interp.is_empty() { @@ -431,30 +432,30 @@ impl Executable { } count } - + fn ph_offset(&self) -> u64 { elf::Ehdr32::size_of() as u64 } - + fn ph_size(&self) -> u64 { elf::Phdr32::size_of() as u64 * u64::from(self.segment_count()) } - + fn data_offset(&self) -> u64 { self.ph_offset() + self.ph_size() } - + pub fn data_addr(&self) -> u64 { self.load_addr + self.data_offset() } - + pub fn write<T: Write + Seek>(&self, data: &[u8], out: &mut T) -> LinkResult<()> { let load_addr = self.load_addr as u32; - + // start by writing data. out.seek(io::SeekFrom::Start(self.data_offset()))?; out.write_all(data)?; - + let mut interp_offset = 0; let mut dyntab_offset = 0; let mut interp_size = 0; @@ -503,27 +504,33 @@ impl Executable { out.write_all(&u32::to_le_bytes(1))?; // nbucket out.write_all(&u32::to_le_bytes(nsymbols + 1))?; // nchain out.write_all(&u32::to_le_bytes(0))?; // bucket begins at 0 - // chain 1 -> 2 -> 3 -> ... -> n -> 0 + // chain 1 -> 2 -> 3 -> ... -> n -> 0 for i in 1..nsymbols { out.write_all(&u32::to_le_bytes(i))?; } out.write_all(&u32::to_le_bytes(0))?; // i don't know why this needs to be here. out.write_all(&u32::to_le_bytes(0))?; - - - + // now dyntab dyntab_offset = out.stream_position()?; let mut dyn_data = vec![ - elf::DT_RELSZ, reltab_size as u32, - elf::DT_RELENT, 8, - elf::DT_REL, load_addr + reltab_offset as u32, - elf::DT_STRSZ, self.strtab.len() as u32, - elf::DT_STRTAB, load_addr + strtab_offset as u32, - elf::DT_SYMENT, 16, - elf::DT_SYMTAB, load_addr + symtab_offset as u32, - elf::DT_HASH, load_addr + hashtab_offset as u32, + elf::DT_RELSZ, + reltab_size as u32, + elf::DT_RELENT, + 8, + elf::DT_REL, + load_addr + reltab_offset as u32, + elf::DT_STRSZ, + self.strtab.len() as u32, + elf::DT_STRTAB, + load_addr + strtab_offset as u32, + elf::DT_SYMENT, + 16, + elf::DT_SYMTAB, + load_addr + symtab_offset as u32, + elf::DT_HASH, + load_addr + hashtab_offset as u32, ]; for lib in &self.lib_strtab_offsets { dyn_data.extend([elf::DT_NEEDED, *lib as u32]); @@ -536,17 +543,25 @@ impl Executable { dyntab_size = dyn_bytes.len() as u32; out.write_all(&dyn_bytes)?; } - - let file_size: u32 = out.stream_position()?.try_into().map_err(|_| LinkError::TooLarge)?; - + + let file_size: u32 = out + .stream_position()? + .try_into() + .map_err(|_| LinkError::TooLarge)?; + out.seek(io::SeekFrom::Start(0))?; - - let mut ehdr = elf::Ehdr32::default(); - ehdr.phnum = self.segment_count(); - ehdr.phoff = elf::Ehdr32::size_of() as u32; - ehdr.entry = self.data_addr().try_into().map_err(|_| LinkError::TooLarge)?; + + let ehdr = elf::Ehdr32 { + phnum: self.segment_count(), + phoff: elf::Ehdr32::size_of() as u32, + entry: self + .data_addr() + .try_into() + .map_err(|_| LinkError::TooLarge)?, + ..Default::default() + }; out.write_all(&ehdr.to_bytes())?; - + let phdr_data = elf::Phdr32 { flags: elf::PF_R | elf::PF_W | elf::PF_X, // read, write, execute offset: 0, @@ -556,7 +571,7 @@ impl Executable { ..Default::default() }; out.write_all(&phdr_data.to_bytes())?; - + if let Some((bss_addr, bss_size)) = self.bss { // for some reason, linux doesn't like executables // with memsz > filesz != 0 @@ -572,7 +587,7 @@ impl Executable { }; out.write_all(&phdr_bss.to_bytes())?; } - + if !self.interp.is_empty() { let phdr_interp = elf::Phdr32 { r#type: elf::PT_INTERP, @@ -585,7 +600,7 @@ impl Executable { ..Default::default() }; out.write_all(&phdr_interp.to_bytes())?; - + let phdr_dynamic = elf::Phdr32 { r#type: elf::PT_DYNAMIC, flags: elf::PF_R, @@ -598,7 +613,7 @@ impl Executable { }; out.write_all(&phdr_dynamic.to_bytes())?; } - + Ok(()) } } @@ -628,6 +643,7 @@ impl Linker { data_addr: 0, sections: vec![], relocations: vec![], + undefined_relocations: vec![], sources: vec![], symbol_data_offsets: HashMap::new(), warn: Self::default_warn_handler, @@ -967,21 +983,32 @@ impl Linker { fn get_symbol_value(&self, sym: SymbolId) -> Option<u64> { let info = self.symbols.get_info_from_id(sym)?; use SymbolValue::*; - match (&info.value).as_ref()? { - Data(_) => self.symbol_data_offsets.get(&sym).map(|r| *r + self.data_addr), + match info.value.as_ref()? { + Data(_) => self + .symbol_data_offsets + .get(&sym) + .map(|&o| o + self.data_addr), Bss(x) => Some(self.bss_addr + *x), Absolute(a) => Some(*a), } } - fn get_relocation_data( - &mut self, - rel: &Relocation, - pc: u64, - data: &mut [u8; MAX_REL_SIZE], - ) -> Result<Option<usize>, LinkError> { + fn get_rel_apply_data_offset(&self, rel: &Relocation) -> Option<u64> { + let apply_symbol = rel.r#where.0; + let r = self.symbol_data_offsets.get(&apply_symbol)?; + Some(*r + rel.r#where.1) + } + + fn apply_relocation(&mut self, rel: Relocation, data: &mut [u8]) -> Result<(), LinkError> { + let apply_symbol = rel.r#where.0; + let apply_offset = match self.get_rel_apply_data_offset(&rel) { + Some(data_offset) => data_offset, + None => return Ok(()), // this relocation isn't in a data section so there's nothing we can do about it + }; + let pc = apply_offset + self.data_addr; + let symbol = match self.get_symbol_id(rel.source_id, rel.sym) { - None => return Ok(Some(0)), // we emitted a warning in get_symbol_id + None => return Ok(()), // we emitted a warning in get_symbol_id Some(sym) => sym, }; @@ -989,54 +1016,24 @@ impl Linker { None => { // this symbol is defined in a library //self.emit_warning(LinkWarning::RelNoValue(self.symbol_id_location_string(symbol))); - return Ok(None) - }, + self.undefined_relocations.push(rel); + return Ok(()); + } Some(v) => v, }; - + let addend = rel.addend; - + enum Value { U32(u32), } - use Value::*; use RelocationType::*; - + use Value::*; + let value = match rel.r#type { Direct32 => U32(symbol_value as u32 + addend as u32), Pc32 => U32(symbol_value as u32 + addend as u32 - pc as u32), }; - - match value { - U32(u) => { - (&mut data[..4]).copy_from_slice(&u32::to_le_bytes(u)); - Ok(Some(4)) - }, - } - } - - fn get_rel_apply_data_offset(&self, rel: &Relocation) -> Option<u64> { - let apply_symbol = rel.r#where.0; - let r = self.symbol_data_offsets.get(&apply_symbol)?; - Some(*r + rel.r#where.1) - } - - fn apply_relocation(&mut self, rel: &Relocation, data: &mut [u8]) -> Result<(), LinkError> { - let apply_symbol = rel.r#where.0; - let apply_offset = match self.get_rel_apply_data_offset(&rel) { - Some(data_offset) => data_offset, - None => return Ok(()) // this relocation isn't in a data section so there's nothing we can do about it - }; - let apply_addr = apply_offset + self.data_addr; - - let mut rel_data = [0; MAX_REL_SIZE]; - let rel_data_size = self.get_relocation_data(&rel, apply_addr, &mut rel_data)?; - let rel_data_size = match rel_data_size { - None => return Ok(()), // defined in a library - Some(s) => s, - }; - - let rel_data = &rel_data[..rel_data_size]; let apply_symbol_info = match self.symbols.get_mut_info_from_id(apply_symbol) { Some(info) => info, @@ -1048,15 +1045,29 @@ impl Linker { }; use SymbolValue::*; - let mut oob = false; + + // guarantee failure if apply_offset can't be converted to usize. + let apply_start = apply_offset.try_into().unwrap_or(usize::MAX - 32); + match apply_symbol_info.value { Some(Data(_)) => { - let apply_start = apply_offset as usize; - let apply_end = apply_start + rel_data.len(); - if apply_end < apply_start || apply_end > data.len() { - oob = true; - } else { - (&mut data[apply_start..apply_end]).copy_from_slice(rel_data); + let mut in_bounds = true; + match value { + U32(u) => { + if let Some(apply_to) = data.get_mut(apply_start..apply_start + 4) { + let curr_val = u32_from_le_slice(apply_to); + apply_to.copy_from_slice(&(u + curr_val).to_le_bytes()); + } else { + in_bounds = false; + } + } + }; + + if !in_bounds { + self.emit_warning(LinkWarning::RelOOB( + self.symbol_id_location_string(apply_symbol), + apply_offset, + )); } } _ => { @@ -1067,14 +1078,6 @@ impl Linker { } } - if oob { - // prevent double mut borrow by moving this here - self.emit_warning(LinkWarning::RelOOB( - self.symbol_id_location_string(apply_symbol), - apply_offset, - )); - } - Ok(()) } @@ -1090,28 +1093,24 @@ impl Linker { if self.symbol_data_offsets.contains_key(&id) { return Ok(()); } - + if let Some(info) = self.symbols.get_info_from_id(id) { - match &info.value { - Some(SymbolValue::Data(d)) => { - // set address - self.symbol_data_offsets - .insert(id, data.len() as u64); - // add data - data.extend(d); - } - _ => {}, + if let Some(SymbolValue::Data(d)) = &info.value { + // set address + self.symbol_data_offsets.insert(id, data.len() as u64); + // add data + data.extend(d); } } - + for reference in symbol_graph.get(&id).unwrap_or(&vec![]) { self.add_data_for_symbol(data, symbol_graph, *reference)?; } Ok(()) } - - pub fn link<T: Write + Seek>(&mut self, out: &mut BufWriter<T>) -> LinkResult<()> { + + pub fn link<T: Write + Seek>(mut self, out: &mut BufWriter<T>) -> LinkResult<()> { let mut symbol_graph = SymbolGraph::with_capacity(self.symbols.count()); let relocations = mem::take(&mut self.relocations); @@ -1139,8 +1138,7 @@ impl Linker { exec.set_bss(self.bss_addr, self.bss_size); exec.set_interp("/lib/ld-linux.so.2"); exec.add_lib("libc.so.6"); - - + self.data_addr = exec.data_addr(); let entry_name_str = "main"; @@ -1156,19 +1154,16 @@ impl Linker { let mut data = vec![]; self.add_data_for_symbol(&mut data, &symbol_graph, entry_id)?; - for rel in relocations.iter() { - self.apply_relocation(&rel, &mut data)?; - } - for rel in relocations { - let mut _data = [0; MAX_REL_SIZE]; - if self.get_relocation_data(&rel, 0, &mut _data)?.is_none() { - if let Some(data_offset) = self.get_rel_apply_data_offset(&rel) { - exec.add_relocation(&self.symbol_names, &rel, self.data_addr + data_offset); - } + self.apply_relocation(rel, &mut data)?; + } + + for rel in mem::take(&mut self.undefined_relocations) { + if let Some(data_offset) = self.get_rel_apply_data_offset(&rel) { + exec.add_relocation(&self.symbol_names, &rel, self.data_addr + data_offset); } } - + exec.write(&data, out) } } diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..d8ab9c8 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,3 @@ +pub fn u32_from_le_slice(data: &[u8]) -> u32 { + u32::from_le_bytes([data[0], data[1], data[2], data[3]]) +} |