From aad605d8a0c60d301a768d616e360fbb72d68743 Mon Sep 17 00:00:00 2001 From: pommicket Date: Sun, 6 Nov 2022 19:01:24 -0500 Subject: move things around so link takes &self instead of self --- src/linker.rs | 96 +++++++++++++++++++++++++++++++---------------------------- src/main.rs | 10 +++++++ 2 files changed, 61 insertions(+), 45 deletions(-) diff --git a/src/linker.rs b/src/linker.rs index 99d9edc..7120034 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -241,7 +241,7 @@ impl Symbols { self.global.insert(name, id); id } - + fn get_info_from_id(&self, id: SymbolId) -> &SymbolInfo { // Self::add_ is the only function that constructs SymbolIds. // unless someone uses a SymbolId across Symbols instances (why would you do that), @@ -291,12 +291,11 @@ pub struct Linker<'a> { symbols: Symbols, symbol_names: SymbolNames, relocations: Vec, - sources: Vec, // object files + sources: Vec, // object files libraries: Vec, - bss_size: u64, // output bss size - bss_addr: u64, // output bss address - data_addr: u64, // output data address - symbol_data_offsets: HashMap, // for symbols with data, this holds the offsets into the data segment. + /// Output bss size. + /// As more objects are added, this grows. + bss_size: u64, warn: Box, } @@ -337,21 +336,27 @@ impl SymbolOffsetMap { // this is needed so we don't emit anything for unused symbols. type SymbolGraph = HashMap>; -struct Executable { +struct LinkerOutput { + /// for symbols with data, this holds the offsets into the data segment. + symbol_data_offsets: HashMap, interp: Vec, load_addr: u64, bss: Option<(u64, u64)>, + /// these bytes will make up the text+data section of our executable. 
+ data: Vec, relocations: Vec<(Relocation, u64)>, strtab: Vec, symbol_strtab_offsets: HashMap, lib_strtab_offsets: Vec, } -impl Executable { +impl LinkerOutput { pub fn new(load_addr: u64) -> Self { Self { + symbol_data_offsets: HashMap::new(), bss: None, load_addr, + data: vec![], interp: vec![], relocations: vec![], lib_strtab_offsets: vec![], @@ -419,12 +424,16 @@ impl Executable { self.load_addr + self.data_offset() } - pub fn write(&self, data: &[u8], mut out: impl Write + Seek) -> LinkResult<()> { + pub fn bss_addr(&self) -> Option { + self.bss.map(|(a, _)| a) + } + + pub fn write(&self, mut out: impl Write + Seek) -> LinkResult<()> { let load_addr = self.load_addr as u32; // start by writing data. out.seek(io::SeekFrom::Start(self.data_offset()))?; - out.write_all(data)?; + out.write_all(&self.data)?; let mut interp_offset = 0; let mut dyntab_offset = 0; @@ -603,13 +612,10 @@ impl<'a> Linker<'a> { Linker { symbols: Symbols::new(), symbol_names: SymbolNames::new(), - bss_addr: 0, bss_size: 0, - data_addr: 0, relocations: vec![], sources: vec![], libraries: vec![], - symbol_data_offsets: HashMap::new(), warn: Box::new(Self::default_warn_handler), } } @@ -746,40 +752,44 @@ impl<'a> Linker<'a> { } /// Get value of symbol (e.g. ID of main → address of main). - fn get_symbol_value(&self, sym: SymbolId) -> u64 { + fn get_symbol_value(&self, exec: &LinkerOutput, sym: SymbolId) -> u64 { let info = self.symbols.get_info_from_id(sym); use SymbolValue::*; match info.value { Data(_) => { - self + exec .symbol_data_offsets .get(&sym) .unwrap() // @TODO: can this panic? - + self.data_addr + + exec.data_addr() + } + Bss(x) => { + // this shouldn't panic, since we always generate a bss section + // @TODO: make bss optional + exec.bss_addr().expect("no bss") + x } - Bss(x) => self.bss_addr + x, Absolute(a) => a, } } /// Get offset in data section where relocation should be applied. 
- fn get_rel_apply_data_offset(&self, rel: &Relocation) -> Option<u64> { + fn get_rel_apply_data_offset(&self, exec: &LinkerOutput, rel: &Relocation) -> Option<u64> { let apply_symbol = rel.r#where.0; - let r = self.symbol_data_offsets.get(&apply_symbol)?; + let r = exec.symbol_data_offsets.get(&apply_symbol)?; Some(*r + rel.r#where.1) } /// Apply relocation to data. /// Returns `Ok(true)` if the relocation was dealt with, and /// `Ok(false)` if the symbol is not defined (so it needs to be loaded from a dynamic library). - fn apply_relocation(&self, rel: &Relocation, data: &mut [u8]) -> LinkResult<bool> { + fn apply_relocation(&self, exec: &mut LinkerOutput, rel: &Relocation) -> LinkResult<bool> { let apply_symbol = rel.r#where.0; - let apply_offset = match self.get_rel_apply_data_offset(&rel) { + let apply_offset = match self.get_rel_apply_data_offset(exec, &rel) { Some(data_offset) => data_offset, None => return Ok(true), // this relocation isn't in a data section so there's nothing we can do about it }; - let pc = apply_offset + self.data_addr; - + let pc = apply_offset + exec.data_addr(); + let symbol = match self.get_symbol_id(rel.source_id, rel.sym) { None => { // symbol not defined. it should come from a library. @@ -788,7 +798,7 @@ impl<'a> Linker<'a> { Some(sym) => sym, }; - let symbol_value = self.get_symbol_value(symbol); + let symbol_value = self.get_symbol_value(exec, symbol); let addend = rel.addend; @@ -819,7 +829,7 @@ impl<'a> Linker<'a> { let mut in_bounds = true; match value { U32(u) => { - if let Some(apply_to) = data.get_mut(apply_start..apply_start + 4) { + if let Some(apply_to) = exec.data.get_mut(apply_start..apply_start + 4) { let curr_val = u32_from_le_slice(apply_to); apply_to.copy_from_slice(&(u + curr_val).to_le_bytes()); } else { @@ -889,26 +899,26 @@ impl<'a> Linker<'a> { // we don't want to link unused symbols. // we start by calling this on the entry function, then it recursively calls itself for each symbol used. 
fn add_data_for_symbol( - &mut self, - data: &mut Vec<u8>, + &self, + exec: &mut LinkerOutput, symbol_graph: &SymbolGraph, id: SymbolId, ) -> Result<(), LinkError> { // deal with cycles - if self.symbol_data_offsets.contains_key(&id) { + if exec.symbol_data_offsets.contains_key(&id) { return Ok(()); } let info = self.symbols.get_info_from_id(id); if let SymbolValue::Data(d) = &info.value { // set address - self.symbol_data_offsets.insert(id, data.len() as u64); + exec.symbol_data_offsets.insert(id, exec.data.len() as u64); // add data - data.extend(d); + exec.data.extend(d); } for reference in symbol_graph.get(&id).unwrap_or(&vec![]) { - self.add_data_for_symbol(data, symbol_graph, *reference)?; + self.add_data_for_symbol(exec, symbol_graph, *reference)?; } Ok(()) @@ -917,7 +927,7 @@ impl<'a> Linker<'a> { /// Link everything together. /// Currently this drops `self` (you probably don't need to link multiple times). /// That might change in a future version. - pub fn link(mut self, out: impl Write + Seek, entry: &str) -> LinkResult<()> { + pub fn link(&self, out: impl Write + Seek, entry: &str) -> LinkResult<()> { let mut symbol_graph = SymbolGraph::with_capacity(self.symbols.count()); // compute symbol graph @@ -938,16 +948,13 @@ impl<'a> Linker<'a> { let symbol_graph = symbol_graph; // no more mutating - let mut exec = Executable::new(0x400000); - self.bss_addr = 0x50000000; - exec.set_bss(self.bss_addr, self.bss_size); + let mut exec = LinkerOutput::new(0x400000); + exec.set_bss(0x70000000, self.bss_size); exec.set_interp("/lib/ld-linux.so.2"); for lib in self.libraries.iter() { exec.add_lib(lib); } - self.data_addr = exec.data_addr(); - let entry_name_id = self .symbol_names .get(entry) @@ -957,23 +964,22 @@ impl<'a> Linker<'a> { .get_id_from_name(SourceId::NONE, entry_name_id) .ok_or_else(|| LinkError::EntryNotDefined(entry.into()))?; - let mut data = vec![]; - self.add_data_for_symbol(&mut data, &symbol_graph, entry_id)?; + self.add_data_for_symbol(&mut exec, 
&symbol_graph, entry_id)?; for rel in self.relocations.iter() { - if !self.apply_relocation(rel, &mut data)? { + if !self.apply_relocation(&mut exec, rel)? { // dynamic library relocation - if let Some(data_offset) = self.get_rel_apply_data_offset(rel) { - exec.add_relocation(&self.symbol_names, rel, self.data_addr + data_offset); + if let Some(data_offset) = self.get_rel_apply_data_offset(&exec, rel) { + exec.add_relocation(&self.symbol_names, rel, exec.data_addr() + data_offset); } } } - - exec.write(&data, out) + + exec.write(out) } /// Easy linking API. Just provide a path. - pub fn link_to_file(self, path: impl AsRef<Path>, entry: &str) -> Result<(), String> { + pub fn link_to_file(&self, path: impl AsRef<Path>, entry: &str) -> Result<(), String> { let path = path.as_ref(); let mut out_options = fs::OpenOptions::new(); out_options.write(true).create(true).truncate(true); diff --git a/src/main.rs b/src/main.rs index c6c45a8..9ad34e3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,13 @@ +/* +@TODO: +- move symbol_data_offsets stuff inside LinkerOutput +- bounds check on bss +- make bss optional +- finish docs +- make sure --no-stdlib generates a tiny executable +- make executables more tiny (overlap sections, etc.) +*/ + // you will need gcc-multilib to compile a 32-bit executable (with stdlib) // you need to use -fno-pic with gcc -- got,plt relocations aren't supported // and also make the executable bigger. -- cgit v1.2.3