diff options
Diffstat (limited to 'src/main.rs')
-rw-r--r-- | src/main.rs | 213 |
1 files changed, 133 insertions, 80 deletions
diff --git a/src/main.rs b/src/main.rs index d5b4bb7..53d0cf7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,6 +8,8 @@ mod elf; pub enum LinkError { IO(io::Error), TooLarge, + NoEntry(String), // no entry point + EntryNotDefined(String), // entry point is declared, but not defined } impl fmt::Display for LinkError { @@ -16,6 +18,8 @@ impl fmt::Display for LinkError { match self { IO(e) => write!(f, "IO error: {e}"), TooLarge => write!(f, "executable file would be too large."), + NoEntry(name) => write!(f, "entry point {name} not found."), + EntryNotDefined(name) => write!(f, "entry point {name} declared, but not defined."), } } } @@ -42,7 +46,10 @@ impl fmt::Display for LinkWarning { use LinkWarning::*; match self { SymNotFound(s) => write!(f, "symbol not found: {s}"), - RelocationIgnored(offset) => write!(f, "offset {offset} not in a data/text section. relocation will not be applied."), + RelocationIgnored(offset) => write!( + f, + "offset {offset} not in a data/text section. relocation will not be applied." + ), } } } @@ -155,6 +162,13 @@ impl SymbolNames { #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] struct SourceId(u32); +impl SourceId { + const NONE: Self = Self(u32::MAX); +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +struct SymbolId(u32); + #[derive(Copy, Clone, Debug)] enum SymbolType { Function, @@ -162,10 +176,10 @@ enum SymbolType { Other, } -#[derive(Copy, Clone, Debug)] +#[derive(Debug)] enum SymbolValue { Bss(u64), - Data(usize), // index into Linker.symbol_data + Data(Vec<u8>), Absolute(u64), } @@ -178,37 +192,61 @@ struct SymbolInfo { } struct Symbols { - global: HashMap<SymbolName, SymbolInfo>, - weak: HashMap<SymbolName, SymbolInfo>, - local: HashMap<(SourceId, SymbolName), SymbolInfo>, + info: Vec<SymbolInfo>, + global: HashMap<SymbolName, SymbolId>, + weak: HashMap<SymbolName, SymbolId>, + local: HashMap<(SourceId, SymbolName), SymbolId>, } impl Symbols { fn new() -> Self { Self { + info: vec![], global: HashMap::new(), weak: HashMap::new(), local: HashMap::new(), } } - fn add_weak(&mut self, name: SymbolName, info: SymbolInfo) { - self.weak.insert(name, info); + fn add_(&mut self, info: SymbolInfo) -> SymbolId { + let id = SymbolId(self.info.len() as _); + self.info.push(info); + id + } + + fn add_weak(&mut self, name: SymbolName, info: SymbolInfo) -> SymbolId { + let id = self.add_(info); + self.weak.insert(name, id); + id } - fn add_local(&mut self, source: SourceId, name: SymbolName, info: SymbolInfo) { - self.local.insert((source, name), info); + fn add_local(&mut self, source: SourceId, name: SymbolName, info: SymbolInfo) -> SymbolId { + let id = self.add_(info); + self.local.insert((source, name), id); + id } - fn add_global(&mut self, name: SymbolName, info: SymbolInfo) { - self.global.insert(name, info); + fn add_global(&mut self, name: SymbolName, info: SymbolInfo) -> SymbolId { + let id = self.add_(info); + self.global.insert(name, id); + id } - fn get(&self, source: SourceId, name: SymbolName) -> Option<&SymbolInfo> { - self.local + fn get_info_from_id(&self, id: SymbolId) -> Option<&SymbolInfo> { + self.info.get(id.0 as usize) + } + + fn get_id_from_name(&self, source: SourceId, name: SymbolName) -> Option<SymbolId> { + self + .local .get(&(source, name)) .or_else(|| self.global.get(&name)) .or_else(|| self.weak.get(&name)) + .map(|r| *r) + } + + fn get_info_from_name(&self, source: SourceId, name: SymbolName) -> Option<&SymbolInfo> { + self.get_info_from_id(self.get_id_from_name(source, name)?) } } @@ -237,7 +275,7 @@ impl RelocationType { #[derive(Debug, Clone)] #[allow(dead_code)] // @TODO @TEMPORARY struct Relocation { - data_idx: usize, + r#where: (SymbolId, u64), // (symbol containing relocation, offset in symbol where relocation needs to be applied) source_id: SourceId, sym: SymbolName, r#type: RelocationType, @@ -252,14 +290,13 @@ struct Linker { relocations: Vec<Relocation>, sections: Vec<elf::Shdr32>, warnings: Vec<LinkWarning>, - symbol_data: Vec<u8>, bss_size: u64, } -// this maps between offsets in this object and indices in self.symbol_data. -// (needed to translate relocation addresses to symbol_data offsets.) +// this maps between offsets in an object file and symbols defined in that file. +// this is used to figure out where relocations are taking place. struct AddrMap { - map: BTreeMap<(u64, u64), usize>, + map: BTreeMap<(u64, u64), SymbolId>, } impl AddrMap { @@ -269,18 +306,20 @@ impl AddrMap { } } - fn add_symbol(&mut self, offset: u64, size: u64, data_idx: usize) { + fn add_symbol(&mut self, offset: u64, size: u64, id: SymbolId) { if size > 0 { - self.map.insert((offset, offset + size), data_idx); + self.map.insert((offset, offset + size), id); } } - fn offset_to_data_idx(&self, offset: u64) -> Option<usize> { + // returns symbol, offset in symbol. + // e.g. a relocation might happen at main+0x33. + fn get(&self, offset: u64) -> Option<(SymbolId, u64)> { let mut r = self.map.range(..(offset, u64::MAX)); let (key, value) = r.next_back()?; if offset >= key.0 && offset < key.1 { // offset corresponds to somewhere in this symbol - Some(*value + (offset - key.0) as usize) + Some((*value, offset - key.0)) } else { None } @@ -297,7 +336,6 @@ impl Linker { bss_size: 0, sections: vec![], relocations: vec![], - symbol_data: vec![], warnings: vec![], } } @@ -336,40 +374,29 @@ impl Linker { let name_id = self.symbol_names.add(name); let size = sym.size as u64; - const STT_OBJECT: u8 = 1; - const STT_FUNC: u8 = 2; - const STB_LOCAL: u8 = 0; - const STB_GLOBAL: u8 = 1; - const STB_WEAK: u8 = 2; - const SHN_UNDEF: u16 = 0; - const SHN_ABS: u16 = 0xfff1; - const SHN_COMMON: u16 = 0xfff2; - let r#type = match r#type { - STT_OBJECT => SymbolType::Object, - STT_FUNC => SymbolType::Function, + elf::STT_OBJECT => SymbolType::Object, + elf::STT_FUNC => SymbolType::Function, _ => SymbolType::Other, }; + let mut data_offset = None; + let value = match sym.shndx { - SHN_UNDEF | SHN_COMMON => None, - SHN_ABS => Some(SymbolValue::Absolute(sym.value as u64)), + elf::SHN_UNDEF | elf::SHN_COMMON => None, + elf::SHN_ABS => Some(SymbolValue::Absolute(sym.value as u64)), ndx if (ndx as usize) < self.sections.len() => { let ndx = ndx as usize; - match self.get_str(reader, self.sections[ndx].name)?.as_str() { - ".text" | ".data" | ".data1" | ".rodata" | ".rodata1" => { - // add to symbol_data - let data_idx = self.symbol_data.len(); + match self.sections[ndx].r#type { + elf::SHT_PROGBITS => { let offset = self.sections[ndx].offset as u64 + sym.value as u64; - - addr_map.add_symbol(offset, size, data_idx); - + data_offset = Some(offset); reader.seek(io::SeekFrom::Start(offset))?; - self.symbol_data.resize(data_idx + size as usize, 0); - reader.read_exact(&mut self.symbol_data[data_idx..])?; - Some(SymbolValue::Data(data_idx)) + let mut data = vec![0; size as usize]; + reader.read_exact(&mut data)?; + Some(SymbolValue::Data(data)) } - ".bss" => { + elf::SHT_NOBITS => { let p = self.bss_size; self.bss_size += size; Some(SymbolValue::Bss(p)) @@ -385,16 +412,19 @@ impl Linker { value, size, }; - match bind { - STB_LOCAL => self.symbols.add_local(source, name_id, info), - STB_GLOBAL => self.symbols.add_global(name_id, info), - STB_WEAK => self.symbols.add_weak(name_id, info), - _ => {} - } + let symbol_id = match bind { + elf::STB_LOCAL => self.symbols.add_local(source, name_id, info), + elf::STB_GLOBAL => self.symbols.add_global(name_id, info), + elf::STB_WEAK => self.symbols.add_weak(name_id, info), + _ => return Ok(name_id), + }; + if let Some(offset) = data_offset { + addr_map.add_symbol(offset, size, symbol_id); + } Ok(name_id) } - + fn add_relocation_x86( &mut self, symtab: &HashMap<u32, SymbolName>, @@ -407,17 +437,17 @@ impl Linker { let r#type = info as u8; let sym_idx = info >> 8; - if let Some(data_idx) = addr_map.offset_to_data_idx(offset) { + if let Some(r#where) = addr_map.get(offset) { match symtab.get(&sym_idx) { Some(sym) => { self.relocations.push(Relocation { - data_idx, + r#where, source_id, sym: *sym, r#type: RelocationType::from_x86_u8(r#type)?, addend: addend.into(), }); - }, + } None => return Err(ElfError::BadSymIdx(sym_idx.into())), } } else { @@ -562,10 +592,8 @@ impl Linker { ) }; - const SHT_RELA: u32 = 4; - const SHT_REL: u32 = 9; match shdr.r#type { - SHT_RELA => { + elf::SHT_RELA => { #[repr(C)] struct ElfRela { offset: u32, @@ -582,7 +610,7 @@ impl Linker { )?; } } - SHT_REL => { + elf::SHT_REL => { #[repr(C)] struct ElfRel { offset: u32, @@ -600,15 +628,15 @@ impl Linker { Ok(()) } - fn get_sym_name(&self, id: SymbolName) -> Option<&str> { + fn get_name_str(&self, id: SymbolName) -> Option<&str> { self.symbol_names.get_str(id) } // get symbol, producing a warning if it does not exist. - fn get_symbol(&mut self, source_id: SourceId, id: SymbolName) -> Option<&SymbolInfo> { - let sym = self.symbols.get(source_id, id); + fn get_symbol(&mut self, source_id: SourceId, name: SymbolName) -> Option<&SymbolInfo> { + let sym = self.symbols.get_info_from_name(source_id, name); if sym.is_none() { - let warn = LinkWarning::SymNotFound(self.get_sym_name(id).unwrap_or("???").into()); + let warn = LinkWarning::SymNotFound(self.get_name_str(name).unwrap_or("???").into()); self.warnings.push(warn); } sym @@ -622,6 +650,22 @@ impl Linker { println!("{rel:?} {symbol:?}"); Ok(()) } + + // we don't want to link unused symbols. + // we start by calling this on the entry function, then it recursively calls itself for each symbol used. + pub fn add_data_for_symbol(&mut self, data: &mut Vec<u8>, symbol_graph: &HashMap<SymbolId, Vec<SymbolId>>, + symbol_addrs: &mut HashMap<SymbolId, u64>, id: SymbolId) -> Result<(), LinkError> { + // deal with cycles + if symbol_addrs.contains_key(&id) { + return Ok(()); + } + symbol_addrs.insert(id, data.len() as u64); + for reference in symbol_graph.get(&id).unwrap_or(&vec![]) { + self.add_data_for_symbol(data, symbol_graph, symbol_addrs, *reference)?; + } + + Ok(()) + } pub fn link<T: Write>( &mut self, @@ -647,19 +691,26 @@ impl Linker { header.phoff = ehdr_size; header.entry = entry_point; out.write_all(&header.to_bytes())?; - - let data_addr = segment_addr + header_size; - let bss_addr = segment_addr + file_size; - let bss_size: u32 = self.bss_size.try_into().map_err(|_| LinkError::TooLarge)?; - let _get_symbol_value = |val: SymbolValue| -> u64 { - use SymbolValue::*; - match val { - Absolute(n) => n, - Bss(x) => bss_addr as u64 + x, - Data(d) => data_addr as u64 + d as u64, - } - }; + //let data_addr = segment_addr + header_size; + //let bss_addr = segment_addr + file_size; + let bss_size: u32 = self.bss_size.try_into().map_err(|_| LinkError::TooLarge)?; + + let entry_name_str = "entry"; + let entry_name_id = self.symbol_names.get(entry_name_str).ok_or_else(|| LinkError::NoEntry(entry_name_str.into()))?; + let entry_id = self.symbols.get_id_from_name(SourceId::NONE, entry_name_id).ok_or_else(|| LinkError::EntryNotDefined(entry_name_str.into()))?; + let mut symbol_addrs = HashMap::new(); + + self.add_data_for_symbol(&mut symbol_addrs, entry_id); +// +// let _get_symbol_value = |val: SymbolValue| -> u64 { +// use SymbolValue::*; +// match val { +// Absolute(n) => n, +// Bss(x) => bss_addr as u64 + x, +// Data(_d) => todo!(), +// } +// }; let phdr = elf::Phdr32 { flags: 0b111, // read, write, execute @@ -739,8 +790,10 @@ fn main() { match linker.link(&mut output) { Err(e) => eprintln!("Error linking: {e}"), - Ok(warnings) => for warning in warnings { + Ok(warnings) => { + for warning in warnings { eprintln!("Warning: {warning}"); - }, + } + } } } |