diff options
author | pommicket <pommicket@gmail.com> | 2022-11-05 15:46:57 -0400 |
---|---|---|
committer | pommicket <pommicket@gmail.com> | 2022-11-05 15:46:57 -0400 |
commit | d0704ccd705242c5da61a43f029f39e6562028d7 (patch) | |
tree | 66568c17f7973356766138b2f207bedacc04f9c7 | |
parent | bae6a477b04f7a767678c88089e28dd9e5c5b879 (diff) |
a lot of reorganizing. not working yet.
-rw-r--r-- | src/elf.rs | 487 | ||||
-rw-r--r-- | src/main.rs | 405 |
2 files changed, 548 insertions, 344 deletions
@@ -1,6 +1,7 @@ // basic ELF types and constants -use std::{io, mem}; +use std::{io, mem, fmt}; +use io::{BufRead, Seek}; pub trait ToBytes<const N: usize> { fn to_bytes(self) -> [u8; N]; @@ -10,6 +11,8 @@ pub trait FromBytes<const N: usize> { fn from_bytes(bytes: [u8; N]) -> Self; } +// @TODO: make all of these constants private + // executable type pub const ET_REL: u16 = 1; pub const ET_EXEC: u16 = 2; @@ -35,28 +38,18 @@ pub const DT_RELENT: u32 = 19; pub const PT_DYNAMIC: u32 = 2; pub const PT_INTERP: u32 = 3; -#[allow(unused)] pub const SHT_PROGBITS: u32 = 1; // Program data -#[allow(unused)] pub const SHT_SYMTAB: u32 = 2; // Symbol table -#[allow(unused)] -pub const SHT_STRTAB: u32 = 3; // String table -#[allow(unused)] +//pub const SHT_STRTAB: u32 = 3; // String table pub const SHT_RELA: u32 = 4; // Relocation entries with addends -#[allow(unused)] -pub const SHT_HASH: u32 = 5; // Symbol hash table -#[allow(unused)] -pub const SHT_DYNAMIC: u32 = 6; // Dynamic linking information -#[allow(unused)] -pub const SHT_NOTE: u32 = 7; // Notes -#[allow(unused)] +//pub const SHT_DYNAMIC: u32 = 6; // Dynamic linking information pub const SHT_NOBITS: u32 = 8; // Program space with no data (bss) -#[allow(unused)] pub const SHT_REL: u32 = 9; // Relocation entries, no addends // symbol type pub const STT_OBJECT: u8 = 1; pub const STT_FUNC: u8 = 2; +pub const STT_SECTION: u8 = 3; // symbol bind pub const STB_LOCAL: u8 = 0; @@ -66,7 +59,7 @@ pub const STB_WEAK: u8 = 2; // section number (for relocations) pub const SHN_UNDEF: u16 = 0; pub const SHN_ABS: u16 = 0xfff1; -pub const SHN_COMMON: u16 = 0xfff2; +//pub const SHN_COMMON: u16 = 0xfff2; #[repr(C)] pub struct Ehdr32 { @@ -123,14 +116,6 @@ impl Ehdr32 { pub fn size_of() -> usize { mem::size_of::<Self>() } - - pub fn section_offset(&self, ndx: u16) -> u64 { - ndx as u64 * self.shentsize as u64 + self.shoff as u64 - } - - pub fn section_seek(&self, ndx: u16) -> io::SeekFrom { - io::SeekFrom::Start(self.section_offset(ndx)) - 
} } #[repr(C)] @@ -221,7 +206,463 @@ macro_rules! impl_bytes { } impl_bytes!(Ehdr32, 0x34); +impl_bytes!(Shdr32, 0x28); impl_bytes!(Phdr32, 0x20); impl_bytes!(Sym32, 16); impl_bytes!(Rela32, 12); impl_bytes!(Rel32, 8); + +#[derive(Debug)] +pub enum Error { + IO(io::Error), + NotAnElf, + BadUtf8, + BadVersion, + UnsupportedClass(u8, u8), + BadShStrNdx(u16), + BadSymShNdx(u16), + BadSymIndex(u64), + BadLink(u32), + BadSectionIndex(u16), + NoStrtab, +} + + +impl From<io::Error> for Error { + fn from(e: io::Error) -> Error { + Error::IO(e) + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result { + use Error::*; + match self { + IO(i) if i.kind() == io::ErrorKind::UnexpectedEof => write!(f, "unexpected EOF"), + IO(i) => write!(f, "IO error: {i}"), + NotAnElf => write!(f, "Not an ELF file."), + UnsupportedClass(class, data) => { + let class_str = match class { + 1 => "32", + 2 => "64", + _ => "??", + }; + let data_str = match data { + 1 => "little", + 2 => "big", + _ => "??", + }; + write!(f, "This type of executable ({class_str}-bit {data_str}-endian) is not supported.") + }, + BadVersion => write!(f, "Apparently you're living in the future. 
Where I'm from, there's only ELF version 1"), + BadUtf8 => write!(f, "Bad UTF-8 in ELF file."), + BadShStrNdx(n) => write!(f, "e_shstrndx ({n}) does not refer to a valid section."), + BadSymShNdx(n) => write!(f, "Bad symbol shndx field: {n}."), + BadSymIndex(x) => write!(f, "Bad symbol index: {x}"), + NoStrtab => write!(f, "No .strtab section found."), + BadLink(x) => write!(f, "Bad section link: {x}"), + BadSectionIndex(x) => write!(f, "Bad section index: {x}"), + } + } +} + +impl From<&Error> for String { + fn from(e: &Error) -> String { + format!("{e}") + } +} + +type Result<T> = std::result::Result<T, Error>; + +fn bytes_to_string(bytes: Vec<u8>) -> Result<String> { + String::from_utf8(bytes).map_err(|_| Error::BadUtf8) +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum Machine { + X86, + Amd64, + Other(u16), +} + +impl From<u16> for Machine { + fn from(x: u16) -> Self { + use Machine::*; + match x { + 3 => X86, + 0x3e => Amd64, + _ => Other(x), + } + } +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum SectionType { + ProgBits, + NoBits, + Rel, + Rela, + Symtab, + Other(u32) +} + +impl From<u32> for SectionType { + fn from(x: u32) -> Self { + use SectionType::*; + match x { + SHT_PROGBITS => ProgBits, + SHT_NOBITS => NoBits, + SHT_REL => Rel, + SHT_RELA => Rela, + SHT_SYMTAB => Symtab, + _ => Other(x), + } + } +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum Type { + Rel, + Exec, + Other(u16) +} + +impl From<u16> for Type { + fn from(x: u16) -> Self { + use Type::*; + match x { + 1 => Rel, + 2 => Exec, + _ => Other(x), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum SymbolBind { + Global, + Weak, + Local, + Other(u8) +} + +impl From<u8> for SymbolBind { + fn from(x: u8) -> Self { + use SymbolBind::*; + match x { + STB_GLOBAL => Global, + STB_WEAK => Weak, + STB_LOCAL => Local, + _ => Other(x), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum SymbolType { + Function, + Object, + Section, + Other(u8) +} + 
+impl From<u8> for SymbolType { + fn from(x: u8) -> Self { + use SymbolType::*; + match x { + STT_FUNC => Function, + STT_OBJECT => Object, + STT_SECTION => Section, + _ => Other(x), + } + } +} + +#[derive(Debug, Copy, Clone)] +pub enum SymbolValue { + Undefined, + Absolute(u64), + SectionOffset(u16, u64), +} + +#[derive(Debug, Clone)] +pub struct Symbol { + name: u64, // offset into .strtab + pub size: u64, + pub value: SymbolValue, + pub bind: SymbolBind, + pub r#type: SymbolType, +} + +#[derive(Debug, Clone, Copy)] +pub enum RelType { + Direct32, + Pc32, + Other(u8), +} + +impl RelType { + fn from_u8(id: u8, machine: Machine) -> Self { + use RelType::*; + use Machine::*; + match (machine, id) { + (X86, 1) => Direct32, + (X86, 2) => Pc32, + _ => RelType::Other(id), + } + } + + pub fn to_x86_u8(self) -> Option<u8> { + use RelType::*; + Some(match self { + Direct32 => 1, + Pc32 => 2, + Other(x) => x, + }) + } +} + + +pub struct Relocation { + pub r#type: RelType, + pub offset: u64, // where the relocation should be applied. for ET_REL, this is a file offset; otherwise, it's an address. 
+ pub symbol: Symbol, + pub addend: i64, +} + +pub trait Reader where Self: Sized { + fn new<T: BufRead + Seek>(reader: T) -> Result<Self>; + fn r#type(&self) -> Type; + fn machine(&self) -> Machine; + fn entry(&self) -> u64; + fn symbols(&self) -> &[Symbol]; + fn relocations(&self) -> &[Relocation]; + fn symbol_name(&self, sym: &Symbol) -> Result<String>; + fn section_type(&self, idx: u16) -> Option<SectionType>; + fn read_section_data_exact(&self, idx: u16, offset: u64, data: &mut [u8]) -> Result<()>; +} + +pub struct Reader32LE { + ehdr: Ehdr32, + shdrs: Vec<Shdr32>, + symbols: Vec<Symbol>, + strtab_idx: Option<u16>, + section_data: Vec<Vec<u8>>, + relocations: Vec<Relocation>, +} + +impl Reader32LE { + pub fn section_offset(&self, index: u16) -> Option<u64> { + let index = usize::from(index); + if index >= self.shdrs.len() { + None + } else { + Some(self.shdrs[index].offset.into()) + } + } +} + +impl Reader for Reader32LE { + fn new<T: BufRead + Seek>(mut reader: T) -> Result<Self> { + use Error::*; + + let mut hdr_buf = [0; 0x34]; + reader.read_exact(&mut hdr_buf)?; + let ehdr = Ehdr32::from_bytes(hdr_buf); + + if ehdr.ident != [0x7f, b'E', b'L', b'F'] { + return Err(NotAnElf); + } + if ehdr.class != 1 || ehdr.data != 1 { + return Err(UnsupportedClass(ehdr.class, ehdr.data)); + } + if ehdr.version != 1 || ehdr.version2 != 1 { + return Err(BadVersion); + } + + let mut shdrs = Vec::with_capacity(ehdr.shnum.into()); + for i in 0..ehdr.shnum { + let offset = u64::from(ehdr.shoff) + u64::from(ehdr.shentsize) * u64::from(i); + reader.seek(io::SeekFrom::Start(offset))?; + let mut shdr_buf = [0; 0x28]; + reader.read_exact(&mut shdr_buf)?; + shdrs.push(Shdr32::from_bytes(shdr_buf)); + } + + let mut symtabs = Vec::with_capacity(ehdr.shnum.into()); + let mut symbols = vec![]; + let mut section_data = Vec::with_capacity(ehdr.shnum.into()); + let mut strtab_idx = None; + + + + for (s_idx, shdr) in shdrs.iter().enumerate() { + let mut data = vec![0; shdr.size as usize]; + 
reader.read_exact(&mut data)?; + section_data.push(data); + + if let Some(shstrhdr) = shdrs.get(ehdr.shstrndx as usize) { + // get name + reader.seek(io::SeekFrom::Start( + shstrhdr.offset as u64 + shdr.name as u64, + ))?; + let mut bytes = vec![]; + reader.read_until(0, &mut bytes)?; + bytes.pop(); // remove terminating \0 + let name = bytes_to_string(bytes)?; + + if name == ".strtab" { + strtab_idx = Some(s_idx as u16); + } + } + + + + let mut symtab = vec![]; + if shdr.r#type == SHT_SYMTAB && shdr.entsize as usize >= mem::size_of::<Sym32>() { + // read symbol table + for i in 0..shdr.size / shdr.entsize { + let offset = u64::from(shdr.offset) + u64::from(shdr.entsize) * u64::from(i); + reader.seek(io::SeekFrom::Start(offset))?; + let mut sym_buf = [0; 16]; + reader.read_exact(&mut sym_buf)?; + let sym = Sym32::from_bytes(sym_buf); + let value = match sym.shndx { + SHN_UNDEF => SymbolValue::Undefined, + SHN_ABS => SymbolValue::Absolute(sym.value.into()), + idx if idx < ehdr.shnum => SymbolValue::SectionOffset(idx, sym.value.into()), + x => return Err(BadSymShNdx(x)), + }; + + let symbol = Symbol { + name: sym.name.into(), + value, + r#type: (sym.info & 0xf).into(), + bind: (sym.info >> 4).into(), + size: sym.size.into(), + }; + symtab.push(symbols.len()); + symbols.push(symbol); + } + } + symtabs.push(symtab); + } + + // read relocations + let mut relocations = vec![]; + for shdr in shdrs.iter() { + let r#type = shdr.r#type; + if !(r#type == SHT_REL || r#type == SHT_RELA) { + continue; + } + let is_rela = r#type == SHT_RELA; + + if shdr.entsize < 8 { + continue; + } + let count = shdr.size / shdr.entsize; + + reader.seek(io::SeekFrom::Start(shdr.offset.into()))?; + + let my_symbols = symtabs.get(shdr.link as usize).ok_or(BadLink(shdr.link))?; + for _ in 0..count { + + let info; + let mut offset; + let addend; + + if is_rela { + let mut rela_buf = [0; 12]; + reader.read_exact(&mut rela_buf)?; + let rela = Rela32::from_bytes(rela_buf); + info = rela.info; + offset 
= rela.offset; + addend = rela.addend; + } else { + let mut rel_buf = [0; 8]; + reader.read_exact(&mut rel_buf)?; + let rel = Rel32::from_bytes(rel_buf); + info = rel.info; + offset = rel.offset; + addend = 0; + }; + + + if ehdr.r#type == ET_REL { + // rel.offset is relative to section + if let Some(info_hdr) = shdrs.get(shdr.info as usize) { + offset += info_hdr.offset; + } + } + + + let sym_idx = info >> 8; + let symbols_idx = my_symbols.get(sym_idx as usize).ok_or(BadSymIndex(sym_idx.into()))?; + let symbol = &symbols[*symbols_idx]; + + relocations.push(Relocation { + r#type: RelType::from_u8(info as u8, ehdr.machine.into()), + symbol: symbol.clone(), + addend: addend.into(), + offset: offset.into(), + }); + } + } + + Ok(Self { + ehdr, + shdrs, + symbols, + strtab_idx, + relocations, + section_data + }) + } + + fn r#type(&self) -> Type { + self.ehdr.r#type.into() + } + + fn machine(&self) -> Machine { + self.ehdr.machine.into() + } + + fn entry(&self) -> u64 { + self.ehdr.entry.into() + } + + fn relocations(&self) -> &[Relocation] { + &self.relocations + } + + fn symbols(&self) -> &[Symbol] { + &self.symbols + } + + fn symbol_name(&self, sym: &Symbol) -> Result<String> { + let strtab = &self.section_data[self.strtab_idx.ok_or(Error::NoStrtab)? 
as usize]; + let i = sym.name as usize; + let mut end = i; + while end < strtab.len() && strtab[end] != b'\0' { + end += 1; + } + bytes_to_string((&strtab[i..end]).to_vec()) + } + + fn section_type(&self, idx: u16) -> Option<SectionType> { + self.shdrs.get(idx as usize).map(|shdr| shdr.r#type.into()) + } + + fn read_section_data_exact(&self, idx: u16, offset: u64, data: &mut [u8]) -> Result<()> { + let section = self.section_data.get(usize::from(idx)).ok_or(Error::BadSectionIndex(idx))?; + if offset + data.len() as u64 > section.len() as u64 { + return Err(Error::IO(io::Error::from(io::ErrorKind::UnexpectedEof))); + } + + let offset = offset as usize; + + data.copy_from_slice(&section[offset..offset + data.len()]); + + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 6006840..7640a69 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,9 +2,9 @@ // you need to use -fno-pic with gcc -- got,plt relocations aren't supported // and also make the executable bigger. use fs::File; -use io::{BufRead, BufReader, BufWriter, Read, Seek, Write}; +use io::{BufReader, BufWriter, Seek, Write}; use std::collections::{BTreeMap, HashMap}; -use std::{fmt, fs, io, mem, ptr}; +use std::{fmt, fs, io, mem}; #[cfg(target_endian = "big")] compile_error! {"WHY do you have a big endian machine???? it's the 21st century, buddy. this program won't work fuck you"} @@ -12,7 +12,8 @@ compile_error! {"WHY do you have a big endian machine???? it's the 21st century, mod elf; mod util; -use elf::{FromBytes, ToBytes}; +use elf::ToBytes; +use elf::Reader as ELFReader; use util::u32_from_le_slice; pub enum LinkError { @@ -50,6 +51,7 @@ impl From<&LinkError> for String { pub enum LinkWarning { RelSymNotFound { source: String, name: String }, + RelUnsupported(u8), RelOOB(String, u64), RelNoData(String, u64), RelNoValue(String), @@ -66,6 +68,7 @@ impl fmt::Display for LinkWarning { "offset {source}+0x{offset:x} not in a data/text section. relocation will be ignored." 
), RelNoValue(name) => write!(f, "can't figure out value of symbol '{name}' (relocation ignored)."), + RelUnsupported(x) => write!(f, "Unsupported relocation type {x} (relocation ignored)."), } } } @@ -76,13 +79,9 @@ impl From<&LinkWarning> for String { } } -pub enum ElfError { - NotAnElf, - Not32Bit, - NotLE, - BadVersion, +pub enum ObjectError { + Elf(elf::Error), BadType, - BadMachine, BadUtf8, BadSymtab, BadLink(u64), @@ -90,32 +89,28 @@ pub enum ElfError { UnsupportedRelocation(u8), BadSymIdx(u64), NoStrtab, - IO(io::Error), } -impl From<&ElfError> for String { - fn from(e: &ElfError) -> String { +impl From<elf::Error> for ObjectError { + fn from(e: elf::Error) -> Self { + Self::Elf(e) + } +} + +impl From<&ObjectError> for String { + fn from(e: &ObjectError) -> String { format!("{e}") } } -impl fmt::Display for ElfError { +impl fmt::Display for ObjectError { fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { - use ElfError::*; + use ObjectError::*; match self { // Display for UnexpectedEof *should* be this but is less clear // ("failed to fill whole buffer") - IO(i) if i.kind() == io::ErrorKind::UnexpectedEof => write!(f, "unexpected EOF"), - IO(i) => write!(f, "IO error: {i}"), - NotAnElf => write!(f, "not an ELF file"), - Not32Bit => write!(f, "ELF file is not 32-bit"), - NotLE => write!(f, "ELF file is not little-endian"), - BadVersion => write!(f, "ELF version is not 1 (are you living in the future?)"), - BadType => write!(f, "wrong type of ELF file"), - BadMachine => write!( - f, - "unsupported architecture (only x86 is currently supported)" - ), + Elf(e) => write!(f, "{e}"), + BadType => write!(f, "wrong type of ELF file (not an object file)"), BadUtf8 => write!(f, "bad UTF-8 in ELF file"), BadSymtab => write!(f, "bad ELF symbol table"), BadRelHeader => write!(f, "bad ELF relocation header"), @@ -127,12 +122,6 @@ impl fmt::Display for ElfError { } } -impl From<io::Error> for ElfError { - fn from(e: io::Error) -> ElfError { - 
ElfError::IO(e) - } -} - // to be more efficient™, we use integers to keep track of symbol names. type SymbolNameType = u32; #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] @@ -189,6 +178,7 @@ type SymbolIdType = u32; struct SymbolId(SymbolIdType); #[derive(Copy, Clone, Debug)] +#[allow(dead_code)] // @TODO @TEMPORARY enum SymbolType { Function, Object, @@ -205,7 +195,7 @@ enum SymbolValue { #[allow(dead_code)] // @TODO @TEMPORARY #[derive(Debug)] struct SymbolInfo { - r#type: SymbolType, + r#type: elf::SymbolType, value: Option<SymbolValue>, size: u64, } @@ -279,49 +269,20 @@ impl Symbols { } } -#[derive(Debug, Clone, Copy)] -enum RelocationType { - Direct32, - Pc32, -} - -impl RelocationType { - fn from_x86_u8(id: u8) -> Result<Self, ElfError> { - use RelocationType::*; - Ok(match id { - 1 => Direct32, - 2 => Pc32, - _ => return Err(ElfError::UnsupportedRelocation(id)), - }) - } - - fn to_x86_u8(self) -> u8 { - use RelocationType::*; - match self { - Direct32 => 1, - Pc32 => 2, - } - } -} - #[derive(Debug, Clone)] struct Relocation { r#where: (SymbolId, u64), // (symbol containing relocation, offset in symbol where relocation needs to be applied) source_id: SourceId, - source_offset: u64, sym: SymbolName, - r#type: RelocationType, + r#type: elf::RelType, addend: i64, } struct Linker { - src_strtab_offset: u64, // .strtab offset in current object file - src_shstrtab_offset: u64, // .shstrtab offset in current object file symbols: Symbols, symbol_names: SymbolNames, relocations: Vec<Relocation>, undefined_relocations: Vec<Relocation>, // library relocations - sections: Vec<elf::Shdr32>, sources: Vec<String>, bss_size: u64, // output bss size bss_addr: u64, // output bss address @@ -332,13 +293,13 @@ struct Linker { // this maps between offsets in an object file and symbols defined in that file. // this is used to figure out where relocations are taking place. 
-struct AddrMap { +struct SymbolOffsetMap { map: BTreeMap<(u64, u64), SymbolId>, } -impl AddrMap { +impl SymbolOffsetMap { fn new() -> Self { - AddrMap { + SymbolOffsetMap { map: BTreeMap::new(), } } @@ -492,7 +453,7 @@ impl Executable { let index = *symbols.get(&reloc.sym).unwrap(); let rel = elf::Rel32 { offset: *addr as u32, - info: index << 8 | u32::from(reloc.r#type.to_x86_u8()), + info: index << 8 | u32::from(reloc.r#type.to_x86_u8().unwrap()), }; out.write_all(&rel.to_bytes())?; } @@ -636,12 +597,9 @@ impl Linker { Linker { symbols: Symbols::new(), symbol_names: SymbolNames::new(), - src_strtab_offset: 0, - src_shstrtab_offset: 0, bss_addr: 0, bss_size: 0, data_addr: 0, - sections: vec![], relocations: vec![], undefined_relocations: vec![], sources: vec![], @@ -654,122 +612,51 @@ impl Linker { &self.sources[id.0 as usize] } - fn get_shstrtab(&self, reader: &mut BufReader<File>, offset: u32) -> Result<String, ElfError> { - reader.seek(io::SeekFrom::Start( - offset as u64 + self.src_shstrtab_offset, - ))?; - let mut bytes = vec![]; - reader.read_until(0, &mut bytes)?; - bytes.pop(); // remove terminating \0 - String::from_utf8(bytes).map_err(|_| ElfError::BadUtf8) - } - - fn get_strtab(&self, reader: &mut BufReader<File>, offset: u32) -> Result<String, ElfError> { - reader.seek(io::SeekFrom::Start(offset as u64 + self.src_strtab_offset))?; - let mut bytes = vec![]; - reader.read_until(0, &mut bytes)?; - bytes.pop(); // remove terminating \0 - String::from_utf8(bytes).map_err(|_| ElfError::BadUtf8) - } - - // returns SymbolName corresponding to the symbol - fn read_symbol( + fn add_symbol( &mut self, source: SourceId, - addr_map: &mut AddrMap, - reader: &mut BufReader<File>, - ) -> Result<SymbolName, ElfError> { - let mut sym_buf = [0u8; 16]; - reader.read_exact(&mut sym_buf)?; - let sym = elf::Sym32::from_bytes(sym_buf); - let r#type = sym.info & 0xf; - let bind = sym.info >> 4; - let name = self.get_strtab(reader, sym.name)?; - let name_id = 
self.symbol_names.add(name); - let size = sym.size as u64; - - let r#type = match r#type { - elf::STT_OBJECT => SymbolType::Object, - elf::STT_FUNC => SymbolType::Function, - _ => SymbolType::Other, - }; - + elf: &elf::Reader32LE, + offset_map: &mut SymbolOffsetMap, + symbol: &elf::Symbol, + ) -> Result<(), ObjectError> { let mut data_offset = None; - - let value = match sym.shndx { - elf::SHN_UNDEF | elf::SHN_COMMON => None, - elf::SHN_ABS => Some(SymbolValue::Absolute(sym.value as u64)), - ndx if (ndx as usize) < self.sections.len() => { - let ndx = ndx as usize; - match self.sections[ndx].r#type { - elf::SHT_PROGBITS => { - let offset = self.sections[ndx].offset as u64 + sym.value as u64; - data_offset = Some(offset); - reader.seek(io::SeekFrom::Start(offset))?; - let mut data = vec![0; size as usize]; - reader.read_exact(&mut data)?; + let name_id = self.symbol_names.add(elf.symbol_name(symbol)?); + + let value = match symbol.value { + elf::SymbolValue::Undefined => None, + elf::SymbolValue::Absolute(n) => Some(SymbolValue::Absolute(n)), + elf::SymbolValue::SectionOffset(shndx, offset) => { + match elf.section_type(shndx) { + Some(elf::SectionType::ProgBits) => { + let mut data = vec![0; symbol.size as usize]; + data_offset = Some(elf.section_offset(shndx).unwrap() + offset); + elf.read_section_data_exact(shndx, offset, &mut data)?; Some(SymbolValue::Data(data)) - } - elf::SHT_NOBITS => { + }, + Some(elf::SectionType::NoBits) => { let p = self.bss_size; - self.bss_size += size; + self.bss_size += symbol.size; Some(SymbolValue::Bss(p)) - } + }, _ => None, // huh } } - _ => None, }; let info = SymbolInfo { - r#type, + r#type: symbol.r#type, value, - size, + size: symbol.size, }; - let symbol_id = match bind { - elf::STB_LOCAL => self.symbols.add_local(source, name_id, info), - elf::STB_GLOBAL => self.symbols.add_global(source, name_id, info), - elf::STB_WEAK => self.symbols.add_weak(source, name_id, info), - _ => return Ok(name_id), + let symbol_id = match 
symbol.bind { + elf::SymbolBind::Local => self.symbols.add_local(source, name_id, info), + elf::SymbolBind::Global => self.symbols.add_global(source, name_id, info), + elf::SymbolBind::Weak => self.symbols.add_weak(source, name_id, info), + _ => return Ok(()), // eh }; if let Some(offset) = data_offset { - addr_map.add_symbol(offset, size, symbol_id); - } - Ok(name_id) - } - - fn add_relocation_x86( - &mut self, - symtab: &[SymbolName], - addr_map: &AddrMap, - source_id: SourceId, - offset: u64, - info: u32, - addend: i32, - ) -> Result<(), ElfError> { - let r#type = info as u8; - let sym_idx = info >> 8; - - if let Some(r#where) = addr_map.get(offset) { - match symtab.get(sym_idx as usize) { - Some(sym) => { - self.relocations.push(Relocation { - r#where, - source_id, - source_offset: offset, - sym: *sym, - r#type: RelocationType::from_x86_u8(r#type)?, - addend: addend.into(), - }); - } - None => return Err(ElfError::BadSymIdx(sym_idx.into())), - } - } else { - self.emit_warning(LinkWarning::RelNoData( - self.source_name(source_id).into(), - offset, - )); + offset_map.add_symbol(offset, symbol.size, symbol_id); } Ok(()) } @@ -778,164 +665,38 @@ impl Linker { &mut self, name: &str, reader: &mut BufReader<File>, - ) -> Result<(), ElfError> { - use ElfError::*; + ) -> Result<(), ObjectError> { + use ObjectError::*; - let mut addr_map = AddrMap::new(); - - reader.seek(io::SeekFrom::Start(0))?; + let mut offset_map = SymbolOffsetMap::new(); let source_id = SourceId(self.sources.len() as _); self.sources.push(name.into()); - let mut elf = [0u8; 0x34]; - reader.read_exact(&mut elf)?; - let elf: elf::Ehdr32 = unsafe { mem::transmute(elf) }; - - if elf.ident != [0x7f, b'E', b'L', b'F'] { - return Err(NotAnElf); - } - if elf.class != 1 { - return Err(Not32Bit); - } - if elf.data != 1 { - return Err(NotLE); - } - if elf.version != 1 || elf.version2 != 1 { - return Err(BadVersion); - } - if elf.r#type != elf::ET_REL { + let elf = elf::Reader32LE::new(reader)?; + if 
elf.r#type() != elf::Type::Rel { return Err(BadType); } - if elf.machine != 3 { - return Err(BadMachine); - } - - let mut shdr_buf = [0u8; 0x28]; - self.src_shstrtab_offset = { - // read .shstrtab header - reader.seek(elf.section_seek(elf.shstrndx))?; - reader.read_exact(&mut shdr_buf)?; - let shdr: elf::Shdr32 = unsafe { mem::transmute(shdr_buf) }; - shdr.offset as u64 - }; - - let mut sections_by_name = HashMap::with_capacity(elf.shnum as _); - self.sections.reserve(elf.shnum as _); - for s_idx in 0..elf.shnum { - reader.seek(elf.section_seek(s_idx))?; - reader.read_exact(&mut shdr_buf)?; - let shdr: elf::Shdr32 = unsafe { mem::transmute(shdr_buf) }; - let name = self.get_shstrtab(reader, shdr.name)?; - sections_by_name.insert(name.clone(), shdr.clone()); - self.sections.push(shdr); - } - - self.src_strtab_offset = if let Some(strtab) = sections_by_name.get(".strtab") { - strtab.offset.into() - } else { - return Err(NoStrtab); - }; - - let mut symtab = vec![]; - if let Some(shdr) = sections_by_name.get(".symtab") { - // read .symtab - let size = shdr.size as u64; - let entsize = shdr.entsize as u64; - let offset = shdr.offset as u64; - if size % entsize != 0 || entsize < 16 { - return Err(BadSymtab); - } - let count: u32 = (size / entsize).try_into().map_err(|_| BadSymtab)?; // 4 billion symbols is ridiculous - symtab.reserve(count as usize); - for sym_idx in 0..count { - reader.seek(io::SeekFrom::Start(offset + sym_idx as u64 * entsize))?; - let name = self.read_symbol(source_id, &mut addr_map, reader)?; - symtab.push(name); - } - } - - for shdr in sections_by_name.values() { - // @TODO @FIX we only process relocations relating to .symtab currently. - match self.sections.get(shdr.link as usize) { - None => continue, - Some(h) => { - if self.get_shstrtab(reader, h.name)? 
!= ".symtab" { - continue; - } - } - } - - fn read_relocations<const N: usize, RelType: FromBytes<N> + ToBytes<N>>( - reader: &mut BufReader<File>, - shdr: &elf::Shdr32, - ) -> Result<Vec<RelType>, ElfError> { - let offset = shdr.offset as u64; - let size = shdr.size as u64; - let entsize = shdr.entsize as u64; - if size % entsize != 0 || entsize < mem::size_of::<RelType>() as u64 { - return Err(BadRelHeader); - } - let count = size / entsize; - let mut relocations = Vec::with_capacity(count as _); - let mut rel_buf = [0; N]; - - for rel_idx in 0..count { - reader.seek(io::SeekFrom::Start(offset + rel_idx * entsize))?; - - reader.read_exact(&mut rel_buf)?; - let mut rel = mem::MaybeUninit::uninit(); - let rel = unsafe { - ptr::copy_nonoverlapping( - (&rel_buf[0]) as *const u8, - rel.as_mut_ptr() as *mut u8, - mem::size_of::<RelType>(), - ); - rel.assume_init() - }; - - relocations.push(rel); - } - Ok(relocations) - } - - let info_section_offset = self - .sections - .get(shdr.info as usize) - .ok_or(BadLink(shdr.info as u64))? 
- .offset as u64; - - let add_relocation_x86 = - |me: &mut Self, offset: u32, info: u32, addend: i32| -> Result<(), ElfError> { - me.add_relocation_x86( - &symtab, - &addr_map, - source_id, - info_section_offset + offset as u64, - info, - addend, - ) - }; - - match shdr.r#type { - elf::SHT_RELA => { - let rels: Vec<elf::Rela32> = read_relocations(reader, shdr)?; - for rela in rels { - add_relocation_x86( - self, - rela.offset as _, - rela.info as _, - rela.addend as _, - )?; - } - } - elf::SHT_REL => { - let rels: Vec<elf::Rel32> = read_relocations(reader, shdr)?; - for rel in rels { - add_relocation_x86(self, rel.offset as _, rel.info as _, 0)?; - } - } - _ => {} + + for symbol in elf.symbols() { + self.add_symbol(source_id, &elf, &mut offset_map, symbol)?; + } + + for rel in elf.relocations() { + if let Some(r#where) = offset_map.get(rel.offset) { + let sym = self.symbol_names.add(elf.symbol_name(&rel.symbol)?); + self.relocations.push(Relocation { + r#where, + source_id, + sym, + r#type: rel.r#type, + addend: rel.addend, + }); + } else { + self.emit_warning(LinkWarning::RelNoData( + self.source_name(source_id).into(), + 0, // @TODO + )); } } @@ -1027,12 +788,13 @@ impl Linker { enum Value { U32(u32), } - use RelocationType::*; + use elf::RelType::*; use Value::*; let value = match rel.r#type { Direct32 => U32(symbol_value as u32 + addend as u32), Pc32 => U32(symbol_value as u32 + addend as u32 - pc as u32), + Other(x) => {self.emit_warning(LinkWarning::RelUnsupported(x)); return Ok(()) }, }; let apply_symbol_info = match self.symbols.get_mut_info_from_id(apply_symbol) { @@ -1073,7 +835,7 @@ impl Linker { _ => { self.emit_warning(LinkWarning::RelNoData( self.source_name(rel.source_id).into(), - rel.source_offset, + apply_offset, )); } } @@ -1234,3 +996,4 @@ fn main() { eprintln!("Error linking: {e}"); } } + |