diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/elf.rs | 26 | ||||
-rw-r--r-- | src/linker.rs | 44 | ||||
-rw-r--r-- | src/main.rs | 3 | ||||
-rw-r--r-- | src/util.rs | 3 |
4 files changed, 50 insertions, 26 deletions
@@ -419,32 +419,45 @@ impl RelType { pub struct Relocation { pub r#type: RelType, - pub entry_offset: u64, // file offset of relocation metadata (for debugging) - pub offset: u64, // where the relocation should be applied. for ET_REL, this is a file offset; otherwise, it's an address. + /// file offset of relocation metadata (for debugging) + pub entry_offset: u64, + /// where the relocation should be applied. for [ET_REL], this is a file offset; otherwise, it's an address. + pub offset: u64, + /// symbol which should be inserted at the offset. pub symbol: Symbol, + /// to be added to the symbol's value pub addend: i64, } +/// There are multiple formats of ELF file (32-bit/64-bit, little/big-endian), +/// so we can make types which read those formats derive from this trait. pub trait Reader where Self: Sized, { - fn new<T: BufRead + Seek>(reader: T) -> Result<Self>; + fn new(reader: impl BufRead + Seek) -> Result<Self>; fn r#type(&self) -> Type; fn machine(&self) -> Machine; fn entry(&self) -> u64; fn symbols(&self) -> &[Symbol]; fn relocations(&self) -> &[Relocation]; fn symbol_name(&self, sym: &Symbol) -> Result<String>; + /// type of section with index `idx` fn section_type(&self, idx: u16) -> Option<SectionType>; + /// read data from the section with index `idx` at offset `offset`. fn read_section_data_exact(&self, idx: u16, offset: u64, data: &mut [u8]) -> Result<()>; } +/// reader for 32-bit little-endian ELF files. pub struct Reader32LE { ehdr: Ehdr32, shdrs: Vec<Shdr32>, symbols: Vec<Symbol>, + /// index of .strtab section strtab_idx: Option<u16>, + /// All data of all sections. + /// We put it all in memory. + /// Object files usually aren't huge or anything. 
section_data: Vec<Vec<u8>>, relocations: Vec<Relocation>, } @@ -461,7 +474,7 @@ impl Reader32LE { } impl Reader for Reader32LE { - fn new<T: BufRead + Seek>(mut reader: T) -> Result<Self> { + fn new(mut reader: impl BufRead + Seek) -> Result<Self> { use Error::*; let mut hdr_buf = [0; 0x34]; @@ -479,6 +492,7 @@ impl Reader for Reader32LE { } let mut shdrs = Vec::with_capacity(ehdr.shnum.into()); + // read section headers for i in 0..ehdr.shnum { let offset = u64::from(ehdr.shoff) + u64::from(ehdr.shentsize) * u64::from(i); reader.seek(io::SeekFrom::Start(offset))?; @@ -487,7 +501,9 @@ impl Reader for Reader32LE { shdrs.push(Shdr32::from_bytes(shdr_buf)); } + // symtabs[i] = symbol table in section #i , or vec![] if section #i isn't a symbol table. let mut symtabs = Vec::with_capacity(ehdr.shnum.into()); + // all the symbols let mut symbols = vec![]; let mut section_data = Vec::with_capacity(ehdr.shnum.into()); let mut strtab_idx = None; @@ -530,7 +546,7 @@ impl Reader for Reader32LE { SHN_UNDEF => SymbolValue::Undefined, SHN_ABS => SymbolValue::Absolute(sym.value.into()), idx if idx < ehdr.shnum => { - if r#type == SymbolType::Section { + if r#type == SymbolType::Section && size == 0 { // section symbols have a size of 0, it seems. // i don't know why they don't just use the size of the section. // i'm replacing it here. it makes the code easier to write. diff --git a/src/linker.rs b/src/linker.rs index 8aaff33..f8f5760 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -1,8 +1,9 @@ /*! Linker producing small executables. Smallness is the *only* goal. -This linker makes "bad" executables in many ways. -You shouldn't use it unless all you want is a tiny little executable file. +This linker makes "bad" executables in many ways. For example, +all initialized data will be executable. All code will be writable. +You shouldn't use this unless all you want is a tiny little executable file. Currently, only 32-bit ELF is supported. 
If you are using C, you will need `gcc-multilib` for the 32-bit headers. @@ -23,14 +24,13 @@ As such, the resulting executable will be difficult to debug and *C++ exceptions may not work*. */ -use crate::{elf, util}; +use crate::elf; use io::{BufRead, Seek, Write}; use std::collections::{BTreeMap, HashMap}; use std::{fmt, fs, io, mem, path}; use elf::Reader as ELFReader; use elf::ToBytes; -use util::u32_from_le_slice; pub enum LinkError { IO(io::Error), @@ -38,7 +38,7 @@ pub enum LinkError { TooLarge, /// entry point not found NoEntry(String), - /// entry point was declared, and (probably) used, but not defined + /// entry point was declared, but not defined EntryNotDefined(String), } @@ -71,10 +71,10 @@ impl From<&LinkError> for String { pub enum LinkWarning { /// unsupported relocation type RelUnsupported(u8), - /// relocation is too large to fit inside its owner + /// relocation is too large to fit inside its symbol RelOOB(String, u64), - /// relocation is in a BSS section or some shit - RelNoData(String, u64), + /// relocation does not take place in a symbol's data + RelNoSym(String, u64), } impl fmt::Display for LinkWarning { @@ -82,7 +82,7 @@ impl fmt::Display for LinkWarning { use LinkWarning::*; match self { RelOOB(text, offset) => write!(f, "relocation applied to {text}+0x{offset:x}, which goes outside of the symbol (it will be ignored)."), - RelNoData(source, offset) => write!( + RelNoSym(source, offset) => write!( f, "relocation {source}+0x{offset:x} not in a data/text section. it will be ignored." ), @@ -131,7 +131,8 @@ impl fmt::Display for ObjectError { type SymbolNameType = u32; /// To be more efficient™, we use integers to keep track of symbol names. -/// A SymbolName doesn't need to refer to a symbol which has been defined. +/// +/// A `SymbolName` doesn't need to refer to a symbol which has been defined. #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] struct SymbolName(SymbolNameType); /// Keeps track of string-[SymbolName] conversion. 
@@ -182,14 +183,18 @@ impl SourceId { } type SymbolIdType = u32; -//// A symbol ID refers to a symbol *which has a definition*, unlike [SymbolName]. + +/// A symbol ID refers to a specific *definition* of a symbol. +/// +/// There might be multiple `SymbolId`s corresponding to a single [SymbolName], +/// since local symbols with the same name can be defined in separate object files. #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] struct SymbolId(SymbolIdType); /// Value of a symbol. #[derive(Debug)] enum SymbolValue { - /// We make one big BSS section, this is an offset into it. + /// offset into BSS section Bss(u64), /// Data associated with this symbol (machine code for functions, /// bytes making up string literals, etc.) @@ -317,8 +322,9 @@ pub struct Linker<'a> { warn: Box<dyn Fn(LinkWarning) + 'a>, } -/// maps between offsets in an object file and symbols defined in that file. -/// (Note: it is specific to a single object file, and only kept around temporarily +/// Maps between offsets in an object file and symbols defined in that file. +/// +/// (Note: this is specific to a single object file, and only kept around temporarily /// during a call to [Linker::add_object].) /// This is used to figure out where relocations are taking place. struct SymbolOffsetMap { @@ -353,6 +359,7 @@ impl SymbolOffsetMap { } /// Graph of which symbols depend on which symbols. +/// /// This is needed so we don't emit anything for unused symbols. struct SymbolGraph { graph: Vec<Vec<SymbolId>>, @@ -809,7 +816,7 @@ impl<'a> Linker<'a> { addend: rel.addend, }); } else { - self.emit_warning(LinkWarning::RelNoData( + self.emit_warning(LinkWarning::RelNoSym( self.source_name(source_id).into(), rel.entry_offset, )); @@ -905,6 +912,11 @@ impl<'a> Linker<'a> { // guarantee failure if apply_offset can't be converted to usize. 
let apply_start = apply_offset.try_into().unwrap_or(usize::MAX - 1000); + fn u32_from_le_slice(data: &[u8]) -> u32 { + u32::from_le_bytes([data[0], data[1], data[2], data[3]]) + } + + match apply_symbol_info.value { Data(_) => { let mut in_bounds = true; @@ -927,7 +939,7 @@ impl<'a> Linker<'a> { } } _ => { - self.emit_warning(LinkWarning::RelNoData( + self.emit_warning(LinkWarning::RelNoSym( self.source_name(rel.source_id).into(), apply_offset, )); diff --git a/src/main.rs b/src/main.rs index ec66464..e25b837 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,8 +2,8 @@ @TODO: - bounds check on bss - make bss optional -- finish docs - disable "warning: relocation XXX not in a data/text section" for .rel.eh_frame + - these warnings are being generated in two places. do they need to be? - make sure --no-stdlib generates a tiny executable - make executables more tiny (overlap sections, etc.) - static libraries @@ -18,7 +18,6 @@ compile_error! {"WHY do you have a big endian machine???? it's the 21st century, mod elf; pub mod linker; -mod util; #[derive(Parser, Debug)] struct Args { diff --git a/src/util.rs b/src/util.rs deleted file mode 100644 index d8ab9c8..0000000 --- a/src/util.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub fn u32_from_le_slice(data: &[u8]) -> u32 { - u32::from_le_bytes([data[0], data[1], data[2], data[3]]) -} |