summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author pommicket <pommicket@gmail.com> 2022-11-06 21:52:27 -0500
committer pommicket <pommicket@gmail.com> 2022-11-06 21:52:27 -0500
commit dab85a8d1e9b99cbef225b8f5cc7fc001405828d (patch)
tree 5075a75a7d18f44e7ee0c00b0d5441a13d225988 /src
parent 6b7d46d33cdb9c99852f2d4378243ab08ba876ba (diff)
docs done for now
Diffstat (limited to 'src')
-rw-r--r-- src/elf.rs 26
-rw-r--r-- src/linker.rs 44
-rw-r--r-- src/main.rs 3
-rw-r--r-- src/util.rs 3
4 files changed, 50 insertions, 26 deletions
diff --git a/src/elf.rs b/src/elf.rs
index 4fe2759..2a51082 100644
--- a/src/elf.rs
+++ b/src/elf.rs
@@ -419,32 +419,45 @@ impl RelType {
pub struct Relocation {
pub r#type: RelType,
- pub entry_offset: u64, // file offset of relocation metadata (for debugging)
- pub offset: u64, // where the relocation should be applied. for ET_REL, this is a file offset; otherwise, it's an address.
+ /// file offset of relocation metadata (for debugging)
+ pub entry_offset: u64,
+ /// where the relocation should be applied. for [ET_REL], this is a file offset; otherwise, it's an address.
+ pub offset: u64,
+ /// symbol which should be inserted at the offset.
pub symbol: Symbol,
+ /// to be added to the symbol's value
pub addend: i64,
}
+/// There are multiple formats of ELF file (32-bit/64-bit, little/big-endian),
+/// so we can make types which read those formats derive from this trait.
pub trait Reader
where
Self: Sized,
{
- fn new<T: BufRead + Seek>(reader: T) -> Result<Self>;
+ fn new(reader: impl BufRead + Seek) -> Result<Self>;
fn r#type(&self) -> Type;
fn machine(&self) -> Machine;
fn entry(&self) -> u64;
fn symbols(&self) -> &[Symbol];
fn relocations(&self) -> &[Relocation];
fn symbol_name(&self, sym: &Symbol) -> Result<String>;
+ /// type of section with index `idx`
fn section_type(&self, idx: u16) -> Option<SectionType>;
+ /// read data from the section with index `idx` at offset `offset`.
fn read_section_data_exact(&self, idx: u16, offset: u64, data: &mut [u8]) -> Result<()>;
}
+/// reader for 32-bit little-endian ELF files.
pub struct Reader32LE {
ehdr: Ehdr32,
shdrs: Vec<Shdr32>,
symbols: Vec<Symbol>,
+ /// index of .strtab section
strtab_idx: Option<u16>,
+ /// All data of all sections.
+ /// We put it all in memory.
+ /// Object files usually aren't huge or anything.
section_data: Vec<Vec<u8>>,
relocations: Vec<Relocation>,
}
@@ -461,7 +474,7 @@ impl Reader32LE {
}
impl Reader for Reader32LE {
- fn new<T: BufRead + Seek>(mut reader: T) -> Result<Self> {
+ fn new(mut reader: impl BufRead + Seek) -> Result<Self> {
use Error::*;
let mut hdr_buf = [0; 0x34];
@@ -479,6 +492,7 @@ impl Reader for Reader32LE {
}
let mut shdrs = Vec::with_capacity(ehdr.shnum.into());
+ // read section headers
for i in 0..ehdr.shnum {
let offset = u64::from(ehdr.shoff) + u64::from(ehdr.shentsize) * u64::from(i);
reader.seek(io::SeekFrom::Start(offset))?;
@@ -487,7 +501,9 @@ impl Reader for Reader32LE {
shdrs.push(Shdr32::from_bytes(shdr_buf));
}
+ // symtabs[i] = symbol table in section #i , or vec![] if section #i isn't a symbol table.
let mut symtabs = Vec::with_capacity(ehdr.shnum.into());
+ // all the symbols
let mut symbols = vec![];
let mut section_data = Vec::with_capacity(ehdr.shnum.into());
let mut strtab_idx = None;
@@ -530,7 +546,7 @@ impl Reader for Reader32LE {
SHN_UNDEF => SymbolValue::Undefined,
SHN_ABS => SymbolValue::Absolute(sym.value.into()),
idx if idx < ehdr.shnum => {
- if r#type == SymbolType::Section {
+ if r#type == SymbolType::Section && size == 0 {
// section symbols have a size of 0, it seems.
// i don't know why they don't just use the size of the section.
// i'm replacing it here. it makes the code easier to write.
diff --git a/src/linker.rs b/src/linker.rs
index 8aaff33..f8f5760 100644
--- a/src/linker.rs
+++ b/src/linker.rs
@@ -1,8 +1,9 @@
/*!
Linker producing small executables.
Smallness is the *only* goal.
-This linker makes "bad" executables in many ways.
-You shouldn't use it unless all you want is a tiny little executable file.
+This linker makes "bad" executables in many ways. For example,
+all initialized data will be executable. All code will be writable.
+You shouldn't use this unless all you want is a tiny little executable file.
Currently, only 32-bit ELF is supported.
If you are using C, you will need `gcc-multilib` for the 32-bit headers.
@@ -23,14 +24,13 @@ As such, the resulting executable will be difficult to debug and *C++ exceptions
may not work*.
*/
-use crate::{elf, util};
+use crate::elf;
use io::{BufRead, Seek, Write};
use std::collections::{BTreeMap, HashMap};
use std::{fmt, fs, io, mem, path};
use elf::Reader as ELFReader;
use elf::ToBytes;
-use util::u32_from_le_slice;
pub enum LinkError {
IO(io::Error),
@@ -38,7 +38,7 @@ pub enum LinkError {
TooLarge,
/// entry point not found
NoEntry(String),
- /// entry point was declared, and (probably) used, but not defined
+ /// entry point was declared, but not defined
EntryNotDefined(String),
}
@@ -71,10 +71,10 @@ impl From<&LinkError> for String {
pub enum LinkWarning {
/// unsupported relocation type
RelUnsupported(u8),
- /// relocation is too large to fit inside its owner
+ /// relocation is too large to fit inside its symbol
RelOOB(String, u64),
- /// relocation is in a BSS section or some shit
- RelNoData(String, u64),
+ /// relocation does not take place in a symbol's data
+ RelNoSym(String, u64),
}
impl fmt::Display for LinkWarning {
@@ -82,7 +82,7 @@ impl fmt::Display for LinkWarning {
use LinkWarning::*;
match self {
RelOOB(text, offset) => write!(f, "relocation applied to {text}+0x{offset:x}, which goes outside of the symbol (it will be ignored)."),
- RelNoData(source, offset) => write!(
+ RelNoSym(source, offset) => write!(
f,
"relocation {source}+0x{offset:x} not in a data/text section. it will be ignored."
),
@@ -131,7 +131,8 @@ impl fmt::Display for ObjectError {
type SymbolNameType = u32;
/// To be more efficient™, we use integers to keep track of symbol names.
-/// A SymbolName doesn't need to refer to a symbol which has been defined.
+///
+/// A `SymbolName` doesn't need to refer to a symbol which has been defined.
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
struct SymbolName(SymbolNameType);
/// Keeps track of string-[SymbolName] conversion.
@@ -182,14 +183,18 @@ impl SourceId {
}
type SymbolIdType = u32;
-//// A symbol ID refers to a symbol *which has a definition*, unlike [SymbolName].
+
+/// A symbol ID refers to a specific *definition* of a symbol.
+///
+/// There might be multiple `SymbolId`s corresponding to a single [SymbolName],
+/// since local symbols with the same name can be defined in separate object files.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
struct SymbolId(SymbolIdType);
/// Value of a symbol.
#[derive(Debug)]
enum SymbolValue {
- /// We make one big BSS section, this is an offset into it.
+ /// offset into BSS section
Bss(u64),
/// Data associated with this symbol (machine code for functions,
/// bytes making up string literals, etc.)
@@ -317,8 +322,9 @@ pub struct Linker<'a> {
warn: Box<dyn Fn(LinkWarning) + 'a>,
}
-/// maps between offsets in an object file and symbols defined in that file.
-/// (Note: it is specific to a single object file, and only kept around temporarily
+/// Maps between offsets in an object file and symbols defined in that file.
+///
+/// (Note: this is specific to a single object file, and only kept around temporarily
/// during a call to [Linker::add_object].)
/// This is used to figure out where relocations are taking place.
struct SymbolOffsetMap {
@@ -353,6 +359,7 @@ impl SymbolOffsetMap {
}
/// Graph of which symbols depend on which symbols.
+///
/// This is needed so we don't emit anything for unused symbols.
struct SymbolGraph {
graph: Vec<Vec<SymbolId>>,
@@ -809,7 +816,7 @@ impl<'a> Linker<'a> {
addend: rel.addend,
});
} else {
- self.emit_warning(LinkWarning::RelNoData(
+ self.emit_warning(LinkWarning::RelNoSym(
self.source_name(source_id).into(),
rel.entry_offset,
));
@@ -905,6 +912,11 @@ impl<'a> Linker<'a> {
// guarantee failure if apply_offset can't be converted to usize.
let apply_start = apply_offset.try_into().unwrap_or(usize::MAX - 1000);
+ fn u32_from_le_slice(data: &[u8]) -> u32 {
+ u32::from_le_bytes([data[0], data[1], data[2], data[3]])
+ }
+
+
match apply_symbol_info.value {
Data(_) => {
let mut in_bounds = true;
@@ -927,7 +939,7 @@ impl<'a> Linker<'a> {
}
}
_ => {
- self.emit_warning(LinkWarning::RelNoData(
+ self.emit_warning(LinkWarning::RelNoSym(
self.source_name(rel.source_id).into(),
apply_offset,
));
diff --git a/src/main.rs b/src/main.rs
index ec66464..e25b837 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,8 +2,8 @@
@TODO:
- bounds check on bss
- make bss optional
-- finish docs
- disable "warning: relocation XXX not in a data/text section" for .rel.eh_frame
+ - these warnings are being generated in two places. do they need to be?
- make sure --no-stdlib generates a tiny executable
- make executables more tiny (overlap sections, etc.)
- static libraries
@@ -18,7 +18,6 @@ compile_error! {"WHY do you have a big endian machine???? it's the 21st century,
mod elf;
pub mod linker;
-mod util;
#[derive(Parser, Debug)]
struct Args {
diff --git a/src/util.rs b/src/util.rs
deleted file mode 100644
index d8ab9c8..0000000
--- a/src/util.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-pub fn u32_from_le_slice(data: &[u8]) -> u32 {
- u32::from_le_bytes([data[0], data[1], data[2], data[3]])
-}