summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpommicket <pommicket@gmail.com>2022-11-05 11:43:27 -0400
committerpommicket <pommicket@gmail.com>2022-11-05 11:43:27 -0400
commitbae6a477b04f7a767678c88089e28dd9e5c5b879 (patch)
treeeeb29337826fc89d5ad0179598fb99719faf5405
parentbd54f4f57a56de5b2d8b80949acfd499a62a0bbf (diff)
cleaned up code a bit
-rw-r--r--src/elf.rs15
-rw-r--r--src/main.rs267
-rw-r--r--src/util.rs3
-rw-r--r--test.c5
4 files changed, 144 insertions, 146 deletions
diff --git a/src/elf.rs b/src/elf.rs
index 1f9f0dd..23dd246 100644
--- a/src/elf.rs
+++ b/src/elf.rs
@@ -31,7 +31,7 @@ pub const DT_SYMENT: u32 = 11;
pub const DT_REL: u32 = 17;
pub const DT_RELSZ: u32 = 18;
pub const DT_RELENT: u32 = 19;
-
+
pub const PT_DYNAMIC: u32 = 2;
pub const PT_INTERP: u32 = 3;
@@ -123,7 +123,7 @@ impl Ehdr32 {
pub fn size_of() -> usize {
mem::size_of::<Self>()
}
-
+
pub fn section_offset(&self, ndx: u16) -> u64 {
ndx as u64 * self.shentsize as u64 + self.shoff as u64
}
@@ -205,20 +205,19 @@ pub struct Rel32 {
}
macro_rules! impl_bytes {
- ($r#type: ident, $n: literal) => {
- impl FromBytes<$n> for $r#type {
+ ($type: ident, $n: literal) => {
+ impl FromBytes<$n> for $type {
fn from_bytes(bytes: [u8; $n]) -> Self {
unsafe { mem::transmute(bytes) }
}
}
-
- impl ToBytes<$n> for $r#type {
+
+ impl ToBytes<$n> for $type {
fn to_bytes(self) -> [u8; $n] {
unsafe { mem::transmute(self) }
}
}
-
- }
+ };
}
impl_bytes!(Ehdr32, 0x34);
diff --git a/src/main.rs b/src/main.rs
index 4717932..6006840 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -10,7 +10,10 @@ use std::{fmt, fs, io, mem, ptr};
compile_error! {"WHY do you have a big endian machine???? it's the 21st century, buddy. this program won't work fuck you"}
mod elf;
+mod util;
+
use elf::{FromBytes, ToBytes};
+use util::u32_from_le_slice;
pub enum LinkError {
IO(io::Error),
@@ -170,7 +173,7 @@ impl SymbolNames {
#[allow(dead_code)]
fn get(&self, name: &str) -> Option<SymbolName> {
- self.by_string.get(name).map(|r| *r)
+ self.by_string.get(name).copied()
}
}
@@ -264,13 +267,13 @@ impl Symbols {
.get(&(source, name))
.or_else(|| self.global.get(&name))
.or_else(|| self.weak.get(&name))
- .map(|r| *r)
+ .copied()
}
fn get_location_from_id(&self, id: SymbolId) -> Option<(SourceId, SymbolName)> {
- self.locations.get(&id).map(|r| *r)
+ self.locations.get(&id).copied()
}
-
+
fn count(&self) -> usize {
self.info.len()
}
@@ -291,7 +294,7 @@ impl RelocationType {
_ => return Err(ElfError::UnsupportedRelocation(id)),
})
}
-
+
fn to_x86_u8(self) -> u8 {
use RelocationType::*;
match self {
@@ -317,11 +320,12 @@ struct Linker {
symbols: Symbols,
symbol_names: SymbolNames,
relocations: Vec<Relocation>,
+ undefined_relocations: Vec<Relocation>, // library relocations
sections: Vec<elf::Shdr32>,
sources: Vec<String>,
- bss_size: u64, // output bss size
- bss_addr: u64, // output bss address
- data_addr: u64, // output data address
+ bss_size: u64, // output bss size
+ bss_addr: u64, // output bss address
+ data_addr: u64, // output data address
symbol_data_offsets: HashMap<SymbolId, u64>, // for symbols with data, this holds the offsets into the data segment.
warn: fn(LinkWarning),
}
@@ -363,8 +367,6 @@ impl AddrMap {
// this is needed so we don't emit anything for unused symbols.
type SymbolGraph = HashMap<SymbolId, Vec<SymbolId>>;
-const MAX_REL_SIZE: usize = 8; // this seems reasonable
-
struct Executable {
interp: Vec<u8>,
load_addr: u64,
@@ -387,32 +389,31 @@ impl Executable {
strtab: vec![0],
}
}
-
+
pub fn set_bss(&mut self, addr: u64, size: u64) {
self.bss = Some((addr, size));
}
-
+
pub fn set_interp(&mut self, interp: &str) {
self.interp = interp.as_bytes().into();
self.interp.push(b'\0');
}
-
-
+
fn add_string(&mut self, s: &str) -> u64 {
let ret = self.strtab.len() as u64;
self.strtab.extend(s.as_bytes());
self.strtab.push(b'\0');
ret
}
-
+
pub fn add_lib(&mut self, lib: &str) {
let s = self.add_string(lib);
self.lib_strtab_offsets.push(s);
}
-
+
pub fn add_relocation(&mut self, symbol_names: &SymbolNames, rel: &Relocation, addr: u64) {
let name = rel.sym;
-
+
if self.symbol_strtab_offsets.get(&name).is_none() {
let s = symbol_names.get_str(name).unwrap();
let offset = self.add_string(s);
@@ -420,7 +421,7 @@ impl Executable {
}
self.relocations.push((rel.clone(), addr));
}
-
+
fn segment_count(&self) -> u16 {
let mut count = 1 /*data*/;
if !self.interp.is_empty() {
@@ -431,30 +432,30 @@ impl Executable {
}
count
}
-
+
fn ph_offset(&self) -> u64 {
elf::Ehdr32::size_of() as u64
}
-
+
fn ph_size(&self) -> u64 {
elf::Phdr32::size_of() as u64 * u64::from(self.segment_count())
}
-
+
fn data_offset(&self) -> u64 {
self.ph_offset() + self.ph_size()
}
-
+
pub fn data_addr(&self) -> u64 {
self.load_addr + self.data_offset()
}
-
+
pub fn write<T: Write + Seek>(&self, data: &[u8], out: &mut T) -> LinkResult<()> {
let load_addr = self.load_addr as u32;
-
+
// start by writing data.
out.seek(io::SeekFrom::Start(self.data_offset()))?;
out.write_all(data)?;
-
+
let mut interp_offset = 0;
let mut dyntab_offset = 0;
let mut interp_size = 0;
@@ -503,27 +504,33 @@ impl Executable {
out.write_all(&u32::to_le_bytes(1))?; // nbucket
out.write_all(&u32::to_le_bytes(nsymbols + 1))?; // nchain
out.write_all(&u32::to_le_bytes(0))?; // bucket begins at 0
- // chain 1 -> 2 -> 3 -> ... -> n -> 0
+ // chain 1 -> 2 -> 3 -> ... -> n -> 0
for i in 1..nsymbols {
out.write_all(&u32::to_le_bytes(i))?;
}
out.write_all(&u32::to_le_bytes(0))?;
// i don't know why this needs to be here.
out.write_all(&u32::to_le_bytes(0))?;
-
-
-
+
// now dyntab
dyntab_offset = out.stream_position()?;
let mut dyn_data = vec![
- elf::DT_RELSZ, reltab_size as u32,
- elf::DT_RELENT, 8,
- elf::DT_REL, load_addr + reltab_offset as u32,
- elf::DT_STRSZ, self.strtab.len() as u32,
- elf::DT_STRTAB, load_addr + strtab_offset as u32,
- elf::DT_SYMENT, 16,
- elf::DT_SYMTAB, load_addr + symtab_offset as u32,
- elf::DT_HASH, load_addr + hashtab_offset as u32,
+ elf::DT_RELSZ,
+ reltab_size as u32,
+ elf::DT_RELENT,
+ 8,
+ elf::DT_REL,
+ load_addr + reltab_offset as u32,
+ elf::DT_STRSZ,
+ self.strtab.len() as u32,
+ elf::DT_STRTAB,
+ load_addr + strtab_offset as u32,
+ elf::DT_SYMENT,
+ 16,
+ elf::DT_SYMTAB,
+ load_addr + symtab_offset as u32,
+ elf::DT_HASH,
+ load_addr + hashtab_offset as u32,
];
for lib in &self.lib_strtab_offsets {
dyn_data.extend([elf::DT_NEEDED, *lib as u32]);
@@ -536,17 +543,25 @@ impl Executable {
dyntab_size = dyn_bytes.len() as u32;
out.write_all(&dyn_bytes)?;
}
-
- let file_size: u32 = out.stream_position()?.try_into().map_err(|_| LinkError::TooLarge)?;
-
+
+ let file_size: u32 = out
+ .stream_position()?
+ .try_into()
+ .map_err(|_| LinkError::TooLarge)?;
+
out.seek(io::SeekFrom::Start(0))?;
-
- let mut ehdr = elf::Ehdr32::default();
- ehdr.phnum = self.segment_count();
- ehdr.phoff = elf::Ehdr32::size_of() as u32;
- ehdr.entry = self.data_addr().try_into().map_err(|_| LinkError::TooLarge)?;
+
+ let ehdr = elf::Ehdr32 {
+ phnum: self.segment_count(),
+ phoff: elf::Ehdr32::size_of() as u32,
+ entry: self
+ .data_addr()
+ .try_into()
+ .map_err(|_| LinkError::TooLarge)?,
+ ..Default::default()
+ };
out.write_all(&ehdr.to_bytes())?;
-
+
let phdr_data = elf::Phdr32 {
flags: elf::PF_R | elf::PF_W | elf::PF_X, // read, write, execute
offset: 0,
@@ -556,7 +571,7 @@ impl Executable {
..Default::default()
};
out.write_all(&phdr_data.to_bytes())?;
-
+
if let Some((bss_addr, bss_size)) = self.bss {
// for some reason, linux doesn't like executables
// with memsz > filesz != 0
@@ -572,7 +587,7 @@ impl Executable {
};
out.write_all(&phdr_bss.to_bytes())?;
}
-
+
if !self.interp.is_empty() {
let phdr_interp = elf::Phdr32 {
r#type: elf::PT_INTERP,
@@ -585,7 +600,7 @@ impl Executable {
..Default::default()
};
out.write_all(&phdr_interp.to_bytes())?;
-
+
let phdr_dynamic = elf::Phdr32 {
r#type: elf::PT_DYNAMIC,
flags: elf::PF_R,
@@ -598,7 +613,7 @@ impl Executable {
};
out.write_all(&phdr_dynamic.to_bytes())?;
}
-
+
Ok(())
}
}
@@ -628,6 +643,7 @@ impl Linker {
data_addr: 0,
sections: vec![],
relocations: vec![],
+ undefined_relocations: vec![],
sources: vec![],
symbol_data_offsets: HashMap::new(),
warn: Self::default_warn_handler,
@@ -967,21 +983,32 @@ impl Linker {
fn get_symbol_value(&self, sym: SymbolId) -> Option<u64> {
let info = self.symbols.get_info_from_id(sym)?;
use SymbolValue::*;
- match (&info.value).as_ref()? {
- Data(_) => self.symbol_data_offsets.get(&sym).map(|r| *r + self.data_addr),
+ match info.value.as_ref()? {
+ Data(_) => self
+ .symbol_data_offsets
+ .get(&sym)
+ .map(|&o| o + self.data_addr),
Bss(x) => Some(self.bss_addr + *x),
Absolute(a) => Some(*a),
}
}
- fn get_relocation_data(
- &mut self,
- rel: &Relocation,
- pc: u64,
- data: &mut [u8; MAX_REL_SIZE],
- ) -> Result<Option<usize>, LinkError> {
+ fn get_rel_apply_data_offset(&self, rel: &Relocation) -> Option<u64> {
+ let apply_symbol = rel.r#where.0;
+ let r = self.symbol_data_offsets.get(&apply_symbol)?;
+ Some(*r + rel.r#where.1)
+ }
+
+ fn apply_relocation(&mut self, rel: Relocation, data: &mut [u8]) -> Result<(), LinkError> {
+ let apply_symbol = rel.r#where.0;
+ let apply_offset = match self.get_rel_apply_data_offset(&rel) {
+ Some(data_offset) => data_offset,
+ None => return Ok(()), // this relocation isn't in a data section so there's nothing we can do about it
+ };
+ let pc = apply_offset + self.data_addr;
+
let symbol = match self.get_symbol_id(rel.source_id, rel.sym) {
- None => return Ok(Some(0)), // we emitted a warning in get_symbol_id
+ None => return Ok(()), // we emitted a warning in get_symbol_id
Some(sym) => sym,
};
@@ -989,54 +1016,24 @@ impl Linker {
None => {
// this symbol is defined in a library
//self.emit_warning(LinkWarning::RelNoValue(self.symbol_id_location_string(symbol)));
- return Ok(None)
- },
+ self.undefined_relocations.push(rel);
+ return Ok(());
+ }
Some(v) => v,
};
-
+
let addend = rel.addend;
-
+
enum Value {
U32(u32),
}
- use Value::*;
use RelocationType::*;
-
+ use Value::*;
+
let value = match rel.r#type {
Direct32 => U32(symbol_value as u32 + addend as u32),
Pc32 => U32(symbol_value as u32 + addend as u32 - pc as u32),
};
-
- match value {
- U32(u) => {
- (&mut data[..4]).copy_from_slice(&u32::to_le_bytes(u));
- Ok(Some(4))
- },
- }
- }
-
- fn get_rel_apply_data_offset(&self, rel: &Relocation) -> Option<u64> {
- let apply_symbol = rel.r#where.0;
- let r = self.symbol_data_offsets.get(&apply_symbol)?;
- Some(*r + rel.r#where.1)
- }
-
- fn apply_relocation(&mut self, rel: &Relocation, data: &mut [u8]) -> Result<(), LinkError> {
- let apply_symbol = rel.r#where.0;
- let apply_offset = match self.get_rel_apply_data_offset(&rel) {
- Some(data_offset) => data_offset,
- None => return Ok(()) // this relocation isn't in a data section so there's nothing we can do about it
- };
- let apply_addr = apply_offset + self.data_addr;
-
- let mut rel_data = [0; MAX_REL_SIZE];
- let rel_data_size = self.get_relocation_data(&rel, apply_addr, &mut rel_data)?;
- let rel_data_size = match rel_data_size {
- None => return Ok(()), // defined in a library
- Some(s) => s,
- };
-
- let rel_data = &rel_data[..rel_data_size];
let apply_symbol_info = match self.symbols.get_mut_info_from_id(apply_symbol) {
Some(info) => info,
@@ -1048,15 +1045,29 @@ impl Linker {
};
use SymbolValue::*;
- let mut oob = false;
+
+ // guarantee failure if apply_offset can't be converted to usize.
+ let apply_start = apply_offset.try_into().unwrap_or(usize::MAX - 32);
+
match apply_symbol_info.value {
Some(Data(_)) => {
- let apply_start = apply_offset as usize;
- let apply_end = apply_start + rel_data.len();
- if apply_end < apply_start || apply_end > data.len() {
- oob = true;
- } else {
- (&mut data[apply_start..apply_end]).copy_from_slice(rel_data);
+ let mut in_bounds = true;
+ match value {
+ U32(u) => {
+ if let Some(apply_to) = data.get_mut(apply_start..apply_start + 4) {
+ let curr_val = u32_from_le_slice(apply_to);
+ apply_to.copy_from_slice(&(u + curr_val).to_le_bytes());
+ } else {
+ in_bounds = false;
+ }
+ }
+ };
+
+ if !in_bounds {
+ self.emit_warning(LinkWarning::RelOOB(
+ self.symbol_id_location_string(apply_symbol),
+ apply_offset,
+ ));
}
}
_ => {
@@ -1067,14 +1078,6 @@ impl Linker {
}
}
- if oob {
- // prevent double mut borrow by moving this here
- self.emit_warning(LinkWarning::RelOOB(
- self.symbol_id_location_string(apply_symbol),
- apply_offset,
- ));
- }
-
Ok(())
}
@@ -1090,28 +1093,24 @@ impl Linker {
if self.symbol_data_offsets.contains_key(&id) {
return Ok(());
}
-
+
if let Some(info) = self.symbols.get_info_from_id(id) {
- match &info.value {
- Some(SymbolValue::Data(d)) => {
- // set address
- self.symbol_data_offsets
- .insert(id, data.len() as u64);
- // add data
- data.extend(d);
- }
- _ => {},
+ if let Some(SymbolValue::Data(d)) = &info.value {
+ // set address
+ self.symbol_data_offsets.insert(id, data.len() as u64);
+ // add data
+ data.extend(d);
}
}
-
+
for reference in symbol_graph.get(&id).unwrap_or(&vec![]) {
self.add_data_for_symbol(data, symbol_graph, *reference)?;
}
Ok(())
}
-
- pub fn link<T: Write + Seek>(&mut self, out: &mut BufWriter<T>) -> LinkResult<()> {
+
+ pub fn link<T: Write + Seek>(mut self, out: &mut BufWriter<T>) -> LinkResult<()> {
let mut symbol_graph = SymbolGraph::with_capacity(self.symbols.count());
let relocations = mem::take(&mut self.relocations);
@@ -1139,8 +1138,7 @@ impl Linker {
exec.set_bss(self.bss_addr, self.bss_size);
exec.set_interp("/lib/ld-linux.so.2");
exec.add_lib("libc.so.6");
-
-
+
self.data_addr = exec.data_addr();
let entry_name_str = "main";
@@ -1156,19 +1154,16 @@ impl Linker {
let mut data = vec![];
self.add_data_for_symbol(&mut data, &symbol_graph, entry_id)?;
- for rel in relocations.iter() {
- self.apply_relocation(&rel, &mut data)?;
- }
-
for rel in relocations {
- let mut _data = [0; MAX_REL_SIZE];
- if self.get_relocation_data(&rel, 0, &mut _data)?.is_none() {
- if let Some(data_offset) = self.get_rel_apply_data_offset(&rel) {
- exec.add_relocation(&self.symbol_names, &rel, self.data_addr + data_offset);
- }
+ self.apply_relocation(rel, &mut data)?;
+ }
+
+ for rel in mem::take(&mut self.undefined_relocations) {
+ if let Some(data_offset) = self.get_rel_apply_data_offset(&rel) {
+ exec.add_relocation(&self.symbol_names, &rel, self.data_addr + data_offset);
}
}
-
+
exec.write(&data, out)
}
}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..d8ab9c8
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,3 @@
+pub fn u32_from_le_slice(data: &[u8]) -> u32 {
+ u32::from_le_bytes([data[0], data[1], data[2], data[3]])
+}
diff --git a/test.c b/test.c
index f34d4e2..8cf9a82 100644
--- a/test.c
+++ b/test.c
@@ -4,7 +4,8 @@
int x;
void main() {
puts("HellO");
- printf("hello\n");
- _exit(123);
+ puts("there");
+ //printf("hello\n");
+ exit(123);
}