summary refs log tree commit diff
diff options
context:
space:
mode:
author pommicket <pommicket@gmail.com> 2022-11-04 13:02:56 -0400
committer pommicket <pommicket@gmail.com> 2022-11-04 13:02:56 -0400
commit 05de0fe271024ba67aa265974bc4d456dbfa3eba (patch)
tree c0911e9bd3f27ab78df2120aa78b2c5a8ea94a4e
parent 09e8d960e56efc3c6e00d3bcc6dd812d015bb4fe (diff)
start undefined relocations
-rw-r--r-- src/elf.rs 33
-rw-r--r-- src/main.rs 181
-rw-r--r-- test.c 14
-rw-r--r-- tiny.asm 160
4 files changed, 333 insertions, 55 deletions
diff --git a/src/elf.rs b/src/elf.rs
index 23c267f..8d714c4 100644
--- a/src/elf.rs
+++ b/src/elf.rs
@@ -6,6 +6,27 @@ use std::{io, mem};
pub const ET_REL: u16 = 1;
pub const ET_EXEC: u16 = 2;
+// segment type
+pub const PT_LOAD: u32 = 1;
+// segment flags
+pub const PF_X: u32 = 1 << 0;
+pub const PF_W: u32 = 1 << 1;
+pub const PF_R: u32 = 1 << 2;
+
+
+pub const DT_NEEDED: u32 = 1;
+pub const DT_HASH: u32 = 4;
+pub const DT_STRTAB: u32 = 5;
+pub const DT_SYMTAB: u32 = 6;
+pub const DT_STRSZ: u32 = 10;
+pub const DT_SYMENT: u32 = 11;
+pub const DT_REL: u32 = 17;
+pub const DT_RELSZ: u32 = 18;
+pub const DT_RELENT: u32 = 19;
+
+pub const PT_DYNAMIC: u32 = 2;
+pub const PT_INTERP: u32 = 3;
+
#[allow(unused)]
pub const SHT_PROGBITS: u32 = 1; // Program data
#[allow(unused)]
@@ -40,7 +61,7 @@ pub const SHN_ABS: u16 = 0xfff1;
pub const SHN_COMMON: u16 = 0xfff2;
#[repr(C)]
-pub struct Header32 {
+pub struct Ehdr32 {
pub ident: [u8; 4],
pub class: u8,
pub data: u8,
@@ -63,7 +84,7 @@ pub struct Header32 {
pub shstrndx: u16,
}
-impl Default for Header32 {
+impl Default for Ehdr32 {
fn default() -> Self {
Self {
ident: [0x7F, b'E', b'L', b'F'],
@@ -90,7 +111,11 @@ impl Default for Header32 {
}
}
-impl Header32 {
+impl Ehdr32 {
+ pub fn offsetof_entry(&self) -> usize {
+ 0x18
+ }
+
pub fn section_offset(&self, ndx: u16) -> u64 {
ndx as u64 * self.shentsize as u64 + self.shoff as u64
}
@@ -119,8 +144,6 @@ pub struct Shdr32 {
pub entsize: u32,
}
-pub const PT_LOAD: u32 = 0;
-
#[repr(C)]
pub struct Phdr32 {
pub r#type: u32,
diff --git a/src/main.rs b/src/main.rs
index e4ac72c..fd536a0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,6 @@
+// you will need gcc-multilib to compile a 32-bit executable (with stdlib)
+// you need to use -fno-pic with gcc -- GOT/PLT relocations aren't supported,
+// and they also make the executable bigger.
use fs::File;
use io::{BufRead, BufReader, BufWriter, Read, Seek, Write};
use std::collections::{BTreeMap, HashMap};
@@ -274,13 +277,10 @@ impl Symbols {
}
}
-#[allow(dead_code)] // @TODO @TEMPORARY
#[derive(Debug, Clone, Copy)]
enum RelocationType {
Direct32,
Pc32,
- GotOff32,
- GotPc32,
}
impl RelocationType {
@@ -289,15 +289,12 @@ impl RelocationType {
Ok(match id {
1 => Direct32,
2 => Pc32,
- 9 => GotOff32,
- 10 => GotPc32,
_ => return Err(ElfError::UnsupportedRelocation(id)),
})
}
}
#[derive(Debug, Clone)]
-#[allow(dead_code)] // @TODO @TEMPORARY
struct Relocation {
r#where: (SymbolId, u64), // (symbol containing relocation, offset in symbol where relocation needs to be applied)
source_id: SourceId,
@@ -313,12 +310,13 @@ struct Linker {
symbols: Symbols,
symbol_names: SymbolNames,
relocations: Vec<Relocation>,
+ undefined_relocations: Vec<Relocation>, // stuff from libc, etc.
sections: Vec<elf::Shdr32>,
sources: Vec<String>,
bss_size: u64, // output bss size
bss_addr: u64, // output bss address
data_addr: u64, // output data address
- symbol_addrs: HashMap<SymbolId, u64>, // output addresses of symbols
+ symbol_data_offsets: HashMap<SymbolId, u64>, // for symbols with data, this holds the offsets into the data segment.
warn: fn(LinkWarning),
}
@@ -387,7 +385,8 @@ impl Linker {
sections: vec![],
relocations: vec![],
sources: vec![],
- symbol_addrs: HashMap::new(),
+ undefined_relocations: vec![],
+ symbol_data_offsets: HashMap::new(),
warn: Self::default_warn_handler,
}
}
@@ -542,7 +541,7 @@ impl Linker {
let mut elf = [0u8; 0x34];
reader.read_exact(&mut elf)?;
- let elf: elf::Header32 = unsafe { mem::transmute(elf) };
+ let elf: elf::Ehdr32 = unsafe { mem::transmute(elf) };
if elf.ident != [0x7f, b'E', b'L', b'F'] {
return Err(NotAnElf);
@@ -750,14 +749,14 @@ impl Linker {
let info = self.symbols.get_info_from_id(sym)?;
use SymbolValue::*;
match (&info.value).as_ref()? {
- Data(_) => self.symbol_addrs.get(&sym).map(|r| *r),
+ Data(_) => self.symbol_data_offsets.get(&sym).map(|r| *r + self.data_addr),
Bss(x) => Some(self.bss_addr + *x),
Absolute(a) => Some(*a),
}
}
fn get_relocation_data(
- &self,
+ &mut self,
rel: &Relocation,
pc: u64,
data: &mut [u8; MAX_REL_SIZE],
@@ -771,7 +770,8 @@ impl Linker {
let symbol_value = match self.get_symbol_value(symbol) {
None => {
- self.emit_warning(LinkWarning::RelNoValue(self.symbol_id_location_string(symbol)));
+ self.undefined_relocations.push(rel.clone());
+ //self.emit_warning(LinkWarning::RelNoValue(self.symbol_id_location_string(symbol)));
return Ok(0)
},
Some(v) => v,
@@ -788,7 +788,6 @@ impl Linker {
let value = match rel.r#type {
Direct32 => U32(symbol_value as u32 + addend as u32),
Pc32 => U32(symbol_value as u32 + addend as u32 - pc as u32),
- _ => todo!(),
};
match value {
@@ -799,14 +798,13 @@ impl Linker {
}
}
- fn apply_relocation(&mut self, rel: Relocation) -> Result<(), LinkError> {
+ fn apply_relocation(&mut self, rel: Relocation, data: &mut [u8]) -> Result<(), LinkError> {
let apply_symbol = rel.r#where.0;
- let apply_offset = rel.r#where.1;
-
- let apply_addr = match self.symbol_addrs.get(&apply_symbol) {
+ let apply_offset = rel.r#where.1 + match self.symbol_data_offsets.get(&apply_symbol) {
None => return Ok(()), // this relocation isn't in a section we care about
Some(a) => *a,
};
+ let apply_addr = apply_offset + self.data_addr;
let mut rel_data = [0; MAX_REL_SIZE];
let rel_data_size = self.get_relocation_data(&rel, apply_addr, &mut rel_data)?;
@@ -823,8 +821,8 @@ impl Linker {
use SymbolValue::*;
let mut oob = false;
- match &mut apply_symbol_info.value {
- Some(Data(data)) => {
+ match apply_symbol_info.value {
+ Some(Data(_)) => {
let apply_start = apply_offset as usize;
let apply_end = apply_start + rel_data.len();
if apply_end < apply_start || apply_end > data.len() {
@@ -861,11 +859,23 @@ impl Linker {
id: SymbolId,
) -> Result<(), LinkError> {
// deal with cycles
- if self.symbol_addrs.contains_key(&id) {
+ if self.symbol_data_offsets.contains_key(&id) {
return Ok(());
}
- self.symbol_addrs
- .insert(id, self.data_addr + (data.len() as u64));
+
+ if let Some(info) = self.symbols.get_info_from_id(id) {
+ match &info.value {
+ Some(SymbolValue::Data(d)) => {
+ // set address
+ self.symbol_data_offsets
+ .insert(id, data.len() as u64);
+ // add data
+ data.extend(d);
+ }
+ _ => {},
+ }
+ }
+
for reference in symbol_graph.get(&id).unwrap_or(&vec![]) {
self.add_data_for_symbol(data, symbol_graph, *reference)?;
}
@@ -873,7 +883,7 @@ impl Linker {
Ok(())
}
- pub fn link<T: Write>(&mut self, out: &mut BufWriter<T>) -> Result<(), LinkError> {
+ pub fn link<T: Write + Seek>(&mut self, out: &mut BufWriter<T>) -> Result<(), LinkError> {
let mut symbol_graph = SymbolGraph::with_capacity(self.symbols.count());
let relocations = mem::take(&mut self.relocations);
@@ -898,26 +908,94 @@ impl Linker {
let segment_addr: u32 = 0x400000;
- let data_size = 0;
-
- let mut header = elf::Header32::default();
- let ehdr_size: u32 = header.ehsize.into();
- let phdr_size: u32 = header.phentsize.into();
- let header_size = ehdr_size + phdr_size;
- let file_size = header_size + data_size;
- let entry_point = segment_addr + header_size;
- header.phnum = 1;
- header.phoff = ehdr_size;
- header.entry = entry_point;
- out.write_all(&header.to_bytes())?;
-
- let data_addr = segment_addr + header_size;
- self.data_addr = data_addr.into();
- let bss_addr = segment_addr + file_size;
+ let mut ehdr = elf::Ehdr32::default();
+ let ehdr_size: u32 = ehdr.ehsize.into();
+ let phdr_size: u32 = ehdr.phentsize.into();
+ let num_segments: u16 = 4; // interp, dynamic, data, bss
+
+ let header_size = ehdr_size + phdr_size * u32::from(num_segments);
+ let interp_offset = header_size;
+ let interp = "/lib/ld-linux.so.2\0";
+ let interp_size = interp.len() as u32;
+ let nlibs = 1;
+ let dynamic_offset = interp_offset + interp_size;
+ let dynamic_size = 16 * 4 + nlibs * 8;
+ let bss_addr: u32 = 0x9000000;
self.bss_addr = bss_addr.into();
let bss_size: u32 = self.bss_size.try_into().map_err(|_| LinkError::TooLarge)?;
+
+ ehdr.phnum = num_segments;
+ ehdr.phoff = ehdr_size;
+ let ehdr = ehdr;
+ let entry_point_offset = ehdr.offsetof_entry();
+ out.write_all(&ehdr.to_bytes())?;
+
+ let phdr_interp = elf::Phdr32 {
+ r#type: elf::PT_INTERP,
+ flags: elf::PF_R,
+ offset: interp_offset,
+ vaddr: segment_addr + interp_offset,
+ filesz: interp_size,
+ memsz: interp_size,
+ align: 1,
+ ..Default::default()
+ };
- let entry_name_str = "entry";
+ let phdr_dynamic = elf::Phdr32 {
+ r#type: elf::PT_DYNAMIC,
+ flags: elf::PF_R,
+ offset: dynamic_offset,
+ vaddr: segment_addr + dynamic_offset,
+ filesz: dynamic_size,
+ memsz: dynamic_size,
+ align: 1,
+ ..Default::default()
+ };
+
+ // for some reason, linux doesn't like executables that have
+ // a segment with memsz > filesz where filesz != 0,
+ // so we need two segments (one for data, one for bss).
+ let phdr_data = elf::Phdr32 {..Default::default() };
+ let phdr_bss = elf::Phdr32 {
+ flags: elf::PF_R | elf::PF_W, // read, write
+ offset: 0,
+ vaddr: bss_addr,
+ filesz: 0,
+ memsz: bss_size,
+ ..Default::default()
+ };
+ out.write_all(&phdr_interp.to_bytes())?;
+ out.write_all(&phdr_dynamic.to_bytes())?;
+ let dyn_data = vec![
+ elf::DT_RELSZ, 0,
+ elf::DT_RELENT, 0,
+ elf::DT_REL, 0,
+ elf::DT_STRSZ, 0,
+ elf::DT_STRTAB, 0,
+ elf::DT_SYMENT, 0,
+ elf::DT_SYMTAB, 0,
+ elf::DT_HASH, 0,
+ elf::DT_NEEDED, 0,
+ ];
+ let mut dyn_bytes = Vec::with_capacity(dyn_data.len() * 4);
+ for x in dyn_data {
+ dyn_bytes.extend(u32::to_le_bytes(x));
+ }
+
+ let phdr_data_offset = out.stream_position()?;
+ out.write_all(&phdr_data.to_bytes())?;
+ out.write_all(&phdr_bss.to_bytes())?;
+ out.write_all(interp.as_bytes())?;
+ out.write_all(&dyn_bytes)?;
+
+
+ let data_addr: u32 = out.stream_position()? as u32 + segment_addr;
+ self.data_addr = data_addr.into();
+
+ out.seek(io::SeekFrom::End(0))?;
+
+
+ let entry_name_str = "main";
let entry_name_id = self
.symbol_names
.get(entry_name_str)
@@ -931,18 +1009,29 @@ impl Linker {
self.add_data_for_symbol(&mut data, &symbol_graph, entry_id)?;
for rel in relocations {
- self.apply_relocation(rel)?;
+ self.apply_relocation(rel, &mut data)?;
}
+
+ out.write_all(&data)?;
+
+ let file_size = out.stream_position()?.try_into()
+ .map_err(|_| LinkError::TooLarge)?;
+ let entry_point = data_addr; // the entry point is the first thing we output data for
- let phdr = elf::Phdr32 {
- flags: 0b111, // read, write, execute
+ out.seek(io::SeekFrom::Start(entry_point_offset as u64))?;
+ out.write_all(&entry_point.to_le_bytes())?;
+
+ let phdr_data = elf::Phdr32 {
+ flags: elf::PF_R | elf::PF_W | elf::PF_X, // read, write, execute
offset: 0,
- vaddr: segment_addr,
+ vaddr: data_addr,
filesz: file_size,
- memsz: file_size + bss_size,
+ memsz: file_size,
..Default::default()
};
- out.write_all(&phdr.to_bytes())?;
+
+ out.seek(io::SeekFrom::Start(phdr_data_offset))?;
+ out.write_all(&phdr_data.to_bytes())?;
Ok(())
}
diff --git a/test.c b/test.c
index c80d8b6..e2db834 100644
--- a/test.c
+++ b/test.c
@@ -1,6 +1,12 @@
+#include <stdio.h>
int x;
-void entry() {
- x += 1;
- __asm__("xor %ebx, %ebx\n"
- "int $0x80\n");
+void main() {
+ x = 123;
+ printf("hi");
+ __asm__ ("movl $1, %%eax\n"
+ "movl %0, %%ebx\n"
+ "int $0x80\n"
+ :
+ : "r" (x) : "ebx", "eax");
+
}
diff --git a/tiny.asm b/tiny.asm
new file mode 100644
index 0000000..562b7c4
--- /dev/null
+++ b/tiny.asm
@@ -0,0 +1,160 @@
+; https://www.muppetlabs.com/~breadbox/software/tiny/somewhat.html
+
+ ; tiny.asm
+
+ BITS 32
+
+ %define ET_EXEC 2
+ %define EM_386 3
+ %define EV_CURRENT 1
+
+ %define PT_LOAD 1
+ %define PT_DYNAMIC 2
+ %define PT_INTERP 3
+
+ %define PF_X 1
+ %define PF_W 2
+ %define PF_R 4
+
+ %define STT_FUNC 2
+
+ %define STB_GLOBAL 1
+
+ %define R_386_32 1
+
+ %define DT_NULL 0
+ %define DT_NEEDED 1
+ %define DT_HASH 4
+ %define DT_STRTAB 5
+ %define DT_SYMTAB 6
+ %define DT_STRSZ 10
+ %define DT_SYMENT 11
+ %define DT_REL 17
+ %define DT_RELSZ 18
+ %define DT_RELENT 19
+
+ %define R_INFO(s, t) (((s) << 8) | (t))
+
+ shentsz equ 0x28
+
+ org 0x15FF0000
+
+ ehdr: ; Elf32_Ehdr
+ db 0x7F, "ELF", 1, 1, 1 ; e_ident
+ times 9 db 0
+ dw ET_EXEC ; e_type
+ dw EM_386 ; e_machine
+ dd EV_CURRENT ; e_version
+ dd _start ; e_entry
+ dd phdr - $$ ; e_phoff
+ dd 0 ; e_shoff
+ dd 0 ; e_flags
+ dw ehdrsz ; e_ehsize
+ dw phentsz ; e_phentsize
+ dw 3 ; e_phnum
+ dw shentsz ; e_shentsize
+ dw 0 ; e_shnum
+ dw 0 ; e_shstrndx
+ ehdrsz equ $ - ehdr
+
+ ;; The interpreter segment
+
+ interp: db '/lib/ld-linux.so.2'
+
+ interpsz equ $ - interp + 1
+
+ ;; The string table
+
+ strtab:
+ db 0
+ libc_name equ $ - strtab
+ db 'libc.so.6', 0
+ exit_name equ $ - strtab
+ db '_exit', 0
+ strtabsz equ $ - strtab
+
+ align 4
+
+ ;; The relocation table
+
+ reltab: ; Elf32_Rel
+ dd exit_ptr ; r_offset
+ dd R_INFO(1, R_386_32) ; r_info
+ relentsz equ $ - reltab
+ reltabsz equ $ - reltab
+
+ ;; The program segment header table, hash table, symbol table,
+ ;; and dynamic section.
+
+ phdr: ; Elf32_Phdr
+ dd PT_LOAD ; p_type
+ dd 0 ; p_offset
+ dw 0 ; p_vaddr
+ part2: call [exit_ptr] ; p_paddr
+ dd filesz ; p_filesz
+ dd memsz ; p_memsz
+ dd PF_R | PF_W | PF_X ; p_flags
+ dd 0x1000 ; p_align
+ phentsz equ $ - phdr
+ dd PT_DYNAMIC ; p_type
+ dd dyntab - $$ ; p_offset
+ dd dyntab ; p_vaddr
+ _start: push byte 42 ; p_paddr
+ jmp short part2
+ dd dyntabsz ; p_filesz
+ dd dyntabsz ; p_memsz
+ dd PF_R | PF_W ; p_flags
+ dd 4 ; p_align
+
+
+ dd PT_INTERP ; p_type
+ dd interp - $$ ; p_offset
+ dd interp ; p_vaddr
+ dd 0 ; p_paddr
+ dd interpsz ; p_filesz
+ dd interpsz ; p_memsz
+ dd PF_R ; p_flags
+ ; p_align = 1
+
+ hashtab:
+ dd 1 ; no. of buckets
+ dd 2 ; no. of symbols
+ dd 1 ; the bucket: symbol #1
+ ; two links, both zero
+
+ symtab: ; Elf32_Sym
+ dd 0 ; st_name
+ dd 0 ; st_value
+ dd 0 ; st_size
+ db 0 ; st_info
+ db 0 ; st_other
+ dw 0 ; st_shndx
+ symentsz equ $ - symtab
+ dd exit_name ; st_name
+ dd 0 ; st_value
+ dd 0 ; st_size
+ ; st_info = 18
+ ; st_other = 0
+ ; st_shndx = 0
+ ;; The dynamic section
+
+ dyntab:
+ dd DT_RELSZ, reltabsz
+ dd DT_RELENT, relentsz
+ dd DT_REL, reltab
+ dd DT_STRSZ, strtabsz
+ dd DT_STRTAB, strtab
+ dd DT_SYMENT, symentsz
+ dd DT_SYMTAB, symtab
+ dd DT_HASH, hashtab
+ dd DT_NEEDED
+ db libc_name
+ dyntabsz equ $ - dyntab + 11
+
+ exit_ptr equ $ + 11
+ _end equ $ + 15
+
+ ;; End of the file image.
+
+ filesz equ $ - $$
+ memsz equ _end - $$