diff options
-rw-r--r-- | src/elf.rs | 33 | ||||
-rw-r--r-- | src/main.rs | 181 | ||||
-rw-r--r-- | test.c | 14 | ||||
-rw-r--r-- | tiny.asm | 160 |
4 files changed, 333 insertions, 55 deletions
@@ -6,6 +6,27 @@ use std::{io, mem}; pub const ET_REL: u16 = 1; pub const ET_EXEC: u16 = 2; +// segment type +pub const PT_LOAD: u32 = 1; +// segment flags +pub const PF_X: u32 = 1 << 0; +pub const PF_W: u32 = 1 << 1; +pub const PF_R: u32 = 1 << 2; + + +pub const DT_NEEDED: u32 = 1; +pub const DT_HASH: u32 = 4; +pub const DT_STRTAB: u32 = 5; +pub const DT_SYMTAB: u32 = 6; +pub const DT_STRSZ: u32 = 10; +pub const DT_SYMENT: u32 = 11; +pub const DT_REL: u32 = 17; +pub const DT_RELSZ: u32 = 18; +pub const DT_RELENT: u32 = 19; + +pub const PT_DYNAMIC: u32 = 2; +pub const PT_INTERP: u32 = 3; + #[allow(unused)] pub const SHT_PROGBITS: u32 = 1; // Program data #[allow(unused)] @@ -40,7 +61,7 @@ pub const SHN_ABS: u16 = 0xfff1; pub const SHN_COMMON: u16 = 0xfff2; #[repr(C)] -pub struct Header32 { +pub struct Ehdr32 { pub ident: [u8; 4], pub class: u8, pub data: u8, @@ -63,7 +84,7 @@ pub struct Header32 { pub shstrndx: u16, } -impl Default for Header32 { +impl Default for Ehdr32 { fn default() -> Self { Self { ident: [0x7F, b'E', b'L', b'F'], @@ -90,7 +111,11 @@ impl Default for Header32 { } } -impl Header32 { +impl Ehdr32 { + pub fn offsetof_entry(&self) -> usize { + 0x18 + } + pub fn section_offset(&self, ndx: u16) -> u64 { ndx as u64 * self.shentsize as u64 + self.shoff as u64 } @@ -119,8 +144,6 @@ pub struct Shdr32 { pub entsize: u32, } -pub const PT_LOAD: u32 = 0; - #[repr(C)] pub struct Phdr32 { pub r#type: u32, diff --git a/src/main.rs b/src/main.rs index e4ac72c..fd536a0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,6 @@ +// you will need gcc-multilib to compile a 32-bit executable (with stdlib) +// you need to use -fno-pic with gcc -- got,plt relocations aren't supported +// and also make the executable bigger. use fs::File; use io::{BufRead, BufReader, BufWriter, Read, Seek, Write}; use std::collections::{BTreeMap, HashMap}; @@ -274,13 +277,10 @@ impl Symbols { } } -#[allow(dead_code)] // @TODO @TEMPORARY #[derive(Debug, Clone, Copy)] enum RelocationType { Direct32, Pc32, - GotOff32, - GotPc32, } impl RelocationType { @@ -289,15 +289,12 @@ impl RelocationType { Ok(match id { 1 => Direct32, 2 => Pc32, - 9 => GotOff32, - 10 => GotPc32, _ => return Err(ElfError::UnsupportedRelocation(id)), }) } } #[derive(Debug, Clone)] -#[allow(dead_code)] // @TODO @TEMPORARY struct Relocation { r#where: (SymbolId, u64), // (symbol containing relocation, offset in symbol where relocation needs to be applied) source_id: SourceId, @@ -313,12 +310,13 @@ struct Linker { symbols: Symbols, symbol_names: SymbolNames, relocations: Vec<Relocation>, + undefined_relocations: Vec<Relocation>, // stuff from libc, etc. sections: Vec<elf::Shdr32>, sources: Vec<String>, bss_size: u64, // output bss size bss_addr: u64, // output bss address data_addr: u64, // output data address - symbol_addrs: HashMap<SymbolId, u64>, // output addresses of symbols + symbol_data_offsets: HashMap<SymbolId, u64>, // for symbols with data, this holds the offsets into the data segment. warn: fn(LinkWarning), } @@ -387,7 +385,8 @@ impl Linker { sections: vec![], relocations: vec![], sources: vec![], - symbol_addrs: HashMap::new(), + undefined_relocations: vec![], + symbol_data_offsets: HashMap::new(), warn: Self::default_warn_handler, } } @@ -542,7 +541,7 @@ impl Linker { let mut elf = [0u8; 0x34]; reader.read_exact(&mut elf)?; - let elf: elf::Header32 = unsafe { mem::transmute(elf) }; + let elf: elf::Ehdr32 = unsafe { mem::transmute(elf) }; if elf.ident != [0x7f, b'E', b'L', b'F'] { return Err(NotAnElf); @@ -750,14 +749,14 @@ impl Linker { let info = self.symbols.get_info_from_id(sym)?; use SymbolValue::*; match (&info.value).as_ref()? { - Data(_) => self.symbol_addrs.get(&sym).map(|r| *r), + Data(_) => self.symbol_data_offsets.get(&sym).map(|r| *r + self.data_addr), Bss(x) => Some(self.bss_addr + *x), Absolute(a) => Some(*a), } } fn get_relocation_data( - &self, + &mut self, rel: &Relocation, pc: u64, data: &mut [u8; MAX_REL_SIZE], @@ -771,7 +770,8 @@ impl Linker { let symbol_value = match self.get_symbol_value(symbol) { None => { - self.emit_warning(LinkWarning::RelNoValue(self.symbol_id_location_string(symbol))); + self.undefined_relocations.push(rel.clone()); + //self.emit_warning(LinkWarning::RelNoValue(self.symbol_id_location_string(symbol))); return Ok(0) }, Some(v) => v, @@ -788,7 +788,6 @@ impl Linker { let value = match rel.r#type { Direct32 => U32(symbol_value as u32 + addend as u32), Pc32 => U32(symbol_value as u32 + addend as u32 - pc as u32), - _ => todo!(), }; match value { @@ -799,14 +798,13 @@ impl Linker { } } - fn apply_relocation(&mut self, rel: Relocation) -> Result<(), LinkError> { + fn apply_relocation(&mut self, rel: Relocation, data: &mut [u8]) -> Result<(), LinkError> { let apply_symbol = rel.r#where.0; - let apply_offset = rel.r#where.1; - - let apply_addr = match self.symbol_addrs.get(&apply_symbol) { + let apply_offset = rel.r#where.1 + match self.symbol_data_offsets.get(&apply_symbol) { None => return Ok(()), // this relocation isn't in a section we care about Some(a) => *a, }; + let apply_addr = apply_offset + self.data_addr; let mut rel_data = [0; MAX_REL_SIZE]; let rel_data_size = self.get_relocation_data(&rel, apply_addr, &mut rel_data)?; @@ -823,8 +821,8 @@ impl Linker { use SymbolValue::*; let mut oob = false; - match &mut apply_symbol_info.value { - Some(Data(data)) => { + match apply_symbol_info.value { + Some(Data(_)) => { let apply_start = apply_offset as usize; let apply_end = apply_start + rel_data.len(); if apply_end < apply_start || apply_end > data.len() { @@ -861,11 +859,23 @@ impl Linker { id: SymbolId, ) -> Result<(), LinkError> { // deal with cycles - if self.symbol_addrs.contains_key(&id) { + if self.symbol_data_offsets.contains_key(&id) { return Ok(()); } - self.symbol_addrs - .insert(id, self.data_addr + (data.len() as u64)); + + if let Some(info) = self.symbols.get_info_from_id(id) { + match &info.value { + Some(SymbolValue::Data(d)) => { + // set address + self.symbol_data_offsets + .insert(id, data.len() as u64); + // add data + data.extend(d); + } + _ => {}, + } + } + for reference in symbol_graph.get(&id).unwrap_or(&vec![]) { self.add_data_for_symbol(data, symbol_graph, *reference)?; } @@ -873,7 +883,7 @@ impl Linker { Ok(()) } - pub fn link<T: Write>(&mut self, out: &mut BufWriter<T>) -> Result<(), LinkError> { + pub fn link<T: Write + Seek>(&mut self, out: &mut BufWriter<T>) -> Result<(), LinkError> { let mut symbol_graph = SymbolGraph::with_capacity(self.symbols.count()); let relocations = mem::take(&mut self.relocations); @@ -898,26 +908,94 @@ impl Linker { let segment_addr: u32 = 0x400000; - let data_size = 0; - - let mut header = elf::Header32::default(); - let ehdr_size: u32 = header.ehsize.into(); - let phdr_size: u32 = header.phentsize.into(); - let header_size = ehdr_size + phdr_size; - let file_size = header_size + data_size; - let entry_point = segment_addr + header_size; - header.phnum = 1; - header.phoff = ehdr_size; - header.entry = entry_point; - out.write_all(&header.to_bytes())?; - - let data_addr = segment_addr + header_size; - self.data_addr = data_addr.into(); - let bss_addr = segment_addr + file_size; + let mut ehdr = elf::Ehdr32::default(); + let ehdr_size: u32 = ehdr.ehsize.into(); + let phdr_size: u32 = ehdr.phentsize.into(); + let num_segments: u16 = 4; // interp, dynamic, data, bss + + let header_size = ehdr_size + phdr_size * u32::from(num_segments); + let interp_offset = header_size; + let interp = "/lib/ld-linux.so.2\0"; + let interp_size = interp.len() as u32; + let nlibs = 1; + let dynamic_offset = interp_offset + interp_size; + let dynamic_size = 16 * 4 + nlibs * 8; + let bss_addr: u32 = 0x9000000; self.bss_addr = bss_addr.into(); let bss_size: u32 = self.bss_size.try_into().map_err(|_| LinkError::TooLarge)?; + + ehdr.phnum = num_segments; + ehdr.phoff = ehdr_size; + let ehdr = ehdr; + let entry_point_offset = ehdr.offsetof_entry(); + out.write_all(&ehdr.to_bytes())?; + + let phdr_interp = elf::Phdr32 { + r#type: elf::PT_INTERP, + flags: elf::PF_R, + offset: interp_offset, + vaddr: segment_addr + interp_offset, + filesz: interp_size, + memsz: interp_size, + align: 1, + ..Default::default() + }; - let entry_name_str = "entry"; + let phdr_dynamic = elf::Phdr32 { + r#type: elf::PT_DYNAMIC, + flags: elf::PF_R, + offset: dynamic_offset, + vaddr: segment_addr + dynamic_offset, + filesz: dynamic_size, + memsz: dynamic_size, + align: 1, + ..Default::default() + }; + + // for some reason, linux doesn't like executables + // with memsz > filesz != 0 + // so we need two segments. + let phdr_data = elf::Phdr32 {..Default::default() }; + let phdr_bss = elf::Phdr32 { + flags: elf::PF_R | elf::PF_W, // read, write + offset: 0, + vaddr: bss_addr, + filesz: 0, + memsz: bss_size, + ..Default::default() + }; + out.write_all(&phdr_interp.to_bytes())?; + out.write_all(&phdr_dynamic.to_bytes())?; + let dyn_data = vec![ + elf::DT_RELSZ, 0, + elf::DT_RELENT, 0, + elf::DT_REL, 0, + elf::DT_STRSZ, 0, + elf::DT_STRTAB, 0, + elf::DT_SYMENT, 0, + elf::DT_SYMTAB, 0, + elf::DT_HASH, 0, + elf::DT_NEEDED, 0, + ]; + let mut dyn_bytes = Vec::with_capacity(dyn_data.len() * 4); + for x in dyn_data { + dyn_bytes.extend(u32::to_le_bytes(x)); + } + + let phdr_data_offset = out.stream_position()?; + out.write_all(&phdr_data.to_bytes())?; + out.write_all(&phdr_bss.to_bytes())?; + out.write_all(interp.as_bytes())?; + out.write_all(&dyn_bytes)?; + + + let data_addr: u32 = out.stream_position()? as u32 + segment_addr; + self.data_addr = data_addr.into(); + + out.seek(io::SeekFrom::End(0))?; + + + let entry_name_str = "main"; let entry_name_id = self .symbol_names .get(entry_name_str) @@ -931,18 +1009,29 @@ impl Linker { self.add_data_for_symbol(&mut data, &symbol_graph, entry_id)?; for rel in relocations { - self.apply_relocation(rel)?; + self.apply_relocation(rel, &mut data)?; } + + out.write_all(&data)?; + + let file_size = out.stream_position()?.try_into() + .map_err(|_| LinkError::TooLarge)?; + let entry_point = data_addr; // the entry point is the first thing we output data for - let phdr = elf::Phdr32 { - flags: 0b111, // read, write, execute + out.seek(io::SeekFrom::Start(entry_point_offset as u64))?; + out.write_all(&entry_point.to_le_bytes())?; + + let phdr_data = elf::Phdr32 { + flags: elf::PF_R | elf::PF_W | elf::PF_X, // read, write, execute offset: 0, - vaddr: segment_addr, + vaddr: data_addr, filesz: file_size, - memsz: file_size + bss_size, + memsz: file_size, ..Default::default() }; - out.write_all(&phdr.to_bytes())?; + + out.seek(io::SeekFrom::Start(phdr_data_offset))?; + out.write_all(&phdr_data.to_bytes())?; Ok(()) } @@ -1,6 +1,12 @@ +#include <stdio.h> int x; -void entry() { - x += 1; - __asm__("xor %ebx, %ebx\n" - "int $0x80\n"); +void main() { + x = 123; + printf("hi"); + __asm__ ("movl $1, %%eax\n" + "movl %0, %%ebx\n" + "int $0x80\n" + : + : "r" (x) : "ebx", "eax"); + } diff --git a/tiny.asm b/tiny.asm new file mode 100644 index 0000000..562b7c4 --- /dev/null +++ b/tiny.asm @@ -0,0 +1,160 @@ +; https://www.muppetlabs.com/~breadbox/software/tiny/somewhat.html + + ; tiny.asm + + BITS 32 + + %define ET_EXEC 2 + %define EM_386 3 + %define EV_CURRENT 1 + + %define PT_LOAD 1 + %define PT_DYNAMIC 2 + %define PT_INTERP 3 + + %define PF_X 1 + %define PF_W 2 + %define PF_R 4 + + %define STT_FUNC 2 + + %define STB_GLOBAL 1 + + %define R_386_32 1 + + %define DT_NULL 0 + %define DT_NEEDED 1 + %define DT_HASH 4 + %define DT_STRTAB 5 + %define DT_SYMTAB 6 + %define DT_STRSZ 10 + %define DT_SYMENT 11 + %define DT_REL 17 + %define DT_RELSZ 18 + %define DT_RELENT 19 + + %define R_INFO(s, t) (((s) << 8) | (t)) + + shentsz equ 0x28 + + org 0x15FF0000 + + ehdr: ; Elf32_Ehdr + db 0x7F, "ELF", 1, 1, 1 ; e_ident + times 9 db 0 + dw ET_EXEC ; e_type + dw EM_386 ; e_machine + dd EV_CURRENT ; e_version + dd _start ; e_entry + dd phdr - $$ ; e_phoff + dd 0 ; e_shoff + dd 0 ; e_flags + dw ehdrsz ; e_ehsize + dw phentsz ; e_phentsize + dw 3 ; e_phnum + dw shentsz ; e_shentsize + dw 0 ; e_shnum + dw 0 ; e_shstrndx + ehdrsz equ $ - ehdr + + ;; The interpreter segment + + interp: db '/lib/ld-linux.so.2' + + interpsz equ $ - interp + 1 + + ;; The string table + + strtab: + db 0 + libc_name equ $ - strtab + db 'libc.so.6', 0 + exit_name equ $ - strtab + db '_exit', 0 + strtabsz equ $ - strtab + + align 4 + + ;; The relocation table + + reltab: ; Elf32_Rel + dd exit_ptr ; r_offset + dd R_INFO(1, R_386_32) ; r_info + relentsz equ $ - reltab + reltabsz equ $ - reltab + + ;; The program segment header table, hash table, symbol table, + ;; and dynamic section. + + phdr: ; Elf32_Phdr + dd PT_LOAD ; p_type + dd 0 ; p_offset + dw 0 ; p_vaddr + part2: call [exit_ptr] ; p_paddr + dd filesz ; p_filesz + dd memsz ; p_memsz + dd PF_R | PF_W | PF_X ; p_flags + dd 0x1000 ; p_align + phentsz equ $ - phdr + dd PT_DYNAMIC ; p_type + dd dyntab - $$ ; p_offset + dd dyntab ; p_vaddr + _start: push byte 42 ; p_paddr + jmp short part2 + dd dyntabsz ; p_filesz + dd dyntabsz ; p_memsz + dd PF_R | PF_W ; p_flags + dd 4 ; p_align + + + dd PT_INTERP ; p_type + dd interp - $$ ; p_offset + dd interp ; p_vaddr + dd 0 ; p_paddr + dd interpsz ; p_filesz + dd interpsz ; p_memsz + dd PF_R ; p_flags + ; p_align = 1 + + hashtab: + dd 1 ; no. of buckets + dd 2 ; no. of symbols + dd 1 ; the bucket: symbol #1 + ; two links, both zero + + symtab: ; Elf32_Sym + dd 0 ; st_name + dd 0 ; st_value + dd 0 ; st_size + db 0 ; st_info + db 0 ; st_other + dw 0 ; st_shndx + symentsz equ $ - symtab + dd exit_name ; st_name + dd 0 ; st_value + dd 0 ; st_size + ; st_info = 18 + ; st_other = 0 + ; st_shndx = 0 + ;; The dynamic section + + dyntab: + dd DT_RELSZ, reltabsz + dd DT_RELENT, relentsz + dd DT_REL, reltab + dd DT_STRSZ, strtabsz + dd DT_STRTAB, strtab + dd DT_SYMENT, symentsz + dd DT_SYMTAB, symtab + dd DT_HASH, hashtab + dd DT_NEEDED + db libc_name + dyntabsz equ $ - dyntab + 11 + + exit_ptr equ $ + 11 + _end equ $ + 15 + + ;; End of the file image. + + filesz equ $ - $$ + memsz equ _end - $$ |