summaryrefslogtreecommitdiff
path: root/src/main.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/main.rs')
-rw-r--r--src/main.rs1026
1 files changed, 49 insertions, 977 deletions
diff --git a/src/main.rs b/src/main.rs
index 2300c27..bae6d71 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,1001 +1,73 @@
// you will need gcc-multilib to compile a 32-bit executable (with stdlib)
// you need to use -fno-pic with gcc -- got,plt relocations aren't supported
// and also make the executable bigger.
-use fs::File;
-use io::{BufReader, BufWriter, Seek, Write};
-use std::collections::{BTreeMap, HashMap};
-use std::{fmt, fs, io, mem};
+extern crate clap;
+
+use clap::Parser;
#[cfg(target_endian = "big")]
compile_error! {"WHY do you have a big endian machine???? it's the 21st century, buddy. this program won't work fuck you"}
mod elf;
mod util;
-
-use elf::ToBytes;
-use elf::Reader as ELFReader;
-use util::u32_from_le_slice;
-
-pub enum LinkError {
- IO(io::Error),
- TooLarge,
- NoEntry(String), // no entry point
- EntryNotDefined(String), // entry point is declared, but not defined
-}
-
-type LinkResult<T> = Result<T, LinkError>;
-
-impl fmt::Display for LinkError {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- use LinkError::*;
- match self {
- IO(e) => write!(f, "IO error: {e}"),
- TooLarge => write!(f, "executable file would be too large."),
- NoEntry(name) => write!(f, "entry point '{name}' not found."),
- EntryNotDefined(name) => write!(f, "entry point '{name}' declared, but not defined."),
- }
- }
-}
-
-impl From<io::Error> for LinkError {
- fn from(e: io::Error) -> Self {
- Self::IO(e)
- }
-}
-
-impl From<&LinkError> for String {
- fn from(e: &LinkError) -> Self {
- format!("{e}")
- }
-}
-
-pub enum LinkWarning {
- RelSymNotFound { source: String, name: String },
- RelUnsupported(u8),
- RelOOB(String, u64),
- RelNoData(String, u64),
- RelNoValue(String),
-}
-
-impl fmt::Display for LinkWarning {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- use LinkWarning::*;
- match self {
- RelSymNotFound { source, name } => write!(f, "undefined symbol '{name}' (in {source}) (relocation ignored)."),
- RelOOB(text, offset) => write!(f, "relocation applied to {text}+0x{offset:x}, which goes outside of the symbol (it will be ignored)."),
- RelNoData(source, offset) => write!(
- f,
- "offset {source}+0x{offset:x} not in a data/text section. relocation will be ignored."
- ),
- RelNoValue(name) => write!(f, "can't figure out value of symbol '{name}' (relocation ignored)."),
- RelUnsupported(x) => write!(f, "Unsupported relocation type {x} (relocation ignored)."),
- }
- }
-}
-
-impl From<&LinkWarning> for String {
- fn from(e: &LinkWarning) -> Self {
- format!("{e}")
- }
-}
-
-pub enum ObjectError {
- Elf(elf::Error),
- BadType,
- BadUtf8,
- BadSymtab,
- BadLink(u64),
- BadRelHeader,
- UnsupportedRelocation(u8),
- BadSymIdx(u64),
- NoStrtab,
-}
-
-impl From<elf::Error> for ObjectError {
- fn from(e: elf::Error) -> Self {
- Self::Elf(e)
- }
-}
-
-impl From<&ObjectError> for String {
- fn from(e: &ObjectError) -> String {
- format!("{e}")
- }
-}
-
-impl fmt::Display for ObjectError {
- fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
- use ObjectError::*;
- match self {
- // Display for UnexpectedEof *should* be this but is less clear
- // ("failed to fill whole buffer")
- Elf(e) => write!(f, "{e}"),
- BadType => write!(f, "wrong type of ELF file (not an object file)"),
- BadUtf8 => write!(f, "bad UTF-8 in ELF file"),
- BadSymtab => write!(f, "bad ELF symbol table"),
- BadRelHeader => write!(f, "bad ELF relocation header"),
- UnsupportedRelocation(x) => write!(f, "unsupported relocation type: {x}"),
- BadLink(i) => write!(f, "bad ELF link: {i}"),
- BadSymIdx(i) => write!(f, "bad symbol index: {i}"),
- NoStrtab => write!(f, "object has no .strtab section"),
- }
- }
-}
-
-// to be more efficientâ„¢, we use integers to keep track of symbol names.
-type SymbolNameType = u32;
-#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
-struct SymbolName(SymbolNameType);
-struct SymbolNames {
- count: SymbolNameType,
- to_string: Vec<String>,
- by_string: HashMap<String, SymbolName>,
-}
-
-impl SymbolNames {
- fn new() -> Self {
- Self {
- count: 0,
- to_string: vec![],
- by_string: HashMap::new(),
- }
- }
-
- fn add(&mut self, name: String) -> SymbolName {
- match self.by_string.get(&name) {
- Some(id) => *id,
- None => {
- // new symbol
- let id = SymbolName(self.count);
- self.count += 1;
- self.by_string.insert(name.clone(), id);
- self.to_string.push(name);
- id
- }
- }
- }
-
- #[allow(dead_code)]
- fn get_str(&self, id: SymbolName) -> Option<&str> {
- self.to_string.get(id.0 as usize).map(|s| &s[..])
- }
-
- #[allow(dead_code)]
- fn get(&self, name: &str) -> Option<SymbolName> {
- self.by_string.get(name).copied()
- }
-}
-
-#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
-struct SourceId(u32);
-
-impl SourceId {
- const NONE: Self = Self(u32::MAX);
-}
-
-type SymbolIdType = u32;
-#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
-struct SymbolId(SymbolIdType);
-
-#[derive(Copy, Clone, Debug)]
-#[allow(dead_code)] // @TODO @TEMPORARY
-enum SymbolType {
- Function,
- Object,
- Other,
-}
-
-#[derive(Debug)]
-enum SymbolValue {
- Bss(u64),
- Data(Vec<u8>),
- Absolute(u64),
-}
-
-#[allow(dead_code)] // @TODO @TEMPORARY
-#[derive(Debug)]
-struct SymbolInfo {
- r#type: elf::SymbolType,
- value: Option<SymbolValue>,
- size: u64,
-}
-
-struct Symbols {
- info: Vec<SymbolInfo>,
- locations: HashMap<SymbolId, (SourceId, SymbolName)>,
- global: HashMap<SymbolName, SymbolId>,
- weak: HashMap<SymbolName, SymbolId>,
- local: HashMap<(SourceId, SymbolName), SymbolId>,
-}
-
-impl Symbols {
- fn new() -> Self {
- Self {
- info: vec![],
- global: HashMap::new(),
- weak: HashMap::new(),
- local: HashMap::new(),
- locations: HashMap::new(),
- }
- }
-
- fn add_(&mut self, source: SourceId, name: SymbolName, info: SymbolInfo) -> SymbolId {
- let id = SymbolId(self.info.len() as _);
- self.info.push(info);
- self.locations.insert(id, (source, name));
- id
- }
-
- fn add_weak(&mut self, source: SourceId, name: SymbolName, info: SymbolInfo) -> SymbolId {
- let id = self.add_(source, name, info);
- self.weak.insert(name, id);
- id
- }
-
- fn add_local(&mut self, source: SourceId, name: SymbolName, info: SymbolInfo) -> SymbolId {
- let id = self.add_(source, name, info);
- self.local.insert((source, name), id);
- id
- }
-
- fn add_global(&mut self, source: SourceId, name: SymbolName, info: SymbolInfo) -> SymbolId {
- let id = self.add_(source, name, info);
- self.global.insert(name, id);
- id
- }
-
- fn get_mut_info_from_id(&mut self, id: SymbolId) -> Option<&mut SymbolInfo> {
- self.info.get_mut(id.0 as usize)
- }
-
- fn get_info_from_id(&self, id: SymbolId) -> Option<&SymbolInfo> {
- self.info.get(id.0 as usize)
- }
-
- fn get_id_from_name(&self, source: SourceId, name: SymbolName) -> Option<SymbolId> {
- self.local
- .get(&(source, name))
- .or_else(|| self.global.get(&name))
- .or_else(|| self.weak.get(&name))
- .copied()
- }
-
- fn get_location_from_id(&self, id: SymbolId) -> Option<(SourceId, SymbolName)> {
- self.locations.get(&id).copied()
- }
-
- fn count(&self) -> usize {
- self.info.len()
- }
-}
-
-#[derive(Debug, Clone)]
-struct Relocation {
- r#where: (SymbolId, u64), // (symbol containing relocation, offset in symbol where relocation needs to be applied)
- source_id: SourceId,
- sym: SymbolName,
- r#type: elf::RelType,
- addend: i64,
-}
-
-struct Linker {
- symbols: Symbols,
- symbol_names: SymbolNames,
- relocations: Vec<Relocation>,
- undefined_relocations: Vec<Relocation>, // library relocations
- sources: Vec<String>,
- bss_size: u64, // output bss size
- bss_addr: u64, // output bss address
- data_addr: u64, // output data address
- symbol_data_offsets: HashMap<SymbolId, u64>, // for symbols with data, this holds the offsets into the data segment.
- warn: fn(LinkWarning),
-}
-
-// this maps between offsets in an object file and symbols defined in that file.
-// this is used to figure out where relocations are taking place.
-struct SymbolOffsetMap {
- map: BTreeMap<(u64, u64), SymbolId>,
-}
-
-impl SymbolOffsetMap {
- fn new() -> Self {
- SymbolOffsetMap {
- map: BTreeMap::new(),
- }
- }
-
- fn add_symbol(&mut self, offset: u64, size: u64, id: SymbolId) {
- if size > 0 {
- self.map.insert((offset, offset + size), id);
- }
- }
-
- // returns symbol, offset in symbol.
- // e.g. a relocation might happen at main+0x33.
- fn get(&self, offset: u64) -> Option<(SymbolId, u64)> {
- let mut r = self.map.range(..(offset, u64::MAX));
- let (key, value) = r.next_back()?;
- if offset >= key.0 && offset < key.1 {
- // offset corresponds to somewhere in this symbol
- Some((*value, offset - key.0))
- } else {
- None
- }
- }
-}
-
-// graph of which symbols use which symbols
-// this is needed so we don't emit anything for unused symbols.
-type SymbolGraph = HashMap<SymbolId, Vec<SymbolId>>;
-
-struct Executable {
- interp: Vec<u8>,
- load_addr: u64,
- bss: Option<(u64, u64)>,
- relocations: Vec<(Relocation, u64)>,
- strtab: Vec<u8>,
- symbol_strtab_offsets: HashMap<SymbolName, u64>,
- lib_strtab_offsets: Vec<u64>,
-}
-
-impl Executable {
- pub fn new(load_addr: u64) -> Self {
- Self {
- bss: None,
- load_addr,
- interp: vec![],
- relocations: vec![],
- lib_strtab_offsets: vec![],
- symbol_strtab_offsets: HashMap::new(),
- strtab: vec![0],
- }
- }
-
- pub fn set_bss(&mut self, addr: u64, size: u64) {
- self.bss = Some((addr, size));
- }
-
- pub fn set_interp(&mut self, interp: &str) {
- self.interp = interp.as_bytes().into();
- self.interp.push(b'\0');
- }
-
- fn add_string(&mut self, s: &str) -> u64 {
- let ret = self.strtab.len() as u64;
- self.strtab.extend(s.as_bytes());
- self.strtab.push(b'\0');
- ret
- }
-
- pub fn add_lib(&mut self, lib: &str) {
- let s = self.add_string(lib);
- self.lib_strtab_offsets.push(s);
- }
-
- pub fn add_relocation(&mut self, symbol_names: &SymbolNames, rel: &Relocation, addr: u64) {
- let name = rel.sym;
-
- if self.symbol_strtab_offsets.get(&name).is_none() {
- let s = symbol_names.get_str(name).unwrap();
- let offset = self.add_string(s);
- self.symbol_strtab_offsets.insert(name, offset);
- }
- self.relocations.push((rel.clone(), addr));
- }
-
- fn segment_count(&self) -> u16 {
- let mut count = 1 /*data*/;
- if !self.interp.is_empty() {
- count += 2 /*interp,dyntab*/;
- }
- if self.bss.is_some() {
- count += 1 /*bss*/;
- }
- count
- }
-
- fn ph_offset(&self) -> u64 {
- elf::Ehdr32::size_of() as u64
- }
-
- fn ph_size(&self) -> u64 {
- elf::Phdr32::size_of() as u64 * u64::from(self.segment_count())
- }
-
- fn data_offset(&self) -> u64 {
- self.ph_offset() + self.ph_size()
- }
-
- pub fn data_addr(&self) -> u64 {
- self.load_addr + self.data_offset()
- }
-
- pub fn write<T: Write + Seek>(&self, data: &[u8], out: &mut T) -> LinkResult<()> {
- let load_addr = self.load_addr as u32;
-
- // start by writing data.
- out.seek(io::SeekFrom::Start(self.data_offset()))?;
- out.write_all(data)?;
-
- let mut interp_offset = 0;
- let mut dyntab_offset = 0;
- let mut interp_size = 0;
- let mut dyntab_size = 0;
- if !self.interp.is_empty() {
- // now interp
- interp_offset = out.stream_position()?;
- out.write_all(&self.interp)?;
- interp_size = self.interp.len() as u32;
- // now strtab
- let strtab_offset = out.stream_position()?;
- out.write_all(&self.strtab)?;
- // now symtab
- let symtab_offset = out.stream_position()?;
- let null_symbol = [0; mem::size_of::<elf::Sym32>()];
- out.write_all(&null_symbol)?;
- let mut symbols: HashMap<SymbolName, u32> = HashMap::new();
- for (i, (sym, strtab_offset)) in self.symbol_strtab_offsets.iter().enumerate() {
- symbols.insert(*sym, (i + 1) as u32);
- // @TODO: allow STT_OBJECT as fell
- let sym = elf::Sym32 {
- name: *strtab_offset as u32,
- info: elf::STB_GLOBAL << 4 | elf::STT_FUNC,
- value: 0,
- size: 0,
- other: 0,
- shndx: 0,
- };
- out.write_all(&sym.to_bytes())?;
- }
- // now reltab
- let reltab_offset = out.stream_position()?;
- for (reloc, addr) in self.relocations.iter() {
- let index = *symbols.get(&reloc.sym).unwrap();
- let rel = elf::Rel32 {
- offset: *addr as u32,
- info: index << 8 | u32::from(reloc.r#type.to_x86_u8().unwrap()),
- };
- out.write_all(&rel.to_bytes())?;
- }
- let reltab_size = out.stream_position()? - reltab_offset;
- // now hash
- let hashtab_offset = out.stream_position()?;
- // put everything in a single bucket
- let nsymbols = symbols.len() as u32;
- out.write_all(&u32::to_le_bytes(1))?; // nbucket
- out.write_all(&u32::to_le_bytes(nsymbols + 1))?; // nchain
- out.write_all(&u32::to_le_bytes(0))?; // bucket begins at 0
- // chain 1 -> 2 -> 3 -> ... -> n -> 0
- for i in 1..nsymbols {
- out.write_all(&u32::to_le_bytes(i))?;
- }
- out.write_all(&u32::to_le_bytes(0))?;
- // i don't know why this needs to be here.
- out.write_all(&u32::to_le_bytes(0))?;
-
- // now dyntab
- dyntab_offset = out.stream_position()?;
- let mut dyn_data = vec![
- elf::DT_RELSZ,
- reltab_size as u32,
- elf::DT_RELENT,
- 8,
- elf::DT_REL,
- load_addr + reltab_offset as u32,
- elf::DT_STRSZ,
- self.strtab.len() as u32,
- elf::DT_STRTAB,
- load_addr + strtab_offset as u32,
- elf::DT_SYMENT,
- 16,
- elf::DT_SYMTAB,
- load_addr + symtab_offset as u32,
- elf::DT_HASH,
- load_addr + hashtab_offset as u32,
- ];
- for lib in &self.lib_strtab_offsets {
- dyn_data.extend([elf::DT_NEEDED, *lib as u32]);
- }
- dyn_data.extend([elf::DT_NULL, 0]);
- let mut dyn_bytes = Vec::with_capacity(dyn_data.len() * 4);
- for x in dyn_data {
- dyn_bytes.extend(u32::to_le_bytes(x));
- }
- dyntab_size = dyn_bytes.len() as u32;
- out.write_all(&dyn_bytes)?;
- }
-
- let file_size: u32 = out
- .stream_position()?
- .try_into()
- .map_err(|_| LinkError::TooLarge)?;
-
- out.seek(io::SeekFrom::Start(0))?;
-
- let ehdr = elf::Ehdr32 {
- phnum: self.segment_count(),
- phoff: elf::Ehdr32::size_of() as u32,
- entry: self
- .data_addr()
- .try_into()
- .map_err(|_| LinkError::TooLarge)?,
- ..Default::default()
- };
- out.write_all(&ehdr.to_bytes())?;
-
- let phdr_data = elf::Phdr32 {
- flags: elf::PF_R | elf::PF_W | elf::PF_X, // read, write, execute
- offset: 0,
- vaddr: load_addr,
- filesz: file_size,
- memsz: file_size,
- ..Default::default()
- };
- out.write_all(&phdr_data.to_bytes())?;
-
- if let Some((bss_addr, bss_size)) = self.bss {
- // for some reason, linux doesn't like executables
- // with memsz > filesz != 0
- // so we need two segments.
- let bss_size: u32 = bss_size.try_into().map_err(|_| LinkError::TooLarge)?;
- let phdr_bss = elf::Phdr32 {
- flags: elf::PF_R | elf::PF_W, // read, write
- offset: 0,
- vaddr: bss_addr as u32,
- filesz: 0,
- memsz: bss_size as u32,
- ..Default::default()
- };
- out.write_all(&phdr_bss.to_bytes())?;
- }
-
- if !self.interp.is_empty() {
- let phdr_interp = elf::Phdr32 {
- r#type: elf::PT_INTERP,
- flags: elf::PF_R,
- offset: interp_offset as u32,
- vaddr: load_addr + interp_offset as u32,
- filesz: interp_size as u32,
- memsz: interp_size as u32,
- align: 1,
- ..Default::default()
- };
- out.write_all(&phdr_interp.to_bytes())?;
-
- let phdr_dynamic = elf::Phdr32 {
- r#type: elf::PT_DYNAMIC,
- flags: elf::PF_R,
- offset: dyntab_offset as u32,
- vaddr: load_addr + dyntab_offset as u32,
- filesz: dyntab_size as u32,
- memsz: dyntab_size as u32,
- align: 1,
- ..Default::default()
- };
- out.write_all(&phdr_dynamic.to_bytes())?;
- }
-
- Ok(())
- }
-}
-
-impl Linker {
- fn default_warn_handler(warning: LinkWarning) {
- eprintln!("warning: {warning}");
- }
-
- // why use fn of all things to transmit warnings?
- // well, it's very nice for stuff to not need a mutable reference
- // to emit warnings, and this is basically the only way of doing it.
- // if you need to mutate state in your warning handler, you can always
- // use a mutex.
- pub fn _set_warning_handler(&mut self, warn: fn(LinkWarning)) {
- self.warn = warn;
- }
-
- pub fn new() -> Self {
- Linker {
- symbols: Symbols::new(),
- symbol_names: SymbolNames::new(),
- bss_addr: 0,
- bss_size: 0,
- data_addr: 0,
- relocations: vec![],
- undefined_relocations: vec![],
- sources: vec![],
- symbol_data_offsets: HashMap::new(),
- warn: Self::default_warn_handler,
- }
- }
-
- fn source_name(&self, id: SourceId) -> &str {
- &self.sources[id.0 as usize]
- }
-
- fn add_symbol(
- &mut self,
- source: SourceId,
- elf: &elf::Reader32LE,
- offset_map: &mut SymbolOffsetMap,
- symbol: &elf::Symbol,
- ) -> Result<(), ObjectError> {
- let mut data_offset = None;
- let name = elf.symbol_name(symbol)?;
- println!("{name}");
- let name_id = self.symbol_names.add(name);
-
- let value = match symbol.value {
- elf::SymbolValue::Undefined => None,
- elf::SymbolValue::Absolute(n) => Some(SymbolValue::Absolute(n)),
- elf::SymbolValue::SectionOffset(shndx, offset) => {
- match elf.section_type(shndx) {
- Some(elf::SectionType::ProgBits) => {
- let mut data = vec![0; symbol.size as usize];
- data_offset = Some(elf.section_offset(shndx).unwrap() + offset);
- elf.read_section_data_exact(shndx, offset, &mut data)?;
- Some(SymbolValue::Data(data))
- },
- Some(elf::SectionType::NoBits) => {
- let p = self.bss_size;
- self.bss_size += symbol.size;
- Some(SymbolValue::Bss(p))
- },
- _ => None, // huh
- }
- }
- };
-
- let info = SymbolInfo {
- r#type: symbol.r#type,
- value,
- size: symbol.size,
- };
- let symbol_id = match symbol.bind {
- elf::SymbolBind::Local => self.symbols.add_local(source, name_id, info),
- elf::SymbolBind::Global => self.symbols.add_global(source, name_id, info),
- elf::SymbolBind::Weak => self.symbols.add_weak(source, name_id, info),
- _ => return Ok(()), // eh
- };
-
- if let Some(offset) = data_offset {
- offset_map.add_symbol(offset, symbol.size, symbol_id);
- }
- Ok(())
- }
-
- pub fn process_object(
- &mut self,
- name: &str,
- reader: &mut BufReader<File>,
- ) -> Result<(), ObjectError> {
- use ObjectError::*;
-
- let mut offset_map = SymbolOffsetMap::new();
-
- let source_id = SourceId(self.sources.len() as _);
- self.sources.push(name.into());
-
- let elf = elf::Reader32LE::new(reader)?;
- if elf.r#type() != elf::Type::Rel {
- return Err(BadType);
- }
-
- for symbol in elf.symbols() {
- self.add_symbol(source_id, &elf, &mut offset_map, symbol)?;
- }
-
- for rel in elf.relocations() {
- if let Some(r#where) = offset_map.get(rel.offset) {
- let sym = self.symbol_names.add(elf.symbol_name(&rel.symbol)?);
- self.relocations.push(Relocation {
- r#where,
- source_id,
- sym,
- r#type: rel.r#type,
- addend: rel.addend,
- });
- } else {
- self.emit_warning(LinkWarning::RelNoData(
- self.source_name(source_id).into(),
- rel.entry_offset
- ));
- }
- }
-
- Ok(())
- }
-
- fn symbol_name_str(&self, id: SymbolName) -> &str {
- self.symbol_names.get_str(id).unwrap_or("???")
- }
-
- fn emit_warning(&self, warning: LinkWarning) {
- (self.warn)(warning);
- }
-
- fn emit_warning_rel_sym_not_found(&self, source: SourceId, name: SymbolName) {
- let warn = LinkWarning::RelSymNotFound {
- source: self.source_name(source).into(),
- name: self.symbol_name_str(name).into(),
- };
- self.emit_warning(warn);
- }
-
- // get symbol ID, producing a warning if it does not exist.
- fn get_symbol_id(&self, source_id: SourceId, name: SymbolName) -> Option<SymbolId> {
- // @TODO: don't warn about the same symbol twice
- let sym = self.symbols.get_id_from_name(source_id, name);
- if sym.is_none() {
- self.emit_warning_rel_sym_not_found(source_id, name);
- }
- sym
- }
-
- // generates a string like main.c:some_function
- fn symbol_id_location_string(&self, id: SymbolId) -> String {
- if let Some((source, name)) = self.symbols.get_location_from_id(id) {
- return format!(
- "{}:{}",
- self.source_name(source),
- self.symbol_name_str(name)
- );
- }
- "???".into()
- }
-
- fn get_symbol_value(&self, sym: SymbolId) -> Option<u64> {
- let info = self.symbols.get_info_from_id(sym)?;
- use SymbolValue::*;
- match info.value.as_ref()? {
- Data(_) => self
- .symbol_data_offsets
- .get(&sym)
- .map(|&o| o + self.data_addr),
- Bss(x) => Some(self.bss_addr + *x),
- Absolute(a) => Some(*a),
- }
- }
-
- fn get_rel_apply_data_offset(&self, rel: &Relocation) -> Option<u64> {
- let apply_symbol = rel.r#where.0;
- let r = self.symbol_data_offsets.get(&apply_symbol)?;
- Some(*r + rel.r#where.1)
- }
-
- fn apply_relocation(&mut self, rel: Relocation, data: &mut [u8]) -> Result<(), LinkError> {
- let apply_symbol = rel.r#where.0;
- let apply_offset = match self.get_rel_apply_data_offset(&rel) {
- Some(data_offset) => data_offset,
- None => return Ok(()), // this relocation isn't in a data section so there's nothing we can do about it
- };
- let pc = apply_offset + self.data_addr;
-
- let symbol = match self.get_symbol_id(rel.source_id, rel.sym) {
- None => return Ok(()), // we emitted a warning in get_symbol_id
- Some(sym) => sym,
- };
-
- let symbol_value = match self.get_symbol_value(symbol) {
- None => {
- // this symbol is defined in a library
- //self.emit_warning(LinkWarning::RelNoValue(self.symbol_id_location_string(symbol)));
- self.undefined_relocations.push(rel);
- return Ok(());
- }
- Some(v) => v,
- };
-
- let addend = rel.addend;
-
- enum Value {
- U32(u32),
- }
- use elf::RelType::*;
- use Value::*;
-
- let value = match rel.r#type {
- Direct32 => U32(symbol_value as u32 + addend as u32),
- Pc32 => U32(symbol_value as u32 + addend as u32 - pc as u32),
- Other(x) => {self.emit_warning(LinkWarning::RelUnsupported(x)); return Ok(()) },
- };
-
- let apply_symbol_info = match self.symbols.get_mut_info_from_id(apply_symbol) {
- Some(info) => info,
- None => {
- // this shouldn't happen.
- self.emit_warning_rel_sym_not_found(rel.source_id, rel.sym);
- return Ok(());
- }
- };
-
- use SymbolValue::*;
-
- // guarantee failure if apply_offset can't be converted to usize.
- let apply_start = apply_offset.try_into().unwrap_or(usize::MAX - 32);
-
- match apply_symbol_info.value {
- Some(Data(_)) => {
- let mut in_bounds = true;
- match value {
- U32(u) => {
- if let Some(apply_to) = data.get_mut(apply_start..apply_start + 4) {
- let curr_val = u32_from_le_slice(apply_to);
- apply_to.copy_from_slice(&(u + curr_val).to_le_bytes());
- } else {
- in_bounds = false;
- }
- }
- };
-
- if !in_bounds {
- self.emit_warning(LinkWarning::RelOOB(
- self.symbol_id_location_string(apply_symbol),
- apply_offset,
- ));
- }
- }
- _ => {
- self.emit_warning(LinkWarning::RelNoData(
- self.source_name(rel.source_id).into(),
- apply_offset,
- ));
- }
- }
-
- Ok(())
- }
-
- // we don't want to link unused symbols.
- // we start by calling this on the entry function, then it recursively calls itself for each symbol used.
- pub fn add_data_for_symbol(
- &mut self,
- data: &mut Vec<u8>,
- symbol_graph: &SymbolGraph,
- id: SymbolId,
- ) -> Result<(), LinkError> {
- // deal with cycles
- if self.symbol_data_offsets.contains_key(&id) {
- return Ok(());
- }
-
- if let Some(info) = self.symbols.get_info_from_id(id) {
- if let Some(SymbolValue::Data(d)) = &info.value {
- // set address
- self.symbol_data_offsets.insert(id, data.len() as u64);
- // add data
- data.extend(d);
- }
- }
-
- for reference in symbol_graph.get(&id).unwrap_or(&vec![]) {
- self.add_data_for_symbol(data, symbol_graph, *reference)?;
- }
-
- Ok(())
- }
-
- pub fn link<T: Write + Seek>(mut self, out: &mut BufWriter<T>) -> LinkResult<()> {
- let mut symbol_graph = SymbolGraph::with_capacity(self.symbols.count());
-
- let relocations = mem::take(&mut self.relocations);
-
- // compute symbol graph
- for rel in relocations.iter() {
- use std::collections::hash_map::Entry;
- if let Some(symbol) = self.get_symbol_id(rel.source_id, rel.sym) {
- let apply_symbol = rel.r#where.0;
- match symbol_graph.entry(apply_symbol) {
- Entry::Occupied(mut o) => {
- o.get_mut().push(symbol);
- }
- Entry::Vacant(v) => {
- v.insert(vec![symbol]);
- }
- }
- }
- }
-
- let symbol_graph = symbol_graph; // no more mutating
-
- let mut exec = Executable::new(0x400000);
- self.bss_addr = 0x50000000;
- exec.set_bss(self.bss_addr, self.bss_size);
- exec.set_interp("/lib/ld-linux.so.2");
- exec.add_lib("libc.so.6");
-
- self.data_addr = exec.data_addr();
-
- let entry_name_str = "main";
- let entry_name_id = self
- .symbol_names
- .get(entry_name_str)
- .ok_or_else(|| LinkError::NoEntry(entry_name_str.into()))?;
- let entry_id = self
- .symbols
- .get_id_from_name(SourceId::NONE, entry_name_id)
- .ok_or_else(|| LinkError::EntryNotDefined(entry_name_str.into()))?;
-
- let mut data = vec![];
- self.add_data_for_symbol(&mut data, &symbol_graph, entry_id)?;
-
- for rel in relocations {
- self.apply_relocation(rel, &mut data)?;
- }
-
- for rel in mem::take(&mut self.undefined_relocations) {
- if let Some(data_offset) = self.get_rel_apply_data_offset(&rel) {
- exec.add_relocation(&self.symbol_names, &rel, self.data_addr + data_offset);
- }
- }
-
- exec.write(&data, out)
- }
-}
-
-fn main() {
- let mut args = std::env::args();
- args.next(); // program name
- let args: Vec<String> = args.collect();
- if args.len() == 1 && args[0] == "--nya" {
+mod linker;
+
+#[derive(Parser, Debug)]
+struct Args {
+ /// Input files: object files (.o) and shared libraries (.so) are supported.
+ inputs: Vec<String>,
+ /// If set, the program will not be linked against libc.
+ ///
+ /// This makes the executable smaller.
+ #[arg(long = "no-std-lib", default_value_t = false)]
+ no_std_lib: bool,
+ /// Output executable path.
+ #[arg(short = 'o', long = "output", default_value = "a.out")]
+ output: String,
+ /// The name of the function which will be used as the entry point.
+ #[arg(short = 'e', long = "entry", default_value = "entry")]
+ entry: String,
+ /// :3
+ #[arg(long = "nya")]
+ nya: bool
+}
+
+fn main_() -> Result<(), String> {
+ let args = Args::parse();
+
+ if args.nya {
println!("hai uwu ^_^");
- return;
+ return Ok(());
}
- let mut inputs: Vec<String> = args;
+
+ let inputs = &args.inputs;
+
+ let mut linker = linker::Linker::new();
+
if inputs.is_empty() {
if cfg!(debug_assertions) {
- inputs.push("test.o".into());
+ // ease of use when debugging
+ linker.add_input("test.o")?;
} else {
- eprintln!("no arguments provided.");
- return;
+ return Err("no inputs provided.".into());
}
}
- let mut object_files = vec![];
- let mut libraries = vec![];
-
- for input in inputs {
- if input.ends_with(".o") {
- object_files.push(input);
- } else if input.ends_with(".so") {
- libraries.push(input);
- }
+
+ if !args.no_std_lib {
+ linker.add_input("libc.so.6")?;
}
- let mut linker = Linker::new();
-
- for filename in &object_files {
- let file = match File::open(filename) {
- Ok(file) => file,
- Err(e) => {
- eprintln!("Error opening {filename}: {e}");
- return;
- }
- };
- let mut file = BufReader::new(file);
- if let Err(e) = linker.process_object(filename, &mut file) {
- eprintln!("Error processing object file {filename}: {e}");
- return;
- }
+ for input in inputs.iter() {
+ linker.add_input(input)?;
}
+
+ linker.link_to_file(&args.output, &args.entry)
+}
- use std::os::unix::fs::OpenOptionsExt;
- let mut out_options = fs::OpenOptions::new();
- out_options
- .write(true)
- .create(true)
- .truncate(true)
- .mode(0o755);
-
- let mut output = match out_options.open("a.out") {
- Ok(out) => BufWriter::new(out),
- Err(e) => {
- eprintln!("Error opening output file: {e}");
- return;
- }
- };
-
- if let Err(e) = linker.link(&mut output) {
- eprintln!("Error linking: {e}");
+fn main() {
+ if let Err(e) = main_() {
+ eprintln!("{e}");
}
}