docs done for now

author: pommicket <pommicket@gmail.com> 2022-11-06 21:52:27 -0500
committer: pommicket <pommicket@gmail.com> 2022-11-06 21:52:27 -0500
commit: dab85a8d1e9b99cbef225b8f5cc7fc001405828d (patch)
tree: 5075a75a7d18f44e7ee0c00b0d5441a13d225988 /src
parent: 6b7d46d33cdb9c99852f2d4378243ab08ba876ba (diff)
4 files changed, 50 insertions, 26 deletions
diff --git a/src/elf.rs b/src/elf.rs
index 4fe2759..2a51082 100644
--- a/src/elf.rs
+++ b/src/elf.rs
@@ -419,32 +419,45 @@ impl RelType {
 
 pub struct Relocation {
 	pub r#type: RelType,
-	pub entry_offset: u64, // file offset of relocation metadata (for debugging)
-	pub offset: u64, // where the relocation should be applied. for ET_REL, this is a file offset; otherwise, it's an address.
+	/// file offset of relocation metadata (for debugging)
+	pub entry_offset: u64, 
+	/// where the relocation should be applied. for [ET_REL], this is a file offset; otherwise, it's an address.
+	pub offset: u64,
+	/// symbol which should be inserted at the offset.
 	pub symbol: Symbol,
+	/// to be added to the symbol's value
 	pub addend: i64,
 }
 
+/// There are multiple formats of ELF file (32-bit/64-bit, little/big-endian),
+/// so each type which reads one of those formats implements this trait.
 pub trait Reader
 where
 	Self: Sized,
 {
-	fn new<T: BufRead + Seek>(reader: T) -> Result<Self>;
+	fn new(reader: impl BufRead + Seek) -> Result<Self>;
 	fn r#type(&self) -> Type;
 	fn machine(&self) -> Machine;
 	fn entry(&self) -> u64;
 	fn symbols(&self) -> &[Symbol];
 	fn relocations(&self) -> &[Relocation];
 	fn symbol_name(&self, sym: &Symbol) -> Result<String>;
+	/// type of section with index `idx`
 	fn section_type(&self, idx: u16) -> Option<SectionType>;
+	/// read data from the section with index `idx` at offset `offset`.
 	fn read_section_data_exact(&self, idx: u16, offset: u64, data: &mut [u8]) -> Result<()>;
 }
 
+/// reader for 32-bit little-endian ELF files.
 pub struct Reader32LE {
 	ehdr: Ehdr32,
 	shdrs: Vec<Shdr32>,
 	symbols: Vec<Symbol>,
+	/// index of .strtab section
 	strtab_idx: Option<u16>,
+	/// All data of all sections.
+	/// We put it all in memory.
+	/// Object files usually aren't huge or anything.
 	section_data: Vec<Vec<u8>>,
 	relocations: Vec<Relocation>,
 }
@@ -461,7 +474,7 @@ impl Reader32LE {
 }
 
 impl Reader for Reader32LE {
-	fn new<T: BufRead + Seek>(mut reader: T) -> Result<Self> {
+	fn new(mut reader: impl BufRead + Seek) -> Result<Self> {
 		use Error::*;
 
 		let mut hdr_buf = [0; 0x34];
@@ -479,6 +492,7 @@ impl Reader for Reader32LE {
 		}
 
 		let mut shdrs = Vec::with_capacity(ehdr.shnum.into());
+		// read section headers
 		for i in 0..ehdr.shnum {
 			let offset = u64::from(ehdr.shoff) + u64::from(ehdr.shentsize) * u64::from(i);
 			reader.seek(io::SeekFrom::Start(offset))?;
@@ -487,7 +501,9 @@ impl Reader for Reader32LE {
 			shdrs.push(Shdr32::from_bytes(shdr_buf));
 		}
 
+		// symtabs[i] = symbol table in section #i, or vec![] if section #i isn't a symbol table.
 		let mut symtabs = Vec::with_capacity(ehdr.shnum.into());
+		// all the symbols
 		let mut symbols = vec![];
 		let mut section_data = Vec::with_capacity(ehdr.shnum.into());
 		let mut strtab_idx = None;
@@ -530,7 +546,7 @@ impl Reader for Reader32LE {
 						SHN_UNDEF => SymbolValue::Undefined,
 						SHN_ABS => SymbolValue::Absolute(sym.value.into()),
 						idx if idx < ehdr.shnum => {
-							if r#type == SymbolType::Section {
+							if r#type == SymbolType::Section && size == 0 {
 								// section symbols have a size of 0, it seems.
 								// i don't know why they don't just use the size of the section.
 								// i'm replacing it here. it makes the code easier to write.
diff --git a/src/linker.rs b/src/linker.rs
index 8aaff33..f8f5760 100644
--- a/src/linker.rs
+++ b/src/linker.rs
@@ -1,8 +1,9 @@
 /*!
 Linker producing small executables.
 Smallness is the *only* goal.
-This linker makes "bad" executables in many ways.
-You shouldn't use it unless all you want is a tiny little executable file.
+This linker makes "bad" executables in many ways. For example,
+all initialized data will be executable. All code will be writable.
+You shouldn't use this unless all you want is a tiny little executable file.
 
 Currently, only 32-bit ELF is supported.
 If you are using C, you will need `gcc-multilib` for the 32-bit headers.
@@ -23,14 +24,13 @@ As such, the resulting executable will be difficult to debug and *C++ exceptions
 may not work*. 
 */
 
-use crate::{elf, util};
+use crate::elf;
 use io::{BufRead, Seek, Write};
 use std::collections::{BTreeMap, HashMap};
 use std::{fmt, fs, io, mem, path};
 
 use elf::Reader as ELFReader;
 use elf::ToBytes;
-use util::u32_from_le_slice;
 
 pub enum LinkError {
 	IO(io::Error),
@@ -38,7 +38,7 @@ pub enum LinkError {
 	TooLarge,
 	/// entry point not found
 	NoEntry(String),
-	/// entry point was declared, and (probably) used, but not defined
+	/// entry point was declared, but not defined
 	EntryNotDefined(String),
 }
 
@@ -71,10 +71,10 @@ impl From<&LinkError> for String {
 pub enum LinkWarning {
 	/// unsupported relocation type
 	RelUnsupported(u8),
-	/// relocation is too large to fit inside its owner
+	/// relocation is too large to fit inside its symbol
 	RelOOB(String, u64),
-	/// relocation is in a BSS section or some shit
-	RelNoData(String, u64),
+	/// relocation does not take place in a symbol's data
+	RelNoSym(String, u64),
 }
 
 impl fmt::Display for LinkWarning {
@@ -82,7 +82,7 @@ impl fmt::Display for LinkWarning {
 		use LinkWarning::*;
 		match self {
 			RelOOB(text, offset) => write!(f, "relocation applied to {text}+0x{offset:x}, which goes outside of the symbol (it will be ignored)."),
-			RelNoData(source, offset) => write!(
+			RelNoSym(source, offset) => write!(
 				f,
 				"relocation {source}+0x{offset:x} not in a data/text section. it will be ignored."
 			),
@@ -131,7 +131,8 @@ impl fmt::Display for ObjectError {
 
 type SymbolNameType = u32;
 /// To be more efficient™, we use integers to keep track of symbol names.
-/// A SymbolName doesn't need to refer to a symbol which has been defined.
+///
+/// A `SymbolName` doesn't need to refer to a symbol which has been defined.
 #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
 struct SymbolName(SymbolNameType);
 /// Keeps track of string-[SymbolName] conversion.
@@ -182,14 +183,18 @@ impl SourceId {
 }
 
 type SymbolIdType = u32;
-//// A symbol ID refers to a symbol *which has a definition*, unlike [SymbolName].
+
+/// A symbol ID refers to a specific *definition* of a symbol.
+///
+/// There might be multiple `SymbolId`s corresponding to a single [SymbolName],
+/// since local symbols with the same name can be defined in separate object files.
 #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
 struct SymbolId(SymbolIdType);
 
 /// Value of a symbol.
 #[derive(Debug)]
 enum SymbolValue {
-	/// We make one big BSS section, this is an offset into it.
+	/// offset into BSS section
 	Bss(u64),
 	/// Data associated with this symbol (machine code for functions,
 	/// bytes making up string literals, etc.)
@@ -317,8 +322,9 @@ pub struct Linker<'a> {
 	warn: Box<dyn Fn(LinkWarning) + 'a>,
 }
 
-/// maps between offsets in an object file and symbols defined in that file.
-/// (Note: it is specific to a single object file, and only kept around temporarily
+/// Maps between offsets in an object file and symbols defined in that file.
+///
+/// (Note: this is specific to a single object file, and only kept around temporarily
 /// during a call to [Linker::add_object].)
 /// This is used to figure out where relocations are taking place.
 struct SymbolOffsetMap {
@@ -353,6 +359,7 @@ impl SymbolOffsetMap {
 }
 
 /// Graph of which symbols depend on which symbols.
+///
 /// This is needed so we don't emit anything for unused symbols.
 struct SymbolGraph {
 	graph: Vec<Vec<SymbolId>>,
@@ -809,7 +816,7 @@ impl<'a> Linker<'a> {
 					addend: rel.addend,
 				});
 			} else {
-				self.emit_warning(LinkWarning::RelNoData(
+				self.emit_warning(LinkWarning::RelNoSym(
 					self.source_name(source_id).into(),
 					rel.entry_offset,
 				));
@@ -905,6 +912,11 @@ impl<'a> Linker<'a> {
 		// guarantee failure if apply_offset can't be converted to usize.
 		let apply_start = apply_offset.try_into().unwrap_or(usize::MAX - 1000);
 
+		fn u32_from_le_slice(data: &[u8]) -> u32 {
+			u32::from_le_bytes([data[0], data[1], data[2], data[3]])
+		}
+		
+
 		match apply_symbol_info.value {
 			Data(_) => {
 				let mut in_bounds = true;
@@ -927,7 +939,7 @@ impl<'a> Linker<'a> {
 				}
 			}
 			_ => {
-				self.emit_warning(LinkWarning::RelNoData(
+				self.emit_warning(LinkWarning::RelNoSym(
 					self.source_name(rel.source_id).into(),
 					apply_offset,
 				));
diff --git a/src/main.rs b/src/main.rs
index ec66464..e25b837 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -2,8 +2,8 @@
 @TODO:
 - bounds check on bss
 - make bss optional
-- finish docs
 - disable "warning: relocation XXX not in a data/text section" for .rel.eh_frame
+    - these warnings are being generated in two places. do they need to be?
 - make sure --no-stdlib generates a tiny executable
 - make executables more tiny (overlap sections, etc.)
 - static libraries
@@ -18,7 +18,6 @@ compile_error! {"WHY do you have a big endian machine???? it's the 21st century,
 
 mod elf;
 pub mod linker;
-mod util;
 
 #[derive(Parser, Debug)]
 struct Args {
diff --git a/src/util.rs b/src/util.rs
deleted file mode 100644
index d8ab9c8..0000000
--- a/src/util.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-pub fn u32_from_le_slice(data: &[u8]) -> u32 {
-	u32::from_le_bytes([data[0], data[1], data[2], data[3]])
-}
author	pommicket <pommicket@gmail.com>	2022-11-06 21:52:27 -0500
committer	pommicket <pommicket@gmail.com>	2022-11-06 21:52:27 -0500
commit	dab85a8d1e9b99cbef225b8f5cc7fc001405828d (patch)
tree	5075a75a7d18f44e7ee0c00b0d5441a13d225988 /src
parent	6b7d46d33cdb9c99852f2d4378243ab08ba876ba (diff)