From 7b14e052884a1cebf77045d66a686cadfe4d38f6 Mon Sep 17 00:00:00 2001 From: pommicket Date: Wed, 9 Nov 2022 11:45:22 -0500 Subject: static libraries also fixed bug where MultipleDeclarations were getting emitted erroneously --- .gitignore | 1 + src/ar.rs | 198 ++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 +- src/linker.rs | 89 +++++++++++++++---- src/main.rs | 15 ++-- tests/static-lib-test.c | 14 +++ tests/static-lib.h | 3 + tests/static-lib1.c | 7 ++ tests/static-lib2-long-name.c | 8 ++ tests/tests.rs | 47 ++++++++++ 10 files changed, 358 insertions(+), 27 deletions(-) create mode 100644 src/ar.rs create mode 100644 tests/static-lib-test.c create mode 100644 tests/static-lib.h create mode 100644 tests/static-lib1.c create mode 100644 tests/static-lib2-long-name.c diff --git a/.gitignore b/.gitignore index 437eab9..7388046 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ /target *.o *.so +*.a tags TAGS *.out diff --git a/src/ar.rs b/src/ar.rs new file mode 100644 index 0000000..6b7b47d --- /dev/null +++ b/src/ar.rs @@ -0,0 +1,198 @@ +use io::{BufRead, Seek, SeekFrom}; +/// reads .a files +use std::{fmt, io, mem}; + +#[derive(Debug)] +pub enum Error { + IO(io::Error), + NotAnArchive, + BadNumber, + BadUtf8, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use Error::*; + match self { + IO(i) if i.kind() == io::ErrorKind::UnexpectedEof => write!(f, "unexpected EOF"), + IO(e) => write!(f, "IO error: {e}"), + NotAnArchive => write!(f, "Not an archive file."), + BadNumber => write!(f, "Bad number in archive file (file corrupt?)"), + BadUtf8 => write!(f, "Bad UTF-8 in file name."), + } + } +} + +impl From for String { + fn from(e: Error) -> Self { + format!("{e}") + } +} + +impl From for Error { + fn from(e: io::Error) -> Self { + Self::IO(e) + } +} + +type Result = std::result::Result; + +struct FileMetadata { + name: String, + offset: u64, + size: u64, +} + +pub struct Archive { + archive: T, + files: Vec, +} + +impl Archive { + pub fn new(mut archive: T) -> Result { + use Error::*; + + fn parse_decimal(decimal: &[u8]) -> Result { + let s = std::str::from_utf8(decimal).map_err(|_| Error::BadNumber)?; + s.trim_end().parse().map_err(|_| Error::BadNumber) + } + + fn parse_name(bytes: &[u8]) -> Result<&str> { + let s = std::str::from_utf8(bytes).map_err(|_| Error::BadUtf8)?; + Ok(&s[..=s.rfind(|c| c != ' ').unwrap_or(0)]) + } + + let mut signature = [0; 8]; + archive.read_exact(&mut signature)?; + if &signature != b"!\n" { + return Err(NotAnArchive); + } + + #[repr(C)] + #[derive(Debug)] + struct RawMetadata { + name: [u8; 16], + _timestamp: [u8; 12], + _owner_id: [u8; 6], + _group_id: [u8; 6], + _mode: [u8; 8], + size: [u8; 10], + _end_char: [u8; 2], + } + + struct Metadata { + name: [u8; 16], + offset: u64, + size: u64, + } + + let mut metadata = vec![]; + + loop { + let mut buf = [0; mem::size_of::()]; + let size = archive.read(&mut buf)?; + if size < buf.len() { + break; + } + let raw: RawMetadata = unsafe { mem::transmute(buf) }; + let parsed = Metadata { + name: raw.name, + offset: archive.stream_position()?, + size: parse_decimal(&raw.size)?, + }; + // this can't panic, since size is 10 digits max. + let size: i64 = parsed.size.try_into().unwrap(); + let offset = archive.seek(SeekFrom::Current(size))?; + if offset % 2 == 1 { + // metadata is aligned to 2 bytes + archive.seek(SeekFrom::Current(1))?; + } + metadata.push(parsed); + } + + let mut long_filenames; + + // see https://github.com/rust-lang/rust-clippy/issues/9274 + #[allow(clippy::read_zero_byte_vec)] + { + long_filenames = vec![]; + + // in GNU archives, long filenames are stored in the "//" file. + for f in metadata.iter() { + if parse_name(&f.name)? == "//" { + // we found it! + archive.seek(SeekFrom::Start(f.offset))?; + long_filenames = vec![0; f.size as usize]; + archive.read_exact(&mut long_filenames)?; + break; + } + } + } + + let mut files = vec![]; + for f in metadata.iter() { + let name = parse_name(&f.name)?; + if name == "/" || name == "//" { + continue; + } + let slice = if let Some('/') = name.chars().next() { + // a long filename + let offset_str = name[1..].trim_end(); + let offset: usize = offset_str.parse().map_err(|_| BadNumber)?; + let len = long_filenames[offset..] + .iter() + .position(|&x| x == b'/') + .unwrap_or(0); + let bytes = &long_filenames[offset..offset + len]; + std::str::from_utf8(bytes).map_err(|_| BadUtf8)? + } else if let Some('/') = name.chars().last() { + // filename is ended with / in GNU archives + &name[..name.len() - 1] + } else { + name + }; + let filename = String::from(slice); + files.push(FileMetadata { + name: filename, + offset: f.offset, + size: f.size, + }); + } + + Ok(Self { archive, files }) + } + + /// Get number of files in archive. + pub fn file_count(&self) -> usize { + self.files.len() + } + + /// Get name of file. + pub fn file_name(&self, index: usize) -> &str { + &self.files[index].name + } + + /// Get all file data into memory. + /// + /// I tried making a "sub-file" type but it was a mess. + pub fn file_data(&mut self, index: usize) -> Result> { + self.archive + .seek(SeekFrom::Start(self.files[index].offset))?; + let mut data = vec![0; self.files[index].size as usize]; + self.archive.read_exact(&mut data)?; + Ok(data) + } +} + +/// example usage. prints out the contents of the archive file. (panics on error.) +pub fn _list(path: &str) { + let f = std::fs::File::open(path).unwrap(); + let mut ar = Archive::new(io::BufReader::new(f)).unwrap(); + for i in 0..ar.file_count() { + use io::Write; + println!("\x1b[1m---{}---\x1b[0m", ar.file_name(i)); + let bytes = ar.file_data(i).unwrap(); + std::io::stdout().write_all(&bytes).unwrap(); + println!("\n"); + } +} diff --git a/src/lib.rs b/src/lib.rs index e11d058..74fba84 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,3 @@ -mod elf; +pub mod ar; +pub mod elf; pub mod linker; diff --git a/src/linker.rs b/src/linker.rs index c82d1d3..2ec1dac 100644 --- a/src/linker.rs +++ b/src/linker.rs @@ -27,8 +27,8 @@ Notes about using C/C++: Otherwise you will get a segfault/illegal instruction/etc: ```c (extern "C") void entry() { - ... - exit(0); + ... + exit(0); } ``` - You will need `gcc-multilib` for the 32-bit headers. @@ -38,10 +38,10 @@ Notes about using C++: - I recommend you do something like this: ```c extern "C" void entry() { - exit(main()); + exit(main()); } int main() { - ... + ... } ``` This ensures that all destructors are called for local objects in main. @@ -65,14 +65,16 @@ Notes on executable size: it is used. It (thankfully) doesn't seem to be worth it to use `dlsym`. */ -use crate::elf; +use crate::{ar, elf}; use io::{BufRead, Seek, Write}; use std::collections::{BTreeMap, HashMap}; use std::{fmt, fs, io, mem, path}; +use ar::Archive; use elf::Reader as ELFReader; use elf::ToBytes; +#[derive(Debug)] pub enum LinkError { IO(io::Error), /// executable is too large (>4GB on 32-bit platforms) @@ -124,6 +126,7 @@ impl From<&LinkError> for String { } } +#[derive(Debug)] pub enum LinkWarning { /// unsupported relocation type RelUnsupported(u8), @@ -154,10 +157,13 @@ impl From<&LinkWarning> for String { } /// error produced by [Linker::add_object] +#[derive(Debug)] pub enum ObjectError { IO(io::Error), /// ELF format error Elf(elf::Error), + /// Static library (.a) format error + Archive(ar::Error), /// wrong type of ELF file BadType, /// compile command failed @@ -178,6 +184,12 @@ impl From for ObjectError { } } +impl From for ObjectError { + fn from(e: ar::Error) -> Self { + Self::Archive(e) + } +} + impl From<&ObjectError> for String { fn from(e: &ObjectError) -> String { format!("{e}") @@ -190,6 +202,7 @@ impl fmt::Display for ObjectError { match self { IO(e) => write!(f, "{e}"), Elf(e) => write!(f, "{e}"), + Archive(e) => write!(f, "{e}"), BadType => write!(f, "wrong type of ELF file (not an object file)"), CommandFailed(status) => write!(f, "command failed: {status}"), } @@ -890,11 +903,11 @@ impl LinkerOutput { }; out.write_all(&phdr_dynamic.to_bytes())?; } - + out.seek(io::SeekFrom::End(0))?; Ok(LinkInfo { data_size: self.data.len() as u64, - exec_size: out.stream_position()? + exec_size: out.stream_position()?, }) } } @@ -980,14 +993,10 @@ impl<'a> Linker<'a> { if name == "_GLOBAL_OFFSET_TABLE_" { self.emit_warning(LinkWarning::MaybePic(self.source_name(source).into())); } - + let name_id = self.symbol_names.add(name); let size = symbol.size; - if self.symbols.get_id_from_name(source, name_id).is_some() { - self.emit_warning(LinkWarning::MultipleDefinitions(elf.symbol_name(symbol)?)); - } - let value = match symbol.value { elf::SymbolValue::Undefined => None, elf::SymbolValue::Absolute(n) => Some(SymbolValue::Absolute(n)), @@ -1012,6 +1021,10 @@ impl<'a> Linker<'a> { }; if let Some(value) = value { + if self.symbols.get_id_from_name(source, name_id).is_some() { + self.emit_warning(LinkWarning::MultipleDefinitions(elf.symbol_name(symbol)?)); + } + let info = SymbolInfo { value }; match symbol.bind { elf::SymbolBind::Local => self.symbols.add_local(source, name_id, info), @@ -1023,9 +1036,10 @@ impl<'a> Linker<'a> { Ok(()) } - /// add an object file (.o). - /// name doesn't need to correspond to the actual file name. - /// it only exists for debugging purposes. + /// Add an object file (.o). + /// + /// `name` doesn't need to correspond to the actual file name. + /// It only exists for debugging purposes. pub fn add_object(&mut self, name: &str, reader: impl BufRead + Seek) -> ObjectResult<()> { use ObjectError::*; @@ -1071,6 +1085,38 @@ impl<'a> Linker<'a> { self.add_object(&path.to_string_lossy(), &mut file) } + /// Add a static library (.a) + /// + /// `name` doesn't need to correspond to the actual file name. + /// It only exists for debugging purposes. + pub fn add_static_library( + &mut self, + name: &str, + reader: impl BufRead + Seek, + ) -> ObjectResult<()> { + let mut archive = Archive::new(reader)?; + for i in 0..archive.file_count() { + let mut objname = String::from(name); + objname.push('('); + objname += archive.file_name(i); + objname.push(')'); + let bytes = archive.file_data(i)?; + let reader = io::Cursor::new(&bytes[..]); + self.add_object(&objname, reader)?; + } + Ok(()) + } + + pub fn add_static_library_from_file( + &mut self, + path: impl AsRef, + ) -> ObjectResult<()> { + let path = path.as_ref(); + let file = fs::File::open(path)?; + let mut file = io::BufReader::new(file); + self.add_static_library(&path.to_string_lossy(), &mut file) + } + /// Add a dynamic library (.so). `name` can be a full path or /// something like "libc.so.6" --- any string you would pass to `dlopen`. pub fn add_dynamic_library(&mut self, name: &str) -> ObjectResult<()> { @@ -1115,6 +1161,7 @@ impl<'a> Linker<'a> { enum FileType { Object, DynamicLibrary, + StaticLibrary, C, CPlusPlus, Other, @@ -1129,6 +1176,9 @@ impl<'a> Linker<'a> { if input.ends_with(".c") { return C; } + if input.ends_with(".a") { + return StaticLibrary; + } if input.ends_with(".cpp") || input.ends_with(".cc") || input.ends_with(".cxx") @@ -1159,6 +1209,9 @@ impl<'a> Linker<'a> { DynamicLibrary => self .add_dynamic_library(input) .map_err(|e| format!("Failed to process library file {input}: {e}")), + StaticLibrary => self + .add_static_library_from_file(input) + .map_err(|e| format!("Failed to process static library {input}: {e}")), Other => Err(format!("Unrecognized file type: {input}")), } } @@ -1349,7 +1402,11 @@ impl<'a> Linker<'a> { /// Instead, define `void
(void)`, and make sure you call `exit`, /// or do an exit system interrupt at the end of the function --- if you just return, /// you'll get a segmentation fault. - pub fn link_to_file(&self, path: impl AsRef, entry: &str) -> Result { + pub fn link_to_file( + &self, + path: impl AsRef, + entry: &str, + ) -> Result { let path = path.as_ref(); let mut out_options = fs::OpenOptions::new(); out_options.write(true).create(true).truncate(true); diff --git a/src/main.rs b/src/main.rs index 9ff86d9..0142847 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,8 @@ -/* -@TODO: -- static libraries -*/ - extern crate clap; -use std::io; -use io::Write; use clap::Parser; +use io::Write; +use std::io; #[cfg(target_endian = "big")] compile_error! {"WHY do you have a big endian machine???? it's the 21st century, buddy. this program won't work fuck you"} @@ -110,16 +105,16 @@ fn main_() -> Result<(), String> { if args.verbose { print!("linking {}... ", args.output); } - + io::stdout().flush().unwrap_or(()); let info = linker.link_to_file(&args.output, &args.entry)?; - + if args.verbose { println!("\x1b[92msuccess!\x1b[0m"); println!("data size: {:7} bytes", info.data_size); println!("executable size:{:7} bytes", info.exec_size); } - + Ok(()) } diff --git a/tests/static-lib-test.c b/tests/static-lib-test.c new file mode 100644 index 0000000..2b4d1dd --- /dev/null +++ b/tests/static-lib-test.c @@ -0,0 +1,14 @@ +#include "static-lib.h" + +#include +#include + +int main(void) { + printf("%d\n",f()); + printf("%d\n",f()); + return 0; +} + +void entry(void) { + exit(main()); +} diff --git a/tests/static-lib.h b/tests/static-lib.h new file mode 100644 index 0000000..c848962 --- /dev/null +++ b/tests/static-lib.h @@ -0,0 +1,3 @@ +extern int p; +int f(); +int g(); diff --git a/tests/static-lib1.c b/tests/static-lib1.c new file mode 100644 index 0000000..56a5c00 --- /dev/null +++ b/tests/static-lib1.c @@ -0,0 +1,7 @@ +#include "static-lib.h" + +int p; + +int f() { + return 17 + g(); +} diff --git a/tests/static-lib2-long-name.c b/tests/static-lib2-long-name.c new file mode 100644 index 0000000..13b9854 --- /dev/null +++ b/tests/static-lib2-long-name.c @@ -0,0 +1,8 @@ +#include "static-lib.h" +#include + +int g() { + ++p; + printf("call %d\n", p); + return p; +} diff --git a/tests/tests.rs b/tests/tests.rs index 14f90bc..24e1881 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -91,6 +91,53 @@ mod tests { assert_eq!(output.stdout, b"7\n8\n"); } + #[test] + fn static_lib_c() { + // compile .o files + let status = Command::new("gcc") + .args(&[ + "-m32", + "-fno-pic", + "-c", + &file("static-lib1.c"), + "-o", + &file("static-lib1.o"), + ]) + .status() + .unwrap(); + assert!(status.success()); + let status = Command::new("gcc") + .args(&[ + "-m32", + "-fno-pic", + "-c", + &file("static-lib2-long-name.c"), + "-o", + &file("static-lib2-long-name.o"), + ]) + .status() + .unwrap(); + assert!(status.success()); + // make .a file + let status = Command::new("ar") + .args(&[ + "rc", + &file("static-lib.a"), + &file("static-lib1.o"), + &file("static-lib2-long-name.o"), + ]) + .status() + .unwrap(); + assert!(status.success()); + let mut linker = test_linker(); + add(&mut linker, "static-lib.a", true); + add(&mut linker, "static-lib-test.c", true); + add(&mut linker, "libc.so.6", false); + link(&linker, "static-lib-test.out", "entry"); + let output = run("static-lib-test.out"); + assert_eq!(output.stdout, b"call 1\n18\ncall 2\n19\n"); + } + #[test] fn cpp() { let mut linker = test_linker(); -- cgit v1.2.3