summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: pommicket <pommicket@gmail.com> 2022-11-09 11:45:22 -0500
committer: pommicket <pommicket@gmail.com> 2022-11-09 11:45:22 -0500
commit: 7b14e052884a1cebf77045d66a686cadfe4d38f6 (patch)
tree: a158e2f214b77c357b5d0f7a0261d6f7a8c03fc1
parent: 0049dc9cf781c65f5a633f51f0e705274853060c (diff)
static libraries
also fixed a bug where MultipleDefinitions warnings were getting emitted erroneously
-rw-r--r-- .gitignore 1
-rw-r--r-- src/ar.rs 198
-rw-r--r-- src/lib.rs 3
-rw-r--r-- src/linker.rs 89
-rw-r--r-- src/main.rs 15
-rw-r--r-- tests/static-lib-test.c 14
-rw-r--r-- tests/static-lib.h 3
-rw-r--r-- tests/static-lib1.c 7
-rw-r--r-- tests/static-lib2-long-name.c 8
-rw-r--r-- tests/tests.rs 47
10 files changed, 358 insertions, 27 deletions
diff --git a/.gitignore b/.gitignore
index 437eab9..7388046 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
/target
*.o
*.so
+*.a
tags
TAGS
*.out
diff --git a/src/ar.rs b/src/ar.rs
new file mode 100644
index 0000000..6b7b47d
--- /dev/null
+++ b/src/ar.rs
@@ -0,0 +1,198 @@
+use io::{BufRead, Seek, SeekFrom};
+/// reads .a files
+use std::{fmt, io, mem};
+
+#[derive(Debug)]
+pub enum Error {
+ IO(io::Error),
+ NotAnArchive,
+ BadNumber,
+ BadUtf8,
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ use Error::*;
+ match self {
+ IO(i) if i.kind() == io::ErrorKind::UnexpectedEof => write!(f, "unexpected EOF"),
+ IO(e) => write!(f, "IO error: {e}"),
+ NotAnArchive => write!(f, "Not an archive file."),
+ BadNumber => write!(f, "Bad number in archive file (file corrupt?)"),
+ BadUtf8 => write!(f, "Bad UTF-8 in file name."),
+ }
+ }
+}
+
+impl From<Error> for String {
+ fn from(e: Error) -> Self {
+ format!("{e}")
+ }
+}
+
+impl From<io::Error> for Error {
+ fn from(e: io::Error) -> Self {
+ Self::IO(e)
+ }
+}
+
+type Result<T> = std::result::Result<T, Error>;
+
+struct FileMetadata {
+ name: String,
+ offset: u64,
+ size: u64,
+}
+
+pub struct Archive<T: BufRead + Seek> {
+ archive: T,
+ files: Vec<FileMetadata>,
+}
+
+impl<T: BufRead + Seek> Archive<T> {
+ pub fn new(mut archive: T) -> Result<Self> {
+ use Error::*;
+
+ fn parse_decimal(decimal: &[u8]) -> Result<u64> {
+ let s = std::str::from_utf8(decimal).map_err(|_| Error::BadNumber)?;
+ s.trim_end().parse().map_err(|_| Error::BadNumber)
+ }
+
+ fn parse_name(bytes: &[u8]) -> Result<&str> {
+ let s = std::str::from_utf8(bytes).map_err(|_| Error::BadUtf8)?;
+ Ok(&s[..=s.rfind(|c| c != ' ').unwrap_or(0)])
+ }
+
+ let mut signature = [0; 8];
+ archive.read_exact(&mut signature)?;
+ if &signature != b"!<arch>\n" {
+ return Err(NotAnArchive);
+ }
+
+ #[repr(C)]
+ #[derive(Debug)]
+ struct RawMetadata {
+ name: [u8; 16],
+ _timestamp: [u8; 12],
+ _owner_id: [u8; 6],
+ _group_id: [u8; 6],
+ _mode: [u8; 8],
+ size: [u8; 10],
+ _end_char: [u8; 2],
+ }
+
+ struct Metadata {
+ name: [u8; 16],
+ offset: u64,
+ size: u64,
+ }
+
+ let mut metadata = vec![];
+
+ loop {
+ let mut buf = [0; mem::size_of::<RawMetadata>()];
+ let size = archive.read(&mut buf)?;
+ if size < buf.len() {
+ break;
+ }
+ let raw: RawMetadata = unsafe { mem::transmute(buf) };
+ let parsed = Metadata {
+ name: raw.name,
+ offset: archive.stream_position()?,
+ size: parse_decimal(&raw.size)?,
+ };
+ // this can't panic, since size is 10 digits max.
+ let size: i64 = parsed.size.try_into().unwrap();
+ let offset = archive.seek(SeekFrom::Current(size))?;
+ if offset % 2 == 1 {
+ // metadata is aligned to 2 bytes
+ archive.seek(SeekFrom::Current(1))?;
+ }
+ metadata.push(parsed);
+ }
+
+ let mut long_filenames;
+
+ // see https://github.com/rust-lang/rust-clippy/issues/9274
+ #[allow(clippy::read_zero_byte_vec)]
+ {
+ long_filenames = vec![];
+
+ // in GNU archives, long filenames are stored in the "//" file.
+ for f in metadata.iter() {
+ if parse_name(&f.name)? == "//" {
+ // we found it!
+ archive.seek(SeekFrom::Start(f.offset))?;
+ long_filenames = vec![0; f.size as usize];
+ archive.read_exact(&mut long_filenames)?;
+ break;
+ }
+ }
+ }
+
+ let mut files = vec![];
+ for f in metadata.iter() {
+ let name = parse_name(&f.name)?;
+ if name == "/" || name == "//" {
+ continue;
+ }
+ let slice = if let Some('/') = name.chars().next() {
+ // a long filename
+ let offset_str = name[1..].trim_end();
+ let offset: usize = offset_str.parse().map_err(|_| BadNumber)?;
+ let len = long_filenames[offset..]
+ .iter()
+ .position(|&x| x == b'/')
+ .unwrap_or(0);
+ let bytes = &long_filenames[offset..offset + len];
+ std::str::from_utf8(bytes).map_err(|_| BadUtf8)?
+ } else if let Some('/') = name.chars().last() {
+ // filename is ended with / in GNU archives
+ &name[..name.len() - 1]
+ } else {
+ name
+ };
+ let filename = String::from(slice);
+ files.push(FileMetadata {
+ name: filename,
+ offset: f.offset,
+ size: f.size,
+ });
+ }
+
+ Ok(Self { archive, files })
+ }
+
+ /// Get number of files in archive.
+ pub fn file_count(&self) -> usize {
+ self.files.len()
+ }
+
+ /// Get name of file.
+ pub fn file_name(&self, index: usize) -> &str {
+ &self.files[index].name
+ }
+
+ /// Get all file data into memory.
+ ///
+ /// I tried making a "sub-file" type but it was a mess.
+ pub fn file_data(&mut self, index: usize) -> Result<Vec<u8>> {
+ self.archive
+ .seek(SeekFrom::Start(self.files[index].offset))?;
+ let mut data = vec![0; self.files[index].size as usize];
+ self.archive.read_exact(&mut data)?;
+ Ok(data)
+ }
+}
+
+/// example usage. prints out the contents of the archive file. (panics on error.)
+pub fn _list(path: &str) {
+ let f = std::fs::File::open(path).unwrap();
+ let mut ar = Archive::new(io::BufReader::new(f)).unwrap();
+ for i in 0..ar.file_count() {
+ use io::Write;
+ println!("\x1b[1m---{}---\x1b[0m", ar.file_name(i));
+ let bytes = ar.file_data(i).unwrap();
+ std::io::stdout().write_all(&bytes).unwrap();
+ println!("\n");
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index e11d058..74fba84 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,2 +1,3 @@
-mod elf;
+pub mod ar;
+pub mod elf;
pub mod linker;
diff --git a/src/linker.rs b/src/linker.rs
index c82d1d3..2ec1dac 100644
--- a/src/linker.rs
+++ b/src/linker.rs
@@ -27,8 +27,8 @@ Notes about using C/C++:
Otherwise you will get a segfault/illegal instruction/etc:
```c
(extern "C") void entry() {
- ...
- exit(0);
+ ...
+ exit(0);
}
```
- You will need `gcc-multilib` for the 32-bit headers.
@@ -38,10 +38,10 @@ Notes about using C++:
- I recommend you do something like this:
```c
extern "C" void entry() {
- exit(main());
+ exit(main());
}
int main() {
- ...
+ ...
}
```
This ensures that all destructors are called for local objects in main.
@@ -65,14 +65,16 @@ Notes on executable size:
it is used. It (thankfully) doesn't seem to be worth it to use `dlsym`.
*/
-use crate::elf;
+use crate::{ar, elf};
use io::{BufRead, Seek, Write};
use std::collections::{BTreeMap, HashMap};
use std::{fmt, fs, io, mem, path};
+use ar::Archive;
use elf::Reader as ELFReader;
use elf::ToBytes;
+#[derive(Debug)]
pub enum LinkError {
IO(io::Error),
/// executable is too large (>4GB on 32-bit platforms)
@@ -124,6 +126,7 @@ impl From<&LinkError> for String {
}
}
+#[derive(Debug)]
pub enum LinkWarning {
/// unsupported relocation type
RelUnsupported(u8),
@@ -154,10 +157,13 @@ impl From<&LinkWarning> for String {
}
/// error produced by [Linker::add_object]
+#[derive(Debug)]
pub enum ObjectError {
IO(io::Error),
/// ELF format error
Elf(elf::Error),
+ /// Static library (.a) format error
+ Archive(ar::Error),
/// wrong type of ELF file
BadType,
/// compile command failed
@@ -178,6 +184,12 @@ impl From<elf::Error> for ObjectError {
}
}
+impl From<ar::Error> for ObjectError {
+ fn from(e: ar::Error) -> Self {
+ Self::Archive(e)
+ }
+}
+
impl From<&ObjectError> for String {
fn from(e: &ObjectError) -> String {
format!("{e}")
@@ -190,6 +202,7 @@ impl fmt::Display for ObjectError {
match self {
IO(e) => write!(f, "{e}"),
Elf(e) => write!(f, "{e}"),
+ Archive(e) => write!(f, "{e}"),
BadType => write!(f, "wrong type of ELF file (not an object file)"),
CommandFailed(status) => write!(f, "command failed: {status}"),
}
@@ -890,11 +903,11 @@ impl LinkerOutput {
};
out.write_all(&phdr_dynamic.to_bytes())?;
}
-
+
out.seek(io::SeekFrom::End(0))?;
Ok(LinkInfo {
data_size: self.data.len() as u64,
- exec_size: out.stream_position()?
+ exec_size: out.stream_position()?,
})
}
}
@@ -980,14 +993,10 @@ impl<'a> Linker<'a> {
if name == "_GLOBAL_OFFSET_TABLE_" {
self.emit_warning(LinkWarning::MaybePic(self.source_name(source).into()));
}
-
+
let name_id = self.symbol_names.add(name);
let size = symbol.size;
- if self.symbols.get_id_from_name(source, name_id).is_some() {
- self.emit_warning(LinkWarning::MultipleDefinitions(elf.symbol_name(symbol)?));
- }
-
let value = match symbol.value {
elf::SymbolValue::Undefined => None,
elf::SymbolValue::Absolute(n) => Some(SymbolValue::Absolute(n)),
@@ -1012,6 +1021,10 @@ impl<'a> Linker<'a> {
};
if let Some(value) = value {
+ if self.symbols.get_id_from_name(source, name_id).is_some() {
+ self.emit_warning(LinkWarning::MultipleDefinitions(elf.symbol_name(symbol)?));
+ }
+
let info = SymbolInfo { value };
match symbol.bind {
elf::SymbolBind::Local => self.symbols.add_local(source, name_id, info),
@@ -1023,9 +1036,10 @@ impl<'a> Linker<'a> {
Ok(())
}
- /// add an object file (.o).
- /// name doesn't need to correspond to the actual file name.
- /// it only exists for debugging purposes.
+ /// Add an object file (.o).
+ ///
+ /// `name` doesn't need to correspond to the actual file name.
+ /// It only exists for debugging purposes.
pub fn add_object(&mut self, name: &str, reader: impl BufRead + Seek) -> ObjectResult<()> {
use ObjectError::*;
@@ -1071,6 +1085,38 @@ impl<'a> Linker<'a> {
self.add_object(&path.to_string_lossy(), &mut file)
}
+ /// Add a static library (.a)
+ ///
+ /// `name` doesn't need to correspond to the actual file name.
+ /// It only exists for debugging purposes.
+ pub fn add_static_library(
+ &mut self,
+ name: &str,
+ reader: impl BufRead + Seek,
+ ) -> ObjectResult<()> {
+ let mut archive = Archive::new(reader)?;
+ for i in 0..archive.file_count() {
+ let mut objname = String::from(name);
+ objname.push('(');
+ objname += archive.file_name(i);
+ objname.push(')');
+ let bytes = archive.file_data(i)?;
+ let reader = io::Cursor::new(&bytes[..]);
+ self.add_object(&objname, reader)?;
+ }
+ Ok(())
+ }
+
+ pub fn add_static_library_from_file(
+ &mut self,
+ path: impl AsRef<path::Path>,
+ ) -> ObjectResult<()> {
+ let path = path.as_ref();
+ let file = fs::File::open(path)?;
+ let mut file = io::BufReader::new(file);
+ self.add_static_library(&path.to_string_lossy(), &mut file)
+ }
+
/// Add a dynamic library (.so). `name` can be a full path or
/// something like "libc.so.6" --- any string you would pass to `dlopen`.
pub fn add_dynamic_library(&mut self, name: &str) -> ObjectResult<()> {
@@ -1115,6 +1161,7 @@ impl<'a> Linker<'a> {
enum FileType {
Object,
DynamicLibrary,
+ StaticLibrary,
C,
CPlusPlus,
Other,
@@ -1129,6 +1176,9 @@ impl<'a> Linker<'a> {
if input.ends_with(".c") {
return C;
}
+ if input.ends_with(".a") {
+ return StaticLibrary;
+ }
if input.ends_with(".cpp")
|| input.ends_with(".cc")
|| input.ends_with(".cxx")
@@ -1159,6 +1209,9 @@ impl<'a> Linker<'a> {
DynamicLibrary => self
.add_dynamic_library(input)
.map_err(|e| format!("Failed to process library file {input}: {e}")),
+ StaticLibrary => self
+ .add_static_library_from_file(input)
+ .map_err(|e| format!("Failed to process static library {input}: {e}")),
Other => Err(format!("Unrecognized file type: {input}")),
}
}
@@ -1349,7 +1402,11 @@ impl<'a> Linker<'a> {
/// Instead, define `void <main/entry/something_else>(void)`, and make sure you call `exit`,
/// or do an exit system interrupt at the end of the function --- if you just return,
/// you'll get a segmentation fault.
- pub fn link_to_file(&self, path: impl AsRef<path::Path>, entry: &str) -> Result<LinkInfo, String> {
+ pub fn link_to_file(
+ &self,
+ path: impl AsRef<path::Path>,
+ entry: &str,
+ ) -> Result<LinkInfo, String> {
let path = path.as_ref();
let mut out_options = fs::OpenOptions::new();
out_options.write(true).create(true).truncate(true);
diff --git a/src/main.rs b/src/main.rs
index 9ff86d9..0142847 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,8 @@
-/*
-@TODO:
-- static libraries
-*/
-
extern crate clap;
-use std::io;
-use io::Write;
use clap::Parser;
+use io::Write;
+use std::io;
#[cfg(target_endian = "big")]
compile_error! {"WHY do you have a big endian machine???? it's the 21st century, buddy. this program won't work fuck you"}
@@ -110,16 +105,16 @@ fn main_() -> Result<(), String> {
if args.verbose {
print!("linking {}... ", args.output);
}
-
+
io::stdout().flush().unwrap_or(());
let info = linker.link_to_file(&args.output, &args.entry)?;
-
+
if args.verbose {
println!("\x1b[92msuccess!\x1b[0m");
println!("data size: {:7} bytes", info.data_size);
println!("executable size:{:7} bytes", info.exec_size);
}
-
+
Ok(())
}
diff --git a/tests/static-lib-test.c b/tests/static-lib-test.c
new file mode 100644
index 0000000..2b4d1dd
--- /dev/null
+++ b/tests/static-lib-test.c
@@ -0,0 +1,14 @@
+#include "static-lib.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+
+int main(void) {
+ printf("%d\n",f());
+ printf("%d\n",f());
+ return 0;
+}
+
+void entry(void) {
+ exit(main());
+}
diff --git a/tests/static-lib.h b/tests/static-lib.h
new file mode 100644
index 0000000..c848962
--- /dev/null
+++ b/tests/static-lib.h
@@ -0,0 +1,3 @@
+extern int p;
+int f();
+int g();
diff --git a/tests/static-lib1.c b/tests/static-lib1.c
new file mode 100644
index 0000000..56a5c00
--- /dev/null
+++ b/tests/static-lib1.c
@@ -0,0 +1,7 @@
+#include "static-lib.h"
+
+int p;
+
+int f() {
+ return 17 + g();
+}
diff --git a/tests/static-lib2-long-name.c b/tests/static-lib2-long-name.c
new file mode 100644
index 0000000..13b9854
--- /dev/null
+++ b/tests/static-lib2-long-name.c
@@ -0,0 +1,8 @@
+#include "static-lib.h"
+#include <stdio.h>
+
+int g() {
+ ++p;
+ printf("call %d\n", p);
+ return p;
+}
diff --git a/tests/tests.rs b/tests/tests.rs
index 14f90bc..24e1881 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -92,6 +92,53 @@ mod tests {
}
#[test]
+ fn static_lib_c() {
+ // compile .o files
+ let status = Command::new("gcc")
+ .args(&[
+ "-m32",
+ "-fno-pic",
+ "-c",
+ &file("static-lib1.c"),
+ "-o",
+ &file("static-lib1.o"),
+ ])
+ .status()
+ .unwrap();
+ assert!(status.success());
+ let status = Command::new("gcc")
+ .args(&[
+ "-m32",
+ "-fno-pic",
+ "-c",
+ &file("static-lib2-long-name.c"),
+ "-o",
+ &file("static-lib2-long-name.o"),
+ ])
+ .status()
+ .unwrap();
+ assert!(status.success());
+ // make .a file
+ let status = Command::new("ar")
+ .args(&[
+ "rc",
+ &file("static-lib.a"),
+ &file("static-lib1.o"),
+ &file("static-lib2-long-name.o"),
+ ])
+ .status()
+ .unwrap();
+ assert!(status.success());
+ let mut linker = test_linker();
+ add(&mut linker, "static-lib.a", true);
+ add(&mut linker, "static-lib-test.c", true);
+ add(&mut linker, "libc.so.6", false);
+ link(&linker, "static-lib-test.out", "entry");
+ let output = run("static-lib-test.out");
+ assert_eq!(output.stdout, b"call 1\n18\ncall 2\n19\n");
+ }
+
+ #[test]
fn cpp() {
let mut linker = test_linker();
add(&mut linker, "cpp.cpp", true);