binfarce-0.2.1/.cargo_vcs_info.json0000644000000001120000000000000125760ustar { "git": { "sha1": "214df14772b3f98f12f5c6848d3a8d1a94bc29f3" } } binfarce-0.2.1/.github/workflows/main.yml000064400000000000000000000003360000000000000164200ustar 00000000000000name: binfarce on: [push, pull_request] env: CARGO_TERM_COLOR: always jobs: build: runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v2 - name: Build run: cargo test binfarce-0.2.1/.gitignore000064400000000000000000000000530000000000000133400ustar 00000000000000/target/ **/*.rs.bk .idea *.iml Cargo.lock binfarce-0.2.1/CHANGELOG.md000064400000000000000000000012160000000000000131630ustar 00000000000000# Change Log All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] ## [0.2.1] - 2021-07-03 ### Fixed - Make `UnexpectedEof` type public. ## [0.2.0] - 2021-07-03 ### Added - Allow selecting a custom symbols section for ELF. ### Changed - Improve error processing. [Unreleased]: https://github.com/Shnatsel/binfarce/compare/v0.2.1...HEAD [0.2.1]: https://github.com/Shnatsel/binfarce/compare/v0.2.0...v0.2.1 [0.2.0]: https://github.com/Shnatsel/binfarce/compare/v0.1.0...v0.2.0 binfarce-0.2.1/Cargo.toml0000644000000015740000000000000106110ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "binfarce" version = "0.2.1" authors = ["Evgeniy Reizner ", "Sergey \"Shnatsel\" Davidoff "] description = "Extremely minimal parser for ELF, Mach-O, PE and ar." readme = "README.md" categories = ["parser-implementations"] license = "MIT" repository = "https://github.com/Shnatsel/binfarce" binfarce-0.2.1/Cargo.toml.orig000064400000000000000000000005570000000000000142500ustar 00000000000000[package] name = "binfarce" version = "0.2.1" authors = ["Evgeniy Reizner ", "Sergey \"Shnatsel\" Davidoff "] description = "Extremely minimal parser for ELF, Mach-O, PE and ar." repository = "https://github.com/Shnatsel/binfarce" categories = ["parser-implementations"] readme = "README.md" license = "MIT" edition = "2018" binfarce-0.2.1/LICENSE000064400000000000000000000020720000000000000123600ustar 00000000000000The MIT License (MIT) Copyright (c) 2018 Reizner Evgeniy Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. binfarce-0.2.1/README.md000064400000000000000000000026150000000000000126350ustar 00000000000000## binfarce Extremely minimal parser for ELF/PE/Mach-o/ar. This crate is used mostly for sharing code between `cargo-bloat` and `auditable-extract` crates. It implements just enough features for those tools to work. If you're looking for a fully-featured parser, see [`goblin`](https://crates.io/crates/goblin). **Goals:** - 100% safe code all the way down. This includes all dependencies. - Simple code that's easy to audit. No fancy tricks such as proc macros. **Non-goals:** - Highest possible performance. Parsing these things is stupidly cheap as it is. - Full format support. ### Feature status **Header parsing** and **section extraction** are used by both tools. They are zero-allocation and hardened against untrusted inputs. `#[forbid(unsafe_code)]` ensures absence of code execution vulnerabilities. Absence of heap allocations ensures that it can't exhaust RAM. Static analysis via opt-in Clippy lints and fuzzing are used to ensure absence of panics. **Symbol extraction** is used by `cargo-bloat` only. It allocates unbounded amounts of memory on the heap and may panic given an untrusted input. PRs with functionality required for your own tool are welcome as long as they adhere to the above goals and keep existing tools working. However, you should consider using the full-featured [`goblin`](https://crates.io/crates/goblin) crate instead. 
_This project was briefly known as "kuduk"._ binfarce-0.2.1/src/ar.rs000064400000000000000000000056250000000000000131210ustar 00000000000000use std::str; use crate::ByteOrder; use crate::parser::*; use crate::demangle::SymbolName; pub fn parse(data: &[u8]) -> Result, UnexpectedEof> { const MAGIC: &[u8] = b"!\x0A"; if data.get(0..8) != Some(MAGIC) { return Ok(Vec::new()); } let mut s = Stream::new(&data[8..], ByteOrder::BigEndian); while !s.at_end() { // Align the offset. if s.offset() & 1 == 1 { s.skip_len(1)?; } let identifier = str::from_utf8(s.read_bytes(16)?).unwrap(); s.skip_len(12)?; // timestamp s.skip_len(6)?; // owner_id s.skip_len(6)?; // group_id s.skip_len(8)?; // mode let file_size = str::from_utf8(s.read_bytes(10)?).unwrap(); let terminator = s.read_bytes(2)?; assert_eq!(terminator, &[0x60, 0x0A]); // Check for BSD name. let mut name = ""; let mut raw_name_len: usize = 0; if identifier.starts_with("#1/") { raw_name_len = identifier[3..].trim().parse().unwrap(); let raw_name = s.read_bytes(raw_name_len)?; name = str::from_utf8(raw_name).unwrap(); name = name.trim_end_matches('\0'); } let mut file_size: usize = file_size.trim().parse() .expect("invalid file size in member header"); file_size -= raw_name_len; if name.is_empty() && identifier == "/ " { let index_data = s.read_bytes(file_size)?; return parse_sysv(index_data); } else if name == "__.SYMDEF" { let index_data = s.read_bytes(file_size)?; return parse_bsd(index_data); } else { s.skip_len(file_size)?; } } Ok(Vec::new()) } fn parse_sysv(data: &[u8]) -> Result, UnexpectedEof> { let mut symbols = Vec::new(); let count = { let mut s = Stream::new(data, ByteOrder::BigEndian); s.read::()? as usize }; // Skip offsets. 
let mut i = 4 + 4 * count; // u32 + u32 * size for _ in 0..count { if let Some(s) = parse_null_string(data, i) { symbols.push(SymbolName::demangle(s).complete); i += s.len() + 1; } else { i += 1; } if i >= data.len() { break; } } Ok(symbols) } fn parse_bsd(data: &[u8]) -> Result, UnexpectedEof> { let mut symbols = Vec::new(); let mut s = Stream::new(data, ByteOrder::LittleEndian); let entries_len = s.read::()? as usize; s.skip_len(entries_len)?; let strings_len = s.read::()? as usize; let strings = s.read_bytes(strings_len)?; let mut i = 0; while i < strings.len() { if let Some(s) = parse_null_string(strings, i) { symbols.push(SymbolName::demangle(s).complete); i += s.len() + 1; } else { i += 1; } } Ok(symbols) } binfarce-0.2.1/src/demangle/legacy.rs000064400000000000000000000157160000000000000155410ustar 00000000000000use std::char; use std::fmt; /// Representation of a demangled symbol name. pub struct Demangle<'a> { inner: &'a str, /// The number of ::-separated elements in the original name. elements: usize, } /// De-mangles a Rust symbol into a more readable version /// /// All Rust symbols by default are mangled as they contain characters that /// cannot be represented in all object files. The mangling mechanism is similar /// to C++'s, but Rust has a few specifics to handle items like lifetimes in /// symbols. /// /// This function will take a **mangled** symbol and return a value. When printed, /// the de-mangled version will be written. If the symbol does not look like /// a mangled symbol, the original value will be written instead. /// /// # Examples /// /// ```ignore /// use rustc_demangle::demangle; /// /// assert_eq!(demangle("_ZN4testE").to_string(), "test"); /// assert_eq!(demangle("_ZN3foo3barE").to_string(), "foo::bar"); /// assert_eq!(demangle("foo").to_string(), "foo"); /// ``` // All Rust symbols are in theory lists of "::"-separated identifiers. Some // assemblers, however, can't handle these characters in symbol names. 
To get // around this, we use C++-style mangling. The mangling method is: // // 1. Prefix the symbol with "_ZN" // 2. For each element of the path, emit the length plus the element // 3. End the path with "E" // // For example, "_ZN4testE" => "test" and "_ZN3foo3barE" => "foo::bar". // // We're the ones printing our backtraces, so we can't rely on anything else to // demangle our symbols. It's *much* nicer to look at demangled symbols, so // this function is implemented to give us nice pretty output. // // Note that this demangler isn't quite as fancy as it could be. We have lots // of other information in our symbols like hashes, version, type information, // etc. Additionally, this doesn't handle glue symbols at all. pub fn demangle(s: &str) -> Result<(Demangle, &str), ()> { // First validate the symbol. If it doesn't look like anything we're // expecting, we just print it literally. Note that we must handle non-Rust // symbols because we could have any function in the backtrace. let inner = if s.starts_with("_ZN") { &s[3..] } else if s.starts_with("ZN") { // On Windows, dbghelp strips leading underscores, so we accept "ZN...E" // form too. &s[2..] } else if s.starts_with("__ZN") { // On OSX, symbols are prefixed with an extra _ &s[4..] } else { return Err(()); }; // only work with ascii text if inner.bytes().any(|c| c & 0x80 != 0) { return Err(()); } let mut elements = 0; let mut chars = inner.chars(); let mut c = chars.next().ok_or(())?; while c != 'E' { // Decode an identifier element's length. if !c.is_digit(10) { return Err(()); } let mut len = 0usize; while let Some(d) = c.to_digit(10) { len = len.checked_mul(10) .and_then(|len| len.checked_add(d as usize)) .ok_or(())?; c = chars.next().ok_or(())?; } // `c` already contains the first character of this identifier, skip it and // all the other characters of this identifier, to reach the next element. 
for _ in 0..len { c = chars.next().ok_or(())?; } elements += 1; } Ok((Demangle { inner, elements, }, chars.as_str())) } // Rust hashes are hex digits with an `h` prepended. fn is_rust_hash(s: &str) -> bool { s.starts_with('h') && s[1..].chars().all(|c| c.is_digit(16)) } impl<'a> fmt::Display for Demangle<'a> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // Alright, let's do this. let mut inner = self.inner; for element in 0..self.elements { let mut rest = inner; while rest.chars().next().unwrap().is_digit(10) { rest = &rest[1..]; } let i: usize = inner[..(inner.len() - rest.len())].parse().unwrap(); inner = &rest[i..]; rest = &rest[..i]; // Skip printing the hash if alternate formatting // was requested. if f.alternate() && element+1 == self.elements && is_rust_hash(&rest) { break; } if element != 0 { f.write_str("::")?; } if rest.starts_with("_$") { rest = &rest[1..]; } loop { if rest.starts_with('.') { if let Some('.') = rest[1..].chars().next() { f.write_str("::")?; rest = &rest[2..]; } else { f.write_str(".")?; rest = &rest[1..]; } } else if rest.starts_with('$') { let (escape, after_escape) = if let Some(end) = rest[1..].find('$') { (&rest[1..end + 1], &rest[end + 2..]) } else { break; }; // see src/librustc_codegen_utils/symbol_names/legacy.rs for these mappings let unescaped = match escape { "SP" => "@", "BP" => "*", "RF" => "&", "LT" => "<", "GT" => ">", "LP" => "(", "RP" => ")", "C" => ",", _ => { if escape.starts_with('u') { let digits = &escape[1..]; let all_lower_hex = digits.chars().all(|c| match c { '0'..='9' | 'a'..='f' => true, _ => false, }); let c = u32::from_str_radix(digits, 16).ok() .and_then(char::from_u32); if let (true, Some(c)) = (all_lower_hex, c) { // FIXME(eddyb) do we need to filter out control codepoints? 
if !c.is_control() { c.fmt(f)?; rest = after_escape; continue; } } } break; } }; f.write_str(unescaped)?; rest = after_escape; } else if let Some(i) = rest.find(|c| c == '$' || c == '.') { f.write_str(&rest[..i])?; rest = &rest[i..]; } else { break; } } f.write_str(rest)?; } Ok(()) } } binfarce-0.2.1/src/demangle/mod.rs000064400000000000000000000112200000000000000150360ustar 00000000000000/// This module is a fork of https://github.com/alexcrichton/rustc-demangle /// /// Changes: /// /// - Added `SymbolName` struct. /// - `v0` demangler prints into a String and not into fmt::Formatter. /// - `v0` demangler can omit disambiguator hashes. /// - `v0` demangler stores a crate name. /// - Updated to Rust 2018. /// - Ignore LLVM suffixes. mod legacy; mod v0; #[derive(Clone, Copy, PartialEq)] pub enum Kind { Unknown, Legacy, V0, } pub struct SymbolData { pub name: SymbolName, pub address: u64, pub size: u64, } pub struct SymbolName { pub complete: String, pub trimmed: String, pub crate_name: Option, pub kind: Kind, } impl SymbolName { pub fn demangle(name: &str) -> Self { let d = demangle(name); match d.style { Some(DemangleStyle::Legacy(ref d)) => { let complete = d.to_string(); let mut trimmed = complete.clone(); // crate::mod::fn::h5fbe0f2f0b5c7342 -> crate::mod::fn if let Some(pos) = trimmed.bytes().rposition(|b| b == b':') { trimmed.drain((pos - 1)..); } SymbolName { complete, trimmed, crate_name: None, // We will parse a crate name later. kind: Kind::Legacy, } } Some(DemangleStyle::V0(ref d)) => { d.demangle() } None => { SymbolName { complete: name.to_string(), trimmed: name.to_string(), crate_name: None, // Unknown. kind: Kind::Unknown, } } } } } /// Representation of a demangled symbol name. pub struct Demangle<'a> { style: Option>, } enum DemangleStyle<'a> { Legacy(legacy::Demangle<'a>), V0(v0::Demangle<'a>), } /// De-mangles a Rust symbol into a more readable version /// /// This function will take a **mangled** symbol and return a value. 
When printed, /// the de-mangled version will be written. If the symbol does not look like /// a mangled symbol, the original value will be written instead. /// /// # Examples /// /// ```ignore /// use rustc_demangle::demangle; /// /// assert_eq!(demangle("_ZN4testE").to_string(), "test"); /// assert_eq!(demangle("_ZN3foo3barE").to_string(), "foo::bar"); /// assert_eq!(demangle("foo").to_string(), "foo"); /// ``` pub fn demangle(mut s: &str) -> Demangle { // During ThinLTO LLVM may import and rename internal symbols, so strip out // those endings first as they're one of the last manglings applied to symbol // names. let llvm = ".llvm."; if let Some(i) = s.find(llvm) { let candidate = &s[i + llvm.len()..]; let all_hex = candidate.chars().all(|c| { match c { 'A' ..= 'F' | '0' ..= '9' | '@' => true, _ => false, } }); if all_hex { s = &s[..i]; } } let mut suffix = ""; let mut style = match legacy::demangle(s) { Ok((d, s)) => { suffix = s; Some(DemangleStyle::Legacy(d)) } Err(()) => match v0::demangle(s) { Ok((d, s)) => { suffix = s; Some(DemangleStyle::V0(d)) } Err(v0::Invalid) => None, }, }; // Output like LLVM IR adds extra period-delimited words. See if // we are in that case and save the trailing words if so. if !suffix.is_empty() { if suffix.starts_with(".") && is_symbol_like(suffix) { // Keep the suffix. } else { // Reset the suffix and invalidate the demangling. 
style = None; } } Demangle { style } } fn is_symbol_like(s: &str) -> bool { s.chars().all(|c| { // Once `char::is_ascii_punctuation` and `char::is_ascii_alphanumeric` // have been stable for long enough, use those instead for clarity is_ascii_alphanumeric(c) || is_ascii_punctuation(c) }) } // Copied from the documentation of `char::is_ascii_alphanumeric` fn is_ascii_alphanumeric(c: char) -> bool { match c { '\u{0041}' ..= '\u{005A}' | '\u{0061}' ..= '\u{007A}' | '\u{0030}' ..= '\u{0039}' => true, _ => false, } } // Copied from the documentation of `char::is_ascii_punctuation` fn is_ascii_punctuation(c: char) -> bool { match c { '\u{0021}' ..= '\u{002F}' | '\u{003A}' ..= '\u{0040}' | '\u{005B}' ..= '\u{0060}' | '\u{007B}' ..= '\u{007E}' => true, _ => false, } } binfarce-0.2.1/src/demangle/v0.rs000064400000000000000000001031140000000000000146100ustar 00000000000000use std::char; use std::fmt::{self, Display, Write}; /// Representation of a demangled symbol name. pub struct Demangle<'a> { inner: &'a str, } /// De-mangles a Rust symbol into a more readable version /// /// This function will take a **mangled** symbol and return a value. When printed, /// the de-mangled version will be written. If the symbol does not look like /// a mangled symbol, the original value will be written instead. pub fn demangle(s: &str) -> Result<(Demangle, &str), Invalid> { // First validate the symbol. If it doesn't look like anything we're // expecting, we just print it literally. Note that we must handle non-Rust // symbols because we could have any function in the backtrace. let inner; if s.len() > 2 && s.starts_with("_R") { inner = &s[2..]; } else if s.len() > 1 && s.starts_with("R") { // On Windows, dbghelp strips leading underscores, so we accept "R..." // form too. inner = &s[1..]; } else if s.len() > 3 && s.starts_with("__R") { // On OSX, symbols are prefixed with an extra _ inner = &s[3..]; } else { return Err(Invalid); } // Paths always start with uppercase characters. 
match inner.as_bytes()[0] { b'A'..=b'Z' => {} _ => return Err(Invalid), } // only work with ascii text if inner.bytes().any(|c| c & 0x80 != 0) { return Err(Invalid); } // Verify that the symbol is indeed a valid path. let mut parser = Parser { sym: inner, next: 0, }; parser.skip_path()?; // Instantiating crate (paths always start with uppercase characters). match parser.sym.as_bytes().get(parser.next) { Some(&(b'A'..=b'Z')) => { parser.skip_path()?; } _ => {} } Ok((Demangle { inner, }, &parser.sym[parser.next..])) } impl Demangle<'_> { pub fn demangle(&self) -> super::SymbolName { let mut complete = String::new(); let crate_name = { let mut printer = Printer { parser: Ok(Parser { sym: self.inner, next: 0, }), out: &mut complete, bound_lifetime_depth: 0, print_disambiguator: true, crate_name: None, }; printer.print_path(true).unwrap(); printer.crate_name }; // Parse again, but without printing disambiguator. let mut trimmed = String::new(); { let mut printer = Printer { parser: Ok(Parser { sym: self.inner, next: 0, }), out: &mut trimmed, bound_lifetime_depth: 0, print_disambiguator: false, crate_name: None, }; printer.print_path(true).unwrap(); } super::SymbolName { complete, trimmed, crate_name, kind: super::Kind::V0, } } } #[derive(PartialEq, Eq)] pub struct Invalid; struct Ident<'s> { /// ASCII part of the identifier. ascii: &'s str, /// Punycode insertion codes for Unicode codepoints, if any. punycode: &'s str, } const SMALL_PUNYCODE_LEN: usize = 128; impl<'s> Ident<'s> { /// Attempt to decode punycode on the stack (allocation-free), /// and pass the char slice to the closure, if successful. /// This supports up to `SMALL_PUNYCODE_LEN` characters. fn try_small_punycode_decode R, R>( &self, f: F, ) -> Option { let mut out = ['\0'; SMALL_PUNYCODE_LEN]; let mut out_len = 0; let r = self.punycode_decode(|i, c| { // Check there's space left for another character. out.get(out_len).ok_or(())?; // Move the characters after the insert position. 
let mut j = out_len; out_len += 1; while j > i { out[j] = out[j - 1]; j -= 1; } // Insert the new character. out[i] = c; Ok(()) }); if r.is_ok() { Some(f(&out[..out_len])) } else { None } } /// Decode punycode as insertion positions and characters /// and pass them to the closure, which can return `Err(())` /// to stop the decoding process. fn punycode_decode Result<(), ()>>( &self, mut insert: F, ) -> Result<(), ()> { let mut punycode_bytes = self.punycode.bytes().peekable(); if punycode_bytes.peek().is_none() { return Err(()); } let mut len = 0; // Populate initial output from ASCII fragment. for c in self.ascii.chars() { insert(len, c)?; len += 1; } // Punycode parameters and initial state. let base = 36; let t_min = 1; let t_max = 26; let skew = 38; let mut damp = 700; let mut bias = 72; let mut i: usize = 0; let mut n: usize = 0x80; loop { // Read one delta value. let mut delta: usize = 0; let mut w = 1; let mut k: usize = 0; loop { use core::cmp::{min, max}; k += base; let t = min(max(k.saturating_sub(bias), t_min), t_max); let d = match punycode_bytes.next() { Some(d @ b'a'..=b'z') => d - b'a', Some(d @ b'0'..=b'9') => 26 + (d - b'0'), _ => return Err(()), }; let d = d as usize; delta = delta.checked_add( d.checked_mul(w).ok_or(())? ).ok_or(())?; if d < t { break; } w = w.checked_mul(base - t).ok_or(())?; } // Compute the new insert position and character. len += 1; i = i.checked_add(delta).ok_or(())?; n = n.checked_add(i / len).ok_or(())?; i %= len; let n_u32 = n as u32; let c = if n_u32 as usize == n { char::from_u32(n_u32).ok_or(())? } else { return Err(()); }; // Insert the new character and increment the insert position. insert(i, c)?; i += 1; // If there are no more deltas, decoding is complete. if punycode_bytes.peek().is_none() { return Ok(()); } // Perform bias adaptation. 
delta /= damp; damp = 2; delta += delta / len; let mut k = 0; while delta > ((base - t_min) * t_max) / 2 { delta /= base - t_min; k += base; } bias = k + ((base - t_min + 1) * delta) / (delta + skew); } } } impl<'s> Display for Ident<'s> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.try_small_punycode_decode(|chars| { for &c in chars { c.fmt(f)?; } Ok(()) }).unwrap_or_else(|| { if !self.punycode.is_empty() { f.write_str("punycode{")?; // Reconstruct a standard Punycode encoding, // by using `-` as the separator. if !self.ascii.is_empty() { f.write_str(self.ascii)?; f.write_str("-")?; } f.write_str(self.punycode)?; f.write_str("}") } else { f.write_str(self.ascii) } }) } } fn basic_type(tag: u8) -> Option<&'static str> { Some(match tag { b'b' => "bool", b'c' => "char", b'e' => "str", b'u' => "()", b'a' => "i8", b's' => "i16", b'l' => "i32", b'x' => "i64", b'n' => "i128", b'i' => "isize", b'h' => "u8", b't' => "u16", b'm' => "u32", b'y' => "u64", b'o' => "u128", b'j' => "usize", b'f' => "f32", b'd' => "f64", b'z' => "!", b'p' => "_", b'v' => "...", _ => return None, }) } struct Parser<'s> { sym: &'s str, next: usize, } impl<'s> Parser<'s> { fn peek(&self) -> Option { self.sym.as_bytes().get(self.next).cloned() } fn eat(&mut self, b: u8) -> bool { if self.peek() == Some(b) { self.next += 1; true } else { false } } fn next(&mut self) -> Result { let b = self.peek().ok_or(Invalid)?; self.next += 1; Ok(b) } fn hex_nibbles(&mut self) -> Result<&'s str, Invalid> { let start = self.next; loop { match self.next()? 
{ b'0'..=b'9' | b'a'..=b'f' => {} b'_' => break, _ => return Err(Invalid), } } Ok(&self.sym[start..self.next - 1]) } fn digit_10(&mut self) -> Result { let d = match self.peek() { Some(d @ b'0'..=b'9') => d - b'0', _ => return Err(Invalid), }; self.next += 1; Ok(d) } fn digit_62(&mut self) -> Result { let d = match self.peek() { Some(d @ b'0'..=b'9') => d - b'0', Some(d @ b'a'..=b'z') => 10 + (d - b'a'), Some(d @ b'A'..=b'Z') => 10 + 26 + (d - b'A'), _ => return Err(Invalid), }; self.next += 1; Ok(d) } fn integer_62(&mut self) -> Result { if self.eat(b'_') { return Ok(0); } let mut x: u64 = 0; while !self.eat(b'_') { let d = self.digit_62()? as u64; x = x.checked_mul(62).ok_or(Invalid)?; x = x.checked_add(d).ok_or(Invalid)?; } x.checked_add(1).ok_or(Invalid) } fn opt_integer_62(&mut self, tag: u8) -> Result { if !self.eat(tag) { return Ok(0); } self.integer_62()?.checked_add(1).ok_or(Invalid) } fn disambiguator(&mut self) -> Result { self.opt_integer_62(b's') } fn namespace(&mut self) -> Result, Invalid> { match self.next()? { // Special namespaces, like closures and shims. ns @ b'A'..=b'Z' => Ok(Some(ns as char)), // Implementation-specific/unspecified namespaces. b'a'..=b'z' => Ok(None), _ => Err(Invalid), } } fn backref(&mut self) -> Result, Invalid> { let s_start = self.next - 1; let i = self.integer_62()?; if i >= s_start as u64 { return Err(Invalid); } Ok(Parser { sym: self.sym, next: i as usize, }) } fn ident(&mut self) -> Result, Invalid> { let is_punycode = self.eat(b'u'); let mut len = self.digit_10()? as usize; if len != 0 { loop { match self.digit_10() { Ok(d) => { len = len.checked_mul(10).ok_or(Invalid)?; len = len.checked_add(d as usize).ok_or(Invalid)?; } Err(Invalid) => break, } } } // Skip past the optional `_` separator. 
self.eat(b'_'); let start = self.next; self.next = self.next.checked_add(len).ok_or(Invalid)?; if self.next > self.sym.len() { return Err(Invalid); } let ident = &self.sym[start..self.next]; if is_punycode { let ident = match ident.bytes().rposition(|b| b == b'_') { Some(i) => Ident { ascii: &ident[..i], punycode: &ident[i + 1..], }, None => Ident { ascii: "", punycode: ident, }, }; if ident.punycode.is_empty() { return Err(Invalid); } Ok(ident) } else { Ok(Ident { ascii: ident, punycode: "", }) } } fn skip_path(&mut self) -> Result<(), Invalid> { match self.next()? { b'C' => { self.disambiguator()?; self.ident()?; } b'N' => { self.namespace()?; self.skip_path()?; self.disambiguator()?; self.ident()?; } b'M' => { self.disambiguator()?; self.skip_path()?; self.skip_type()?; } b'X' => { self.disambiguator()?; self.skip_path()?; self.skip_type()?; self.skip_path()?; } b'Y' => { self.skip_type()?; self.skip_path()?; } b'I' => { self.skip_path()?; while !self.eat(b'E') { self.skip_generic_arg()?; } } b'B' => { self.backref()?; } _ => return Err(Invalid), } Ok(()) } fn skip_generic_arg(&mut self) -> Result<(), Invalid> { if self.eat(b'L') { self.integer_62()?; Ok(()) } else if self.eat(b'K') { self.skip_const() } else { self.skip_type() } } fn skip_type(&mut self) -> Result<(), Invalid> { match self.next()? 
{ tag if basic_type(tag).is_some() => {} b'R' | b'Q' => { if self.eat(b'L') { self.integer_62()?; } self.skip_type()?; } b'P' | b'O' | b'S' => self.skip_type()?, b'A' => { self.skip_type()?; self.skip_const()?; } b'T' => while !self.eat(b'E') { self.skip_type()?; }, b'F' => { let _binder = self.opt_integer_62(b'G')?; let _is_unsafe = self.eat(b'U'); if self.eat(b'K') { let c_abi = self.eat(b'C'); if !c_abi { let abi = self.ident()?; if abi.ascii.is_empty() || !abi.punycode.is_empty() { return Err(Invalid); } } } while !self.eat(b'E') { self.skip_type()?; } self.skip_type()?; } b'D' => { let _binder = self.opt_integer_62(b'G')?; while !self.eat(b'E') { self.skip_path()?; while self.eat(b'p') { self.ident()?; self.skip_type()?; } } if !self.eat(b'L') { return Err(Invalid); } self.integer_62()?; } b'B' => { self.backref()?; } _ => { // Go back to the tag, so `skip_path` also sees it. self.next -= 1; self.skip_path()?; } } Ok(()) } fn skip_const(&mut self) -> Result<(), Invalid> { if self.eat(b'B') { self.backref()?; return Ok(()); } match self.next()? { // Unsigned integer types. b'h' | b't' | b'm' | b'y' | b'o' | b'j' => {} _ => return Err(Invalid), } if self.eat(b'p') { return Ok(()); } self.hex_nibbles()?; Ok(()) } } struct Printer<'a, 's> { parser: Result, Invalid>, out: &'a mut String, bound_lifetime_depth: u32, print_disambiguator: bool, crate_name: Option, } /// Mark the parser as errored, print `?` and return early. /// This allows callers to keep printing the approximate /// syntax of the path/type/const, despite having errors. /// E.g. `Vec<[(A, ?); ?]>` instead of `Vec<[(A, ?`. macro_rules! invalid { ($printer:ident) => {{ $printer.parser = Err(Invalid); $printer.out.push('?'); return Ok(()); }} } /// Call a parser method (if the parser hasn't errored yet), /// and mark the parser as errored if it returns `Err(Invalid)`. /// /// If the parser errored, before or now, prints `?`, and /// returns early the current function (see `invalid!` above). macro_rules! 
parse { ($printer:ident, $method:ident $(($($arg:expr),*))*) => { match $printer.parser_mut().and_then(|p| p.$method($($($arg),*)*)) { Ok(x) => x, Err(Invalid) => invalid!($printer), } }; } impl<'a, 's> Printer<'a, 's> { fn parser_mut<'c>(&'c mut self) -> Result<&'c mut Parser<'s>, Invalid> { self.parser.as_mut().map_err(|_| Invalid) } /// Eat the given character from the parser, /// returning `false` if the parser errored. fn eat(&mut self, b: u8) -> bool { self.parser_mut().map(|p| p.eat(b)) == Ok(true) } /// Return a nested parser for a backref. fn backref_printer<'c>(&'c mut self) -> Printer<'c, 's> { Printer { parser: self.parser_mut().and_then(|p| p.backref()), out: self.out, bound_lifetime_depth: self.bound_lifetime_depth, print_disambiguator: self.print_disambiguator, crate_name: self.crate_name.clone(), } } /// Print the lifetime according to the previously decoded index. /// An index of `0` always refers to `'_`, but starting with `1`, /// indices refer to late-bound lifetimes introduced by a binder. fn print_lifetime_from_index(&mut self, lt: u64) -> fmt::Result { self.out.write_str("'")?; if lt == 0 { return self.out.write_str("_"); } match (self.bound_lifetime_depth as u64).checked_sub(lt) { Some(depth) => { // Try to print lifetimes alphabetically first. if depth < 26 { let c = (b'a' + depth as u8) as char; self.out.write_fmt(format_args!("{}", c)) } else { // Use `'_123` after running out of letters. self.out.write_str("_")?; self.out.write_fmt(format_args!("{}", depth)) } } None => invalid!(self), } } /// Optionally enter a binder ('G') for late-bound lifetimes, /// printing e.g. `for<'a, 'b> ` before calling the closure, /// and make those lifetimes visible to it (via depth level). 
fn in_binder(&mut self, f: F) -> fmt::Result where F: FnOnce(&mut Self) -> fmt::Result, { let bound_lifetimes = parse!(self, opt_integer_62(b'G')); if bound_lifetimes > 0 { self.out.push_str("for<"); for i in 0..bound_lifetimes { if i > 0 { self.out.push_str(", "); } self.bound_lifetime_depth += 1; self.print_lifetime_from_index(1)?; } self.out.push_str("> "); } let r = f(self); // Restore `bound_lifetime_depth` to the previous value. self.bound_lifetime_depth -= bound_lifetimes as u32; r } /// Print list elements using the given closure and separator, /// until the end of the list ('E') is found, or the parser errors. /// Returns the number of elements printed. fn print_sep_list(&mut self, f: F, sep: &str) -> Result where F: Fn(&mut Self) -> fmt::Result, { let mut i = 0; while self.parser.is_ok() && !self.eat(b'E') { if i > 0 { self.out.push_str(sep); } f(self)?; i += 1; } Ok(i) } fn print_path(&mut self, in_value: bool) -> fmt::Result { let tag = parse!(self, next); match tag { b'C' => { let dis = parse!(self, disambiguator); let name = parse!(self, ident); if self.crate_name.is_none() { self.crate_name = Some(name.to_string()); } self.out.write_fmt(format_args!("{}", name))?; if self.print_disambiguator { self.out.push_str("["); self.out.write_fmt(format_args!("{:x}", dis))?; self.out.push_str("]"); } } b'N' => { let ns = parse!(self, namespace); self.print_path(in_value)?; let dis = parse!(self, disambiguator); let name = parse!(self, ident); match ns { // Special namespaces, like closures and shims. Some(ns) => { self.out.push_str("::{"); match ns { 'C' => self.out.push_str("closure"), 'S' => self.out.push_str("shim"), _ => self.out.write_fmt(format_args!("{}", ns))?, } if !name.ascii.is_empty() || !name.punycode.is_empty() { self.out.push_str(":"); self.out.write_fmt(format_args!("{}", name))?; } self.out.push_str("#"); self.out.write_fmt(format_args!("{}", dis))?; self.out.push_str("}"); } // Implementation-specific/unspecified namespaces. 
None => { if !name.ascii.is_empty() || !name.punycode.is_empty() { self.out.push_str("::"); self.out.write_fmt(format_args!("{}", name))?; } } } } b'M' | b'X' | b'Y' => { if tag != b'Y' { // Ignore the `impl`'s own path. parse!(self, disambiguator); // `rustc-demangle` ignores this path since it makes // a symbol name too verbose, but we need it to extract a crate name. // So we will print it as usual and then remove it from an output buffer. let start = self.out.len(); self.print_path(in_value)?; self.out.drain(start..); } self.out.push_str("<"); self.print_type()?; if tag != b'M' { self.out.push_str(" as "); self.print_path(false)?; } self.out.push_str(">"); } b'I' => { self.print_path(in_value)?; if in_value { self.out.push_str("::"); } self.out.push_str("<"); self.print_sep_list(Self::print_generic_arg, ", ")?; self.out.push_str(">"); } b'B' => { self.backref_printer().print_path(in_value)?; } _ => invalid!(self), } Ok(()) } fn print_generic_arg(&mut self) -> fmt::Result { if self.eat(b'L') { let lt = parse!(self, integer_62); self.print_lifetime_from_index(lt) } else if self.eat(b'K') { self.print_const() } else { self.print_type() } } fn print_type(&mut self) -> fmt::Result { let tag = parse!(self, next); match basic_type(tag) { Some(ty) => return self.out.write_str(ty), None => {} } match tag { b'R' | b'Q' => { self.out.write_str("&")?; if self.eat(b'L') { let lt = parse!(self, integer_62); if lt != 0 { self.print_lifetime_from_index(lt)?; self.out.write_str(" ")?; } } if tag != b'R' { self.out.write_str("mut ")?; } self.print_type()?; } b'P' | b'O' => { self.out.write_str("*")?; if tag != b'P' { self.out.write_str("mut ")?; } else { self.out.write_str("const ")?; } self.print_type()?; } b'A' | b'S' => { self.out.write_str("[")?; self.print_type()?; if tag == b'A' { self.out.write_str("; ")?; self.print_const()?; } self.out.write_str("]")?; } b'T' => { self.out.write_str("(")?; let count = self.print_sep_list(Self::print_type, ", ")?; if count == 1 { 
self.out.write_str(",")?; } self.out.write_str(")")?; } b'F' => self.in_binder(|this| { let is_unsafe = this.eat(b'U'); let abi = if this.eat(b'K') { if this.eat(b'C') { Some("C") } else { let abi = parse!(this, ident); if abi.ascii.is_empty() || !abi.punycode.is_empty() { invalid!(this); } Some(abi.ascii) } } else { None }; if is_unsafe { this.out.write_str("unsafe ")?; } match abi { Some(abi) => { this.out.write_str("extern \"")?; // If the ABI had any `-`, they were replaced with `_`, // so the parts between `_` have to be re-joined with `-`. let mut parts = abi.split('_'); this.out.write_str(parts.next().unwrap())?; for part in parts { this.out.write_str("-")?; this.out.write_str(part)?; } this.out.write_str("\" ")?; } None => {} } this.out.write_str("fn(")?; this.print_sep_list(Self::print_type, ", ")?; this.out.write_str(")")?; if this.eat(b'u') { // Skip printing the return type if it's 'u', i.e. `()`. } else { this.out.write_str(" -> ")?; this.print_type()?; } Ok(()) })?, b'D' => { self.out.write_str("dyn ")?; self.in_binder(|this| { this.print_sep_list(Self::print_dyn_trait, " + ")?; Ok(()) })?; if !self.eat(b'L') { invalid!(self); } let lt = parse!(self, integer_62); if lt != 0 { self.out.write_str(" + ")?; self.print_lifetime_from_index(lt)?; } } b'B' => { self.backref_printer().print_type()?; } _ => { // Go back to the tag, so `print_path` also sees it. let _ = self.parser_mut().map(|p| p.next -= 1); self.print_path(false)?; } } Ok(()) } /// A trait in a trait object may have some "existential projections" /// (i.e. associated type bindings) after it, which should be printed /// in the `<...>` of the trait, e.g. `dyn Trait`. /// To this end, this method will keep the `<...>` of an 'I' path /// open, by omitting the `>`, and return `Ok(true)` in that case. 
fn print_path_maybe_open_generics(&mut self) -> Result { if self.eat(b'B') { self.backref_printer().print_path_maybe_open_generics() } else if self.eat(b'I') { self.print_path(false)?; self.out.write_str("<")?; self.print_sep_list(Self::print_generic_arg, ", ")?; Ok(true) } else { self.print_path(false)?; Ok(false) } } fn print_dyn_trait(&mut self) -> fmt::Result { let mut open = self.print_path_maybe_open_generics()?; while self.eat(b'p') { if !open { self.out.write_str("<")?; open = true; } else { self.out.write_str(", ")?; } let name = parse!(self, ident); self.out.write_fmt(format_args!("{}", name))?; self.out.write_str(" = ")?; self.print_type()?; } if open { self.out.write_str(">")?; } Ok(()) } fn print_const(&mut self) -> fmt::Result { if self.eat(b'B') { return self.backref_printer().print_const(); } let ty_tag = parse!(self, next); let ty = match ty_tag { // Unsigned integer types. b'h' | b't' | b'm' | b'y' | b'o' | b'j' => { basic_type(ty_tag).unwrap() } _ => invalid!(self), }; if self.eat(b'p') { self.out.write_str("_")?; } else { self.print_const_uint()?; } self.out.write_str(": ")?; self.out.write_str(ty)?; Ok(()) } fn print_const_uint(&mut self) -> fmt::Result { let hex = parse!(self, hex_nibbles); // Print anything that doesn't fit in `u64` verbatim. if hex.len() > 16 { self.out.push_str("0x"); self.out.push_str(hex); return Ok(()); } let mut v = 0; for c in hex.chars() { v = (v << 4) | (c.to_digit(16).unwrap() as u64); } self.out.write_fmt(format_args!("{}", v)) } } #[cfg(test)] mod tests { use crate::demangle::SymbolName; macro_rules! t_nohash { ($a:expr, $b:expr) => ({ assert_eq!(SymbolName::demangle($a).trimmed, $b); }) } macro_rules! 
t_nohash_type { ($a:expr, $b:expr) => ( t_nohash!(concat!("_RMC0", $a), concat!("<", $b, ">")) ) } #[test] fn demangle_crate_with_leading_digit() { t_nohash!( "_RNvC6_123foo3bar", "123foo::bar" ); } #[test] fn demangle_utf8_idents() { t_nohash!( "_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y", "utf8_idents::საჭმელად_გემრიელი_სადილი" ); } #[test] fn demangle_closure() { t_nohash!( "_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_", "cc::spawn::{closure#0}::{closure#0}" ); t_nohash!( "_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_", " as core::iter::iterator::Iterator>::rposition::::{closure#0}" ); } #[test] fn demangle_dyn_trait() { t_nohash!( "_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std", "alloc::alloc::box_free::>" ); } #[test] fn demangle_const_generics() { // NOTE(eddyb) this was hand-written, before rustc had working // const generics support (but the mangling format did include them). t_nohash_type!( "INtC8arrayvec8ArrayVechKj7b_E", "arrayvec::ArrayVec" ); } #[test] fn demangle_exponential_explosion() { // NOTE(eddyb) because of the prefix added by `t_nohash_type!` is // 3 bytes long, `B2_` refers to the start of the type, not `B_`. // 6 backrefs (`B8_E` through `B3_E`) result in 2^6 = 64 copies of `_`. // Also, because the `p` (`_`) type is after all of the starts of the // backrefs, it can be replaced with any other type, independently. 
t_nohash_type!( concat!("TTTTTT", "p", "B8_E", "B7_E", "B6_E", "B5_E", "B4_E", "B3_E"), "((((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _)))), \ ((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _))))), \ (((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _)))), \ ((((_, _), (_, _)), ((_, _), (_, _))), (((_, _), (_, _)), ((_, _), (_, _))))))" ); } #[test] fn demangle_thinlto() { t_nohash!("_RC3foo.llvm.9D1C9369", "foo"); t_nohash!("_RC3foo.llvm.9D1C9369@@16", "foo"); t_nohash!("_RNvC9backtrace3foo.llvm.A5310EB9", "backtrace::foo"); } } binfarce-0.2.1/src/elf32.rs000064400000000000000000000176110000000000000134300ustar 00000000000000// Prohibit dangerous things we definitely don't want #![deny(clippy::integer_arithmetic)] #![deny(clippy::cast_possible_truncation)] #![deny(clippy::indexing_slicing)] // Style lints #![warn(clippy::cast_lossless)] use std::{convert::TryInto, ops::Range, mem::size_of}; use crate::ByteOrder; use crate::demangle::SymbolData; use crate::parser::*; use crate::ParseError; mod elf { pub type Address = u32; pub type Offset = u32; pub type Half = u16; pub type Word = u32; } mod section_type { pub const SYMBOL_TABLE: super::elf::Word = 2; pub const STRING_TABLE: super::elf::Word = 3; } const RAW_ELF_HEADER_SIZE: usize = size_of::(); const RAW_SECTION_HEADER_SIZE: usize = size_of::(); #[derive(Debug, Clone, Copy)] pub struct Elf32Header { pub elf_type: elf::Half, pub machine: elf::Half, pub version: elf::Word, pub entry: elf::Address, pub phoff: elf::Offset, pub shoff: elf::Offset, pub flags: elf::Word, pub ehsize: elf::Half, pub phentsize: elf::Half, pub phnum: elf::Half, pub shentsize: elf::Half, pub shnum: elf::Half, pub shstrndx: elf::Half, } fn parse_elf_header(data: &[u8], byte_order: ByteOrder) -> Result { let mut s = Stream::new(&data.get(16..).ok_or(UnexpectedEof{})?, byte_order); if s.remaining() >= RAW_ELF_HEADER_SIZE { Ok(Elf32Header { elf_type: s.read()?, machine: 
s.read()?, version: s.read()?, entry: s.read()?, phoff: s.read()?, shoff: s.read()?, flags: s.read()?, ehsize: s.read()?, phentsize: s.read()?, phnum: s.read()?, shentsize: s.read()?, shnum: s.read()?, shstrndx: s.read()?, }) } else { Err(UnexpectedEof {}) } } #[derive(Debug, Clone, Copy)] pub struct Section { index: u16, name_offset: u32, kind: u32, link: u32, offset: u32, size: u32, entry_size: u32, } impl Section { pub fn range(&self) -> Result, ParseError> { let start: usize = self.offset.try_into()?; let end: usize = start.checked_add(self.size.try_into()?).ok_or(ParseError::MalformedInput)?; Ok(start..end) } pub fn entries(&self) -> u32 { self.size.checked_div(self.entry_size).unwrap_or(0) } fn from_raw(rs: RawSection, index: u16) -> Section { Section { index, name_offset: rs.name, kind: rs.kind, link: rs.link, offset: rs.offset, size: rs.size, entry_size: rs.entry_size, } } pub fn name<'a>(&self, parent: &Elf32<'a>) -> Option<&'a str> { self.__name(parent.data, parent.header, parent.byte_order).unwrap_or(None) } fn __name<'a>(&self, data: &'a [u8], header: Elf32Header, byte_order: ByteOrder) -> Result, ParseError> { let section_offset: usize = header.shoff.try_into()?; let mut s = Stream::new_at(data, section_offset, byte_order)?; let number_of_section_with_section_names = header.shstrndx; s.skip_len(RAW_SECTION_HEADER_SIZE.checked_mul(number_of_section_with_section_names.into()) .ok_or(ParseError::MalformedInput)?)?; let section_with_section_names = Section::from_raw(read_section(&mut s)?, number_of_section_with_section_names); let section_name_strings = &data.get(section_with_section_names.range()?) 
.ok_or(UnexpectedEof{})?; Ok(parse_null_string(section_name_strings, self.name_offset as usize)) } } pub struct Elf32<'a> { data: &'a [u8], byte_order: ByteOrder, header: Elf32Header, } #[derive(Debug, Clone, Copy)] struct RawSection { name: elf::Word, kind: elf::Word, flags: elf::Word, addr: elf::Address, offset: elf::Offset, size: elf::Word, link: elf::Word, info: elf::Word, addralign: elf::Word, entry_size: elf::Word, } pub fn parse(data: &[u8], byte_order: ByteOrder) -> Result { let header = parse_elf_header(data, byte_order)?; Ok(Elf32 { data, byte_order, header }) } impl<'a> Elf32<'a> { pub fn header(&self) -> Elf32Header { self.header } pub fn section_with_name(&self, name: &str) -> Result, ParseError> { let callback = |section: Section| { section.name(self) == Some(name) }; self.find_section(callback) } pub fn find_section bool>(&self, callback: F) -> Result, ParseError> { let section_count = self.header.shnum; let section_offset: usize = self.header.shoff.try_into()?; let mut s = Stream::new_at(self.data, section_offset, self.byte_order)?; for i in 0..section_count { let rs = read_section(&mut s)?; let section = Section::from_raw(rs, i); if callback(section) { return Ok(Some(section)); } } Ok(None) } pub fn symbols(&self, section_name: &str) -> Result<(Vec, u64), ParseError> { let text_section = self.section_with_name(section_name)? .ok_or(ParseError::SymbolsSectionIsMissing)?; let symbols_section = self.find_section(|v| v.kind == section_type::SYMBOL_TABLE)? .ok_or(ParseError::SectionIsMissing(".symtab"))?; let linked_section = self.find_section(|v| u32::from(v.index) == symbols_section.link)? .ok_or(ParseError::SectionIsMissing(".strtab"))?; if linked_section.kind != section_type::STRING_TABLE { return Err(ParseError::UnexpectedSectionType { expected: section_type::STRING_TABLE, actual: linked_section.kind, }); } let strings = self.data.get(linked_section.range()?) 
.ok_or(ParseError::UnexpectedEof)?; let symbols_data_range = &self.data.get(symbols_section.range()?) .ok_or(ParseError::UnexpectedEof)?; let s = Stream::new(symbols_data_range, self.byte_order); let symbols_count: usize = symbols_section.entries().try_into()?; let symbols = parse_symbols(s, symbols_count, strings, text_section)?; Ok((symbols, text_section.size.into())) } } fn read_section(s: &mut Stream) -> Result { Ok(RawSection { name: s.read()?, kind: s.read()?, flags: s.read()?, addr: s.read()?, offset: s.read()?, size: s.read()?, link: s.read()?, info: s.read()?, addralign: s.read()?, entry_size: s.read()?, }) } fn parse_symbols( mut s: Stream, count: usize, strings: &[u8], text_section: Section, ) -> Result, UnexpectedEof> { let mut symbols = Vec::with_capacity(count); while !s.at_end() { // Note: the order of fields in 32 and 64 bit ELF is different. let name_offset = s.read::()? as usize; let value: elf::Address = s.read()?; let size: elf::Word = s.read()?; let info: u8 = s.read()?; s.skip::()?; // other let shndx: elf::Half = s.read()?; if shndx != text_section.index { continue; } // Ignore symbols with zero size. if size == 0 { continue; } // Ignore symbols without a name. if name_offset == 0 { continue; } // Ignore symbols that aren't functions. 
const STT_FUNC: u8 = 2; let kind = info & 0xf; if kind != STT_FUNC { continue; } if let Some(s) = parse_null_string(strings, name_offset) { symbols.push(SymbolData { name: crate::demangle::SymbolName::demangle(s), address: value.into(), size: size.into(), }); } } Ok(symbols) } binfarce-0.2.1/src/elf64.rs000064400000000000000000000176130000000000000134370ustar 00000000000000// Prohibit dangerous things we definitely don't want #![deny(clippy::integer_arithmetic)] #![deny(clippy::cast_possible_truncation)] #![deny(clippy::indexing_slicing)] // Style lints #![warn(clippy::cast_lossless)] use std::{convert::TryInto, ops::Range, mem::size_of}; use crate::ByteOrder; use crate::demangle::SymbolData; use crate::parser::*; use crate::ParseError; mod elf { pub type Address = u64; pub type Offset = u64; pub type Half = u16; pub type Word = u32; pub type XWord = u64; } mod section_type { pub const SYMBOL_TABLE: super::elf::Word = 2; pub const STRING_TABLE: super::elf::Word = 3; } const RAW_ELF_HEADER_SIZE: usize = size_of::(); const RAW_SECTION_HEADER_SIZE: usize = size_of::(); #[derive(Debug, Clone, Copy)] pub struct Elf64Header { pub elf_type: elf::Half, pub machine: elf::Half, pub version: elf::Word, pub entry: elf::Address, pub phoff: elf::Offset, pub shoff: elf::Offset, pub flags: elf::Word, pub ehsize: elf::Half, pub phentsize: elf::Half, pub phnum: elf::Half, pub shentsize: elf::Half, pub shnum: elf::Half, pub shstrndx: elf::Half, } fn parse_elf_header(data: &[u8], byte_order: ByteOrder) -> Result { let mut s = Stream::new(&data.get(16..).ok_or(UnexpectedEof{})?, byte_order); if s.remaining() >= RAW_ELF_HEADER_SIZE { Ok(Elf64Header { elf_type: s.read()?, machine: s.read()?, version: s.read()?, entry: s.read()?, phoff: s.read()?, shoff: s.read()?, flags: s.read()?, ehsize: s.read()?, phentsize: s.read()?, phnum: s.read()?, shentsize: s.read()?, shnum: s.read()?, shstrndx: s.read()?, }) } else { Err(UnexpectedEof {}) } } #[derive(Debug, Clone, Copy)] pub struct Section { 
index: u16, name_offset: u32, kind: u32, link: u32, offset: u64, size: u64, entry_size: u64, } impl Section { pub fn range(&self) -> Result, ParseError> { let start: usize = self.offset.try_into()?; let end: usize = start.checked_add(self.size.try_into()?).ok_or(ParseError::MalformedInput)?; Ok(start..end) } pub fn entries(&self) -> u64 { self.size.checked_div(self.entry_size).unwrap_or(0) } fn from_raw(rs: RawSection, index: u16) -> Section { Section { index, name_offset: rs.name, kind: rs.kind, link: rs.link, offset: rs.offset, size: rs.size, entry_size: rs.entry_size, } } pub fn name<'a>(&self, parent: &Elf64<'a>) -> Option<&'a str> { self.__name(parent.data, parent.header, parent.byte_order).unwrap_or(None) } fn __name<'a>(&self, data: &'a [u8], header: Elf64Header, byte_order: ByteOrder) -> Result, ParseError> { let section_offset: usize = header.shoff.try_into()?; let mut s = Stream::new_at(data, section_offset, byte_order)?; let number_of_section_with_section_names = header.shstrndx; s.skip_len(RAW_SECTION_HEADER_SIZE.checked_mul(number_of_section_with_section_names.into()) .ok_or(ParseError::MalformedInput)?)?; let section_with_section_names = Section::from_raw(read_section(&mut s)?, number_of_section_with_section_names); let section_name_strings = &data.get(section_with_section_names.range()?) 
.ok_or(UnexpectedEof{})?; Ok(parse_null_string(section_name_strings, self.name_offset as usize)) } } pub struct Elf64<'a> { data: &'a [u8], byte_order: ByteOrder, header: Elf64Header, } #[derive(Debug, Clone, Copy)] struct RawSection { name: elf::Word, kind: elf::Word, flags: elf::XWord, addr: elf::Address, offset: elf::Offset, size: elf::XWord, link: elf::Word, info: elf::Word, addralign: elf::XWord, entry_size: elf::XWord, } pub fn parse(data: &[u8], byte_order: ByteOrder) -> Result { let header = parse_elf_header(data, byte_order)?; Ok(Elf64 { data, byte_order, header}) } impl<'a> Elf64<'a> { pub fn header(&self) -> Elf64Header { self.header } pub fn section_with_name(&self, name: &str) -> Result, ParseError> { let callback = |section: Section| { section.name(self) == Some(name) }; self.find_section(callback) } pub fn find_section bool>(&self, callback: F) -> Result, ParseError> { let section_count = self.header.shnum; let section_offset: usize = self.header.shoff.try_into()?; let mut s = Stream::new_at(self.data, section_offset, self.byte_order)?; for i in 0..section_count { let rs = read_section(&mut s)?; let section = Section::from_raw(rs, i); if callback(section) { return Ok(Some(section)); } } Ok(None) } pub fn symbols(&self, section_name: &str) -> Result<(Vec, u64), ParseError> { let text_section = self.section_with_name(section_name)? .ok_or(ParseError::SymbolsSectionIsMissing)?; let symbols_section = self.find_section(|v| v.kind == section_type::SYMBOL_TABLE)? .ok_or(ParseError::SectionIsMissing(".symtab"))?; let linked_section = self.find_section(|v| u32::from(v.index) == symbols_section.link)? .ok_or(ParseError::SectionIsMissing(".strtab"))?; if linked_section.kind != section_type::STRING_TABLE { return Err(ParseError::UnexpectedSectionType { expected: section_type::STRING_TABLE, actual: linked_section.kind, }); } let strings = self.data.get(linked_section.range()?) 
.ok_or(ParseError::UnexpectedEof)?; let symbols_data_range = &self.data.get(symbols_section.range()?) .ok_or(ParseError::UnexpectedEof)?; let s = Stream::new(symbols_data_range, self.byte_order); let symbols_count: usize = symbols_section.entries().try_into()?; let symbols = parse_symbols(s, symbols_count, strings, text_section)?; Ok((symbols, text_section.size)) } } fn read_section(s: &mut Stream) -> Result { Ok(RawSection { name: s.read()?, kind: s.read()?, flags: s.read()?, addr: s.read()?, offset: s.read()?, size: s.read()?, link: s.read()?, info: s.read()?, addralign: s.read()?, entry_size: s.read()?, }) } fn parse_symbols( mut s: Stream, count: usize, strings: &[u8], text_section: Section, ) -> Result, UnexpectedEof> { let mut symbols = Vec::with_capacity(count); while !s.at_end() { // Note: the order of fields in 32 and 64 bit ELF is different. let name_offset = s.read::()? as usize; let info: u8 = s.read()?; s.skip::()?; // other let shndx: elf::Half = s.read()?; let value: elf::Address = s.read()?; let size: elf::XWord = s.read()?; if shndx != text_section.index { continue; } // Ignore symbols with zero size. if size == 0 { continue; } // Ignore symbols without a name. if name_offset == 0 { continue; } // Ignore symbols that aren't functions. 
const STT_FUNC: u8 = 2; let kind = info & 0xf; if kind != STT_FUNC { continue; } if let Some(s) = parse_null_string(strings, name_offset) { symbols.push(SymbolData { name: crate::demangle::SymbolName::demangle(s), address: value, size, }); } } Ok(symbols) } binfarce-0.2.1/src/error.rs000064400000000000000000000023260000000000000136430ustar 00000000000000use std::{fmt::{Debug, Display}, error::Error}; #[derive(Debug, Copy, Clone)] pub enum ParseError { SymbolsSectionIsMissing, SectionIsMissing(&'static str), UnexpectedSectionType { expected: u32, actual: u32 }, MalformedInput, UnexpectedEof, } impl Error for ParseError {} impl Display for ParseError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ParseError::SymbolsSectionIsMissing => write!(f, "symbols section is missing"), ParseError::SectionIsMissing(name) => write!(f, "section {} is missing", name), ParseError::UnexpectedSectionType { expected, actual } => write!(f, "expected section type {} but found {}", expected, actual), ParseError::MalformedInput => write!(f, "malformed input file"), ParseError::UnexpectedEof => write!(f, "unexpected end of file"), } } } impl From for ParseError { fn from(_: std::num::TryFromIntError) -> Self { ParseError::MalformedInput } } impl From for ParseError { fn from(_: crate::parser::UnexpectedEof) -> Self { ParseError::UnexpectedEof } } binfarce-0.2.1/src/lib.rs000064400000000000000000000040130000000000000132530ustar 00000000000000#![forbid(unsafe_code)] //! Extremely minimal parser for ELF/PE/Mach-o/ar. //! //! This crate is used mostly for sharing code between `cargo-bloat` and `auditable-extract` crates. //! It implements just enough features for those tools to work. //! If you're looking for a fully-featured parser, see [`goblin`](https://crates.io/crates/goblin). 
// Style lints on which pre-existing code disagrees with Clippy #![allow(clippy::single_match)] #![allow(clippy::while_let_loop)] #![allow(clippy::single_char_pattern)] #![allow(clippy::many_single_char_names)] // I find this more readable #![allow(clippy::skip_while_next)] pub mod ar; pub mod demangle; pub mod elf32; pub mod elf64; pub mod macho; pub mod pe; mod parser; mod error; pub use crate::error::ParseError; pub use crate::parser::UnexpectedEof; #[derive(Clone, Copy, PartialEq, Debug)] pub enum ByteOrder { LittleEndian, BigEndian, } pub enum Format { Elf32 {byte_order: ByteOrder}, Elf64 {byte_order: ByteOrder}, Macho, PE, Unknown, } pub fn detect_format(data: &[u8]) -> Format { if data.len() < 8 {return Format::Unknown}; let macho_signatures = [ b"\xCA\xFE\xBA\xBE", // multi-architecture macOS b"\xFE\xED\xFA\xCE", // 32-bit macOS b"\xFE\xED\xFA\xCF", // 64-bit macOS b"\xCE\xFA\xED\xFE", // and now the same in reverse order b"\xCF\xFA\xED\xFE", // because they could ]; if data.starts_with(b"\x7FELF") { let byte_order = match data[5] { 1 => ByteOrder::LittleEndian, 2 => ByteOrder::BigEndian, _ => return Format::Unknown, }; return match data[4] { 1 => Format::Elf32{byte_order}, 2 => Format::Elf64{byte_order}, _ => Format::Unknown, }; } else if data.starts_with(b"MZ") { return Format::PE; } else { for signature in &macho_signatures { if data.starts_with(*signature) { return Format::Macho; } } } Format::Unknown } binfarce-0.2.1/src/macho.rs000064400000000000000000000236530000000000000136070ustar 00000000000000// Prohibit dangerous things we definitely don't want #![deny(clippy::integer_arithmetic)] #![deny(clippy::cast_possible_truncation)] #![deny(clippy::indexing_slicing)] // Style lints #![warn(clippy::cast_lossless)] use crate::ByteOrder; use crate::demangle::SymbolData; use crate::parser::*; use crate::ParseError; use std::ops::Range; use std::convert::TryInto; const LC_SYMTAB: u32 = 0x2; const LC_SEGMENT_64: u32 = 0x19; #[derive(Debug, Clone, Copy)] struct 
Cmd { kind: u32, offset: usize, } #[derive(Debug, Clone, Copy)] pub struct Section <'a> { segment_name: &'a str, section_name: &'a str, address: u64, offset: u32, size: u64, } impl Section <'_> { pub fn range(&self) -> Result, ParseError> { let start: usize = self.offset.try_into()?; let end: usize = start.checked_add(self.size.try_into()?).ok_or(ParseError::MalformedInput)?; Ok(start..end) } } #[derive(Debug, Clone, Copy)] pub struct MachoHeader { cputype: u32, cpusubtype: u32, /// type of file - exec, dylib, ... filetype: u32, /// number of load commands ncmds: u32, /// size of load command region sizeofcmds: u32, flags: u32, } #[derive(Debug, Clone)] pub struct Macho <'a> { data: &'a [u8], header: MachoHeader, } fn parse_macho_header(s: &mut Stream) -> Result { s.skip::()?; // magic let header = MachoHeader { cputype: s.read()?, cpusubtype: s.read()?, filetype: s.read()?, ncmds: s.read()?, sizeofcmds: s.read()?, flags: s.read()?, }; s.skip::()?; // reserved Ok(header) } struct MachoCommandsIterator<'a> { stream: Stream<'a>, number_of_commands: u32, commands_already_read: u32, result: Result<(), ParseError>, } impl Iterator for MachoCommandsIterator<'_> { type Item = Result; fn next(&mut self) -> Option { if self.commands_already_read < self.number_of_commands && self.result.is_ok() { let s = &mut self.stream; let cmd_kind: u32 = s.read().ok()?; let cmd_size: u32 = s.read().ok()?; let item = Cmd {kind: cmd_kind, offset: s.offset()}; self.commands_already_read = self.commands_already_read.checked_add(1)?; // cmd_size is a size of a whole command data, // so we have to remove the header size first. let to_skip = (cmd_size as usize).checked_sub(8); // Skip the rest of the command to get to the start of the next one. // If we encounter EOF or if the command size makes no sense, // make the iterator return None from now on. 
self.result = match to_skip { None => Err(ParseError::MalformedInput), Some(len) => s.skip_len(len).map_err(|_| ParseError::UnexpectedEof), }; match self.result { Ok(()) => Some(Ok(item)), Err(err_val) => Some(Err(err_val)), } } else { None } } } pub fn parse(data: &[u8]) -> Result { let mut s = Stream::new(&data, ByteOrder::LittleEndian); let header = parse_macho_header(&mut s)?; Ok(Macho{ data, header, }) } impl <'a> Macho<'a> { pub fn header(&self) -> MachoHeader { self.header } pub fn find_section bool>(&self, callback: F) -> Result, ParseError> { for cmd in self.commands() { let cmd = cmd?; if cmd.kind == LC_SEGMENT_64 { let mut s = Stream::new_at(self.data, cmd.offset, ByteOrder::LittleEndian)?; s.skip_len(16)?; // segname s.skip::()?; // vmaddr s.skip::()?; // vmsize s.skip::()?; // fileoff s.skip::()?; // filesize s.skip::()?; // maxprot s.skip::()?; // initprot let sections_count: u32 = s.read()?; s.skip::()?; // flags for _ in 0..sections_count { let section_name = parse_null_string(s.read_bytes(16)?, 0); let segment_name = parse_null_string(s.read_bytes(16)?, 0); let address: u64 = s.read()?; let size: u64 = s.read()?; let offset: u32 = s.read()?; s.skip::()?; // align s.skip::()?; // reloff s.skip::()?; // nreloc s.skip::()?; // flags s.skip_len(12)?; // padding if let (Some(segment), Some(section)) = (segment_name, section_name) { let section = Section { segment_name: segment, section_name: section, address, offset, size, }; if callback(section) { return Ok(Some(section)); } } } } } Ok(None) } pub fn section_with_name(&self, segment_name: &str, section_name: &str) -> Result, ParseError> { let callback = |section: Section| { section.segment_name == segment_name && section.section_name == section_name }; self.find_section(callback) } fn commands(&self) -> MachoCommandsIterator { let mut s = Stream::new(&self.data, ByteOrder::LittleEndian); let _ = parse_macho_header(&mut s); // skip the header MachoCommandsIterator { stream: s, number_of_commands: 
self.header.ncmds, commands_already_read: 0, result: Ok(()) } }

    /// Collects the symbols described by the `LC_SYMTAB` load command.
    ///
    /// Returns the parsed symbols together with the size of the
    /// `__TEXT,__text` section. When the file has no `LC_SYMTAB` command,
    /// returns an empty list and a size of `0`.
    ///
    /// # Errors
    ///
    /// - `ParseError::SymbolsSectionIsMissing` when there is no `__TEXT,__text` section.
    /// - `ParseError::MalformedInput` when the symbol or string table
    ///   lies outside of the file data.
    /// - A read error when the symbol table is truncated.
    ///
    /// # Panics
    ///
    /// Panics when the `__text` section is empty, or when iterating
    /// over the load commands yields a value that cannot be unwrapped.
    pub fn symbols(&self) -> Result<(Vec<SymbolData>, u64), ParseError> {
        let text_section = self.section_with_name("__TEXT", "__text")?
            .ok_or(ParseError::SymbolsSectionIsMissing)?;
        assert_ne!(text_section.size, 0);

        if let Some(cmd) = self.commands().find(|v| v.unwrap().kind == LC_SYMTAB) {
            // All offsets below come from the file itself, so every slice
            // is bounds-checked instead of indexed.
            let symtab = self.data.get(cmd.unwrap().offset..)
                .ok_or(ParseError::MalformedInput)?;
            let mut s = Stream::new(symtab, ByteOrder::LittleEndian);
            let symbols_offset: u32 = s.read()?;
            let number_of_symbols: u32 = s.read()?;
            let strings_offset: u32 = s.read()?;
            let strings_size: u32 = s.read()?;

            let strings = {
                let start = strings_offset as usize;
                let end = start.checked_add(strings_size as usize)
                    .ok_or(ParseError::MalformedInput)?;
                self.data.get(start..end).ok_or(ParseError::MalformedInput)?
            };

            let symbols_data = self.data.get(symbols_offset as usize..)
                .ok_or(ParseError::MalformedInput)?;

            return Ok((
                parse_symbols(symbols_data, number_of_symbols, strings, text_section)?,
                text_section.size,
            ));
        }

        Ok((Vec::new(), 0))
    }
}

/// A symbol table entry as read by `parse_symbols`, before filtering.
#[derive(Clone, Copy, Debug)]
struct RawSymbol {
    /// Offset of the symbol name inside the string table.
    string_index: u32,
    /// Type byte of the entry.
    kind: u8,
    /// Number of the section the symbol belongs to.
    section: u8,
    /// Value of the entry, used as the symbol address.
    address: u64,
}

/// Parses `count` symbol table entries from `data`.
///
/// Names are resolved through `strings`. Only named symbols that belong
/// to section number 1 are returned. The format does not store symbol
/// sizes, so each size is the distance to the next symbol with
/// a different address (or to the end of `text_section` for the last one).
///
/// # Errors
///
/// Returns `UnexpectedEof` when `data` holds fewer than `count` entries.
// only used by cargo-bloat which operates on trusted data,
// so it's not hardened against malicious inputs
#[allow(clippy::integer_arithmetic)]
#[allow(clippy::indexing_slicing)]
fn parse_symbols(
    data: &[u8],
    count: u32,
    strings: &[u8],
    text_section: Section,
) -> Result<Vec<SymbolData>, UnexpectedEof> {
    let mut raw_symbols = Vec::with_capacity(count as usize);
    let mut s = Stream::new(data, ByteOrder::LittleEndian);
    for _ in 0..count {
        let string_index: u32 = s.read()?;
        let kind: u8 = s.read()?;
        let section: u8 = s.read()?;
        s.skip::<u16>()?; // description
        let value: u64 = s.read()?;

        // Entries without a value cannot be placed in the address space.
        if value == 0 {
            continue;
        }

        raw_symbols.push(RawSymbol {
            string_index,
            kind,
            section,
            address: value,
        });
    }

    // To find symbol sizes, we have to sort them by address.
    raw_symbols.sort_by_key(|v| v.address);

    // Add the __TEXT section end address, which will be used
    // to calculate the size of the last symbol.
    raw_symbols.push(RawSymbol {
        string_index: 0,
        kind: 0,
        section: 0,
        address: text_section.address + text_section.size,
    });

    let mut symbols = Vec::with_capacity(count as usize);
    // The last entry is the end-of-section marker pushed above, so it is skipped.
    for i in 0..raw_symbols.len() - 1 {
        let sym = &raw_symbols[i];

        // Ignore unnamed entries.
        if sym.string_index == 0 {
            continue;
        }

        const N_TYPE: u8 = 0x0E;
        const INDIRECT: u8 = 0xA;
        const SECTION: u8 = 0xE;

        let sub_type = sym.kind & N_TYPE;

        // Ignore indirect symbols.
        // NOTE(review): as written, this mask test rejects only the type
        // values 0x0 and 0x4 - confirm that this is the intended filter.
        if sub_type & INDIRECT == 0 {
            continue;
        }

        // Ignore symbols without a section.
        if sub_type & SECTION == 0 {
            continue;
        }

        // Ignore symbols that aren't in the first section.
        // The first section is usually __TEXT,__text.
        if sym.section != 1 {
            continue;
        }

        // Mach-O format doesn't store the symbols size,
        // so we have to calculate it by subtracting an address of the next symbol
        // from the current.
        // Next symbol can have the same address as the current one,
        // so we have to find the one that has a different address.
        let next_sym = raw_symbols[i..].iter().find(|s| s.address != sym.address);

        // The end-of-section marker is appended after sorting, so it can be
        // lower than the address of a symbol placed past the section end.
        // Such a symbol has no meaningful size and is skipped.
        let size = match next_sym.and_then(|next| next.address.checked_sub(sym.address)) {
            Some(size) => size,
            None => continue,
        };

        if let Some(s) = parse_null_string(strings, sym.string_index as usize) {
            symbols.push(SymbolData {
                name: crate::demangle::SymbolName::demangle(s),
                address: sym.address,
                size,
            });
        }
    }

    Ok(symbols)
}
binfarce-0.2.1/src/parser.rs000064400000000000000000000107030000000000000140040ustar 00000000000000// Prohibit dangerous things we definitely don't want
#![deny(clippy::integer_arithmetic)]
#![deny(clippy::cast_possible_truncation)]
#![deny(clippy::indexing_slicing)]
// Style lints
#![warn(clippy::cast_lossless)]

use std::{str, mem, convert::TryInto};

use crate::ByteOrder;

/// The error returned when a read or a seek goes past the end of the data.
#[derive(Debug, Copy, Clone)]
pub struct UnexpectedEof {}

impl std::fmt::Display for UnexpectedEof {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Unexpected end of file")
    }
}

impl std::error::Error for UnexpectedEof {}

/// A fixed-size number that can be decoded from a [`Stream`].
pub trait RawNumber: Sized {
    /// Decodes a value at the current stream offset without advancing it.
    ///
    /// Returns `None` when there are not enough bytes left.
    fn parse(s: &mut Stream) -> Option<Self>;
}

impl RawNumber for u8 {
    #[inline]
    fn parse(s: &mut Stream) -> Option<Self> {
        s.data.get(s.offset).copied()
    }
}

impl RawNumber for i8 {
    #[inline]
    fn parse(s: &mut Stream) -> Option<Self> {
        s.data.get(s.offset).map(|x| *x as i8)
    }
}

impl RawNumber for u16 {
    #[inline]
    fn parse(s: &mut Stream) -> Option<Self> {
        let end = s.offset.checked_add(mem::size_of::<Self>())?;
        let bytes: [u8; 2] = s.data.get(s.offset..end)?.try_into().ok()?;
        // Decode by the byte order of the data, so the result
        // does not depend on the byte order of the host.
        Some(match s.byte_order {
            ByteOrder::LittleEndian => Self::from_le_bytes(bytes),
            ByteOrder::BigEndian => Self::from_be_bytes(bytes),
        })
    }
}

impl RawNumber for i16 {
    #[inline]
    fn parse(s: &mut Stream) -> Option<Self> {
        // Reinterprets the bits of the unsigned value.
        u16::parse(s).map(|x| x as i16)
    }
}

impl RawNumber for u32 {
    #[inline]
    fn parse(s: &mut Stream) -> Option<Self> {
        let end = s.offset.checked_add(mem::size_of::<Self>())?;
        let bytes: [u8; 4] = s.data.get(s.offset..end)?.try_into().ok()?;
        // Decode by the byte order of the data, so the result
        // does not depend on the byte order of the host.
        Some(match s.byte_order {
            ByteOrder::LittleEndian => Self::from_le_bytes(bytes),
            ByteOrder::BigEndian => Self::from_be_bytes(bytes),
        })
    }
}

impl RawNumber for u64 {
    #[inline]
    fn parse(s: &mut Stream) -> Option<Self> {
        let end = s.offset.checked_add(mem::size_of::<Self>())?;
        let bytes: [u8; 8] = s.data.get(s.offset..end)?.try_into().ok()?;
        // Decode by the byte order of the data, so the result
        // does not depend on the byte order of the host.
        Some(match s.byte_order {
            ByteOrder::LittleEndian => Self::from_le_bytes(bytes),
            ByteOrder::BigEndian => Self::from_be_bytes(bytes),
        })
    }
}

/// A cursor over a byte slice that decodes numbers in a fixed byte order.
#[derive(Clone, Copy)]
pub struct Stream<'a> {
    data: &'a [u8],
    offset: usize,
    byte_order: ByteOrder,
}

impl<'a> Stream<'a> {
    /// Creates a stream positioned at the start of `data`.
    #[inline]
    pub fn new(data: &'a [u8], byte_order: ByteOrder) -> Self {
        Stream {
            data,
            offset: 0,
            byte_order,
        }
    }

    /// Creates a stream positioned at `offset`.
    ///
    /// # Errors
    ///
    /// Returns `UnexpectedEof` when `offset` does not point inside `data`.
    /// An offset equal to `data.len()` is rejected too.
    #[inline]
    pub fn new_at(data: &'a [u8], offset: usize, byte_order: ByteOrder) -> Result<Self, UnexpectedEof> {
        if offset < data.len() {
            Ok(Stream {
                data,
                offset,
                byte_order,
            })
        } else {
            Err(UnexpectedEof{})
        }
    }

    /// Checks whether the offset is at or past the end of the data.
    #[inline]
    pub fn at_end(&self) -> bool {
        self.offset >= self.data.len()
    }

    /// Returns the current offset from the start of the data.
    #[inline]
    pub fn offset(&self) -> usize {
        self.offset
    }

    /// Advances the offset by the size of `T` without decoding anything.
    #[inline]
    pub fn skip<T: RawNumber>(&mut self) -> Result<(), UnexpectedEof> {
        self.skip_len(mem::size_of::<T>())
    }

    /// Advances the offset by `len` bytes.
    ///
    /// The new offset is allowed to point past the end of the data;
    /// only an overflow of the offset itself is an error.
    #[inline]
    pub fn skip_len(&mut self, len: usize) -> Result<(), UnexpectedEof> {
        let new_offset = self.offset.checked_add(len).ok_or(UnexpectedEof{})?;
        self.offset = new_offset;
        Ok(())
    }

    /// Decodes a value of type `T` and advances past it.
    ///
    /// The offset is left untouched when the read fails.
    #[inline]
    pub fn read<T: RawNumber>(&mut self) -> Result<T, UnexpectedEof> {
        let new_offset = self.offset.checked_add(mem::size_of::<T>()).ok_or(UnexpectedEof{})?;
        let v = T::parse(self).ok_or(UnexpectedEof{})?;
        self.offset = new_offset;
        Ok(v)
    }

    /// Returns the next `len` bytes and advances past them.
    ///
    /// The offset is left untouched when the read fails.
    #[inline]
    pub fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], UnexpectedEof> {
        let new_offset = self.offset.checked_add(len).ok_or(UnexpectedEof{})?;
        let bytes = self.data.get(self.offset..new_offset).ok_or(UnexpectedEof{})?;
        self.offset = new_offset;
        Ok(bytes)
    }

    /// Returns the number of bytes between the offset and the end of the data.
    #[inline]
    pub fn remaining(&self) -> usize {
        self.data.len().saturating_sub(self.offset)
    }
}

/// Reads a null-terminated UTF-8 string that begins at `start`.
///
/// Returns `None` when `start` is out of bounds, when there is no
/// terminator, when the string is empty or when it is not valid UTF-8.
pub fn parse_null_string(data: &[u8], start: usize) -> Option<&str> {
    match data.get(start..)?.iter().position(|c| *c == b'\0') {
        Some(i) if i != 0 => str::from_utf8(data.get(start..start.checked_add(i)?)?).ok(),
        _ => None,
    }
}
binfarce-0.2.1/src/pe.rs000064400000000000000000000171700000000000000131210ustar 00000000000000// See https://github.com/m4b/goblin/blob/master/src/pe/symbol.rs for details.
// Prohibit dangerous things we definitely don't want #![deny(clippy::integer_arithmetic)] #![deny(clippy::cast_possible_truncation)] #![deny(clippy::indexing_slicing)] // Style lints #![warn(clippy::cast_lossless)] use crate::ByteOrder; use crate::demangle::SymbolData; use crate::parser::*; use crate::ParseError; use std::ops::Range; use std::convert::TryInto; const PE_POINTER_OFFSET: usize = 0x3c; const COFF_SYMBOL_SIZE: usize = 18; const IMAGE_SYM_CLASS_EXTERNAL: u8 = 2; const IMAGE_SYM_DTYPE_SHIFT: usize = 4; const IMAGE_SYM_DTYPE_FUNCTION: u16 = 2; const SIZEOF_PE_MAGIC: usize = 4; const SIZEOF_COFF_HEADER: usize = 20; #[derive(Debug, Copy, Clone)] pub struct PeHeader { machine: u16, number_of_sections: u16, time_date_stamp: u32, pointer_to_symbol_table: u32, number_of_symbols: u32, size_of_optional_header: u16, characteristics: u16, } #[derive(Debug, Copy, Clone)] pub struct Section<'a> { name: &'a str, virtual_size: u32, size_of_raw_data: u32, pointer_to_raw_data: u32, index: usize } impl Section <'_> { pub fn range(&self) -> Result, ParseError> { let start: usize = self.pointer_to_raw_data.try_into()?; let end: usize = start.checked_add(self.size_of_raw_data.try_into()?).ok_or(ParseError::MalformedInput)?; Ok(start..end) } } #[derive(Debug, Clone)] pub struct Pe<'a> { data: &'a [u8], pe_pointer: usize, header: PeHeader, } fn parse_pe_header(s: &mut Stream) -> Result { s.skip::()?; // magic Ok(PeHeader { machine: s.read()?, number_of_sections: s.read()?, time_date_stamp: s.read()?, pointer_to_symbol_table: s.read()?, number_of_symbols: s.read()?, size_of_optional_header: s.read()?, characteristics: s.read()?, }) } pub fn parse(data: &[u8]) -> Result { let mut s = Stream::new_at(data, PE_POINTER_OFFSET, ByteOrder::LittleEndian)?; let pe_pointer = s.read::()? 
as usize; let mut s = Stream::new_at(data, pe_pointer, ByteOrder::LittleEndian)?; let header = parse_pe_header(&mut s)?; Ok(Pe { data, pe_pointer, header, }) } impl Pe<'_> { pub fn header(&self) -> PeHeader { self.header } pub fn section_with_name(&self, name: &str) -> Result, ParseError> { let callback = |section: Section| { section.name == name }; self.find_section(callback) } pub fn find_section bool>(&self, callback: F) -> Result, ParseError> { let mut sections_offset: usize = 0; // we use a manual loop instead of .sum() to check for overflow for i in &[self.pe_pointer, SIZEOF_PE_MAGIC, SIZEOF_COFF_HEADER, self.header.size_of_optional_header as usize] { sections_offset = sections_offset.checked_add(*i).ok_or(ParseError::MalformedInput)?; } let mut s = Stream::new_at(self.data, sections_offset, ByteOrder::LittleEndian)?; for i in 0..self.header.number_of_sections { let name = s.read_bytes(8)?; let virtual_size: u32 = s.read()?; s.skip::()?; // virtual_address let size_of_raw_data: u32 = s.read()?; let pointer_to_raw_data: u32 = s.read()?; s.skip_len(16)?; // other data let len = name.iter().position(|c| *c == 0).unwrap_or(8); // this slicing operation is infallible, but either clippy or rust-analyzer complain if I just slice let name_slice = name.get(0..len).ok_or(ParseError::MalformedInput)?; // ignore sections with non-UTF8 names since the spec says they must be UTF-8 if let Ok(name_str) = std::str::from_utf8(name_slice) { let section = Section { name: name_str, virtual_size, size_of_raw_data, pointer_to_raw_data, index: i.into(), }; if callback(section) { return Ok(Some(section)); } } } Ok(None) } // only used by cargo-bloat which operates on trusted data, // so it's not hardened against malicious inputs #[allow(clippy::integer_arithmetic)] #[allow(clippy::cast_possible_truncation)] #[allow(clippy::indexing_slicing)] pub fn symbols(&self) -> Result<(Vec, u64), ParseError> { let number_of_symbols = self.header.number_of_symbols as usize; let mut symbols = 
Vec::with_capacity(number_of_symbols); let text_section = self.section_with_name(".text")? .ok_or(ParseError::SymbolsSectionIsMissing)?; let text_section_size = text_section.size_of_raw_data; let text_section_index = text_section.index; // Add the .text section size, which will be used // to calculate the size of the last symbol. symbols.push(SymbolData { name: crate::demangle::SymbolName::demangle(".text"), address: text_section_size.into(), size: 0, }); let mut s = Stream::new_at(self.data, self.header.pointer_to_symbol_table as usize, ByteOrder::LittleEndian)?; let symbols_data = s.read_bytes(number_of_symbols * COFF_SYMBOL_SIZE)?; let string_table_offset = s.offset(); let mut s = Stream::new(symbols_data, ByteOrder::LittleEndian); while !s.at_end() { let name = s.read_bytes(8)?; let value: u32 = s.read()?; let section_number: i16 = s.read()?; let kind: u16 = s.read()?; let storage_class: u8 = s.read()?; let number_of_aux_symbols: u8 = s.read()?; s.skip_len(number_of_aux_symbols as usize * COFF_SYMBOL_SIZE)?; if (kind >> IMAGE_SYM_DTYPE_SHIFT) != IMAGE_SYM_DTYPE_FUNCTION { continue; } if storage_class != IMAGE_SYM_CLASS_EXTERNAL { continue; } // `section_number` starts from 1. if section_number - 1 != text_section_index as i16 { continue; } let name = if !name.starts_with(&[0, 0, 0, 0]) { let len = name.iter().position(|c| *c == 0).unwrap_or(8); std::str::from_utf8(&name[0..len]).ok() } else { let mut s2 = Stream::new(&name[4..], ByteOrder::LittleEndian); let name_offset: u32 = s2.read()?; parse_null_string(self.data, string_table_offset + name_offset as usize) }; if let Some(s) = name { symbols.push(SymbolData { name: crate::demangle::SymbolName::demangle(s), address: value.into(), size: 0, }); } } // To find symbol sizes, we have to sort them by address. symbols.sort_by_key(|v| v.address); // PE format doesn't store the symbols size, // so we have to calculate it by subtracting an address of the next symbol // from the current. 
for i in 1..symbols.len() { let curr = symbols[i].address; let next_sym = symbols[i..].iter().skip_while(|s| s.address == curr).next(); if let Some(next_sym) = next_sym { symbols[i].size = next_sym.address - curr; } } // Remove the last symbol, which is `.text` section size. symbols.pop(); Ok((symbols, text_section_size.into())) } }