llvm-bitcode-0.1.2/.cargo_vcs_info.json0000644000000001120000000000000134060ustar { "git": { "sha1": "ff7727c8428e692faf025cae64061b9a0b4d0659" } } llvm-bitcode-0.1.2/.github/workflows/CI.yml000064400000000000000000000025460000000000000166040ustar 00000000000000on: [push, pull_request] name: CI jobs: check: name: Check runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - uses: actions-rs/cargo@v1 with: command: check test: name: Test Suite runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - uses: actions-rs/cargo@v1 with: command: test - name: Run cargo-tarpaulin uses: actions-rs/tarpaulin@v0.1 with: args: --all if: matrix.os == 'ubuntu-latest' - name: Upload to codecov.io uses: codecov/codecov-action@v1 if: matrix.os == 'ubuntu-latest' fmt: name: Rustfmt runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - run: rustup component add rustfmt - uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check llvm-bitcode-0.1.2/.gitignore000064400000000000000000000000230000000000000141450ustar 00000000000000/target Cargo.lock llvm-bitcode-0.1.2/Cargo.toml0000644000000015260000000000000114160ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "llvm-bitcode" version = "0.1.2" authors = ["messense "] description = "LLVM Bitcode parser in Rust" readme = "README.md" keywords = ["llvm", "bitcode"] license = "MIT" repository = "https://github.com/messense/llvm-bitcode-rs.git" [dependencies.num_enum] version = "0.5.1" llvm-bitcode-0.1.2/Cargo.toml.orig000064400000000000000000000006440000000000000150550ustar 00000000000000[package] name = "llvm-bitcode" version = "0.1.2" authors = ["messense "] description = "LLVM Bitcode parser in Rust" edition = "2018" keywords = ["llvm", "bitcode"] readme = "README.md" license = "MIT" repository = "https://github.com/messense/llvm-bitcode-rs.git" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] num_enum = "0.5.1" llvm-bitcode-0.1.2/LICENSE000064400000000000000000000020510000000000000131650ustar 00000000000000MIT License Copyright (c) 2021 messense Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. llvm-bitcode-0.1.2/README.md000064400000000000000000000015660000000000000134510ustar 00000000000000# llvm-bitcode-rs [![GitHub Actions](https://github.com/messense/llvm-bitcode-rs/workflows/CI/badge.svg)](https://github.com/messense/llvm-bitcode-rs/actions?query=workflow%3ACI) [![codecov](https://codecov.io/gh/messense/llvm-bitcode-rs/branch/master/graph/badge.svg)](https://codecov.io/gh/messense/llvm-bitcode-rs) [![Crates.io](https://img.shields.io/crates/v/llvm-bitcode.svg)](https://crates.io/crates/llvm-bitcode) [![docs.rs](https://docs.rs/llvm-bitcode/badge.svg)](https://docs.rs/llvm-bitcode/) LLVM Bitcode parser in Rust ## Installation Add it to your ``Cargo.toml``: ```toml [dependencies] llvm-bitcode = "0.1" ``` then you are good to go. If you are using Rust 2015 you have to add ``extern crate llvm_bitcode`` to your crate root as well. ## License This work is released under the MIT license. A copy of the license is provided in the [LICENSE](./LICENSE) file.llvm-bitcode-0.1.2/src/bitcode.rs000064400000000000000000000125530000000000000147360ustar 00000000000000use std::collections::HashMap; use crate::bits::Bits; use crate::read::{BitStreamReader, Error}; use crate::visitor::{BitStreamVisitor, CollectingVisitor}; const LLVM_BITCODE_WRAPPER_MAGIC: u32 = 0x0B17C0DE; /// Represents the contents of a file encoded using the /// [LLVM bitstream container format](https://llvm.org/docs/BitCodeFormat.html#bitstream-container-format) #[derive(Debug, Clone)] pub struct Bitcode { pub signature: Signature, pub elements: Vec, pub block_info: HashMap, } /// Blocks in a bitstream denote nested regions of the stream, /// and are identified by a content-specific id number /// /// Block IDs 0-7 are reserved for [standard blocks](https://llvm.org/docs/BitCodeFormat.html#standard-blocks) /// whose meaning is defined by Bitcode; /// block IDs 8 and greater are application specific. #[derive(Debug, Clone)] pub struct Block { /// Block ID pub id: u64, /// Block elements pub elements: Vec, } #[derive(Debug, Clone)] pub enum Payload { Array(Vec), Char6String(String), Blob(Vec), } /// Data records consist of a record code and a number of (up to) 64-bit integer values /// /// The interpretation of the code and values is application specific and may vary between different block types. #[derive(Debug, Clone)] pub struct Record { /// Record code pub id: u64, /// An abbreviated record has a abbreviation id followed by a set of fields pub fields: Vec, /// Array and Blob encoding has payload pub payload: Option, } /// Bitcode element #[derive(Debug, Clone)] pub enum BitcodeElement { /// Block Block(Block), /// Data record Record(Record), } impl BitcodeElement { /// Returns true if it is a `Block` pub fn is_block(&self) -> bool { matches!(self, BitcodeElement::Block(_)) } /// If it is a `Block`, returns the associated block. Returns `None` otherwise. pub fn as_block(&self) -> Option<&Block> { match self { BitcodeElement::Block(block) => Some(block), BitcodeElement::Record(_) => None, } } /// If it is a `Block`, returns the associated mutable block. Returns `None` otherwise. pub fn as_block_mut(&mut self) -> Option<&mut Block> { match self { BitcodeElement::Block(block) => Some(block), BitcodeElement::Record(_) => None, } } /// Returns true if it is a `Record` pub fn is_record(&self) -> bool { matches!(self, BitcodeElement::Record(_)) } /// If it is a `Record`, returns the associated record. Returns `None` otherwise. pub fn as_record(&self) -> Option<&Record> { match self { BitcodeElement::Block(_) => None, BitcodeElement::Record(record) => Some(record), } } /// If it is a `Record`, returns the associated mutable record. Returns `None` otherwise. pub fn as_record_mut(&mut self) -> Option<&mut Record> { match self { BitcodeElement::Block(_) => None, BitcodeElement::Record(record) => Some(record), } } } /// Block information #[derive(Debug, Clone, Default)] pub struct BlockInfo { /// Block name pub name: String, /// Data record names pub record_names: HashMap, } /// aka. Magic number #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)] pub struct Signature(u32); impl Signature { pub fn new(val: u32) -> Self { Self(val) } pub fn into_inner(self) -> u32 { self.0 } } impl Bitcode { fn clean(data: &[u8]) -> (Signature, &[u8]) { assert!(data.len() > 4); let signature = Bits::new(data).read_bits(0, 32) as u32; if signature == LLVM_BITCODE_WRAPPER_MAGIC { // It is a LLVM Bitcode wrapper, remove wrapper header assert!(data.len() > 20); let offset = u32::from_le_bytes([data[8], data[9], data[10], data[11]]) as usize; let size = u32::from_le_bytes([data[12], data[13], data[14], data[15]]) as usize; let data = &data[offset..offset + size]; let signature = Bits::new(data).read_bits(0, 32) as u32; (Signature(signature), &data[4..]) } else { (Signature(signature), &data[4..]) } } /// Parse bitcode from bytes /// /// Accepts both LLVM bitcode and bitcode wrapper formats pub fn new(data: &[u8]) -> Result { let (signature, stream) = Self::clean(data); let mut reader = BitStreamReader::new(stream); let mut visitor = CollectingVisitor::new(); reader.read_block(BitStreamReader::TOP_LEVEL_BLOCK_ID, 2, &mut visitor)?; Ok(Self { signature, elements: visitor.finalize_top_level_elements(), block_info: reader.block_info, }) } /// Read bitcode from bytes with a visitor /// /// Accepts both LLVM bitcode and bitcode wrapper formats pub fn read(data: &[u8], visitor: &mut V) -> Result<(), Error> where V: BitStreamVisitor, { let (signature, stream) = Self::clean(data); if !visitor.validate(signature) { return Err(Error::InvalidSignature(signature.into_inner())); } let mut reader = BitStreamReader::new(stream); reader.read_block(BitStreamReader::TOP_LEVEL_BLOCK_ID, 2, visitor) } } llvm-bitcode-0.1.2/src/bits.rs000064400000000000000000000104530000000000000142630ustar 00000000000000use std::{error, fmt}; #[derive(Debug, Clone)] pub enum Error { BufferOverflow, VbrOverflow, } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Error::BufferOverflow => write!(f, "buffer overflow"), Error::VbrOverflow => write!(f, "vbr overflow"), } } } impl error::Error for Error {} #[derive(Debug, Clone)] pub struct Bits<'a> { buffer: &'a [u8], start_index: usize, end_index: usize, } impl<'a> Bits<'a> { pub fn new(buffer: &'a [u8]) -> Self { let end_index = buffer.len() * 8; Self { buffer, start_index: 0, end_index, } } pub fn read_bits(&self, offset: usize, count: usize) -> u64 { let upper_bound = offset.wrapping_add(count); assert!(count <= 64); assert!(upper_bound >= offset); assert!(upper_bound <= self.end_index); let top_byte_index = upper_bound >> 3; let mut res = 0; if upper_bound & 7 != 0 { let mask = (1u8 << (upper_bound & 7) as u8).wrapping_sub(1); res = u64::from(self.buffer[top_byte_index] & mask); } for i in ((offset >> 3)..(upper_bound >> 3)).rev() { res <<= 8; res |= u64::from(self.buffer[i]); } if offset & 7 != 0 { res >>= offset as u64 & 7; } res } pub fn len(&self) -> usize { self.end_index } } #[derive(Debug, Clone)] pub struct Cursor<'a> { buffer: Bits<'a>, offset: usize, } impl<'a> Cursor<'a> { pub fn new(buffer: Bits<'a>) -> Self { Self { buffer, offset: 0 } } pub fn is_at_start(&self) -> bool { self.offset == self.buffer.start_index } pub fn is_at_end(&self) -> bool { // TODO: verify this self.offset == self.buffer.len() } pub fn peek(&self, count: usize) -> Result { if self.buffer.len() - self.offset < count { return Err(Error::BufferOverflow); } Ok(self.buffer.read_bits(self.offset, count)) } pub fn read(&mut self, count: usize) -> Result { let res = self.peek(count)?; self.offset += count; Ok(res) } pub fn read_bytes(&mut self, count: usize) -> Result, Error> { assert_eq!(self.offset & 0b111, 0); let offset = self.offset.wrapping_add(count << 3); assert!(offset >= self.offset); if offset > self.buffer.len() { return Err(Error::BufferOverflow); } let bytes: Vec = self .buffer .buffer .iter() .skip(self.offset >> 3) .take((offset - self.offset) >> 3) .cloned() .collect(); self.offset = offset; Ok(bytes) } pub fn skip_bytes(&mut self, count: usize) -> Result<(), Error> { assert_eq!(self.offset & 0b111, 0); let offset = self.offset.wrapping_add(count << 3); assert!(offset >= self.offset); if offset > self.buffer.len() { return Err(Error::BufferOverflow); } self.offset = offset; Ok(()) } pub fn read_vbr(&mut self, width: usize) -> Result { assert!(width > 1); let test_bit = (1 << width.wrapping_sub(1)) as u64; let mask = test_bit.wrapping_sub(1); let mut res = 0; let mut offset = 0; let mut next; loop { next = self.read(width)?; res |= (next & mask) << offset; offset += width.wrapping_sub(1); if offset > 64 { return Err(Error::VbrOverflow); } if next & test_bit == 0 { break; } } Ok(res) } pub fn advance(&mut self, align: usize) -> Result<(), Error> { assert!(self.offset.wrapping_add(align.wrapping_sub(1)) >= self.offset); assert_eq!(align & align.wrapping_sub(1), 0); if self.offset % align == 0 { return Ok(()); } let offset = (self.offset.wrapping_add(align)) & !(align.wrapping_sub(1)); if offset > self.buffer.len() { return Err(Error::BufferOverflow); } self.offset = offset; Ok(()) } } llvm-bitcode-0.1.2/src/bitstream.rs000064400000000000000000000065000000000000000153120ustar 00000000000000use num_enum::TryFromPrimitive; /// An `Abbreviation` represents the encoding definition for a user-defined /// record. An `Abbreviation` is the primary form of compression available in /// a bitstream file. #[derive(Debug, Clone)] pub struct Abbreviation { /// Abbreviation operands pub operands: Vec, } /// Abbreviation operand #[derive(Debug, Clone)] pub enum Operand { /// A literal value (emitted as a VBR8 field) Literal(u64), /// A fixed-width field Fixed(u8), /// A VBR-encoded value with the provided chunk width Vbr(u8), /// An array of values. This expects another operand encoded /// directly after indicating the element type. /// The array will begin with a vbr6 value indicating the length of /// the following array. Array(Box), /// A char6-encoded ASCII character Char6, /// Emitted as a vbr6 value, padded to a 32-bit boundary and then /// an array of 8-bit objects Blob, } impl Operand { /// Whether this case is payload pub fn is_payload(&self) -> bool { use Operand::*; match self { Array(_) | Blob => true, Literal(_) | Fixed(_) | Vbr(_) | Char6 => false, } } /// Whether this case is the `literal` case pub fn is_literal(&self) -> bool { matches!(self, Operand::Literal(_)) } pub fn is_array(&self) -> bool { matches!(self, Operand::Array(_)) } pub fn is_blob(&self) -> bool { matches!(self, Operand::Blob) } /// The llvm::BitCodeAbbrevOp::Encoding value this /// enum case represents. /// - note: Must match the encoding in /// http://llvm.org/docs/BitCodeFormat.html#define-abbrev-encoding pub fn encoded_kind(&self) -> u8 { use Operand::*; match self { Literal(_) => 0, Fixed(_) => 1, Vbr(_) => 2, Array(_) => 3, Char6 => 4, Blob => 5, } } } /// A `BlockInfoCode` enumerates the bits that occur in the metadata for /// a block or record. Of these bits, only `SetBid` is required. If /// a name is given to a block or record with `BlockName` or /// `SetRecordName`, debugging tools like `llvm-bcanalyzer` can be used to /// introspect the structure of blocks and records in the bitstream file. #[derive(Debug, Clone, Copy, TryFromPrimitive)] #[repr(u8)] pub enum BlockInfoCode { /// Indicates which block ID is being described. SetBid = 1, /// An optional element that records which bytes of the record are the /// name of the block. BlockName = 2, /// An optional element that records the record ID number and the bytes /// for the name of the corresponding record. SetRecordName = 3, } /// An abbreviation id is a fixed-width field that occurs at the start of /// abbreviated data records and inside block definitions. /// /// Bitstream reserves 4 special abbreviation IDs for its own bookkeeping. #[derive(Debug, Clone, Copy, TryFromPrimitive)] #[repr(u64)] pub enum BuiltinAbbreviationId { /// Marks the end of the current block. EndBlock = 0, /// Marks the beginning of a new block. EnterSubBlock = 1, /// Marks the definition of a new abbreviation. DefineAbbreviation = 2, /// Marks the definition of a new unabbreviated record. UnabbreviatedRecord = 3, } llvm-bitcode-0.1.2/src/lib.rs000064400000000000000000000004720000000000000140700ustar 00000000000000//! LLVM Bitcode parser in Rust /// Bitcode definitions pub mod bitcode; mod bits; /// Bitstream definitions pub mod bitstream; /// Bitstream reader pub mod read; /// Bitstream visitor pub mod visitor; pub use self::bitcode::Bitcode; pub use self::read::BitStreamReader; pub use self::visitor::BitStreamVisitor; llvm-bitcode-0.1.2/src/read.rs000064400000000000000000000353700000000000000142420ustar 00000000000000use std::{collections::HashMap, convert::TryFrom, error, fmt, mem}; use crate::bitcode::{BlockInfo, Payload, Record, Signature}; use crate::bits::{self, Bits, Cursor}; use crate::bitstream::{Abbreviation, BlockInfoCode, BuiltinAbbreviationId, Operand}; use crate::visitor::BitStreamVisitor; /// Bitstream reader errors #[derive(Debug, Clone)] pub enum Error { InvalidSignature(u32), InvalidAbbrev, NestedBlockInBlockInfo, MissingSetBid, InvalidBlockInfoRecord(u64), AbbrevWidthTooSmall(usize), NoSuchAbbrev { block_id: u64, abbrev_id: usize }, MissingEndBlock(u64), ReadBits(bits::Error), } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Error::InvalidSignature(sig) => { write!(f, "invalid signature (magic number): 0x{:x}", sig) } Error::InvalidAbbrev => write!(f, "invalid abbreviation"), Error::NestedBlockInBlockInfo => write!(f, "nested block in block info"), Error::MissingSetBid => write!(f, "missing SETBID"), Error::InvalidBlockInfoRecord(record_id) => { write!(f, "invalid block info record `{}`", record_id) } Error::AbbrevWidthTooSmall(width) => { write!(f, "abbreviation width `{}` is too small", width) } Error::NoSuchAbbrev { block_id, abbrev_id, } => write!( f, "no such abbreviation `{}` in block `{}`", abbrev_id, block_id ), Error::MissingEndBlock(block_id) => write!(f, "missing end block for `{}`", block_id), Error::ReadBits(err) => err.fmt(f), } } } impl error::Error for Error {} impl From for Error { fn from(err: bits::Error) -> Self { Self::ReadBits(err) } } /// Bitstream reader #[derive(Debug, Clone)] pub struct BitStreamReader<'a> { cursor: Cursor<'a>, /// Block information pub(crate) block_info: HashMap, global_abbrevs: HashMap>, } impl<'a> BitStreamReader<'a> { /// Top level fake block ID pub const TOP_LEVEL_BLOCK_ID: u64 = u64::MAX; /// Create a new reader from bytes pub fn new(buffer: &'a [u8]) -> Self { let cursor = Cursor::new(Bits::new(buffer)); Self { cursor, block_info: HashMap::new(), global_abbrevs: HashMap::new(), } } /// Read signature, aka. Magic Number pub fn read_signature(&mut self) -> Result { assert!(self.cursor.is_at_start()); let bits = self.cursor.read(mem::size_of::() * 8)? as u32; Ok(Signature::new(bits)) } /// Read abbreviated operand pub fn read_abbrev_op(&mut self) -> Result { let is_literal = self.cursor.read(1)?; if is_literal == 1 { return Ok(Operand::Literal(self.cursor.read_vbr(8)?)); } let op_type = self.cursor.read(3)?; let op = match op_type { 1 => Operand::Fixed(self.cursor.read_vbr(5)? as u8), 2 => Operand::Vbr(self.cursor.read_vbr(5)? as u8), 3 => Operand::Array(Box::new(self.read_abbrev_op()?)), 4 => Operand::Char6, 5 => Operand::Blob, _ => return Err(Error::InvalidAbbrev), }; Ok(op) } /// Read abbreviation pub fn read_abbrev(&mut self, num_ops: usize) -> Result { if num_ops == 0 { return Err(Error::InvalidAbbrev); } let mut operands = Vec::new(); for i in 0..num_ops { let op = self.read_abbrev_op()?; let is_array = op.is_array(); let is_blob = op.is_blob(); operands.push(op); if is_array { if i == num_ops - 2 { break; } else { return Err(Error::InvalidAbbrev); } } else if is_blob { if i != num_ops - 1 { return Err(Error::InvalidAbbrev); } } } Ok(Abbreviation { operands }) } fn read_single_abbreviated_record_operand(&mut self, operand: &Operand) -> Result { match operand { Operand::Char6 => { let value = self.cursor.read(6)?; return match value { 0..=25 => Ok(value + u64::from('a' as u32)), 26..=51 => Ok(value + u64::from('A' as u32) - 26), 52..=61 => Ok(value + u64::from('0' as u32) - 52), 62 => Ok(u64::from('.' as u32)), 63 => Ok(u64::from('_' as u32)), _ => Err(Error::InvalidAbbrev), }; } Operand::Literal(value) => Ok(*value), Operand::Fixed(width) => Ok(self.cursor.read(*width as usize)?), Operand::Vbr(width) => Ok(self.cursor.read_vbr(*width as usize)?), Operand::Array(_) | Operand::Blob => Err(Error::InvalidAbbrev), } } /// Read abbreviated data record pub fn read_abbreviated_record(&mut self, abbrev: &Abbreviation) -> Result { let code = self.read_single_abbreviated_record_operand(&abbrev.operands.first().unwrap())?; let last_operand = abbrev.operands.last().unwrap(); let last_regular_operand_index = abbrev.operands.len() - (if last_operand.is_payload() { 1 } else { 0 }); let mut fields = Vec::new(); for op in &abbrev.operands[1..last_regular_operand_index] { fields.push(self.read_single_abbreviated_record_operand(op)?); } let payload = if last_operand.is_payload() { match last_operand { Operand::Array(element) => { let length = self.cursor.read_vbr(6)? as usize; let mut elements = Vec::with_capacity(length); for _ in 0..length { elements.push(self.read_single_abbreviated_record_operand(element)?); } if matches!(**element, Operand::Char6) { let s: String = elements .into_iter() .map(|x| std::char::from_u32(x as u32).unwrap()) .collect(); Some(Payload::Char6String(s)) } else { Some(Payload::Array(elements)) } } Operand::Blob => { let length = self.cursor.read_vbr(6)? as usize; self.cursor.advance(32)?; let data = self.cursor.read_bytes(length)?; self.cursor.advance(32)?; Some(Payload::Blob(data)) } _ => unreachable!(), } } else { None }; Ok(Record { id: code, fields, payload, }) } /// Read block info block pub fn read_block_info_block(&mut self, abbrev_width: usize) -> Result<(), Error> { use BuiltinAbbreviationId::*; let mut current_block_id = None; loop { let abbrev_id = self.cursor.read(abbrev_width)?; match BuiltinAbbreviationId::try_from(abbrev_id).map_err(|_| Error::NoSuchAbbrev { block_id: 0, abbrev_id: abbrev_id as usize, })? { EndBlock => { self.cursor.advance(32)?; return Ok(()); } EnterSubBlock => { return Err(Error::NestedBlockInBlockInfo); } DefineAbbreviation => { if let Some(block_id) = current_block_id { let num_ops = self.cursor.read_vbr(5)? as usize; let abbrev = self.read_abbrev(num_ops)?; let abbrevs = self .global_abbrevs .entry(block_id) .or_insert_with(|| Vec::new()); abbrevs.push(abbrev); } else { return Err(Error::MissingSetBid); } } UnabbreviatedRecord => { let code = self.cursor.read_vbr(6)?; let num_ops = self.cursor.read_vbr(6)? as usize; let mut operands = Vec::with_capacity(num_ops); for _ in 0..num_ops { operands.push(self.cursor.read_vbr(6)?); } match BlockInfoCode::try_from( u8::try_from(code).map_err(|_| Error::InvalidBlockInfoRecord(code))?, ) .map_err(|_| Error::InvalidBlockInfoRecord(code))? { BlockInfoCode::SetBid => { if operands.len() != 1 { return Err(Error::InvalidBlockInfoRecord(code)); } current_block_id = operands.first().cloned(); } BlockInfoCode::BlockName => { if let Some(block_id) = current_block_id { let block_info = self .block_info .entry(block_id) .or_insert_with(|| BlockInfo::default()); let name = String::from_utf8( operands.into_iter().map(|x| x as u8).collect::>(), ) .unwrap_or_else(|_| "".to_string()); block_info.name = name; } else { return Err(Error::MissingSetBid); } } BlockInfoCode::SetRecordName => { if let Some(block_id) = current_block_id { if let Some(record_id) = operands.first().cloned() { let block_info = self .block_info .entry(block_id) .or_insert_with(|| BlockInfo::default()); let name = String::from_utf8( operands .into_iter() .skip(1) .map(|x| x as u8) .collect::>(), ) .unwrap_or_else(|_| "".to_string()); block_info.record_names.insert(record_id, name); } else { return Err(Error::InvalidBlockInfoRecord(code)); } } else { return Err(Error::MissingSetBid); } } } } } } } /// Read block with visitor pub fn read_block( &mut self, id: u64, abbrev_width: usize, visitor: &mut V, ) -> Result<(), Error> { use BuiltinAbbreviationId::*; while !self.cursor.is_at_end() { let abbrev_id = self.cursor.read(abbrev_width)?; match BuiltinAbbreviationId::try_from(abbrev_id) { Ok(abbrev_id) => match abbrev_id { EndBlock => { self.cursor.advance(32)?; visitor.did_exit_block(); return Ok(()); } EnterSubBlock => { let block_id = self.cursor.read_vbr(8)?; let new_abbrev_width = self.cursor.read_vbr(4)? as usize; self.cursor.advance(32)?; let block_length = self.cursor.read(32)? as usize * 4; match block_id { 0 => self.read_block_info_block(new_abbrev_width)?, _ => { if !visitor.should_enter_block(block_id) { self.cursor.skip_bytes(block_length)?; break; } self.read_block(block_id, new_abbrev_width, visitor)?; } } } DefineAbbreviation => { let num_ops = self.cursor.read_vbr(5)? as usize; let abbrev = self.read_abbrev(num_ops)?; let abbrev_info = self.global_abbrevs.entry(id).or_insert_with(|| Vec::new()); abbrev_info.push(abbrev); } UnabbreviatedRecord => { let code = self.cursor.read_vbr(6)?; let num_ops = self.cursor.read_vbr(6)? as usize; let mut operands = Vec::with_capacity(num_ops); for _ in 0..num_ops { operands.push(self.cursor.read_vbr(6)?); } visitor.visit(Record { id: code, fields: operands, payload: None, }); } }, Err(_) => { if let Some(abbrev_info) = self.global_abbrevs.get(&id).cloned() { let abbrev_id = abbrev_id as usize; if abbrev_id - 4 < abbrev_info.len() { visitor .visit(self.read_abbreviated_record(&abbrev_info[abbrev_id - 4])?); continue; } } return Err(Error::NoSuchAbbrev { block_id: id, abbrev_id: abbrev_id as usize, }); } } } if id != Self::TOP_LEVEL_BLOCK_ID { return Err(Error::MissingEndBlock(id)); } Ok(()) } } llvm-bitcode-0.1.2/src/visitor.rs000064400000000000000000000033110000000000000150140ustar 00000000000000use crate::bitcode::{BitcodeElement, Block, Record, Signature}; use crate::BitStreamReader; /// A visitor which receives callbacks while reading a bitstream. pub trait BitStreamVisitor { /// Validate a bitstream's signature or "magic number". fn validate(&self, _signature: Signature) -> bool { true } /// Called when a new block is encountered. Return `true` to enter the block /// and read its contents, or `false` to skip it. fn should_enter_block(&mut self, id: u64) -> bool; /// Called when a block is exited. fn did_exit_block(&mut self); /// Called whenever a record is encountered. fn visit(&mut self, record: Record); } /// A basic visitor that collects all the blocks and records in a stream. pub struct CollectingVisitor { stack: Vec<(u64, Vec)>, } impl CollectingVisitor { pub fn new() -> Self { Self { stack: vec![(BitStreamReader::TOP_LEVEL_BLOCK_ID, Vec::new())], } } pub fn finalize_top_level_elements(mut self) -> Vec { assert_eq!(self.stack.len(), 1); self.stack.pop().unwrap().1 } } impl BitStreamVisitor for CollectingVisitor { fn should_enter_block(&mut self, id: u64) -> bool { self.stack.push((id, Vec::new())); true } fn did_exit_block(&mut self) { if let Some((id, elements)) = self.stack.pop() { let block = Block { id, elements }; let last = self.stack.last_mut().unwrap(); last.1.push(BitcodeElement::Block(block)); } } fn visit(&mut self, record: Record) { let last = self.stack.last_mut().unwrap(); last.1.push(BitcodeElement::Record(record)); } } llvm-bitcode-0.1.2/tests/fixtures/serialized.dia000064400000000000000000000041140000000000000200070ustar 00000000000000DIAG 0 d @ laaAH (!aC"B:9;;lHYqpps( H1 @P!!aCbB:C9B8C9l0SH|HzX$Rd( B(AE T(B(D @!@2 D ZD @!J( A!  %-9/Users/owenvoorhees/Documents/Development/Blink for Mac/Blink for Mac/ItemDetailViewController.swiftj8@@ expected member name following '.'%j8``'default' label can only appear inside a 'switch' statement% expected '}' at end of brace statement% expected declaration%-(8/Users/owenvoorhees/Documents/Development/Blink for Mac/Blink for Mac/StoreSearchCoordinator.swift4*@@ expected member name following '.'%4*expected ',' separator)@@E,%4*@@ expected expression in list of expressions%4*expected ',' separator)@@E,%)8-/Users/owenvoorhees/Documents/Development/Blink for Mac/Blink for Mac/Defaults.swiftL8@@ expected member name following '.'%L8``'default' label can only appear inside a 'switch' statement% expected '}' at end of brace statement% expected declaration%-H@@//Users/owenvoorhees/Documents/Development/Blink for Mac/Blink for Mac/NetworkImageCache.swift4\ expected identifier after '.' expression%4\expected ',' separatorI .,%4\@@ expected expression in list of expressions%4\expected ',' separatorI .,%.X*/Users/owenvoorhees/Documents/Development/Blink for Mac/Observable.swiftD use of unresolved identifier 'DispatchQueue'V@@"llvm-bitcode-0.1.2/tests/fixtures/simple.bc000064400000000000000000000044600000000000000170000ustar 00000000000000  BC5b 0$mӴOO QL!  !#AI29 %b EB Bd28K 22Hp!#DAd CF 22*(*1|\ 2" d!!ㄡL dLs`PPRQ'p (@€wy(qvȇ6ww r 6 tt rhyy60xhvz@A!ܡ aa`AΡ !aaAءڀ!ء !؁0p`y(0yv(6wHwr ؁a Aaaʡ !0p`y(htp6`r8p`6rzxyh{Hrt   ڠ0p`y(pwhuv6pv(y!a  ڠ0p`y(pwhyHspr 6Їrw60xhvz@ޡ A!ڠ0p`y(yxq(6zqX~aa~A! a֡ Aށa ġA`Aps(zhqzAAΡ ̡Aa5|;`5|6Xsy6Xvh|`5p@s`5y(6Xyr6Xyr6Xyr76Xyy(9`5<wp0rhs6hpt!a aʡ ́svrI2|;;6xv(6hpw|8p8p8`  wz0rs mrpqs z0r w z`tv@mv@z`tpq xzvs z`tr! yIJ80$6u2 L &GCBPP3f=C8ÌByxsq 3 BΡf0=C8=C==xtp{yHppzpvxp 0n0P3!!af0;;C9<<;v`{h7hrh7pp`v(vvxw_qry,0bȡ̡a!āa֐C9C9C9C98C8;/<;; ipXrpthx`ttSP@ P3 (A!܁fQ8C:;P$v`{h7`wxxQLP3ja!~!aT8;C=C9 false, BitcodeElement::Block(block) => block.id == 8, }) .unwrap(); let target_triple_record = module_block .as_block() .unwrap() .elements .iter() .find(|ele| match ele { BitcodeElement::Record(record) => record.id == 2, BitcodeElement::Block(_) => false, }) .unwrap() .as_record() .unwrap(); let fields: Vec = target_triple_record .fields .iter() .map(|x| *x as u8) .collect(); let target_triple = std::str::from_utf8(&fields).unwrap(); assert_eq!(target_triple, "x86_64-apple-macosx11.0.0"); } #[test] fn test_bitstream_reader() { struct LoggingVisitor(Vec); impl BitStreamVisitor for LoggingVisitor { fn should_enter_block(&mut self, id: u64) -> bool { self.0.push(format!("entering block: {}", id)); true } fn did_exit_block(&mut self) { self.0.push("exiting block".to_string()); } fn visit(&mut self, record: Record) { let payload = if let Some(payload) = &record.payload { match payload { Payload::Array(ele) => format!("array({} elements)", ele.len()), Payload::Blob(blob) => format!("blob({} bytes)", blob.len()), Payload::Char6String(s) => s.to_string(), } } else { "none".to_string() }; self.0.push(format!( "Record (id: {}, fields: {:?}, payload: {}", record.id, record.fields, payload )); } } let data = fs::read("tests/fixtures/serialized.dia").unwrap(); let mut visitor = LoggingVisitor(Vec::new()); Bitcode::read(&data, &mut visitor).unwrap(); assert_eq!( visitor.0, &[ "entering block: 8", "Record (id: 1, fields: [1], payload: none", "exiting block", "entering block: 9", "Record (id: 6, fields: [1, 0, 0, 100], payload: blob(100 bytes)", "Record (id: 2, fields: [3, 1, 53, 28, 0, 0, 0, 34], payload: blob(34 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 1, 53, 28, 0, 0, 0, 59], payload: blob(59 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 1, 113, 1, 0, 0, 0, 38], payload: blob(38 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 1, 113, 1, 0, 0, 0, 20], payload: blob(20 bytes)", "exiting block", "entering block: 9", "Record (id: 6, fields: [2, 0, 0, 98], payload: blob(98 bytes)", "Record (id: 2, fields: [3, 2, 21, 69, 0, 0, 0, 34], payload: blob(34 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 2, 21, 69, 0, 0, 0, 22], payload: blob(22 bytes)", "Record (id: 7, fields: [2, 21, 69, 0, 2, 21, 69, 0, 1], payload: blob(1 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 2, 21, 69, 0, 0, 0, 42], payload: blob(42 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 2, 21, 69, 0, 0, 0, 22], payload: blob(22 bytes)", "Record (id: 7, fields: [2, 21, 69, 0, 2, 21, 69, 0, 1], payload: blob(1 bytes)", "exiting block", "entering block: 9", "Record (id: 6, fields: [3, 0, 0, 84], payload: blob(84 bytes)", "Record (id: 2, fields: [3, 3, 38, 28, 0, 0, 0, 34], payload: blob(34 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 3, 38, 28, 0, 0, 0, 59], payload: blob(59 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 3, 66, 1, 0, 0, 0, 38], payload: blob(38 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 3, 66, 1, 0, 0, 0, 20], payload: blob(20 bytes)", "exiting block", "entering block: 9", "Record (id: 6, fields: [4, 0, 0, 93], payload: blob(93 bytes)", "Record (id: 2, fields: [3, 4, 15, 46, 0, 0, 0, 40], payload: blob(40 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 4, 15, 46, 0, 0, 0, 22], payload: blob(22 bytes)", "Record (id: 7, fields: [4, 15, 46, 0, 4, 15, 46, 0, 1], payload: blob(1 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 4, 15, 46, 0, 0, 0, 42], payload: blob(42 bytes)", "exiting block", "entering block: 9", "Record (id: 2, fields: [3, 4, 15, 46, 0, 0, 0, 22], payload: blob(22 bytes)", "Record (id: 7, fields: [4, 15, 46, 0, 4, 15, 46, 0, 1], payload: blob(1 bytes)", "exiting block", "entering block: 9", "Record (id: 6, fields: [5, 0, 0, 72], payload: blob(72 bytes)", "Record (id: 2, fields: [3, 5, 34, 13, 0, 0, 0, 44], payload: blob(44 bytes)", "Record (id: 3, fields: [5, 34, 13, 0, 5, 34, 26, 0], payload: none", "exiting block" ] ) }