pdb-0.8.0/.cargo_vcs_info.json0000644000000001360000000000100116060ustar { "git": { "sha1": "b052964e09d03eb190c8a60dc76344150ff8a9df" }, "path_in_vcs": "" }pdb-0.8.0/.github/workflows/ci.yml000064400000000000000000000025300072674642500151410ustar 00000000000000name: CI on: push: branches: [master] pull_request: branches: [master] env: CARGO_TERM_COLOR: always jobs: lint: name: Lint runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 with: toolchain: stable profile: minimal components: clippy, rustfmt, rust-docs override: true - uses: swatinem/rust-cache@v1 - name: Run Rustfmt uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check - name: Run Clippy uses: actions-rs/cargo@v1 with: command: clippy args: --all-targets --all-features -- -D warnings - name: Rust Doc Comments uses: actions-rs/cargo@v1 env: RUSTDOCFLAGS: -Dwarnings with: command: doc args: --no-deps --document-private-items test: name: Test runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 with: toolchain: stable profile: minimal override: true - uses: swatinem/rust-cache@v1 - name: Download Fixtures run: scripts/download - name: Run Cargo Tests uses: actions-rs/cargo@v1 with: command: test pdb-0.8.0/.gitignore000064400000000000000000000000550072674642500124160ustar 00000000000000target *.iml .idea fixtures/symbol_server/* pdb-0.8.0/Cargo.lock0000644000000023120000000000100075570ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "fallible-iterator" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" [[package]] name = "getopts" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" dependencies = [ "unicode-width", ] [[package]] name = "pdb" version = "0.8.0" dependencies = [ "fallible-iterator", "getopts", "scroll", "uuid", ] [[package]] name = "scroll" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" [[package]] name = "unicode-width" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" [[package]] name = "uuid" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cfcd319456c4d6ea10087ed423473267e1a071f3bc0aa89f80d60997843c6f0" pdb-0.8.0/Cargo.toml0000644000000022470000000000100076110ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "pdb" version = "0.8.0" authors = [ "Jan Michael Auer ", "Will Glynn ", ] exclude = [ "fixtures/*", "scripts/*", ] description = "A parser for Microsoft PDB (Program Database) debugging information" readme = "README.md" license = "MIT OR Apache-2.0" repository = "https://github.com/willglynn/pdb" [package.metadata.release] pre-release-commit-message = "Release {{version}}" tag-name = "{{version}}" tag-message = "Release {{version}}" dev-version = false [dependencies.fallible-iterator] version = "0.2.0" [dependencies.scroll] version = "0.11.0" [dependencies.uuid] version = "1.0.0" [dev-dependencies.getopts] version = "0.2.21" pdb-0.8.0/Cargo.toml.orig000064400000000000000000000012310072674642500133120ustar 00000000000000[package] name = "pdb" version = "0.8.0" description = "A parser for Microsoft PDB (Program Database) debugging information" repository = "https://github.com/willglynn/pdb" authors = ["Jan Michael Auer ", "Will Glynn "] readme = "README.md" license = "MIT OR Apache-2.0" edition = "2018" exclude = [ "fixtures/*", "scripts/*", ] [dependencies] fallible-iterator = "0.2.0" scroll = "0.11.0" uuid = "1.0.0" [dev-dependencies] # for examples/ getopts = "0.2.21" [package.metadata.release] pre-release-commit-message = "Release {{version}}" tag-name = "{{version}}" tag-message = "Release {{version}}" dev-version = false pdb-0.8.0/LICENSE-APACHE000064400000000000000000000251370072674642500123620ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. pdb-0.8.0/LICENSE-MIT000064400000000000000000000020420072674642500120600ustar 00000000000000Copyright (c) 2017 pdb Developers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pdb-0.8.0/README.md000064400000000000000000000067030072674642500117130ustar 00000000000000`pdb` === [![](https://img.shields.io/crates/v/pdb.svg)](https://crates.io/crates/pdb) [![](https://docs.rs/pdb/badge.svg)](https://docs.rs/pdb/) ![Build Status](https://github.com/willglynn/pdb/actions/workflows/ci.yml/badge.svg) This is a Rust library that parses Microsoft PDB (Program Database) files. These files contain debugging information produced by most compilers that target Windows, including information about symbols, types, modules, and so on. The PDB format is not documented per se, but Microsoft has [published information](https://github.com/Microsoft/microsoft-pdb) in the form of C++ code relating to its use. The PDB format is full of... history, including support for debugging 16-bit executables, COBOL user-defined types, and myriad other features. `pdb` does not understand everything about the PDB format, but it does cover enough to be useful for typical programs compiled today. [Documentation on docs.rs](https://docs.rs/pdb/). Design --- `pdb`'s design objectives are similar to [`gimli`](https://github.com/gimli-rs/gimli): * `pdb` works with the original data as it's formatted on-disk as long as possible. * `pdb` parses only what you ask. * `pdb` can read PDBs anywhere. There's no dependency on Windows, on the [DIA SDK](https://msdn.microsoft.com/en-us/library/x93ctkx8.aspx), or on the target's native byte ordering. Usage Example --- ```rust use pdb::FallibleIterator; use std::fs::File; fn main() -> pdb::Result<()> { let file = File::open("fixtures/self/foo.pdb")?; let mut pdb = pdb::PDB::open(file)?; let symbol_table = pdb.global_symbols()?; let address_map = pdb.address_map()?; let mut symbols = symbol_table.iter(); while let Some(symbol) = symbols.next()? { match symbol.parse() { Ok(pdb::SymbolData::Public(data)) if data.function => { // we found the location of a function! let rva = data.offset.to_rva(&address_map).unwrap_or_default(); println!("{} is {}", rva, data.name); } _ => {} } } Ok(()) } ``` A second example below walks module-private symbols. Example Programs --- Run with `cargo run --release --example <name>`: * [`pdb_symbols`](examples/pdb_symbols.rs) is a toy program that prints the name and location of every function and data value defined in the symbol table. * [`pdb2hpp`](examples/pdb2hpp.rs) is a somewhat larger program that prints an approximation of a C++ header file for a requested type given only a PDB. * [`pdb_lines`](examples/pdb_lines.rs) outputs line number information for every symbol in every module contained in a PDB. Real-world examples: * [`mstange/pdb-addr2line`](https://github.com/mstange/pdb-addr2line) resolves addresses to function names, and to file name and line number information, with the help of a PDB file. Inline stacks are supported. * [`getsentry/symbolic`](https://github.com/getsentry/symbolic) is a high-level symbolication library supporting most common debug file formats, demangling, and more. License --- Licensed under either of * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your option. 
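Module Symbols Example
---

The usage example above only reads the global symbol table. As a rough sketch in the same spirit (it mirrors the bundled `pdb_symbols` and `pdb_lines` example programs and assumes the same placeholder path `fixtures/self/foo.pdb`), module-private symbols can be walked like this:

```rust
use pdb::{FallibleIterator, SymbolData, PDB};

fn main() -> pdb::Result<()> {
    let file = std::fs::File::open("fixtures/self/foo.pdb")?;
    let mut pdb = PDB::open(file)?;

    // The debug information (DBI) stream lists every module in the PDB.
    let dbi = pdb.debug_information()?;
    let mut modules = dbi.modules()?;

    while let Some(module) = modules.next()? {
        println!("module: {}", module.module_name());

        // Not every module carries detailed debug info; skip the ones that don't.
        let info = match pdb.module_info(&module)? {
            Some(info) => info,
            None => continue,
        };

        // Walk the module's private symbols and print each procedure.
        let mut symbols = info.symbols()?;
        while let Some(symbol) = symbols.next()? {
            if let Ok(SymbolData::Procedure(proc)) = symbol.parse() {
                println!("  procedure {} at {:?}", proc.name, proc.offset);
            }
        }
    }

    Ok(())
}
```

Like the bundled examples, this is a starting point rather than a complete tool; `examples/pdb_lines.rs` shows how to go further and resolve file and line information for each procedure.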
Contribution --- Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. pdb-0.8.0/examples/pdb2hpp.rs000064400000000000000000000471640072674642500141650ustar 00000000000000use std::collections::BTreeSet; use std::fmt; use pdb::FallibleIterator; type TypeSet = BTreeSet; pub fn type_name<'p>( type_finder: &pdb::TypeFinder<'p>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result { let mut name = match type_finder.find(type_index)?.parse()? { pdb::TypeData::Primitive(data) => { let mut name = match data.kind { pdb::PrimitiveKind::Void => "void".to_string(), pdb::PrimitiveKind::Char => "char".to_string(), pdb::PrimitiveKind::UChar => "unsigned char".to_string(), pdb::PrimitiveKind::I8 => "int8_t".to_string(), pdb::PrimitiveKind::U8 => "uint8_t".to_string(), pdb::PrimitiveKind::I16 => "int16_t".to_string(), pdb::PrimitiveKind::U16 => "uint16_t".to_string(), pdb::PrimitiveKind::I32 => "int32_t".to_string(), pdb::PrimitiveKind::U32 => "uint32_t".to_string(), pdb::PrimitiveKind::I64 => "int64_t".to_string(), pdb::PrimitiveKind::U64 => "uint64_t".to_string(), pdb::PrimitiveKind::F32 => "float".to_string(), pdb::PrimitiveKind::F64 => "double".to_string(), pdb::PrimitiveKind::Bool8 => "bool".to_string(), _ => format!("unhandled_primitive.kind /* {:?} */", data.kind), }; if data.indirection.is_some() { name.push_str(" *"); } name } pdb::TypeData::Class(data) => { needed_types.insert(type_index); data.name.to_string().into_owned() } pdb::TypeData::Enumeration(data) => { needed_types.insert(type_index); data.name.to_string().into_owned() } pdb::TypeData::Union(data) => { needed_types.insert(type_index); data.name.to_string().into_owned() } pdb::TypeData::Pointer(data) => format!( "{}*", type_name(type_finder, data.underlying_type, needed_types)? ), pdb::TypeData::Modifier(data) => { if data.constant { format!( "const {}", type_name(type_finder, data.underlying_type, needed_types)? ) } else if data.volatile { format!( "volatile {}", type_name(type_finder, data.underlying_type, needed_types)? ) } else { // ? type_name(type_finder, data.underlying_type, needed_types)? } } pdb::TypeData::Array(data) => { let mut name = type_name(type_finder, data.element_type, needed_types)?; for size in data.dimensions { name = format!("{}[{}]", name, size); } name } _ => format!("Type{} /* TODO: figure out how to name it */", type_index), }; // TODO: search and replace std:: patterns if name == "std::basic_string,std::allocator >" { name = "std::string".to_string(); } Ok(name) } #[derive(Debug, Clone, PartialEq, Eq)] struct Class<'p> { kind: pdb::ClassKind, name: pdb::RawString<'p>, base_classes: Vec, fields: Vec>, instance_methods: Vec>, static_methods: Vec>, } impl<'p> Class<'p> { #[allow(clippy::unnecessary_wraps)] fn add_derived_from( &mut self, _: &pdb::TypeFinder<'p>, _: pdb::TypeIndex, _: &mut TypeSet, ) -> pdb::Result<()> { // TODO Ok(()) } fn add_fields( &mut self, type_finder: &pdb::TypeFinder<'p>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result<()> { match type_finder.find(type_index)?.parse()? 
{ pdb::TypeData::FieldList(data) => { for field in &data.fields { self.add_field(type_finder, field, needed_types)?; } if let Some(continuation) = data.continuation { // recurse self.add_fields(type_finder, continuation, needed_types)?; } } other => { println!( "trying to Class::add_fields() got {} -> {:?}", type_index, other ); panic!("unexpected type in Class::add_fields()"); } } Ok(()) } fn add_field( &mut self, type_finder: &pdb::TypeFinder<'p>, field: &pdb::TypeData<'p>, needed_types: &mut TypeSet, ) -> pdb::Result<()> { match *field { pdb::TypeData::Member(ref data) => { // TODO: attributes (static, virtual, etc.) self.fields.push(Field { type_name: type_name(type_finder, data.field_type, needed_types)?, name: data.name, offset: data.offset, }); } pdb::TypeData::Method(ref data) => { let method = Method::find( data.name, data.attributes, type_finder, data.method_type, needed_types, )?; if data.attributes.is_static() { self.static_methods.push(method); } else { self.instance_methods.push(method); } } pdb::TypeData::OverloadedMethod(ref data) => { // this just means we have more than one method with the same name // find the method list match type_finder.find(data.method_list)?.parse()? { pdb::TypeData::MethodList(method_list) => { for pdb::MethodListEntry { attributes, method_type, .. } in method_list.methods { // hooray let method = Method::find( data.name, attributes, type_finder, method_type, needed_types, )?; if attributes.is_static() { self.static_methods.push(method); } else { self.instance_methods.push(method); } } } other => { println!( "processing OverloadedMethod, expected MethodList, got {} -> {:?}", data.method_list, other ); panic!("unexpected type in Class::add_field()"); } } } pdb::TypeData::BaseClass(ref data) => self.base_classes.push(BaseClass { type_name: type_name(type_finder, data.base_class, needed_types)?, offset: data.offset, }), pdb::TypeData::VirtualBaseClass(ref data) => self.base_classes.push(BaseClass { type_name: type_name(type_finder, data.base_class, needed_types)?, offset: data.base_pointer_offset, }), _ => { // ignore everything else even though that's sad } } Ok(()) } } impl fmt::Display for Class<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{} {} ", match self.kind { pdb::ClassKind::Class => "class", pdb::ClassKind::Struct => "struct", pdb::ClassKind::Interface => "interface", // when can this happen? 
}, self.name.to_string() )?; if !self.base_classes.is_empty() { for (i, base) in self.base_classes.iter().enumerate() { let prefix = match i { 0 => ":", _ => ",", }; write!(f, "{} {}", prefix, base.type_name)?; } } writeln!(f, " {{")?; for base in &self.base_classes { writeln!( f, "\t/* offset {:3} */ /* fields for {} */", base.offset, base.type_name )?; } for field in &self.fields { writeln!( f, "\t/* offset {:3} */ {} {};", field.offset, field.type_name, field.name.to_string() )?; } if !self.instance_methods.is_empty() { writeln!(f, "\t")?; for method in &self.instance_methods { writeln!( f, "\t{}{} {}({});", if method.is_virtual { "virtual " } else { "" }, method.return_type_name, method.name.to_string(), method.arguments.join(", ") )?; } } if !self.static_methods.is_empty() { writeln!(f, "\t")?; for method in &self.static_methods { writeln!( f, "\t{}static {} {}({});", if method.is_virtual { "virtual " } else { "" }, method.return_type_name, method.name.to_string(), method.arguments.join(", ") )?; } } writeln!(f, "}}")?; Ok(()) } } #[derive(Debug, Clone, PartialEq, Eq)] struct BaseClass { type_name: String, offset: u32, } #[derive(Debug, Clone, PartialEq, Eq)] struct Field<'p> { type_name: String, name: pdb::RawString<'p>, offset: u64, } #[derive(Debug, Clone, PartialEq, Eq)] struct Method<'p> { name: pdb::RawString<'p>, return_type_name: String, arguments: Vec, is_virtual: bool, } impl<'p> Method<'p> { fn find( name: pdb::RawString<'p>, attributes: pdb::FieldAttributes, type_finder: &pdb::TypeFinder<'p>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result> { match type_finder.find(type_index)?.parse()? { pdb::TypeData::MemberFunction(data) => Ok(Method { name, return_type_name: type_name(type_finder, data.return_type, needed_types)?, arguments: argument_list(type_finder, data.argument_list, needed_types)?, is_virtual: attributes.is_virtual(), }), other => { println!("other: {:?}", other); Err(pdb::Error::UnimplementedFeature("that")) } } } } fn argument_list<'p>( type_finder: &pdb::TypeFinder<'p>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result> { match type_finder.find(type_index)?.parse()? { pdb::TypeData::ArgumentList(data) => { let mut args: Vec = Vec::new(); for arg_type in data.arguments { args.push(type_name(type_finder, arg_type, needed_types)?); } Ok(args) } _ => Err(pdb::Error::UnimplementedFeature( "argument list of non-argument-list type", )), } } #[derive(Debug, Clone, PartialEq, Eq)] struct Enum<'p> { name: pdb::RawString<'p>, underlying_type_name: String, values: Vec>, } impl<'p> Enum<'p> { fn add_fields( &mut self, type_finder: &pdb::TypeFinder<'p>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result<()> { match type_finder.find(type_index)?.parse()? 
{ pdb::TypeData::FieldList(data) => { for field in &data.fields { self.add_field(type_finder, field, needed_types); } if let Some(continuation) = data.continuation { // recurse self.add_fields(type_finder, continuation, needed_types)?; } } other => { println!( "trying to Enum::add_fields() got {} -> {:?}", type_index, other ); panic!("unexpected type in Enum::add_fields()"); } } Ok(()) } fn add_field(&mut self, _: &pdb::TypeFinder<'p>, field: &pdb::TypeData<'p>, _: &mut TypeSet) { // ignore everything else even though that's sad if let pdb::TypeData::Enumerate(ref data) = field { self.values.push(EnumValue { name: data.name, value: data.value, }); } } } impl fmt::Display for Enum<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, "enum {} /* stored as {} */ {{", self.name.to_string(), self.underlying_type_name )?; for value in &self.values { writeln!( f, "\t{} = {},", value.name.to_string(), match value.value { pdb::Variant::U8(v) => format!("0x{:02x}", v), pdb::Variant::U16(v) => format!("0x{:04x}", v), pdb::Variant::U32(v) => format!("0x{:08x}", v), pdb::Variant::U64(v) => format!("0x{:16x}", v), pdb::Variant::I8(v) => format!("{}", v), pdb::Variant::I16(v) => format!("{}", v), pdb::Variant::I32(v) => format!("{}", v), pdb::Variant::I64(v) => format!("{}", v), } )?; } writeln!(f, "}}")?; Ok(()) } } #[derive(Debug, Clone, PartialEq, Eq)] struct EnumValue<'p> { name: pdb::RawString<'p>, value: pdb::Variant, } #[derive(Debug, Clone, PartialEq, Eq)] struct ForwardReference<'p> { kind: pdb::ClassKind, name: pdb::RawString<'p>, } impl fmt::Display for ForwardReference<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, "{} {};", match self.kind { pdb::ClassKind::Class => "class", pdb::ClassKind::Struct => "struct", pdb::ClassKind::Interface => "interface", // when can this happen? }, self.name.to_string() ) } } #[derive(Debug, Clone, PartialEq, Eq)] struct Data<'p> { forward_references: Vec>, classes: Vec>, enums: Vec>, } impl fmt::Display for Data<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!(f, "// automatically generated by pdb2hpp\n// do not edit")?; if !self.forward_references.is_empty() { writeln!(f)?; for e in &self.forward_references { e.fmt(f)?; } } for e in &self.enums { writeln!(f)?; e.fmt(f)?; } for class in &self.classes { writeln!(f)?; class.fmt(f)?; } Ok(()) } } impl<'p> Data<'p> { fn new() -> Data<'p> { Data { forward_references: Vec::new(), classes: Vec::new(), enums: Vec::new(), } } fn add( &mut self, type_finder: &pdb::TypeFinder<'p>, type_index: pdb::TypeIndex, needed_types: &mut TypeSet, ) -> pdb::Result<()> { match type_finder.find(type_index)?.parse()? 
{ pdb::TypeData::Class(data) => { if data.properties.forward_reference() { self.forward_references.push(ForwardReference { kind: data.kind, name: data.name, }); return Ok(()); } let mut class = Class { kind: data.kind, name: data.name, fields: Vec::new(), base_classes: Vec::new(), instance_methods: Vec::new(), static_methods: Vec::new(), }; if let Some(derived_from) = data.derived_from { class.add_derived_from(type_finder, derived_from, needed_types)?; } if let Some(fields) = data.fields { class.add_fields(type_finder, fields, needed_types)?; } self.classes.insert(0, class); } pdb::TypeData::Enumeration(data) => { let mut e = Enum { name: data.name, underlying_type_name: type_name( type_finder, data.underlying_type, needed_types, )?, values: Vec::new(), }; e.add_fields(type_finder, data.fields, needed_types)?; self.enums.insert(0, e); } // ignore other => eprintln!("warning: don't know how to add {:?}", other), } Ok(()) } } fn write_class(filename: &str, class_name: &str) -> pdb::Result<()> { let file = std::fs::File::open(filename)?; let mut pdb = pdb::PDB::open(file)?; let type_information = pdb.type_information()?; let mut type_finder = type_information.finder(); let mut needed_types = TypeSet::new(); let mut data = Data::new(); let mut type_iter = type_information.iter(); while let Some(typ) = type_iter.next()? { // keep building the index type_finder.update(&type_iter); if let Ok(pdb::TypeData::Class(class)) = typ.parse() { if class.name.as_bytes() == class_name.as_bytes() && !class.properties.forward_reference() { data.add(&type_finder, typ.index(), &mut needed_types)?; break; } } } // add all the needed types iteratively until we're done while let Some(type_index) = needed_types.iter().next_back().copied() { // remove it needed_types.remove(&type_index); // add the type data.add(&type_finder, type_index, &mut needed_types)?; } if data.classes.is_empty() { eprintln!("sorry, class {} was not found", class_name); } else { println!("{}", data); } Ok(()) } fn print_usage(program: &str, opts: getopts::Options) { let brief = format!("Usage: {} input.pdb ClassName", program); print!("{}", opts.usage(&brief)); } fn main() { let args: Vec = std::env::args().collect(); let program = args[0].clone(); let mut opts = getopts::Options::new(); opts.optflag("h", "help", "print this help menu"); let matches = match opts.parse(&args[1..]) { Ok(m) => m, Err(f) => panic!("{}", f.to_string()), }; let (filename, class_name) = if matches.free.len() == 2 { (&matches.free[0], &matches.free[1]) } else { print_usage(&program, opts); return; }; match write_class(filename, class_name) { Ok(_) => (), Err(e) => eprintln!("error dumping PDB: {}", e), } } pdb-0.8.0/examples/pdb_framedata.rs000064400000000000000000000040560072674642500153700ustar 00000000000000use std::env; use getopts::Options; use pdb::FallibleIterator; fn print_usage(program: &str, opts: Options) { let brief = format!("Usage: {} input.pdb", program); print!("{}", opts.usage(&brief)); } fn dump_framedata(filename: &str) -> pdb::Result<()> { let file = std::fs::File::open(filename)?; let mut pdb = pdb::PDB::open(file)?; let string_table = pdb.string_table()?; println!("Frame data:"); println!("Address Blk Size Locals Params StkMax Prolog SavedReg SEH C++EH Start BP Type Program"); println!(); let frame_table = pdb.frame_table()?; let mut frames = frame_table.iter(); while let Some(data) = frames.next()? 
{ let program_string = match data.program { Some(prog_ref) => prog_ref.to_string_lossy(&string_table)?, None => Default::default(), }; println!( "{} {:8x} {:8x} {:8x} {:8x} {:8x} {:8x} {} {} {} {} {:5} {}", data.code_start, data.code_size, data.locals_size, data.params_size, data.max_stack_size.unwrap_or(0), data.prolog_size, data.saved_regs_size, if data.has_structured_eh { 'Y' } else { 'N' }, if data.has_cpp_eh { 'Y' } else { 'N' }, if data.is_function_start { 'Y' } else { 'N' }, if data.uses_base_pointer { 'Y' } else { 'N' }, data.ty, program_string, ); } Ok(()) } fn main() { let args: Vec = env::args().collect(); let program = args[0].clone(); let mut opts = Options::new(); opts.optflag("h", "help", "print this help menu"); let matches = match opts.parse(&args[1..]) { Ok(m) => m, Err(f) => panic!("{}", f.to_string()), }; let filename = if matches.free.len() == 1 { &matches.free[0] } else { print_usage(&program, opts); return; }; match dump_framedata(filename) { Ok(_) => (), Err(e) => eprintln!("error dumping PDB: {}", e), } } pdb-0.8.0/examples/pdb_lines.rs000064400000000000000000000043560072674642500145610ustar 00000000000000use std::env; use std::io::Write; use getopts::Options; use pdb::{FallibleIterator, SymbolData, PDB}; fn dump_pdb(filename: &str) -> pdb::Result<()> { let file = std::fs::File::open(filename)?; let mut pdb = PDB::open(file)?; let address_map = pdb.address_map()?; let string_table = pdb.string_table()?; println!("Module private symbols:"); let dbi = pdb.debug_information()?; let mut modules = dbi.modules()?; while let Some(module) = modules.next()? { println!(); println!("Module: {}", module.module_name()); let info = match pdb.module_info(&module)? { Some(info) => info, None => { println!(" no module info"); continue; } }; let program = info.line_program()?; let mut symbols = info.symbols()?; while let Some(symbol) = symbols.next()? { if let Ok(SymbolData::Procedure(proc)) = symbol.parse() { let sign = if proc.global { "+" } else { "-" }; println!("{} {}", sign, proc.name); let mut lines = program.lines_for_symbol(proc.offset); while let Some(line_info) = lines.next()? { let rva = line_info.offset.to_rva(&address_map).expect("invalid rva"); let file_info = program.get_file_info(line_info.file_index)?; let file_name = file_info.name.to_string_lossy(&string_table)?; println!(" {} {}:{}", rva, file_name, line_info.line_start); } } } } Ok(()) } fn main() { let args: Vec = env::args().collect(); let mut opts = Options::new(); opts.optflag("h", "help", "print this help menu"); let matches = match opts.parse(&args[1..]) { Ok(m) => m, Err(f) => panic!("{}", f.to_string()), }; let filename = if matches.free.len() == 1 { &matches.free[0] } else { //print_usage(&program, opts); println!("specify path to a PDB"); return; }; match dump_pdb(filename) { Ok(_) => {} Err(e) => { writeln!(&mut std::io::stderr(), "error dumping PDB: {}", e).expect("stderr write"); } } } pdb-0.8.0/examples/pdb_symbols.rs000064400000000000000000000050450072674642500151330ustar 00000000000000use std::env; use getopts::Options; use pdb::{FallibleIterator, PdbInternalSectionOffset}; fn print_usage(program: &str, opts: Options) { let brief = format!("Usage: {} input.pdb", program); print!("{}", opts.usage(&brief)); } fn print_row(offset: PdbInternalSectionOffset, kind: &str, name: pdb::RawString<'_>) { println!( "{:x}\t{:x}\t{}\t{}", offset.section, offset.offset, kind, name ); } fn print_symbol(symbol: &pdb::Symbol<'_>) -> pdb::Result<()> { match symbol.parse()? 
{ pdb::SymbolData::Public(data) => { print_row(data.offset, "function", data.name); } pdb::SymbolData::Data(data) => { print_row(data.offset, "data", data.name); } pdb::SymbolData::Procedure(data) => { print_row(data.offset, "function", data.name); } _ => { // ignore everything else } } Ok(()) } fn walk_symbols(mut symbols: pdb::SymbolIter<'_>) -> pdb::Result<()> { println!("segment\toffset\tkind\tname"); while let Some(symbol) = symbols.next()? { match print_symbol(&symbol) { Ok(_) => (), Err(e) => eprintln!("error printing symbol {:?}: {}", symbol, e), } } Ok(()) } fn dump_pdb(filename: &str) -> pdb::Result<()> { let file = std::fs::File::open(filename)?; let mut pdb = pdb::PDB::open(file)?; let symbol_table = pdb.global_symbols()?; println!("Global symbols:"); walk_symbols(symbol_table.iter())?; println!("Module private symbols:"); let dbi = pdb.debug_information()?; let mut modules = dbi.modules()?; while let Some(module) = modules.next()? { println!("Module: {}", module.object_file_name()); let info = match pdb.module_info(&module)? { Some(info) => info, None => { println!(" no module info"); continue; } }; walk_symbols(info.symbols()?)?; } Ok(()) } fn main() { let args: Vec = env::args().collect(); let program = args[0].clone(); let mut opts = Options::new(); opts.optflag("h", "help", "print this help menu"); let matches = match opts.parse(&args[1..]) { Ok(m) => m, Err(f) => panic!("{}", f.to_string()), }; let filename = if matches.free.len() == 1 { &matches.free[0] } else { print_usage(&program, opts); return; }; match dump_pdb(filename) { Ok(_) => (), Err(e) => eprintln!("error dumping PDB: {}", e), } } pdb-0.8.0/examples/stream_names.rs000064400000000000000000000012700072674642500152700ustar 00000000000000use std::ffi::OsStr; fn dump_stream_names(filename: &OsStr) -> pdb::Result<()> { let file = std::fs::File::open(filename)?; let mut pdb = pdb::PDB::open(file)?; let info = pdb.pdb_information()?; let names = info.stream_names()?; println!("index, name"); for name in &names { let stream = pdb.raw_stream(name.stream_id)?.expect("named stream"); println!("{:5}, {} {} bytes", name.stream_id, name.name, stream.len()); } Ok(()) } fn main() { let filename = std::env::args_os().nth(1).expect("Missing PDB filename"); match dump_stream_names(&filename) { Ok(_) => (), Err(e) => eprintln!("error dumping PDB: {}", e), } } pdb-0.8.0/src/common.rs000064400000000000000000001224120072674642500130550ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::borrow::Cow; use std::fmt; use std::io; use std::mem; use std::ops::{Add, AddAssign, Sub}; use std::result; use std::slice; use scroll::ctx::TryFromCtx; use scroll::{self, Endian, Pread, LE}; use crate::tpi::constants; /// An error that occurred while reading or parsing the PDB. #[non_exhaustive] #[derive(Debug)] pub enum Error { /// The input data was not recognized as a MSF (PDB) file. UnrecognizedFileFormat, /// The MSF header specifies an invalid page size. InvalidPageSize(u32), /// MSF referred to page number out of range. /// /// This likely indicates file corruption. PageReferenceOutOfRange(u32), /// The requested stream is not stored in this file. StreamNotFound(u32), /// A stream requested by name was not found. StreamNameNotFound, /// Invalid length or alignment of a stream. 
InvalidStreamLength(&'static str), /// An IO error occurred while reading from the data source. IoError(io::Error), /// Unexpectedly reached end of input. UnexpectedEof, /// This data might be understandable, but the code needed to understand it hasn't been written. UnimplementedFeature(&'static str), /// The global shared symbol table is missing. GlobalSymbolsNotFound, /// A symbol record's length value was impossibly small. SymbolTooShort, /// Support for symbols of this kind is not implemented. UnimplementedSymbolKind(u16), /// The type information header was invalid. InvalidTypeInformationHeader(&'static str), /// A type record's length value was impossibly small. TypeTooShort, /// Type or Id not found. TypeNotFound(u32), /// Type or Id not indexed -- the requested type (`.0`) is larger than the maximum index covered /// by the `ItemFinder` (`.1`). TypeNotIndexed(u32, u32), /// Support for types of this kind is not implemented. UnimplementedTypeKind(u16), /// Type index is not a cross module reference. NotACrossModuleRef(u32), /// Cross module reference not found in imports. CrossModuleRefNotFound(u32), /// Variable-length numeric parsing encountered an unexpected prefix. UnexpectedNumericPrefix(u16), /// Required mapping for virtual addresses (OMAP) was not found. AddressMapNotFound, /// A parse error from scroll. ScrollError(scroll::Error), /// This debug subsection kind is unknown or unimplemented. UnimplementedDebugSubsection(u32), /// This source file checksum kind is unknown or unimplemented. UnimplementedFileChecksumKind(u8), /// There is no source file checksum at the given offset. InvalidFileChecksumOffset(u32), /// The lines table is missing. LinesNotFound, /// A binary annotation was compressed incorrectly. InvalidCompressedAnnotation, /// An unknown binary annotation was encountered. 
UnknownBinaryAnnotation(u32), } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Self::IoError(error) => Some(error), _ => None, } } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> ::std::result::Result<(), fmt::Error> { match self { Self::PageReferenceOutOfRange(p) => { write!(f, "MSF referred to page number ({}) out of range", p) } Self::InvalidPageSize(n) => write!( f, "The MSF header specifies an invalid page size ({} bytes)", n ), Self::StreamNotFound(s) => { write!(f, "The requested stream ({}) is not stored in this file", s) } Self::InvalidStreamLength(s) => write!( f, "{} stream has an invalid length or alignment for its records", s ), Self::IoError(ref e) => write!(f, "IO error while reading PDB: {}", e), Self::UnimplementedFeature(feature) => { write!(f, "Unimplemented PDB feature: {}", feature) } Self::UnimplementedSymbolKind(kind) => write!( f, "Support for symbols of kind {:#06x} is not implemented", kind ), Self::InvalidTypeInformationHeader(reason) => { write!(f, "The type information header was invalid: {}", reason) } Self::TypeNotFound(type_index) => write!(f, "Type {} not found", type_index), Self::TypeNotIndexed(type_index, indexed_count) => write!( f, "Type {} not indexed (index covers {})", type_index, indexed_count ), Self::UnimplementedTypeKind(kind) => write!( f, "Support for types of kind {:#06x} is not implemented", kind ), Self::NotACrossModuleRef(index) => { write!(f, "Type {:#06x} is not a cross module reference", index) } Self::CrossModuleRefNotFound(index) => write!( f, "Cross module reference {:#06x} not found in imports", index ), Self::UnexpectedNumericPrefix(prefix) => write!( f, "Variable-length numeric parsing encountered an unexpected prefix ({:#06x}", prefix ), Self::UnimplementedDebugSubsection(kind) => write!( f, "Debug module subsection of kind {:#06x} is not implemented", kind ), Self::UnimplementedFileChecksumKind(kind) => { write!(f, "Unknown source file checksum kind {}", kind) } Self::InvalidFileChecksumOffset(offset) => { write!(f, "Invalid source file checksum offset {:#x}", offset) } Self::UnknownBinaryAnnotation(num) => write!(f, "Unknown binary annotation {}", num), _ => fmt::Debug::fmt(self, f), } } } impl From for Error { fn from(e: io::Error) -> Self { Self::IoError(e) } } impl From for Error { fn from(e: scroll::Error) -> Self { match e { // Convert a couple of scroll errors into EOF. scroll::Error::BadOffset(_) | scroll::Error::TooBig { .. } => Self::UnexpectedEof, _ => Self::ScrollError(e), } } } /// The result type returned by this crate. pub type Result = result::Result; /// Implements `Pread` using the inner type. macro_rules! impl_pread { ($type:ty) => { impl<'a> TryFromCtx<'a, Endian> for $type { type Error = scroll::Error; fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, usize)> { TryFromCtx::try_from_ctx(this, le).map(|(i, s)| (Self(i), s)) } } }; } /// Displays the type as hexadecimal number. Debug prints the type name around. macro_rules! impl_hex_fmt { ($type:ty) => { impl fmt::Display for $type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:#x}", self.0) } } impl fmt::Debug for $type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, concat!(stringify!($type), "({})"), self) } } }; } /// Implements bidirectional conversion traits for the newtype. macro_rules! 
impl_convert { ($type:ty, $inner:ty) => { impl From<$inner> for $type { fn from(offset: $inner) -> Self { Self(offset) } } impl From<$type> for $inner { fn from(string_ref: $type) -> Self { string_ref.0 } } }; } /// Declares that the given value represents `None`. /// /// - `Type::none` and `Default::default` return the none value. /// - `Type::is_some` and `Type::is_none` check for the none value. macro_rules! impl_opt { ($type:ty, $none:literal) => { impl $type { /// Returns an index that points to no value. #[inline] pub const fn none() -> Self { Self($none) } /// Returns `true` if the index points to a valid value. #[inline] #[must_use] pub fn is_some(self) -> bool { self.0 != $none } /// Returns `true` if the index indicates the absence of a value. #[inline] #[must_use] pub fn is_none(self) -> bool { self.0 == $none } } impl Default for $type { #[inline] fn default() -> Self { Self::none() } } }; } /// Implements common functionality for virtual addresses. macro_rules! impl_va { ($type:ty) => { impl $type { /// Checked addition of an offset. Returns `None` if overflow occurred. pub fn checked_add(self, offset: u32) -> Option { Some(Self(self.0.checked_add(offset)?)) } /// Checked computation of an offset between two addresses. Returns `None` if `other` is /// larger. pub fn checked_sub(self, other: Self) -> Option { self.0.checked_sub(other.0) } /// Saturating addition of an offset, clipped at the numeric bounds. pub fn saturating_add(self, offset: u32) -> Self { Self(self.0.saturating_add(offset)) } /// Saturating computation of an offset between two addresses, clipped at zero. pub fn saturating_sub(self, other: Self) -> u32 { self.0.saturating_sub(other.0) } /// Wrapping (modular) addition of an offset. pub fn wrapping_add(self, offset: u32) -> Self { Self(self.0.wrapping_add(offset)) } /// Wrapping (modular) computation of an offset between two addresses. pub fn wrapping_sub(self, other: Self) -> u32 { self.0.wrapping_sub(other.0) } } impl Add for $type { type Output = Self; /// Adds the given offset to this address. #[inline] fn add(mut self, offset: u32) -> Self { self.0 += offset; self } } impl AddAssign for $type { /// Adds the given offset to this address. #[inline] fn add_assign(&mut self, offset: u32) { self.0 += offset; } } impl Sub for $type { type Output = u32; fn sub(self, other: Self) -> Self::Output { self.0 - other.0 } } impl_convert!($type, u32); impl_hex_fmt!($type); }; } /// A Relative Virtual Address as it appears in a PE file. /// /// RVAs are always relative to the image base address, as it is loaded into process memory. This /// address is reported by debuggers in stack traces and may refer to symbols or instruction /// pointers. #[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Rva(pub u32); impl_va!(Rva); /// A Relative Virtual Address in an unoptimized PE file. /// /// An internal RVA points into the PDB internal address space and may not correspond to RVAs of the /// executable. It can be converted into an actual [`Rva`] suitable for debugging purposes using /// [`to_rva`](Self::to_rva). #[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct PdbInternalRva(pub u32); impl_va!(PdbInternalRva); impl_pread!(PdbInternalRva); /// Implements common functionality for section offsets. macro_rules! impl_section_offset { ($type:ty) => { impl $type { /// Creates a new section offset. 
pub fn new(section: u16, offset: u32) -> Self { Self { offset, section } } /// Returns whether this section offset points to a valid section or into the void. pub fn is_valid(self) -> bool { self.section != 0 } /// Checked addition of an offset. Returns `None` if overflow occurred. /// /// This does not check whether the offset is still valid within the given section. If /// the offset is out of bounds, the conversion to `Rva` will return `None`. pub fn checked_add(mut self, offset: u32) -> Option { self.offset = self.offset.checked_add(offset)?; Some(self) } /// Saturating addition of an offset, clipped at the numeric bounds. /// /// This does not check whether the offset is still valid within the given section. If /// the offset is out of bounds, the conversion to `Rva` will return `None`. pub fn saturating_add(mut self, offset: u32) -> Self { self.offset = self.offset.saturating_add(offset); self } /// Wrapping (modular) addition of an offset. /// /// This does not check whether the offset is still valid within the given section. If /// the offset is out of bounds, the conversion to `Rva` will return `None`. pub fn wrapping_add(mut self, offset: u32) -> Self { self.offset = self.offset.wrapping_add(offset); self } } impl Add for $type { type Output = Self; /// Adds the given offset to this section offset. /// /// This does not check whether the offset is still valid within the given section. If /// the offset is out of bounds, the conversion to `Rva` will return `None`. #[inline] fn add(mut self, offset: u32) -> Self { self.offset += offset; self } } impl AddAssign for $type { /// Adds the given offset to this section offset. /// /// This does not check whether the offset is still valid within the given section. If /// the offset is out of bounds, the conversion to `Rva` will return `None`. #[inline] fn add_assign(&mut self, offset: u32) { self.offset += offset; } } impl PartialOrd for $type { /// Compares offsets if they reside in the same section. #[inline] fn partial_cmp(&self, other: &Self) -> Option { if self.section == other.section { Some(self.offset.cmp(&other.offset)) } else { None } } } impl fmt::Debug for $type { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct(stringify!($type)) .field("section", &format_args!("{:#x}", self.section)) .field("offset", &format_args!("{:#x}", self.offset)) .finish() } } }; } /// An offset relative to a PE section. /// /// This offset can be converted to an `Rva` to receive the address relative to the entire image. /// Note that this offset applies to the actual PE headers. The PDB debug information actually /// stores [`PdbInternalSectionOffset`]s. #[derive(Clone, Copy, Default, Eq, Hash, PartialEq)] pub struct SectionOffset { /// The memory offset relative from the start of the section's memory. pub offset: u32, /// The index of the section in the PE's section headers list, incremented by `1`. A value of /// `0` indicates an invalid or missing reference. pub section: u16, } impl_section_offset!(SectionOffset); /// An offset relative to a PE section in the original unoptimized binary. /// /// For optimized Microsoft binaries, this offset points to a virtual address space before the /// rearrangement of sections has been performed. This kind of offset is usually stored in PDB debug /// information. It can be converted to an RVA in the transformed address space of the optimized /// binary using [`to_rva`](PdbInternalSectionOffset::to_rva). 
Likewise, there is a conversion to [`SectionOffset`] in the actual address /// space. /// /// For binaries and their PDBs that have not been optimized, both address spaces are equal and the /// offsets are interchangeable. The conversion operations are cheap no-ops in this case. #[derive(Clone, Copy, Default, Eq, Hash, PartialEq)] pub struct PdbInternalSectionOffset { /// The memory offset relative from the start of the section's memory. pub offset: u32, /// The index of the section in the PDB's section headers list, incremented by `1`. A value of /// `0` indicates an invalid or missing reference. pub section: u16, } impl<'t> TryFromCtx<'t, Endian> for PdbInternalSectionOffset { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { offset: this.gread_with(&mut offset, le)?, section: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } impl_section_offset!(PdbInternalSectionOffset); /// Index of a PDB stream. /// /// This index can either refer to a stream, or indicate the absence of a stream. Check /// [`is_none`](Self::is_none) to see whether a stream should exist. /// /// Use [`get`](Self::get) to load data for this stream. #[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct StreamIndex(pub u16); impl StreamIndex { /// Returns the MSF stream number, if this stream is not a NULL stream. #[inline] pub(crate) fn msf_number(self) -> Option { match self.0 { 0xffff => None, index => Some(u32::from(index)), } } } impl fmt::Display for StreamIndex { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.msf_number() { Some(number) => write!(f, "{}", number), None => write!(f, "None"), } } } impl fmt::Debug for StreamIndex { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "StreamIndex({})", self) } } impl_opt!(StreamIndex, 0xffff); impl_pread!(StreamIndex); /// An index into either the [`TypeInformation`](crate::TypeInformation) or /// [`IdInformation`](crate::IdInformation) stream. pub trait ItemIndex: Copy + Default + fmt::Debug + fmt::Display + PartialEq + PartialOrd + From + Into { /// Returns `true` if this is a cross module reference. /// /// When compiling with LTO, the compiler may reference types and ids across modules. In such /// cases, a lookup in the global streams will not succeed. Instead, the import must be resolved /// using cross module references: /// /// 1. Look up the index in [`CrossModuleImports`](crate::CrossModuleImports) of the current /// module. /// 2. Use [`StringTable`](crate::StringTable) to resolve the name of the referenced module. /// 3. Find the [`Module`](crate::Module) with the same module name and load its /// [`ModuleInfo`](crate::ModuleInfo). Note that this comparison needs to be done /// case-insensitively as the name in the DBI stream and name table are known to not /// have matching cases. /// 4. Resolve the [`Local`](crate::Local) index into a global one using /// [`CrossModuleExports`](crate::CrossModuleExports). /// /// Cross module references are specially formatted indexes with the most significant bit set to /// `1`. The remaining bits are divided into a module and index offset into the /// [`CrossModuleImports`](crate::CrossModuleImports) section. fn is_cross_module(self) -> bool { (self.into() & 0x8000_0000) != 0 } } /// Index of [`TypeData`](crate::TypeData) in the [`TypeInformation`](crate::TypeInformation) stream. 
/// /// If this index is a [cross module reference](ItemIndex::is_cross_module), it must be resolved /// before lookup in the stream. #[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct TypeIndex(pub u32); impl_convert!(TypeIndex, u32); impl_hex_fmt!(TypeIndex); impl_pread!(TypeIndex); impl ItemIndex for TypeIndex {} /// Index of an [`Id`](crate::Id) in [`IdInformation`](crate::IdInformation) stream. /// /// If this index is a [cross module reference](ItemIndex::is_cross_module), it must be resolved /// before lookup in the stream. #[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct IdIndex(pub u32); impl_convert!(IdIndex, u32); impl_hex_fmt!(IdIndex); impl_pread!(IdIndex); impl ItemIndex for IdIndex {} /// An [`ItemIndex`] that is local to a module. /// /// This index is usually part of a [`CrossModuleRef`](crate::CrossModuleRef). It cannot be used to /// query the [`TypeInformation`](crate::TypeInformation) or [`IdInformation`](crate::IdInformation) /// streams directly. Instead, it must be looked up in the /// [`CrossModuleImports`](crate::CrossModuleImports) of the module it belongs to in order to obtain /// the global index. /// /// See [`ItemIndex::is_cross_module`] for more information. #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Local(pub I); impl fmt::Display for Local where I: ItemIndex + fmt::Display, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) } } /// A reference to a string in the string table. /// /// This type stores an offset into the global string table of the PDB. To retrieve the string /// value, use [`to_raw_string`](Self::to_raw_string), [`to_string_lossy`](Self::to_string_lossy) or /// methods on [`StringTable`](crate::StringTable). #[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct StringRef(pub u32); impl_convert!(StringRef, u32); impl_hex_fmt!(StringRef); impl_pread!(StringRef); /// Index of a file entry in the module. /// /// Use the [`LineProgram`](crate::LineProgram) to resolve information on the file from this offset. #[derive(Clone, Copy, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct FileIndex(pub u32); impl_convert!(FileIndex, u32); impl_hex_fmt!(FileIndex); impl_pread!(FileIndex); /// A reference into the symbol table of a module. /// /// To retrieve the symbol referenced by this index, use /// [`ModuleInfo::symbols_at`](crate::ModuleInfo::symbols_at). When iterating, use /// [`SymbolIter::seek`](crate::SymbolIter::seek) to jump between symbols. #[derive(Clone, Copy, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct SymbolIndex(pub u32); impl_convert!(SymbolIndex, u32); impl_hex_fmt!(SymbolIndex); impl_pread!(SymbolIndex); /// A register referred to by its number. #[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct Register(pub u16); impl_convert!(Register, u16); impl_pread!(Register); /// Provides little-endian access to a &[u8]. #[derive(Debug, Default, Clone)] pub(crate) struct ParseBuffer<'b>(&'b [u8], usize); macro_rules! def_parse { ( $( ($n:ident, $t:ty) ),* $(,)* ) => { $(#[doc(hidden)] #[inline] #[allow(unused)] pub fn $n(&mut self) -> Result<$t> { self.parse() })* } } macro_rules! def_peek { ( $( ($n:ident, $t:ty) ),* $(,)* ) => { $(#[doc(hidden)] #[inline] pub fn $n(&mut self) -> Result<$t> { Ok(self.0.pread_with(self.1, LE)?) })* } } impl<'b> ParseBuffer<'b> { /// Return the remaining length of the buffer. 
#[inline] pub fn len(&self) -> usize { self.0.len() - self.1 } /// Determines whether this ParseBuffer has been consumed. #[inline] pub fn is_empty(&self) -> bool { self.len() == 0 } /// Return the position within the parent slice. #[inline] pub fn pos(&self) -> usize { self.1 } /// Seek to the given absolute position. #[inline] pub fn seek(&mut self, pos: usize) { self.1 = std::cmp::min(pos, self.0.len()); } /// Truncates the buffer at the given absolute position. #[inline] pub fn truncate(&mut self, len: usize) -> Result<()> { if self.0.len() >= len { self.0 = &self.0[..len]; Ok(()) } else { Err(Error::UnexpectedEof) } } /// Align the current position to the next multiple of `alignment` bytes. #[inline] pub fn align(&mut self, alignment: usize) -> Result<()> { let diff = self.1 % alignment; if diff > 0 { if self.len() < (alignment - diff) { return Err(Error::UnexpectedEof); } self.1 += alignment - diff; } Ok(()) } /// Parse an object that implements `Pread`. pub fn parse(&mut self) -> Result where T: TryFromCtx<'b, Endian, [u8]>, T::Error: From, Error: From, { Ok(self.0.gread_with(&mut self.1, LE)?) } /// Parse an object that implements `Pread` with the given context. pub fn parse_with(&mut self, ctx: C) -> Result where T: TryFromCtx<'b, C, [u8]>, T::Error: From, Error: From, C: Copy, { Ok(self.0.gread_with(&mut self.1, ctx)?) } def_parse!( (parse_u8, u8), (parse_u16, u16), (parse_i16, i16), (parse_u32, u32), (parse_i32, i32), (parse_u64, u64), (parse_i64, i64), ); def_peek!((peek_u8, u8), (peek_u16, u16),); /// Parse a NUL-terminated string from the input. #[inline] pub fn parse_cstring(&mut self) -> Result> { let input = &self.0[self.1..]; let null_idx = input.iter().position(|ch| *ch == 0); if let Some(idx) = null_idx { self.1 += idx + 1; Ok(RawString::from(&input[..idx])) } else { Err(Error::UnexpectedEof) } } /// Parse a u8-length-prefixed string from the input. #[inline] pub fn parse_u8_pascal_string(&mut self) -> Result> { let length = self.parse_u8()? as usize; Ok(RawString::from(self.take(length)?)) } /// Take n bytes from the input #[inline] pub fn take(&mut self, n: usize) -> Result<&'b [u8]> { let input = &self.0[self.1..]; if input.len() >= n { self.1 += n; Ok(&input[..n]) } else { Err(Error::UnexpectedEof) } } } impl<'b> From<&'b [u8]> for ParseBuffer<'b> { fn from(buf: &'b [u8]) -> Self { ParseBuffer(buf, 0) } } impl<'b> fmt::LowerHex for ParseBuffer<'b> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> result::Result<(), fmt::Error> { write!(f, "ParseBuf::from(\"")?; for byte in self.0 { write!(f, "\\x{:02x}", byte)?; } write!(f, "\").as_bytes() at offset {}", self.1) } } /// Value of an enumerate type. #[derive(Debug, Copy, Clone, PartialEq, Eq)] #[allow(missing_docs)] pub enum Variant { U8(u8), U16(u16), U32(u32), U64(u64), I8(i8), I16(i16), I32(i32), I64(i64), } impl fmt::Display for Variant { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::U8(value) => write!(f, "{}", value), Self::U16(value) => write!(f, "{}", value), Self::U32(value) => write!(f, "{}", value), Self::U64(value) => write!(f, "{}", value), Self::I8(value) => write!(f, "{}", value), Self::I16(value) => write!(f, "{}", value), Self::I32(value) => write!(f, "{}", value), Self::I64(value) => write!(f, "{}", value), } } } impl<'a> TryFromCtx<'a, Endian> for Variant { type Error = Error; fn try_from_ctx(this: &'a [u8], le: Endian) -> Result<(Self, usize)> { let mut offset = 0; let variant = match this.gread_with(&mut offset, le)? 
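// A leading u16 below LF_NUMERIC encodes the value directly; otherwise it is an LF_* prefix selecting the width and signedness of the number that follows.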
{ value if value < constants::LF_NUMERIC => Self::U16(value), constants::LF_CHAR => Self::U8(this.gread_with(&mut offset, le)?), constants::LF_SHORT => Self::I16(this.gread_with(&mut offset, le)?), constants::LF_LONG => Self::I32(this.gread_with(&mut offset, le)?), constants::LF_QUADWORD => Self::I64(this.gread_with(&mut offset, le)?), constants::LF_USHORT => Self::U16(this.gread_with(&mut offset, le)?), constants::LF_ULONG => Self::U32(this.gread_with(&mut offset, le)?), constants::LF_UQUADWORD => Self::U64(this.gread_with(&mut offset, le)?), _ if cfg!(debug_assertions) => unreachable!(), other => return Err(Error::UnexpectedNumericPrefix(other)), }; Ok((variant, offset)) } } /// `RawString` refers to a `&[u8]` that physically resides somewhere inside a PDB data structure. /// /// A `RawString` may not be valid UTF-8. #[derive(Clone, Copy, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] pub struct RawString<'b>(&'b [u8]); impl fmt::Debug for RawString<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "RawString({:?})", self.to_string()) } } impl fmt::Display for RawString<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.to_string()) } } impl<'b> RawString<'b> { /// Return the raw bytes of this string, as found in the PDB file. #[inline] pub fn as_bytes(&self) -> &'b [u8] { self.0 } /// Return the length of this string in bytes. #[inline] pub fn len(&self) -> usize { self.0.len() } /// Returns a boolean indicating if this string is empty. #[inline] pub fn is_empty(&self) -> bool { self.0.len() == 0 } /// Returns a UTF-8 `String`, substituting in replacement characters as needed. /// /// This uses [`String::from_utf8_lossy`] and thus avoids copying in cases where the original /// string was valid UTF-8. This is the expected case for strings that appear in PDB files, /// since they are almost always composed of printable 7-bit ASCII characters. #[inline] pub fn to_string(&self) -> Cow<'b, str> { String::from_utf8_lossy(self.0) } } impl<'b> From<RawString<'b>> for &'b [u8] { fn from(str: RawString<'b>) -> Self { str.as_bytes() } } impl<'b> From<&'b str> for RawString<'b> { fn from(buf: &'b str) -> Self { RawString(buf.as_bytes()) } } impl<'b> From<&'b [u8]> for RawString<'b> { fn from(buf: &'b [u8]) -> Self { RawString(buf) } } /// Cast a binary slice to a slice of types. /// /// This function performs a cast of a binary slice to a slice of some type, returning `Some` if the /// following two conditions are met: /// /// 1. The size of the slice must be a multiple of the type's size. /// 2. The slice must be aligned to the alignment of the type. /// /// Note that this function will not convert any endianness. The types must be capable of reading /// endianness correctly in case data from other hosts is read.
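///
/// Illustrative sketch (not from the original docs): casting four bytes to `u16` only succeeds
/// if the slice also happens to be 2-byte aligned, hence the `Option` return value:
///
/// ```ignore
/// let bytes: &[u8] = &[1, 0, 2, 0];
/// if let Some(words) = cast_aligned::<u16>(bytes) {
///     assert_eq!(words, &[1u16, 2u16]); // on little-endian hosts
/// }
/// ```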
pub(crate) fn cast_aligned(data: &[u8]) -> Option<&[T]> { let alignment = mem::align_of::(); let size = mem::size_of::(); let ptr = data.as_ptr(); let bytes = data.len(); match (bytes % size, ptr.align_offset(alignment)) { (0, 0) => Some(unsafe { slice::from_raw_parts(ptr as *const T, bytes / size) }), (_, _) => None, } } #[cfg(test)] mod tests { mod parse_buffer { use crate::common::*; #[test] fn test_parse_u8() { let vec: Vec = vec![1, 2, 3, 4]; let mut buf = ParseBuffer::from(vec.as_slice()); assert_eq!(buf.pos(), 0); assert_eq!(buf.peek_u8().expect("peek"), 1); assert_eq!(buf.peek_u8().expect("peek"), 1); assert_eq!(buf.peek_u8().expect("peek"), 1); let val = buf.parse_u8().unwrap(); assert_eq!(buf.len(), 3); assert_eq!(buf.pos(), 1); assert_eq!(val, 1); assert_eq!(buf.peek_u8().expect("peek"), 2); let val = buf.parse_u8().unwrap(); assert_eq!(buf.len(), 2); assert_eq!(buf.pos(), 2); assert_eq!(val, 2); assert_eq!(buf.peek_u8().expect("peek"), 3); let val = buf.parse_u8().unwrap(); assert_eq!(buf.len(), 1); assert_eq!(buf.pos(), 3); assert_eq!(val, 3); assert_eq!(buf.peek_u8().expect("peek"), 4); let val = buf.parse_u8().unwrap(); assert_eq!(buf.len(), 0); assert_eq!(buf.pos(), 4); assert_eq!(val, 4); match buf.parse_u8() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } } #[test] fn test_parse_u16() { let vec: Vec = vec![1, 2, 3]; let mut buf = ParseBuffer::from(vec.as_slice()); assert_eq!(buf.peek_u16().expect("peek"), 0x0201); assert_eq!(buf.peek_u16().expect("peek"), 0x0201); let val = buf.parse_u16().unwrap(); assert_eq!(buf.len(), 1); assert_eq!(buf.pos(), 2); assert_eq!(val, 0x0201); match buf.parse_u16() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } buf.take(1).unwrap(); match buf.parse_u16() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } } #[test] fn test_parse_u32() { let vec: Vec = vec![1, 2, 3, 4, 5, 6, 7]; let mut buf = ParseBuffer::from(vec.as_slice()); let val = buf.parse_u32().unwrap(); assert_eq!(buf.len(), 3); assert_eq!(buf.pos(), 4); assert_eq!(val, 0x0403_0201); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } buf.take(1).unwrap(); assert_eq!(buf.pos(), 5); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } buf.take(1).unwrap(); assert_eq!(buf.pos(), 6); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } buf.take(1).unwrap(); assert_eq!(buf.pos(), 7); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } } #[test] fn test_parse_u64() { let vec: Vec = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; let mut buf = ParseBuffer::from(vec.as_slice()); let val = buf.parse_u64().unwrap(); assert_eq!(val, 0x0807_0605_0403_0201); match buf.parse_u64() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } } #[test] fn test_parse_i32() { let vec: Vec = vec![254, 255, 255, 255, 5, 6, 7]; let mut buf = ParseBuffer::from(vec.as_slice()); let val = buf.parse_i32().unwrap(); assert_eq!(buf.len(), 3); assert_eq!(val, -2); assert_eq!(buf.pos(), 4); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } buf.take(1).unwrap(); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } buf.take(1).unwrap(); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } buf.take(1).unwrap(); match buf.parse_u32() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } } 
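// Supplementary check (not part of the original test suite) for the signed 16-bit parser
// generated by `def_parse!` above: 0xfffe decodes to -2 in little-endian order.
#[test]
fn test_parse_i16() {
    let vec: Vec<u8> = vec![0xfe, 0xff, 0x07];
    let mut buf = ParseBuffer::from(vec.as_slice());
    let val = buf.parse_i16().unwrap();
    assert_eq!(val, -2);
    assert_eq!(buf.len(), 1);
    assert_eq!(buf.pos(), 2);
}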
#[test] fn test_parse_cstring() { let mut buf = ParseBuffer::from(&b"hello\x00world\x00\x00\x01"[..]); let val = buf.parse_cstring().unwrap(); assert_eq!(buf.len(), 8); assert_eq!(buf.pos(), 6); assert_eq!(val, RawString::from(&b"hello"[..])); let val = buf.parse_cstring().unwrap(); assert_eq!(buf.len(), 2); assert_eq!(buf.pos(), 12); assert_eq!(val, RawString::from(&b"world"[..])); let val = buf.parse_cstring().unwrap(); assert_eq!(buf.len(), 1); assert_eq!(buf.pos(), 13); assert_eq!(val, RawString::from(&b""[..])); match buf.parse_cstring() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } } #[test] fn test_parse_u8_pascal_string() { let mut buf = ParseBuffer::from(&b"\x05hello\x05world\x00\x01"[..]); let val = buf.parse_u8_pascal_string().unwrap(); assert_eq!(buf.len(), 8); assert_eq!(buf.pos(), 6); assert_eq!(val, RawString::from(&b"hello"[..])); let val = buf.parse_u8_pascal_string().unwrap(); assert_eq!(buf.len(), 2); assert_eq!(buf.pos(), 12); assert_eq!(val, RawString::from(&b"world"[..])); let val = buf.parse_u8_pascal_string().unwrap(); assert_eq!(buf.len(), 1); assert_eq!(buf.pos(), 13); assert_eq!(val, RawString::from(&b""[..])); match buf.parse_u8_pascal_string() { Err(Error::UnexpectedEof) => (), _ => panic!("expected EOF"), } } #[test] fn test_parse_buffer_align() { let mut buf = ParseBuffer::from(&b"1234"[..]); buf.take(1).unwrap(); assert!(buf.align(4).is_ok()); assert_eq!(buf.pos(), 4); assert_eq!(buf.len(), 0); let mut buf = ParseBuffer::from(&b"1234"[..]); buf.take(3).unwrap(); assert!(buf.align(4).is_ok()); assert_eq!(buf.pos(), 4); assert_eq!(buf.len(), 0); let mut buf = ParseBuffer::from(&b"12345"[..]); buf.take(3).unwrap(); assert!(buf.align(4).is_ok()); assert_eq!(buf.pos(), 4); assert_eq!(buf.len(), 1); let mut buf = ParseBuffer::from(&b"123"[..]); buf.take(3).unwrap(); assert!(buf.align(4).is_err()); } #[test] fn test_seek() { let mut buf = ParseBuffer::from(&b"hello"[..]); buf.seek(5); assert_eq!(buf.pos(), 5); buf.seek(2); assert_eq!(buf.pos(), 2); buf.seek(10); assert_eq!(buf.pos(), 5); } } mod newtypes { use crate::common::*; // These tests use SymbolIndex as a proxy for all other types. 
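// Supplementary sketch (not part of the original suite): StreamIndex treats 0xffff as an
// absent stream, which is reflected in its Display and Debug output.
#[test]
fn test_stream_index_none() {
    assert_eq!(format!("{}", StreamIndex(0xffff)), "None");
    assert_eq!(format!("{:?}", StreamIndex(0xffff)), "StreamIndex(None)");
    assert_eq!(format!("{}", StreamIndex(42)), "42");
}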
#[test] fn test_format_newtype() { let val = SymbolIndex(0x42); assert_eq!(format!("{}", val), "0x42"); } #[test] fn test_debug_newtype() { let val = SymbolIndex(0x42); assert_eq!(format!("{:?}", val), "SymbolIndex(0x42)"); } #[test] fn test_pread() { let mut buf = ParseBuffer::from(&[0x42, 0, 0, 0][..]); let val = buf.parse::().expect("parse"); assert_eq!(val, SymbolIndex(0x42)); assert!(buf.is_empty()); } } mod cast_aligned { use crate::common::cast_aligned; use std::slice; #[test] fn test_cast_aligned() { let data: &[u32] = &[1, 2, 3]; let ptr = data.as_ptr() as *const u8; let bin: &[u8] = unsafe { slice::from_raw_parts(ptr, 12) }; assert_eq!(cast_aligned(bin), Some(data)); } #[test] fn test_cast_empty() { let data: &[u32] = &[]; let ptr = data.as_ptr() as *const u8; let bin: &[u8] = unsafe { slice::from_raw_parts(ptr, 0) }; assert_eq!(cast_aligned(bin), Some(data)); } #[test] fn test_cast_unaligned() { let data: &[u32] = &[1, 2, 3]; let ptr = data.as_ptr() as *const u8; let bin: &[u8] = unsafe { slice::from_raw_parts(ptr.offset(2), 8) }; assert_eq!(cast_aligned::(bin), None); } #[test] fn test_cast_wrong_size() { let data: &[u32] = &[1, 2, 3]; let ptr = data.as_ptr() as *const u8; let bin: &[u8] = unsafe { slice::from_raw_parts(ptr, 11) }; assert_eq!(cast_aligned::(bin), None); } } } pdb-0.8.0/src/dbi.rs000064400000000000000000000560170072674642500123320ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. // DBI = "Debug Information" use std::borrow::Cow; use std::fmt; use std::result; use crate::common::*; use crate::msf::*; use crate::{FallibleIterator, SectionCharacteristics}; /// Provides access to the "DBI" stream inside the PDB. /// /// This is only minimally implemented; it's really just so `PDB` can find the global symbol table. /// /// # Example /// /// ``` /// # use pdb::FallibleIterator; /// # /// # fn test() -> pdb::Result { /// let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = pdb::PDB::open(file)?; /// /// let dbi = pdb.debug_information()?; /// /// # let mut count: usize = 0; /// let mut modules = dbi.modules()?; /// while let Some(module) = modules.next()? { /// println!("module name: {}, object file name: {}", /// module.module_name(), module.object_file_name()); /// # count += 1; /// } /// /// # Ok(count) /// # } /// # assert!(test().expect("test") == 194); #[derive(Debug)] pub struct DebugInformation<'s> { stream: Stream<'s>, header: DBIHeader, header_len: usize, } impl<'s> DebugInformation<'s> { pub(crate) fn parse(stream: Stream<'s>) -> Result { let mut buf = stream.parse_buffer(); let header = DBIHeader::parse_buf(&mut buf)?; let header_len = buf.pos(); Ok(DebugInformation { stream, header, header_len, }) } pub(crate) fn header(&self) -> DBIHeader { self.header } /// Returns the target's machine type (architecture). pub fn machine_type(&self) -> Result { Ok(self.header.machine_type.into()) } /// Returns this PDB's original `age`. /// /// This number is written by the linker and should be equal to the image's `age` value. In /// contrast, [`PDBInformation::age`] may be bumped by other tools and should be greater or /// equal to the image's `age` value. /// /// Old PDB files may not specify an age, in which case only [`PDBInformation::age`] should be /// checked for matching the image. 
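///
/// A hedged sketch of the intended comparison; `image_age` stands in for the age read from the
/// image's debug directory and `pdb_info` for a loaded [`PDBInformation`](crate::PDBInformation),
/// neither of which is shown here:
///
/// ```ignore
/// let matches = match dbi.age() {
///     Some(age) => age == image_age,
///     None => pdb_info.age >= image_age,
/// };
/// ```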
/// /// [`PDBInformation::age`]: crate::PDBInformation::age pub fn age(&self) -> Option { match self.header.age { 0 => None, age => Some(age), } } /// Returns an iterator that can traverse the modules list in sequential order. pub fn modules(&self) -> Result> { let mut buf = self.stream.parse_buffer(); // drop the header buf.take(self.header_len)?; let modules_buf = buf.take(self.header.module_list_size as usize)?; Ok(ModuleIter { buf: modules_buf.into(), }) } /// Returns an iterator that can traverse the section contributions list in sequential order. pub fn section_contributions(&self) -> Result> { let mut buf = self.stream.parse_buffer(); // drop the header and modules list buf.take(self.header_len + self.header.module_list_size as usize)?; let contributions_buf = buf.take(self.header.section_contribution_size as usize)?; DBISectionContributionIter::parse(contributions_buf.into()) } } /// The version of the PDB format. /// /// This version type is used in multiple locations: the DBI header, and the PDBI header. #[non_exhaustive] #[derive(Debug, Copy, Clone)] #[allow(missing_docs)] pub enum HeaderVersion { V41, V50, V60, V70, V110, OtherValue(u32), } impl From for HeaderVersion { #[allow(clippy::inconsistent_digit_grouping)] fn from(v: u32) -> Self { match v { 93_08_03 => Self::V41, 1996_03_07 => Self::V50, 1997_06_06 => Self::V60, 1999_09_03 => Self::V70, 2009_12_01 => Self::V110, _ => Self::OtherValue(v), } } } /// A DBI header -- `NewDBIHdr`, really -- parsed from a stream. /// /// Reference: /// #[derive(Debug, Copy, Clone)] #[allow(dead_code)] // reason = "unused fields added for completeness" pub(crate) struct DBIHeader { pub signature: u32, pub version: HeaderVersion, pub age: u32, pub gs_symbols_stream: StreamIndex, /* https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/dbi.h#L143-L155: union { struct { USHORT usVerPdbDllMin : 8; // minor version and USHORT usVerPdbDllMaj : 7; // major version and USHORT fNewVerFmt : 1; // flag telling us we have rbld stored elsewhere (high bit of original major version) } vernew; // that built this pdb last. struct { USHORT usVerPdbDllRbld: 4; USHORT usVerPdbDllMin : 7; USHORT usVerPdbDllMaj : 5; } verold; USHORT usVerAll; }; */ pub internal_version: u16, pub ps_symbols_stream: StreamIndex, // "build version of the pdb dll that built this pdb last." pub pdb_dll_build_version: u16, pub symbol_records_stream: StreamIndex, // "rbld version of the pdb dll that built this pdb last." pub pdb_dll_rbld_version: u16, pub module_list_size: u32, pub section_contribution_size: u32, pub section_map_size: u32, pub file_info_size: u32, // "size of the Type Server Map substream" pub type_server_map_size: u32, // "index of MFC type server" pub mfc_type_server_index: u32, // "size of optional DbgHdr info appended to the end of the stream" pub debug_header_size: u32, // "number of bytes in EC substream, or 0 if EC no EC enabled Mods" pub ec_substream_size: u32, /* https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/dbi.h#L187-L192: USHORT fIncLink:1; // true if linked incrmentally (really just if ilink thunks are present) USHORT fStripped:1; // true if PDB::CopyTo stripped the private data out USHORT fCTypes:1; // true if this PDB is using CTypes. USHORT unused:13; // reserved, must be 0. 
*/ pub flags: u16, pub machine_type: u16, pub reserved: u32, } impl DBIHeader { pub fn parse(stream: Stream<'_>) -> Result { Self::parse_buf(&mut stream.parse_buffer()) } fn parse_buf(buf: &mut ParseBuffer<'_>) -> Result { let header = Self { signature: buf.parse_u32()?, version: From::from(buf.parse_u32()?), age: buf.parse_u32()?, gs_symbols_stream: buf.parse()?, internal_version: buf.parse_u16()?, ps_symbols_stream: buf.parse()?, pdb_dll_build_version: buf.parse_u16()?, symbol_records_stream: buf.parse()?, pdb_dll_rbld_version: buf.parse_u16()?, module_list_size: buf.parse_u32()?, section_contribution_size: buf.parse_u32()?, section_map_size: buf.parse_u32()?, file_info_size: buf.parse_u32()?, type_server_map_size: buf.parse_u32()?, mfc_type_server_index: buf.parse_u32()?, debug_header_size: buf.parse_u32()?, ec_substream_size: buf.parse_u32()?, flags: buf.parse_u16()?, machine_type: buf.parse_u16()?, reserved: buf.parse_u32()?, }; if header.signature != u32::max_value() { // this is likely a DBIHdr, not a NewDBIHdr // it could be promoted: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/dbi.cpp#L291-L313 // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/langapi/include/pdb.h#L1180-L1184 // but that seems like a lot of work return Err(Error::UnimplementedFeature("ancient DBI header")); } Ok(header) } } /// The target machine's architecture. /// Reference: #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum MachineType { /// The contents of this field are assumed to be applicable to any machine type. Unknown = 0x0, /// Matsushita AM33 Am33 = 0x13, /// x64 Amd64 = 0x8664, /// ARM little endian Arm = 0x1C0, /// ARM64 little endian Arm64 = 0xAA64, /// ARM Thumb-2 little endian ArmNT = 0x1C4, /// EFI byte code Ebc = 0xEBC, /// Intel 386 or later processors and compatible processors X86 = 0x14C, /// Intel Itanium processor family Ia64 = 0x200, /// Mitsubishi M32R little endian M32R = 0x9041, /// MIPS16 Mips16 = 0x266, /// MIPS with FPU MipsFpu = 0x366, /// MIPS16 with FPU MipsFpu16 = 0x466, /// Power PC little endian PowerPC = 0x1F0, /// Power PC with floating point support PowerPCFP = 0x1F1, /// MIPS little endian R4000 = 0x166, /// RISC-V 32-bit address space RiscV32 = 0x5032, /// RISC-V 64-bit address space RiscV64 = 0x5064, /// RISC-V 128-bit address space RiscV128 = 0x5128, /// Hitachi SH3 SH3 = 0x1A2, /// Hitachi SH3 DSP SH3DSP = 0x1A3, /// Hitachi SH4 SH4 = 0x1A6, /// Hitachi SH5 SH5 = 0x1A8, /// Thumb Thumb = 0x1C2, /// MIPS little-endian WCE v2 WceMipsV2 = 0x169, /// Invalid value Invalid = 0xffff, } impl fmt::Display for MachineType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Invalid => write!(f, "Invalid"), Self::Unknown => write!(f, "Unknown"), Self::Am33 => write!(f, "Am33"), Self::Amd64 => write!(f, "Amd64"), Self::Arm => write!(f, "Arm"), Self::Arm64 => write!(f, "Arm64"), Self::ArmNT => write!(f, "ArmNT"), Self::Ebc => write!(f, "Ebc"), Self::X86 => write!(f, "X86"), Self::Ia64 => write!(f, "Ia64"), Self::M32R => write!(f, "M32R"), Self::Mips16 => write!(f, "Mips16"), Self::MipsFpu => write!(f, "MipsFpu"), Self::MipsFpu16 => write!(f, "MipsFpu16"), Self::PowerPC => write!(f, "PowerPC"), Self::PowerPCFP => write!(f, "PowerPCFP"), Self::R4000 => write!(f, "R4000"), Self::RiscV32 => write!(f, "RiscV32"), Self::RiscV64 => write!(f, "RiscV64"), Self::RiscV128 => write!(f, "RiscV128"), Self::SH3 => write!(f, "SH3"), Self::SH3DSP => 
write!(f, "SH3DSP"), Self::SH4 => write!(f, "SH4"), Self::SH5 => write!(f, "SH5"), Self::Thumb => write!(f, "Thumb"), Self::WceMipsV2 => write!(f, "WceMipsV2"), } } } impl From for MachineType { fn from(value: u16) -> Self { match value { 0xffff => Self::Invalid, 0x0 => Self::Unknown, 0x13 => Self::Am33, 0x8664 => Self::Amd64, 0x1C0 => Self::Arm, 0xAA64 => Self::Arm64, 0x1C4 => Self::ArmNT, 0xEBC => Self::Ebc, 0x14C => Self::X86, 0x200 => Self::Ia64, 0x9041 => Self::M32R, 0x266 => Self::Mips16, 0x366 => Self::MipsFpu, 0x466 => Self::MipsFpu16, 0x1F0 => Self::PowerPC, 0x1F1 => Self::PowerPCFP, 0x166 => Self::R4000, 0x5032 => Self::RiscV32, 0x5064 => Self::RiscV64, 0x5128 => Self::RiscV128, 0x1A2 => Self::SH3, 0x1A3 => Self::SH3DSP, 0x1A6 => Self::SH4, 0x1A8 => Self::SH5, 0x1C2 => Self::Thumb, 0x169 => Self::WceMipsV2, _ => Self::Unknown, } } } /// Information about a module's contribution to a section. /// `struct SC` in Microsoft's code: /// #[derive(Debug, Copy, Clone)] pub struct DBISectionContribution { /// Start offset of the section. pub offset: PdbInternalSectionOffset, /// The size of the contribution, in bytes. pub size: u32, /// The characteristics, which map to [`ImageSectionHeader::characteristics`] in binaries. /// /// [`ImageSectionHeader::characteristics`]: crate::ImageSectionHeader::characteristics pub characteristics: SectionCharacteristics, /// Index of the module in [`DebugInformation::modules`] containing the actual symbol. pub module: usize, /// CRC of the contribution(?) pub data_crc: u32, /// CRC of relocations(?) pub reloc_crc: u32, } impl DBISectionContribution { fn parse(buf: &mut ParseBuffer<'_>) -> Result { let section = buf.parse_u16()?; let _padding = buf.parse_u16()?; let offset = buf.parse_u32()?; let size = buf.parse_u32()?; let characteristics = buf.parse()?; let module = buf.parse_u16()?.into(); let _padding = buf.parse_u16()?; Ok(Self { offset: PdbInternalSectionOffset { offset, section }, size, characteristics, module, data_crc: buf.parse_u32()?, reloc_crc: buf.parse_u32()?, }) } } /// Information about a module parsed from the DBI stream. /// /// Named `MODI` in the Microsoft PDB source: /// #[derive(Debug, Copy, Clone)] #[allow(dead_code)] // reason = "unused fields added for completeness" pub(crate) struct DBIModuleInfo { /// Currently open module. pub opened: u32, /// This module's first section contribution. pub section: DBISectionContribution, /// Flags, expressed as bitfields in the C struct: /// written, EC enabled, unused, tsm /// pub flags: u16, /// Stream number of module debug info (syms, lines, fpo). pub stream: StreamIndex, /// Size of local symbols debug info in `stream`. pub symbols_size: u32, /// Size of line number debug info in `stream`. pub lines_size: u32, /// Size of C13 style line number info in `stream`. pub c13_lines_size: u32, /// Number of files contributing to this module. pub files: u16, _padding: u16, /// Used as a pointer into an array of filename indicies in the Microsoft code. pub filename_offsets: u32, /// Source file name index. pub source: u32, /// Path to compiler PDB name index. 
pub compiler: u32, } impl DBIModuleInfo { fn parse(buf: &mut ParseBuffer<'_>) -> Result { Ok(Self { opened: buf.parse_u32()?, section: DBISectionContribution::parse(buf)?, flags: buf.parse_u16()?, stream: buf.parse()?, symbols_size: buf.parse_u32()?, lines_size: buf.parse_u32()?, c13_lines_size: buf.parse_u32()?, files: buf.parse_u16()?, _padding: buf.parse_u16()?, filename_offsets: buf.parse_u32()?, source: buf.parse_u32()?, compiler: buf.parse_u32()?, }) } } /// Represents a module from the DBI stream. /// /// A `Module` is a single item that contributes to the binary, such as an object file or import /// library. /// /// Much of the useful information for a `Module` is stored in a separate stream in the PDB. It can /// be retrieved by calling [`PDB::module_info`](crate::PDB::module_info) with a specific module. #[derive(Debug, Clone)] pub struct Module<'m> { info: DBIModuleInfo, module_name: RawString<'m>, object_file_name: RawString<'m>, } impl<'m> Module<'m> { /// The `DBIModuleInfo` from the module info substream in the DBI stream. pub(crate) fn info(&self) -> &DBIModuleInfo { &self.info } /// The module name. /// /// Usually either a full path to an object file or a string of the form `Import:`. pub fn module_name(&self) -> Cow<'m, str> { self.module_name.to_string() } /// The object file name. /// /// May be the same as `module_name` for object files passed directly /// to the linker. For modules from static libraries, this is usually /// the full path to the archive. pub fn object_file_name(&self) -> Cow<'m, str> { self.object_file_name.to_string() } } /// A `ModuleIter` iterates over the modules in the DBI section, producing `Module`s. #[derive(Debug)] pub struct ModuleIter<'m> { buf: ParseBuffer<'m>, } impl<'m> FallibleIterator for ModuleIter<'m> { type Item = Module<'m>; type Error = Error; fn next(&mut self) -> result::Result, Self::Error> { // see if we're at EOF if self.buf.is_empty() { return Ok(None); } let info = DBIModuleInfo::parse(&mut self.buf)?; let module_name = self.buf.parse_cstring()?; let object_file_name = self.buf.parse_cstring()?; self.buf.align(4)?; Ok(Some(Module { info, module_name, object_file_name, })) } } /// The version of the section contribution stream. #[derive(Debug, Copy, Clone, PartialEq)] #[allow(missing_docs)] enum DBISectionContributionStreamVersion { V60, V2, OtherValue(u32), } impl From for DBISectionContributionStreamVersion { fn from(v: u32) -> Self { const V60: u32 = 0xeffe_0000 + 19_970_605; const V2: u32 = 0xeffe_0000 + 20_140_516; match v { V60 => Self::V60, V2 => Self::V2, _ => Self::OtherValue(v), } } } /// A `DBISectionContributionIter` iterates over the section contributions in the DBI section, producing `DBISectionContribution`s. #[derive(Debug)] pub struct DBISectionContributionIter<'c> { buf: ParseBuffer<'c>, version: DBISectionContributionStreamVersion, } impl<'c> DBISectionContributionIter<'c> { fn parse(mut buf: ParseBuffer<'c>) -> Result { let version = buf.parse_u32()?.into(); Ok(Self { buf, version }) } } impl<'c> FallibleIterator for DBISectionContributionIter<'c> { type Item = DBISectionContribution; type Error = Error; fn next(&mut self) -> result::Result, Self::Error> { // see if we're at EOF if self.buf.is_empty() { return Ok(None); } let contribution = DBISectionContribution::parse(&mut self.buf)?; if self.version == DBISectionContributionStreamVersion::V2 { self.buf.parse_u32()?; } Ok(Some(contribution)) } } /// A `DbgDataHdr`, which contains a series of (optional) MSF stream numbers. 
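///
/// Internal usage sketch (not from the original docs); `debug_information` is a previously
/// loaded `DebugInformation`:
///
/// ```ignore
/// let extra = DBIExtraStreams::new(&debug_information)?;
/// if extra.section_headers.is_none() {
///     // this PDB does not carry a copy of the image's section headers
/// }
/// ```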
#[derive(Debug, Copy, Clone)] #[allow(dead_code)] // reason = "unused fields added for completeness" pub(crate) struct DBIExtraStreams { // The struct itself is defined at: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/dbi.h#L250-L274 // It's just an array of stream numbers; `u16`s where 0xffff means "no stream". // // The array indices are: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/langapi/include/pdb.h#L439-L449 // We'll map those to fields. // // The struct itself can be truncated. This is an internal struct; we'll treat missing fields as // StreamIndex::none() even if it's a short read, so long as the short read stops on a u16 boundary. pub fpo: StreamIndex, pub exception: StreamIndex, pub fixup: StreamIndex, pub omap_to_src: StreamIndex, pub omap_from_src: StreamIndex, pub section_headers: StreamIndex, pub token_rid_map: StreamIndex, pub xdata: StreamIndex, pub pdata: StreamIndex, pub framedata: StreamIndex, pub original_section_headers: StreamIndex, } impl DBIExtraStreams { pub(crate) fn new(debug_info: &DebugInformation<'_>) -> Result { // calculate the location of the extra stream information let header = debug_info.header; let offset = debug_info.header_len + (header.module_list_size + header.section_contribution_size + header.section_map_size + header.file_info_size + header.type_server_map_size + header.ec_substream_size) as usize; // seek let mut buf = debug_info.stream.parse_buffer(); buf.take(offset)?; // grab that section as bytes let bytes = buf.take(header.debug_header_size as _)?; // parse those bytes let mut extra_streams_buf = ParseBuffer::from(bytes); Self::parse(&mut extra_streams_buf) } pub(crate) fn parse(buf: &mut ParseBuffer<'_>) -> Result { // short reads are okay, as are long reads -- this struct is actually an array // what's _not_ okay are if buf.len() % 2 != 0 { return Err(Error::InvalidStreamLength("DbgDataHdr")); } fn next_index(buf: &mut ParseBuffer<'_>) -> Result { if buf.is_empty() { Ok(StreamIndex::none()) } else { buf.parse() } } Ok(Self { fpo: next_index(buf)?, exception: next_index(buf)?, fixup: next_index(buf)?, omap_to_src: next_index(buf)?, omap_from_src: next_index(buf)?, section_headers: next_index(buf)?, token_rid_map: next_index(buf)?, xdata: next_index(buf)?, pdata: next_index(buf)?, framedata: next_index(buf)?, original_section_headers: next_index(buf)?, }) } } #[cfg(test)] mod tests { use crate::dbi::*; #[test] fn test_dbi_extra_streams() { let bytes = vec![0xff, 0xff, 0x01, 0x02, 0x03, 0x04, 0xff, 0xff, 0x05, 0x06]; let mut buf = ParseBuffer::from(bytes.as_slice()); let extra_streams = DBIExtraStreams::parse(&mut buf).expect("parse"); // check readback assert_eq!(extra_streams.fpo, StreamIndex::none()); assert_eq!(extra_streams.exception, StreamIndex(0x0201)); assert_eq!(extra_streams.fixup, StreamIndex(0x0403)); assert_eq!(extra_streams.omap_to_src, StreamIndex::none()); assert_eq!(extra_streams.omap_from_src, StreamIndex(0x0605)); // check that short reads => StreamIndex::none() assert_eq!(extra_streams.section_headers, StreamIndex::none()); assert_eq!(extra_streams.token_rid_map, StreamIndex::none()); assert_eq!(extra_streams.original_section_headers, StreamIndex::none()); } } pdb-0.8.0/src/framedata.rs000064400000000000000000000442070072674642500135160ustar 00000000000000// Copyright 2018 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. 
This file may not be // copied, modified, or distributed except according to those terms. //! Facilities for parsing legacy FPO and FrameData streams. use std::cmp::Ordering; use std::fmt; use crate::common::*; use crate::msf::Stream; use crate::FallibleIterator; /// A compiler specific frame type. /// /// This frame type is used by the old FPO data and has been superseeded by program strings. Its /// values are originally specified in [`enum StackFrameTypeEnum`]. /// /// [`enum StackFrameTypeEnum`]: https://docs.microsoft.com/en-us/visualstudio/debugger/debug-interface-access/stackframetypeenum?view=vs-2017 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] #[repr(u8)] pub enum FrameType { /// Frame which does not have any debug info. Unknown = 0xff, /// Frame pointer omitted, FPO info available. FPO = 0, /// Kernel Trap frame. Trap = 1, /// Kernel Trap frame. TSS = 2, /// Standard EBP stackframe. Standard = 3, /// Frame pointer omitted, FrameData info available. FrameData = 4, } impl fmt::Display for FrameType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Unknown => write!(f, "unknown"), Self::FPO => write!(f, "fpo"), Self::Trap => write!(f, "trap"), Self::TSS => write!(f, "tss"), Self::Standard => write!(f, "std"), Self::FrameData => write!(f, "fdata"), } } } /// New frame data format. /// /// This format is used in the `DEBUG_S_FRAMEDATA` subsection in C13 module information, as well as /// in the `dbgFRAMEDATA` stream defined in the optional debug header. Effectively, all recent PDBs /// contain frame infos in this format. /// /// The definition corresponds to [`struct tagFRAMEDATA`]. /// /// ```c /// struct tagFRAMEDATA { /// unsigned long ulRvaStart; /// unsigned long cbBlock; /// unsigned long cbLocals; /// unsigned long cbParams; /// unsigned long cbStkMax; /// unsigned long frameFunc; /// unsigned short cbProlog; /// unsigned short cbSavedRegs; /// /// unsigned long fHasSEH : 1; /// unsigned long fHasEH : 1; /// unsigned long fIsFunctionStart : 1; /// unsigned long reserved : 29; /// }; /// ``` /// /// [`struct tagFRAMEDATA`]: https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L4635 #[repr(C)] struct NewFrameData { code_start: u32, code_size: u32, locals_size: u32, params_size: u32, max_stack_size: u32, frame_func: u32, prolog_size: u16, saved_regs_size: u16, flags: u32, } impl NewFrameData { pub fn code_start(&self) -> PdbInternalRva { PdbInternalRva(u32::from_le(self.code_start)) } pub fn code_size(&self) -> u32 { u32::from_le(self.code_size) } pub fn locals_size(&self) -> u32 { u32::from_le(self.locals_size) } pub fn params_size(&self) -> u32 { u32::from_le(self.params_size) } pub fn max_stack_size(&self) -> u32 { u32::from_le(self.max_stack_size) } pub fn frame_func(&self) -> StringRef { StringRef(u32::from_le(self.frame_func)) } pub fn prolog_size(&self) -> u16 { u16::from_le(self.prolog_size) } pub fn saved_regs_size(&self) -> u16 { u16::from_le(self.saved_regs_size) } pub fn has_seh(&self) -> bool { self.flags() & 1 != 0 } pub fn has_eh(&self) -> bool { self.flags() & 2 != 0 } pub fn is_function_start(&self) -> bool { self.flags() & 4 != 0 } fn flags(&self) -> u32 { u32::from_le(self.flags) } } impl fmt::Debug for NewFrameData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("NewFrameData") .field("code_start", &self.code_start()) .field("code_size", &self.code_size()) .field("locals_size", &self.locals_size()) .field("params_size", 
&self.params_size()) .field("max_stack_size", &self.max_stack_size()) .field("frame_func", &self.frame_func()) .field("prolog_size", &self.prolog_size()) .field("saved_regs_size", &self.saved_regs_size()) .field("has_seh", &self.has_seh()) .field("has_eh", &self.has_eh()) .field("is_function_start", &self.is_function_start()) .finish() } } /// Initial structure used for describing stack frames. /// /// This structure corresponds to [`struct _FPO_DATA`] in the PE/COFF spec. It was used to describe /// the layout of stack frames in the `dbgFPO` stream defined in the optional debug header. Since, /// it has been superseeded by the `tagFRAMEDATA` structure (see [`NewFrameData`]). /// /// Even if the newer FrameData stream is present, a PDB might still contain an additional FPO /// stream. This is due to the fact that the linker simply copies over the stream. As a result, both /// stream might describe the same RVA. /// /// [`struct _FPO_DATA`]: https://docs.microsoft.com/en-us/windows/desktop/debug/pe-format#debug-type /// /// ```c /// typedef struct _FPO_DATA { /// DWORD ulOffStart; // offset 1st byte of function code /// DWORD cbProcSize; // # bytes in function /// DWORD cdwLocals; // # bytes in locals/4 /// WORD cdwParams; // # bytes in params/4 /// /// WORD cbProlog : 8; // # bytes in prolog /// WORD cbRegs : 3; // # regs saved /// WORD fHasSEH : 1; // TRUE if SEH in func /// WORD fUseBP : 1; // TRUE if EBP has been allocated /// WORD reserved : 1; // reserved for future use /// WORD cbFrame : 2; // frame type /// } FPO_DATA; /// ``` #[repr(C)] struct OldFrameData { code_start: u32, code_size: u32, locals_size: u32, params_size: u16, attributes: u16, } impl OldFrameData { pub fn code_start(&self) -> PdbInternalRva { PdbInternalRva(u32::from_le(self.code_start)) } pub fn code_size(&self) -> u32 { u32::from_le(self.code_size) } pub fn locals_size(&self) -> u32 { u32::from_le(self.locals_size) } pub fn params_size(&self) -> u16 { u16::from_le(self.params_size) } pub fn prolog_size(&self) -> u16 { self.attributes() & 0xf } pub fn saved_regs_size(&self) -> u16 { (self.attributes() >> 8) & 0x7 } pub fn has_seh(&self) -> bool { self.attributes() & 0x200 != 0 } pub fn uses_base_pointer(&self) -> bool { self.attributes() & 0x400 != 0 } pub fn frame_type(&self) -> FrameType { match self.attributes() >> 14 { 0x00 => FrameType::FPO, 0x01 => FrameType::Trap, 0x02 => FrameType::TSS, 0x03 => FrameType::Standard, 0x04 => FrameType::FrameData, _ => FrameType::Unknown, } } fn attributes(&self) -> u16 { u16::from_le(self.attributes) } } impl fmt::Debug for OldFrameData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OldFrameData") .field("code_start", &self.code_start()) .field("code_size", &self.code_size()) .field("locals_size", &self.locals_size()) .field("params_size", &self.params_size()) .field("prolog_size", &self.prolog_size()) .field("saved_regs_size", &self.saved_regs_size()) .field("has_seh", &self.has_seh()) .field("uses_base_pointer", &self.uses_base_pointer()) .field("frame_type", &self.frame_type()) .finish() } } /// Frame data for a code block. #[derive(Clone, Debug)] pub struct FrameData { /// Compiler-specific frame type. pub ty: FrameType, /// Relative virtual address of the start of the code block. /// /// Note that this address is internal to the PDB. To convert this to an actual [`Rva`], use /// [`PdbInternalRva::to_rva`]. pub code_start: PdbInternalRva, /// Size of the code block covered by this frame data in bytes. 
pub code_size: u32, /// Size of local variables pushed on the stack in bytes. pub locals_size: u32, /// Size of parameters pushed on the stack in bytes. pub params_size: u32, /// Number of bytes of prologue code in the block. pub prolog_size: u16, /// Size of saved registers pushed on the stack in bytes. pub saved_regs_size: u16, /// The maximum number of bytes pushed on the stack. pub max_stack_size: Option, /// Indicates that structured exception handling is in effect. pub has_structured_eh: bool, /// Indicates that C++ exception handling is in effect. pub has_cpp_eh: bool, /// Indicates that this frame is the start of a function. pub is_function_start: bool, /// Indicates that this function uses the EBP register. pub uses_base_pointer: bool, /// A program string allowing to reconstruct register values for this frame. /// /// The program string is a sequence of macros that is interpreted in order to establish the /// prologue. For example, a typical stack frame might use the program string `"$T0 $ebp = $eip /// $T0 4 + ^ = $ebp $T0 ^ = $esp $T0 8 + ="`. The format is reverse polish notation, where the /// operators follow the operands. `T0` represents a temporary variable on the stack. /// /// Note that the program string is specific to the CPU and to the calling convention set up for /// the function represented by the current stack frame. pub program: Option, } impl From<&'_ OldFrameData> for FrameData { fn from(data: &OldFrameData) -> Self { Self { ty: data.frame_type(), code_start: data.code_start(), code_size: data.code_size(), prolog_size: data.prolog_size(), locals_size: data.locals_size() * 4, params_size: u32::from(data.params_size()) * 4, saved_regs_size: data.saved_regs_size() * 4, max_stack_size: None, has_structured_eh: data.has_seh(), has_cpp_eh: false, is_function_start: false, uses_base_pointer: data.uses_base_pointer(), program: None, } } } impl From<&'_ NewFrameData> for FrameData { fn from(data: &NewFrameData) -> Self { Self { ty: FrameType::FrameData, code_start: data.code_start(), code_size: data.code_size(), prolog_size: data.prolog_size(), locals_size: data.locals_size(), params_size: data.params_size(), saved_regs_size: data.saved_regs_size(), max_stack_size: Some(data.max_stack_size()), has_structured_eh: data.has_seh(), has_cpp_eh: data.has_eh(), is_function_start: data.is_function_start(), uses_base_pointer: false, program: Some(data.frame_func()), } } } /// Iterator over entries in a [`FrameTable`]. #[derive(Debug, Default)] pub struct FrameDataIter<'t> { old_frames: &'t [OldFrameData], new_frames: &'t [NewFrameData], old_index: usize, new_index: usize, } impl FallibleIterator for FrameDataIter<'_> { type Item = FrameData; type Error = Error; fn next(&mut self) -> Result> { let old_opt = self.old_frames.get(self.old_index); let new_opt = self.new_frames.get(self.new_index); Ok(Some(match (old_opt, new_opt) { (Some(old_frame), Some(new_frame)) => { match new_frame.code_start().cmp(&old_frame.code_start()) { Ordering::Less => { self.new_index += 1; new_frame.into() } Ordering::Equal => { self.new_index += 1; self.old_index += 1; new_frame.into() } Ordering::Greater => { self.old_index += 1; old_frame.into() } } } (Some(old_frame), None) => { self.old_index += 1; old_frame.into() } (None, Some(new_frame)) => { self.new_index += 1; new_frame.into() } (None, None) => return Ok(None), })) } } /// An object that spans a code range. trait AddrRange { /// The start RVA of the block. fn start(&self) -> PdbInternalRva; /// The size of the block in bytes. 
fn size(&self) -> u32; /// The non-inclusive end of the block. #[inline] fn end(&self) -> PdbInternalRva { self.start() + self.size() } /// Returns whether this item includes the given Rva. #[inline] fn contains(&self, rva: PdbInternalRva) -> bool { rva >= self.start() && rva < self.end() } } impl AddrRange for OldFrameData { fn start(&self) -> PdbInternalRva { self.code_start() } fn size(&self) -> u32 { self.code_size() } } impl AddrRange for NewFrameData { fn start(&self) -> PdbInternalRva { self.code_start() } fn size(&self) -> u32 { self.code_size() } } /// Searches for a frame data entry covering the given `PdbInternalRva`. fn binary_search_by_rva(frames: &[R], rva: PdbInternalRva) -> usize { match frames.binary_search_by_key(&rva, |f| f.start()) { Ok(index) => index, Err(index) => { if index > 0 && frames[index - 1].contains(rva) { index - 1 } else { index } } } } /// Describes stack frame layout of functions. /// /// The table contains [`FrameData`] entries ordered by [`PdbInternalRva`]. Each entry describes a /// range of instructions starting at `code_rva` for `code_size` bytes. /// /// A procedure/function might be described by multiple entries, with the first one declaring /// `is_function_start`. To retrieve frame information for a specific function, use /// [`FrameTable::iter_at_rva`]. /// /// Not every function in the image file must have frame data defined for it. Those functions that /// do not have frame data are assumed to have normal stack frames. /// /// # Example /// /// ```rust /// # use pdb::{PDB, Rva, FallibleIterator}; /// # /// # fn test() -> pdb::Result<()> { /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = PDB::open(source)?; /// /// // Read the frame table once and reuse it /// let frame_table = pdb.frame_table()?; /// let mut frames = frame_table.iter(); /// /// // Iterate frame data in RVA order /// while let Some(frame) = frames.next()? { /// println!("{:#?}", frame); /// } /// # Ok(()) /// # } /// # test().unwrap() /// ``` pub struct FrameTable<'s> { old_stream: Option>, new_stream: Option>, } impl<'s> FrameTable<'s> { /// Parses frame data from raw streams. pub(crate) fn parse( old_stream: Option>, new_stream: Option>, ) -> Result { if let Some(ref stream) = old_stream { if cast_aligned::(stream.as_slice()).is_none() { return Err(Error::InvalidStreamLength("FrameData")); } } if let Some(ref stream) = new_stream { if cast_aligned::(stream.as_slice()).is_none() { return Err(Error::InvalidStreamLength("FPO")); } } Ok(FrameTable { old_stream, new_stream, }) } /// Returns an iterator over all frame data in this table, ordered by `code_rva`. pub fn iter(&self) -> FrameDataIter<'_> { FrameDataIter { old_frames: self.old_frames(), new_frames: self.new_frames(), old_index: 0, new_index: 0, } } /// Returns an iterator over frame data starting at the given `PdbInternalRva`. /// /// The first item returned by this iterator covers the given RVA. If the address is not a /// direct start of a function or block, this is the closest element preceding the block. If no /// frame data covers the given RVA, the iterator starts at the first item **after** the RVA. /// Therefore, check for the desired RVA range when iterating frame data. /// /// To obtain a `PdbInternalRva`, use [`PdbInternalSectionOffset::to_internal_rva`] or /// [`Rva::to_internal_rva`]. 
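///
/// A short sketch (not from the original docs); `frame_table` and `rva` are assumed to be a
/// loaded [`FrameTable`] and a `PdbInternalRva` of interest:
///
/// ```ignore
/// let mut iter = frame_table.iter_at_rva(rva);
/// if let Some(frame) = iter.next()? {
///     if rva >= frame.code_start && rva < frame.code_start + frame.code_size {
///         println!("covered by a {} frame", frame.ty);
///     }
/// }
/// ```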
pub fn iter_at_rva(&self, rva: PdbInternalRva) -> FrameDataIter<'_> { let old_frames = self.old_frames(); let old_index = binary_search_by_rva(old_frames, rva); let new_frames = self.new_frames(); let new_index = binary_search_by_rva(new_frames, rva); FrameDataIter { old_frames, new_frames, old_index, new_index, } } /// Indicates whether any frame data is available. pub fn is_empty(&self) -> bool { self.new_frames().is_empty() && self.old_frames().is_empty() } fn old_frames(&self) -> &[OldFrameData] { match self.old_stream { // alignment checked during parsing Some(ref stream) => cast_aligned(stream.as_slice()).unwrap(), None => &[], } } fn new_frames(&self) -> &[NewFrameData] { match self.new_stream { // alignment checked during parsing Some(ref stream) => cast_aligned(stream.as_slice()).unwrap(), None => &[], } } } #[cfg(test)] mod tests { use super::*; use std::mem; #[test] fn test_new_frame_data() { assert_eq!(mem::size_of::<NewFrameData>(), 32); assert_eq!(mem::align_of::<NewFrameData>(), 4); } #[test] fn test_old_frame_data() { assert_eq!(mem::size_of::<OldFrameData>(), 16); assert_eq!(mem::align_of::<OldFrameData>(), 4); } } pdb-0.8.0/src/lib.rs000064400000000000000000000040740072674642500123360ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license, at your option. This file may not be // copied, modified, or distributed except according to those terms. //! The `pdb` crate parses Microsoft PDB (Program Database) files. PDB files contain debugging //! information produced by most compilers that target Windows, including information about symbols, //! types, modules, and so on. //! //! # Usage //! //! PDB files are accessed via the [`pdb::PDB`] object. //! //! # Example //! //! ``` //! # use pdb::FallibleIterator; //! # //! # fn test() -> pdb::Result<usize> { //! let file = std::fs::File::open("fixtures/self/foo.pdb")?; //! let mut pdb = pdb::PDB::open(file)?; //! //! let symbol_table = pdb.global_symbols()?; //! let address_map = pdb.address_map()?; //! //! # let mut count: usize = 0; //! let mut symbols = symbol_table.iter(); //! while let Some(symbol) = symbols.next()? { //! match symbol.parse() { //! Ok(pdb::SymbolData::Public(data)) if data.function => { //! // we found the location of a function! //! let rva = data.offset.to_rva(&address_map).unwrap_or_default(); //! println!("{} is {}", rva, data.name); //! # count += 1; //! } //! _ => {} //! } //! } //! //! # Ok(count) //! # } //! # assert!(test().expect("test") > 2000); //!
``` #![warn(missing_docs)] // modules mod common; mod dbi; mod framedata; mod modi; mod msf; mod omap; mod pdb; mod pdbi; mod pe; mod source; mod strings; mod symbol; mod tpi; // exports pub use crate::common::*; pub use crate::dbi::*; pub use crate::framedata::*; pub use crate::modi::*; pub use crate::omap::*; pub use crate::pdb::*; pub use crate::pdbi::*; pub use crate::pe::*; pub use crate::source::*; pub use crate::strings::*; pub use crate::symbol::*; pub use crate::tpi::*; // re-export FallibleIterator for convenience #[doc(no_inline)] pub use fallible_iterator::FallibleIterator; pdb-0.8.0/src/modi/c13.rs000064400000000000000000001733070072674642500131140ustar 00000000000000use std::fmt; use std::mem; use std::slice; use scroll::{ctx::TryFromCtx, Endian, Pread}; use crate::common::*; use crate::modi::{ constants, CrossModuleExport, CrossModuleRef, FileChecksum, FileIndex, FileInfo, LineInfo, LineInfoKind, ModuleRef, }; use crate::symbol::{BinaryAnnotation, BinaryAnnotationsIter, InlineSiteSymbol}; use crate::FallibleIterator; #[derive(Clone, Copy, Debug, Eq, PartialEq)] #[repr(u32)] #[allow(unused)] enum DebugSubsectionKind { // Native Symbols = 0xf1, Lines = 0xf2, StringTable = 0xf3, FileChecksums = 0xf4, FrameData = 0xf5, InlineeLines = 0xf6, CrossScopeImports = 0xf7, CrossScopeExports = 0xf8, // .NET ILLines = 0xf9, FuncMDTokenMap = 0xfa, TypeMDTokenMap = 0xfb, MergedAssemblyInput = 0xfc, CoffSymbolRva = 0xfd, } impl DebugSubsectionKind { fn parse(value: u32) -> Result> { if (0xf1..=0xfd).contains(&value) { Ok(Some(unsafe { std::mem::transmute(value) })) } else if value == constants::DEBUG_S_IGNORE { Ok(None) } else { Err(Error::UnimplementedDebugSubsection(value)) } } } #[derive(Clone, Copy, Debug)] struct DebugSubsectionHeader { /// The kind of this subsection. kind: u32, /// The length of this subsection in bytes, following the header. len: u32, } impl<'t> TryFromCtx<'t, Endian> for DebugSubsectionHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { kind: this.gread_with(&mut offset, le)?, len: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } impl DebugSubsectionHeader { fn kind(self) -> Result> { DebugSubsectionKind::parse(self.kind) } fn len(self) -> usize { self.len as usize } } #[derive(Clone, Copy, Debug)] struct DebugSubsection<'a> { pub kind: DebugSubsectionKind, pub data: &'a [u8], } #[derive(Clone, Debug, Default)] struct DebugSubsectionIterator<'a> { buf: ParseBuffer<'a>, } impl<'a> DebugSubsectionIterator<'a> { fn new(data: &'a [u8]) -> Self { Self { buf: ParseBuffer::from(data), } } } impl<'a> FallibleIterator for DebugSubsectionIterator<'a> { type Item = DebugSubsection<'a>; type Error = Error; fn next(&mut self) -> Result> { while !self.buf.is_empty() { let header = self.buf.parse::()?; let data = self.buf.take(header.len())?; let kind = match header.kind()? 
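// Subsections of kind DEBUG_S_IGNORE map to `None` here and are skipped rather than surfaced.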
{ Some(kind) => kind, None => continue, }; return Ok(Some(DebugSubsection { kind, data })); } Ok(None) } } #[derive(Clone, Copy, Debug, Default)] struct DebugInlineeLinesHeader { /// The signature of the inlinees signature: u32, } impl<'t> TryFromCtx<'t, Endian> for DebugInlineeLinesHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { signature: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } impl DebugInlineeLinesHeader { pub fn has_extra_files(self) -> bool { self.signature == constants::CV_INLINEE_SOURCE_LINE_SIGNATURE_EX } } #[derive(Clone, Copy, Debug, Default, PartialEq)] pub struct InlineeSourceLine<'a> { pub inlinee: IdIndex, pub file_id: FileIndex, pub line: u32, extra_files: &'a [u8], } impl<'a> InlineeSourceLine<'a> { // TODO: Implement extra files iterator when needed. } impl<'a> TryFromCtx<'a, DebugInlineeLinesHeader> for InlineeSourceLine<'a> { type Error = Error; fn try_from_ctx(this: &'a [u8], header: DebugInlineeLinesHeader) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let inlinee = buf.parse()?; let file_id = buf.parse()?; let line = buf.parse()?; let extra_files = if header.has_extra_files() { let file_count = buf.parse::()? as usize; buf.take(file_count * std::mem::size_of::())? } else { &[] }; let source_line = Self { inlinee, file_id, line, extra_files, }; Ok((source_line, buf.pos())) } } #[derive(Debug, Clone, Default)] struct DebugInlineeLinesIterator<'a> { header: DebugInlineeLinesHeader, buf: ParseBuffer<'a>, } impl<'a> FallibleIterator for DebugInlineeLinesIterator<'a> { type Item = InlineeSourceLine<'a>; type Error = Error; fn next(&mut self) -> Result> { if self.buf.is_empty() { Ok(None) } else { Ok(Some(self.buf.parse_with(self.header)?)) } } } #[derive(Clone, Debug, Default)] struct DebugInlineeLinesSubsection<'a> { header: DebugInlineeLinesHeader, data: &'a [u8], } impl<'a> DebugInlineeLinesSubsection<'a> { fn parse(data: &'a [u8]) -> Result { let mut buf = ParseBuffer::from(data); let header = buf.parse::()?; Ok(Self { header, data: &data[buf.pos()..], }) } /// Iterate through all inlinees. fn lines(&self) -> DebugInlineeLinesIterator<'a> { DebugInlineeLinesIterator { header: self.header, buf: ParseBuffer::from(self.data), } } } #[derive(Clone, Copy, Debug, Default)] struct DebugLinesHeader { /// Section offset of this line contribution. offset: PdbInternalSectionOffset, /// See LineFlags enumeration. flags: u16, /// Code size of this line contribution. code_size: u32, } impl<'t> TryFromCtx<'t, Endian> for DebugLinesHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { offset: this.gread_with(&mut offset, le)?, flags: this.gread_with(&mut offset, le)?, code_size: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } impl DebugLinesHeader { fn has_columns(self) -> bool { self.flags & constants::CV_LINES_HAVE_COLUMNS != 0 } } #[derive(Clone, Copy, Debug)] struct DebugLinesSubsection<'a> { header: DebugLinesHeader, data: &'a [u8], } impl<'a> DebugLinesSubsection<'a> { fn parse(data: &'a [u8]) -> Result { let mut buf = ParseBuffer::from(data); let header = buf.parse()?; let data = &data[buf.pos()..]; Ok(Self { header, data }) } fn blocks(&self) -> DebugLinesBlockIterator<'a> { DebugLinesBlockIterator { header: self.header, buf: ParseBuffer::from(self.data), } } } /// Marker instructions for a line offset. 
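/// /// Markers are encoded as the special line numbers `0xfeefee` and `0xf00f00` rather than as real source line records.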
#[derive(Clone, Copy, Debug, Eq, PartialEq)] enum LineMarkerKind { /// A debugger should skip this address. DoNotStepOnto, /// A debugger should not step into this address. DoNotStepInto, } /// The raw line number entry in a PDB. #[repr(C)] #[derive(Clone, Copy, Debug)] struct LineNumberHeader { /// Offset to start of code bytes for line number. offset: u32, /// Combined information on the start line, end line and entry type: /// /// ```ignore /// unsigned long linenumStart:24; // line where statement/expression starts /// unsigned long deltaLineEnd:7; // delta to line where statement ends (optional) /// unsigned long fStatement :1; // true if a statement line number, else an expression /// ``` flags: u32, } impl<'t> TryFromCtx<'t, Endian> for LineNumberHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { offset: this.gread_with(&mut offset, le)?, flags: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } /// A mapping of code section offsets to source line numbers. #[derive(Clone, Debug)] struct LineNumberEntry { /// Delta offset to the start of this line contribution (debug lines subsection). pub offset: u32, /// Start line number of the statement or expression. pub start_line: u32, /// End line number of the statement or expression. pub end_line: u32, /// The type of code construct this line entry refers to. pub kind: LineInfoKind, } /// Marker for debugging purposes. #[derive(Clone, Debug)] struct LineMarkerEntry { /// Delta offset to the start of this line contribution (debug lines subsection). #[allow(dead_code)] // reason = "unused until TODO in LineIterator is resolved" pub offset: u32, /// The marker kind, hinting a debugger how to deal with code at this offset. #[allow(dead_code)] // reason = "debugger instructions are not exposed" pub kind: LineMarkerKind, } /// A parsed line entry. #[derive(Clone, Debug)] enum LineEntry { /// Declares a source line number. Number(LineNumberEntry), /// Declares a debugging marker. Marker(LineMarkerEntry), } impl LineNumberHeader { /// Parse this line number header into a line entry. pub fn parse(self) -> LineEntry { // The compiler generates special line numbers to hint the debugger. Separate these out so // that they are not confused with actual line number entries. let start_line = self.flags & 0x00ff_ffff; let marker = match start_line { 0xfeefee => Some(LineMarkerKind::DoNotStepOnto), 0xf00f00 => Some(LineMarkerKind::DoNotStepInto), _ => None, }; if let Some(kind) = marker { return LineEntry::Marker(LineMarkerEntry { offset: self.offset, kind, }); } // It has been observed in some PDBs that this does not store a delta to start_line but // actually just the truncated value of `end_line`. Therefore, prefer to use `end_line` and // compute the delta from `end_line` and `start_line`, if needed. let line_delta = (self.flags & 0x7f00_0000) >> 24; // The line_delta contains the lower 7 bits of the end line number. We take all higher bits // from the start line and OR them with the lower delta bits. This combines to the full // original end line number. let high_start = start_line & !0x7f; let mut end_line = high_start | line_delta; // If the end line number is smaller than the start line, we have to assume an overflow. // The end line will most likely be within 128 lines from the start line. Thus, we account // for the overflow by adding 1 to the 8th bit.
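// Example: with start_line = 130 and stored low bits 0x01, end_line is 129 at this point; // since 129 < 130, the correction below recovers the true end line 257.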
if end_line < start_line { end_line += 1 << 7; } let kind = if self.flags & 0x8000_0000 != 0 { LineInfoKind::Statement } else { LineInfoKind::Expression }; LineEntry::Number(LineNumberEntry { offset: self.offset, start_line, end_line, kind, }) } } #[derive(Clone, Debug, Default)] struct DebugLinesIterator<'a> { block: DebugLinesBlockHeader, buf: ParseBuffer<'a>, } impl FallibleIterator for DebugLinesIterator<'_> { type Item = LineEntry; type Error = Error; fn next(&mut self) -> Result> { if self.buf.is_empty() { return Ok(None); } self.buf.parse().map(LineNumberHeader::parse).map(Some) } } #[repr(C)] #[derive(Clone, Copy, Debug, Default)] struct ColumnNumberEntry { start_column: u16, end_column: u16, } impl<'t> TryFromCtx<'t, Endian> for ColumnNumberEntry { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { start_column: this.gread_with(&mut offset, le)?, end_column: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } #[derive(Clone, Debug, Default)] struct DebugColumnsIterator<'a> { buf: ParseBuffer<'a>, } impl FallibleIterator for DebugColumnsIterator<'_> { type Item = ColumnNumberEntry; type Error = Error; fn next(&mut self) -> Result> { if self.buf.is_empty() { return Ok(None); } self.buf.parse().map(Some) } } #[repr(C)] #[derive(Clone, Copy, Debug, Default)] struct DebugLinesBlockHeader { /// Offset of the file checksum in the file checksums debug subsection. file_index: u32, /// Number of line entries in this block. /// /// If the debug lines subsection also contains column information (see `has_columns`), then the /// same number of column entries will be present after the line entries. num_lines: u32, /// Total byte size of this block, including following line and column entries. block_size: u32, } impl<'t> TryFromCtx<'t, Endian> for DebugLinesBlockHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { file_index: this.gread_with(&mut offset, le)?, num_lines: this.gread_with(&mut offset, le)?, block_size: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } impl DebugLinesBlockHeader { /// The byte size of all line and column records combined. fn data_size(&self) -> usize { self.block_size as usize - std::mem::size_of::() } /// The byte size of all line number entries combined. fn line_size(&self) -> usize { self.num_lines as usize * std::mem::size_of::() } /// The byte size of all column number entries combined. 
fn column_size(&self, subsection: DebugLinesHeader) -> usize { if subsection.has_columns() { self.num_lines as usize * std::mem::size_of::() } else { 0 } } } #[derive(Clone, Debug)] struct DebugLinesBlock<'a> { header: DebugLinesBlockHeader, line_data: &'a [u8], column_data: &'a [u8], } impl<'a> DebugLinesBlock<'a> { #[allow(unused)] fn file_index(&self) -> FileIndex { FileIndex(self.header.file_index) } fn lines(&self) -> DebugLinesIterator<'a> { DebugLinesIterator { block: self.header, buf: ParseBuffer::from(self.line_data), } } fn columns(&self) -> DebugColumnsIterator<'a> { DebugColumnsIterator { buf: ParseBuffer::from(self.column_data), } } } #[derive(Clone, Debug, Default)] struct DebugLinesBlockIterator<'a> { header: DebugLinesHeader, buf: ParseBuffer<'a>, } impl<'a> FallibleIterator for DebugLinesBlockIterator<'a> { type Item = DebugLinesBlock<'a>; type Error = Error; fn next(&mut self) -> Result> { if self.buf.is_empty() { return Ok(None); } // The header is followed by a variable-size chunk of data, specified by `data_size`. Load // all of it at once to ensure we're not reading garbage in case there is more information // we do not yet understand. let header = self.buf.parse::()?; let data = self.buf.take(header.data_size())?; // The first data is a set of line entries, optionally followed by column entries. Load both // and discard eventual data that follows let (line_data, data) = data.split_at(header.line_size()); let (column_data, remainder) = data.split_at(header.column_size(self.header)); // In case the PDB format is extended with more information, we'd like to know here. debug_assert!(remainder.is_empty()); Ok(Some(DebugLinesBlock { header, line_data, column_data, })) } } /// Possible representations of file checksums in the file checksums subsection. #[repr(u8)] #[allow(unused)] #[derive(Clone, Copy, Debug, Eq, Ord, Hash, PartialEq, PartialOrd)] enum FileChecksumKind { None = 0, Md5 = 1, Sha1 = 2, Sha256 = 3, } impl FileChecksumKind { /// Parses the checksum kind from its raw value. fn parse(value: u8) -> Result { if value <= 3 { Ok(unsafe { std::mem::transmute(value) }) } else { Err(Error::UnimplementedFileChecksumKind(value)) } } } /// Raw header of a single file checksum entry. #[derive(Clone, Copy, Debug)] struct FileChecksumHeader { name_offset: u32, checksum_size: u8, checksum_kind: u8, } impl<'t> TryFromCtx<'t, Endian> for FileChecksumHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { name_offset: this.gread_with(&mut offset, le)?, checksum_size: this.gread_with(&mut offset, le)?, checksum_kind: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } /// A file checksum entry. #[derive(Clone, Debug)] struct FileChecksumEntry<'a> { /// Reference to the file name in the string table. name: StringRef, /// File checksum value. checksum: FileChecksum<'a>, } #[derive(Clone, Debug, Default)] struct DebugFileChecksumsIterator<'a> { buf: ParseBuffer<'a>, } impl<'a> FallibleIterator for DebugFileChecksumsIterator<'a> { type Item = FileChecksumEntry<'a>; type Error = Error; fn next(&mut self) -> Result> { if self.buf.is_empty() { return Ok(None); } let header = self.buf.parse::()?; let checksum_data = self.buf.take(header.checksum_size as usize)?; let checksum = match FileChecksumKind::parse(header.checksum_kind)? 
{ FileChecksumKind::None => FileChecksum::None, FileChecksumKind::Md5 => FileChecksum::Md5(checksum_data), FileChecksumKind::Sha1 => FileChecksum::Sha1(checksum_data), FileChecksumKind::Sha256 => FileChecksum::Sha256(checksum_data), }; self.buf.align(4)?; Ok(Some(FileChecksumEntry { name: StringRef(header.name_offset), checksum, })) } } #[derive(Clone, Debug, Default)] struct DebugFileChecksumsSubsection<'a> { data: &'a [u8], } impl<'a> DebugFileChecksumsSubsection<'a> { /// Creates a new file checksums subsection. fn new(data: &'a [u8]) -> Self { Self { data } } /// Returns an iterator over all file checksum entries. #[allow(unused)] fn entries(&self) -> Result> { self.entries_at_offset(FileIndex(0)) } /// Returns an iterator over file checksum entries starting at the given offset. fn entries_at_offset(&self, offset: FileIndex) -> Result> { let mut buf = ParseBuffer::from(self.data); buf.take(offset.0 as usize)?; Ok(DebugFileChecksumsIterator { buf }) } } #[derive(Clone, Copy, Debug)] struct CrossScopeImportModule<'a> { name: ModuleRef, /// unparsed in LE byteorder imports: &'a [u32], } impl CrossScopeImportModule<'_> { /// Returns the local reference at the given offset. /// /// This function performs an "unsafe" conversion of the raw value into `Local`. It is /// assumed that this function is only called from contexts where `I` can be statically /// inferred. fn get(self, import: usize) -> Option> where I: ItemIndex, { let value = self.imports.get(import)?; let index = u32::from_le(*value).into(); Some(Local(index)) } } #[derive(Clone, Debug, Default)] struct CrossScopeImportModuleIter<'a> { buf: ParseBuffer<'a>, } impl<'a> FallibleIterator for CrossScopeImportModuleIter<'a> { type Item = CrossScopeImportModule<'a>; type Error = Error; fn next(&mut self) -> Result> { if self.buf.is_empty() { return Ok(None); } let name = ModuleRef(self.buf.parse()?); let count = self.buf.parse::()? as usize; let data = self.buf.take(count * mem::size_of::())?; let imports = cast_aligned(data).ok_or(Error::InvalidStreamLength("CrossScopeImports"))?; Ok(Some(CrossScopeImportModule { name, imports })) } } #[derive(Clone, Copy, Debug, Default)] struct DebugCrossScopeImportsSubsection<'a> { data: &'a [u8], } impl<'a> DebugCrossScopeImportsSubsection<'a> { fn new(data: &'a [u8]) -> Self { Self { data } } fn modules(self) -> CrossScopeImportModuleIter<'a> { let buf = ParseBuffer::from(self.data); CrossScopeImportModuleIter { buf } } } /// Provides efficient access to imported types and IDs from other modules. /// /// This can be used to resolve cross module references. See [`ItemIndex::is_cross_module`] for more /// information. #[derive(Clone, Debug, Default)] pub struct CrossModuleImports<'a> { modules: Vec>, } impl<'a> CrossModuleImports<'a> { /// Creates `CrossModuleImports` from the imports debug subsection. fn from_section(section: DebugCrossScopeImportsSubsection<'a>) -> Result { let modules = section.modules().collect()?; Ok(Self { modules }) } /// Loads `CrossModuleImports` from the debug subsections data. pub(crate) fn parse(data: &'a [u8]) -> Result { let import_data = DebugSubsectionIterator::new(data) .find(|sec| Ok(sec.kind == DebugSubsectionKind::CrossScopeImports))? .map(|sec| sec.data); match import_data { Some(d) => Self::from_section(DebugCrossScopeImportsSubsection::new(d)), None => Ok(Self::default()), } } /// Resolves the referenced module and local index for the index. /// /// The given index **must** be a cross module reference. 
Use `ItemIndex::is_cross_module` to /// check this before invoking this function. If successful, this function returns a reference /// to the module that declares the type, as well as the local index of the type in that module. /// /// # Errors /// /// * `Error::NotACrossModuleRef` if the given index is already a global index and not a cross /// module reference. /// * `Error::CrossModuleRefNotFound` if the cross module reference points to a module or local /// index that is not indexed by this import table. pub fn resolve_import(&self, index: I) -> Result> where I: ItemIndex, { let raw_index = index.into(); if !index.is_cross_module() { return Err(Error::NotACrossModuleRef(raw_index)); } let module_index = ((raw_index >> 20) & 0x7ff) as usize; let import_index = (raw_index & 0x000f_ffff) as usize; let module = self .modules .get(module_index) .ok_or(Error::CrossModuleRefNotFound(raw_index))?; let local_index = module .get(import_index) .ok_or(Error::CrossModuleRefNotFound(raw_index))?; Ok(CrossModuleRef(module.name, local_index)) } } /// Raw representation of `CrossModuleExport`. /// /// This type can directly be mapped onto a slice of binary data and exposes the underlying `local` /// and `global` fields with correct endianness via getter methods. There are two ways to use this: /// /// 1. Binary search over a slice of exports to find the one matching a given local index /// 2. Enumerate all for debugging purposes #[repr(C)] #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)] struct RawCrossScopeExport { /// The local index within the module. /// /// This maps to `Local` in the public type signature. local: u32, /// The index in the global type or id stream. /// /// This maps to `I: ItemIndex` in the public type signature. global: u32, } impl<'t> TryFromCtx<'t, Endian> for RawCrossScopeExport { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { local: this.gread_with(&mut offset, le)?, global: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } impl From for CrossModuleExport { fn from(raw: RawCrossScopeExport) -> Self { if (raw.local & 0x8000_0000) != 0 { Self::Id(Local(IdIndex(raw.local)), IdIndex(raw.global)) } else { Self::Type(Local(TypeIndex(raw.local)), TypeIndex(raw.global)) } } } struct RawCrossScopeExportsIter<'a> { buf: ParseBuffer<'a>, } impl FallibleIterator for RawCrossScopeExportsIter<'_> { type Item = RawCrossScopeExport; type Error = Error; fn next(&mut self) -> Result> { if self.buf.is_empty() { return Ok(None); } self.buf.parse().map(Some) } } #[derive(Clone, Copy, Debug, Default)] struct DebugCrossScopeExportsSubsection<'a> { data: &'a [u8], } impl<'a> DebugCrossScopeExportsSubsection<'a> { /// Creates a new cross scope exports subsection. fn parse(data: &'a [u8]) -> Result { if cast_aligned::(data).is_none() { return Err(Error::InvalidStreamLength( "DebugCrossScopeExportsSubsection", )); } Ok(Self { data }) } fn exports(self) -> RawCrossScopeExportsIter<'a> { let buf = ParseBuffer::from(self.data); RawCrossScopeExportsIter { buf } } } /// Iterator returned by [`CrossModuleExports::exports`]. 
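///
/// A minimal usage sketch, assuming `exports` is a [`CrossModuleExports`] table obtained via
/// `ModuleInfo::exports()`:
///
/// ```ignore
/// let mut iter = exports.exports();
/// while let Some(export) = iter.next()? {
///     println!("{:?}", export);
/// }
/// ```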
#[derive(Clone, Debug)] pub struct CrossModuleExportIter<'a> { exports: slice::Iter<'a, RawCrossScopeExport>, } impl Default for CrossModuleExportIter<'_> { fn default() -> Self { Self { exports: [].iter() } } } impl<'a> FallibleIterator for CrossModuleExportIter<'a> { type Item = CrossModuleExport; type Error = Error; fn next(&mut self) -> Result> { Ok(self.exports.next().map(|r| (*r).into())) } } /// A table of exports declared by this module. /// /// Other modules can import types and ids from this module by using [cross module /// references](ItemIndex::is_cross_module). #[derive(Clone, Debug, Default)] pub struct CrossModuleExports { raw_exports: Vec, } impl CrossModuleExports { fn from_section(section: DebugCrossScopeExportsSubsection<'_>) -> Result { let raw_exports = section.exports().collect()?; Ok(Self { raw_exports }) } pub(crate) fn parse(data: &[u8]) -> Result { let export_data = DebugSubsectionIterator::new(data) .find(|sec| Ok(sec.kind == DebugSubsectionKind::CrossScopeExports))? .map(|sec| sec.data); match export_data { Some(d) => Self::from_section(DebugCrossScopeExportsSubsection::parse(d)?), None => Ok(Self::default()), } } /// Returns the number of exported types or ids from this module. #[inline] pub fn len(&self) -> usize { self.raw_exports.len() } /// Returns `true` if this module does not export types or ids. #[inline] pub fn is_empty(&self) -> bool { self.raw_exports.is_empty() } /// Returns an iterator over all cross scope exports. pub fn exports(&self) -> CrossModuleExportIter<'_> { CrossModuleExportIter { exports: self.raw_exports.iter(), } } /// Resolves the global index of the given cross module import's local index. /// /// The global index can be used to retrieve items from the /// [`TypeInformation`](crate::TypeInformation) or [`IdInformation`](crate::IdInformation) /// streams. If the given local index is not listed in the export list, this function returns /// `Ok(None)`. pub fn resolve_import(&self, local_index: Local) -> Result> where I: ItemIndex, { let local = local_index.0.into(); let exports = &self.raw_exports; Ok(match exports.binary_search_by_key(&local, |r| r.local) { Ok(i) => Some(I::from(exports[i].global)), Err(_) => None, }) } } #[derive(Clone)] pub struct LineIterator<'a> { /// Iterator over all subsections in the current module. sections: std::slice::Iter<'a, DebugLinesSubsection<'a>>, /// Iterator over all blocks in the current lines subsection. blocks: DebugLinesBlockIterator<'a>, /// Iterator over lines in the current block. lines: DebugLinesIterator<'a>, /// Iterator over optional columns in the current block. columns: DebugColumnsIterator<'a>, /// Previous line info before length can be inferred. last_info: Option, } impl<'a> FallibleIterator for LineIterator<'a> { type Item = LineInfo; type Error = Error; fn next(&mut self) -> Result> { loop { if let Some(entry) = self.lines.next()? { // A column entry is only returned if the debug lines subsection contains column // information. Otherwise, the columns iterator is empty. We can safely assume that // the number of line entries and column entries returned from the two iterators is // equivalent. If it were not, the creation of the block would already have failed. let column_entry = self.columns.next()?; // The high-level line iterator is only interested in actual line entries. It might // make sense to eventually fold markers at the same offset into the `LineInfo` // record. 
let line_entry = match entry { LineEntry::Number(line_entry) => line_entry, LineEntry::Marker(_) => continue, }; let section_header = self.blocks.header; let block_header = self.lines.block; let offset = section_header.offset + line_entry.offset; let line_info = LineInfo { offset, length: None, // Length is inferred in the next iteration. file_index: FileIndex(block_header.file_index), line_start: line_entry.start_line, line_end: line_entry.end_line, column_start: column_entry.map(|e| e.start_column.into()), column_end: column_entry.map(|e| e.end_column.into()), kind: line_entry.kind, }; let mut last_info = match std::mem::replace(&mut self.last_info, Some(line_info)) { Some(last_info) => last_info, None => continue, }; last_info.set_end(offset); return Ok(Some(last_info)); } if let Some(block) = self.blocks.next()? { self.lines = block.lines(); self.columns = block.columns(); continue; } // The current debug lines subsection ends. Fix up the length of the last line record // using the code size of the lines section, before continuing iteration. This ensures // the most accurate length of the line record, even if there are gaps between sections. if let Some(ref mut last_line) = self.last_info { let section_header = self.blocks.header; last_line.set_end(section_header.offset + section_header.code_size); } if let Some(lines_section) = self.sections.next() { self.blocks = lines_section.blocks(); continue; } return Ok(self.last_info.take()); } } } impl Default for LineIterator<'_> { fn default() -> Self { Self { sections: [].iter(), blocks: DebugLinesBlockIterator::default(), lines: DebugLinesIterator::default(), columns: DebugColumnsIterator::default(), last_info: None, } } } impl fmt::Debug for LineIterator<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("LineIterator") .field("sections", &self.sections.as_slice()) .field("blocks", &self.blocks) .field("lines", &self.lines) .field("columns", &self.columns) .field("last_info", &self.last_info) .finish() } } /// An iterator over line information records in a module. #[derive(Clone, Debug, Default)] pub struct InlineeLineIterator<'a> { annotations: BinaryAnnotationsIter<'a>, file_index: FileIndex, code_offset_base: u32, code_offset: PdbInternalSectionOffset, code_length: Option, line: u32, line_length: u32, col_start: Option, col_end: Option, line_kind: LineInfoKind, last_info: Option, } impl<'a> InlineeLineIterator<'a> { fn new( parent_offset: PdbInternalSectionOffset, inline_site: &InlineSiteSymbol<'a>, inlinee_line: InlineeSourceLine<'a>, ) -> Self { Self { annotations: inline_site.annotations.iter(), file_index: inlinee_line.file_id, code_offset_base: 0, code_offset: parent_offset, code_length: None, line: inlinee_line.line, line_length: 1, col_start: None, col_end: None, line_kind: LineInfoKind::Statement, last_info: None, } } } impl<'a> FallibleIterator for InlineeLineIterator<'a> { type Item = LineInfo; type Error = Error; fn next(&mut self) -> Result> { while let Some(op) = self.annotations.next()? 
{ match op { BinaryAnnotation::CodeOffset(code_offset) => { self.code_offset.offset = code_offset; } BinaryAnnotation::ChangeCodeOffsetBase(code_offset_base) => { self.code_offset_base = code_offset_base; } BinaryAnnotation::ChangeCodeOffset(delta) => { self.code_offset = self.code_offset.wrapping_add(delta); } BinaryAnnotation::ChangeCodeLength(code_length) => { if let Some(ref mut last_info) = self.last_info { if last_info.length.is_none() && last_info.kind == self.line_kind { last_info.length = Some(code_length); } } self.code_offset = self.code_offset.wrapping_add(code_length); } BinaryAnnotation::ChangeFile(file_index) => { // NOTE: There seems to be a bug in VS2015-VS2019 compilers that generates // invalid binary annotations when file changes are involved. This can be // triggered by #including files directly into inline functions. The // `ChangeFile` annotations are generated in the wrong spot or missing // completely. This renders information on the file effectively useless in a lot // of cases. self.file_index = file_index; } BinaryAnnotation::ChangeLineOffset(delta) => { self.line = (i64::from(self.line) + i64::from(delta)) as u32; } BinaryAnnotation::ChangeLineEndDelta(line_length) => { self.line_length = line_length; } BinaryAnnotation::ChangeRangeKind(kind) => { self.line_kind = match kind { 0 => LineInfoKind::Expression, 1 => LineInfoKind::Statement, _ => self.line_kind, }; } BinaryAnnotation::ChangeColumnStart(col_start) => { self.col_start = Some(col_start); } BinaryAnnotation::ChangeColumnEndDelta(delta) => { self.col_end = self .col_end .map(|col_end| (i64::from(col_end) + i64::from(delta)) as u32) } BinaryAnnotation::ChangeCodeOffsetAndLineOffset(code_delta, line_delta) => { self.code_offset += code_delta; self.line = (i64::from(self.line) + i64::from(line_delta)) as u32; } BinaryAnnotation::ChangeCodeLengthAndCodeOffset(code_length, code_delta) => { self.code_length = Some(code_length); self.code_offset += code_delta; } BinaryAnnotation::ChangeColumnEnd(col_end) => { self.col_end = Some(col_end); } } if !op.emits_line_info() { continue; } let line_offset = self.code_offset + self.code_offset_base; if let Some(ref mut last_info) = self.last_info { if last_info.length.is_none() && last_info.kind == self.line_kind { last_info.length = Some(line_offset.offset - last_info.offset.offset); } } let line_info = LineInfo { kind: self.line_kind, file_index: self.file_index, offset: line_offset, length: self.code_length, line_start: self.line, line_end: self.line + self.line_length, column_start: self.col_start, column_end: self.col_end, }; // Code length resets with every line record. self.code_length = None; // Finish the previous record and emit it. The current record is stored so that the // length can be inferred from subsequent operators or the next line info. if let Some(last_info) = std::mem::replace(&mut self.last_info, Some(line_info)) { return Ok(Some(last_info)); } } Ok(self.last_info.take()) } } /// An inlined function that can evaluate to line information. #[derive(Clone, Debug, Default)] pub struct Inlinee<'a>(InlineeSourceLine<'a>); impl<'a> Inlinee<'a> { /// The index of this inlinee in the `IdInformation` stream (IPI). pub fn index(&self) -> IdIndex { self.0.inlinee } /// Returns an iterator over line records for an inline site. /// /// Note that line records are not guaranteed to be ordered by source code offset. If a /// monotonic order by `PdbInternalSectionOffset` or `Rva` is required, the lines have to be /// sorted manually. 
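    ///
    /// A minimal usage sketch, assuming `inlinee` was returned by `ModuleInfo::inlinees()` and
    /// `parent_offset` / `inline_site` come from the enclosing procedure and its `S_INLINESITE`
    /// symbol:
    ///
    /// ```ignore
    /// let mut lines = inlinee.lines(parent_offset, &inline_site);
    /// while let Some(line) = lines.next()? {
    ///     println!("line {} at {:?}", line.line_start, line.offset);
    /// }
    /// ```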
pub fn lines( &self, parent_offset: PdbInternalSectionOffset, inline_site: &InlineSiteSymbol<'a>, ) -> InlineeLineIterator<'a> { InlineeLineIterator::new(parent_offset, inline_site, self.0) } } /// An iterator over line information records in a module. #[derive(Clone, Debug, Default)] pub struct InlineeIterator<'a> { inlinee_lines: DebugInlineeLinesIterator<'a>, } impl<'a> InlineeIterator<'a> { pub(crate) fn parse(data: &'a [u8]) -> Result { let inlinee_data = DebugSubsectionIterator::new(data) .find(|sec| Ok(sec.kind == DebugSubsectionKind::InlineeLines))? .map(|sec| sec.data); let inlinee_lines = match inlinee_data { Some(d) => DebugInlineeLinesSubsection::parse(d)?, None => DebugInlineeLinesSubsection::default(), }; Ok(Self { inlinee_lines: inlinee_lines.lines(), }) } } impl<'a> FallibleIterator for InlineeIterator<'a> { type Item = Inlinee<'a>; type Error = Error; fn next(&mut self) -> Result> { match self.inlinee_lines.next() { Ok(Some(inlinee_line)) => Ok(Some(Inlinee(inlinee_line))), Ok(None) => Ok(None), Err(error) => Err(error), } } } #[derive(Clone, Debug, Default)] pub struct FileIterator<'a> { checksums: DebugFileChecksumsIterator<'a>, } impl<'a> FallibleIterator for FileIterator<'a> { type Item = FileInfo<'a>; type Error = Error; fn next(&mut self) -> Result> { match self.checksums.next() { Ok(Some(entry)) => Ok(Some(FileInfo { name: entry.name, checksum: entry.checksum, })), Ok(None) => Ok(None), Err(error) => Err(error), } } } pub struct LineProgram<'a> { file_checksums: DebugFileChecksumsSubsection<'a>, line_sections: Vec>, } impl<'a> LineProgram<'a> { pub(crate) fn parse(data: &'a [u8]) -> Result { let mut file_checksums = DebugFileChecksumsSubsection::default(); let mut line_sections = Vec::new(); let mut section_iter = DebugSubsectionIterator::new(data); while let Some(sec) = section_iter.next()? { match sec.kind { DebugSubsectionKind::FileChecksums => { file_checksums = DebugFileChecksumsSubsection::new(sec.data); } DebugSubsectionKind::Lines => { line_sections.push(DebugLinesSubsection::parse(sec.data)?); } _ => {} } } line_sections.sort_unstable_by_key(Self::lines_key); Ok(Self { file_checksums, line_sections, }) } pub(crate) fn lines(&self) -> LineIterator<'_> { LineIterator { sections: self.line_sections.iter(), blocks: DebugLinesBlockIterator::default(), lines: DebugLinesIterator::default(), columns: DebugColumnsIterator::default(), last_info: None, } } pub(crate) fn lines_for_symbol(&self, offset: PdbInternalSectionOffset) -> LineIterator<'_> { // Search for the lines subsection that covers the given offset. They are non-overlapping // and not empty, so there will be at most one match. In most cases, there will be an exact // match for each symbol. However, ASM sometimes yields line records outside of the stated // symbol range `[offset, offset+len)`. In this case, search for the section covering the // offset. let key = Self::lines_offset_key(offset); let index_result = self .line_sections .binary_search_by_key(&key, Self::lines_key); let section = match index_result { Err(0) => return LineIterator::default(), Err(i) => self.line_sections[i - 1], Ok(i) => self.line_sections[i], }; // In the `Err(i)` case, we might have chosen a lines subsection pointing into a different // section. In this case, bail out. 
if section.header.offset.section != offset.section { return LineIterator::default(); } LineIterator { sections: [].iter(), blocks: section.blocks(), lines: DebugLinesIterator::default(), columns: DebugColumnsIterator::default(), last_info: None, } } pub(crate) fn files(&self) -> FileIterator<'a> { FileIterator { checksums: self.file_checksums.entries().unwrap_or_default(), } } pub(crate) fn get_file_info(&self, index: FileIndex) -> Result> { // The file index actually contains the byte offset value into the file_checksums // subsection. Therefore, treat it as the offset. let mut entries = self.file_checksums.entries_at_offset(index)?; let entry = entries .next()? .ok_or(Error::InvalidFileChecksumOffset(index.0))?; Ok(FileInfo { name: entry.name, checksum: entry.checksum, }) } fn lines_offset_key(offset: PdbInternalSectionOffset) -> (u16, u32) { (offset.section, offset.offset) } fn lines_key(lines: &DebugLinesSubsection<'_>) -> (u16, u32) { Self::lines_offset_key(lines.header.offset) } } #[cfg(test)] mod tests { use super::*; use std::mem; use crate::symbol::BinaryAnnotations; #[test] fn test_line_number_header() { assert_eq!(mem::size_of::(), 8); assert_eq!(mem::align_of::(), 4); } #[test] fn test_column_number_header() { assert_eq!(mem::size_of::(), 4); assert_eq!(mem::align_of::(), 2); } #[test] fn test_debug_lines_block_header() { assert_eq!(mem::size_of::(), 12); assert_eq!(mem::align_of::(), 4); } #[test] fn test_raw_cross_scope_export() { assert_eq!(mem::size_of::(), 8); assert_eq!(mem::align_of::(), 4); } #[test] fn test_iter_lines() { let data = &[ 244, 0, 0, 0, 24, 0, 0, 0, 169, 49, 0, 0, 16, 1, 115, 121, 2, 198, 45, 116, 88, 98, 157, 13, 221, 82, 225, 34, 192, 51, 0, 0, 242, 0, 0, 0, 48, 0, 0, 0, 132, 160, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 128, 0, 0, 0, 0, 23, 0, 0, 128, 11, 0, 0, 0, 24, 0, 0, 128, ]; let line_program = LineProgram::parse(data).expect("parse line program"); let lines: Vec<_> = line_program.lines().collect().expect("collect lines"); let expected = [ LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0xa084, }, length: Some(0), file_index: FileIndex(0x0), line_start: 22, line_end: 22, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0xa084, }, length: Some(11), file_index: FileIndex(0x0), line_start: 23, line_end: 23, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0xa08f, }, length: Some(1), file_index: FileIndex(0x0), line_start: 24, line_end: 24, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, ]; assert_eq!(lines, expected); } #[test] fn test_lines_for_symbol() { let data = &[ 244, 0, 0, 0, 24, 0, 0, 0, 169, 49, 0, 0, 16, 1, 115, 121, 2, 198, 45, 116, 88, 98, 157, 13, 221, 82, 225, 34, 192, 51, 0, 0, 242, 0, 0, 0, 48, 0, 0, 0, 132, 160, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 36, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 128, 0, 0, 0, 0, 23, 0, 0, 128, 11, 0, 0, 0, 24, 0, 0, 128, ]; let offset = PdbInternalSectionOffset { section: 0x0001, offset: 0xa084, }; let line_program = LineProgram::parse(data).expect("parse line program"); let line = line_program .lines_for_symbol(offset) .next() .expect("get line"); let expected = Some(LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0xa084, }, length: Some(0), file_index: FileIndex(0x0), line_start: 22, line_end: 22, 
column_start: None, column_end: None, kind: LineInfoKind::Statement, }); assert_eq!(expected, line); } #[test] fn test_lines_for_symbol_asm() { // This test is similar to lines_for_symbol, but it tests with an offset that points beyond // the beginning of a lines subsection. This happens when dealing with MASM. let data = &[ 244, 0, 0, 0, 96, 0, 0, 0, 177, 44, 0, 0, 16, 1, 148, 43, 19, 100, 121, 95, 165, 113, 45, 169, 112, 53, 233, 149, 174, 133, 0, 0, 248, 44, 0, 0, 16, 1, 54, 176, 28, 14, 163, 149, 3, 189, 0, 215, 91, 24, 204, 45, 117, 241, 0, 0, 59, 45, 0, 0, 16, 1, 191, 40, 129, 240, 15, 71, 114, 239, 184, 146, 206, 88, 119, 218, 136, 139, 0, 0, 126, 45, 0, 0, 16, 1, 175, 252, 248, 34, 196, 152, 31, 107, 144, 61, 83, 41, 122, 95, 140, 123, 0, 0, 242, 0, 0, 0, 96, 0, 0, 0, 112, 137, 0, 0, 1, 0, 0, 0, 49, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 84, 0, 0, 0, 16, 0, 0, 0, 45, 0, 0, 128, 16, 0, 0, 0, 47, 0, 0, 128, 23, 0, 0, 0, 48, 0, 0, 128, 26, 0, 0, 0, 49, 0, 0, 128, 30, 0, 0, 0, 50, 0, 0, 128, 35, 0, 0, 0, 51, 0, 0, 128, 38, 0, 0, 0, 52, 0, 0, 128, 40, 0, 0, 0, 62, 0, 0, 128, 44, 0, 0, 0, 66, 0, 0, 128, ]; let offset = PdbInternalSectionOffset { section: 0x0001, offset: 0x8990, // XXX: section and first line record at 0x0980 }; let line_program = LineProgram::parse(data).expect("parse line program"); let line = line_program .lines_for_symbol(offset) .next() .expect("get line"); let expected = Some(LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0x8980, }, length: Some(0), file_index: FileIndex(0x0), line_start: 45, line_end: 45, column_start: None, column_end: None, kind: LineInfoKind::Statement, }); assert_eq!(expected, line); } #[test] fn test_parse_inlinee_lines() { let data = &[ 0, 0, 0, 0, 254, 18, 0, 0, 104, 1, 0, 0, 24, 0, 0, 0, 253, 18, 0, 0, 104, 1, 0, 0, 28, 0, 0, 0, ]; let inlinee_lines = DebugInlineeLinesSubsection::parse(data).expect("parse inlinee lines"); assert!(!inlinee_lines.header.has_extra_files()); let lines: Vec<_> = inlinee_lines .lines() .collect() .expect("collect inlinee lines"); let expected = [ InlineeSourceLine { inlinee: IdIndex(0x12FE), file_id: FileIndex(0x168), line: 24, extra_files: &[], }, InlineeSourceLine { inlinee: IdIndex(0x12FD), file_id: FileIndex(0x168), line: 28, extra_files: &[], }, ]; assert_eq!(lines, expected); } #[test] fn test_parse_inlinee_lines_with_files() { let data = &[ 1, 0, 0, 0, 235, 102, 9, 0, 232, 37, 0, 0, 19, 0, 0, 0, 1, 0, 0, 0, 216, 26, 0, 0, 240, 163, 7, 0, 176, 44, 0, 0, 120, 0, 0, 0, 1, 0, 0, 0, 120, 3, 0, 0, ]; let inlinee_lines = DebugInlineeLinesSubsection::parse(data).expect("parse inlinee lines"); assert!(inlinee_lines.header.has_extra_files()); let lines: Vec<_> = inlinee_lines .lines() .collect() .expect("collect inlinee lines"); let expected = [ InlineeSourceLine { inlinee: IdIndex(0x966EB), file_id: FileIndex(0x25e8), line: 19, extra_files: &[216, 26, 0, 0], }, InlineeSourceLine { inlinee: IdIndex(0x7A3F0), file_id: FileIndex(0x2cb0), line: 120, extra_files: &[120, 3, 0, 0], }, ]; assert_eq!(lines, expected) } #[test] fn test_inlinee_lines() { // Obtained from a PDB compiling Breakpad's crash_generation_client.obj // S_GPROC32: [0001:00000120], Cb: 00000054 // S_INLINESITE: Parent: 0000009C, End: 00000318, Inlinee: 0x1173 // S_INLINESITE: Parent: 00000190, End: 000001EC, Inlinee: 0x1180 // BinaryAnnotations: CodeLengthAndCodeOffset 2 3f CodeLengthAndCodeOffset 3 9 let inline_site = InlineSiteSymbol { parent: Some(SymbolIndex(0x190)), end: SymbolIndex(0x1ec), inlinee: IdIndex(0x1180), invocations: 
None, annotations: BinaryAnnotations::new(&[12, 2, 63, 12, 3, 9, 0, 0]), }; // Inline site from corresponding DEBUG_S_INLINEELINES subsection: let inlinee_line = InlineeSourceLine { inlinee: IdIndex(0x1180), file_id: FileIndex(0x270), line: 341, extra_files: &[], }; // Parent offset from procedure root: // S_GPROC32: [0001:00000120] let parent_offset = PdbInternalSectionOffset { offset: 0x120, section: 0x1, }; let iter = InlineeLineIterator::new(parent_offset, &inline_site, inlinee_line); let lines: Vec<_> = iter.collect().expect("collect inlinee lines"); let expected = [ LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0x015f, }, length: Some(2), file_index: FileIndex(0x270), line_start: 341, line_end: 342, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0x0168, }, length: Some(3), file_index: FileIndex(0x270), line_start: 341, line_end: 342, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, ]; assert_eq!(lines, expected); } #[test] fn test_inlinee_lines_length() { // Obtained from xul.pdb: // https://symbols.mozilla.org/xul.pdb/5DCA9FFE1E8BC7FE4C4C44205044422E1/xul.pd_ // // 1. Rename to `xul.pdb.cab` and extract with `cabextract` // 2. Get procedure at SymbolIndex(0x3e3c7f4) // 3. Get inlinee at SymbolIndex(0x3e51b04) let inline_site = InlineSiteSymbol { parent: Some(SymbolIndex(0x03e5_14dc)), end: SymbolIndex(0x03e5_1bd0), inlinee: IdIndex(0xeb476), invocations: None, annotations: BinaryAnnotations::new(&[6, 38, 3, 186, 32, 11, 71, 11, 36, 4, 5, 0]), }; // Binary annotations: // ChangeLineOffset(19), // ChangeCodeOffset(14880), // ChangeCodeOffsetAndLineOffset(7, 2), // ChangeCodeOffsetAndLineOffset(4, 1), // ChangeCodeLength(5), let inlinee_line = InlineeSourceLine { inlinee: IdIndex(0xeb476), file_id: FileIndex(0x590), line: 499, extra_files: &[], }; let parent_offset = PdbInternalSectionOffset { section: 0x1, offset: 0x0453_f100, }; let iter = InlineeLineIterator::new(parent_offset, &inline_site, inlinee_line); let lines: Vec<_> = iter.collect().expect("collect inlinee lines"); let expected = [ LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0x0454_2b20, }, length: Some(7), file_index: FileIndex(0x590), line_start: 518, line_end: 519, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0x0454_2b27, }, length: Some(4), file_index: FileIndex(0x590), line_start: 520, line_end: 521, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, LineInfo { offset: PdbInternalSectionOffset { section: 0x1, offset: 0x0454_2b2b, }, length: Some(5), file_index: FileIndex(0x590), line_start: 521, line_end: 522, column_start: None, column_end: None, kind: LineInfoKind::Statement, }, ]; assert_eq!(lines, expected); } #[repr(align(4))] struct Align4(T); /// Aligned data for parsing cross module imports. /// /// When parsing them from the file, alignment is validated during ruintime using /// `cast_aligned`. If alignment is validated, it throws an error. 
const CROSS_MODULE_IMPORT_DATA: Align4<[u8; 76]> = Align4([ // module 0 189, 44, 0, 0, // module name 2CBD 14, 0, 0, 0, // 14 imports (all IDs, no Types) 171, 19, 0, 128, // 800013AB 37, 20, 0, 128, // 80001425 161, 19, 0, 128, // 800013A1 90, 20, 0, 128, // 8000145A 159, 19, 0, 128, // 8000139F 55, 20, 0, 128, // 80001437 109, 17, 0, 128, // 8000116D 238, 17, 0, 128, // 800011EE 246, 19, 0, 128, // 800013F6 69, 20, 0, 128, // 80001445 104, 19, 0, 128, // 80001368 148, 20, 0, 128, // 80001494 195, 20, 0, 128, // 800014C3 219, 20, 0, 128, // 800014DB // module 1 21, 222, 0, 0, // module name DE15 1, 0, 0, 0, // 1 import (id) 96, 22, 0, 128, // 80001660 ]); #[test] fn test_parse_cross_section_imports() { let sec = DebugCrossScopeImportsSubsection::new(&CROSS_MODULE_IMPORT_DATA.0); let modules: Vec<_> = sec.modules().collect().expect("collect imports"); assert_eq!(modules.len(), 2); let module = modules[0]; assert_eq!(module.get(0), Some(Local(IdIndex(0x8000_13AB)))); assert_eq!(module.get(13), Some(Local(IdIndex(0x8000_14DB)))); assert_eq!(module.get::(14), None); } #[test] fn test_resolve_cross_module_import() { let sec = DebugCrossScopeImportsSubsection::new(&CROSS_MODULE_IMPORT_DATA.0); let imports = CrossModuleImports::from_section(sec).expect("parse section"); let cross_ref = imports .resolve_import(IdIndex(0x8000_000A)) .expect("resolve import"); let expected = CrossModuleRef( // The module index is 0x000 = 1st module. ModuleRef(StringRef(0x2CBD)), // The import index is 0x0000A = 11th element. Local(IdIndex(0x8000_1368)), ); assert_eq!(cross_ref, expected); } #[test] fn test_resolve_cross_module_import2() { let sec = DebugCrossScopeImportsSubsection::new(&CROSS_MODULE_IMPORT_DATA.0); let imports = CrossModuleImports::from_section(sec).expect("parse section"); let cross_ref = imports .resolve_import(IdIndex(0x8010_0000)) .expect("resolve import"); let expected = CrossModuleRef( // The module index is 0x001 = 2nd module. ModuleRef(StringRef(0xDE15)), // The import index is 0x00001 = 1st element. 
Local(IdIndex(0x8000_1660)), ); assert_eq!(cross_ref, expected); } const CROSS_MODULE_EXPORT_DATA: Align4<[u8; 32]> = Align4([ 31, 16, 0, 0, 12, 16, 0, 0, // 101F -> 100C 32, 16, 0, 0, 79, 34, 0, 0, // 1020 -> 224F 92, 17, 0, 128, 97, 17, 0, 0, // 8000115C -> 1161 109, 17, 0, 128, 98, 17, 0, 0, // 8000116D -> 1162 ]); #[test] fn test_iter_cross_module_exports() { let section = DebugCrossScopeExportsSubsection::parse(&CROSS_MODULE_EXPORT_DATA.0) .expect("parse exports"); let exports = CrossModuleExports::from_section(section).expect("parse section"); let exports: Vec<_> = exports.exports().collect().expect("collect exports"); let expected = [ CrossModuleExport::Type(Local(TypeIndex(0x101F)), TypeIndex(0x100C)), CrossModuleExport::Type(Local(TypeIndex(0x1020)), TypeIndex(0x224F)), CrossModuleExport::Id(Local(IdIndex(0x8000_115C)), IdIndex(0x1161)), CrossModuleExport::Id(Local(IdIndex(0x8000_116D)), IdIndex(0x1162)), ]; assert_eq!(exports, expected); } #[test] fn test_resolve_cross_module_ref() { let section = DebugCrossScopeExportsSubsection::parse(&CROSS_MODULE_EXPORT_DATA.0) .expect("parse exports"); let exports = CrossModuleExports::from_section(section).expect("parse section"); let type_index = exports .resolve_import(Local(TypeIndex(0x101F))) .expect("resolve type"); assert_eq!(type_index, Some(TypeIndex(0x100C))); let id_index = exports .resolve_import(Local(IdIndex(0x8000_115C))) .expect("resolve id"); assert_eq!(id_index, Some(IdIndex(0x1161))); let missing_index = exports .resolve_import(Local(TypeIndex(0xFEED))) .expect("resolve missing"); assert_eq!(missing_index, None); } } pdb-0.8.0/src/modi/constants.rs000064400000000000000000000015210072674642500145260ustar 00000000000000//! Constants for all versions of the module info stream. #![allow(unused)] /// First explicit signature. pub const CV_SIGNATURE_C7: u32 = 1; /// Signature indicating a C11 (VC 5.x) module info stream. Uses 32-bit types. pub const CV_SIGNATURE_C11: u32 = 2; /// Signature indicating a C13 (VC 7.x) module info stream. Uses zero terminated names. pub const CV_SIGNATURE_C13: u32 = 4; /// Debug subsection kind for empty subsections. Should be skipped. pub const DEBUG_S_IGNORE: u32 = 0x8000_0000; /// Flag indicating that column information is present. pub const CV_LINES_HAVE_COLUMNS: u16 = 0x1; /// Flag indicating the default format of `DEBUG_S_INLINEELINEINFO` pub const CV_INLINEE_SOURCE_LINE_SIGNATURE: u32 = 0x0; /// Flag indicating the extended format of `DEBUG_S_INLINEELINEINFO` pub const CV_INLINEE_SOURCE_LINE_SIGNATURE_EX: u32 = 0x1; pdb-0.8.0/src/modi/mod.rs000064400000000000000000000262320072674642500132770ustar 00000000000000use std::fmt; use crate::common::*; use crate::dbi::Module; use crate::msf::Stream; use crate::symbol::SymbolIter; use crate::FallibleIterator; mod c13; mod constants; pub use c13::{ CrossModuleExportIter, CrossModuleExports, CrossModuleImports, Inlinee, InlineeIterator, InlineeLineIterator, }; #[derive(Clone, Copy, Debug)] enum LinesSize { C11(usize), C13(usize), } /// This struct contains data about a single module from its module info stream. /// /// The module info stream is where private symbols and line info is stored. pub struct ModuleInfo<'s> { stream: Stream<'s>, symbols_size: usize, lines_size: LinesSize, } impl<'s> ModuleInfo<'s> { /// Parses a `ModuleInfo` from it's Module info stream data. 
    pub(crate) fn parse(stream: Stream<'s>, module: &Module<'_>) -> Self {
        let info = module.info();

        let lines_size = if info.lines_size > 0 {
            LinesSize::C11(info.lines_size as usize)
        } else {
            LinesSize::C13(info.c13_lines_size as usize)
        };

        let symbols_size = info.symbols_size as usize;

        ModuleInfo {
            stream,
            symbols_size,
            lines_size,
        }
    }

    fn lines_data(&self, size: usize) -> &[u8] {
        let start = self.symbols_size as usize;
        &self.stream[start..start + size]
    }

    /// Get an iterator over all symbols in this module.
    pub fn symbols(&self) -> Result<SymbolIter<'_>> {
        let mut buf = self.stream.parse_buffer();
        buf.truncate(self.symbols_size)?;
        if self.symbols_size > 0 {
            let sig = buf.parse_u32()?;
            if sig != constants::CV_SIGNATURE_C13 {
                return Err(Error::UnimplementedFeature(
                    "Unsupported symbol data format",
                ));
            }
        }
        Ok(SymbolIter::new(buf))
    }

    /// Get an iterator over symbols starting at the given index.
    pub fn symbols_at(&self, index: SymbolIndex) -> Result<SymbolIter<'_>> {
        let mut iter = self.symbols()?;
        iter.seek(index);
        Ok(iter)
    }

    /// Returns a line program that gives access to file and line information in this module.
    pub fn line_program(&self) -> Result<LineProgram<'_>> {
        let inner = match self.lines_size {
            LinesSize::C11(_size) => return Err(Error::UnimplementedFeature("C11 line programs")),
            LinesSize::C13(size) => {
                LineProgramInner::C13(c13::LineProgram::parse(self.lines_data(size))?)
            }
        };

        Ok(LineProgram { inner })
    }

    /// Returns an iterator over all inlinees in this module.
    ///
    /// Inlinees are not guaranteed to be sorted. When requiring random access by `ItemId`, collect
    /// them into a mapping structure rather than reiterating multiple times.
    pub fn inlinees(&self) -> Result<InlineeIterator<'_>> {
        Ok(match self.lines_size {
            // C11 does not contain inlinee information.
            LinesSize::C11(_size) => Default::default(),
            LinesSize::C13(size) => InlineeIterator::parse(self.lines_data(size))?,
        })
    }

    /// Returns a table of exports declared by this module.
    pub fn exports(&self) -> Result<CrossModuleExports> {
        Ok(match self.lines_size {
            // C11 does not have cross module exports.
            LinesSize::C11(_size) => Default::default(),
            LinesSize::C13(size) => CrossModuleExports::parse(self.lines_data(size))?,
        })
    }

    /// Returns a table of imports of this module.
    pub fn imports(&self) -> Result<CrossModuleImports<'_>> {
        Ok(match self.lines_size {
            // C11 does not have cross module imports.
            LinesSize::C11(_size) => Default::default(),
            LinesSize::C13(size) => CrossModuleImports::parse(self.lines_data(size))?,
        })
    }
}

/// Checksum of a source file's contents.
#[derive(Clone, Debug)]
#[allow(missing_docs)]
pub enum FileChecksum<'a> {
    None,
    Md5(&'a [u8]),
    Sha1(&'a [u8]),
    Sha256(&'a [u8]),
}

impl PartialEq for FileChecksum<'_> {
    fn eq(&self, other: &Self) -> bool {
        // Manual implementation to allow for None != None.
        match (self, other) {
            (&FileChecksum::Md5(lhs), &FileChecksum::Md5(rhs)) => lhs == rhs,
            (&FileChecksum::Sha1(lhs), &FileChecksum::Sha1(rhs)) => lhs == rhs,
            (&FileChecksum::Sha256(lhs), &FileChecksum::Sha256(rhs)) => lhs == rhs,
            _ => false,
        }
    }
}

/// Information record on a source file.
#[derive(Clone, Debug, PartialEq)]
pub struct FileInfo<'a> {
    /// Reference to the file name in the [`StringTable`](crate::StringTable).
    pub name: StringRef,
    /// Checksum of the file contents.
    pub checksum: FileChecksum<'a>,
}

/// The kind of source construct a line info is referring to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LineInfoKind {
    /// A source code expression.
    Expression,
    /// A source code statement.
    Statement,
}

impl Default for LineInfoKind {
    fn default() -> Self {
        Self::Statement
    }
}

/// Mapping of a source code offset to a source file location.
///
/// A line entry is always valid up to the subsequent entry.
#[derive(Clone, Debug, PartialEq)]
pub struct LineInfo {
    /// Source code offset.
    pub offset: PdbInternalSectionOffset,
    /// The optional length of the code.
    pub length: Option<u32>,
    /// Index of the source file in this module.
    pub file_index: FileIndex,
    /// Line number of the start of the covered range.
    pub line_start: u32,
    /// Line number of the end of the covered range.
    pub line_end: u32,
    /// Column number of the start of the covered range.
    ///
    /// This value is only present if column information is provided by the PDB. Even then, it is
    /// often zero.
    pub column_start: Option<u32>,
    /// Column number of the end of the covered range.
    ///
    /// This value is only present if column information is provided by the PDB. Even then, it is
    /// often zero.
    pub column_end: Option<u32>,
    /// Kind of this line information.
    pub kind: LineInfoKind,
}

impl LineInfo {
    pub(crate) fn set_end(&mut self, end_offset: PdbInternalSectionOffset) {
        // This uses PartialOrd, which only compares offsets if the sections are equal.
        debug_assert!(self.offset <= end_offset);

        if self.offset <= end_offset {
            let length = end_offset.offset - self.offset.offset;
            if self.length.map_or(true, |l| l > length) {
                self.length = Some(length);
            }
        }
    }
}

enum LineProgramInner<'a> {
    C13(c13::LineProgram<'a>),
}

/// The `LineProgram` provides access to source line information for a module and its procedures.
pub struct LineProgram<'a> {
    inner: LineProgramInner<'a>,
}

impl<'a> LineProgram<'a> {
    /// Returns an iterator over all line information records of this module.
    ///
    /// Note that line records are not guaranteed to be ordered by source code offset. If a
    /// monotonic order by `PdbInternalSectionOffset` or `Rva` is required, the lines have to be
    /// sorted manually.
    pub fn lines(&self) -> LineIterator<'_> {
        match self.inner {
            LineProgramInner::C13(ref inner) => LineIterator {
                inner: LineIteratorInner::C13(inner.lines()),
            },
        }
    }

    /// Returns an iterator over all file records of this module.
    pub fn files(&self) -> FileIterator<'a> {
        match self.inner {
            LineProgramInner::C13(ref inner) => FileIterator {
                inner: FileIteratorInner::C13(inner.files()),
            },
        }
    }

    /// Returns an iterator over line records for a symbol at the given section offset.
    ///
    /// This may return line records before the start offset of the symbol. When using ASM,
    /// specifically MASM, symbol records may specify a range that is smaller than the actual
    /// code generated for this function. `lines_for_symbol` returns all line records covering this
    /// function, potentially exceeding this range.
    ///
    /// Note that line records are not guaranteed to be ordered by source code offset. If a
    /// monotonic order by `PdbInternalSectionOffset` or `Rva` is required, the lines have to be
    /// sorted manually.
    pub fn lines_for_symbol(&self, offset: PdbInternalSectionOffset) -> LineIterator<'_> {
        match self.inner {
            LineProgramInner::C13(ref inner) => LineIterator {
                inner: LineIteratorInner::C13(inner.lines_for_symbol(offset)),
            },
        }
    }

    /// Looks up file information for the specified file.
    pub fn get_file_info(&self, offset: FileIndex) -> Result<FileInfo<'a>> {
        match self.inner {
            LineProgramInner::C13(ref inner) => inner.get_file_info(offset),
        }
    }
}

#[derive(Clone, Debug)]
enum LineIteratorInner<'a> {
    C13(c13::LineIterator<'a>),
}

/// An iterator over line information records in a module.
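///
/// Returned by [`LineProgram::lines`] and [`LineProgram::lines_for_symbol`]. A minimal usage
/// sketch, assuming `program` is a [`LineProgram`] for the current module:
///
/// ```ignore
/// let mut lines = program.lines();
/// while let Some(line) = lines.next()? {
///     println!("line {} at {:?}", line.line_start, line.offset);
/// }
/// ```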
#[derive(Clone, Debug)] pub struct LineIterator<'a> { inner: LineIteratorInner<'a>, } impl Default for LineIterator<'_> { fn default() -> Self { LineIterator { inner: LineIteratorInner::C13(Default::default()), } } } impl<'a> FallibleIterator for LineIterator<'a> { type Item = LineInfo; type Error = Error; fn next(&mut self) -> Result> { match self.inner { LineIteratorInner::C13(ref mut inner) => inner.next(), } } } #[derive(Clone, Debug)] enum FileIteratorInner<'a> { C13(c13::FileIterator<'a>), } /// An iterator over file records in a module. #[derive(Clone, Debug)] pub struct FileIterator<'a> { inner: FileIteratorInner<'a>, } impl Default for FileIterator<'_> { fn default() -> Self { FileIterator { inner: FileIteratorInner::C13(Default::default()), } } } impl<'a> FallibleIterator for FileIterator<'a> { type Item = FileInfo<'a>; type Error = Error; fn next(&mut self) -> Result> { match self.inner { FileIteratorInner::C13(ref mut inner) => inner.next(), } } } /// Named reference to a [`Module`]. /// /// The name stored in the [`StringTable`](crate::StringTable) corresponds to the name of the module /// as returned by [`Module::module_name`]. #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub struct ModuleRef(pub StringRef); impl fmt::Display for ModuleRef { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.0.fmt(f) } } /// Reference to a local type or id in another module. /// /// See [`ItemIndex::is_cross_module`] for more information. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct CrossModuleRef(pub ModuleRef, pub Local); /// A cross module export that can either be a `Type` or an `Id`. /// /// Other modules may reference this item using its local ID by declaring it in the cross module /// imports subsection. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum CrossModuleExport { /// A cross module export of a [`Type`](crate::Type). Type(Local, TypeIndex), /// A cross module export of an [`Id`](crate::Id). Id(Local, IdIndex), } pdb-0.8.0/src/msf/mod.rs000064400000000000000000000406220072674642500131330ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::fmt; use std::ops::Deref; use scroll::{ctx::TryFromCtx, Endian, Pread}; use crate::common::*; use crate::source::*; mod page_list; use self::page_list::PageList; type PageNumber = u32; #[derive(Debug, Copy, Clone)] struct Header { page_size: usize, maximum_valid_page_number: PageNumber, } impl Header { fn pages_needed_to_store(&self, bytes: usize) -> usize { (bytes + (self.page_size - 1)) / self.page_size } fn validate_page_number(&self, page_number: u32) -> Result { if page_number == 0 || page_number > self.maximum_valid_page_number { Err(Error::PageReferenceOutOfRange(page_number)) } else { Ok(page_number as PageNumber) } } } /// Represents a stream table at various stages of access #[doc(hidden)] #[derive(Debug)] enum StreamTable<'s> { /// The MSF header gives us the size of the table in bytes, and the list of pages (usually one) /// where we can find the list of pages that contain the stream table. HeaderOnly { size_in_bytes: usize, stream_table_location_location: PageList, }, /// Given the HeaderOnly information, we can do an initial read to get the actual location of /// the stream table as a PageList. 
TableFound { stream_table_location: PageList }, // Given the table location, we can access the stream table itself Available { stream_table_view: Box>, }, } fn view<'s>(source: &mut dyn Source<'s>, page_list: &PageList) -> Result>> { // view it let view = source.view(page_list.source_slices())?; // double check our Source // if the Source didn't return the requested bits, that's an implementation bug, so // assert instead of returning an error assert_eq!(view.as_slice().len(), page_list.len()); // done Ok(view) } mod big { use super::*; pub const MAGIC: &[u8] = b"Microsoft C/C++ MSF 7.00\r\n\x1a\x44\x53\x00\x00\x00"; /// The PDB header as stored on disk. /// /// See the Microsoft code for reference: #[repr(C)] #[derive(Debug, Copy, Clone)] struct RawHeader { magic: [u8; 32], page_size: u32, free_page_map: u32, pages_used: u32, directory_size: u32, _reserved: u32, } impl<'t> TryFromCtx<'t, Endian> for RawHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { magic: { let mut tmp = [0; 32]; this.gread_inout_with(&mut offset, &mut tmp, le)?; tmp }, page_size: this.gread_with(&mut offset, le)?, free_page_map: this.gread_with(&mut offset, le)?, pages_used: this.gread_with(&mut offset, le)?, directory_size: this.gread_with(&mut offset, le)?, _reserved: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } #[derive(Debug)] pub struct BigMSF<'s, S> { header: Header, source: S, stream_table: StreamTable<'s>, } impl<'s, S: Source<'s>> BigMSF<'s, S> { pub fn new(source: S, header_view: Box>) -> Result> { let mut buf = ParseBuffer::from(header_view.as_slice()); let header: RawHeader = buf.parse()?; if header.magic != MAGIC { return Err(Error::UnrecognizedFileFormat); } if header.page_size.count_ones() != 1 || header.page_size < 0x100 || header.page_size > (128 * 0x10000) { return Err(Error::InvalidPageSize(header.page_size)); } let header_object = Header { page_size: header.page_size as usize, maximum_valid_page_number: header.pages_used, }; // calculate how many pages are needed to store the stream table let size_of_stream_table_in_pages = header_object.pages_needed_to_store(header.directory_size as usize); // now: how many pages are needed to store the list of pages that store the stream table? // each page entry is a u32, so multiply by four let size_of_stream_table_page_list_in_pages = header_object.pages_needed_to_store(size_of_stream_table_in_pages * 4); // read the list of stream table page list pages, which immediately follow the header // yes, this is a stupid level of indirection let mut stream_table_page_list_page_list = PageList::new(header_object.page_size); for _ in 0..size_of_stream_table_page_list_in_pages { let n = buf.parse_u32()?; stream_table_page_list_page_list.push(header_object.validate_page_number(n)?); } // truncate the stream table location location to the correct size stream_table_page_list_page_list.truncate(size_of_stream_table_in_pages * 4); Ok(BigMSF { header: header_object, source, stream_table: StreamTable::HeaderOnly { size_in_bytes: header.directory_size as usize, stream_table_location_location: stream_table_page_list_page_list, }, }) } fn find_stream_table(&mut self) -> Result<()> { let mut new_stream_table: Option> = None; if let StreamTable::HeaderOnly { size_in_bytes, ref stream_table_location_location, } = self.stream_table { // the header indicated we need to read size_in_pages page numbers from the // specified PageList. 
// ask to view the location location let location_location = view(&mut self.source, stream_table_location_location)?; // build a PageList let mut page_list = PageList::new(self.header.page_size); let mut buf = ParseBuffer::from(location_location.as_slice()); while !buf.is_empty() { let n = buf.parse_u32()?; page_list.push(self.header.validate_page_number(n)?); } page_list.truncate(size_in_bytes); // remember what we learned new_stream_table = Some(StreamTable::TableFound { stream_table_location: page_list, }); } if let Some(st) = new_stream_table { self.stream_table = st; } Ok(()) } fn make_stream_table_available(&mut self) -> Result<()> { // do the initial read if we must if let StreamTable::HeaderOnly { .. } = self.stream_table { self.find_stream_table()?; } // do we need to map the stream table itself? let mut new_stream_table = None; if let StreamTable::TableFound { ref stream_table_location, } = self.stream_table { // ask the source to view it let stream_table_view = view(&mut self.source, stream_table_location)?; new_stream_table = Some(StreamTable::Available { stream_table_view }); } if let Some(st) = new_stream_table { self.stream_table = st; } // stream table is available assert!(matches!(self.stream_table, StreamTable::Available { .. })); Ok(()) } fn look_up_stream(&mut self, stream_number: u32) -> Result { // ensure the stream table is available self.make_stream_table_available()?; let header = self.header; // declare the things we're going to find let bytes_in_stream: u32; let page_list: PageList; if let StreamTable::Available { ref stream_table_view, } = self.stream_table { let stream_table_slice = stream_table_view.as_slice(); let mut stream_table = ParseBuffer::from(stream_table_slice); // the stream table is structured as: // stream_count // 0..stream_count: size of stream in bytes (0xffffffff indicating "stream does not exist") // stream 0: PageNumber // stream 1: PageNumber, PageNumber // stream 2: PageNumber, PageNumber, PageNumber, PageNumber, PageNumber // stream 3: PageNumber, PageNumber, PageNumber, PageNumber // (number of pages determined by number of bytes) let stream_count = stream_table.parse_u32()?; // check if we've already outworn our welcome if stream_number >= stream_count { return Err(Error::StreamNotFound(stream_number)); } // we now have {stream_count} u32s describing the length of each stream // walk over the streams before the requested stream // we need to pay attention to how big each one is, since their page numbers come // before our page numbers in the stream table let mut page_numbers_to_skip: usize = 0; for _ in 0..stream_number { let bytes = stream_table.parse_u32()?; if bytes == u32::max_value() { // stream is not present, ergo nothing to skip } else { page_numbers_to_skip += header.pages_needed_to_store(bytes as usize); } } // read our stream's size bytes_in_stream = stream_table.parse_u32()?; if bytes_in_stream == u32::max_value() { return Err(Error::StreamNotFound(stream_number)); } let pages_in_stream = header.pages_needed_to_store(bytes_in_stream as usize); // skip the remaining streams' byte counts let _ = stream_table.take((stream_count - stream_number - 1) as usize * 4)?; // skip the preceding streams' page numbers let _ = stream_table.take((page_numbers_to_skip as usize) * 4)?; // we're now at the list of pages for our stream // accumulate them into a PageList let mut list = PageList::new(header.page_size); for _ in 0..pages_in_stream { let page_number = stream_table.parse_u32()?; 
list.push(self.header.validate_page_number(page_number)?); } // truncate to the size of the stream list.truncate(bytes_in_stream as usize); page_list = list; } else { unreachable!(); } // done! Ok(page_list) } } impl<'s, S: Source<'s>> Msf<'s, S> for BigMSF<'s, S> { fn get(&mut self, stream_number: u32, limit: Option) -> Result> { // look up the stream let mut page_list = self.look_up_stream(stream_number)?; // apply any limits we have if let Some(limit) = limit { page_list.truncate(limit); } // now that we know where this stream lives, we can view it let view = view(&mut self.source, &page_list)?; // pack it into a Stream let stream = Stream { source_view: view }; Ok(stream) } } } mod small { pub const MAGIC: &[u8] = b"Microsoft C/C++ program database 2.00\r\n\x1a\x4a\x47"; // TODO: implement SmallMSF } /// Represents a single Stream within the multi-stream file. #[derive(Debug)] pub struct Stream<'s> { source_view: Box>, } impl<'s> Stream<'s> { #[inline] pub(crate) fn parse_buffer(&self) -> ParseBuffer<'_> { let slice = self.source_view.as_slice(); ParseBuffer::from(slice) } #[inline] pub fn as_slice(&self) -> &[u8] { self.source_view.as_slice() } } impl Deref for Stream<'_> { type Target = [u8]; #[inline] fn deref(&self) -> &Self::Target { self.as_slice() } } /// Provides access to a "multi-stream file", which is the container format used by PDBs. pub trait Msf<'s, S>: fmt::Debug { /// Accesses a stream by stream number, optionally restricted by a byte limit. fn get(&mut self, stream_number: u32, limit: Option) -> Result>; } fn header_matches(actual: &[u8], expected: &[u8]) -> bool { actual.len() >= expected.len() && &actual[0..expected.len()] == expected } pub fn open_msf<'s, S: Source<'s> + 's>(mut source: S) -> Result + 's>> { // map the header let mut header_location = PageList::new(4096); header_location.push(0); let header_view = match view(&mut source, &header_location) { Ok(view) => view, Err(e) => match e { Error::IoError(x) => { if x.kind() == std::io::ErrorKind::UnexpectedEof { return Err(Error::UnrecognizedFileFormat); } else { return Err(Error::IoError(x)); } } _ => return Err(e), }, }; // see if it's a BigMSF if header_matches(header_view.as_slice(), big::MAGIC) { // claimed! 
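        // Note (added commentary): `BigMSF::new` re-parses the header from this same
        // 4096-byte view and records where the stream table lives; the stream table itself
        // is only read lazily, on the first `get()` call.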
let bigmsf = big::BigMSF::new(source, header_view)?; return Ok(Box::new(bigmsf)); } if header_matches(header_view.as_slice(), small::MAGIC) { // sorry return Err(Error::UnimplementedFeature("small MSF file format")); } Err(Error::UnrecognizedFileFormat) } #[cfg(test)] mod tests { mod header { use crate::common::Error; use crate::msf::open_msf; use crate::msf::Header; #[test] fn test_pages_needed_to_store() { let h = Header { page_size: 4096, maximum_valid_page_number: 15, }; assert_eq!(h.pages_needed_to_store(0), 0); assert_eq!(h.pages_needed_to_store(1), 1); assert_eq!(h.pages_needed_to_store(1024), 1); assert_eq!(h.pages_needed_to_store(2048), 1); assert_eq!(h.pages_needed_to_store(4095), 1); assert_eq!(h.pages_needed_to_store(4096), 1); assert_eq!(h.pages_needed_to_store(4097), 2); } #[test] fn test_validate_page_number() { let h = Header { page_size: 4096, maximum_valid_page_number: 15, }; assert!(matches!( h.validate_page_number(0), Err(Error::PageReferenceOutOfRange(0)) )); assert!(matches!(h.validate_page_number(1), Ok(1))); assert!(matches!(h.validate_page_number(2), Ok(2))); assert!(matches!(h.validate_page_number(14), Ok(14))); assert!(matches!(h.validate_page_number(15), Ok(15))); assert!(matches!( h.validate_page_number(16), Err(Error::PageReferenceOutOfRange(16)) )); assert!(matches!( h.validate_page_number(17), Err(Error::PageReferenceOutOfRange(17)) )); } #[test] fn test_small_file_unrecognized_file_format() { let small_file = std::io::Cursor::new(b"\x7FELF"); match open_msf(small_file) { Ok(_) => panic!("4 byte file should not parse as msf"), Err(e) => match e { Error::UnrecognizedFileFormat => (), _ => panic!("4 byte file should parse as unrecognized file format"), }, }; } } } pdb-0.8.0/src/msf/page_list.rs000064400000000000000000000201600072674642500143160ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use crate::msf::PageNumber; use crate::source::SourceSlice; /// Represents a list of `PageNumbers`, which are likely (but not certainly) sequential, and which /// will be presented as a slice of `SourceSlice`s. #[derive(Debug)] pub struct PageList { page_size: usize, source_slices: Vec, last_page: Option, truncated: bool, } impl PageList { /// Create a new PageList for a given page size. pub fn new(page_size: usize) -> Self { Self { page_size, source_slices: Vec::new(), last_page: None, truncated: false, } } /// Add a page to the PageList. If this page number is sequential with the previous page number, /// it will be combined into the previous `SourceSlice` for efficiency. pub fn push(&mut self, page: PageNumber) { assert!(!self.truncated); let is_continuous = match self.last_page { Some(n) => n.checked_add(1) == Some(page), None => false, }; if is_continuous { // extend by one page debug_assert!(!self.source_slices.is_empty()); let last_slice = self.source_slices.last_mut().unwrap(); last_slice.size += self.page_size; } else { self.source_slices.push(SourceSlice { offset: (self.page_size as u64) * u64::from(page), size: self.page_size, }); } self.last_page = Some(page); } /// Truncate the `PageList` to request only a certain number of bytes, regardless of how many /// pages were pushed. Truncatation is optional, but it must be last; `push()` may not be /// called after `truncate()`. 
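    // Worked example (added commentary; the values are hypothetical): with page_size = 4096,
    // pushing pages 7, 8 and 9 coalesces into one SourceSlice { offset: 0x7000, size: 0x3000 };
    // truncate(10_000) then shrinks that slice to 10_000 bytes, so len() reports 10_000.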
pub fn truncate(&mut self, bytes: usize) { let mut bytes = bytes; let mut new_slices: Vec = Vec::new(); for slice in &self.source_slices { let mut slice: SourceSlice = *slice; if bytes > 0 { // we need something from this slice // restrict this slice to the number of bytes remaining if slice.size > bytes { slice.size = bytes; } // keep it new_slices.push(slice); // subtract the number of bytes in this slice bytes -= slice.size; } else { // we're done break; } } self.source_slices = new_slices; self.truncated = true; } /// Return the total length of this PageList. pub fn len(&self) -> usize { self.source_slices.iter().fold(0, |acc, s| acc + s.size) } /// Return a slice of SourceSlices. pub fn source_slices(&self) -> &[SourceSlice] { self.source_slices.as_slice() } } #[cfg(test)] mod tests { use crate::msf::page_list::*; use crate::source::SourceSlice; #[test] fn test_push() { let mut list = PageList::new(4096); // PageList should coalesce sequential pages list.push(0); list.push(1); let expected = vec![SourceSlice { offset: 0, size: 8192, }]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 8192); // PageList should handle nonsequential runs too list.push(4); list.push(5); let expected = vec![ SourceSlice { offset: 0, size: 8192, }, SourceSlice { offset: 16384, size: 8192, }, ]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 16384); // ...including nonsequential runs that go backwards list.push(2); let expected = vec![ SourceSlice { offset: 0, size: 8192, }, SourceSlice { offset: 16384, size: 8192, }, SourceSlice { offset: 8192, size: 4096, }, ]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 20480); // ...and runs that repeat themselves list.push(2); let expected = vec![ SourceSlice { offset: 0, size: 8192, }, SourceSlice { offset: 16384, size: 8192, }, SourceSlice { offset: 8192, size: 4096, }, SourceSlice { offset: 8192, size: 4096, }, ]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 24576); } #[test] fn test_truncate() { let mut list = PageList::new(4096); list.push(0); list.push(1); list.push(4); list.push(5); list.push(2); list.push(2); assert_eq!(list.len(), 24576); // truncation should do nothing when it's truncating more than is described list.truncate(25000); let expected = vec![ SourceSlice { offset: 0, size: 8192, }, SourceSlice { offset: 16384, size: 8192, }, SourceSlice { offset: 8192, size: 4096, }, SourceSlice { offset: 8192, size: 4096, }, ]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 24576); // it's usually employed to reduce the size of the last slice... 
list.truncate(24000); let expected = vec![ SourceSlice { offset: 0, size: 8192, }, SourceSlice { offset: 16384, size: 8192, }, SourceSlice { offset: 8192, size: 4096, }, SourceSlice { offset: 8192, size: 3520, }, ]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 24000); // ...but it should be able to lop off entire slices too list.truncate(10000); let expected = vec![ SourceSlice { offset: 0, size: 8192, }, SourceSlice { offset: 16384, size: 1808, }, ]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 10000); // and again, it shouldn't do anything if we re-truncate to a larger size list.truncate(12000); let expected = vec![ SourceSlice { offset: 0, size: 8192, }, SourceSlice { offset: 16384, size: 1808, }, ]; assert_eq!(list.source_slices(), expected.as_slice()); assert_eq!(list.len(), 10000); // finally, we should be able to truncate the entire PageList down to nothing list.truncate(0); assert_eq!(list.source_slices().len(), 0); assert_eq!(list.len(), 0); } #[test] #[should_panic] fn test_push_after_truncate() { // push after truncate isn't permitted let mut list = PageList::new(4096); list.push(5); list.truncate(2000); // so far so good // bam! list.push(6); } #[test] fn push_overflow() { let mut list = PageList::new(4096); list.push(u32::MAX); list.push(u32::MAX); } } pdb-0.8.0/src/omap.rs000064400000000000000000000573520072674642500125330ustar 00000000000000// Copyright 2018 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. //! Utilities for translating addresses between PDB offsets and _Relative Virtual Addresses_ (RVAs). use std::cmp::{self, Ordering}; use std::fmt; use std::iter::FusedIterator; use std::mem; use std::ops::Range; use crate::common::*; use crate::msf::Stream; use crate::pe::ImageSectionHeader; /// A address translation record from an `OMAPTable`. /// /// This record applies to the half-open interval [ `record.source_address`, /// `next_record.source_address` ). #[repr(C)] #[derive(Clone, Copy, Eq, PartialEq)] pub(crate) struct OMAPRecord { source_address: u32, target_address: u32, } impl OMAPRecord { /// Create a new OMAP record for the given mapping. pub fn new(source_address: u32, target_address: u32) -> Self { Self { source_address: source_address.to_le(), target_address: target_address.to_le(), } } /// Returns the address in the source space. #[inline] pub fn source_address(self) -> u32 { u32::from_le(self.source_address) } /// Returns the start of the mapped portion in the target address space. #[inline] pub fn target_address(self) -> u32 { u32::from_le(self.target_address) } /// Translate the given address into the target address space. #[inline] fn translate(self, address: u32) -> u32 { // This method is only to be used internally by the OMAP iterator and lookups. The caller // must verify that the record is valid to translate an address. 
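        // Worked example (added commentary; the values are hypothetical): a record with
        // source_address 0x2000 and target_address 0x9000 translates address 0x2010 to
        // (0x2010 - 0x2000) + 0x9000 = 0x9010.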
debug_assert!(self.source_address() <= address); (address - self.source_address()) + self.target_address() } } impl fmt::Debug for OMAPRecord { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("OMAPRecord") .field( "source_address", &format_args!("{:#010x}", self.source_address()), ) .field( "target_address", &format_args!("{:#010x}", self.target_address()), ) .finish() } } impl PartialOrd for OMAPRecord { #[inline] fn partial_cmp(&self, other: &Self) -> Option { self.source_address().partial_cmp(&other.source_address()) } } impl Ord for OMAPRecord { #[inline] fn cmp(&self, other: &Self) -> Ordering { self.source_address().cmp(&other.source_address()) } } /// PDBs can contain OMAP tables, which translate relative virtual addresses (RVAs) from one address /// space into another. /// /// For more information on the pratical use of OMAPs, see the [module level documentation] and /// [`AddressMap`]. A PDB can contain two OMAPs: /// /// - `omap_from_src`: A mapping from the original address space to the transformed address space /// of an optimized binary. Use `PDB::omap_from_src` to obtain an instance of this OMAP. Also, /// `PdbInternalRva::rva` performs this conversion in a safe manner. /// - `omap_to_src`: A mapping from the transformed address space back into the original address /// space of the unoptimized binary. Use `PDB::omap_to_src` to obtain an instace of this OMAP. /// Also, `Rva::original_rva` performs this conversion in a safe manner. /// /// # Structure /// /// OMAP tables are dense arrays, sequentially storing `OMAPRecord` structs sorted by source /// address. /// /// Each record applies to a range of addresses: i.e. record N indicates that addresses in the /// half-open interval [ `record[n].source_address`, `record[n+1].source_address` ) were relocated /// to a starting address of `record[n].target_address`. If `target_address` is zero, the `lookup()` /// will return None, since this indicates a non-existent location in the target address space. /// /// Given that the table is sorted, lookups by source address can be efficiently serviced using a /// binary search directly against the underlying data without secondary data structures. This is /// not the most cache efficient data structure (especially given that half of each cache line is /// storing target addresses), but given that OMAP tables are an uncommon PDBs feature, the obvious /// binary search implementation seems appropriate. /// /// [module level documentation]: self pub(crate) struct OMAPTable<'s> { stream: Stream<'s>, } impl<'s> OMAPTable<'s> { pub(crate) fn parse(stream: Stream<'s>) -> Result { match cast_aligned::(stream.as_slice()) { Some(_) => Ok(OMAPTable { stream }), None => Err(Error::InvalidStreamLength("OMAP")), } } /// Returns a direct view onto the records stored in this OMAP table. #[inline] pub fn records(&self) -> &[OMAPRecord] { // alignment is checked during parsing, unwrap is safe. cast_aligned(self.stream.as_slice()).unwrap() } /// Look up `source_address` to yield a target address. pub fn lookup(&self, source_address: u32) -> Option { let records = self.records(); let index = match records.binary_search_by_key(&source_address, |r| r.source_address()) { Ok(i) => i, Err(0) => return None, Err(i) => i - 1, }; let record = records[index]; // As a special case, `target_address` can be zero, which indicates that the // `source_address` does not exist in the target address space. 
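        // Illustrative lookups (added commentary; the records are hypothetical): given
        // [(0x1000 -> 0x4000), (0x2000 -> 0x0)], lookup(0x1800) lands on the first record and
        // yields Some(0x4800); lookup(0x2400) lands on the second, whose target is zero, and
        // yields None; lookup(0x0800) precedes every record and also yields None.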
if record.target_address() == 0 { return None; } Some(record.translate(source_address)) } /// Look up a the range `start..end` and iterate all mapped sub-ranges. pub fn lookup_range(&self, range: Range) -> RangeIter<'_> { let Range { start, end } = range; if end <= start { return RangeIter::empty(); } let records = self.records(); let (record, next) = match records.binary_search_by_key(&start, |r| r.source_address()) { Ok(i) => (records[i], &records[i + 1..]), // Insert a dummy record no indicate that the range before the first record is invalid. // The range might still overlap with the first record however, so attempt regular // iteration. Err(0) => (OMAPRecord::new(0, 0), records), Err(i) => (records[i - 1], &records[i..]), }; RangeIter { records: next.iter(), record, addr: start, end, } } } impl fmt::Debug for OMAPTable<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("OMAPTable").field(&self.records()).finish() } } /// An iterator over mapped target ranges in an OMAP. pub(crate) struct RangeIter<'t> { /// Iterator over subsequent OMAP records. records: std::slice::Iter<'t, OMAPRecord>, /// The record that spans the current start address. record: OMAPRecord, /// The start address of the current subrange. addr: u32, /// The final end address of the (last sub-)range. end: u32, } impl<'t> RangeIter<'t> { /// Creates a `RangeIter` that does not yield any ranges. pub fn empty() -> Self { RangeIter { records: [].iter(), record: OMAPRecord::new(0, 0), addr: 0, end: 0, } } /// Creates a `RangeIter` that only yields the specified range. pub fn identity(range: Range) -> Self { // Declare the range `start..` as valid with an identity mapping. We cannot use `0..` here // since the target must be a non-zero value to be recognized as valid mapping. Since there // are no further records, a single subrange `start..end` will be considered. RangeIter { records: [].iter(), record: OMAPRecord::new(range.start, range.start), addr: range.start, end: range.end, } } } impl Default for RangeIter<'_> { fn default() -> Self { Self::empty() } } impl Iterator for RangeIter<'_> { type Item = Range; fn next(&mut self) -> Option { while self.addr < self.end { // Pull the next record from the list. Since the current record is only valid up to the // next one, this will determine the end of the current sub slice. If there are no more // records, create an unmapped dummy record starting at the end of the source range. let next_record = match self.records.next() { Some(record) => *record, None => OMAPRecord::new(self.end, 0), }; // Calculate the bounds of the current subrange and write it back for the next // iteration. Likewise, remember the next record as address translation base. let subrange_end = cmp::min(next_record.source_address(), self.end); let subrange_start = mem::replace(&mut self.addr, subrange_end); let last_record = mem::replace(&mut self.record, next_record); // Check for the validity of this sub-range or skip it silently: // 2. The sub range covered by the last OMAP record might be empty. This can be an // artifact of a dummy record used when creating a new iterator. // 3. A `target_address` of zero indicates an unmapped address range. 
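            // Illustrative run (added commentary; the records are hypothetical): with
            // [(0x1000 -> 0x4000), (0x1800 -> 0x7000), (0x2000 -> 0x0)],
            // lookup_range(0x1200..0x1900) yields 0x4200..0x4800 (the part covered by the
            // first record) followed by 0x7000..0x7100 (the part covered by the second).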
if subrange_start >= subrange_end || last_record.target_address() == 0 { continue; } let translated_start = last_record.translate(subrange_start); let translated_end = last_record.translate(subrange_end); return Some(translated_start..translated_end); } None } } impl FusedIterator for RangeIter<'_> {} /// Iterator over [`Rva`] ranges returned by [`AddressMap::rva_ranges`]. pub struct RvaRangeIter<'t>(RangeIter<'t>); impl Iterator for RvaRangeIter<'_> { type Item = Range; fn next(&mut self) -> Option { self.0.next().map(|range| Rva(range.start)..Rva(range.end)) } } impl FusedIterator for RvaRangeIter<'_> {} /// Iterator over [`PdbInternalRva`] ranges returned by [`AddressMap::internal_rva_ranges`]. pub struct PdbInternalRvaRangeIter<'t>(RangeIter<'t>); impl Iterator for PdbInternalRvaRangeIter<'_> { type Item = Range; fn next(&mut self) -> Option { self.0 .next() .map(|range| PdbInternalRva(range.start)..PdbInternalRva(range.end)) } } impl FusedIterator for PdbInternalRvaRangeIter<'_> {} /// A mapping between addresses and offsets used in the PDB and PE file. /// /// To obtain an instace of this address map, call `PDB::address_map`. It will determine the correct /// translation mode and read all internal state from the PDB. Then use the conversion methods on /// the address and offset types to translate addresses. /// /// # Background /// /// Addresses in PDBs are stored as offsets into sections of the PE file. The `AddressMap` contains /// the PE's section headers to translate between the offsets and virtual addresses relative to the /// image base (RVAs). /// /// Additionally, Microsoft has been reordering the Windows system and application binaries to /// optimize them for paging reduction, using a toolset reported to be derived from and/or built on /// top of the [Vulcan research project]. Relatively little else is known about the tools or the /// methods they use. Looking at Windows system binaries like `ntoskrnl.exe`, it is apparent that /// their layout has been rearranged, and their respective symbol files contain _OMAP_ re-mapping /// information. The [Microsoft Binary Technologies Projects] may be involved in this. /// /// The internals of this transformation are not well understood. According to [1997 reference /// material]: /// /// > Yet another form of debug information is relatively new and undocumented, except for a few /// > obscure references in `WINNT.H` and the Win32 SDK help. This type of information is known as /// > OMAP. Apparently, as part of Microsoft's internal build procedure, small fragments of code in /// > EXEs and DLLs are moved around to put the most commonly used code at the beginning of the code /// > section. This presumably keeps the process memory working set as small as possible. However, /// > when shifting around the blocks of code, the corresponding debug information isn't updated. /// > Instead, OMAP information is created. It lets symbol table code translate between the original /// > address in a symbol table and the modified address where the variable or line of code really /// > exists in memory. /// /// # Usage /// /// To aid with translating addresses and offsets, this module exposes `AddressMap`, a helper that /// contains all information to apply the correct translation of any kind of address or offset to /// another. Due to the rearranging optimizations, there are four types involved: /// /// - [`Rva`]: A _Relative Virtual Address_ in the actual binary. 
This address directly corresponds /// to instruction pointers seen in stack traces and symbol addresses reported by debuggers. /// - [`PdbInternalRva`]: An RVA as it would have appeared before the optimization. These RVAs are /// used in some places and can be converted to an `Rva` in the actual address space. /// - [`SectionOffset`]: An offset into a section of the actual binary. A `section` member of _n_ /// refers to section _n - 1_, which makes a section number of _0_ a null pointer. /// - [`PdbInternalSectionOffset`]: An offset into a section of the original binary. These offsets /// are used throughout the PDB and can be converted to either `SectionOffset`, or directly to /// `Rva` in the actual address space. /// /// For binaries that have not been optimized that way, the `PdbInternal*` values are effectively /// equal to their regular counterparts and the conversion between the two are no-ops. Address /// translation still has to assume different address spaces, which is why there is no direct /// conversion without an `AddressMap`. /// /// # Example /// /// ```rust /// # use pdb::{Rva, FallibleIterator}; /// # /// # fn test() -> pdb::Result<()> { /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = pdb::PDB::open(source)?; /// /// // Compute the address map once and reuse it /// let address_map = pdb.address_map()?; /// /// # let symbol_table = pdb.global_symbols()?; /// # let symbol = symbol_table.iter().next()?.unwrap(); /// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => { /// // Obtain some section offset, eg from a symbol, and convert it /// match pubsym.offset.to_rva(&address_map) { /// Some(rva) => { /// println!("symbol is at {}", rva); /// # assert_eq!(rva, Rva(26048)); /// } /// None => { /// println!("symbol refers to eliminated code"); /// # panic!("symbol should exist"); /// } /// } /// # } _ => unreachable!() } /// # Ok(()) /// # } /// # test().unwrap() /// ``` /// /// [Vulcan research project]: https://research.microsoft.com/pubs/69850/tr-2001-50.pdf /// [Microsoft Binary Technologies Projects]: https://microsoft.com/windows/cse/bit_projects.mspx /// [1997 reference material]: https://www.microsoft.com/msj/0597/hood0597.aspx #[derive(Debug, Default)] pub struct AddressMap<'s> { pub(crate) original_sections: Vec, pub(crate) transformed_sections: Option>, pub(crate) transformed_to_original: Option>, pub(crate) original_to_transformed: Option>, } impl<'s> AddressMap<'s> { /// Resolves actual ranges in the executable's address space. /// /// The given internal address range might be split up into multiple ranges in the executable. /// This iterator traverses all mapped ranges in the order of the PDB-internal mapping. All /// empty or eliminated ranges are skipped. Thus, the iterator might be empty even for non-empty /// ranges. pub fn rva_ranges(&self, range: Range) -> RvaRangeIter<'_> { RvaRangeIter(match self.original_to_transformed { Some(ref omap) => omap.lookup_range(range.start.0..range.end.0), None => RangeIter::identity(range.start.0..range.end.0), }) } /// Resolves actual ranges in the executable's address space. /// /// The given address range might correspond to multiple ranges in the PDB-internal address /// space. This iterator traverses all mapped ranges in the order of the actual RVA mapping. /// This iterator might be empty even for non-empty ranges if no corresponding original range /// can be found. 
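    // Usage sketch (added commentary; `pdb` and the range are hypothetical):
    //     let address_map = pdb.address_map()?;
    //     for range in address_map.internal_rva_ranges(Rva(0x1000)..Rva(0x2000)) {
    //         println!("maps back to {:?}..{:?}", range.start, range.end);
    //     }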
pub fn internal_rva_ranges(&self, range: Range) -> PdbInternalRvaRangeIter<'_> { PdbInternalRvaRangeIter(match self.transformed_to_original { Some(ref omap) => omap.lookup_range(range.start.0..range.end.0), None => RangeIter::identity(range.start.0..range.end.0), }) } } fn get_section_offset(sections: &[ImageSectionHeader], address: u32) -> Option<(u16, u32)> { // Section headers are sorted by virtual_address, so we only need to iterate until we exceed // the desired address. Since the number of section headers is relatively low, a sequential // search is the fastest option here. let (index, section) = sections .iter() .take_while(|s| s.virtual_address <= address) .enumerate() .find(|(_, s)| address < s.virtual_address + s.size_of_raw_data)?; Some((index as u16 + 1, address - section.virtual_address)) } fn get_virtual_address(sections: &[ImageSectionHeader], section: u16, offset: u32) -> Option { (section as usize) .checked_sub(1) .and_then(|i| sections.get(i)) .map(|section| section.virtual_address + offset) } impl Rva { /// Resolves a PDB-internal Relative Virtual Address. /// /// This address is not necessarily compatible with the executable's address space and should /// therefore not be used for debugging purposes. pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option { match translator.transformed_to_original { Some(ref omap) => omap.lookup(self.0).map(PdbInternalRva), None => Some(PdbInternalRva(self.0)), } } /// Resolves the section offset in the PE headers. /// /// This is an offset into PE section headers of the executable. To retrieve section offsets /// used in the PDB, use [`to_internal_offset`](Self::to_internal_offset) instead. pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option { let (section, offset) = match translator.transformed_sections { Some(ref sections) => get_section_offset(sections, self.0)?, None => get_section_offset(&translator.original_sections, self.0)?, }; Some(SectionOffset { section, offset }) } /// Resolves the PDB internal section offset. /// /// This is the offset value used in the PDB file. To index into the actual PE section headers, /// use [`to_section_offset`](Self::to_section_offset) instead. pub fn to_internal_offset( self, translator: &AddressMap<'_>, ) -> Option { self.to_internal_rva(translator)? .to_internal_offset(translator) } } impl PdbInternalRva { /// Resolves an actual Relative Virtual Address in the executable's address space. pub fn to_rva(self, translator: &AddressMap<'_>) -> Option { match translator.original_to_transformed { Some(ref omap) => omap.lookup(self.0).map(Rva), None => Some(Rva(self.0)), } } /// Resolves the section offset in the PE headers. /// /// This is an offset into PE section headers of the executable. To retrieve section offsets /// used in the PDB, use [`to_internal_offset`](Self::to_internal_offset) instead. pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option { self.to_rva(translator)?.to_section_offset(translator) } /// Resolves the PDB internal section offset. /// /// This is the offset value used in the PDB file. To index into the actual PE section headers, /// use [`to_section_offset`](Self::to_section_offset) instead. pub fn to_internal_offset( self, translator: &AddressMap<'_>, ) -> Option { let (section, offset) = get_section_offset(&translator.original_sections, self.0)?; Some(PdbInternalSectionOffset { section, offset }) } } impl SectionOffset { /// Resolves an actual Relative Virtual Address in the executable's address space. 
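    // Worked example (added commentary; the section table is hypothetical): if section 1
    // starts at virtual_address 0x1000_0000, then SectionOffset { section: 1, offset: 0x1234 }
    // resolves to Rva(0x1000_1234); a section number of 0 acts as a null reference and
    // resolves to None.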
pub fn to_rva(self, translator: &AddressMap<'_>) -> Option { let address = match translator.transformed_sections { Some(ref sections) => get_virtual_address(sections, self.section, self.offset)?, None => get_virtual_address(&translator.original_sections, self.section, self.offset)?, }; Some(Rva(address)) } /// Resolves a PDB-internal Relative Virtual Address. /// /// This address is not necessarily compatible with the executable's address space and should /// therefore not be used for debugging purposes. pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option { self.to_rva(translator)?.to_internal_rva(translator) } /// Resolves the PDB internal section offset. pub fn to_internal_offset( self, translator: &AddressMap<'_>, ) -> Option { if translator.transformed_sections.is_none() { // Fast path to avoid section table lookups let Self { section, offset } = self; return Some(PdbInternalSectionOffset { section, offset }); } self.to_internal_rva(translator)? .to_internal_offset(translator) } } impl PdbInternalSectionOffset { /// Resolves an actual Relative Virtual Address in the executable's address space. pub fn to_rva(self, translator: &AddressMap<'_>) -> Option { self.to_internal_rva(translator)?.to_rva(translator) } /// Resolves a PDB-internal Relative Virtual Address. /// /// This address is not necessarily compatible with the executable's address space and should /// therefore not be used for debugging purposes. pub fn to_internal_rva(self, translator: &AddressMap<'_>) -> Option { get_virtual_address(&translator.original_sections, self.section, self.offset) .map(PdbInternalRva) } /// Resolves the section offset in the PE headers. pub fn to_section_offset(self, translator: &AddressMap<'_>) -> Option { if translator.transformed_sections.is_none() { // Fast path to avoid section table lookups let Self { section, offset } = self; return Some(SectionOffset { section, offset }); } self.to_rva(translator)?.to_section_offset(translator) } } #[cfg(test)] mod tests { use super::*; use std::mem; #[test] fn test_omap_record() { assert_eq!(mem::size_of::(), 8); assert_eq!(mem::align_of::(), 4); } #[test] fn test_get_virtual_address() { let sections = vec![ImageSectionHeader { virtual_address: 0x1000_0000, ..Default::default() }]; assert_eq!(get_virtual_address(§ions, 1, 0x1234), Some(0x1000_1234)); assert_eq!(get_virtual_address(§ions, 2, 0x1234), None); // https://github.com/willglynn/pdb/issues/87 assert_eq!(get_virtual_address(§ions, 0, 0x1234), None); } } pdb-0.8.0/src/pdb.rs000064400000000000000000000524100072674642500123320ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use crate::common::*; use crate::dbi::{DBIExtraStreams, DBIHeader, DebugInformation, Module}; use crate::framedata::FrameTable; use crate::modi::ModuleInfo; use crate::msf::{self, Msf, Stream}; use crate::omap::{AddressMap, OMAPTable}; use crate::pdbi::PDBInformation; use crate::pe::ImageSectionHeader; use crate::source::Source; use crate::strings::StringTable; use crate::symbol::SymbolTable; use crate::tpi::{IdInformation, TypeInformation}; // Some streams have a fixed stream index. // http://llvm.org/docs/PDB/index.html const PDB_STREAM: u32 = 1; const TPI_STREAM: u32 = 2; const DBI_STREAM: u32 = 3; const IPI_STREAM: u32 = 4; /// `PDB` provides access to the data within a PDB file. 
/// /// A PDB file is internally a Multi-Stream File (MSF), composed of multiple independent /// (and usually discontiguous) data streams on-disk. `PDB` provides lazy access to these data /// structures, which means the `PDB` accessor methods usually cause disk accesses. #[derive(Debug)] pub struct PDB<'s, S> { /// `msf` provides access to the underlying data streams msf: Box + 's>, /// Memoize the `dbi::Header`, since it contains stream numbers we sometimes need dbi_header: Option, /// Memoize the `dbi::DBIExtraStreams`, since it too contains stream numbers we sometimes need dbi_extra_streams: Option, } impl<'s, S: Source<'s> + 's> PDB<'s, S> { /// Create a new `PDB` for a `Source`. /// /// `open()` accesses enough of the source file to find the MSF stream table. This usually /// involves reading the header, a block near the end of the file, and finally the stream table /// itself. It does not access or validate any of the contents of the rest of the PDB. /// /// # Errors /// /// * `Error::UnimplementedFeature` if the PDB file predates ~2002 /// * `Error::UnrecognizedFileFormat` if the `Source` does not appear to be a PDB file /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange`, `Error::InvalidPageSize` if the PDB file seems corrupt pub fn open(source: S) -> Result> { Ok(PDB { msf: msf::open_msf(source)?, dbi_header: None, dbi_extra_streams: None, }) } /// Retrieve the `PDBInformation` for this PDB. /// /// The `PDBInformation` object contains the GUID and age fields that can be used to verify /// that a PDB file matches a binary, as well as the stream indicies of named PDB streams. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB somehow does not contain the PDB information stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt pub fn pdb_information(&mut self) -> Result> { let stream = self.msf.get(PDB_STREAM, None)?; PDBInformation::parse(stream) } /// Retrieve the `TypeInformation` for this PDB. /// /// The `TypeInformation` object owns a `SourceView` for the type information ("TPI") stream. /// This is usually the single largest stream of the PDB file. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB does not contain the type information stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// * `Error::InvalidTypeInformationHeader` if the type information stream header was not /// understood pub fn type_information(&mut self) -> Result> { let stream = self.msf.get(TPI_STREAM, None)?; TypeInformation::parse(stream) } /// Retrieve the `IdInformation` for this PDB. /// /// The `IdInformation` object owns a `SourceView` for the type information ("IPI") stream. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB does not contain the id information stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// * `Error::InvalidTypeInformationHeader` if the id information stream header was not /// understood pub fn id_information(&mut self) -> Result> { let stream = self.msf.get(IPI_STREAM, None)?; IdInformation::parse(stream) } /// Retrieve the `DebugInformation` for this PDB. /// /// The `DebugInformation` object owns a `SourceView` for the debug information ("DBI") stream. 
/// /// # Errors /// /// * `Error::StreamNotFound` if the PDB somehow does not contain a symbol records stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// * `Error::UnimplementedFeature` if the debug information header predates ~1995 pub fn debug_information(&mut self) -> Result> { let stream = self.msf.get(DBI_STREAM, None)?; let debug_info = DebugInformation::parse(stream)?; // Grab its header, since we need that for unrelated operations self.dbi_header = Some(debug_info.header()); Ok(debug_info) } fn dbi_header(&mut self) -> Result { // see if we've already got a header if let Some(ref h) = self.dbi_header { return Ok(*h); } // get just the first little bit of the DBI stream let stream = self.msf.get(DBI_STREAM, Some(1024))?; let header = DBIHeader::parse(stream)?; self.dbi_header = Some(header); Ok(header) } /// Retrieve the global symbol table for this PDB. /// /// The `SymbolTable` object owns a `SourceView` for the symbol records stream. This is usually /// the second-largest stream of the PDB file. /// /// The debug information stream indicates which stream is the symbol records stream, so /// `global_symbols()` accesses the debug information stream to read the header unless /// `debug_information()` was called first. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB somehow does not contain a symbol records stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// /// If `debug_information()` was not already called, `global_symbols()` will additionally read /// the debug information header, in which case it can also return: /// /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream /// * `Error::UnimplementedFeature` if the debug information header predates ~1995 pub fn global_symbols(&mut self) -> Result> { // the global symbol table is stored in a stream number described by the DBI header // so, start by getting the DBI header let dbi_header = self.dbi_header()?; // open the appropriate stream, assuming that it is always present. let stream = self .raw_stream(dbi_header.symbol_records_stream)? .ok_or(Error::GlobalSymbolsNotFound)?; Ok(SymbolTable::new(stream)) } /// Retrieve the module info stream for a specific `Module`. /// /// Some information for each module is stored in a separate stream per-module. `Module`s can be /// retrieved from the `PDB` by first calling [`debug_information`](Self::debug_information) to /// get the debug information stream, and then calling [`modules`](DebugInformation::modules) on /// that. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB does not contain this module info stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// * `Error::UnimplementedFeature` if the module information stream is an unsupported version /// /// # Example /// /// ``` /// # use pdb::FallibleIterator; /// # /// # fn test() -> pdb::Result<()> { /// let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = pdb::PDB::open(file)?; /// let dbi = pdb.debug_information()?; /// let mut modules = dbi.modules()?; /// if let Some(module) = modules.next()? { /// println!("module name: {}, object file name: {}", /// module.module_name(), module.object_file_name()); /// match pdb.module_info(&module)? 
{ /// Some(info) => println!("contains {} symbols", info.symbols()?.count()?), /// None => println!("module information not available"), /// } /// } /// /// # Ok(()) /// # } /// ``` pub fn module_info<'m>(&mut self, module: &Module<'m>) -> Result>> { Ok(self .raw_stream(module.info().stream)? .map(|stream| ModuleInfo::parse(stream, module))) } /// Retrieve the executable's section headers, as stored inside this PDB. /// /// The debug information stream indicates which stream contains the section headers, so /// `sections()` accesses the debug information stream to read the header unless /// `debug_information()` was called first. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// * `Error::UnexpectedEof` if the section headers are truncated mid-record /// /// If `debug_information()` was not already called, `sections()` will additionally read /// the debug information header, in which case it can also return: /// /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream /// * `Error::UnimplementedFeature` if the debug information header predates ~1995 pub fn sections(&mut self) -> Result>> { let index = self.extra_streams()?.section_headers; let stream = match self.raw_stream(index)? { Some(stream) => stream, None => return Ok(None), }; let mut buf = stream.parse_buffer(); let mut headers = Vec::with_capacity(buf.len() / 40); while !buf.is_empty() { headers.push(ImageSectionHeader::parse(&mut buf)?); } Ok(Some(headers)) } /// Retrieve the global frame data table. /// /// This table describes the stack frame layout for functions from all modules in the PDB. Not /// every function in the image file must have FPO information defined for it. Those functions /// that do not have FPO information are assumed to have normal stack frames. /// /// If this PDB does not contain frame data, the returned table is empty. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB does not contain the referenced streams /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// /// # Example /// /// ```rust /// # use pdb::{PDB, Rva, FallibleIterator}; /// # /// # fn test() -> pdb::Result<()> { /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = PDB::open(source)?; /// /// // Read the tables once and reuse them /// let address_map = pdb.address_map()?; /// let frame_table = pdb.frame_table()?; /// let mut frames = frame_table.iter(); /// /// // Iterate frame data in internal RVA order /// while let Some(frame) = frames.next()? { /// println!("{:#?}", frame); /// } /// # Ok(()) /// # } /// # test().unwrap() /// ``` pub fn frame_table(&mut self) -> Result> { let extra = self.extra_streams()?; let old_stream = self.raw_stream(extra.fpo)?; let new_stream = self.raw_stream(extra.framedata)?; FrameTable::parse(old_stream, new_stream) } pub(crate) fn original_sections(&mut self) -> Result>> { let index = self.extra_streams()?.original_section_headers; let stream = match self.raw_stream(index)? 
{ Some(stream) => stream, None => return Ok(None), }; let mut buf = stream.parse_buffer(); let mut headers = Vec::with_capacity(buf.len() / 40); while !buf.is_empty() { headers.push(ImageSectionHeader::parse(&mut buf)?); } Ok(Some(headers)) } pub(crate) fn omap_from_src(&mut self) -> Result>> { let index = self.extra_streams()?.omap_from_src; match self.raw_stream(index)? { Some(stream) => OMAPTable::parse(stream).map(Some), None => Ok(None), } } pub(crate) fn omap_to_src(&mut self) -> Result>> { let index = self.extra_streams()?.omap_to_src; match self.raw_stream(index)? { Some(stream) => OMAPTable::parse(stream).map(Some), None => Ok(None), } } /// Build a map translating between different kinds of offsets and virtual addresses. /// /// For more information on address translation, see [`AddressMap`]. /// /// This reads `omap_from_src` and either `original_sections` or `sections` from this PDB and /// chooses internally which strategy to use for resolving RVAs. Consider to reuse this instance /// for multiple translations. /// /// # Errors /// /// * `Error::OmapNotFound` if an OMAP is required for translation but missing /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// * `Error::UnexpectedEof` if the section headers are truncated mid-record /// /// If `debug_information()` was not already called, `omap_table()` will additionally read the /// debug information header, in which case it can also return: /// /// * `Error::StreamNotFound` if the PDB somehow does not contain a debug information stream /// * `Error::UnimplementedFeature` if the debug information header predates ~1995 /// /// # Example /// /// ```rust /// # use pdb::{Rva, FallibleIterator}; /// # /// # fn test() -> pdb::Result<()> { /// # let source = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = pdb::PDB::open(source)?; /// /// // Compute the address map once and reuse it /// let address_map = pdb.address_map()?; /// /// # let symbol_table = pdb.global_symbols()?; /// # let symbol = symbol_table.iter().next()?.unwrap(); /// # match symbol.parse() { Ok(pdb::SymbolData::Public(pubsym)) => { /// // Obtain some section offset, eg from a symbol, and convert it /// match pubsym.offset.to_rva(&address_map) { /// Some(rva) => { /// println!("symbol is at {}", rva); /// # assert_eq!(rva, Rva(26048)); /// } /// None => { /// println!("symbol refers to eliminated code"); /// # panic!("symbol should exist"); /// } /// } /// # } _ => unreachable!() } /// # Ok(()) /// # } /// # test().unwrap() /// ``` pub fn address_map(&mut self) -> Result> { let sections = self.sections()?.unwrap_or_default(); Ok(match self.original_sections()? { Some(original_sections) => { let omap_from_src = self.omap_from_src()?.ok_or(Error::AddressMapNotFound)?; let omap_to_src = self.omap_to_src()?.ok_or(Error::AddressMapNotFound)?; AddressMap { original_sections, transformed_sections: Some(sections), original_to_transformed: Some(omap_from_src), transformed_to_original: Some(omap_to_src), } } None => AddressMap { original_sections: sections, transformed_sections: None, original_to_transformed: None, transformed_to_original: None, }, }) } /// Retrieve the global string table of this PDB. /// /// Long strings, such as file names, are stored in a global deduplicated string table. They are /// referred to by the [`StringRef`] type, which contains an offset into that table. 
Strings in /// the table are stored as null-terminated C strings. Modern PDBs only store valid UTF-8 data /// in the string table, but for older types a decoding might be necessary. /// /// The string table offers cheap zero-copy access to the underlying string data. It is /// therefore cheap to build. /// /// # Example /// /// ``` /// # use pdb::{FallibleIterator, StringRef, PDB}; /// # /// # fn test() -> pdb::Result<()> { /// # let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = PDB::open(file)?; /// let strings = pdb.string_table()?; /// /// // obtain a string ref somehow /// # let string_ref = StringRef(0); /// let raw_string = strings.get(string_ref)?; /// println!("{}", raw_string.to_string()); /// /// // alternatively, use convenience methods /// println!("{}", string_ref.to_string_lossy(&strings)?); /// /// # Ok(()) /// # } /// ``` /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB somehow does not contain section headers /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// * `Error::UnexpectedEof` if the string table ends prematurely pub fn string_table(&mut self) -> Result> { let stream = self.named_stream(b"/names")?; StringTable::parse(stream) } /// Retrieve a stream by its index to read its contents as bytes. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB does not contain this stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt /// /// # Example /// /// ``` /// # fn test() -> pdb::Result<()> { /// let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = pdb::PDB::open(file)?; /// // This is the index of the "mystream" stream that was added using pdbstr.exe. /// let s = pdb.raw_stream(pdb::StreamIndex(208))?.expect("stream exists"); /// assert_eq!(s.as_slice(), b"hello world\n"); /// # Ok(()) /// # } /// ``` pub fn raw_stream(&mut self, index: StreamIndex) -> Result>> { match index.msf_number() { Some(number) => self.msf.get(number, None).map(Some), None => Ok(None), } } /// Retrieve a stream by its name, as declared in the PDB info stream. /// /// # Errors /// /// * `Error::StreamNameNotFound` if the PDB does not specify a stream with that name /// * `Error::StreamNotFound` if the PDB does not contain the stream referred to /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt pub fn named_stream(&mut self, name: &[u8]) -> Result> { let info = self.pdb_information()?; let names = info.stream_names()?; for named_stream in &names { if named_stream.name.as_bytes() == name { return self .raw_stream(named_stream.stream_id)? .ok_or(Error::StreamNameNotFound); } } Err(Error::StreamNameNotFound) } /// Loads the Optional Debug Header Stream, which contains offsets into extra streams. /// /// this stream is always returned, but its members are all optional depending on the data /// present in the PDB. /// /// The optional header begins at offset 0 immediately after the EC Substream ends. fn extra_streams(&mut self) -> Result { if let Some(extra) = self.dbi_extra_streams { return Ok(extra); } // Parse and grab information on extra streams, since we might also need that let debug_info = self.debug_information()?; let extra = DBIExtraStreams::new(&debug_info)?; self.dbi_extra_streams = Some(extra); Ok(extra) } } impl StreamIndex { /// Load the raw data of this stream from the PDB. 
/// /// Returns `None` if this index is none. Otherwise, this will try to read the stream from the /// PDB, which might fail if the stream is missing. /// /// # Errors /// /// * `Error::StreamNotFound` if the PDB does not contain this stream /// * `Error::IoError` if returned by the `Source` /// * `Error::PageReferenceOutOfRange` if the PDB file seems corrupt pub fn get<'s, S>(self, pdb: &mut PDB<'s, S>) -> Result>> where S: Source<'s> + 's, { pdb.raw_stream(self) } } pdb-0.8.0/src/pdbi.rs000064400000000000000000000162530072674642500125100ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::convert::TryInto; use std::mem; use uuid::Uuid; use crate::common::*; use crate::dbi::HeaderVersion; use crate::msf::*; /// A PDB info stream header parsed from a stream. /// /// The [PDB information stream] contains the GUID and age fields that can be used to /// verify that a PDB file matches a specific binary, as well a list of named PDB streams /// with their stream indices. /// /// [PDB information stream]: http://llvm.org/docs/PDB/PdbStream.html #[derive(Debug)] pub struct PDBInformation<'s> { /// The version of the PDB format in use. pub version: HeaderVersion, /// A 32-bit timestamp. pub signature: u32, /// The number of times this PDB file has been written. /// /// This number is bumped by the linker and other tools every time the PDB is modified. It does /// not necessarily correspond to the age declared in the image. Consider using /// [`DebugInformation::age`](crate::DebugInformation::age) for a better match. /// /// This PDB matches an image, if the `guid` values match and the PDB age is equal or higher /// than the image's age. pub age: u32, /// A `Uuid` generated when this PDB file was created that should uniquely identify it. pub guid: Uuid, /// The offset of the start of the stream name data within the stream. pub names_offset: usize, /// The size of the stream name data, in bytes. pub names_size: usize, stream: Stream<'s>, } impl<'s> PDBInformation<'s> { /// Parses a `PDBInformation` from raw stream data. pub(crate) fn parse(stream: Stream<'s>) -> Result { let (version, signature, age, guid, names_size, names_offset) = { let mut buf = stream.parse_buffer(); let version = From::from(buf.parse_u32()?); let signature = buf.parse_u32()?; let age = buf.parse_u32()?; let guid = Uuid::from_fields( buf.parse_u32()?, buf.parse_u16()?, buf.parse_u16()?, buf.take(8)?.try_into().unwrap(), ); let names_size = buf.parse_u32()? as usize; let names_offset = buf.pos(); (version, signature, age, guid, names_size, names_offset) }; Ok(PDBInformation { version, signature, age, guid, names_size, names_offset, stream, }) } /// Get a `StreamNames` object that can be used to iterate over named streams contained /// within the PDB file. /// /// This can be used to look up certain PDB streams by name. 
/// /// # Example /// /// ``` /// # use pdb::FallibleIterator; /// # /// # fn test() -> pdb::Result<()> { /// let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = pdb::PDB::open(file)?; /// let info = pdb.pdb_information()?; /// let names = info.stream_names()?; /// let mut v: Vec<_> = names.iter().map(|n| n.name.to_string()).collect(); /// v.sort(); /// assert_eq!(&v, &["mystream", "/LinkInfo", "/names", "/src/headerblock"]); /// # Ok(()) /// # } /// ``` pub fn stream_names(&self) -> Result> { // The names map is part of the PDB info stream that provides a mapping from stream names to // stream indicies. Its [format on disk](1) is somewhat complicated, consisting of a block of // data comprising the names as null-terminated C strings, followed by a map of stream indices // to the offset of their names within the names block. // // [The map itself](2) is stored as a 32-bit count of the number of entries, followed by a // 32-bit value that gives the number of bytes taken up by the entries themselves, followed by // two sets: one for names that are present in this PDB, and one for names that have been // deleted, followed by the map entries, each of which is a pair of 32-bit values consisting of // an offset into the names block and a stream ID. // // [The two sets](3) are each stored as a [bit array](4), which consists of a 32-bit count, and // then that many 32-bit words containing the bits in the array. // // [1]: https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/include/nmtni.h#L76 // [2]: https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/include/map.h#L474 // [3]: https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/include/iset.h#L62 // [4]: https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/include/array.h#L209 let mut names = vec![]; let mut buf = self.stream.parse_buffer(); // Seek forward to the name map. buf.take(self.names_offset + self.names_size)?; let count = buf.parse_u32()?; // We don't actually use most of these. let _entries_size = buf.parse_u32()?; let ok_words = buf.parse_u32()?; let _ok_bits = buf.take(ok_words as usize * mem::size_of::())?; let deleted_words = buf.parse_u32()?; let _deleted_bits = buf.take(deleted_words as usize * mem::size_of::())?; // Skip over the header here. let mut names_reader = self.stream.parse_buffer(); names_reader.take(self.names_offset)?; // And take just the name data. let names_buf = names_reader.take(self.names_size)?; for _ in 0..count { let name_offset = buf.parse_u32()? as usize; let stream_id = StreamIndex(buf.parse_u32()? as u16); let name = ParseBuffer::from(&names_buf[name_offset..]).parse_cstring()?; names.push(StreamName { name, stream_id }); } Ok(StreamNames { names }) } } /// A named stream contained within the PDB file. #[derive(Debug)] pub struct StreamName<'n> { /// The stream's name. pub name: RawString<'n>, /// The index of this stream. pub stream_id: StreamIndex, } /// A list of named streams contained within the PDB file. /// /// Call [`StreamNames::iter`] to iterate over the names. The iterator produces [`StreamName`] /// objects. #[derive(Debug)] pub struct StreamNames<'s> { /// The list of streams and their names. names: Vec>, } /// An iterator over [`StreamName`]s. pub type NameIter<'a, 'n> = std::slice::Iter<'a, StreamName<'n>>; impl<'s> StreamNames<'s> { /// Return an iterator over named streams and their stream indices. 
#[inline] pub fn iter(&self) -> NameIter<'_, 's> { self.names.iter() } } impl<'a, 's> IntoIterator for &'a StreamNames<'s> { type Item = &'a StreamName<'s>; type IntoIter = NameIter<'a, 's>; #[inline] fn into_iter(self) -> Self::IntoIter { self.names.iter() } } pdb-0.8.0/src/pe.rs000064400000000000000000000462530072674642500122010ustar 00000000000000// Copyright 2018 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. //! Definitions for PE headers contained in PDBs. // PDBs contain PE section headers in one or two streams. `pdb::pe` is responsible for parsing them. use std::fmt; use scroll::ctx::TryFromCtx; use scroll::Endian; use crate::common::*; /// The section should not be padded to the next boundary. This flag is /// obsolete and is replaced by `IMAGE_SCN_ALIGN_1BYTES`. const IMAGE_SCN_TYPE_NO_PAD: u32 = 0x00000008; /// The section contains executable code. const IMAGE_SCN_CNT_CODE: u32 = 0x00000020; /// The section contains initialized data. const IMAGE_SCN_CNT_INITIALIZED_DATA: u32 = 0x00000040; /// The section contains uninitialized data. const IMAGE_SCN_CNT_UNINITIALIZED_DATA: u32 = 0x00000080; /// Reserved. const IMAGE_SCN_LNK_OTHER: u32 = 0x00000100; /// The section contains comments or other information. This is valid only for object files. const IMAGE_SCN_LNK_INFO: u32 = 0x00000200; /// The section will not become part of the image. This is valid only for object files. const IMAGE_SCN_LNK_REMOVE: u32 = 0x00000800; /// The section contains COMDAT data. This is valid only for object files. const IMAGE_SCN_LNK_COMDAT: u32 = 0x00001000; /// Reset speculative exceptions handling bits in the TLB entries for this section. const IMAGE_SCN_NO_DEFER_SPEC_EXC: u32 = 0x00004000; /// The section contains data referenced through the global pointer. const IMAGE_SCN_GPREL: u32 = 0x00008000; /// Reserved. const IMAGE_SCN_MEM_PURGEABLE: u32 = 0x00020000; /// Reserved. const IMAGE_SCN_MEM_LOCKED: u32 = 0x00040000; /// Reserved. const IMAGE_SCN_MEM_PRELOAD: u32 = 0x00080000; /// Align data on a 1-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_1BYTES: u32 = 0x00100000; /// Align data on a 2-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_2BYTES: u32 = 0x00200000; /// Align data on a 4-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_4BYTES: u32 = 0x00300000; /// Align data on a 8-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_8BYTES: u32 = 0x00400000; /// Align data on a 16-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_16BYTES: u32 = 0x00500000; /// Align data on a 32-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_32BYTES: u32 = 0x00600000; /// Align data on a 64-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_64BYTES: u32 = 0x00700000; /// Align data on a 128-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_128BYTES: u32 = 0x00800000; /// Align data on a 256-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_256BYTES: u32 = 0x00900000; /// Align data on a 512-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_512BYTES: u32 = 0x00A00000; /// Align data on a 1024-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_1024BYTES: u32 = 0x00B00000; /// Align data on a 2048-byte boundary. 
This is valid only for object files. const IMAGE_SCN_ALIGN_2048BYTES: u32 = 0x00C00000; /// Align data on a 4096-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_4096BYTES: u32 = 0x00D00000; /// Align data on a 8192-byte boundary. This is valid only for object files. const IMAGE_SCN_ALIGN_8192BYTES: u32 = 0x00E00000; /// The section contains extended relocations. The count of relocations for the /// section exceeds the 16 bits that is reserved for it in the section header. /// If the `number_of_relocations` field in the section header is `0xffff`, the /// actual relocation count is stored in the `virtual_address` field of the first /// relocation. It is an error if `IMAGE_SCN_LNK_NRELOC_OVFL` is set and there /// are fewer than `0xffff` relocations in the section. const IMAGE_SCN_LNK_NRELOC_OVFL: u32 = 0x01000000; /// The section can be discarded as needed. const IMAGE_SCN_MEM_DISCARDABLE: u32 = 0x02000000; /// The section cannot be cached. const IMAGE_SCN_MEM_NOT_CACHED: u32 = 0x04000000; /// The section cannot be paged. const IMAGE_SCN_MEM_NOT_PAGED: u32 = 0x08000000; /// The section can be shared in memory. const IMAGE_SCN_MEM_SHARED: u32 = 0x10000000; /// The section can be executed as code. const IMAGE_SCN_MEM_EXECUTE: u32 = 0x20000000; /// The section can be read. const IMAGE_SCN_MEM_READ: u32 = 0x40000000; /// The section can be written to. const IMAGE_SCN_MEM_WRITE: u32 = 0x80000000; /// Characteristic flags of an [`ImageSectionHeader`]. /// /// These are defined by Microsoft as [`IMAGE_SCN_`] constants. /// /// [`IMAGE_SCN_`]: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header #[derive(Clone, Copy, Eq, Default, PartialEq)] pub struct SectionCharacteristics(pub u32); impl SectionCharacteristics { /// The section contains executable code. pub fn executable(self) -> bool { (self.0 & IMAGE_SCN_CNT_CODE) > 0 } /// The section contains initialized data. pub fn initialized_data(self) -> bool { (self.0 & IMAGE_SCN_CNT_INITIALIZED_DATA) > 0 } /// The section contains uninitialized data. pub fn uninitialized_data(self) -> bool { (self.0 & IMAGE_SCN_CNT_UNINITIALIZED_DATA) > 0 } /// Reserved. pub fn other(self) -> bool { (self.0 & IMAGE_SCN_LNK_OTHER) > 0 } /// The section contains comments or other information. This is valid only for object files. pub fn info(self) -> bool { (self.0 & IMAGE_SCN_LNK_INFO) > 0 } /// The section will not become part of the image. This is valid only for object files. pub fn remove(self) -> bool { (self.0 & IMAGE_SCN_LNK_REMOVE) > 0 } /// The section contains COMDAT data. This is valid only for object files. pub fn comdat(self) -> bool { (self.0 & IMAGE_SCN_LNK_COMDAT) > 0 } /// Reset speculative exceptions handling bits in the TLB entries for this section. pub fn defer_speculative_exceptions(self) -> bool { (self.0 & IMAGE_SCN_NO_DEFER_SPEC_EXC) > 0 } /// The section contains data referenced through the global pointer. pub fn global_pointer_relative(self) -> bool { (self.0 & IMAGE_SCN_GPREL) > 0 } /// Reserved. pub fn purgeable(self) -> bool { (self.0 & IMAGE_SCN_MEM_PURGEABLE) > 0 } /// Reserved. pub fn locked(self) -> bool { (self.0 & IMAGE_SCN_MEM_LOCKED) > 0 } /// Reserved. pub fn preload(self) -> bool { (self.0 & IMAGE_SCN_MEM_PRELOAD) > 0 } /// Alignment for section data. /// /// This is valid only for object files. Returns `Some` if alignment is specified, and `None` if /// no alignment is specified. An alignment of `Some(1)` means that the section should not be /// padded to a boundary. 
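    ///
    /// For example, a value containing `IMAGE_SCN_ALIGN_16BYTES` (`0x00500000`) decodes to
    /// `Some(16)`, while a value with no alignment field and no `IMAGE_SCN_TYPE_NO_PAD` bit
    /// decodes to `None`. A minimal doc sketch (assuming `SectionCharacteristics` is re-exported
    /// at the crate root, hence the `ignore` marker):
    ///
    /// ```ignore
    /// use pdb::SectionCharacteristics;
    ///
    /// // 0x0050_0000 is IMAGE_SCN_ALIGN_16BYTES.
    /// assert_eq!(SectionCharacteristics(0x0050_0000).alignment(), Some(16));
    /// // Neither an alignment field nor IMAGE_SCN_TYPE_NO_PAD is set.
    /// assert_eq!(SectionCharacteristics(0).alignment(), None);
    /// ```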
    pub fn alignment(self) -> Option<u16> {
        // Mask covering all align values and IMAGE_SCN_TYPE_NO_PAD.
        match self.0 & 0x00F00008 {
            self::IMAGE_SCN_ALIGN_1BYTES => Some(1),
            self::IMAGE_SCN_ALIGN_2BYTES => Some(2),
            self::IMAGE_SCN_ALIGN_4BYTES => Some(4),
            self::IMAGE_SCN_ALIGN_8BYTES => Some(8),
            self::IMAGE_SCN_ALIGN_16BYTES => Some(16),
            self::IMAGE_SCN_ALIGN_32BYTES => Some(32),
            self::IMAGE_SCN_ALIGN_64BYTES => Some(64),
            self::IMAGE_SCN_ALIGN_128BYTES => Some(128),
            self::IMAGE_SCN_ALIGN_256BYTES => Some(256),
            self::IMAGE_SCN_ALIGN_512BYTES => Some(512),
            self::IMAGE_SCN_ALIGN_1024BYTES => Some(1024),
            self::IMAGE_SCN_ALIGN_2048BYTES => Some(2048),
            self::IMAGE_SCN_ALIGN_4096BYTES => Some(4096),
            self::IMAGE_SCN_ALIGN_8192BYTES => Some(8192),
            self::IMAGE_SCN_TYPE_NO_PAD => Some(1),
            _ => None,
        }
    }

    /// The section contains extended relocations.
    ///
    /// The count of relocations for the section exceeds the 16 bits that are reserved for it in
    /// the section header. If the [`number_of_relocations`](ImageSectionHeader::number_of_relocations)
    /// field in the section header is `0xffff`, the actual relocation count is stored in the
    /// `virtual_address` field of the first relocation. It is an error if this flag is set and
    /// there are fewer than `0xffff` relocations in the section.
    pub fn lnk_nreloc_ovfl(self) -> bool {
        (self.0 & IMAGE_SCN_LNK_NRELOC_OVFL) > 0
    }

    /// The section can be discarded as needed.
    pub fn discardable(self) -> bool {
        (self.0 & IMAGE_SCN_MEM_DISCARDABLE) > 0
    }

    /// The section cannot be cached.
    pub fn not_cached(self) -> bool {
        (self.0 & IMAGE_SCN_MEM_NOT_CACHED) > 0
    }

    /// The section cannot be paged.
    pub fn not_paged(self) -> bool {
        (self.0 & IMAGE_SCN_MEM_NOT_PAGED) > 0
    }

    /// The section can be shared in memory.
    pub fn shared(self) -> bool {
        (self.0 & IMAGE_SCN_MEM_SHARED) > 0
    }

    /// The section can be executed as code.
    pub fn execute(self) -> bool {
        (self.0 & IMAGE_SCN_MEM_EXECUTE) > 0
    }

    /// The section can be read.
    pub fn read(self) -> bool {
        (self.0 & IMAGE_SCN_MEM_READ) > 0
    }

    /// The section can be written to.
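    ///
    /// This checks the `IMAGE_SCN_MEM_WRITE` (`0x80000000`) flag.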
pub fn write(self) -> bool { (self.0 & IMAGE_SCN_MEM_WRITE) > 0 } } impl fmt::Debug for SectionCharacteristics { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if f.alternate() { f.debug_struct("ImageCharacteristics") .field("executable", &self.executable()) .field("initialized_data", &self.initialized_data()) .field("uninitialized_data", &self.uninitialized_data()) .field("info", &self.info()) .field("remove", &self.remove()) .field("comdat", &self.comdat()) .field( "defer_speculative_exceptions", &self.defer_speculative_exceptions(), ) .field("global_pointer_relative", &self.global_pointer_relative()) .field("purgeable", &self.purgeable()) .field("locked", &self.locked()) .field("preload", &self.preload()) .field("alignment", &self.alignment()) .field("lnk_nreloc_ovfl", &self.lnk_nreloc_ovfl()) .field("discardable", &self.discardable()) .field("not_cached", &self.not_cached()) .field("not_paged", &self.not_paged()) .field("shared", &self.shared()) .field("execute", &self.execute()) .field("read", &self.read()) .field("write", &self.write()) .finish() } else { f.debug_tuple("ImageCharacteristics") .field(&format_args!("{:#x}", self.0)) .finish() } } } impl<'t> TryFromCtx<'t, Endian> for SectionCharacteristics { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let (value, size) = u32::try_from_ctx(this, le)?; Ok((SectionCharacteristics(value), size)) } } /// A PE `IMAGE_SECTION_HEADER`, as described in [the Microsoft documentation](https://msdn.microsoft.com/en-us/library/windows/desktop/ms680341(v=vs.85).aspx). #[derive(Copy, Clone, Default, PartialEq, Eq)] pub struct ImageSectionHeader { /// An 8-byte, null-padded UTF-8 string. There is no terminating null character if the string is /// exactly eight characters long. For longer names, this member contains a forward slash (`/`) /// followed by an ASCII representation of a decimal number that is an offset into the string /// table. Executable images do not use a string table and do not support section names longer /// than eight characters. pub name: [u8; 8], /// The total size of the section when loaded into memory, in bytes. If this value is greater /// than the [`size_of_raw_data`](Self::size_of_raw_data) member, the section is filled with /// zeroes. This field is valid only for executable images and should be set to `0` for object /// files. /// /// In object files, this field would be replaced with the physical file address. Such headers /// are never embedded in PDBs. pub virtual_size: u32, /// The address of the first byte of the section when loaded into memory, relative to the image /// base. For object files, this is the address of the first byte before relocation is applied. pub virtual_address: u32, /// The size of the initialized data on disk, in bytes. This value must be a multiple of the /// `FileAlignment` member of the [`IMAGE_OPTIONAL_HEADER`] structure. If this value is less than /// the [`virtual_size`](Self::virtual_size) member, the remainder of the section is filled with /// zeroes. If the section contains only uninitialized data, the member is zero. /// /// [`IMAGE_OPTIONAL_HEADER`]: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header32 pub size_of_raw_data: u32, /// A file pointer to the first page within the COFF file. This value must be a multiple of the /// `FileAlignment` member of the [`IMAGE_OPTIONAL_HEADER`] structure. If a section contains only /// uninitialized data, set this member is zero. 
/// /// [`IMAGE_OPTIONAL_HEADER`]: https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_optional_header32 pub pointer_to_raw_data: u32, /// A file pointer to the beginning of the relocation entries for the section. If there are no /// relocations, this value is zero. pub pointer_to_relocations: u32, /// A file pointer to the beginning of the line-number entries for the section. If there are no /// COFF line numbers, this value is zero. pub pointer_to_line_numbers: u32, /// The number of relocation entries for the section. This value is zero for executable images. /// /// If the value is `0xffff`, the actual relocation count is stored in the `virtual_address` /// field of the first relocation. It is an error if this flag is set and there are fewer than /// `0xffff` relocations in the section. pub number_of_relocations: u16, /// The number of line-number entries for the section. pub number_of_line_numbers: u16, /// The characteristics of the image. pub characteristics: SectionCharacteristics, } impl ImageSectionHeader { pub(crate) fn parse(parse_buffer: &mut ParseBuffer<'_>) -> Result { let name_bytes = parse_buffer.take(8)?; Ok(Self { name: [ name_bytes[0], name_bytes[1], name_bytes[2], name_bytes[3], name_bytes[4], name_bytes[5], name_bytes[6], name_bytes[7], ], virtual_size: parse_buffer.parse_u32()?, virtual_address: parse_buffer.parse_u32()?, size_of_raw_data: parse_buffer.parse_u32()?, pointer_to_raw_data: parse_buffer.parse_u32()?, pointer_to_relocations: parse_buffer.parse_u32()?, pointer_to_line_numbers: parse_buffer.parse_u32()?, number_of_relocations: parse_buffer.parse_u16()?, number_of_line_numbers: parse_buffer.parse_u16()?, characteristics: parse_buffer.parse()?, }) } /// Returns the name of the section. pub fn name(&self) -> &str { let end = self .name .iter() .position(|ch| *ch == 0) .unwrap_or(self.name.len()); // The spec guarantees that the name is a proper UTF-8 string. // TODO: Look up long names from the string table. 
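        // If the name is not valid UTF-8 despite that guarantee, fall back to an empty string
        // rather than returning an error.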
std::str::from_utf8(&self.name[0..end]).unwrap_or("") } } impl fmt::Debug for ImageSectionHeader { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ImageSectionHeader") .field("name()", &self.name()) .field("virtual_size", &format_args!("{:#x}", self.virtual_size)) .field( "virtual_address", &format_args!("{:#x}", self.virtual_address), ) .field("size_of_raw_data", &self.size_of_raw_data) .field( "pointer_to_raw_data", &format_args!("{:#x}", self.pointer_to_raw_data), ) .field( "pointer_to_relocations", &format_args!("{:#x}", self.pointer_to_relocations), ) .field( "pointer_to_line_numbers", &format_args!("{:#x}", self.pointer_to_line_numbers), ) .field("number_of_relocations", &self.number_of_relocations) .field("number_of_line_numbers", &self.number_of_line_numbers) .field("characteristics", &self.characteristics) .finish() } } #[cfg(test)] mod tests { use super::*; #[test] fn test_section_characteristics() { let bytes: Vec = vec![0x40, 0x00, 0x00, 0xC8]; let mut parse_buffer = ParseBuffer::from(bytes.as_slice()); let characteristics = parse_buffer .parse::() .expect("parse"); assert_eq!(characteristics, SectionCharacteristics(0xc800_0040)); assert!(characteristics.initialized_data()); assert!(characteristics.not_paged()); assert!(characteristics.read()); assert!(characteristics.write()); assert_eq!(characteristics.alignment(), None); } #[test] fn test_section_characteristics_nopad() { let characteristics = SectionCharacteristics(IMAGE_SCN_TYPE_NO_PAD); assert_eq!(characteristics.alignment(), Some(1)); } #[test] fn test_section_characteristics_alignment() { let characteristics = SectionCharacteristics(IMAGE_SCN_ALIGN_64BYTES); assert_eq!(characteristics.alignment(), Some(64)); } #[test] fn test_image_section_header() { let bytes: Vec = vec![ 0x2E, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x48, 0x35, 0x09, 0x00, 0x00, 0xD0, 0x1E, 0x00, 0x00, 0xFE, 0x00, 0x00, 0x00, 0xA2, 0x1E, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0xC8, ]; let mut parse_buffer = ParseBuffer::from(bytes.as_slice()); let ish = ImageSectionHeader::parse(&mut parse_buffer).expect("parse"); assert_eq!(&ish.name, b".data\0\0\0"); assert_eq!(ish.name(), ".data"); assert_eq!(ish.virtual_size, 0x93548); assert_eq!(ish.virtual_address, 0x001e_d000); assert_eq!(ish.size_of_raw_data, 0xfe00); assert_eq!(ish.pointer_to_raw_data, 0x001e_a200); assert_eq!(ish.pointer_to_relocations, 0); assert_eq!(ish.pointer_to_line_numbers, 0); assert_eq!(ish.number_of_relocations, 0); assert_eq!(ish.number_of_line_numbers, 0); assert_eq!(ish.characteristics, SectionCharacteristics(0xc800_0040)); } } pdb-0.8.0/src/source.rs000064400000000000000000000147470072674642500131000ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::fmt; use std::io; /// Represents an offset + size of the source file. /// /// The multi-stream file implementation (used by `pdb::PDB`) determines which byte ranges it needs /// to satisfy its requests, and it describes those requests as a `&[SourceSlice]`. #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub struct SourceSlice { /// Offset into the source file. pub offset: u64, /// Size of the slice. pub size: usize, } /// The `pdb` crate accesses PDB files via the `pdb::Source` trait. /// /// This library is written with zero-copy in mind. 
`Source`s provide [`SourceView`]s which need not /// outlive their parent, supporting implementations of e.g. memory mapped files. /// /// PDB files are "multi-stream files" (MSF) under the hood. MSFs have various layers of /// indirection, but ultimately the MSF code asks a `Source` to view a series of /// [`{ offset, size }` records](SourceSlice), which the `Source` provides as a /// contiguous `&[u8]`. /// /// # Default /// /// There is a default `Source` implementation for `std::io::Read` + `std::io::Seek` + /// `std::fmt::Debug`, allowing a `std::fs::File` to be treated as `pdb::Source`. This /// implementation provides views by allocating a buffer, seeking, and reading the contents into /// that buffer. /// /// # Alignment /// /// The requested offsets will always be aligned to the MSF's page size, which is always a power of /// two and is usually (but not always) 4096 bytes. The requested sizes will also be multiples of /// the page size, except for the size of the final `SourceSlice`, which may be smaller. /// /// PDB files are specified as always being a multiple of the page size, so `Source` implementations /// are free to e.g. map whole pages and return a sub-slice of the requested length. /// pub trait Source<'s>: fmt::Debug { /// Provides a contiguous view of the source file composed of the requested position(s). /// /// Note that the SourceView's as_slice() method cannot fail, so `view()` is the time to raise /// IO errors. fn view(&mut self, slices: &[SourceSlice]) -> Result>, io::Error>; } /// An owned, droppable, read-only view of the source file which can be referenced as a byte slice. pub trait SourceView<'s>: fmt::Debug { /// Returns a view to the raw data. fn as_slice(&self) -> &[u8]; } #[derive(Clone)] struct ReadView { bytes: Vec, } impl fmt::Debug for ReadView { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "ReadView({} bytes)", self.bytes.len()) } } impl SourceView<'_> for ReadView { fn as_slice(&self) -> &[u8] { self.bytes.as_slice() } } impl<'s, T> Source<'s> for T where T: io::Read + io::Seek + fmt::Debug + 's, { fn view(&mut self, slices: &[SourceSlice]) -> Result>, io::Error> { let len = slices.iter().fold(0, |acc, s| acc + s.size); let mut v = ReadView { bytes: Vec::with_capacity(len), }; v.bytes.resize(len, 0); { let bytes = v.bytes.as_mut_slice(); let mut output_offset: usize = 0; for slice in slices { self.seek(io::SeekFrom::Start(slice.offset))?; self.read_exact(&mut bytes[output_offset..(output_offset + slice.size)])?; output_offset += slice.size; } } Ok(Box::new(v)) } } #[cfg(test)] mod tests { mod read_view { use crate::source::*; use std::io::Cursor; use std::io::ErrorKind; #[test] fn test_basic_reading() { let mut data = vec![0; 4096]; data[42] = 42; let mut source: Box> = Box::new(Cursor::new(data.as_slice())); let source_slices = vec![SourceSlice { offset: 40, size: 4, }]; let view = source .view(source_slices.as_slice()) .expect("viewing must succeed"); assert_eq!(&[0u8, 0, 42, 0], view.as_slice()); } #[test] fn test_discontinuous_reading() { let mut data = vec![0; 4096]; data[42] = 42; data[88] = 88; let mut source: Box> = Box::new(Cursor::new(data.as_slice())); let source_slices = vec![ SourceSlice { offset: 88, size: 1, }, SourceSlice { offset: 40, size: 4, }, ]; let view = source .view(source_slices.as_slice()) .expect("viewing must succeed"); assert_eq!(&[88u8, 0, 0, 42, 0], view.as_slice()); } #[test] fn test_duplicate_reading() { let mut data = vec![0; 4096]; data[42] = 42; data[88] = 88; let mut source: Box> = 
Box::new(Cursor::new(data.as_slice())); let source_slices = vec![ SourceSlice { offset: 88, size: 1, }, SourceSlice { offset: 40, size: 4, }, SourceSlice { offset: 88, size: 1, }, ]; let view = source .view(source_slices.as_slice()) .expect("viewing must succeed"); assert_eq!(&[88u8, 0, 0, 42, 0, 88], view.as_slice()); } #[test] fn test_eof_reading() { let data = vec![0; 4096]; let mut source: Box> = Box::new(Cursor::new(data.as_slice())); // one byte is readable, but we asked for two let source_slices = vec![SourceSlice { offset: 4095, size: 2, }]; let r = source.view(source_slices.as_slice()); match r { Ok(_) => panic!("should have failed"), Err(e) => { assert_eq!(ErrorKind::UnexpectedEof, e.kind()); } } } } } pdb-0.8.0/src/strings.rs000064400000000000000000000127030072674642500132570ustar 00000000000000use std::borrow::Cow; use scroll::{ctx::TryFromCtx, Endian, Pread}; use crate::common::*; use crate::msf::Stream; /// Magic bytes identifying the string name table. /// /// This value is declared as `NMT::verHdr` in `nmt.h`. const PDB_NMT_HDR: u32 = 0xEFFE_EFFE; #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum StringTableHashVersion { /// Default hash method used for reverse string lookups. /// /// The hash function has originally been defined in `LHashPbCb`. LongHash = 1, /// Revised hash method used for reverse string lookups. /// /// The hash function has originally been defined in `LHashPbCbV2`. LongHashV2 = 2, } impl StringTableHashVersion { fn parse_u32(value: u32) -> Result { match value { 1 => Ok(Self::LongHash), 2 => Ok(Self::LongHashV2), _ => Err(Error::UnimplementedFeature( "unknown string table hash version", )), } } } /// Raw header of the string table stream. #[repr(C)] #[derive(Clone, Copy, Debug)] struct StringTableHeader { /// Magic bytes of the string table. magic: u32, /// Version of the hash table after the names. hash_version: u32, /// The size of all names in bytes. names_size: u32, } impl<'t> TryFromCtx<'t, Endian> for StringTableHeader { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let mut offset = 0; let data = Self { magic: this.gread_with(&mut offset, le)?, hash_version: this.gread_with(&mut offset, le)?, names_size: this.gread_with(&mut offset, le)?, }; Ok((data, offset)) } } impl StringTableHeader { /// Start index of the names buffer in the string table stream. fn names_start(self) -> usize { std::mem::size_of::() } /// End index of the names buffer in the string table stream. fn names_end(self) -> usize { self.names_start() + self.names_size as usize } } /// The global string table of a PDB. /// /// The string table is a two-way mapping from offset to string and back. It can be used to resolve /// [`StringRef`] offsets to their string values. Sometimes, it is also referred to as "Name table". /// The mapping from string to offset has not been implemented yet. /// /// Use [`PDB::string_table`](crate::PDB::string_table) to obtain an instance. #[derive(Debug)] pub struct StringTable<'s> { header: StringTableHeader, #[allow(dead_code)] // reason = "reverse-lookups through hash table not implemented" hash_version: StringTableHashVersion, stream: Stream<'s>, } impl<'s> StringTable<'s> { pub(crate) fn parse(stream: Stream<'s>) -> Result { let mut buf = stream.parse_buffer(); let header = buf.parse::()?; if header.magic != PDB_NMT_HDR { return Err(Error::UnimplementedFeature( "invalid string table signature", )); } // The string table should at least contain all names as C-strings. 
Their combined size is // declared in the `names_size` header field. if buf.len() < header.names_end() { return Err(Error::UnexpectedEof); } let hash_version = StringTableHashVersion::parse_u32(header.hash_version)?; // After the name buffer, the stream contains a closed hash table for reverse mapping. From // the original header file (`nmi.h`): // // Strings are mapped into name indices using a closed hash table of NIs. // To find a string, we hash it and probe into the table, and compare the // string against each successive ni's name until we hit or find an empty // hash table entry. Ok(StringTable { header, hash_version, stream, }) } } impl<'s> StringTable<'s> { /// Resolves a string value from this string table. /// /// Errors if the offset is out of bounds, otherwise returns the raw binary string value. pub fn get(&self, offset: StringRef) -> Result> { if offset.0 >= self.header.names_size { return Err(Error::UnexpectedEof); } let string_offset = self.header.names_start() + offset.0 as usize; let data = &self.stream.as_slice()[string_offset..self.header.names_end()]; ParseBuffer::from(data).parse_cstring() } } impl StringRef { /// Resolves the raw string value of this reference. /// /// This method errors if the offset is out of bounds of the string table. Use /// [`PDB::string_table`](crate::PDB::string_table) to obtain an instance of the string table. pub fn to_raw_string<'s>(self, strings: &'s StringTable<'_>) -> Result> { strings.get(self) } /// Resolves and decodes the UTF-8 string value of this reference. /// /// This method errors if the offset is out of bounds of the string table. Use /// [`PDB::string_table`](crate::PDB::string_table) to obtain an instance of the string table. pub fn to_string_lossy<'s>(self, strings: &'s StringTable<'_>) -> Result> { strings.get(self).map(|r| r.to_string()) } } #[cfg(test)] mod tests { use super::*; use std::mem; #[test] fn test_string_table_header() { assert_eq!(mem::size_of::(), 12); assert_eq!(mem::align_of::(), 4); } } pdb-0.8.0/src/symbol/annotations.rs000064400000000000000000000274210072674642500154330ustar 00000000000000use crate::common::*; use crate::FallibleIterator; /// These values correspond to the BinaryAnnotationOpcode enum from the /// cvinfo.h #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum BinaryAnnotationOpcode { /// Link time pdb contains PADDINGs. /// /// These are represented with the 0 opcode which is in some PDB /// implementation called "invalid". 
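    ///
    /// The annotation iterator below treats this opcode as the end of the stream and stops
    /// decoding as soon as it is encountered.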
Eof = 0, /// param : start offset CodeOffset = 1, /// param : nth separated code chunk (main code chunk == 0) ChangeCodeOffsetBase = 2, /// param : delta of offset ChangeCodeOffset = 3, /// param : length of code, default next start ChangeCodeLength = 4, /// param : fileId ChangeFile = 5, /// param : line offset (signed) ChangeLineOffset = 6, /// param : how many lines, default 1 ChangeLineEndDelta = 7, /// param : either 1 (default, for statement) /// or 0 (for expression) ChangeRangeKind = 8, /// param : start column number, 0 means no column info ChangeColumnStart = 9, /// param : end column number delta (signed) ChangeColumnEndDelta = 10, /// param : ((sourceDelta << 4) | CodeDelta) ChangeCodeOffsetAndLineOffset = 11, /// param : codeLength, codeOffset ChangeCodeLengthAndCodeOffset = 12, /// param : end column number ChangeColumnEnd = 13, } impl BinaryAnnotationOpcode { fn parse(value: u32) -> Result { Ok(match value { 0 => Self::Eof, 1 => Self::CodeOffset, 2 => Self::ChangeCodeOffsetBase, 3 => Self::ChangeCodeOffset, 4 => Self::ChangeCodeLength, 5 => Self::ChangeFile, 6 => Self::ChangeLineOffset, 7 => Self::ChangeLineEndDelta, 8 => Self::ChangeRangeKind, 9 => Self::ChangeColumnStart, 10 => Self::ChangeColumnEndDelta, 11 => Self::ChangeCodeOffsetAndLineOffset, 12 => Self::ChangeCodeLengthAndCodeOffset, 13 => Self::ChangeColumnEnd, _ => return Err(Error::UnknownBinaryAnnotation(value)), }) } } /// Represents a parsed `BinaryAnnotation`. /// /// Binary annotations are used by `S_INLINESITE` to encode opcodes for how to /// evaluate the state changes for inline information. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum BinaryAnnotation { /// Sets the code offset to the given absolute value. CodeOffset(u32), /// Sets the base for all code offsets to the given absolute value. All following code offsets /// are relative to this base value. ChangeCodeOffsetBase(u32), /// **Emitting**. Advances the code offset by the given value. /// /// This annotation emits a line record. The length of the covered code block can be established /// by a following `ChangeCodeLength` annotation, or by the offset of the next emitted record. ChangeCodeOffset(u32), /// Adjusts the code length of the previously emitted line record. The code length resets with /// every line record. ChangeCodeLength(u32), /// Sets the file index of following line records. ChangeFile(FileIndex), /// Advances the line number by the given value. ChangeLineOffset(i32), /// Sets the number of source lines covered by following line records. Defaults to `1`. ChangeLineEndDelta(u32), /// Sets the kind of the line record. Defaults to `Statement`. ChangeRangeKind(u32), /// Sets the start column number. Defaults to `None`. ChangeColumnStart(u32), /// Advances the end column number by the given value. ChangeColumnEndDelta(i32), /// **Emitting**. Advances the code offset and the line number by the given values. /// /// This annotation emits a line record. The length of the covered code block can be established /// by a following `ChangeCodeLength` annotation, or by the offset of the next emitted record. ChangeCodeOffsetAndLineOffset(u32, i32), /// **Emitting**. Sets the code length and advances the code offset by the given value. /// /// This annotation emits a line record that is valid for the given code length. This is usually /// issued as one of the last annotations, as there is no subsequent line record to derive the /// code length from. ChangeCodeLengthAndCodeOffset(u32, u32), /// Sets the end column number. 
ChangeColumnEnd(u32), } impl BinaryAnnotation { /// Does this annotation emit a line info? pub fn emits_line_info(self) -> bool { matches!( self, BinaryAnnotation::ChangeCodeOffset(..) | BinaryAnnotation::ChangeCodeOffsetAndLineOffset(..) | BinaryAnnotation::ChangeCodeLengthAndCodeOffset(..) ) } } /// An iterator over binary annotations used by `S_INLINESITE`. #[derive(Clone, Debug, Default)] pub struct BinaryAnnotationsIter<'t> { buffer: ParseBuffer<'t>, } impl<'t> BinaryAnnotationsIter<'t> { /// Parse a compact version of an unsigned integer. /// /// This implements `CVUncompressData`, which can decode numbers no larger than 0x1FFFFFFF. It /// seems that values compressed this way are only used for binary annotations at this point. fn uncompress_next(&mut self) -> Result { let b1 = u32::from(self.buffer.parse::()?); if (b1 & 0x80) == 0x00 { let value = b1; return Ok(value); } let b2 = u32::from(self.buffer.parse::()?); if (b1 & 0xc0) == 0x80 { let value = (b1 & 0x3f) << 8 | b2; return Ok(value); } let b3 = u32::from(self.buffer.parse::()?); let b4 = u32::from(self.buffer.parse::()?); if (b1 & 0xe0) == 0xc0 { let value = ((b1 & 0x1f) << 24) | (b2 << 16) | (b3 << 8) | b4; return Ok(value); } Err(Error::InvalidCompressedAnnotation) } } /// Resembles `DecodeSignedInt32`. fn decode_signed_operand(value: u32) -> i32 { if value & 1 != 0 { -((value >> 1) as i32) } else { (value >> 1) as i32 } } impl<'t> FallibleIterator for BinaryAnnotationsIter<'t> { type Item = BinaryAnnotation; type Error = Error; fn next(&mut self) -> Result> { if self.buffer.is_empty() { return Ok(None); } let op = self.uncompress_next()?; let annotation = match BinaryAnnotationOpcode::parse(op)? { BinaryAnnotationOpcode::Eof => { // This makes the end of the stream self.buffer = ParseBuffer::default(); return Ok(None); } BinaryAnnotationOpcode::CodeOffset => { BinaryAnnotation::CodeOffset(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeCodeOffsetBase => { BinaryAnnotation::ChangeCodeOffsetBase(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeCodeOffset => { BinaryAnnotation::ChangeCodeOffset(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeCodeLength => { BinaryAnnotation::ChangeCodeLength(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeFile => { BinaryAnnotation::ChangeFile(FileIndex(self.uncompress_next()?)) } BinaryAnnotationOpcode::ChangeLineOffset => { BinaryAnnotation::ChangeLineOffset(decode_signed_operand(self.uncompress_next()?)) } BinaryAnnotationOpcode::ChangeLineEndDelta => { BinaryAnnotation::ChangeLineEndDelta(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeRangeKind => { BinaryAnnotation::ChangeRangeKind(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeColumnStart => { BinaryAnnotation::ChangeColumnStart(self.uncompress_next()?) } BinaryAnnotationOpcode::ChangeColumnEndDelta => BinaryAnnotation::ChangeColumnEndDelta( decode_signed_operand(self.uncompress_next()?), ), BinaryAnnotationOpcode::ChangeCodeOffsetAndLineOffset => { let operand = self.uncompress_next()?; BinaryAnnotation::ChangeCodeOffsetAndLineOffset( operand & 0xf, decode_signed_operand(operand >> 4), ) } BinaryAnnotationOpcode::ChangeCodeLengthAndCodeOffset => { BinaryAnnotation::ChangeCodeLengthAndCodeOffset( self.uncompress_next()?, self.uncompress_next()?, ) } BinaryAnnotationOpcode::ChangeColumnEnd => { BinaryAnnotation::ChangeColumnEnd(self.uncompress_next()?) } }; Ok(Some(annotation)) } } /// Binary annotations of a symbol. 
/// /// The binary annotation mechanism supports recording a list of annotations in an instruction /// stream. The X64 unwind code and the DWARF standard have a similar design. /// /// Binary annotations are primarily used as line programs for inline function calls. #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] pub struct BinaryAnnotations<'t> { data: &'t [u8], } impl<'t> BinaryAnnotations<'t> { /// Creates a new instance of binary annotations. pub(crate) fn new(data: &'t [u8]) -> Self { BinaryAnnotations { data } } /// Iterates through binary annotations. pub fn iter(&self) -> BinaryAnnotationsIter<'t> { BinaryAnnotationsIter { buffer: ParseBuffer::from(self.data), } } } #[test] fn test_binary_annotation_iter() { let inp = b"\x0b\x03\x06\n\x03\x08\x06\x06\x03-\x06\x08\x03\x07\x0br\x06\x06\x0c\x03\x07\x06\x0f\x0c\x06\x05\x00\x00"; let annotations = BinaryAnnotations::new(inp) .iter() .collect::>() .unwrap(); assert_eq!( annotations, vec![ BinaryAnnotation::ChangeCodeOffsetAndLineOffset(3, 0), BinaryAnnotation::ChangeLineOffset(5), BinaryAnnotation::ChangeCodeOffset(8), BinaryAnnotation::ChangeLineOffset(3), BinaryAnnotation::ChangeCodeOffset(45), BinaryAnnotation::ChangeLineOffset(4), BinaryAnnotation::ChangeCodeOffset(7), BinaryAnnotation::ChangeCodeOffsetAndLineOffset(2, -3), BinaryAnnotation::ChangeLineOffset(3), BinaryAnnotation::ChangeCodeLengthAndCodeOffset(3, 7), BinaryAnnotation::ChangeLineOffset(-7), BinaryAnnotation::ChangeCodeLengthAndCodeOffset(6, 5) ] ); let inp = b"\x03P\x06\x0e\x03\x0c\x06\x04\x032\x06\x06\x03T\x0b#\x0b\\\x0bC\x0b/\x06\x04\x0c-\t\x03;\x06\x1d\x0c\x05\x06\x00\x00"; let annotations = BinaryAnnotations::new(inp) .iter() .collect::>() .unwrap(); assert_eq!( annotations, vec![ BinaryAnnotation::ChangeCodeOffset(80), BinaryAnnotation::ChangeLineOffset(7), BinaryAnnotation::ChangeCodeOffset(12), BinaryAnnotation::ChangeLineOffset(2), BinaryAnnotation::ChangeCodeOffset(50), BinaryAnnotation::ChangeLineOffset(3), BinaryAnnotation::ChangeCodeOffset(84), BinaryAnnotation::ChangeCodeOffsetAndLineOffset(3, 1), BinaryAnnotation::ChangeCodeOffsetAndLineOffset(12, -2), BinaryAnnotation::ChangeCodeOffsetAndLineOffset(3, 2), BinaryAnnotation::ChangeCodeOffsetAndLineOffset(15, 1), BinaryAnnotation::ChangeLineOffset(2), BinaryAnnotation::ChangeCodeLengthAndCodeOffset(45, 9), BinaryAnnotation::ChangeCodeOffset(59), BinaryAnnotation::ChangeLineOffset(-14), BinaryAnnotation::ChangeCodeLengthAndCodeOffset(5, 6), ] ); } pdb-0.8.0/src/symbol/constants.rs000064400000000000000000000573610072674642500151200ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. // A list of known symbol kinds. 
// from: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2735 #![allow(unused, non_upper_case_globals, non_camel_case_types)] use std::fmt; use scroll::{ctx::TryFromCtx, Endian}; pub const S_COMPILE: u16 = 0x0001; // Compile flags symbol pub const S_REGISTER_16t: u16 = 0x0002; // Register variable pub const S_CONSTANT_16t: u16 = 0x0003; // constant symbol pub const S_UDT_16t: u16 = 0x0004; // User defined type pub const S_SSEARCH: u16 = 0x0005; // Start Search pub const S_END: u16 = 0x0006; // Block procedure "with" or thunk end pub const S_SKIP: u16 = 0x0007; // Reserve symbol space in $$Symbols table pub const S_CVRESERVE: u16 = 0x0008; // Reserved symbol for CV internal use pub const S_OBJNAME_ST: u16 = 0x0009; // path to object file name pub const S_ENDARG: u16 = 0x000a; // end of argument/return list pub const S_COBOLUDT_16t: u16 = 0x000b; // special UDT for cobol that does not symbol pack pub const S_MANYREG_16t: u16 = 0x000c; // multiple register variable pub const S_RETURN: u16 = 0x000d; // return description symbol pub const S_ENTRYTHIS: u16 = 0x000e; // description of this pointer on entry pub const S_BPREL16: u16 = 0x0100; // BP-relative pub const S_LDATA16: u16 = 0x0101; // Module-local symbol pub const S_GDATA16: u16 = 0x0102; // Global data symbol pub const S_PUB16: u16 = 0x0103; // a public symbol pub const S_LPROC16: u16 = 0x0104; // Local procedure start pub const S_GPROC16: u16 = 0x0105; // Global procedure start pub const S_THUNK16: u16 = 0x0106; // Thunk Start pub const S_BLOCK16: u16 = 0x0107; // block start pub const S_WITH16: u16 = 0x0108; // with start pub const S_LABEL16: u16 = 0x0109; // code label pub const S_CEXMODEL16: u16 = 0x010a; // change execution model pub const S_VFTABLE16: u16 = 0x010b; // address of virtual function table pub const S_REGREL16: u16 = 0x010c; // register relative address pub const S_BPREL32_16t: u16 = 0x0200; // BP-relative pub const S_LDATA32_16t: u16 = 0x0201; // Module-local symbol pub const S_GDATA32_16t: u16 = 0x0202; // Global data symbol pub const S_PUB32_16t: u16 = 0x0203; // a public symbol (CV internal reserved) pub const S_LPROC32_16t: u16 = 0x0204; // Local procedure start pub const S_GPROC32_16t: u16 = 0x0205; // Global procedure start pub const S_THUNK32_ST: u16 = 0x0206; // Thunk Start pub const S_BLOCK32_ST: u16 = 0x0207; // block start pub const S_WITH32_ST: u16 = 0x0208; // with start pub const S_LABEL32_ST: u16 = 0x0209; // code label pub const S_CEXMODEL32: u16 = 0x020a; // change execution model pub const S_VFTABLE32_16t: u16 = 0x020b; // address of virtual function table pub const S_REGREL32_16t: u16 = 0x020c; // register relative address pub const S_LTHREAD32_16t: u16 = 0x020d; // local thread storage pub const S_GTHREAD32_16t: u16 = 0x020e; // global thread storage pub const S_SLINK32: u16 = 0x020f; // static link for MIPS EH implementation pub const S_LPROCMIPS_16t: u16 = 0x0300; // Local procedure start pub const S_GPROCMIPS_16t: u16 = 0x0301; // Global procedure start // if these ref symbols have names following then the names are in ST format pub const S_PROCREF_ST: u16 = 0x0400; // Reference to a procedure pub const S_DATAREF_ST: u16 = 0x0401; // Reference to data pub const S_ALIGN: u16 = 0x0402; // Used for page alignment of symbols pub const S_LPROCREF_ST: u16 = 0x0403; // Local Reference to a procedure pub const S_OEM: u16 = 0x0404; // OEM defined symbol // sym records with 32-bit types embedded instead of 16-bit // all have 0x1000 bit set for 
easy identification // only do the 32-bit target versions since we don't really // care about 16-bit ones anymore. pub const S_TI16_MAX: u16 = 0x1000; pub const S_REGISTER_ST: u16 = 0x1001; // Register variable pub const S_CONSTANT_ST: u16 = 0x1002; // constant symbol pub const S_UDT_ST: u16 = 0x1003; // User defined type pub const S_COBOLUDT_ST: u16 = 0x1004; // special UDT for cobol that does not symbol pack pub const S_MANYREG_ST: u16 = 0x1005; // multiple register variable pub const S_BPREL32_ST: u16 = 0x1006; // BP-relative pub const S_LDATA32_ST: u16 = 0x1007; // Module-local symbol pub const S_GDATA32_ST: u16 = 0x1008; // Global data symbol pub const S_PUB32_ST: u16 = 0x1009; // a public symbol (CV internal reserved) pub const S_LPROC32_ST: u16 = 0x100a; // Local procedure start pub const S_GPROC32_ST: u16 = 0x100b; // Global procedure start pub const S_VFTABLE32: u16 = 0x100c; // address of virtual function table pub const S_REGREL32_ST: u16 = 0x100d; // register relative address pub const S_LTHREAD32_ST: u16 = 0x100e; // local thread storage pub const S_GTHREAD32_ST: u16 = 0x100f; // global thread storage pub const S_LPROCMIPS_ST: u16 = 0x1010; // Local procedure start pub const S_GPROCMIPS_ST: u16 = 0x1011; // Global procedure start pub const S_FRAMEPROC: u16 = 0x1012; // extra frame and proc information pub const S_COMPILE2_ST: u16 = 0x1013; // extended compile flags and info // new symbols necessary for 16-bit enumerates of IA64 registers // and IA64 specific symbols pub const S_MANYREG2_ST: u16 = 0x1014; // multiple register variable pub const S_LPROCIA64_ST: u16 = 0x1015; // Local procedure start (IA64) pub const S_GPROCIA64_ST: u16 = 0x1016; // Global procedure start (IA64) // Local symbols for IL pub const S_LOCALSLOT_ST: u16 = 0x1017; // local IL sym with field for local slot index pub const S_PARAMSLOT_ST: u16 = 0x1018; // local IL sym with field for parameter slot index pub const S_ANNOTATION: u16 = 0x1019; // Annotation string literals // symbols to support managed code debugging pub const S_GMANPROC_ST: u16 = 0x101a; // Global proc pub const S_LMANPROC_ST: u16 = 0x101b; // Local proc pub const S_RESERVED1: u16 = 0x101c; // reserved pub const S_RESERVED2: u16 = 0x101d; // reserved pub const S_RESERVED3: u16 = 0x101e; // reserved pub const S_RESERVED4: u16 = 0x101f; // reserved pub const S_LMANDATA_ST: u16 = 0x1020; pub const S_GMANDATA_ST: u16 = 0x1021; pub const S_MANFRAMEREL_ST: u16 = 0x1022; pub const S_MANREGISTER_ST: u16 = 0x1023; pub const S_MANSLOT_ST: u16 = 0x1024; pub const S_MANMANYREG_ST: u16 = 0x1025; pub const S_MANREGREL_ST: u16 = 0x1026; pub const S_MANMANYREG2_ST: u16 = 0x1027; pub const S_MANTYPREF: u16 = 0x1028; // Index for type referenced by name from metadata pub const S_UNAMESPACE_ST: u16 = 0x1029; // Using namespace // Symbols w/ SZ name fields. All name fields contain utf8 encoded strings. 
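// Note: symbol kinds below S_ST_MAX carry length-prefixed ("Pascal-style") names, while kinds at
// or above it carry NUL-terminated UTF-8 names; see `parse_symbol_name` in the parent module.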
pub const S_ST_MAX: u16 = 0x1100; // starting point for SZ name symbols pub const S_OBJNAME: u16 = 0x1101; // path to object file name pub const S_THUNK32: u16 = 0x1102; // Thunk Start pub const S_BLOCK32: u16 = 0x1103; // block start pub const S_WITH32: u16 = 0x1104; // with start pub const S_LABEL32: u16 = 0x1105; // code label pub const S_REGISTER: u16 = 0x1106; // Register variable pub const S_CONSTANT: u16 = 0x1107; // constant symbol pub const S_UDT: u16 = 0x1108; // User defined type pub const S_COBOLUDT: u16 = 0x1109; // special UDT for cobol that does not symbol pack pub const S_MANYREG: u16 = 0x110a; // multiple register variable pub const S_BPREL32: u16 = 0x110b; // BP-relative pub const S_LDATA32: u16 = 0x110c; // Module-local symbol pub const S_GDATA32: u16 = 0x110d; // Global data symbol pub const S_PUB32: u16 = 0x110e; // a public symbol (CV internal reserved) pub const S_LPROC32: u16 = 0x110f; // Local procedure start pub const S_GPROC32: u16 = 0x1110; // Global procedure start pub const S_REGREL32: u16 = 0x1111; // register relative address pub const S_LTHREAD32: u16 = 0x1112; // local thread storage pub const S_GTHREAD32: u16 = 0x1113; // global thread storage pub const S_LPROCMIPS: u16 = 0x1114; // Local procedure start pub const S_GPROCMIPS: u16 = 0x1115; // Global procedure start pub const S_COMPILE2: u16 = 0x1116; // extended compile flags and info pub const S_MANYREG2: u16 = 0x1117; // multiple register variable pub const S_LPROCIA64: u16 = 0x1118; // Local procedure start (IA64) pub const S_GPROCIA64: u16 = 0x1119; // Global procedure start (IA64) pub const S_LOCALSLOT: u16 = 0x111a; // local IL sym with field for local slot index pub const S_PARAMSLOT: u16 = 0x111b; // local IL sym with field for parameter slot index // symbols to support managed code debugging pub const S_LMANDATA: u16 = 0x111c; pub const S_GMANDATA: u16 = 0x111d; pub const S_MANFRAMEREL: u16 = 0x111e; pub const S_MANREGISTER: u16 = 0x111f; pub const S_MANSLOT: u16 = 0x1120; pub const S_MANMANYREG: u16 = 0x1121; pub const S_MANREGREL: u16 = 0x1122; pub const S_MANMANYREG2: u16 = 0x1123; pub const S_UNAMESPACE: u16 = 0x1124; // Using namespace // ref symbols with name fields pub const S_PROCREF: u16 = 0x1125; // Reference to a procedure pub const S_DATAREF: u16 = 0x1126; // Reference to data pub const S_LPROCREF: u16 = 0x1127; // Local Reference to a procedure pub const S_ANNOTATIONREF: u16 = 0x1128; // Reference to an S_ANNOTATION symbol pub const S_TOKENREF: u16 = 0x1129; // Reference to one of the many MANPROCSYM's // continuation of managed symbols pub const S_GMANPROC: u16 = 0x112a; // Global proc pub const S_LMANPROC: u16 = 0x112b; // Local proc // short light-weight thunks pub const S_TRAMPOLINE: u16 = 0x112c; // trampoline thunks pub const S_MANCONSTANT: u16 = 0x112d; // constants with metadata type info // native attributed local/parms pub const S_ATTR_FRAMEREL: u16 = 0x112e; // relative to virtual frame ptr pub const S_ATTR_REGISTER: u16 = 0x112f; // stored in a register pub const S_ATTR_REGREL: u16 = 0x1130; // relative to register (alternate frame ptr) pub const S_ATTR_MANYREG: u16 = 0x1131; // stored in >1 register // Separated code (from the compiler) support pub const S_SEPCODE: u16 = 0x1132; pub const S_LOCAL_2005: u16 = 0x1133; // defines a local symbol in optimized code pub const S_DEFRANGE_2005: u16 = 0x1134; // defines a single range of addresses in which symbol can be evaluated pub const S_DEFRANGE2_2005: u16 = 0x1135; // defines ranges of addresses in which symbol can be 
evaluated pub const S_SECTION: u16 = 0x1136; // A COFF section in a PE executable pub const S_COFFGROUP: u16 = 0x1137; // A COFF group pub const S_EXPORT: u16 = 0x1138; // A export pub const S_CALLSITEINFO: u16 = 0x1139; // Indirect call site information pub const S_FRAMECOOKIE: u16 = 0x113a; // Security cookie information pub const S_DISCARDED: u16 = 0x113b; // Discarded by LINK /OPT:REF (experimental see richards) pub const S_COMPILE3: u16 = 0x113c; // Replacement for S_COMPILE2 pub const S_ENVBLOCK: u16 = 0x113d; // Environment block split off from S_COMPILE2 pub const S_LOCAL: u16 = 0x113e; // defines a local symbol in optimized code pub const S_DEFRANGE: u16 = 0x113f; // defines a single range of addresses in which symbol can be evaluated pub const S_DEFRANGE_SUBFIELD: u16 = 0x1140; // ranges for a subfield pub const S_DEFRANGE_REGISTER: u16 = 0x1141; // ranges for en-registered symbol pub const S_DEFRANGE_FRAMEPOINTER_REL: u16 = 0x1142; // range for stack symbol. pub const S_DEFRANGE_SUBFIELD_REGISTER: u16 = 0x1143; // ranges for en-registered field of symbol pub const S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE: u16 = 0x1144; // range for stack symbol span valid full scope of function body gap might apply. pub const S_DEFRANGE_REGISTER_REL: u16 = 0x1145; // range for symbol address as register + offset. // S_PROC symbols that reference ID instead of type pub const S_LPROC32_ID: u16 = 0x1146; pub const S_GPROC32_ID: u16 = 0x1147; pub const S_LPROCMIPS_ID: u16 = 0x1148; pub const S_GPROCMIPS_ID: u16 = 0x1149; pub const S_LPROCIA64_ID: u16 = 0x114a; pub const S_GPROCIA64_ID: u16 = 0x114b; pub const S_BUILDINFO: u16 = 0x114c; // build information. pub const S_INLINESITE: u16 = 0x114d; // inlined function callsite. pub const S_INLINESITE_END: u16 = 0x114e; pub const S_PROC_ID_END: u16 = 0x114f; pub const S_DEFRANGE_HLSL: u16 = 0x1150; pub const S_GDATA_HLSL: u16 = 0x1151; pub const S_LDATA_HLSL: u16 = 0x1152; pub const S_FILESTATIC: u16 = 0x1153; pub const S_LOCAL_DPC_GROUPSHARED: u16 = 0x1154; // DPC groupshared variable pub const S_LPROC32_DPC: u16 = 0x1155; // DPC local procedure start pub const S_LPROC32_DPC_ID: u16 = 0x1156; pub const S_DEFRANGE_DPC_PTR_TAG: u16 = 0x1157; // DPC pointer tag definition range pub const S_DPC_SYM_TAG_MAP: u16 = 0x1158; // DPC pointer tag value to symbol record map pub const S_ARMSWITCHTABLE: u16 = 0x1159; pub const S_CALLEES: u16 = 0x115a; pub const S_CALLERS: u16 = 0x115b; pub const S_POGODATA: u16 = 0x115c; pub const S_INLINESITE2: u16 = 0x115d; // extended inline site information pub const S_HEAPALLOCSITE: u16 = 0x115e; // heap allocation site pub const S_MOD_TYPEREF: u16 = 0x115f; // only generated at link time pub const S_REF_MINIPDB: u16 = 0x1160; // only generated at link time for mini PDB pub const S_PDBMAP: u16 = 0x1161; // only generated at link time for mini PDB pub const S_GDATA_HLSL32: u16 = 0x1162; pub const S_LDATA_HLSL32: u16 = 0x1163; pub const S_GDATA_HLSL32_EX: u16 = 0x1164; pub const S_LDATA_HLSL32_EX: u16 = 0x1165; pub const S_FASTLINK: u16 = 0x1167; // generated at link time for /DEBUG:FASTLINK pub const S_INLINEES: u16 = 0x1168; /// These values correspond to the CV_CPU_TYPE_e enumeration, and are documented /// [on MSDN](https://msdn.microsoft.com/en-us/library/b2fc64ek.aspx). 
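///
/// Note that the `From<u16>` conversion below maps unrecognized values to `CPUType::Intel8080`
/// (`0x0`), since this enumeration has no dedicated "unknown" variant.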
#[non_exhaustive] #[allow(missing_docs)] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum CPUType { Intel8080 = 0x0, Intel8086 = 0x1, Intel80286 = 0x2, Intel80386 = 0x3, Intel80486 = 0x4, Pentium = 0x5, PentiumPro = 0x6, Pentium3 = 0x7, MIPS = 0x10, MIPS16 = 0x11, MIPS32 = 0x12, MIPS64 = 0x13, MIPSI = 0x14, MIPSII = 0x15, MIPSIII = 0x16, MIPSIV = 0x17, MIPSV = 0x18, M68000 = 0x20, M68010 = 0x21, M68020 = 0x22, M68030 = 0x23, M68040 = 0x24, Alpha = 0x30, Alpha21164 = 0x31, Alpha21164A = 0x32, Alpha21264 = 0x33, Alpha21364 = 0x34, PPC601 = 0x40, PPC603 = 0x41, PPC604 = 0x42, PPC620 = 0x43, PPCFP = 0x44, PPCBE = 0x45, SH3 = 0x50, SH3E = 0x51, SH3DSP = 0x52, SH4 = 0x53, SHMedia = 0x54, ARM3 = 0x60, ARM4 = 0x61, ARM4T = 0x62, ARM5 = 0x63, ARM5T = 0x64, ARM6 = 0x65, ARM_XMAC = 0x66, ARM_WMMX = 0x67, ARM7 = 0x68, ARM64 = 0x69, Omni = 0x70, Ia64 = 0x80, Ia64_2 = 0x81, CEE = 0x90, AM33 = 0xa0, M32R = 0xb0, TriCore = 0xc0, X64 = 0xd0, EBC = 0xe0, Thumb = 0xf0, ARMNT = 0xf4, D3D11_Shader = 0x100, } impl fmt::Display for CPUType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Intel8080 => write!(f, "Intel8080"), Self::Intel8086 => write!(f, "Intel8086"), Self::Intel80286 => write!(f, "Intel80286"), Self::Intel80386 => write!(f, "Intel80386"), Self::Intel80486 => write!(f, "Intel80486"), Self::Pentium => write!(f, "Pentium"), Self::PentiumPro => write!(f, "PentiumPro"), Self::Pentium3 => write!(f, "Pentium3"), Self::MIPS => write!(f, "MIPS"), Self::MIPS16 => write!(f, "MIPS16"), Self::MIPS32 => write!(f, "MIPS32"), Self::MIPS64 => write!(f, "MIPS64"), Self::MIPSI => write!(f, "MIPSI"), Self::MIPSII => write!(f, "MIPSII"), Self::MIPSIII => write!(f, "MIPSIII"), Self::MIPSIV => write!(f, "MIPSIV"), Self::MIPSV => write!(f, "MIPSV"), Self::M68000 => write!(f, "M68000"), Self::M68010 => write!(f, "M68010"), Self::M68020 => write!(f, "M68020"), Self::M68030 => write!(f, "M68030"), Self::M68040 => write!(f, "M68040"), Self::Alpha => write!(f, "Alpha"), Self::Alpha21164 => write!(f, "Alpha21164"), Self::Alpha21164A => write!(f, "Alpha21164A"), Self::Alpha21264 => write!(f, "Alpha21264"), Self::Alpha21364 => write!(f, "Alpha21364"), Self::PPC601 => write!(f, "PPC601"), Self::PPC603 => write!(f, "PPC603"), Self::PPC604 => write!(f, "PPC604"), Self::PPC620 => write!(f, "PPC620"), Self::PPCFP => write!(f, "PPCFP"), Self::PPCBE => write!(f, "PPCBE"), Self::SH3 => write!(f, "SH3"), Self::SH3E => write!(f, "SH3E"), Self::SH3DSP => write!(f, "SH3DSP"), Self::SH4 => write!(f, "SH4"), Self::SHMedia => write!(f, "SHMedia"), Self::ARM3 => write!(f, "ARM3"), Self::ARM4 => write!(f, "ARM4"), Self::ARM4T => write!(f, "ARM4T"), Self::ARM5 => write!(f, "ARM5"), Self::ARM5T => write!(f, "ARM5T"), Self::ARM6 => write!(f, "ARM6"), Self::ARM_XMAC => write!(f, "ARM_XMAC"), Self::ARM_WMMX => write!(f, "ARM_WMMX"), Self::ARM7 => write!(f, "ARM7"), Self::ARM64 => write!(f, "ARM64"), Self::Omni => write!(f, "Omni"), Self::Ia64 => write!(f, "Ia64"), Self::Ia64_2 => write!(f, "Ia64_2"), Self::CEE => write!(f, "CEE"), Self::AM33 => write!(f, "AM33"), Self::M32R => write!(f, "M32R"), Self::TriCore => write!(f, "TriCore"), Self::X64 => write!(f, "X64"), Self::EBC => write!(f, "EBC"), Self::Thumb => write!(f, "Thumb"), Self::ARMNT => write!(f, "ARMNT"), Self::D3D11_Shader => write!(f, "D3D11_Shader"), } } } impl From for CPUType { fn from(value: u16) -> Self { match value { 0x0 => Self::Intel8080, 0x1 => Self::Intel8086, 0x2 => Self::Intel80286, 0x3 => Self::Intel80386, 0x4 => Self::Intel80486, 0x5 => 
Self::Pentium, 0x6 => Self::PentiumPro, 0x7 => Self::Pentium3, 0x10 => Self::MIPS, 0x11 => Self::MIPS16, 0x12 => Self::MIPS32, 0x13 => Self::MIPS64, 0x14 => Self::MIPSI, 0x15 => Self::MIPSII, 0x16 => Self::MIPSIII, 0x17 => Self::MIPSIV, 0x18 => Self::MIPSV, 0x20 => Self::M68000, 0x21 => Self::M68010, 0x22 => Self::M68020, 0x23 => Self::M68030, 0x24 => Self::M68040, 0x30 => Self::Alpha, 0x31 => Self::Alpha21164, 0x32 => Self::Alpha21164A, 0x33 => Self::Alpha21264, 0x34 => Self::Alpha21364, 0x40 => Self::PPC601, 0x41 => Self::PPC603, 0x42 => Self::PPC604, 0x43 => Self::PPC620, 0x44 => Self::PPCFP, 0x45 => Self::PPCBE, 0x50 => Self::SH3, 0x51 => Self::SH3E, 0x52 => Self::SH3DSP, 0x53 => Self::SH4, 0x54 => Self::SHMedia, 0x60 => Self::ARM3, 0x61 => Self::ARM4, 0x62 => Self::ARM4T, 0x63 => Self::ARM5, 0x64 => Self::ARM5T, 0x65 => Self::ARM6, 0x66 => Self::ARM_XMAC, 0x67 => Self::ARM_WMMX, 0x68 => Self::ARM7, 0x69 => Self::ARM64, 0x70 => Self::Omni, 0x80 => Self::Ia64, 0x81 => Self::Ia64_2, 0x90 => Self::CEE, 0xa0 => Self::AM33, 0xb0 => Self::M32R, 0xc0 => Self::TriCore, 0xd0 => Self::X64, 0xe0 => Self::EBC, 0xf0 => Self::Thumb, 0xf4 => Self::ARMNT, 0x100 => Self::D3D11_Shader, _ => Self::Intel8080, // This enum doesn't have an unknown value, so we just force it to Intel8080 since it's 0x0. } } } impl<'a> TryFromCtx<'a, Endian> for CPUType { type Error = scroll::Error; fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, usize)> { u16::try_from_ctx(this, le).map(|(v, l)| (v.into(), l)) } } /// These values correspond to the CV_CFL_LANG enumeration, and are documented /// [on MSDN](https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx). #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum SourceLanguage { /// Application language is C. C = 0x00, /// Application language is C++. Cpp = 0x01, /// Application language is FORTRAN. Fortran = 0x02, /// Application language is Microsoft Macro Assembler. Masm = 0x03, /// Application language is Pascal. Pascal = 0x04, /// Application language is BASIC. Basic = 0x05, /// Application language is COBOL. Cobol = 0x06, /// Application is a linker-generated module. Link = 0x07, /// Application is a resource module converted with CVTRES tool. Cvtres = 0x08, /// Application is a POGO optimized module generated with CVTPGD tool. Cvtpgd = 0x09, /// Application language is C#. CSharp = 0x0a, /// Application language is Visual Basic. VB = 0x0b, /// Application language is intermediate language assembly (that is, Common Language Runtime /// (CLR) assembly). ILAsm = 0x0c, /// Application language is Java. Java = 0x0d, /// Application language is Jscript. JScript = 0x0e, /// Application language is an unknown Microsoft Intermediate Language (MSIL), possibly a result /// of using the [/LTCG (Link-time Code /// Generation)](https://docs.microsoft.com/en-us/cpp/build/reference/ltcg-link-time-code-generation) /// switch. MSIL = 0x0f, /// Application language is High Level Shader Language. HLSL = 0x10, /// The DMD compiler emits 'D' for the CV source language. Microsoft doesn't /// have an enumerator for it yet. 
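    /// (The value `0x44` is the ASCII code of the letter `D`.)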
D = 0x44, } impl fmt::Display for SourceLanguage { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::C => write!(f, "C"), Self::Cpp => write!(f, "Cpp"), Self::Fortran => write!(f, "Fortran"), Self::Masm => write!(f, "Masm"), Self::Pascal => write!(f, "Pascal"), Self::Basic => write!(f, "Basic"), Self::Cobol => write!(f, "Cobol"), Self::Link => write!(f, "Link"), Self::Cvtres => write!(f, "Cvtres"), Self::Cvtpgd => write!(f, "Cvtpgd"), Self::CSharp => write!(f, "CSharp"), Self::VB => write!(f, "VB"), Self::ILAsm => write!(f, "ILAsm"), Self::Java => write!(f, "Java"), Self::JScript => write!(f, "JScript"), Self::MSIL => write!(f, "MSIL"), Self::HLSL => write!(f, "HLSL"), Self::D => write!(f, "D"), } } } impl From for SourceLanguage { fn from(value: u8) -> Self { match value { 0x00 => Self::C, 0x01 => Self::Cpp, 0x02 => Self::Fortran, 0x03 => Self::Masm, 0x04 => Self::Pascal, 0x05 => Self::Basic, 0x06 => Self::Cobol, 0x07 => Self::Link, 0x08 => Self::Cvtres, 0x09 => Self::Cvtpgd, 0x0a => Self::CSharp, 0x0b => Self::VB, 0x0c => Self::ILAsm, 0x0d => Self::Java, 0x0e => Self::JScript, 0x0f => Self::MSIL, 0x10 => Self::HLSL, 0x44 => Self::D, _ => Self::Masm, // There is no unknown, so we just force to Masm as the default. } } } impl<'a> TryFromCtx<'a, Endian> for SourceLanguage { type Error = scroll::Error; fn try_from_ctx(this: &'a [u8], le: Endian) -> scroll::Result<(Self, usize)> { u8::try_from_ctx(this, le).map(|(v, l)| (v.into(), l)) } } pdb-0.8.0/src/symbol/mod.rs000064400000000000000000002255020072674642500136550ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::fmt; use scroll::{ctx::TryFromCtx, Endian, Pread, LE}; use crate::common::*; use crate::msf::*; use crate::FallibleIterator; mod annotations; mod constants; use self::constants::*; pub use self::constants::{CPUType, SourceLanguage}; pub use self::annotations::*; /// The raw type discriminator for `Symbols`. pub type SymbolKind = u16; /// Represents a symbol from the symbol table. /// /// A `Symbol` is represented internally as a `&[u8]`, and in general the bytes inside are not /// inspected in any way before calling any of the accessor methods. /// /// To avoid copying, `Symbol`s exist as references to data owned by the parent `SymbolTable`. /// Therefore, a `Symbol` may not outlive its parent `SymbolTable`. #[derive(Copy, Clone, PartialEq)] pub struct Symbol<'t> { index: SymbolIndex, data: &'t [u8], } impl<'t> Symbol<'t> { /// The index of this symbol in the containing symbol stream. #[inline] pub fn index(&self) -> SymbolIndex { self.index } /// Returns the kind of symbol identified by this Symbol. #[inline] pub fn raw_kind(&self) -> SymbolKind { debug_assert!(self.data.len() >= 2); self.data.pread_with(0, LE).unwrap_or_default() } /// Returns the raw bytes of this symbol record, including the symbol type and extra data, but /// not including the preceding symbol length indicator. #[inline] pub fn raw_bytes(&self) -> &'t [u8] { self.data } /// Parse the symbol into the `SymbolData` it contains. #[inline] pub fn parse(&self) -> Result> { self.raw_bytes().pread_with(0, ()) } /// Returns whether this symbol starts a scope. /// /// If `true`, this symbol has a `parent` and an `end` field, which contains the offset of the /// corrsponding end symbol. 
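    ///
    /// Scope-starting records such as `S_GPROC32` or `S_BLOCK32` are closed by a matching end
    /// record (see [`ends_scope`](Self::ends_scope)). A minimal sketch of tracking scope depth
    /// while walking a symbol stream, assuming `symbols` is a fallible symbol iterator obtained
    /// elsewhere (hence the `ignore` marker):
    ///
    /// ```ignore
    /// let mut depth = 0usize;
    /// while let Some(symbol) = symbols.next()? {
    ///     if symbol.starts_scope() {
    ///         depth += 1;
    ///     } else if symbol.ends_scope() {
    ///         depth = depth.saturating_sub(1);
    ///     }
    /// }
    /// ```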
pub fn starts_scope(&self) -> bool { matches!( self.raw_kind(), S_GPROC16 | S_GPROC32 | S_GPROC32_ST | S_GPROCMIPS | S_GPROCMIPS_ST | S_GPROCIA64 | S_GPROCIA64_ST | S_LPROC16 | S_LPROC32 | S_LPROC32_ST | S_LPROC32_DPC | S_LPROCMIPS | S_LPROCMIPS_ST | S_LPROCIA64 | S_LPROCIA64_ST | S_LPROC32_DPC_ID | S_GPROC32_ID | S_GPROCMIPS_ID | S_GPROCIA64_ID | S_BLOCK16 | S_BLOCK32 | S_BLOCK32_ST | S_WITH16 | S_WITH32 | S_WITH32_ST | S_THUNK16 | S_THUNK32 | S_THUNK32_ST | S_SEPCODE | S_GMANPROC | S_GMANPROC_ST | S_LMANPROC | S_LMANPROC_ST | S_INLINESITE | S_INLINESITE2 ) } /// Returns whether this symbol declares the end of a scope. pub fn ends_scope(&self) -> bool { matches!(self.raw_kind(), S_END | S_PROC_ID_END | S_INLINESITE_END) } } impl<'t> fmt::Debug for Symbol<'t> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "Symbol{{ kind: 0x{:x} [{} bytes] }}", self.raw_kind(), self.data.len() ) } } fn parse_symbol_name<'t>(buf: &mut ParseBuffer<'t>, kind: SymbolKind) -> Result> { if kind < S_ST_MAX { // Pascal-style name buf.parse_u8_pascal_string() } else { // NUL-terminated name buf.parse_cstring() } } fn parse_optional_name<'t>( buf: &mut ParseBuffer<'t>, kind: SymbolKind, ) -> Result>> { if kind < S_ST_MAX { // ST variants do not specify a name Ok(None) } else { // NUL-terminated name buf.parse_cstring().map(Some) } } fn parse_optional_index(buf: &mut ParseBuffer<'_>) -> Result> { Ok(match buf.parse()? { SymbolIndex(0) => None, index => Some(index), }) } // data types are defined at: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L3038 // constants defined at: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2735 // decoding reference: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/cvdump/dumpsym7.cpp#L264 /// Information parsed from a [`Symbol`] record. #[non_exhaustive] #[derive(Clone, Debug, Eq, PartialEq)] pub enum SymbolData<'t> { /// End of a scope, such as a procedure. ScopeEnd, /// Name of the object file of this module. ObjName(ObjNameSymbol<'t>), /// A Register variable. RegisterVariable(RegisterVariableSymbol<'t>), /// A constant value. Constant(ConstantSymbol<'t>), /// A user defined type. UserDefinedType(UserDefinedTypeSymbol<'t>), /// A Register variable spanning multiple registers. MultiRegisterVariable(MultiRegisterVariableSymbol<'t>), /// Static data, such as a global variable. Data(DataSymbol<'t>), /// A public symbol with a mangled name. Public(PublicSymbol<'t>), /// A procedure, such as a function or method. Procedure(ProcedureSymbol<'t>), /// A thread local variable. ThreadStorage(ThreadStorageSymbol<'t>), /// Flags used to compile a module. CompileFlags(CompileFlagsSymbol<'t>), /// A using namespace directive. UsingNamespace(UsingNamespaceSymbol<'t>), /// Reference to a [`ProcedureSymbol`]. ProcedureReference(ProcedureReferenceSymbol<'t>), /// Reference to an imported variable. DataReference(DataReferenceSymbol<'t>), /// Reference to an annotation. AnnotationReference(AnnotationReferenceSymbol<'t>), /// Trampoline thunk. Trampoline(TrampolineSymbol), /// An exported symbol. Export(ExportSymbol<'t>), /// A local symbol in optimized code. Local(LocalSymbol<'t>), /// Reference to build information. BuildInfo(BuildInfoSymbol), /// The callsite of an inlined function. InlineSite(InlineSiteSymbol<'t>), /// End of an inline callsite. InlineSiteEnd, /// End of a procedure. ProcedureEnd, /// A label. 
Label(LabelSymbol<'t>), /// A block. Block(BlockSymbol<'t>), /// Data allocated relative to a register. RegisterRelative(RegisterRelativeSymbol<'t>), /// A thunk. Thunk(ThunkSymbol<'t>), /// A block of separated code. SeparatedCode(SeparatedCodeSymbol), } impl<'t> SymbolData<'t> { /// Returns the name of this symbol if it has one. pub fn name(&self) -> Option> { match self { Self::ScopeEnd => None, Self::ObjName(data) => Some(data.name), Self::RegisterVariable(_) => None, Self::Constant(data) => Some(data.name), Self::UserDefinedType(data) => Some(data.name), Self::MultiRegisterVariable(_) => None, Self::Data(data) => Some(data.name), Self::Public(data) => Some(data.name), Self::Procedure(data) => Some(data.name), Self::ThreadStorage(data) => Some(data.name), Self::CompileFlags(_) => None, Self::UsingNamespace(data) => Some(data.name), Self::ProcedureReference(data) => data.name, Self::DataReference(data) => data.name, Self::AnnotationReference(data) => Some(data.name), Self::Trampoline(_) => None, Self::Export(data) => Some(data.name), Self::Local(data) => Some(data.name), Self::InlineSite(_) => None, Self::BuildInfo(_) => None, Self::InlineSiteEnd => None, Self::ProcedureEnd => None, Self::Label(data) => Some(data.name), Self::Block(data) => Some(data.name), Self::RegisterRelative(data) => Some(data.name), Self::Thunk(data) => Some(data.name), Self::SeparatedCode(_) => None, } } } impl<'t> TryFromCtx<'t> for SymbolData<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], _ctx: ()) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let kind = buf.parse()?; let symbol = match kind { S_END => SymbolData::ScopeEnd, S_OBJNAME | S_OBJNAME_ST => SymbolData::ObjName(buf.parse_with(kind)?), S_REGISTER | S_REGISTER_ST => SymbolData::RegisterVariable(buf.parse_with(kind)?), S_CONSTANT | S_CONSTANT_ST | S_MANCONSTANT => { SymbolData::Constant(buf.parse_with(kind)?) } S_UDT | S_UDT_ST | S_COBOLUDT | S_COBOLUDT_ST => { SymbolData::UserDefinedType(buf.parse_with(kind)?) } S_MANYREG | S_MANYREG_ST | S_MANYREG2 | S_MANYREG2_ST => { SymbolData::MultiRegisterVariable(buf.parse_with(kind)?) } S_LDATA32 | S_LDATA32_ST | S_GDATA32 | S_GDATA32_ST | S_LMANDATA | S_LMANDATA_ST | S_GMANDATA | S_GMANDATA_ST => SymbolData::Data(buf.parse_with(kind)?), S_PUB32 | S_PUB32_ST => SymbolData::Public(buf.parse_with(kind)?), S_LPROC32 | S_LPROC32_ST | S_GPROC32 | S_GPROC32_ST | S_LPROC32_ID | S_GPROC32_ID | S_LPROC32_DPC | S_LPROC32_DPC_ID => SymbolData::Procedure(buf.parse_with(kind)?), S_LTHREAD32 | S_LTHREAD32_ST | S_GTHREAD32 | S_GTHREAD32_ST => { SymbolData::ThreadStorage(buf.parse_with(kind)?) } S_COMPILE2 | S_COMPILE2_ST | S_COMPILE3 => { SymbolData::CompileFlags(buf.parse_with(kind)?) } S_UNAMESPACE | S_UNAMESPACE_ST => SymbolData::UsingNamespace(buf.parse_with(kind)?), S_PROCREF | S_PROCREF_ST | S_LPROCREF | S_LPROCREF_ST => { SymbolData::ProcedureReference(buf.parse_with(kind)?) 
} S_TRAMPOLINE => Self::Trampoline(buf.parse_with(kind)?), S_DATAREF | S_DATAREF_ST => SymbolData::DataReference(buf.parse_with(kind)?), S_ANNOTATIONREF => SymbolData::AnnotationReference(buf.parse_with(kind)?), S_EXPORT => SymbolData::Export(buf.parse_with(kind)?), S_LOCAL => SymbolData::Local(buf.parse_with(kind)?), S_BUILDINFO => SymbolData::BuildInfo(buf.parse_with(kind)?), S_INLINESITE | S_INLINESITE2 => SymbolData::InlineSite(buf.parse_with(kind)?), S_INLINESITE_END => SymbolData::InlineSiteEnd, S_PROC_ID_END => SymbolData::ProcedureEnd, S_LABEL32 | S_LABEL32_ST => SymbolData::Label(buf.parse_with(kind)?), S_BLOCK32 | S_BLOCK32_ST => SymbolData::Block(buf.parse_with(kind)?), S_REGREL32 => SymbolData::RegisterRelative(buf.parse_with(kind)?), S_THUNK32 | S_THUNK32_ST => SymbolData::Thunk(buf.parse_with(kind)?), S_SEPCODE => SymbolData::SeparatedCode(buf.parse_with(kind)?), other => return Err(Error::UnimplementedSymbolKind(other)), }; Ok((symbol, buf.pos())) } } /// A Register variable. /// /// Symbol kind `S_REGISTER`, or `S_REGISTER_ST` #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct RegisterVariableSymbol<'t> { /// Identifier of the variable type. pub type_index: TypeIndex, /// The register this variable is stored in. pub register: Register, /// Name of the variable. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for RegisterVariableSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = RegisterVariableSymbol { type_index: buf.parse()?, register: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A Register variable spanning multiple registers. /// /// Symbol kind `S_MANYREG`, `S_MANYREG_ST`, `S_MANYREG2`, or `S_MANYREG2_ST`. #[derive(Clone, Debug, Eq, PartialEq)] pub struct MultiRegisterVariableSymbol<'t> { /// Identifier of the variable type. pub type_index: TypeIndex, /// Most significant register first. pub registers: Vec<(Register, RawString<'t>)>, } impl<'t> TryFromCtx<'t, SymbolKind> for MultiRegisterVariableSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let type_index = buf.parse()?; let count = match kind { S_MANYREG2 | S_MANYREG2_ST => buf.parse::()?, _ => u16::from(buf.parse::()?), }; let mut registers = Vec::with_capacity(count as usize); for _ in 0..count { registers.push((buf.parse()?, parse_symbol_name(&mut buf, kind)?)); } let symbol = MultiRegisterVariableSymbol { type_index, registers, }; Ok((symbol, buf.pos())) } } // CV_PUBSYMFLAGS_e const CVPSF_CODE: u32 = 0x1; const CVPSF_FUNCTION: u32 = 0x2; const CVPSF_MANAGED: u32 = 0x4; const CVPSF_MSIL: u32 = 0x8; /// A public symbol with a mangled name. /// /// Symbol kind `S_PUB32`, or `S_PUB32_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct PublicSymbol<'t> { /// The public symbol refers to executable code. pub code: bool, /// The public symbol is a function. pub function: bool, /// The symbol is in managed code (native or IL). pub managed: bool, /// The symbol is managed IL code. pub msil: bool, /// Start offset of the symbol. pub offset: PdbInternalSectionOffset, /// Mangled name of the symbol. 
pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for PublicSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let flags = buf.parse::()?; let symbol = PublicSymbol { code: flags & CVPSF_CODE != 0, function: flags & CVPSF_FUNCTION != 0, managed: flags & CVPSF_MANAGED != 0, msil: flags & CVPSF_MSIL != 0, offset: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// Static data, such as a global variable. /// /// Symbol kinds: /// - `S_LDATA32` and `S_LDATA32_ST` for local unmanaged data /// - `S_GDATA32` and `S_GDATA32_ST` for global unmanaged data /// - `S_LMANDATA32` and `S_LMANDATA32_ST` for local managed data /// - `S_GMANDATA32` and `S_GMANDATA32_ST` for global managed data #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct DataSymbol<'t> { /// Whether this data is global or local. pub global: bool, /// Whether this data is managed or unmanaged. pub managed: bool, /// Type identifier of the type of data. pub type_index: TypeIndex, /// Code offset of the start of the data region. pub offset: PdbInternalSectionOffset, /// Name of the data variable. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for DataSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = DataSymbol { global: matches!(kind, S_GDATA32 | S_GDATA32_ST | S_GMANDATA | S_GMANDATA_ST), managed: matches!( kind, S_LMANDATA | S_LMANDATA_ST | S_GMANDATA | S_GMANDATA_ST ), type_index: buf.parse()?, offset: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// Reference to an imported procedure. /// /// Symbol kind `S_PROCREF`, `S_PROCREF_ST`, `S_LPROCREF`, or `S_LPROCREF_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureReferenceSymbol<'t> { /// Whether the referenced procedure is global or local. pub global: bool, /// SUC of the name. pub sum_name: u32, /// Symbol index of the referenced [`ProcedureSymbol`]. /// /// Note that this symbol might be located in a different module. pub symbol_index: SymbolIndex, /// Index of the module in [`DebugInformation::modules`](crate::DebugInformation::modules) /// containing the actual symbol. pub module: Option, /// Name of the procedure reference. pub name: Option>, } impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureReferenceSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = ProcedureReferenceSymbol { global: matches!(kind, S_PROCREF | S_PROCREF_ST), sum_name: buf.parse()?, symbol_index: buf.parse()?, module: buf.parse::()?.checked_sub(1).map(usize::from), name: parse_optional_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// Reference to an imported variable. /// /// Symbol kind `S_DATAREF`, or `S_DATAREF_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct DataReferenceSymbol<'t> { /// SUC of the name. pub sum_name: u32, /// Symbol index of the referenced [`DataSymbol`]. /// /// Note that this symbol might be located in a different module. pub symbol_index: SymbolIndex, /// Index of the module in [`DebugInformation::modules`](crate::DebugInformation::modules) /// containing the actual symbol. pub module: Option, /// Name of the data reference. 
pub name: Option>, } impl<'t> TryFromCtx<'t, SymbolKind> for DataReferenceSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = DataReferenceSymbol { sum_name: buf.parse()?, symbol_index: buf.parse()?, module: buf.parse::()?.checked_sub(1).map(usize::from), name: parse_optional_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// Reference to an annotation. /// /// Symbol kind `S_ANNOTATIONREF`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct AnnotationReferenceSymbol<'t> { /// SUC of the name. pub sum_name: u32, /// Symbol index of the referenced symbol. /// /// Note that this symbol might be located in a different module. pub symbol_index: SymbolIndex, /// Index of the module in [`DebugInformation::modules`](crate::DebugInformation::modules) /// containing the actual symbol. pub module: Option, /// Name of the annotation reference. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for AnnotationReferenceSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = AnnotationReferenceSymbol { sum_name: buf.parse()?, symbol_index: buf.parse()?, module: buf.parse::()?.checked_sub(1).map(usize::from), name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// Subtype of [`TrampolineSymbol`]. #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum TrampolineType { /// An incremental thunk. Incremental, /// Branch island thunk. BranchIsland, /// An unknown thunk type. Unknown, } /// Trampoline thunk. /// /// Symbol kind `S_TRAMPOLINE`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct TrampolineSymbol { /// Trampoline symbol subtype. pub tramp_type: TrampolineType, /// Code size of the thunk. pub size: u16, /// Code offset of the thunk. pub thunk: PdbInternalSectionOffset, /// Code offset of the thunk target. pub target: PdbInternalSectionOffset, } impl TryFromCtx<'_, SymbolKind> for TrampolineSymbol { type Error = Error; fn try_from_ctx(this: &'_ [u8], _kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let tramp_type = match buf.parse::()? { 0x00 => TrampolineType::Incremental, 0x01 => TrampolineType::BranchIsland, _ => TrampolineType::Unknown, }; let size = buf.parse()?; let thunk_offset = buf.parse()?; let target_offset = buf.parse()?; let thunk_section = buf.parse()?; let target_section = buf.parse()?; let symbol = Self { tramp_type, size, thunk: PdbInternalSectionOffset::new(thunk_section, thunk_offset), target: PdbInternalSectionOffset::new(target_section, target_offset), }; Ok((symbol, buf.pos())) } } /// A constant value. /// /// Symbol kind `S_CONSTANT`, or `S_CONSTANT_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ConstantSymbol<'t> { /// Whether this constant has metadata type information. pub managed: bool, /// The type of this constant or metadata token. pub type_index: TypeIndex, /// The value of this constant. pub value: Variant, /// Name of the constant. 
pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for ConstantSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = ConstantSymbol { managed: kind == S_MANCONSTANT, type_index: buf.parse()?, value: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A user defined type. /// /// Symbol kind `S_UDT`, or `S_UDT_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct UserDefinedTypeSymbol<'t> { /// Identifier of the type. pub type_index: TypeIndex, /// Name of the type. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for UserDefinedTypeSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = UserDefinedTypeSymbol { type_index: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A thread local variable. /// /// Symbol kinds: /// - `S_LTHREAD32`, `S_LTHREAD32_ST` for local thread storage. /// - `S_GTHREAD32`, or `S_GTHREAD32_ST` for global thread storage. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ThreadStorageSymbol<'t> { /// Whether this is a global or local thread storage. pub global: bool, /// Identifier of the stored type. pub type_index: TypeIndex, /// Code offset of the thread local. pub offset: PdbInternalSectionOffset, /// Name of the thread local. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for ThreadStorageSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = ThreadStorageSymbol { global: matches!(kind, S_GTHREAD32 | S_GTHREAD32_ST), type_index: buf.parse()?, offset: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } // CV_PROCFLAGS: const CV_PFLAG_NOFPO: u8 = 0x01; const CV_PFLAG_INT: u8 = 0x02; const CV_PFLAG_FAR: u8 = 0x04; const CV_PFLAG_NEVER: u8 = 0x08; const CV_PFLAG_NOTREACHED: u8 = 0x10; const CV_PFLAG_CUST_CALL: u8 = 0x20; const CV_PFLAG_NOINLINE: u8 = 0x40; const CV_PFLAG_OPTDBGINFO: u8 = 0x80; /// Flags of a [`ProcedureSymbol`]. #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureFlags { /// Frame pointer is present (not omitted). pub nofpo: bool, /// Interrupt return. pub int: bool, /// Far return. pub far: bool, /// Procedure does not return. pub never: bool, /// Procedure is never called. pub notreached: bool, /// Custom calling convention. pub cust_call: bool, /// Marked as `noinline`. pub noinline: bool, /// Debug information for optimized code is present. pub optdbginfo: bool, } impl<'t> TryFromCtx<'t, Endian> for ProcedureFlags { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let (value, size) = u8::try_from_ctx(this, le)?; let flags = Self { nofpo: value & CV_PFLAG_NOFPO != 0, int: value & CV_PFLAG_INT != 0, far: value & CV_PFLAG_FAR != 0, never: value & CV_PFLAG_NEVER != 0, notreached: value & CV_PFLAG_NOTREACHED != 0, cust_call: value & CV_PFLAG_CUST_CALL != 0, noinline: value & CV_PFLAG_NOINLINE != 0, optdbginfo: value & CV_PFLAG_OPTDBGINFO != 0, }; Ok((flags, size)) } } /// A procedure, such as a function or method. 
/// /// Symbol kinds: /// - `S_GPROC32`, `S_GPROC32_ST` for global procedures /// - `S_LPROC32`, `S_LPROC32_ST` for local procedures /// - `S_LPROC32_DPC` for DPC procedures /// - `S_GPROC32_ID`, `S_LPROC32_ID`, `S_LPROC32_DPC_ID` for procedures referencing types from the /// ID stream rather than the Type stream. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ProcedureSymbol<'t> { /// Whether this is a global or local procedure. pub global: bool, /// Indicates Deferred Procedure Calls (DPC). pub dpc: bool, /// The parent scope that this procedure is nested in. pub parent: Option, /// The end symbol of this procedure. pub end: SymbolIndex, /// The next procedure symbol. pub next: Option, /// The length of the code block covered by this procedure. pub len: u32, /// Start offset of the procedure's body code, which marks the end of the prologue. pub dbg_start_offset: u32, /// End offset of the procedure's body code, which marks the start of the epilogue. pub dbg_end_offset: u32, /// Identifier of the procedure type. /// /// The type contains the complete signature, including parameters, modifiers and the return /// type. pub type_index: TypeIndex, /// Code offset of the start of this procedure. pub offset: PdbInternalSectionOffset, /// Detailed flags of this procedure. pub flags: ProcedureFlags, /// The full, demangled name of the procedure. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for ProcedureSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = ProcedureSymbol { global: matches!(kind, S_GPROC32 | S_GPROC32_ST | S_GPROC32_ID), dpc: matches!(kind, S_LPROC32_DPC | S_LPROC32_DPC_ID), parent: parse_optional_index(&mut buf)?, end: buf.parse()?, next: parse_optional_index(&mut buf)?, len: buf.parse()?, dbg_start_offset: buf.parse()?, dbg_end_offset: buf.parse()?, type_index: buf.parse()?, offset: buf.parse()?, flags: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// The callsite of an inlined function. /// /// Symbol kind `S_INLINESITE`, or `S_INLINESITE2`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct InlineSiteSymbol<'t> { /// Index of the parent function. /// /// This might either be a [`ProcedureSymbol`] or another `InlineSiteSymbol`. pub parent: Option, /// The end symbol of this callsite. pub end: SymbolIndex, /// Identifier of the type describing the inline function. pub inlinee: IdIndex, /// The total number of invocations of the inline function. pub invocations: Option, /// Binary annotations containing the line program of this call site. pub annotations: BinaryAnnotations<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for InlineSiteSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = InlineSiteSymbol { parent: parse_optional_index(&mut buf)?, end: buf.parse()?, inlinee: buf.parse()?, invocations: match kind { S_INLINESITE2 => Some(buf.parse()?), _ => None, }, annotations: BinaryAnnotations::new(buf.take(buf.len())?), }; Ok((symbol, buf.pos())) } } /// Reference to build information. /// /// Symbol kind `S_BUILDINFO`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct BuildInfoSymbol { /// Index of the build information record. 
pub id: IdIndex, } impl<'t> TryFromCtx<'t, SymbolKind> for BuildInfoSymbol { type Error = Error; fn try_from_ctx(this: &'t [u8], _kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = Self { id: buf.parse()? }; Ok((symbol, buf.pos())) } } /// Name of the object file of this module. /// /// Symbol kind `S_OBJNAME`, or `S_OBJNAME_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ObjNameSymbol<'t> { /// Signature. pub signature: u32, /// Path to the object file. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for ObjNameSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = ObjNameSymbol { signature: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A version number refered to by `CompileFlagsSymbol`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct CompilerVersion { /// The major version number. pub major: u16, /// The minor version number. pub minor: u16, /// The build (patch) version number. pub build: u16, /// The QFE (quick fix engineering) number. pub qfe: Option, } impl<'t> TryFromCtx<'t, bool> for CompilerVersion { type Error = Error; fn try_from_ctx(this: &'t [u8], has_qfe: bool) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let version = Self { major: buf.parse()?, minor: buf.parse()?, build: buf.parse()?, qfe: if has_qfe { Some(buf.parse()?) } else { None }, }; Ok((version, buf.pos())) } } /// Compile flags declared in `CompileFlagsSymbol`. #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct CompileFlags { /// Compiled for edit and continue. pub edit_and_continue: bool, /// Compiled without debugging info. pub no_debug_info: bool, /// Compiled with `LTCG`. pub link_time_codegen: bool, /// Compiled with `/bzalign`. pub no_data_align: bool, /// Managed code or data is present. pub managed: bool, /// Compiled with `/GS`. pub security_checks: bool, /// Compiled with `/hotpatch`. pub hot_patch: bool, /// Compiled with `CvtCIL`. pub cvtcil: bool, /// This is a MSIL .NET Module. pub msil_module: bool, /// Compiled with `/sdl`. pub sdl: bool, /// Compiled with `/ltcg:pgo` or `pgo:`. pub pgo: bool, /// This is a .exp module. pub exp_module: bool, } impl<'t> TryFromCtx<'t, SymbolKind> for CompileFlags { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let is_compile3 = kind == S_COMPILE3; let raw = this.pread_with::(0, LE)?; this.pread::(2)?; // unused let flags = Self { edit_and_continue: raw & 1 != 0, no_debug_info: (raw >> 1) & 1 != 0, link_time_codegen: (raw >> 2) & 1 != 0, no_data_align: (raw >> 3) & 1 != 0, managed: (raw >> 4) & 1 != 0, security_checks: (raw >> 5) & 1 != 0, hot_patch: (raw >> 6) & 1 != 0, cvtcil: (raw >> 7) & 1 != 0, msil_module: (raw >> 8) & 1 != 0, sdl: (raw >> 9) & 1 != 0 && is_compile3, pgo: (raw >> 10) & 1 != 0 && is_compile3, exp_module: (raw >> 11) & 1 != 0 && is_compile3, }; Ok((flags, 3)) } } /// Flags used to compile a module. /// /// Symbol kind `S_COMPILE2`, `S_COMPILE2_ST`, or `S_COMPILE3`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct CompileFlagsSymbol<'t> { /// The source code language. pub language: SourceLanguage, /// Compiler flags. pub flags: CompileFlags, /// Machine type of the compilation target. pub cpu_type: CPUType, /// Version of the compiler frontend. 
pub frontend_version: CompilerVersion, /// Version of the compiler backend. pub backend_version: CompilerVersion, /// Display name of the compiler. pub version_string: RawString<'t>, // TODO: Command block for S_COMPILE2? } impl<'t> TryFromCtx<'t, SymbolKind> for CompileFlagsSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let has_qfe = kind == S_COMPILE3; let symbol = CompileFlagsSymbol { language: buf.parse()?, flags: buf.parse_with(kind)?, cpu_type: buf.parse()?, frontend_version: buf.parse_with(has_qfe)?, backend_version: buf.parse_with(has_qfe)?, version_string: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A using namespace directive. /// /// Symbol kind `S_UNAMESPACE`, or `S_UNAMESPACE_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct UsingNamespaceSymbol<'t> { /// The name of the imported namespace. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for UsingNamespaceSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = UsingNamespaceSymbol { name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } // CV_LVARFLAGS: const CV_LVARFLAG_ISPARAM: u16 = 0x01; const CV_LVARFLAG_ADDRTAKEN: u16 = 0x02; const CV_LVARFLAG_COMPGENX: u16 = 0x04; const CV_LVARFLAG_ISAGGREGATE: u16 = 0x08; const CV_LVARFLAG_ISALIASED: u16 = 0x10; const CV_LVARFLAG_ISALIAS: u16 = 0x20; const CV_LVARFLAG_ISRETVALUE: u16 = 0x40; const CV_LVARFLAG_ISOPTIMIZEDOUT: u16 = 0x80; const CV_LVARFLAG_ISENREG_GLOB: u16 = 0x100; const CV_LVARFLAG_ISENREG_STAT: u16 = 0x200; /// Flags for a [`LocalSymbol`]. #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct LocalVariableFlags { /// Variable is a parameter. pub isparam: bool, /// Address is taken. pub addrtaken: bool, /// Variable is compiler generated. pub compgenx: bool, /// The symbol is splitted in temporaries, which are treated by compiler as independent /// entities. pub isaggregate: bool, /// Variable has multiple simultaneous lifetimes. pub isaliased: bool, /// Represents one of the multiple simultaneous lifetimes. pub isalias: bool, /// Represents a function return value. pub isretvalue: bool, /// Variable has no lifetimes. pub isoptimizedout: bool, /// Variable is an enregistered global. pub isenreg_glob: bool, /// Variable is an enregistered static. pub isenreg_stat: bool, } impl<'t> TryFromCtx<'t, Endian> for LocalVariableFlags { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let (value, size) = u16::try_from_ctx(this, le)?; let flags = Self { isparam: value & CV_LVARFLAG_ISPARAM != 0, addrtaken: value & CV_LVARFLAG_ADDRTAKEN != 0, compgenx: value & CV_LVARFLAG_COMPGENX != 0, isaggregate: value & CV_LVARFLAG_ISAGGREGATE != 0, isaliased: value & CV_LVARFLAG_ISALIASED != 0, isalias: value & CV_LVARFLAG_ISALIAS != 0, isretvalue: value & CV_LVARFLAG_ISRETVALUE != 0, isoptimizedout: value & CV_LVARFLAG_ISOPTIMIZEDOUT != 0, isenreg_glob: value & CV_LVARFLAG_ISENREG_GLOB != 0, isenreg_stat: value & CV_LVARFLAG_ISENREG_STAT != 0, }; Ok((flags, size)) } } /// A local symbol in optimized code. /// /// Symbol kind `S_LOCAL`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct LocalSymbol<'t> { /// The type of the symbol. pub type_index: TypeIndex, /// Flags for this symbol. 
pub flags: LocalVariableFlags, /// Name of the symbol. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for LocalSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = LocalSymbol { type_index: buf.parse()?, flags: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L4456 /// Flags of an [`ExportSymbol`]. #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ExportSymbolFlags { /// An exported constant. pub constant: bool, /// Exported data (e.g. a static variable). pub data: bool, /// A private symbol. pub private: bool, /// A symbol with no name. pub no_name: bool, /// Ordinal was explicitly assigned. pub ordinal: bool, /// This is a forwarder. pub forwarder: bool, } impl<'t> TryFromCtx<'t, Endian> for ExportSymbolFlags { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let (value, size) = u16::try_from_ctx(this, le)?; let flags = Self { constant: value & 0x01 != 0, data: value & 0x02 != 0, private: value & 0x04 != 0, no_name: value & 0x08 != 0, ordinal: value & 0x10 != 0, forwarder: value & 0x20 != 0, }; Ok((flags, size)) } } /// An exported symbol. /// /// Symbol kind `S_EXPORT`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ExportSymbol<'t> { /// Ordinal of the symbol. pub ordinal: u16, /// Flags declaring the type of the exported symbol. pub flags: ExportSymbolFlags, /// The name of the exported symbol. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for ExportSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = ExportSymbol { ordinal: buf.parse()?, flags: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A label symbol. /// /// Symbol kind `S_LABEL32`, `S_LABEL16`, or `S_LABEL32_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct LabelSymbol<'t> { /// Code offset of the start of this label. pub offset: PdbInternalSectionOffset, /// Detailed flags of this label. pub flags: ProcedureFlags, /// Name of the symbol. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for LabelSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = LabelSymbol { offset: buf.parse()?, flags: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A block symbol. /// /// Symbol kind `S_BLOCK32`, or `S_BLOCK32_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct BlockSymbol<'t> { /// The parent scope that this block is nested in. pub parent: SymbolIndex, /// The end symbol of this block. pub end: SymbolIndex, /// The length of the block. pub len: u32, /// Code offset of the start of this label. pub offset: PdbInternalSectionOffset, /// The block name. 
pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for BlockSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = BlockSymbol { parent: buf.parse()?, end: buf.parse()?, len: buf.parse()?, offset: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// A register relative symbol. /// /// The address of the variable is the value in the register + offset (e.g. %EBP + 8). /// /// Symbol kind `S_REGREL32`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct RegisterRelativeSymbol<'t> { /// The variable offset. pub offset: i32, /// The type of the variable. pub type_index: TypeIndex, /// The register this variable address is relative to. pub register: Register, /// The variable name. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for RegisterRelativeSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let symbol = RegisterRelativeSymbol { offset: buf.parse()?, type_index: buf.parse()?, register: buf.parse()?, name: parse_symbol_name(&mut buf, kind)?, }; Ok((symbol, buf.pos())) } } /// Thunk adjustor #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ThunkAdjustor<'t> { delta: u16, target: RawString<'t>, } /// A thunk kind #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ThunkKind<'t> { /// Standard thunk NoType, /// "this" adjustor thunk with delta and target Adjustor(ThunkAdjustor<'t>), /// Virtual call thunk with table entry VCall(u16), /// pcode thunk PCode, /// thunk which loads the address to jump to via unknown means... Load, /// Unknown with ordinal value Unknown(u8), } /// A thunk symbol. /// /// Symbol kind `S_THUNK32`, or `S_THUNK32_ST`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct ThunkSymbol<'t> { /// The parent scope that this thunk is nested in. pub parent: Option, /// The end symbol of this thunk. pub end: SymbolIndex, /// The next symbol. pub next: Option, /// Code offset of the start of this label. pub offset: PdbInternalSectionOffset, /// The length of the thunk. pub len: u16, /// The kind of the thunk. pub kind: ThunkKind<'t>, /// The thunk name. pub name: RawString<'t>, } impl<'t> TryFromCtx<'t, SymbolKind> for ThunkSymbol<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], kind: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let parent = parse_optional_index(&mut buf)?; let end = buf.parse()?; let next = parse_optional_index(&mut buf)?; let offset = buf.parse()?; let len = buf.parse()?; let ord = buf.parse::()?; let name = parse_symbol_name(&mut buf, kind)?; let kind = match ord { 0 => ThunkKind::NoType, 1 => ThunkKind::Adjustor(ThunkAdjustor { delta: buf.parse::()?, target: buf.parse_cstring()?, }), 2 => ThunkKind::VCall(buf.parse::()?), 3 => ThunkKind::PCode, 4 => ThunkKind::Load, ord => ThunkKind::Unknown(ord), }; let symbol = ThunkSymbol { parent, end, next, offset, len, kind, name, }; Ok((symbol, buf.pos())) } } // CV_SEPCODEFLAGS: const CV_SEPCODEFLAG_IS_LEXICAL_SCOPE: u32 = 0x01; const CV_SEPCODEFLAG_RETURNS_TO_PARENT: u32 = 0x02; /// Flags for a [`SeparatedCodeSymbol`]. #[non_exhaustive] #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct SeparatedCodeFlags { /// S_SEPCODE doubles as lexical scope. pub islexicalscope: bool, /// code frag returns to parent. 
pub returnstoparent: bool, } impl<'t> TryFromCtx<'t, Endian> for SeparatedCodeFlags { type Error = scroll::Error; fn try_from_ctx(this: &'t [u8], le: Endian) -> scroll::Result<(Self, usize)> { let (value, size) = u32::try_from_ctx(this, le)?; let flags = Self { islexicalscope: value & CV_SEPCODEFLAG_IS_LEXICAL_SCOPE != 0, returnstoparent: value & CV_SEPCODEFLAG_RETURNS_TO_PARENT != 0, }; Ok((flags, size)) } } /// A separated code symbol. /// /// Symbol kind `S_SEPCODE`. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub struct SeparatedCodeSymbol { /// The parent scope that this block is nested in. pub parent: SymbolIndex, /// The end symbol of this block. pub end: SymbolIndex, /// The length of the block. pub len: u32, /// Flags for this symbol pub flags: SeparatedCodeFlags, /// Code offset of the start of the separated code. pub offset: PdbInternalSectionOffset, /// Parent offset. pub parent_offset: PdbInternalSectionOffset, } impl<'t> TryFromCtx<'t, SymbolKind> for SeparatedCodeSymbol { type Error = Error; fn try_from_ctx(this: &'t [u8], _: SymbolKind) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let parent = buf.parse()?; let end = buf.parse()?; let len = buf.parse()?; let flags = buf.parse()?; let offset = buf.parse()?; let parent_offset = buf.parse()?; let section = buf.parse()?; let parent_section = buf.parse()?; let symbol = Self { parent, end, len, flags, offset: PdbInternalSectionOffset { offset, section }, parent_offset: PdbInternalSectionOffset { offset: parent_offset, section: parent_section, }, }; Ok((symbol, buf.pos())) } } /// PDB symbol tables contain names, locations, and metadata about functions, global/static data, /// constants, data types, and more. /// /// The `SymbolTable` holds a `SourceView` referencing the symbol table inside the PDB file. All the /// data structures returned by a `SymbolTable` refer to that buffer. /// /// # Example /// /// ``` /// # use pdb::FallibleIterator; /// # /// # fn test() -> pdb::Result { /// let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// let mut pdb = pdb::PDB::open(file)?; /// /// let symbol_table = pdb.global_symbols()?; /// let address_map = pdb.address_map()?; /// /// # let mut count: usize = 0; /// let mut symbols = symbol_table.iter(); /// while let Some(symbol) = symbols.next()? { /// match symbol.parse() { /// Ok(pdb::SymbolData::Public(data)) if data.function => { /// // we found the location of a function! /// let rva = data.offset.to_rva(&address_map).unwrap_or_default(); /// println!("{} is {}", rva, data.name); /// # count += 1; /// } /// _ => {} /// } /// } /// /// # Ok(count) /// # } /// # assert!(test().expect("test") > 2000); /// ``` #[derive(Debug)] pub struct SymbolTable<'s> { stream: Stream<'s>, } impl<'s> SymbolTable<'s> { /// Parses a symbol table from raw stream data. pub(crate) fn new(stream: Stream<'s>) -> Self { SymbolTable { stream } } /// Returns an iterator that can traverse the symbol table in sequential order. pub fn iter(&self) -> SymbolIter<'_> { SymbolIter::new(self.stream.parse_buffer()) } /// Returns an iterator over symbols starting at the given index. pub fn iter_at(&self, index: SymbolIndex) -> SymbolIter<'_> { let mut iter = self.iter(); iter.seek(index); iter } } /// A `SymbolIter` iterates over a `SymbolTable`, producing `Symbol`s. /// /// Symbol tables are represented internally as a series of records, each of which have a length, a /// type, and a type-specific field layout. Iteration performance is therefore similar to a linked /// list. 
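///
/// # Example
///
/// A minimal sketch, again using the fixture from the [`SymbolTable`] example, that re-reads
/// a symbol by jumping back to its index with [`SymbolIter::skip_to`]:
///
/// ```
/// # use pdb::FallibleIterator;
/// # fn test() -> pdb::Result<()> {
/// let file = std::fs::File::open("fixtures/self/foo.pdb")?;
/// let mut pdb = pdb::PDB::open(file)?;
/// let symbol_table = pdb.global_symbols()?;
///
/// let mut symbols = symbol_table.iter();
/// if let Some(symbol) = symbols.next()? {
///     // `skip_to` seeks to the given index and parses the record found there,
///     // so it returns the very same symbol.
///     assert_eq!(symbols.skip_to(symbol.index())?, Some(symbol));
/// }
/// # Ok(())
/// # }
/// # test().expect("test");
/// ```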
#[derive(Debug)] pub struct SymbolIter<'t> { buf: ParseBuffer<'t>, } impl<'t> SymbolIter<'t> { pub(crate) fn new(buf: ParseBuffer<'t>) -> SymbolIter<'t> { SymbolIter { buf } } /// Move the iterator to the symbol referred to by `index`. /// /// This can be used to jump to the sibiling or parent of a symbol record. pub fn seek(&mut self, index: SymbolIndex) { self.buf.seek(index.0 as usize); } /// Skip to the symbol referred to by `index`, returning the symbol. /// /// This can be used to jump to the sibiling or parent of a symbol record. Iteration continues /// after that symbol. /// /// Note that the symbol may be located **before** the originating symbol, for instance when /// jumping to the parent symbol. Take care not to enter an endless loop in this case. pub fn skip_to(&mut self, index: SymbolIndex) -> Result>> { self.seek(index); self.next() } } impl<'t> FallibleIterator for SymbolIter<'t> { type Item = Symbol<'t>; type Error = Error; fn next(&mut self) -> Result> { while !self.buf.is_empty() { let index = SymbolIndex(self.buf.pos() as u32); // read the length of the next symbol let symbol_length = self.buf.parse::()? as usize; if symbol_length < 2 { // this can't be correct return Err(Error::SymbolTooShort); } // grab the symbol itself let data = self.buf.take(symbol_length)?; let symbol = Symbol { index, data }; // skip over padding in the symbol table match symbol.raw_kind() { S_ALIGN | S_SKIP => continue, _ => return Ok(Some(symbol)), } } Ok(None) } } #[cfg(test)] mod tests { mod parsing { use crate::symbol::*; #[test] fn kind_0006() { let data = &[6, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x0006); assert_eq!(symbol.parse().expect("parse"), SymbolData::ScopeEnd); } #[test] fn kind_1101() { let data = &[1, 17, 0, 0, 0, 0, 42, 32, 67, 73, 76, 32, 42, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1101); assert_eq!( symbol.parse().expect("parse"), SymbolData::ObjName(ObjNameSymbol { signature: 0, name: "* CIL *".into(), }) ); } #[test] fn kind_1102() { let data = &[ 2, 17, 0, 0, 0, 0, 108, 22, 0, 0, 0, 0, 0, 0, 140, 11, 0, 0, 1, 0, 9, 0, 3, 91, 116, 104, 117, 110, 107, 93, 58, 68, 101, 114, 105, 118, 101, 100, 58, 58, 70, 117, 110, 99, 49, 96, 97, 100, 106, 117, 115, 116, 111, 114, 123, 56, 125, 39, 0, 0, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1102); assert_eq!( symbol.parse().expect("parse"), SymbolData::Thunk(ThunkSymbol { parent: None, end: SymbolIndex(0x166c), next: None, offset: PdbInternalSectionOffset { section: 0x1, offset: 0xb8c }, len: 9, kind: ThunkKind::PCode, name: "[thunk]:Derived::Func1`adjustor{8}'".into() }) ); } #[test] fn kind_1105() { let data = &[ 5, 17, 224, 95, 151, 0, 1, 0, 0, 100, 97, 118, 49, 100, 95, 119, 95, 97, 118, 103, 95, 115, 115, 115, 101, 51, 0, 0, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1105); assert_eq!( symbol.parse().expect("parse"), SymbolData::Label(LabelSymbol { offset: PdbInternalSectionOffset { offset: 0x0097_5fe0, section: 1 }, flags: ProcedureFlags { nofpo: false, int: false, far: false, never: false, notreached: false, cust_call: false, noinline: false, optdbginfo: false }, name: "dav1d_w_avg_ssse3".into(), }) ); } #[test] fn kind_1106() { let data = &[6, 17, 120, 34, 0, 0, 18, 0, 116, 104, 105, 115, 0, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1106); assert_eq!( 
symbol.parse().expect("parse"), SymbolData::RegisterVariable(RegisterVariableSymbol { type_index: TypeIndex(8824), register: Register(18), name: "this".into(), }) ); } #[test] fn kind_110e() { let data = &[ 14, 17, 2, 0, 0, 0, 192, 85, 0, 0, 1, 0, 95, 95, 108, 111, 99, 97, 108, 95, 115, 116, 100, 105, 111, 95, 112, 114, 105, 110, 116, 102, 95, 111, 112, 116, 105, 111, 110, 115, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x110e); assert_eq!( symbol.parse().expect("parse"), SymbolData::Public(PublicSymbol { code: false, function: true, managed: false, msil: false, offset: PdbInternalSectionOffset { offset: 21952, section: 1 }, name: "__local_stdio_printf_options".into(), }) ); } #[test] fn kind_1111() { let data = &[ 17, 17, 12, 0, 0, 0, 48, 16, 0, 0, 22, 0, 109, 97, 120, 105, 109, 117, 109, 95, 99, 111, 117, 110, 116, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1111); assert_eq!( symbol.parse().expect("parse"), SymbolData::RegisterRelative(RegisterRelativeSymbol { offset: 12, type_index: TypeIndex(0x1030), register: Register(22), name: "maximum_count".into(), }) ); } #[test] fn kind_1124() { let data = &[36, 17, 115, 116, 100, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1124); assert_eq!( symbol.parse().expect("parse"), SymbolData::UsingNamespace(UsingNamespaceSymbol { name: "std".into() }) ); } #[test] fn kind_1125() { let data = &[ 37, 17, 0, 0, 0, 0, 108, 0, 0, 0, 1, 0, 66, 97, 122, 58, 58, 102, 95, 112, 117, 98, 108, 105, 99, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1125); assert_eq!( symbol.parse().expect("parse"), SymbolData::ProcedureReference(ProcedureReferenceSymbol { global: true, sum_name: 0, symbol_index: SymbolIndex(108), module: Some(0), name: Some("Baz::f_public".into()), }) ); } #[test] fn kind_1108() { let data = &[8, 17, 112, 6, 0, 0, 118, 97, 95, 108, 105, 115, 116, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1108); assert_eq!( symbol.parse().expect("parse"), SymbolData::UserDefinedType(UserDefinedTypeSymbol { type_index: TypeIndex(1648), name: "va_list".into(), }) ); } #[test] fn kind_1107() { let data = &[ 7, 17, 201, 18, 0, 0, 1, 0, 95, 95, 73, 83, 65, 95, 65, 86, 65, 73, 76, 65, 66, 76, 69, 95, 83, 83, 69, 50, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1107); assert_eq!( symbol.parse().expect("parse"), SymbolData::Constant(ConstantSymbol { managed: false, type_index: TypeIndex(4809), value: Variant::U16(1), name: "__ISA_AVAILABLE_SSE2".into(), }) ); } #[test] fn kind_110d() { let data = &[ 13, 17, 116, 0, 0, 0, 16, 0, 0, 0, 3, 0, 95, 95, 105, 115, 97, 95, 97, 118, 97, 105, 108, 97, 98, 108, 101, 0, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x110d); assert_eq!( symbol.parse().expect("parse"), SymbolData::Data(DataSymbol { global: true, managed: false, type_index: TypeIndex(116), offset: PdbInternalSectionOffset { offset: 16, section: 3 }, name: "__isa_available".into(), }) ); } #[test] fn kind_110c() { let data = &[ 12, 17, 32, 0, 0, 0, 240, 36, 1, 0, 2, 0, 36, 120, 100, 97, 116, 97, 115, 121, 109, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x110c); assert_eq!( symbol.parse().expect("parse"), SymbolData::Data(DataSymbol { global: false, managed: false, type_index: TypeIndex(32), 
offset: PdbInternalSectionOffset { offset: 74992, section: 2 }, name: "$xdatasym".into(), }) ); } #[test] fn kind_1127() { let data = &[ 39, 17, 0, 0, 0, 0, 128, 4, 0, 0, 182, 0, 99, 97, 112, 116, 117, 114, 101, 95, 99, 117, 114, 114, 101, 110, 116, 95, 99, 111, 110, 116, 101, 120, 116, 0, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1127); assert_eq!( symbol.parse().expect("parse"), SymbolData::ProcedureReference(ProcedureReferenceSymbol { global: false, sum_name: 0, symbol_index: SymbolIndex(1152), module: Some(181), name: Some("capture_current_context".into()), }) ); } #[test] fn kind_112c() { let data = &[44, 17, 0, 0, 5, 0, 5, 0, 0, 0, 32, 124, 0, 0, 2, 0, 2, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x112c); assert_eq!( symbol.parse().expect("parse"), SymbolData::Trampoline(TrampolineSymbol { tramp_type: TrampolineType::Incremental, size: 0x5, thunk: PdbInternalSectionOffset { offset: 0x5, section: 0x2 }, target: PdbInternalSectionOffset { offset: 0x7c20, section: 0x2 }, }) ); } #[test] fn kind_1110() { let data = &[ 16, 17, 0, 0, 0, 0, 48, 2, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 7, 16, 0, 0, 64, 85, 0, 0, 1, 0, 0, 66, 97, 122, 58, 58, 102, 95, 112, 114, 111, 116, 101, 99, 116, 101, 100, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1110); assert_eq!( symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: true, dpc: false, parent: None, end: SymbolIndex(560), next: None, len: 6, dbg_start_offset: 5, dbg_end_offset: 5, type_index: TypeIndex(4103), offset: PdbInternalSectionOffset { offset: 21824, section: 1 }, flags: ProcedureFlags { nofpo: false, int: false, far: false, never: false, notreached: false, cust_call: false, noinline: false, optdbginfo: false }, name: "Baz::f_protected".into(), }) ); } #[test] fn kind_1103() { let data = &[ 3, 17, 244, 149, 9, 0, 40, 151, 9, 0, 135, 1, 0, 0, 108, 191, 184, 2, 1, 0, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1103); assert_eq!( symbol.parse().expect("parse"), SymbolData::Block(BlockSymbol { parent: SymbolIndex(0x0009_95f4), end: SymbolIndex(0x0009_9728), len: 391, offset: PdbInternalSectionOffset { section: 0x1, offset: 0x02b8_bf6c }, name: "".into(), }) ); } #[test] fn kind_110f() { let data = &[ 15, 17, 0, 0, 0, 0, 156, 1, 0, 0, 0, 0, 0, 0, 18, 0, 0, 0, 4, 0, 0, 0, 9, 0, 0, 0, 128, 16, 0, 0, 196, 87, 0, 0, 1, 0, 128, 95, 95, 115, 99, 114, 116, 95, 99, 111, 109, 109, 111, 110, 95, 109, 97, 105, 110, 0, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x110f); assert_eq!( symbol.parse().expect("parse"), SymbolData::Procedure(ProcedureSymbol { global: false, dpc: false, parent: None, end: SymbolIndex(412), next: None, len: 18, dbg_start_offset: 4, dbg_end_offset: 9, type_index: TypeIndex(4224), offset: PdbInternalSectionOffset { offset: 22468, section: 1 }, flags: ProcedureFlags { nofpo: false, int: false, far: false, never: false, notreached: false, cust_call: false, noinline: false, optdbginfo: true }, name: "__scrt_common_main".into(), }) ); } #[test] fn kind_1116() { let data = &[ 22, 17, 7, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 14, 0, 10, 0, 115, 98, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 76, 73, 78, 75, 0, 0, 0, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1116); assert_eq!( 
symbol.parse().expect("parse"), SymbolData::CompileFlags(CompileFlagsSymbol { language: SourceLanguage::Link, flags: CompileFlags { edit_and_continue: false, no_debug_info: false, link_time_codegen: false, no_data_align: false, managed: false, security_checks: false, hot_patch: false, cvtcil: false, msil_module: false, sdl: false, pgo: false, exp_module: false, }, cpu_type: CPUType::Intel80386, frontend_version: CompilerVersion { major: 0, minor: 0, build: 0, qfe: None, }, backend_version: CompilerVersion { major: 14, minor: 10, build: 25203, qfe: None, }, version_string: "Microsoft (R) LINK".into(), }) ); } #[test] fn kind_1132() { let data = &[ 50, 17, 0, 0, 0, 0, 108, 0, 0, 0, 88, 0, 0, 0, 0, 0, 0, 0, 196, 252, 10, 0, 56, 67, 0, 0, 1, 0, 1, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x1132); assert_eq!( symbol.parse().expect("parse"), SymbolData::SeparatedCode(SeparatedCodeSymbol { parent: SymbolIndex(0x0), end: SymbolIndex(0x6c), len: 88, flags: SeparatedCodeFlags { islexicalscope: false, returnstoparent: false }, offset: PdbInternalSectionOffset { section: 0x1, offset: 0xafcc4 }, parent_offset: PdbInternalSectionOffset { section: 0x1, offset: 0x4338 } }) ); } #[test] fn kind_113c() { let data = &[ 60, 17, 1, 36, 2, 0, 7, 0, 19, 0, 13, 0, 6, 102, 0, 0, 19, 0, 13, 0, 6, 102, 0, 0, 77, 105, 99, 114, 111, 115, 111, 102, 116, 32, 40, 82, 41, 32, 79, 112, 116, 105, 109, 105, 122, 105, 110, 103, 32, 67, 111, 109, 112, 105, 108, 101, 114, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x113c); assert_eq!( symbol.parse().expect("parse"), SymbolData::CompileFlags(CompileFlagsSymbol { language: SourceLanguage::Cpp, flags: CompileFlags { edit_and_continue: false, no_debug_info: false, link_time_codegen: true, no_data_align: false, managed: false, security_checks: true, hot_patch: false, cvtcil: false, msil_module: false, sdl: true, pgo: false, exp_module: false, }, cpu_type: CPUType::Pentium3, frontend_version: CompilerVersion { major: 19, minor: 13, build: 26118, qfe: Some(0), }, backend_version: CompilerVersion { major: 19, minor: 13, build: 26118, qfe: Some(0), }, version_string: "Microsoft (R) Optimizing Compiler".into(), }) ); } #[test] fn kind_113e() { let data = &[62, 17, 193, 19, 0, 0, 1, 0, 116, 104, 105, 115, 0, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x113e); assert_eq!( symbol.parse().expect("parse"), SymbolData::Local(LocalSymbol { type_index: TypeIndex(5057), flags: LocalVariableFlags { isparam: true, addrtaken: false, compgenx: false, isaggregate: false, isaliased: false, isalias: false, isretvalue: false, isoptimizedout: false, isenreg_glob: false, isenreg_stat: false, }, name: "this".into(), }) ); } #[test] fn kind_114c() { let data = &[76, 17, 95, 17, 0, 0]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x114c); assert_eq!( symbol.parse().expect("parse"), SymbolData::BuildInfo(BuildInfoSymbol { id: IdIndex(0x115F) }) ); } #[test] fn kind_114d() { let data = &[ 77, 17, 144, 1, 0, 0, 208, 1, 0, 0, 121, 17, 0, 0, 12, 6, 3, 0, ]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x114d); assert_eq!( symbol.parse().expect("parse"), SymbolData::InlineSite(InlineSiteSymbol { parent: Some(SymbolIndex(0x0190)), end: SymbolIndex(0x01d0), inlinee: IdIndex(4473), invocations: None, annotations: BinaryAnnotations::new(&[12, 6, 3, 0]), }) ); } #[test] fn kind_114e() { let data = &[78, 
17]; let symbol = Symbol { data, index: SymbolIndex(0), }; assert_eq!(symbol.raw_kind(), 0x114e); assert_eq!(symbol.parse().expect("parse"), SymbolData::InlineSiteEnd); } } mod iterator { use crate::symbol::*; fn create_iter() -> SymbolIter<'static> { let data = &[ 0x00, 0x00, 0x00, 0x00, // module signature (padding) 0x02, 0x00, 0x4e, 0x11, // S_INLINESITE_END 0x02, 0x00, 0x06, 0x00, // S_END ]; let mut buf = ParseBuffer::from(&data[..]); buf.seek(4); // skip the module signature SymbolIter::new(buf) } #[test] fn test_iter() { let symbols: Vec<_> = create_iter().collect().expect("collect"); let expected = [ Symbol { index: SymbolIndex(0x4), data: &[0x4e, 0x11], // S_INLINESITE_END }, Symbol { index: SymbolIndex(0x8), data: &[0x06, 0x00], // S_END }, ]; assert_eq!(symbols, expected); } #[test] fn test_seek() { let mut symbols = create_iter(); symbols.seek(SymbolIndex(0x8)); let symbol = symbols.next().expect("get symbol"); let expected = Symbol { index: SymbolIndex(0x8), data: &[0x06, 0x00], // S_END }; assert_eq!(symbol, Some(expected)); } #[test] fn test_skip_to() { let mut symbols = create_iter(); let symbol = symbols.skip_to(SymbolIndex(0x8)).expect("get symbol"); let expected = Symbol { index: SymbolIndex(0x8), data: &[0x06, 0x00], // S_END }; assert_eq!(symbol, Some(expected)); } } } pdb-0.8.0/src/tpi/constants.rs000064400000000000000000000204760072674642500144040ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. #![allow(unused, non_upper_case_globals)] // TODO: special types // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L328 // A list of known type kinds: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L772 // leaf indices starting records but referenced from symbol records pub const LF_MODIFIER_16t: u16 = 0x0001; pub const LF_POINTER_16t: u16 = 0x0002; pub const LF_ARRAY_16t: u16 = 0x0003; pub const LF_CLASS_16t: u16 = 0x0004; pub const LF_STRUCTURE_16t: u16 = 0x0005; pub const LF_UNION_16t: u16 = 0x0006; pub const LF_ENUM_16t: u16 = 0x0007; pub const LF_PROCEDURE_16t: u16 = 0x0008; pub const LF_MFUNCTION_16t: u16 = 0x0009; pub const LF_VTSHAPE: u16 = 0x000a; pub const LF_COBOL0_16t: u16 = 0x000b; pub const LF_COBOL1: u16 = 0x000c; pub const LF_BARRAY_16t: u16 = 0x000d; pub const LF_LABEL: u16 = 0x000e; pub const LF_NULL: u16 = 0x000f; pub const LF_NOTTRAN: u16 = 0x0010; pub const LF_DIMARRAY_16t: u16 = 0x0011; pub const LF_VFTPATH_16t: u16 = 0x0012; pub const LF_PRECOMP_16t: u16 = 0x0013; // not referenced from symbol pub const LF_ENDPRECOMP: u16 = 0x0014; // not referenced from symbol pub const LF_OEM_16t: u16 = 0x0015; // oem definable type string pub const LF_TYPESERVER_ST: u16 = 0x0016; // not referenced from symbol // leaf indices starting records but referenced only from type records pub const LF_SKIP_16t: u16 = 0x0200; pub const LF_ARGLIST_16t: u16 = 0x0201; pub const LF_DEFARG_16t: u16 = 0x0202; pub const LF_LIST: u16 = 0x0203; pub const LF_FIELDLIST_16t: u16 = 0x0204; pub const LF_DERIVED_16t: u16 = 0x0205; pub const LF_BITFIELD_16t: u16 = 0x0206; pub const LF_METHODLIST_16t: u16 = 0x0207; pub const LF_DIMCONU_16t: u16 = 0x0208; pub const LF_DIMCONLU_16t: u16 = 0x0209; pub const LF_DIMVARU_16t: u16 = 0x020a; pub const LF_DIMVARLU_16t: u16 = 0x020b; pub const LF_REFSYM: 
u16 = 0x020c; pub const LF_BCLASS_16t: u16 = 0x0400; pub const LF_VBCLASS_16t: u16 = 0x0401; pub const LF_IVBCLASS_16t: u16 = 0x0402; pub const LF_ENUMERATE_ST: u16 = 0x0403; pub const LF_FRIENDFCN_16t: u16 = 0x0404; pub const LF_INDEX_16t: u16 = 0x0405; pub const LF_MEMBER_16t: u16 = 0x0406; pub const LF_STMEMBER_16t: u16 = 0x0407; pub const LF_METHOD_16t: u16 = 0x0408; pub const LF_NESTTYPE_16t: u16 = 0x0409; pub const LF_VFUNCTAB_16t: u16 = 0x040a; pub const LF_FRIENDCLS_16t: u16 = 0x040b; pub const LF_ONEMETHOD_16t: u16 = 0x040c; pub const LF_VFUNCOFF_16t: u16 = 0x040d; // 32-bit type index versions of leaves all have the 0x1000 bit set // pub const LF_TI16_MAX: u16 = 0x1000; pub const LF_MODIFIER: u16 = 0x1001; pub const LF_POINTER: u16 = 0x1002; pub const LF_ARRAY_ST: u16 = 0x1003; pub const LF_CLASS_ST: u16 = 0x1004; pub const LF_STRUCTURE_ST: u16 = 0x1005; pub const LF_UNION_ST: u16 = 0x1006; pub const LF_ENUM_ST: u16 = 0x1007; pub const LF_PROCEDURE: u16 = 0x1008; pub const LF_MFUNCTION: u16 = 0x1009; pub const LF_COBOL0: u16 = 0x100a; pub const LF_BARRAY: u16 = 0x100b; pub const LF_DIMARRAY_ST: u16 = 0x100c; pub const LF_VFTPATH: u16 = 0x100d; pub const LF_PRECOMP_ST: u16 = 0x100e; // not referenced from symbol pub const LF_OEM: u16 = 0x100f; // oem definable type string pub const LF_ALIAS_ST: u16 = 0x1010; // alias (typedef) type pub const LF_OEM2: u16 = 0x1011; // oem definable type string // leaf indices starting records but referenced only from type records pub const LF_SKIP: u16 = 0x1200; pub const LF_ARGLIST: u16 = 0x1201; pub const LF_DEFARG_ST: u16 = 0x1202; pub const LF_FIELDLIST: u16 = 0x1203; pub const LF_DERIVED: u16 = 0x1204; pub const LF_BITFIELD: u16 = 0x1205; pub const LF_METHODLIST: u16 = 0x1206; pub const LF_DIMCONU: u16 = 0x1207; pub const LF_DIMCONLU: u16 = 0x1208; pub const LF_DIMVARU: u16 = 0x1209; pub const LF_DIMVARLU: u16 = 0x120a; pub const LF_BCLASS: u16 = 0x1400; pub const LF_VBCLASS: u16 = 0x1401; pub const LF_IVBCLASS: u16 = 0x1402; pub const LF_FRIENDFCN_ST: u16 = 0x1403; pub const LF_INDEX: u16 = 0x1404; pub const LF_MEMBER_ST: u16 = 0x1405; pub const LF_STMEMBER_ST: u16 = 0x1406; pub const LF_METHOD_ST: u16 = 0x1407; pub const LF_NESTTYPE_ST: u16 = 0x1408; pub const LF_VFUNCTAB: u16 = 0x1409; pub const LF_FRIENDCLS: u16 = 0x140a; pub const LF_ONEMETHOD_ST: u16 = 0x140b; pub const LF_VFUNCOFF: u16 = 0x140c; pub const LF_NESTTYPEEX_ST: u16 = 0x140d; pub const LF_MEMBERMODIFY_ST: u16 = 0x140e; pub const LF_MANAGED_ST: u16 = 0x140f; // Types w/ SZ names pub const LF_ST_MAX: u16 = 0x1500; pub const LF_TYPESERVER: u16 = 0x1501; // not referenced from symbol pub const LF_ENUMERATE: u16 = 0x1502; pub const LF_ARRAY: u16 = 0x1503; pub const LF_CLASS: u16 = 0x1504; pub const LF_STRUCTURE: u16 = 0x1505; pub const LF_UNION: u16 = 0x1506; pub const LF_ENUM: u16 = 0x1507; pub const LF_DIMARRAY: u16 = 0x1508; pub const LF_PRECOMP: u16 = 0x1509; // not referenced from symbol pub const LF_ALIAS: u16 = 0x150a; // alias (typedef) type pub const LF_DEFARG: u16 = 0x150b; pub const LF_FRIENDFCN: u16 = 0x150c; pub const LF_MEMBER: u16 = 0x150d; pub const LF_STMEMBER: u16 = 0x150e; pub const LF_METHOD: u16 = 0x150f; pub const LF_NESTTYPE: u16 = 0x1510; pub const LF_ONEMETHOD: u16 = 0x1511; pub const LF_NESTTYPEEX: u16 = 0x1512; pub const LF_MEMBERMODIFY: u16 = 0x1513; pub const LF_MANAGED: u16 = 0x1514; pub const LF_TYPESERVER2: u16 = 0x1515; pub const LF_STRIDED_ARRAY: u16 = 0x1516; // same as LF_ARRAY but with stride between adjacent elements pub const LF_HLSL: u16 = 
0x1517; pub const LF_MODIFIER_EX: u16 = 0x1518; pub const LF_INTERFACE: u16 = 0x1519; pub const LF_BINTERFACE: u16 = 0x151a; pub const LF_VECTOR: u16 = 0x151b; pub const LF_MATRIX: u16 = 0x151c; pub const LF_VFTABLE: u16 = 0x151d; // a virtual function table pub const LF_ENDOFLEAFRECORD: u16 = LF_VFTABLE; pub const LF_TYPE_LAST: u16 = LF_ENDOFLEAFRECORD + 1; // one greater than the last type record pub const LF_TYPE_MAX: u16 = LF_TYPE_LAST - 1; pub const LF_FUNC_ID: u16 = 0x1601; // global func ID pub const LF_MFUNC_ID: u16 = 0x1602; // member func ID pub const LF_BUILDINFO: u16 = 0x1603; // build info: tool version command line src/pdb file pub const LF_SUBSTR_LIST: u16 = 0x1604; // similar to LF_ARGLIST for list of sub strings pub const LF_STRING_ID: u16 = 0x1605; // string ID pub const LF_UDT_SRC_LINE: u16 = 0x1606; // source and line on where an UDT is defined // only generated by compiler pub const LF_UDT_MOD_SRC_LINE: u16 = 0x1607; // module source and line on where an UDT is defined // only generated by linker pub const LF_STRUCTURE19: u16 = 0x1609; pub const LF_ID_LAST: u16 = LF_UDT_MOD_SRC_LINE + 1; // one greater than the last ID record pub const LF_ID_MAX: u16 = LF_ID_LAST - 1; pub const LF_NUMERIC: u16 = 0x8000; pub const LF_CHAR: u16 = 0x8000; pub const LF_SHORT: u16 = 0x8001; pub const LF_USHORT: u16 = 0x8002; pub const LF_LONG: u16 = 0x8003; pub const LF_ULONG: u16 = 0x8004; pub const LF_REAL32: u16 = 0x8005; pub const LF_REAL64: u16 = 0x8006; pub const LF_REAL80: u16 = 0x8007; pub const LF_REAL128: u16 = 0x8008; pub const LF_QUADWORD: u16 = 0x8009; pub const LF_UQUADWORD: u16 = 0x800a; pub const LF_REAL48: u16 = 0x800b; pub const LF_COMPLEX32: u16 = 0x800c; pub const LF_COMPLEX64: u16 = 0x800d; pub const LF_COMPLEX80: u16 = 0x800e; pub const LF_COMPLEX128: u16 = 0x800f; pub const LF_VARSTRING: u16 = 0x8010; pub const LF_OCTWORD: u16 = 0x8017; pub const LF_UOCTWORD: u16 = 0x8018; pub const LF_DECIMAL: u16 = 0x8019; pub const LF_DATE: u16 = 0x801a; pub const LF_UTF8STRING: u16 = 0x801b; pub const LF_REAL16: u16 = 0x801c; pub const LF_PAD0: u16 = 0xf0; pub const LF_PAD1: u16 = 0xf1; pub const LF_PAD2: u16 = 0xf2; pub const LF_PAD3: u16 = 0xf3; pub const LF_PAD4: u16 = 0xf4; pub const LF_PAD5: u16 = 0xf5; pub const LF_PAD6: u16 = 0xf6; pub const LF_PAD7: u16 = 0xf7; pub const LF_PAD8: u16 = 0xf8; pub const LF_PAD9: u16 = 0xf9; pub const LF_PAD10: u16 = 0xfa; pub const LF_PAD11: u16 = 0xfb; pub const LF_PAD12: u16 = 0xfc; pub const LF_PAD13: u16 = 0xfd; pub const LF_PAD14: u16 = 0xfe; pub const LF_PAD15: u16 = 0xff; pdb-0.8.0/src/tpi/data.rs000064400000000000000000001127270072674642500133020ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. #![allow(missing_docs)] use crate::common::*; use crate::tpi::constants::*; use crate::tpi::primitive::*; /// Encapsulates parsed data about a `Type`. 
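///
/// # Example
///
/// A minimal sketch of matching on a parsed record; `type_data` is assumed to be a
/// `TypeData` obtained from `Type::parse`:
///
/// ```ignore
/// match type_data {
///     pdb::TypeData::Class(class) => println!("class `{}`, {} bytes", class.name, class.size),
///     pdb::TypeData::Enumeration(e) => println!("enum `{}`", e.name),
///     // most other variants expose their name (if any) via the generic accessor
///     other => println!("{:?}", other.name()),
/// }
/// ```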
#[non_exhaustive] #[derive(Debug, Clone, PartialEq, Eq)] pub enum TypeData<'t> { Primitive(PrimitiveType), Class(ClassType<'t>), Member(MemberType<'t>), MemberFunction(MemberFunctionType), OverloadedMethod(OverloadedMethodType<'t>), Method(MethodType<'t>), StaticMember(StaticMemberType<'t>), Nested(NestedType<'t>), BaseClass(BaseClassType), VirtualBaseClass(VirtualBaseClassType), VirtualFunctionTablePointer(VirtualFunctionTablePointerType), Procedure(ProcedureType), Pointer(PointerType), Modifier(ModifierType), Enumeration(EnumerationType<'t>), Enumerate(EnumerateType<'t>), Array(ArrayType), Union(UnionType<'t>), Bitfield(BitfieldType), FieldList(FieldList<'t>), ArgumentList(ArgumentList), MethodList(MethodList), } impl<'t> TypeData<'t> { /// Return the name of this TypeData, if any pub fn name(&self) -> Option> { let name = match self { Self::Class(ClassType { ref name, .. }) | Self::Member(MemberType { ref name, .. }) | Self::OverloadedMethod(OverloadedMethodType { ref name, .. }) | Self::StaticMember(StaticMemberType { ref name, .. }) | Self::Nested(NestedType { ref name, .. }) | Self::Enumeration(EnumerationType { ref name, .. }) | Self::Enumerate(EnumerateType { ref name, .. }) | Self::Union(UnionType { ref name, .. }) => name, _ => return None, }; Some(*name) } } /// Parse a type out of a `ParseBuffer`. pub(crate) fn parse_type_data<'t>(buf: &mut ParseBuffer<'t>) -> Result> { let leaf = buf.parse_u16()?; match leaf { // Basic types // ----------- // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1631-L1642 LF_CLASS | LF_CLASS_ST | LF_STRUCTURE | LF_STRUCTURE_ST | LF_INTERFACE => { let mut class = ClassType { kind: match leaf { LF_CLASS | LF_CLASS_ST => ClassKind::Class, LF_STRUCTURE | LF_STRUCTURE_ST => ClassKind::Struct, LF_INTERFACE => ClassKind::Interface, _ => unreachable!(), }, count: buf.parse_u16()?, properties: TypeProperties(buf.parse_u16()?), fields: parse_optional_type_index(buf)?, derived_from: parse_optional_type_index(buf)?, vtable_shape: parse_optional_type_index(buf)?, size: parse_unsigned(buf)?, name: parse_string(leaf, buf)?, unique_name: None, }; if class.properties.has_unique_name() { class.unique_name = Some(parse_string(leaf, buf)?); } Ok(TypeData::Class(class)) } // https://github.com/microsoft/microsoft-pdb/issues/50#issuecomment-737890766 LF_STRUCTURE19 => { let mut class = ClassType { kind: ClassKind::Struct, properties: TypeProperties(buf.parse_u32()? 
as u16), fields: parse_optional_type_index(buf)?, derived_from: parse_optional_type_index(buf)?, vtable_shape: parse_optional_type_index(buf)?, count: buf.parse_u16()?, size: parse_unsigned(buf)?, name: parse_string(leaf, buf)?, unique_name: None, }; if class.properties.has_unique_name() { class.unique_name = Some(parse_string(leaf, buf)?); } Ok(TypeData::Class(class)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2580-L2586 LF_MEMBER | LF_MEMBER_ST => Ok(TypeData::Member(MemberType { attributes: FieldAttributes(buf.parse_u16()?), field_type: buf.parse()?, offset: parse_unsigned(buf)?, name: parse_string(leaf, buf)?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2699-L2714 LF_NESTTYPE | LF_NESTTYPE_ST | LF_NESTTYPEEX | LF_NESTTYPEEX_ST => { // These structs differ in their use of the first 16 bits let raw_attr = match leaf { LF_NESTTYPEEX | LF_NESTTYPEEX_ST => buf.parse_u16()?, _ => { // discard padding buf.parse_u16()?; // assume zero 0 } }; Ok(TypeData::Nested(NestedType { attributes: FieldAttributes(raw_attr), nested_type: buf.parse()?, name: parse_string(leaf, buf)?, })) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1801-L1811 LF_MFUNCTION => Ok(TypeData::MemberFunction(MemberFunctionType { return_type: buf.parse()?, class_type: buf.parse()?, this_pointer_type: parse_optional_type_index(buf)?, attributes: FunctionAttributes(buf.parse_u16()?), parameter_count: buf.parse_u16()?, argument_list: buf.parse()?, this_adjustment: buf.parse_u32()?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2650-L2655 LF_METHOD | LF_METHOD_ST => Ok(TypeData::OverloadedMethod(OverloadedMethodType { count: buf.parse_u16()?, method_list: buf.parse()?, name: parse_string(leaf, buf)?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2671-L2678 LF_ONEMETHOD | LF_ONEMETHOD_ST => { let attr = FieldAttributes(buf.parse_u16()?); Ok(TypeData::Method(MethodType { attributes: attr, method_type: buf.parse()?, vtable_offset: if attr.is_intro_virtual() { Some(buf.parse_u32()? as u32) } else { // yes, this is variable length None }, name: parse_string(leaf, buf)?, })) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2499-L2505 LF_BCLASS | LF_BINTERFACE => Ok(TypeData::BaseClass(BaseClassType { kind: match leaf { LF_BCLASS => ClassKind::Class, LF_BINTERFACE => ClassKind::Interface, _ => unreachable!(), }, attributes: FieldAttributes(buf.parse_u16()?), base_class: buf.parse()?, offset: parse_unsigned(buf)? 
as u32, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2615-L2619 LF_VFUNCTAB => { // padding is supposed to be zero always, but… let's not check buf.parse_u16()?; Ok(TypeData::VirtualFunctionTablePointer( VirtualFunctionTablePointerType { table: buf.parse()?, }, )) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2599-L2604 LF_STMEMBER | LF_STMEMBER_ST => Ok(TypeData::StaticMember(StaticMemberType { attributes: FieldAttributes(buf.parse_u16()?), field_type: buf.parse()?, name: parse_string(leaf, buf)?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1469-L1506 LF_POINTER => { let underlying_type = buf.parse()?; let attributes = PointerAttributes(buf.parse()?); let containing_class = if attributes.pointer_to_member() { Some(buf.parse()?) } else { None }; Ok(TypeData::Pointer(PointerType { underlying_type, attributes, containing_class, })) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1775-L1782 LF_PROCEDURE => Ok(TypeData::Procedure(ProcedureType { return_type: parse_optional_type_index(buf)?, attributes: FunctionAttributes(buf.parse_u16()?), parameter_count: buf.parse_u16()?, argument_list: buf.parse()?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1460-L1464 LF_MODIFIER => { let type_index = buf.parse()?; // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1090-L1095 let flags = buf.parse_u16()?; Ok(TypeData::Modifier(ModifierType { underlying_type: type_index, constant: (flags & 0x01) != 0, volatile: (flags & 0x02) != 0, unaligned: (flags & 0x04) != 0, })) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1752-L1759 LF_ENUM | LF_ENUM_ST => { let mut enumeration = EnumerationType { count: buf.parse_u16()?, properties: TypeProperties(buf.parse_u16()?), underlying_type: buf.parse()?, fields: buf.parse()?, name: parse_string(leaf, buf)?, unique_name: None, }; if enumeration.properties.has_unique_name() { enumeration.unique_name = Some(parse_string(leaf, buf)?); } Ok(TypeData::Enumeration(enumeration)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2683-L2688 LF_ENUMERATE | LF_ENUMERATE_ST => Ok(TypeData::Enumerate(EnumerateType { attributes: FieldAttributes(buf.parse_u16()?), value: buf.parse()?, name: parse_string(leaf, buf)?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1564-L1579 LF_ARRAY | LF_ARRAY_ST | LF_STRIDED_ARRAY => { let element_type = buf.parse()?; let indexing_type = buf.parse()?; let stride: Option = if leaf == LF_STRIDED_ARRAY { Some(buf.parse_u32()?) } else { None }; let mut dimensions: Vec = Vec::new(); loop { let dim = parse_unsigned(buf)?; if dim > u64::from(u32::max_value()) { return Err(Error::UnimplementedFeature("u64 array sizes")); } dimensions.push(dim as u32); if buf.is_empty() { // shouldn't run out here return Err(Error::UnexpectedEof); } if buf.peek_u8()? 
== 0x00 { // end of dimensions buf.parse_u8()?; break; } } // eat any padding parse_padding(buf)?; //println!("array: {:x}", buf); //println!("dimensions: {:?}", dimensions); assert!(buf.is_empty()); Ok(TypeData::Array(ArrayType { element_type, indexing_type, stride, dimensions, })) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1657-L1664 LF_UNION | LF_UNION_ST => { let mut union = UnionType { count: buf.parse_u16()?, properties: TypeProperties(buf.parse_u16()?), fields: buf.parse()?, size: parse_unsigned(buf)?, name: parse_string(leaf, buf)?, unique_name: None, }; if union.properties.has_unique_name() { union.unique_name = Some(parse_string(leaf, buf)?); } Ok(TypeData::Union(union)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2164-L2170 LF_BITFIELD => Ok(TypeData::Bitfield(BitfieldType { underlying_type: buf.parse()?, length: buf.parse_u8()?, position: buf.parse_u8()?, })), // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1819-L1823 LF_VTSHAPE => { // TODO Err(Error::UnimplementedTypeKind(leaf)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1825-L1837 LF_VFTABLE => { // TODO Err(Error::UnimplementedTypeKind(leaf)) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2521-L2528 LF_VBCLASS | LF_IVBCLASS => Ok(TypeData::VirtualBaseClass(VirtualBaseClassType { direct: leaf == LF_VBCLASS, attributes: FieldAttributes(buf.parse_u16()?), base_class: buf.parse()?, base_pointer: buf.parse()?, base_pointer_offset: parse_unsigned(buf)? as u32, virtual_base_offset: parse_unsigned(buf)? as u32, })), // List types // ---------- // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2112-L2115 LF_FIELDLIST => { let mut fields: Vec> = Vec::new(); let mut continuation: Option = None; while !buf.is_empty() { match buf.peek_u16()? { LF_INDEX => { // continuation record // eat the leaf value buf.parse_u16()?; // parse the TypeIndex where we continue continuation = Some(buf.parse()?); } _ => { // other type // recurse because recursion is endless fun because [STACK OVERFLOW] fields.push(parse_type_data(buf)?); } } // consume any padding parse_padding(buf)?; } Ok(TypeData::FieldList(FieldList { fields, continuation, })) } LF_ARGLIST => { let count = buf.parse_u32()?; let mut arglist: Vec = Vec::with_capacity(count as usize); for _ in 0..count { arglist.push(buf.parse()?); } Ok(TypeData::ArgumentList(ArgumentList { arguments: arglist })) } LF_METHODLIST => { let mut methods: Vec = Vec::new(); while !buf.is_empty() { // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L2131-L2136 let attr = FieldAttributes(buf.parse_u16()?); buf.parse_u16()?; // padding methods.push(MethodListEntry { attributes: attr, method_type: buf.parse()?, vtable_offset: if attr.is_intro_virtual() { Some(buf.parse_u32()?) 
} else { None }, }); } Ok(TypeData::MethodList(MethodList { methods })) } _ => Err(Error::UnimplementedTypeKind(leaf)), } } #[inline] fn parse_optional_type_index(buf: &mut ParseBuffer<'_>) -> Result> { let index = buf.parse()?; if index == TypeIndex(0) || index == TypeIndex(0xffff) { Ok(None) } else { Ok(Some(index)) } } #[inline] fn parse_string<'t>(leaf: u16, buf: &mut ParseBuffer<'t>) -> Result> { if leaf > LF_ST_MAX { buf.parse_cstring() } else { buf.parse_u8_pascal_string() } } #[inline] fn parse_padding(buf: &mut ParseBuffer<'_>) -> Result<()> { while !buf.is_empty() && buf.peek_u8()? >= 0xf0 { let padding = buf.parse_u8()?; if padding > 0xf0 { // low four bits indicate amount of padding // (don't ask me what 0xf0 means, then) buf.take((padding & 0x0f) as usize - 1)?; } } Ok(()) } // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/pdbdump/pdbdump.cpp#L2417-L2456 fn parse_unsigned(buf: &mut ParseBuffer<'_>) -> Result { let leaf = buf.parse_u16()?; if leaf < LF_NUMERIC { // the u16 directly encodes a value return Ok(u64::from(leaf)); } match leaf { LF_CHAR => Ok(u64::from(buf.parse_u8()?)), LF_USHORT => Ok(u64::from(buf.parse_u16()?)), LF_ULONG => Ok(u64::from(buf.parse_u32()?)), LF_UQUADWORD => Ok(buf.parse_u64()?), _ => { if cfg!(debug_assertions) { unreachable!(); } else { Err(Error::UnexpectedNumericPrefix(leaf)) } } } } /* typedef struct CV_prop_t { unsigned short packed :1; // true if structure is packed unsigned short ctor :1; // true if constructors or destructors present unsigned short ovlops :1; // true if overloaded operators present unsigned short isnested :1; // true if this is a nested class unsigned short cnested :1; // true if this class contains nested types unsigned short opassign :1; // true if overloaded assignment (=) unsigned short opcast :1; // true if casting methods unsigned short fwdref :1; // true if forward reference (incomplete defn) unsigned short scoped :1; // scoped definition unsigned short hasuniquename :1; // true if there is a decorated name following the regular name unsigned short sealed :1; // true if class cannot be used as a base class unsigned short hfa :2; // CV_HFA_e unsigned short intrinsic :1; // true if class is an intrinsic type (e.g. __m128d) unsigned short mocom :2; // CV_MOCOM_UDT_e } CV_prop_t; */ #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct TypeProperties(u16); impl TypeProperties { /// Indicates if a type is packed via `#pragma pack` or similar. pub fn packed(self) -> bool { self.0 & 0x0001 != 0 } /// Indicates if a type has constructors or destructors. pub fn constructors(self) -> bool { self.0 & 0x0002 != 0 } /// Indicates if a type has any overloaded operators. pub fn overloaded_operators(self) -> bool { self.0 & 0x0004 != 0 } /// Indicates if a type is a nested type, e.g. a `union` defined inside a `class`. pub fn is_nested_type(self) -> bool { self.0 & 0x0008 != 0 } /// Indicates if a type contains nested types. pub fn contains_nested_types(self) -> bool { self.0 & 0x0010 != 0 } /// Indicates if a class has overloaded the assignment operator. pub fn overloaded_assignment(self) -> bool { self.0 & 0x0020 != 0 } pub fn overloaded_casting(self) -> bool { self.0 & 0x0040 != 0 } /// Indicates if a type is a forward reference, i.e. an incomplete Type that serves as a /// placeholder until a complete Type can be built. This is necessary for e.g. self-referential /// data structures, but other more common declaration/definition idioms can cause forward /// references too. 
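///
/// A minimal sketch of how a consumer might skip forward references while iterating;
/// `class` is assumed to be a parsed `ClassType`:
///
/// ```ignore
/// if class.properties.forward_reference() {
///     // incomplete placeholder record; the full definition (typically with `fields`
///     // populated and a non-zero size) usually appears elsewhere in the stream
///     continue;
/// }
/// ```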
pub fn forward_reference(self) -> bool { self.0 & 0x0080 != 0 } pub fn scoped_definition(self) -> bool { self.0 & 0x0100 != 0 } pub fn has_unique_name(self) -> bool { self.0 & 0x0200 != 0 } pub fn sealed(self) -> bool { self.0 & 0x0400 != 0 } pub fn hfa(self) -> u8 { ((self.0 & 0x1800) >> 11) as u8 } pub fn intrinsic_type(self) -> bool { self.0 & 0x1000 != 0 } pub fn mocom(self) -> u8 { ((self.0 & 0x6000) >> 14) as u8 } } /* typedef struct CV_fldattr_t { unsigned short access :2; // access protection CV_access_t unsigned short mprop :3; // method properties CV_methodprop_t unsigned short pseudo :1; // compiler generated fcn and does not exist unsigned short noinherit :1; // true if class cannot be inherited unsigned short noconstruct :1; // true if class cannot be constructed unsigned short compgenx :1; // compiler generated fcn and does exist unsigned short sealed :1; // true if method cannot be overridden unsigned short unused :6; // unused } CV_fldattr_t; typedef enum CV_methodprop_e { CV_MTvanilla = 0x00, CV_MTvirtual = 0x01, CV_MTstatic = 0x02, CV_MTfriend = 0x03, CV_MTintro = 0x04, CV_MTpurevirt = 0x05, CV_MTpureintro = 0x06 } CV_methodprop_e; */ #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct FieldAttributes(u16); impl FieldAttributes { #[inline] pub fn access(self) -> u8 { (self.0 & 0x0003) as u8 } #[inline] fn method_properties(self) -> u8 { ((self.0 & 0x001c) >> 2) as u8 } #[inline] pub fn is_static(self) -> bool { self.method_properties() == 0x02 } #[inline] pub fn is_virtual(self) -> bool { self.method_properties() == 0x01 } #[inline] pub fn is_pure_virtual(self) -> bool { self.method_properties() == 0x05 } #[inline] pub fn is_intro_virtual(self) -> bool { matches!(self.method_properties(), 0x04 | 0x06) } // TODO } #[allow(unused)] #[repr(u8)] enum Access { None = 0x00, Private = 0x01, Protected = 0x02, Public = 0x03, } // CV_call_t and CV_funcattr_t are always found back to back // Treat them as a combined u16 /* typedef struct CV_funcattr_t { unsigned char cxxreturnudt :1; // true if C++ style ReturnUDT unsigned char ctor :1; // true if func is an instance constructor unsigned char ctorvbase :1; // true if func is an instance constructor of a class with virtual bases unsigned char unused :5; // unused } CV_funcattr_t; */ #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct FunctionAttributes(u16); impl FunctionAttributes { pub fn calling_convention(self) -> u8 { (self.0 & 0xff) as u8 } pub fn cxx_return_udt(self) -> bool { (self.0 & 0x0100) > 0 } pub fn is_constructor(self) -> bool { (self.0 & 0x0200) > 0 } pub fn is_constructor_with_virtual_bases(self) -> bool { (self.0 & 0x0400) > 0 } } /// The kind of a `PointerType`. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum PointerKind { /// 16 bit pointer. Near16, /// 16:16 far pointer. Far16, /// 16:16 huge pointer. Huge16, /// Based on segment. BaseSeg, /// Based on value of base. BaseVal, /// Based on segment value of base. BaseSegVal, /// Based on address of base. BaseAddr, /// Based on segment address of base. BaseSegAddr, /// Based on type. BaseType, /// Based on self. BaseSelf, /// 32-bit pointer. Near32, /// 48-bit 16:32 pointer. Far32, /// 64-bit pointer. Ptr64, } /// The mode of a `PointerType`. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum PointerMode { /// A regular pointer. Pointer, /// L-Value reference. LValueReference, /// Pointer to data member. Member, /// Pointer to member function. MemberFunction, /// R-Value reference. 
RValueReference, } /* struct lfPointerAttr { unsigned long ptrtype :5; // ordinal specifying pointer type (CV_ptrtype_e) unsigned long ptrmode :3; // ordinal specifying pointer mode (CV_ptrmode_e) unsigned long isflat32 :1; // true if 0:32 pointer unsigned long isvolatile :1; // TRUE if volatile pointer unsigned long isconst :1; // TRUE if const pointer unsigned long isunaligned :1; // TRUE if unaligned pointer unsigned long isrestrict :1; // TRUE if restricted pointer (allow agressive opts) unsigned long size :6; // size of pointer (in bytes) unsigned long ismocom :1; // TRUE if it is a MoCOM pointer (^ or %) unsigned long islref :1; // TRUE if it is this pointer of member function with & ref-qualifier unsigned long isrref :1; // TRUE if it is this pointer of member function with && ref-qualifier unsigned long unused :10;// pad out to 32-bits for following cv_typ_t's } attr; */ #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct PointerAttributes(u32); impl PointerAttributes { /// Indicates the type of pointer. pub fn pointer_kind(self) -> PointerKind { match self.0 & 0x1f { 0x00 => PointerKind::Near16, 0x01 => PointerKind::Far16, 0x02 => PointerKind::Huge16, 0x03 => PointerKind::BaseSeg, 0x04 => PointerKind::BaseVal, 0x05 => PointerKind::BaseSegVal, 0x06 => PointerKind::BaseAddr, 0x07 => PointerKind::BaseSegAddr, 0x08 => PointerKind::BaseType, 0x09 => PointerKind::BaseSelf, 0x0a => PointerKind::Near32, 0x0b => PointerKind::Far32, 0x0c => PointerKind::Ptr64, _ => unreachable!(), } } /// Returns the mode of this pointer. pub fn pointer_mode(self) -> PointerMode { match (self.0 >> 5) & 0x7 { 0x00 => PointerMode::Pointer, 0x01 => PointerMode::LValueReference, 0x02 => PointerMode::Member, 0x03 => PointerMode::MemberFunction, 0x04 => PointerMode::RValueReference, _ => unreachable!(), } } /// Returns `true` if this points to a member (either data or function). pub fn pointer_to_member(self) -> bool { matches!( self.pointer_mode(), PointerMode::Member | PointerMode::MemberFunction ) } /// Returns `true` if this is a flat `0:32` pointer. pub fn is_flat_32(self) -> bool { (self.0 & 0x100) != 0 } /// Returns `true` if this pointer is `volatile`. pub fn is_volatile(self) -> bool { (self.0 & 0x200) != 0 } /// Returns `true` if this pointer is `const`. pub fn is_const(self) -> bool { (self.0 & 0x400) != 0 } /// Returns `true` if this pointer is unaligned. pub fn is_unaligned(self) -> bool { (self.0 & 0x800) != 0 } /// Returns `true` if this pointer is restricted (allow aggressive opts). pub fn is_restrict(self) -> bool { (self.0 & 0x1000) != 0 } /// Is this a C++ reference, as opposed to a C pointer? pub fn is_reference(self) -> bool { matches!( self.pointer_mode(), PointerMode::LValueReference | PointerMode::RValueReference ) } /// The size of the pointer in bytes. pub fn size(self) -> u8 { let size = ((self.0 >> 13) & 0x3f) as u8; if size != 0 { return size; } match self.pointer_kind() { PointerKind::Near32 | PointerKind::Far32 => 4, PointerKind::Ptr64 => 8, _ => 0, } } /// Returns `true` if this is a MoCOM pointer (`^` or `%`). pub fn is_mocom(self) -> bool { (self.0 & 0x40000) != 0 } } /// The information parsed from a type record with kind /// `LF_CLASS`, `LF_CLASS_ST`, `LF_STRUCTURE`, `LF_STRUCTURE_ST` or `LF_INTERFACE`. 
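///
/// A minimal sketch of following `fields` to the class members, assuming `class` is a
/// parsed `ClassType` and `type_finder` is a populated `TypeFinder`:
///
/// ```ignore
/// if let Some(fields) = class.fields {
///     if let pdb::TypeData::FieldList(list) = type_finder.find(fields)?.parse()? {
///         for field in &list.fields {
///             // each `field` is another `TypeData`, e.g. `TypeData::Member`
///         }
///     }
/// }
/// ```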
// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L1631 #[derive(Debug, Clone, PartialEq, Eq)] pub struct ClassType<'t> { pub kind: ClassKind, /// Count of number of elements in this class pub count: u16, pub properties: TypeProperties, /// Type index which describes the fields of this class pub fields: Option, /// Type index which describes the class from which this class is derived, if any pub derived_from: Option, /// Type index which describes the shape of the vtable for this class, if any pub vtable_shape: Option, pub size: u64, /// Display name of the class including type parameters. pub name: RawString<'t>, /// Mangled name, if present. pub unique_name: Option>, } /// Used by `ClassType` to distinguish class-like concepts. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum ClassKind { Class, Struct, Interface, } /// The information parsed from a type record with kind `LF_MEMBER` or `LF_MEMBER_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct MemberType<'t> { pub attributes: FieldAttributes, pub field_type: TypeIndex, pub offset: u64, pub name: RawString<'t>, } /// The information parsed from a type record with kind `LF_MFUNCTION`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct MemberFunctionType { pub return_type: TypeIndex, pub class_type: TypeIndex, pub this_pointer_type: Option, pub attributes: FunctionAttributes, pub parameter_count: u16, pub argument_list: TypeIndex, pub this_adjustment: u32, } /// The information parsed from a type record with kind `LF_METHOD` or `LF_METHOD_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct OverloadedMethodType<'t> { pub count: u16, pub method_list: TypeIndex, pub name: RawString<'t>, } /// The information parsed from a type record with kind `LF_ONEMETHOD` or `LF_ONEMETHOD_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct MethodType<'t> { pub attributes: FieldAttributes, pub method_type: TypeIndex, pub vtable_offset: Option, pub name: RawString<'t>, } /// The information parsed from a type record with kind `LF_STMEMBER` or `LF_STMEMBER_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct StaticMemberType<'t> { pub attributes: FieldAttributes, pub field_type: TypeIndex, pub name: RawString<'t>, } /// The information parsed from a type record with kind /// `LF_NESTTYPE`, `LF_NESTTYPE_ST`, `LF_NESTTYPEEX`, or `LF_NESTTYPEEX_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct NestedType<'t> { pub attributes: FieldAttributes, pub nested_type: TypeIndex, pub name: RawString<'t>, } /// The information parsed from a type record with kind `LF_BCLASS` or `LF_BINTERFACE`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct BaseClassType { pub kind: ClassKind, pub attributes: FieldAttributes, pub base_class: TypeIndex, /// Describes the offset of the base class within the class pub offset: u32, } /// The information parsed from a type record with kind `LF_VBCLASS` or `LF_IVBCLASS`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct VirtualBaseClassType { pub direct: bool, pub attributes: FieldAttributes, pub base_class: TypeIndex, pub base_pointer: TypeIndex, pub base_pointer_offset: u32, pub virtual_base_offset: u32, } /// The information parsed from a type record with kind `LF_VFUNCTAB`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct VirtualFunctionTablePointerType { pub table: TypeIndex, } /// The information parsed from a type record with kind `LF_PROCEDURE`. 
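///
/// A minimal sketch of listing a procedure's parameter types, assuming `procedure` is a
/// parsed `ProcedureType` and `type_finder` is a populated `TypeFinder`:
///
/// ```ignore
/// if let pdb::TypeData::ArgumentList(args) = type_finder.find(procedure.argument_list)?.parse()? {
///     for arg in &args.arguments {
///         // `arg` is a `TypeIndex` describing one parameter; resolve it via the finder
///     }
/// }
/// ```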
#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct ProcedureType { pub return_type: Option, pub attributes: FunctionAttributes, pub parameter_count: u16, pub argument_list: TypeIndex, } /// The information parsed from a type record with kind `LF_POINTER`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct PointerType { pub underlying_type: TypeIndex, pub attributes: PointerAttributes, pub containing_class: Option, } /// The information parsed from a type record with kind `LF_MODIFIER`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct ModifierType { pub underlying_type: TypeIndex, pub constant: bool, pub volatile: bool, pub unaligned: bool, } /// The information parsed from a type record with kind `LF_ENUM` or `LF_ENUM_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct EnumerationType<'t> { pub count: u16, pub properties: TypeProperties, pub underlying_type: TypeIndex, pub fields: TypeIndex, pub name: RawString<'t>, pub unique_name: Option>, } /// The information parsed from a type record with kind `LF_ENUMERATE` or `LF_ENUMERATE_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct EnumerateType<'t> { pub attributes: FieldAttributes, pub value: Variant, pub name: RawString<'t>, } /// The information parsed from a type record with kind /// `LF_ARRAY`, `LF_ARRAY_ST` or `LF_STRIDED_ARRAY`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ArrayType { pub element_type: TypeIndex, pub indexing_type: TypeIndex, pub stride: Option, /// Contains array dimensions as specified in the PDB. This is not what you expect: /// /// * Dimensions are specified in terms of byte sizes, not element counts. /// * Multidimensional arrays aggregate the lower dimensions into the sizes of the higher /// dimensions. /// /// Thus a `float[4][4]` has `dimensions: [16, 64]`. Determining array dimensions in terms /// of element counts requires determining the size of the `element_type` and iteratively /// dividing. pub dimensions: Vec, } /// The information parsed from a type record with kind `LF_UNION` or `LF_UNION_ST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct UnionType<'t> { pub count: u16, pub properties: TypeProperties, pub fields: TypeIndex, pub size: u64, pub name: RawString<'t>, pub unique_name: Option>, } /// The information parsed from a type record with kind `LF_BITFIELD`. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct BitfieldType { pub underlying_type: TypeIndex, pub length: u8, pub position: u8, } /// The information parsed from a type record with kind `LF_FIELDLIST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct FieldList<'t> { pub fields: Vec>, /// Sometimes fields can't all fit in a single FieldList, in which case the FieldList /// refers to another FieldList in a chain. pub continuation: Option, } /// The information parsed from a type record with kind `LF_ARGLIST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ArgumentList { pub arguments: Vec, } /// The information parsed from a type record with kind `LF_METHODLIST`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct MethodList { pub methods: Vec, } /// An entry in a `MethodList`. 
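///
/// Entries are usually reached by resolving an overloaded method's list. A minimal
/// sketch, assuming `method` is a parsed `OverloadedMethodType` and `type_finder` is a
/// populated `TypeFinder`:
///
/// ```ignore
/// if let pdb::TypeData::MethodList(list) = type_finder.find(method.method_list)?.parse()? {
///     for entry in &list.methods {
///         // `entry.method_type` describes one overload's signature
///     }
/// }
/// ```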
#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct MethodListEntry { pub attributes: FieldAttributes, pub method_type: TypeIndex, pub vtable_offset: Option, } /* // arrays: ParseBuf::from("\x03\x15\xa0\xdc\x0b\x00\x23\x00\x00\x00\x40\x00\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\xa0\xdc\x0b\x00\x23\x00\x00\x00\x50\x00\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\xa9\x12\x00\x00\x23\x00\x00\x00\x50\x02\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\xac\x12\x00\x00\x23\x00\x00\x00\x6c\x00\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\x14\x10\x00\x00\x23\x00\x00\x00\x80\x00\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\x75\x00\x00\x00\x23\x00\x00\x00\x28\x00\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\x14\x10\x00\x00\x23\x00\x00\x00\x70\x0e\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\x31\x14\x00\x00\x23\x00\x00\x00\x04\x02\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\x31\x14\x00\x00\x23\x00\x00\x00\x0e\x03\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\x77\x13\x00\x00\x23\x00\x00\x00\x02\x80\xbd\xda\x00\xf3\xf2\xf1").as_bytes(), ParseBuf::from("\x03\x15\xb7\x16\x00\x00\x23\x00\x00\x00\x28\x00\x00\xf1").as_bytes(), ParseBuf::from("\x03\x15\x14\x10\x00\x00\x23\x00\x00\x00\x55\x00\x00\xf1").as_bytes(), */ #[test] fn kind_1609() { let data = &[ 9, 22, 0, 2, 0, 0, 22, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 6, 0, 72, 95, 115, 105, 122, 101, 0, 46, 63, 65, 85, 72, 95, 115, 105, 122, 101, 64, 64, 0, ][..]; assert_eq!( parse_type_data(&mut ParseBuffer::from(data)).expect("parse"), TypeData::Class(ClassType { kind: ClassKind::Struct, count: 2, properties: TypeProperties(512), fields: Some(TypeIndex(0x1016)), derived_from: None, vtable_shape: None, size: 6, name: RawString::from("H_size"), unique_name: Some(RawString::from(".?AUH_size@@")), }) ); } pdb-0.8.0/src/tpi/header.rs000064400000000000000000000074550072674642500136220ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use crate::common::*; // OFFCB: #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Slice { pub offset: i32, // technically a "long", but... 32 bits for life? pub size: u32, } // HDR: // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/PDB/dbi/tpi.h#L45 #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct Header { pub version: u32, pub header_size: u32, pub minimum_index: u32, pub maximum_index: u32, pub gprec_size: u32, pub tpi_hash_stream: u16, pub tpi_hash_pad_stream: u16, pub hash_key_size: u32, pub hash_bucket_size: u32, pub hash_values: Slice, pub ti_off: Slice, pub hash_adj: Slice, // "offcb of hash head list, maps (hashval,ti), where ti is the head of the hashval chain." } impl Header { pub(crate) fn empty() -> Self { let empty_slice = Slice { offset: 0, size: 0 }; Self { version: 0, header_size: 0, minimum_index: 0, maximum_index: 0, gprec_size: 0, tpi_hash_stream: 0, tpi_hash_pad_stream: 0, hash_key_size: 0, hash_bucket_size: 0, hash_values: empty_slice, ti_off: empty_slice, hash_adj: empty_slice, } } pub(crate) fn parse(buf: &mut ParseBuffer<'_>) -> Result { debug_assert!(buf.pos() == 0); if buf.is_empty() { // Special case when the buffer is completely empty. This indicates a missing TPI or IPI // stream. In this case, `ItemInformation` acts like an empty shell that never resolves // any types. 
return Ok(Self::empty()); } let header = Self { version: buf.parse()?, header_size: buf.parse()?, minimum_index: buf.parse()?, maximum_index: buf.parse()?, gprec_size: buf.parse()?, tpi_hash_stream: buf.parse()?, tpi_hash_pad_stream: buf.parse()?, hash_key_size: buf.parse()?, hash_bucket_size: buf.parse()?, hash_values: Slice { offset: buf.parse()?, size: buf.parse()?, }, ti_off: Slice { offset: buf.parse()?, size: buf.parse()?, }, hash_adj: Slice { offset: buf.parse()?, size: buf.parse()?, }, }; // we read 56 bytes // make sure that's okay let bytes_read = buf.pos() as u32; if header.header_size < bytes_read { return Err(Error::InvalidTypeInformationHeader( "header size is impossibly small", )); } else if header.header_size > 1024 { return Err(Error::InvalidTypeInformationHeader( "header size is unreasonably large", )); } // consume anything else the header says belongs to the header buf.take((header.header_size - bytes_read) as usize)?; // do some final validations if header.minimum_index < 4096 { return Err(Error::InvalidTypeInformationHeader( "minimum type index is < 4096", )); } if header.maximum_index < header.minimum_index { return Err(Error::InvalidTypeInformationHeader( "maximum type index is < minimum type index", )); } // success Ok(header) } } pdb-0.8.0/src/tpi/id.rs000064400000000000000000000137040072674642500127600ustar 00000000000000use scroll::ctx::TryFromCtx; use crate::common::*; use crate::tpi::constants::*; #[inline] fn parse_optional_id_index(buf: &mut ParseBuffer<'_>) -> Result> { Ok(match buf.parse()? { IdIndex(0) => None, index => Some(index), }) } #[inline] fn parse_string<'t>(leaf: u16, buf: &mut ParseBuffer<'t>) -> Result> { if leaf > LF_ST_MAX { buf.parse_cstring() } else { buf.parse_u8_pascal_string() } } /// Encapsulates parsed data about an `Id`. #[non_exhaustive] #[derive(Debug, Clone, PartialEq, Eq)] pub enum IdData<'t> { /// Global function, usually inlined. Function(FunctionId<'t>), /// Member function, usually inlined. MemberFunction(MemberFunctionId<'t>), /// Tool, version and command line build information. BuildInfo(BuildInfoId), /// A list of substrings. StringList(StringListId), /// A string. String(StringId<'t>), /// Source and line of the definition of a User Defined Type (UDT). 
UserDefinedTypeSource(UserDefinedTypeSourceId), } impl<'t> IdData<'t> {} impl<'t> TryFromCtx<'t, scroll::Endian> for IdData<'t> { type Error = Error; fn try_from_ctx(this: &'t [u8], _ctx: scroll::Endian) -> Result<(Self, usize)> { let mut buf = ParseBuffer::from(this); let leaf = buf.parse_u16()?; let data = match leaf { LF_FUNC_ID => IdData::Function(FunctionId { scope: parse_optional_id_index(&mut buf)?, function_type: buf.parse()?, name: parse_string(leaf, &mut buf)?, }), LF_MFUNC_ID => IdData::MemberFunction(MemberFunctionId { parent: buf.parse()?, function_type: buf.parse()?, name: parse_string(leaf, &mut buf)?, }), LF_BUILDINFO => IdData::BuildInfo({ let count = buf.parse::()?; let mut arguments = Vec::with_capacity(count as usize); for _ in 0..count { arguments.push(buf.parse()?); } BuildInfoId { arguments } }), LF_SUBSTR_LIST => IdData::StringList({ let count = buf.parse::()?; let mut substrings = Vec::with_capacity(count as usize); for _ in 0..count { substrings.push(buf.parse()?); } StringListId { substrings } }), LF_STRING_ID => IdData::String(StringId { substrings: parse_optional_id_index(&mut buf)?, name: parse_string(leaf, &mut buf)?, }), LF_UDT_SRC_LINE | LF_UDT_MOD_SRC_LINE => { let udt = buf.parse()?; let file_id = buf.parse()?; let line = buf.parse()?; let source_file = if leaf == self::LF_UDT_SRC_LINE { UserDefinedTypeSourceFileRef::Local(IdIndex(file_id)) } else { UserDefinedTypeSourceFileRef::Remote(buf.parse()?, StringRef(file_id)) }; IdData::UserDefinedTypeSource(UserDefinedTypeSourceId { udt, source_file, line, }) } _ => return Err(Error::UnimplementedTypeKind(leaf)), }; Ok((data, buf.pos())) } } /// Global function, usually inlined. /// /// This Id is usually referenced by [`InlineSiteSymbol`](crate::InlineSiteSymbol). #[derive(Clone, Debug, PartialEq, Eq)] pub struct FunctionId<'t> { /// Parent scope of this id. pub scope: Option, /// Index of the function type declaration. pub function_type: TypeIndex, /// Name of the function. pub name: RawString<'t>, } /// Member function, usually inlined. /// /// This Id is usually referenced by [`InlineSiteSymbol`](crate::InlineSiteSymbol). #[derive(Clone, Debug, PartialEq, Eq)] pub struct MemberFunctionId<'t> { /// Index of the parent type. pub parent: TypeIndex, /// Index of the member function type declaration. pub function_type: TypeIndex, /// Name of the member function. pub name: RawString<'t>, } /// Tool, version and command line build information. /// /// This Id is usually referenced by [`BuildInfoSymbol`](crate::BuildInfoSymbol). #[derive(Clone, Debug, PartialEq, Eq)] pub struct BuildInfoId { /// Indexes of build arguments. pub arguments: Vec, } /// A list of substrings. /// /// This Id is usually referenced by [`StringId`]. #[derive(Clone, Debug, PartialEq, Eq)] pub struct StringListId { /// The list of substrings. pub substrings: Vec, } /// A string. /// /// This Id is usually referenced by [`FunctionId`] and contains the full namespace of a function. #[derive(Clone, Debug, PartialEq, Eq)] pub struct StringId<'t> { /// Index of the list of substrings. pub substrings: Option, /// The string. pub name: RawString<'t>, } /// A reference to the source file name of a [`UserDefinedTypeSourceId`]. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum UserDefinedTypeSourceFileRef { /// Index of the source file name in the [`IdInformation`](crate::IdInformation) of the same module. /// /// The index should resolve to a [`IdData::String`]. 
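///
/// A minimal sketch of resolving such a local reference, assuming `id_finder` is a
/// populated `IdFinder` for the same module and `index` is the contained `IdIndex`:
///
/// ```ignore
/// if let pdb::IdData::String(s) = id_finder.find(index)?.parse()? {
///     println!("UDT defined in {}", s.name);
/// }
/// ```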
Local(IdIndex), /// Reference into the [`StringTable`](crate::StringTable) of another module that contributes /// this UDT definition. /// /// Use [`DebugInformation::modules`](crate::DebugInformation::modules) to resolve the /// corresponding module. Remote(u16, StringRef), } /// Source and line of the definition of a User Defined Type (UDT). #[derive(Clone, Debug, PartialEq, Eq)] pub struct UserDefinedTypeSourceId { /// Index of the UDT's type definition. pub udt: TypeIndex, /// Reference to the source file name. pub source_file: UserDefinedTypeSourceFileRef, /// Line number in the source file. pub line: u32, } pdb-0.8.0/src/tpi/mod.rs000064400000000000000000000462640072674642500131520ustar 00000000000000// Copyright 2017 pdb Developers // // Licensed under the Apache License, Version 2.0, or the MIT license , at your option. This file may not be // copied, modified, or distributed except according to those terms. use std::fmt; use std::marker::PhantomData; use std::result; use crate::common::*; use crate::msf::Stream; use crate::FallibleIterator; pub(crate) mod constants; mod data; mod header; mod id; mod primitive; use self::header::*; use self::primitive::type_data_for_primitive; pub use self::data::*; pub use self::id::*; pub use self::primitive::{Indirection, PrimitiveKind, PrimitiveType}; /// Zero-copy access to a PDB type or id stream. /// /// PDBs store two kinds of related streams with an identical internal structure: /// /// - [`TypeInformation`] (TPI stream) contains information on primitive types, classes and /// procedures, including their return type and arguments. Its contents are identified by /// [`TypeIndex`]. /// - [`IdInformation`] (IPI stream) is a stricter version of the above stream that contains inline /// functions, build infos and source references. Its contents are identified by [`IdIndex`]. /// /// Items in these streams are stored by their index in ascending order. Symbols declared in /// [`ModuleInfo`](crate::ModuleInfo) can refer to items in both streams, as well as items to other /// items with one exception: `Type`s cannot refer to `Id`s. Also, the PDB format requires that /// items refer only to types with lower indexes. Thus, the stream of items forms a directed acyclic /// graph. /// /// Both streams can iterate by their index using [`ItemInformation::iter`]. Additionally, /// [`ItemFinder`] is a secondary data structure to provide efficient backtracking for random /// access. /// /// There are type definitions for both streams: /// /// - `ItemInformation`: [`TypeInformation`] and [`IdInformation`] /// - [`ItemFinder`]: [`TypeFinder`] and [`IdFinder`] /// - [`ItemIndex`]: [`TypeIndex`] and [`IdIndex`] /// - [`ItemIter`]: [`TypeIter`] and [`IdIter`] /// - [`Item`]: [`Type`] and [`Id`] /// /// # Examples /// /// Iterating over the types while building a `TypeFinder`: /// /// ``` /// # use pdb::FallibleIterator; /// # /// # fn test() -> pdb::Result { /// # let file = std::fs::File::open("fixtures/self/foo.pdb")?; /// # let mut pdb = pdb::PDB::open(file)?; /// /// let type_information = pdb.type_information()?; /// let mut type_finder = type_information.finder(); /// /// # let expected_count = type_information.len(); /// # let mut count: usize = 0; /// let mut iter = type_information.iter(); /// while let Some(typ) = iter.next()? 
{ /// // build the type finder as we go /// type_finder.update(&iter); /// /// // parse the type record /// match typ.parse() { /// Ok(pdb::TypeData::Class(pdb::ClassType {name, properties, fields: Some(fields), ..})) => { /// // this Type describes a class-like type with fields /// println!("type {} is a class named {}", typ.index(), name); /// /// // `fields` is a TypeIndex which refers to a FieldList /// // To find information about the fields, find and parse that Type /// match type_finder.find(fields)?.parse()? { /// pdb::TypeData::FieldList(list) => { /// // `fields` is a Vec /// for field in list.fields { /// if let pdb::TypeData::Member(member) = field { /// // follow `member.field_type` as desired /// println!(" - field {} at offset {:x}", member.name, member.offset); /// } else { /// // handle member functions, nested types, etc. /// } /// } /// /// if let Some(more_fields) = list.continuation { /// // A FieldList can be split across multiple records /// // TODO: follow `more_fields` and handle the next FieldList /// } /// } /// _ => { } /// } /// /// }, /// Ok(_) => { /// // ignore everything that's not a class-like type /// }, /// Err(pdb::Error::UnimplementedTypeKind(_)) => { /// // found an unhandled type record /// // this probably isn't fatal in most use cases /// }, /// Err(e) => { /// // other error, probably is worth failing /// return Err(e); /// } /// } /// # count += 1; /// } /// /// # assert_eq!(expected_count, count); /// # Ok(count) /// # } /// # assert!(test().expect("test") > 8000); /// ``` #[derive(Debug)] pub struct ItemInformation<'s, I> { stream: Stream<'s>, header: Header, _ph: PhantomData<&'s I>, } impl<'s, I> ItemInformation<'s, I> where I: ItemIndex, { /// Parses `TypeInformation` from raw stream data. pub(crate) fn parse(stream: Stream<'s>) -> Result { let mut buf = stream.parse_buffer(); let header = Header::parse(&mut buf)?; let _ph = PhantomData; Ok(Self { stream, header, _ph, }) } /// Returns an iterator that can traverse the type table in sequential order. pub fn iter(&self) -> ItemIter<'_, I> { // get a parse buffer let mut buf = self.stream.parse_buffer(); // drop the header // this can't fail; we've already read this once buf.take(self.header.header_size as usize) .expect("dropping TPI header"); ItemIter { buf, index: self.header.minimum_index, _ph: PhantomData, } } /// Returns the number of items contained in this `ItemInformation`. /// /// Note that in the case of the type stream ([`TypeInformation`]) primitive types are not /// stored in the PDB file. The number of distinct types reachable via this table will be higher /// than `len()`. pub fn len(&self) -> usize { (self.header.maximum_index - self.header.minimum_index) as usize } /// Returns whether this `ItemInformation` contains any data. pub fn is_empty(&self) -> bool { self.len() == 0 } /// Returns an `ItemFinder` with a default time-space tradeoff useful for access by /// [`ItemIndex`]. /// /// The `ItemFinder` is initially empty and must be populated by iterating. See the struct-level /// docs for an example. pub fn finder(&self) -> ItemFinder<'_, I> { ItemFinder::new(self, 3) } } /// This buffer is used when a `Type` refers to a primitive type. It doesn't contain anything /// type-specific, but it does parse as `raw_type() == 0xffff`, which is a reserved value. Seems /// like a reasonable thing to do. const PRIMITIVE_TYPE: &[u8] = b"\xff\xff"; /// Represents an entry in the type or id stream. 
/// /// An `Item` has been minimally processed and may not be correctly formed or even understood by /// this library. To avoid copying, `Items`s exist as references to data owned by the parent /// `ItemInformation`. Therefore, an `Item` may not outlive its parent. /// /// The data held by items can be parsed: /// /// - [`Type::parse`](Self::parse) returns [`TypeData`]. /// - [`Id::parse`](Self::parse) returns [`IdData`]. /// /// Depending on the stream, this can either be a [`Type`] or [`Id`]. #[derive(Copy, Clone, PartialEq)] pub struct Item<'t, I> { index: I, data: &'t [u8], } impl<'t, I> Item<'t, I> where I: ItemIndex, { /// Returns this item's index. /// /// Depending on the stream, either a [`TypeIndex`] or [`IdIndex`]. pub fn index(&self) -> I { self.index } /// Returns the the binary data length in the on-disk format. /// /// Items are prefixed by a 16-bit length number, which is not included in this length. pub fn len(&self) -> usize { self.data.len() } /// Returns whether this items's data is empty. /// /// Items are prefixed by a 16-bit length number, which is not included in this operation. pub fn is_empty(&self) -> bool { self.data.is_empty() } /// Returns the identifier of the kind of data stored by this this `Item`. /// /// As a special case, if this is a primitive [`Type`], this function will return `0xffff`. #[inline] pub fn raw_kind(&self) -> u16 { debug_assert!(self.data.len() >= 2); // assemble a little-endian u16 u16::from(self.data[0]) | (u16::from(self.data[1]) << 8) } } impl<'t, I> fmt::Debug for Item<'t, I> where I: ItemIndex, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "Type{{ kind: 0x{:04x} [{} bytes] }}", self.raw_kind(), self.data.len() ) } } /// In-memory index for efficient random-access to [`Item`]s by index. /// /// `ItemFinder` can be obtained via [`ItemInformation::finder`]. It starts out empty and must be /// populated by calling [`ItemFinder::update`] while iterating. There are two typedefs for easier /// use: /// /// - [`TypeFinder`] for finding [`Type`]s in a [`TypeInformation`](crate::TypeInformation) (TPI stream). /// - [`IdFinder`] for finding [`Id`]s in a [`IdInformation`](crate::IdInformation) (IPI stream). /// /// `ItemFinder` allocates all the memory it needs when it is first created. The footprint is /// directly proportional to the total number of types; see [`ItemInformation::len`]. /// /// # Time/space trade-off /// /// The naïve approach is to store the position of each `Item` as they are covered in the stream. /// The cost is memory: namely one `u32` per `Item`. /// /// Compare this approach to an `ItemFinder` that stores the position of every Nth item. Memory /// requirements would be reduced by a factor of N in exchange for requiring an average of (N-1)/2 /// iterations per lookup. However, iteration is cheap sequential memory access, and spending less /// memory on `ItemFinder` means more of the data can fit in the cache, so this is likely a good /// trade-off for small-ish values of N. /// /// `ItemFinder` is parameterized by `shift` which controls this trade-off as powers of two: /// /// * If `shift` is 0, `ItemFinder` stores 4 bytes per `Item` and always performs direct lookups. /// * If `shift` is 1, `ItemFinder` stores 2 bytes per `Item` and averages 0.5 iterations per /// lookup. /// * If `shift` is 2, `ItemFinder` stores 1 byte per `Item` and averages 1.5 iterations per /// lookup. /// * If `shift` is 3, `ItemFinder` stores 4 bits per `Item` and averages 3.5 iterations per /// lookup. 
/// * If `shift` is 4, `ItemFinder` stores 2 bits per `Item` and averages 7.5 iterations per /// lookup. /// * If `shift` is 5, `ItemFinder` stores 1 bit per `Item` and averages 15.5 iterations per /// lookup. /// /// This list can continue but with rapidly diminishing returns. Iteration cost is proportional to /// item size, which varies, but typical numbers from a large program are: /// /// * 24% of items are 12 bytes /// * 34% of items are <= 16 bytes /// * 84% of items are <= 32 bytes /// /// A `shift` of 2 or 3 is likely appropriate for most workloads. 500K items would require 1 MB or /// 500 KB of memory respectively, and lookups -- though indirect -- would still usually need only /// one or two 64-byte cache lines. #[derive(Debug)] pub struct ItemFinder<'t, I> { buffer: ParseBuffer<'t>, minimum_index: u32, maximum_index: u32, positions: Vec, shift: u8, _ph: PhantomData<&'t I>, } impl<'t, I> ItemFinder<'t, I> where I: ItemIndex, { fn new(info: &'t ItemInformation<'_, I>, shift: u8) -> Self { // maximum index is the highest index + 1. let count = info.header.maximum_index - info.header.minimum_index; let round_base = (1 << shift) - 1; let shifted_count = ((count + round_base) & !round_base) >> shift; let mut positions = Vec::with_capacity(shifted_count as usize); if shifted_count > 0 { // add record zero, which is identical regardless of shift positions.push(info.header.header_size); } Self { buffer: info.stream.parse_buffer(), minimum_index: info.header.minimum_index, maximum_index: info.header.maximum_index, positions, shift, _ph: PhantomData, } } /// Given an index, find which position in the Vec we should jump to and how many times we /// need to iterate to find the requested type. /// /// `shift` refers to the size of these bit shifts. #[inline] fn resolve(&self, type_index: u32) -> (usize, usize) { let raw = type_index - self.minimum_index; ( (raw >> self.shift) as usize, (raw & ((1 << self.shift) - 1)) as usize, ) } /// Returns the highest index which is currently served by this `ItemFinder`. /// /// When iterating through the stream, you shouldn't need to consider this. Items only ever /// reference lower indexes. However, when loading items referenced by the symbols stream, this /// can be useful to check whether iteration is required. #[inline] pub fn max_index(&self) -> I { I::from(match self.positions.len() { 0 => 0, // special case for an empty type index len => (len << self.shift) as u32 + self.minimum_index - 1, }) } /// Update this `ItemFinder` based on the current position of a [`ItemIter`]. /// /// Do this each time you call `.next()`. See documentation of [`ItemInformation`] for an /// example. #[inline] pub fn update(&mut self, iterator: &ItemIter<'t, I>) { let (vec_index, iteration_count) = self.resolve(iterator.index); if iteration_count == 0 && vec_index == self.positions.len() { let pos = iterator.buf.pos(); assert!(pos < u32::max_value() as usize); self.positions.push(pos as u32); } } /// Find an `Item` by its index. /// /// # Errors /// /// * `Error::TypeNotFound(index)` if you ask for an item that doesn't exist. /// * `Error::TypeNotIndexed(index, max_index)` if you ask for an item that is known to exist /// but is not currently known by this `ItemFinder`. 
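///
/// # Example
///
/// A minimal sketch, assuming `type_finder` is a `TypeFinder` that has been kept up to
/// date (via `update`) at least as far as `index`:
///
/// ```ignore
/// match type_finder.find(index)?.parse() {
///     Ok(data) => { /* use the parsed `TypeData` */ }
///     Err(pdb::Error::UnimplementedTypeKind(_)) => { /* often safe to skip */ }
///     Err(e) => return Err(e),
/// }
/// ```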
    pub fn find(&self, index: I) -> Result<Item<'t, I>> {
        let index: u32 = index.into();
        if index < self.minimum_index {
            return Ok(Item {
                index: I::from(index),
                data: PRIMITIVE_TYPE,
            });
        } else if index > self.maximum_index {
            return Err(Error::TypeNotFound(index));
        }

        // figure out where we'd find this
        let (vec_index, iteration_count) = self.resolve(index);

        if let Some(pos) = self.positions.get(vec_index) {
            // hit
            let mut buf = self.buffer.clone();

            // jump forwards
            buf.take(*pos as usize)?;

            // skip some records
            for _ in 0..iteration_count {
                let length = buf.parse_u16()?;
                buf.take(length as usize)?;
            }

            // read the type
            let length = buf.parse_u16()?;
            Ok(Item {
                index: I::from(index),
                data: buf.take(length as usize)?,
            })
        } else {
            // miss
            Err(Error::TypeNotIndexed(index, self.max_index().into()))
        }
    }
}

/// An iterator over items in [`TypeInformation`](crate::TypeInformation) or
/// [`IdInformation`](crate::IdInformation).
///
/// The TPI and IPI streams are represented internally as a series of records, each of which has a
/// length, a kind, and a type-specific field layout. Iteration performance is therefore similar to
/// a linked list.
#[derive(Debug)]
pub struct ItemIter<'t, I> {
    buf: ParseBuffer<'t>,
    index: u32,
    _ph: PhantomData<&'t I>,
}

impl<'t, I> FallibleIterator for ItemIter<'t, I>
where
    I: ItemIndex,
{
    type Item = Item<'t, I>;
    type Error = Error;

    fn next(&mut self) -> result::Result<Option<Self::Item>, Self::Error> {
        // see if we're at EOF
        if self.buf.is_empty() {
            return Ok(None);
        }

        // read the length of the next type
        let length = self.buf.parse_u16()? as usize;

        // validate
        if length < 2 {
            // this can't be correct
            return Err(Error::TypeTooShort);
        }

        // grab the type itself
        let type_buf = self.buf.take(length)?;
        let index = self.index;
        self.index += 1;

        // Done
        Ok(Some(Item {
            index: I::from(index),
            data: type_buf,
        }))
    }
}

/// Zero-copy access to the PDB type stream (TPI).
///
/// This stream exposes types, the variants of which are enumerated by [`TypeData`]. See
/// [`ItemInformation`] for more information on accessing types.
pub type TypeInformation<'s> = ItemInformation<'s, TypeIndex>;

/// In-memory index for efficient random-access of [`Type`]s by index.
///
/// `TypeFinder` can be obtained via [`TypeInformation::finder`](ItemInformation::finder). See
/// [`ItemFinder`] for more information.
pub type TypeFinder<'t> = ItemFinder<'t, TypeIndex>;

/// An iterator over [`Type`]s returned by [`TypeInformation::iter`](ItemInformation::iter).
pub type TypeIter<'t> = ItemIter<'t, TypeIndex>;

/// Information on a primitive type, class, or procedure.
pub type Type<'t> = Item<'t, TypeIndex>;

impl<'t> Item<'t, TypeIndex> {
    /// Parse this `Type` into `TypeData`.
    ///
    /// # Errors
    ///
    /// * `Error::UnimplementedTypeKind(kind)` if the type record isn't currently understood by this
    ///   library
    /// * `Error::UnexpectedEof` if the type record is malformed
    pub fn parse(&self) -> Result<TypeData<'t>> {
        if self.index < TypeIndex(0x1000) {
            // Primitive type
            type_data_for_primitive(self.index)
        } else {
            let mut buf = ParseBuffer::from(self.data);
            parse_type_data(&mut buf)
        }
    }
}

/// Zero-copy access to the PDB ID stream (IPI).
///
/// This stream exposes IDs, the variants of which are enumerated by [`IdData`]. See
/// [`ItemInformation`] for more information on accessing IDs.
pub type IdInformation<'s> = ItemInformation<'s, IdIndex>;

/// In-memory index for efficient random-access of [`Id`]s by index.
///
/// `IdFinder` can be obtained via [`IdInformation::finder`](ItemInformation::finder). See
/// [`ItemFinder`] for more information.
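///
/// # Example
///
/// A rough sketch of populating an `IdFinder` while draining the IPI. The fixture path is a
/// placeholder for any PDB that actually contains ID records:
///
/// ```no_run
/// use pdb::FallibleIterator;
///
/// let file = std::fs::File::open("fixtures/self/foo.pdb").expect("opening file");
/// let mut pdb = pdb::PDB::open(file).expect("opening pdb");
///
/// let id_information = pdb.id_information().expect("id information");
/// let mut id_finder = id_information.finder();
///
/// let mut iter = id_information.iter();
/// while let Some(id) = iter.next().expect("next id") {
///     // keep the finder in sync with the iterator
///     id_finder.update(&iter);
///     println!("{:?} -> kind 0x{:04x}", id.index(), id.raw_kind());
/// }
/// ```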
pub type IdFinder<'t> = ItemFinder<'t, IdIndex>;

/// An iterator over [`Id`]s returned by [`IdInformation::iter`](ItemInformation::iter).
pub type IdIter<'t> = ItemIter<'t, IdIndex>;

/// Information on inline functions, build infos, or source references.
pub type Id<'t> = Item<'t, IdIndex>;

impl<'t> Item<'t, IdIndex> {
    /// Parse this `Id` into `IdData`.
    ///
    /// # Errors
    ///
    /// * `Error::UnimplementedTypeKind(kind)` if the id record isn't currently understood by this
    ///   library
    /// * `Error::UnexpectedEof` if the id record is malformed
    pub fn parse(&self) -> Result<IdData<'t>> {
        ParseBuffer::from(self.data).parse()
    }
}
pdb-0.8.0/src/tpi/primitive.rs000064400000000000000000000152330072674642500143730ustar 00000000000000// Copyright 2017 pdb Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

use crate::common::*;
use crate::tpi::data::TypeData;

// References for primitive types:
//
// cvinfo.h provides an enumeration:
// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L328-L750
//
// pdbdump.cpp describes them as strings:
// https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/pdbdump/pdbdump.cpp#L1896-L1974
//
// The most obscure: MSDN Library October 2001 Disk 2 contains a \MSDN\specs.chm file which contains
// html\S66CD.HTM which actually documents the *format* of the primitive type descriptors rather
// than just listing them. TypeData::Primitive is designed to model the orthogonal information
// encoded into the bits of the TypeIndex rather than exploding the matrix like the reference
// implementations.

/// Represents a primitive type like `void` or `char *`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub struct PrimitiveType {
    /// The kind of the primitive type.
    pub kind: PrimitiveKind,

    /// Pointer indirection applied to the primitive type.
    pub indirection: Option<Indirection>,
}

/// A simple type.
#[non_exhaustive]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum PrimitiveKind {
    /// Uncharacterized type (no type)
    NoType,
    /// Void type
    Void,
    /// Character (byte)
    Char,
    /// Unsigned character
    UChar,
    /// "Really a char"
    RChar,
    /// Wide characters, i.e. 16 bits
    WChar,
    /// "Really a 16-bit char"
    RChar16,
    /// "Really a 32-bit char"
    RChar32,
    /// Signed 8-bit integer
    I8,
    /// Unsigned 8-bit integer
    U8,
    /// Signed 16-bit integer
    Short,
    /// Unsigned 16-bit integer
    UShort,
    /// Signed 16-bit integer
    I16,
    /// Unsigned 16-bit integer
    U16,
    /// Signed 32-bit integer
    Long,
    /// Unsigned 32-bit integer
    ULong,
    /// Signed 32-bit integer
    I32,
    /// Unsigned 32-bit integer
    U32,
    /// Signed 64-bit integer
    Quad,
    /// Unsigned 64-bit integer
    UQuad,
    /// Signed 64-bit integer
    I64,
    /// Unsigned 64-bit integer
    U64,
    /// Signed 128-bit integer
    Octa,
    /// Unsigned 128-bit integer
    UOcta,
    /// Signed 128-bit integer
    I128,
    /// Unsigned 128-bit integer
    U128,
    /// 16-bit floating point
    F16,
    /// 32-bit floating point
    F32,
    /// 32-bit partial precision floating point
    F32PP,
    /// 48-bit floating point
    F48,
    /// 64-bit floating point
    F64,
    /// 80-bit floating point
    F80,
    /// 128-bit floating point
    F128,
    /// 32-bit complex number
    Complex32,
    /// 64-bit complex number
    Complex64,
    /// 80-bit complex number
    Complex80,
    /// 128-bit complex number
    Complex128,
    /// 8-bit boolean value
    Bool8,
    /// 16-bit boolean value
    Bool16,
    /// 32-bit boolean value
    Bool32,
    /// 64-bit boolean value
    Bool64,
    /// Windows `HRESULT` error code.
    ///
    /// See the Windows documentation on `HRESULT` values for details.
    HRESULT,
}

/// Pointer mode of primitive types.
///
/// This partially overlaps with [`PointerKind`](crate::PointerKind) for regular pointer type
/// definitions. While `PointerKind` can specify many more pointer types, including relative
/// pointers, `Indirection` also contains a 128-bit variant.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Indirection {
    /// 16-bit ("near") pointer.
    Near16,
    /// 16:16 far pointer.
    Far16,
    /// 16:16 huge pointer.
    Huge16,
    /// 32-bit pointer.
    Near32,
    /// 48-bit 16:32 pointer.
    Far32,
    /// 64-bit near pointer.
    Near64,
    /// 128-bit near pointer.
    Near128,
}

pub fn type_data_for_primitive(index: TypeIndex) -> Result<TypeData<'static>> {
    // https://github.com/Microsoft/microsoft-pdb/blob/082c5290e5aff028ae84e43affa8be717aa7af73/include/cvinfo.h#L326-L750

    // primitives live under 0x1000, and we should never reach here for non-primitive indexes
    assert!(index < TypeIndex(0x1000));

    // indirection is stored in these bits
    let indirection = match index.0 & 0xf00 {
        0x000 => None,
        0x100 => Some(Indirection::Near16),
        0x200 => Some(Indirection::Far16),
        0x300 => Some(Indirection::Huge16),
        0x400 => Some(Indirection::Near32),
        0x500 => Some(Indirection::Far32),
        0x600 => Some(Indirection::Near64),
        0x700 => Some(Indirection::Near128),
        _ => {
            return Err(Error::TypeNotFound(index.0));
        }
    };

    // primitive types are stored in the lowest octet
    let kind = match index.0 & 0xff {
        0x00 => PrimitiveKind::NoType,
        0x03 => PrimitiveKind::Void,
        0x08 => PrimitiveKind::HRESULT,

        0x10 => PrimitiveKind::Char,
        0x20 => PrimitiveKind::UChar,
        0x68 => PrimitiveKind::I8,
        0x69 => PrimitiveKind::U8,

        0x70 => PrimitiveKind::RChar,
        0x71 => PrimitiveKind::WChar,
        0x7a => PrimitiveKind::RChar16,
        0x7b => PrimitiveKind::RChar32,

        0x11 => PrimitiveKind::Short,
        0x21 => PrimitiveKind::UShort,
        0x72 => PrimitiveKind::I16,
        0x73 => PrimitiveKind::U16,

        0x12 => PrimitiveKind::Long,
        0x22 => PrimitiveKind::ULong,
        0x74 => PrimitiveKind::I32,
        0x75 => PrimitiveKind::U32,

        0x13 => PrimitiveKind::Quad,
        0x23 => PrimitiveKind::UQuad,
        0x76 => PrimitiveKind::I64,
        0x77 => PrimitiveKind::U64,

        0x14 => PrimitiveKind::Octa,
        0x24 => PrimitiveKind::UOcta,
        0x78 => PrimitiveKind::I128,
        0x79 => PrimitiveKind::U128,

        0x46 => PrimitiveKind::F16,
        0x40 => PrimitiveKind::F32,
        0x45 => PrimitiveKind::F32PP,
        0x44 => PrimitiveKind::F48,
        0x41 => PrimitiveKind::F64,
        0x42 => PrimitiveKind::F80,
        0x43 => PrimitiveKind::F128,

        0x50 => PrimitiveKind::Complex32,
        0x51 => PrimitiveKind::Complex64,
        0x52 => PrimitiveKind::Complex80,
        0x53 => PrimitiveKind::Complex128,

        0x30 => PrimitiveKind::Bool8,
        0x31 => PrimitiveKind::Bool16,
        0x32 => PrimitiveKind::Bool32,
        0x33 => PrimitiveKind::Bool64,

        _ => {
            return Err(Error::TypeNotFound(index.0));
        }
    };

    Ok(TypeData::Primitive(PrimitiveType { kind, indirection }))
}
pdb-0.8.0/tests/debug_information.rs000064400000000000000000000005430072674642500156330ustar 00000000000000#[test]
fn pdb_info() {
    let file = std::fs::File::open("fixtures/self/foo.pdb").expect("opening file");
    let mut pdb = pdb::PDB::open(file).expect("opening pdb");

    let pdb_info = pdb.debug_information().expect("pdb information");
    assert_eq!(
        pdb_info.machine_type().expect("machine type"),
        pdb::MachineType::Amd64
    );
}
pdb-0.8.0/tests/id_information.rs000064400000000000000000000017530072674642500151450ustar 00000000000000//! Tests that IdInformation works on files where the IPI is missing (empty stream).
use pdb::{FallibleIterator, IdIndex, PDB}; fn open_file() -> std::fs::File { let path = "fixtures/symbol_server/0ea7c70545374958ad3307514bdfc8642-wntdll.pdb"; std::fs::File::open(path).expect("missing fixtures, please run scripts/download from the root") } #[test] fn test_missing_ipi() { let mut pdb = PDB::open(open_file()).expect("opening pdb"); let id_information = pdb.id_information().expect("get id information"); // Check ItemInformation API assert_eq!(id_information.len(), 0); assert!(id_information.is_empty()); // Check ItemIter API let mut iter = id_information.iter(); assert!(iter.next().expect("iter empty IPI").is_none()); // Check ItemFinder API let finder = id_information.finder(); assert_eq!(finder.max_index(), IdIndex(0)); finder.find(IdIndex(0)).expect_err("find index"); finder.find(IdIndex(4097)).expect_err("find index"); } pdb-0.8.0/tests/modi_symbol_depth.rs000064400000000000000000000021720072674642500156410ustar 00000000000000use std::fs::File; use pdb::{FallibleIterator, Result, PDB}; #[test] fn test_symbol_depth() -> Result<()> { let file = File::open("fixtures/self/foo.pdb")?; let mut pdb = PDB::open(file)?; let dbi = pdb.debug_information()?; let mut modules = dbi.modules()?; while let Some(module) = modules.next()? { let module_info = match pdb.module_info(&module)? { Some(module_info) => module_info, None => continue, }; let mut depth = 0isize; let mut symbols = module_info.symbols()?; while let Some(symbol) = symbols.next()? { if symbol.starts_scope() { depth += 1; } else if symbol.ends_scope() { depth -= 1; } // The most common case here will be that we forgot to add a raw kind to `starts_scope`. // PDBs seem to use `S_END` for most symbols with inline sites being the notable // exception. In case we forgot a start scope symbol, the depth will become negative. 
            assert!(depth >= 0, "depth must not be negative");
        }
    }

    Ok(())
}
pdb-0.8.0/tests/omap_address_translation.rs000064400000000000000000000113220072674642500172140ustar 00000000000000use pdb::{FallibleIterator, PdbInternalRva, PdbInternalSectionOffset, Rva};

// This test is intended to cover OMAP address translation:
// https://github.com/willglynn/pdb/issues/17

fn open_file() -> std::fs::File {
    let path = "fixtures/symbol_server/3844dbb920174967be7aa4a2c20430fa2-ntkrnlmp.pdb";
    std::fs::File::open(path).expect("missing fixtures, please run scripts/download from the root")
}

#[test]
fn test_omap_section_zero() {
    // https://github.com/willglynn/pdb/issues/87
    let mut pdb = pdb::PDB::open(open_file()).expect("opening pdb");

    let address = pdb::PdbInternalSectionOffset {
        offset: 0,
        section: 0x1234,
    };

    let address_map = pdb.address_map().expect("address map");
    assert_eq!(address.to_rva(&address_map), None);
}

#[test]
fn test_omap_symbol() {
    let mut pdb = pdb::PDB::open(open_file()).expect("opening pdb");

    let global_symbols = pdb.global_symbols().expect("global_symbols");

    // find the target symbol
    let target_symbol = {
        let target_name = pdb::RawString::from("NtWaitForSingleObject");
        let mut iter = global_symbols.iter();
        iter.find(|sym| {
            let matches = sym
                .parse()
                .ok()
                .and_then(|d| d.name())
                .map_or(false, |n| n == target_name);
            Ok(matches)
        })
        .expect("iterate symbols")
        .expect("find target symbol")
    };

    // extract the PublicSymbol data
    let pubsym = match target_symbol.parse().expect("parse symbol") {
        pdb::SymbolData::Public(pubsym) => pubsym,
        _ => panic!("expected public symbol"),
    };

    // ensure the symbol has the correct location
    assert_eq!(
        pubsym.offset,
        PdbInternalSectionOffset {
            section: 0xc,
            offset: 0x0004_aeb0,
        }
    );

    // translate the segment offset to an RVA
    let address_map = pdb.address_map().expect("address map");
    assert_eq!(pubsym.offset.to_rva(&address_map), Some(Rva(0x0037_68c0)));
    assert_eq!(
        Rva(0x0037_68c0).to_internal_offset(&address_map),
        Some(pubsym.offset)
    );
}

#[test]
fn test_omap_range() {
    let mut pdb = pdb::PDB::open(open_file()).expect("opening pdb");
    let address_map = pdb.address_map().expect("address map");

    // Range partially covered by OMAPs
    // [
    //     OMAPRecord {
    //         source_address: 0x000010aa,
    //         target_address: 0x00015de6
    //     },
    //     OMAPRecord {
    //         source_address: 0x000010bd,
    //         target_address: 0x00000000
    //     },
    //     OMAPRecord {
    //         source_address: 0x000010c4,
    //         target_address: 0x0002da00
    //     },
    //     OMAPRecord {
    //         source_address: 0x000010c8,
    //         target_address: 0x0002da04
    //     },
    // ]
    let start = PdbInternalRva(0x10b0);
    let end = PdbInternalRva(0x10c6);

    assert_eq!(
        address_map.rva_ranges(start..end).collect::<Vec<_>>(),
        vec![
            Rva(0x15dec)..Rva(0x15df9), // 0x10aa - 0x10bd
            // 0x10bd - 0x10c4 omitted due to missing target address
            Rva(0x2da00)..Rva(0x2da02), // 0x10c4 - 0x10c6
        ],
    );

    // Range starting outside OMAPs
    // [
    //     OMAPRecord {
    //         source_address: 0x00001000,
    //         target_address: 0x00000000
    //     },
    //     OMAPRecord {
    //         source_address: 0x00001008,
    //         target_address: 0x00015d44
    //     },
    // ]
    let start = PdbInternalRva(0x0);
    let end = PdbInternalRva(0x1010);

    assert_eq!(
        address_map.rva_ranges(start..end).collect::<Vec<_>>(),
        vec![Rva(0x15d44)..Rva(0x15d4c)],
    );

    // Range ending outside OMAPs
    // [
    //     OMAPRecord {
    //         source_address: 0x005e40e0,
    //         target_address: 0x005e50e0
    //     },
    //     OMAPRecord {
    //         source_address: 0x005e5000,
    //         target_address: 0x00000000
    //     },
    //     OMAPRecord {
    //         source_address: 0x005e70c0,
    //         target_address: 0x00000000
    //     }
    // ]
    let start = PdbInternalRva(0x5e_4fe0);
    let end = PdbInternalRva(0x5e_8000);

    assert_eq!(
        address_map.rva_ranges(start..end).collect::<Vec<_>>(),
        vec![Rva(0x005e_5fe0)..Rva(0x5e_6000)],
    );

    // Range fully before OMAPs
    let start = PdbInternalRva(0x0);
    let end = PdbInternalRva(0x100);

    assert_eq!(
        address_map.rva_ranges(start..end).collect::<Vec<_>>(),
        vec![],
    );

    // Range fully after OMAPs
    let start = PdbInternalRva(0x005e_8000);
    let end = PdbInternalRva(0x005e_9000);

    assert_eq!(
        address_map.rva_ranges(start..end).collect::<Vec<_>>(),
        vec![], // last record targets 0, thus the range is omitted
    );
}
pdb-0.8.0/tests/pdb_information.rs000064400000000000000000000006630072674642500153150ustar 00000000000000#[test]
fn pdb_info() {
    let file = std::fs::File::open("fixtures/self/foo.pdb").expect("opening file");
    let mut pdb = pdb::PDB::open(file).expect("opening pdb");

    let pdb_info = pdb.pdb_information().expect("pdb information");

    assert_eq!(pdb_info.age, 2);
    assert_eq!(
        pdb_info.guid,
        "2B3C3FA5-5A2E-44B8-8BBA-C3300FF69F62".parse().unwrap(),
    );
    assert_eq!(pdb_info.signature, 0x587B_A621);
}
pdb-0.8.0/tests/pdb_lines.rs000064400000000000000000000024720072674642500141020ustar 00000000000000use pdb::{FallibleIterator, Rva, PDB};

#[test]
fn test_module_lines() {
    let file = std::fs::File::open("fixtures/self/foo.pdb").expect("opening file");
    let mut pdb = PDB::open(file).expect("parse pdb");

    let address_map = pdb.address_map().expect("address map");
    let string_table = pdb.string_table().expect("string table");

    let dbi = pdb.debug_information().expect("dbi");
    let mut modules = dbi.modules().expect("modules");
    let module = modules.next().expect("parse module").expect("no module");

    let module_info = pdb
        .module_info(&module)
        .expect("parse module info")
        .expect("module info");
    let line_program = module_info.line_program().expect("line program");
    let mut lines = line_program.lines();
    let line_info = lines.next().expect("parse line info").expect("no lines");

    let rva = line_info.offset.to_rva(&address_map).expect("line rva");
    let file_info = line_program
        .get_file_info(line_info.file_index)
        .expect("file info");
    let file_name = file_info
        .name
        .to_string_lossy(&string_table)
        .expect("file name");

    assert_eq!(line_info.line_start, 29);
    assert_eq!(line_info.column_start, None);
    assert_eq!(rva, Rva(0x64f0));
    assert_eq!(file_name, "c:\\users\\user\\desktop\\self\\foo.cpp");
}
pdb-0.8.0/tests/symbol_table.rs000064400000000000000000000077050072674642500146220ustar 00000000000000use std::collections::hash_map::{Entry, HashMap};

use pdb::FallibleIterator;

fn setup<F>(func: F)
where
    F: FnOnce(&pdb::SymbolTable<'_>, bool),
{
    let (file, is_fixture) = if let Ok(filename) = std::env::var("PDB_FILE") {
        (std::fs::File::open(filename).expect("opening file"), false)
    } else {
        (
            std::fs::File::open("fixtures/self/foo.pdb").expect("opening file"),
            true,
        )
    };

    let mut pdb = pdb::PDB::open(file).expect("opening pdb");

    let symbol_table = pdb.global_symbols().expect("global symbols");

    func(&symbol_table, is_fixture);
}

#[test]
fn count_symbols() {
    setup(|global_symbols, is_fixture| {
        let mut map: HashMap<u16, usize> = HashMap::new();

        // walk the symbol table
        let mut iter = global_symbols.iter();
        while let Some(sym) = iter.next().expect("next symbol") {
            let kind = sym.raw_kind();
            let entry = map.entry(kind).or_insert(0);

            if *entry == 0 && is_fixture {
                // first symbol of this kind seen
                // emit a unit test
                println!("#[test]");
                println!("fn kind_{:04x}() {{", sym.raw_kind());
                println!("    let buf = &{:?};", sym.raw_bytes());
                println!("    let (symbol, data, name) = parse(buf).expect(\"parse\");");
                println!(
                    "    assert_eq!(symbol.raw_kind(), 0x{:04x});",
                    sym.raw_kind()
                );
                println!(
                    "    assert_eq!(data, SymbolData::{:?});",
                    sym.parse().expect("parse")
                );
                println!("}}");
                println!();
            }

            *entry += 1;
        }

        println!("symbol counts by kind:");
        for (kind, count) in &map {
            println!("  - kind: 0x{:04x}, count: {}", kind, count);
        }

        assert!(*map.get(&0x1107).expect("0x1107") >= 500);
        assert!(*map.get(&0x1108).expect("0x1108") >= 400);
        assert!(*map.get(&0x110c).expect("0x110c") >= 90);
        assert!(*map.get(&0x110d).expect("0x110d") >= 120);
        assert!(*map.get(&0x110e).expect("0x110e") >= 3000);
        assert!(*map.get(&0x110e).expect("0x110e") >= 3000);
        assert!(*map.get(&0x1125).expect("0x1125") >= 2000);
        assert!(*map.get(&0x1127).expect("0x1127") >= 500);
    })
}

#[test]
fn find_symbols() {
    setup(|global_symbols, is_fixture| {
        // can't do much if we don't know which PDB we're using
        if !is_fixture {
            return;
        }

        let mut map: HashMap<&[u8], Option<pdb::SymbolData<'_>>> = HashMap::new();

        // look for:
        // main(), defined in the program
        map.insert(b"main", None);
        // memcpy(), defined in libc
        map.insert(b"memcpy", None);
        // HeapAlloc(), defined... somewhere
        map.insert(b"HeapAlloc", None);
        // Baz::static_f_public(), except MSVC-mangled
        map.insert(b"?static_f_public@Baz@@SAXXZ", None);

        // walk the symbol table
        let mut iter = global_symbols.iter();
        while let Some(sym) = iter.next().expect("next symbol") {
            // ensure we can parse all the symbols, even though we only want a few
            let data = sym.parse().expect("symbol parsing");

            // get symbol name
            let name = data.name().unwrap_or_default();

            if let Entry::Occupied(mut e) = map.entry(name.as_bytes()) {
                // this is a symbol we wanted to find
                // store our data
                e.insert(Some(data));
            }
        }

        for (key, value) in map {
            match value {
                Some(data) => {
                    println!("found {} => {:?}", String::from_utf8_lossy(key), data);
                }
                None => {
                    panic!("couldn't find {}", String::from_utf8_lossy(key));
                }
            }
        }
    })
}
pdb-0.8.0/tests/type_information.rs000064400000000000000000000154210072674642500155270ustar 00000000000000use std::collections::HashMap;

use pdb::FallibleIterator;

fn setup<F>(func: F)
where
    F: FnOnce(&pdb::TypeInformation<'_>),
{
    let file = if let Ok(filename) = std::env::var("PDB_FILE") {
        std::fs::File::open(filename)
    } else {
        std::fs::File::open("fixtures/self/foo.pdb")
    }
    .expect("opening file");

    let mut pdb = pdb::PDB::open(file).expect("opening pdb");

    let type_information = pdb.type_information().expect("type information");

    func(&type_information);
}

#[test]
fn iteration() {
    setup(|type_information| {
        let len = type_information.len();

        let mut count: usize = 0;
        let mut last_index = pdb::TypeIndex(4095);
        let mut iter = type_information.iter();
        while let Some(typ) = iter.next().expect("next type") {
            assert_eq!(typ.index().0, last_index.0 + 1);
            last_index = typ.index();
            count += 1;
        }

        assert_eq!(len, count);
    });
}

#[test]
fn type_finder() {
    setup(|type_information| {
        let mut type_finder = type_information.finder();

        let mut map: HashMap<pdb::TypeIndex, pdb::Type<'_>> = HashMap::new();

        assert_eq!(type_finder.max_index().0 >> 3, 4096 >> 3);

        // iterate over all the types
        let mut iter = type_information.iter();
        while let Some(typ) = iter.next().expect("next type") {
            assert_eq!(type_finder.max_index().0 >> 3, typ.index().0 >> 3);

            // update the type finder
            type_finder.update(&iter);

            // record this type in our map
            map.insert(typ.index(), typ);
        }

        // iterate over the map -- which is randomized -- making sure the type finder finds identical types
        for (index, typ) in map.iter() {
            let found = type_finder.find(*index).expect("find");
            assert_eq!(*typ, found);
        }
    })
}

#[test]
fn find_classes()
{ setup(|type_information| { let mut type_finder = type_information.finder(); // iterate over all the types let mut iter = type_information.iter(); while let Some(typ) = iter.next().expect("next type") { // update the type finder type_finder.update(&iter); // parse the type record match typ.parse() { Ok(pdb::TypeData::Class(pdb::ClassType { name, fields: Some(fields), .. })) => { // this Type describes a class-like type with fields println!("class {} (type {}):", name, typ.index()); // fields is presently a TypeIndex // find and parse the list of fields match type_finder.find(fields).expect("find fields").parse() { Ok(pdb::TypeData::FieldList(list)) => { for field in list.fields { println!(" - {:?}", field); } if let Some(c) = list.continuation { println!("TODO: follow to type {}", c); } } Ok(value) => { panic!("expected a field list, got {:?}", value); } Err(e) => { println!("field parse error: {}", e); } } } Ok(pdb::TypeData::Enumeration(data)) => { println!("enum {} (type {}):", data.name, data.fields); // fields is presently a TypeIndex match type_finder.find(data.fields).expect("find fields").parse() { Ok(pdb::TypeData::FieldList(list)) => { for field in list.fields { println!(" - {:?}", field); } if let Some(c) = list.continuation { println!("TODO: follow to type {}", c); } } Ok(value) => { panic!("expected a field list, got {:?}", value); } Err(e) => { println!("field parse error: {}", e); } } } Ok(pdb::TypeData::FieldList(_)) => { // ignore, since we find these by class } Ok(_) => { //println!("type: {:?}", data); } Err(pdb::Error::UnimplementedTypeKind(kind)) => { println!("unimplemented: 0x{:04x}", kind); // TODO: parse everything // ignore for now } Err(e) => { // other parse error println!( "other parse error on type {} (raw type {:04x}): {}", typ.index(), typ.raw_kind(), e ); panic!("dying due to parse error"); } } } // hooah! }) } /* #[bench] fn bench_type_finder(b: &mut test::Bencher) { setup(|type_information| { let mut type_finder = type_information.finder(); assert_eq!(type_finder.max_index() >> 3, 4096 >> 3); // iterate over all the types let mut iter = type_information.iter(); while let Some(typ) = iter.next().expect("next type") { assert_eq!(type_finder.max_index() >> 3, typ.index() >> 3); type_finder.update(&iter); } let mut rng = rand::thread_rng(); let count: pdb::TypeIndex = type_information.len() as pdb::TypeIndex; let base: pdb::TypeIndex = 4096; // time how long it takes to build a map b.iter(|| { let lucky = rng.gen_range(base, base + count); let found = type_finder.find(lucky).expect("find"); test::black_box(&found); }); }) } */ /* #[test] fn type_length_histogram() { setup(|type_information| { let mut lens: Vec = Vec::new(); lens.resize(1025, 0); // iterate over all the types let mut iter = type_information.iter(); while let Some(typ) = iter.next().expect("next type") { let mut len = typ.len() + 2; if len > 1024 { len = 1024; } lens[len] += 1; } for (len, count) in lens.as_slice().iter().enumerate() { println!("{}\t{}", len, count); } panic!(); }) } */
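
// Not part of the original test suite: a small sketch showing that primitive type indexes
// (below 0x1000) are decoded directly from their index bits, as described in
// src/tpi/primitive.rs. The index 0x0074 (32-bit signed integer, no indirection) is just an
// illustrative choice, and this assumes `PrimitiveKind` and `PrimitiveType` are re-exported
// at the crate root.
#[test]
fn find_primitive_type() {
    setup(|type_information| {
        let type_finder = type_information.finder();

        // primitive indexes resolve without ever calling `update()`
        let primitive = type_finder
            .find(pdb::TypeIndex(0x0074))
            .expect("find primitive");

        match primitive.parse().expect("parse primitive") {
            pdb::TypeData::Primitive(data) => {
                assert_eq!(data.kind, pdb::PrimitiveKind::I32);
                assert_eq!(data.indirection, None);
            }
            other => panic!("expected a primitive type, got {:?}", other),
        }
    })
}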