yaxpeax-arch-0.2.7/.cargo_vcs_info.json0000644000000001120000000000100134260ustar { "git": { "sha1": "eb673b89d75865e5e7d38d9ee706ff7b2f9aed8f" } } yaxpeax-arch-0.2.7/.gitignore000064400000000000000000000000240072674642500142400ustar 00000000000000/target/ Cargo.lock yaxpeax-arch-0.2.7/CHANGELOG000064400000000000000000000066740072674642500135030ustar 00000000000000## 0.3.0 TODO: Reader::next_n should return the number of items read as Err(ReadError::Incomplete(n)) if the buffer is exhausted TODO: Reader::offset should return an AddressDiff
, not a bare Address TODO: impls of `fn one` and `fn zero` so downstream users don't have to import num_traits directly ## 0.2.7 moved `AnnotatingDecoder` and its associated types to `annotation/`, for module-level documentation about that feature. yanked 0.2.6 because there was not yet a user of it other than myself, and it had this feature in the wrong location in the crate. ## 0.2.6 added `AnnotatingDecoder` and associated traits `FieldDescription` and `DescriptionSink` for architectures to report meanings for bit ranges in decoded instructions. added `NullSink`, with an `impl DescriptionSink for NullSink` - `NullSink` can always be used to discard instruction annotations. this is mostly useful for shared annotating and non-annotating decode logic. added a `docs/` directory for `yaxpeax-arch`: trip reports for `yaxpeax-arch` design. if `yaxpeax` eventually grows an RFC process one day, these are the kind of changes that would get RFC'd. added `docs/0001-AnnotatingDecoder.md`, describing motivation and implementation notes of `AnnotatingDecoder`. ## 0.2.5 added `yaxpeax-lc87` to the matrix ## 0.2.4 fix incorrect `Reader` impls of `offset` and `total_offset` on non-`u8` words ## 0.2.3 added `Reader` impls for `U8Reader` on `u16` addresses ## 0.2.2 added `ReaderBuilder` trait and impls for `U8Reader` on various address and word types. added documentation for `Reader`, `U8Reader`, and `ReaderBuilder`. avoid an unlikely violation of `core::ptr::offset` safety rules on 32-bit architectures. ## 0.2.1 updated architecture matrix ## 0.2.0 correct a bug in 0.1.0 that incorrectly bounded `DecodeError` and did not actually require `std::error::Error`. added a test that `std::error::Error` is actually required of `Arch::DecodeError` in non-std builds. ## 0.1.0 new trait `Reader` to provide a reader of `Arch`-defined `Word`s. 
in many cases it is acceptable for `Word` to be `u8`, but `yaxpeax-arch` provides pre-defined words `u8`, `U16le`, `U16be`, `U32le`, `U32be`, `U64le`, and `U64be`. `yaxpeax_arch::U8Reader` is a struct to read from `&[u8]` that implements `Reader` for all predefined words. it is suitable to read larger words if the minimum word size is still one byte. `Decoder` now decodes from a `Reader`, to prepare for ISAs where instruction sizes are not multiples of 8 bits. `yaxpeax_arch::DecodeError` now requires a `std::error::Error` impl for `std` builds, to support interop with the Rust `error` ecosystem. committed to `AddressDiff` being convertible to a primitive with `AddressDiff::to_const` - this addresses the need for hacks to translate an instruction length into a usize ## 0.0.5 swap the `termion` dependency for `crossterm`. this is motivated by improved cross-platform support (notably Windows) as well as removing a type parameter from `Colored` and `YaxColors`. ## 0.0.4 add `AddressDiff`. `LengthedInstruction::len` now returns `AddressDiff`. the length of an instruction is the difference between two addresses, not itself an address. ## 0.0.3 `ColorSettings` gets a default impl ## 0.0.2 add `AddressDisplay` to provide a usable interface to display `Address` implementors. at the same time, remove `Address::stringy()`. it was a very bad interface, and will not be missed. ## 0.0.1 history starts here yaxpeax-arch-0.2.7/Cargo.toml0000644000000025130000000000100114330ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "yaxpeax-arch" version = "0.2.7" authors = ["iximeow "] description = "fundamental traits to describe an architecture in the yaxpeax project" keywords = ["disassembly", "disassembler"] license = "0BSD" repository = "https://git.iximeow.net/yaxpeax-arch/" [profile.release] lto = true [dependencies.crossterm] version = "0.19.0" optional = true [dependencies.num-traits] version = "0.2" default-features = false [dependencies.serde] version = "1.0" optional = true [dependencies.serde_derive] version = "1.0" optional = true [dev-dependencies.anyhow] version = "1.0.41" [dev-dependencies.thiserror] version = "1.0.26" [features] address-parse = [] colors = ["crossterm"] default = ["std", "use-serde", "colors", "address-parse"] std = [] use-serde = ["serde", "serde_derive"] yaxpeax-arch-0.2.7/Cargo.toml.orig000064400000000000000000000015450072674642500151500ustar 00000000000000[package] authors = [ "iximeow " ] description = "fundamental traits to describe an architecture in the yaxpeax project" edition = "2018" keywords = ["disassembly", "disassembler"] license = "0BSD" name = "yaxpeax-arch" repository = "https://git.iximeow.net/yaxpeax-arch/" version = "0.2.7" [dependencies] "num-traits" = { version = "0.2", default-features = false } "crossterm" = { version = "0.19.0", optional = true } "serde" = { version = "1.0", optional = true } "serde_derive" = { version = "1.0", optional = true } [dev-dependencies] anyhow = "1.0.41" thiserror = "1.0.26" [profile.release] lto = true [features] default = ["std", "use-serde", "colors", "address-parse"] std = [] # enables the (optional) use of Serde for bounds on # Arch and Arch::Address use-serde = ["serde", "serde_derive"] colors = ["crossterm"] address-parse = [] yaxpeax-arch-0.2.7/LICENSE000064400000000000000000000011730072674642500132630ustar 00000000000000Copyright (c) 
2020 iximeow Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. yaxpeax-arch-0.2.7/README.md000064400000000000000000000152710072674642500135410ustar 00000000000000## yaxpeax-arch [![crate](https://img.shields.io/crates/v/yaxpeax-arch.svg?logo=rust)](https://crates.io/crates/yaxpeax-arch) [![documentation](https://docs.rs/yaxpeax-arch/badge.svg)](https://docs.rs/yaxpeax-arch) shared traits for architecture definitions, instruction decoders, and related interfaces for instruction decoders from the yaxpeax project. typically this crate is only interesting if you're writing code to operate on multiple architectures that all implement `yaxpeax-arch` traits. for example, [yaxpeax-dis](https://crates.io/crates/yaxpeax-dis) implements disassembly and display logic generic over the traits defined here, so adding a new decoder is usually only a one or two line addition. `yaxpeax-arch` has several crate features, which implementers are encouraged to also support: * `std`: opt-in for `std`-specific support - in this crate, `std` enables a [`std::error::Error`](https://doc.rust-lang.org/std/error/trait.Error.html) requirement on `DecodeError`, allowing users to `?`-unwrap decode results. * `colors`: enables (optional) [`crossterm`](https://docs.rs/crossterm/latest/crossterm/)-based ANSI colorization. 
default coloring rules are defined by [`ColorSettings`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/struct.ColorSettings.html), when enabled. * `address-parse`: enable a requirement that `yaxpeax_arch::Address` be parsable from `&str`. this is useful for use cases that, for example, read addresses from humans. * `use-serde`: enable [`serde`](https://docs.rs/serde/latest/serde/) serialization and deserialization bounds for types like `Address`. with all features disabled, `yaxpeax-arch`'s only direct dependency is `num-traits`, and is suitable for `#![no_std]` usage. ### design `yaxpeax-arch` has backwards-incompatible changes from time to time, but there's not much to make incompatible. the main benefit of this crate is the [`Arch`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.Arch.html) trait, for other libraries to build architecture-agnostic functionality. nontrivial additions to `yaxpeax-arch` should include some discussion summarized by an addition to the crate [`docs/`](https://github.com/iximeow/yaxpeax-arch/tree/no-gods-no-/docs). you may ask, "where does discussion happen?", and the answer currently is in my (iximeow's) head, or various discord/irc/discord/email conversations. if there's need in the future, `yaxpeax` may develop a more consistent process. `yaxpeax-arch` intends to support ad-hoc development of architecture support. maintainers of various architectures' crates may not want to implement all available interfaces to a complete level of detail, and must not be required to. incomplete implementations may be an issue for downstream users, but library quality is mediated by human conversation, not `yaxpeax-arch` interfaces. extensions to these fundamental definitions should be considerate of partial and incomplete implementations. ### implementations there are numerous architectures for which decoders are implemented, at varying levels of completion. 
now and in the future, they will be enumerated here: | symbol | meaning | | ------ | ------- | | 🥳 | complete, reliable | | ⚠️| "complete", likely has gaps | | 🚧 | incomplete | | ❓ | unimplemented | | architecture | library | decode | tests | benchmarks | note | | ------------ | ------- | ------ | ----- | ---------- | ---- | | `x86_64` | [yaxpeax-x86](https://www.github.com/iximeow/yaxpeax-x86) | 🥳 | 🥳 | 🥳 | | | `x86:32` | [yaxpeax-x86](https://www.github.com/iximeow/yaxpeax-x86) | 🥳 | 🥳 | ❓ | sse and sse2 support cannot be disabled | | `x86:16` | [yaxpeax-x86](https://www.github.com/iximeow/yaxpeax-x86) | 🥳 | 🥳 | ❓ | instructions above the 8086 or 286 cannot be disabled | | `ia64` | [yaxpeax-ia64](https://www.github.com/iximeow/yaxpeax-ia64) | 🥳 | ⚠️ | ❓ | lack of a good oracle has complicated testing | | `armv7` | [yaxpeax-arm](https://www.github.com/iximeow/yaxpeax-arm) | 🚧 | 🚧 | ❓ | NEON is not yet supported | | `armv8` | [yaxpeax-arm](https://www.github.com/iximeow/yaxpeax-arm) | 🚧 | 🚧 | ❓ | a32 decoding is not yet supported, NEON is not supported | | `m16c` | [yaxpeax-m16c](https://www.github.com/iximeow/yaxpeax-m16c) | ⚠️ | 🚧 | ❓ | | | `mips` | [yaxpeax-mips](https://www.github.com/iximeow/yaxpeax-mips) | 🚧 | 🚧 | ❓ | | | `msp430` | [yaxpeax-msp430](https://www.github.com/iximeow/yaxpeax-msp430) | 🚧 | 🚧 | ❓ | | | `pic17` | [yaxpeax-pic17](https://www.github.com/iximeow/yaxpeax-pic17) | 🚧 | 🚧 | ❓ | | | `pic18` | [yaxpeax-pic18](https://www.github.com/iximeow/yaxpeax-pic18) | 🚧 | 🚧 | ❓ | | | `pic24` | [yaxpeax-pic24](https://www.github.com/iximeow/yaxpeax-pic24) | ❓ | ❓ | ❓ | exists, but only decodes `NOP` | | `sm83` | [yaxpeax-sm83](https://www.github.com/iximeow/yaxpeax-sm83) | 🥳 | 🚧 | ❓ | | | `avr` | [yaxpeax-avr](https://github.com/The6P4C/yaxpeax-avr) | 🥳 | 🚧 | ❓ | contributed by [@the6p4c](https://twitter.com/The6P4C)! 
| | `sh`/`sh2`/`j2`/`sh3`/`sh4` | [yaxpeax-superh](https://git.sr.ht/~nabijaczleweli/yaxpeax-superh) | 🥳 | 🚧 | ❓ | contributed by [наб](https://nabijaczleweli.xyz) | | `MOS 6502` | [yaxpeax-6502](https://github.com/cr1901/yaxpeax-6502) | ⚠️ | ❓ | ❓ | contributed by [@cr1901](https://www.twitter.com/cr1901) | | `lc87` | [yaxpeax-lc87](https://www.github.com/iximeow/yaxpeax-lc87) | 🥳 | ⚠️ | ❓ | | #### feature support `yaxpeax-arch` defines a few typically-optional features that decoders can also implement, in addition to simple `(bytes) -> instruction` decoding. these are `yaxpeax-arch` traits (or collections thereof) which architectures implement, not crate features. `description_spans`: implementation of [`AnnotatingDecoder`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.AnnotatingDecoder.html), to decode instructions with bit-level details of what incoming bitstreams mean. `contextualize`: implementation of [`ShowContextual`](https://docs.rs/yaxpeax-arch/latest/yaxpeax_arch/trait.ShowContextual.html), to display instructions with user-defined information in place of default instruction data. typically expected to show label names instead of relative branch addresses. **i do not recommend implementing this trait**, it needs significant reconsideration. | architecture | `description_spans` | `contextualize` | | ------------ | ------------------- | --------------- | | `x86_64` | 🥳 | ❓ | | `ia64` | ⚠️ | ❓ | | `msp430` | 🥳 | ❓ | ### mirrors the canonical copy of `yaxpeax-arch` is at [https://git.iximeow.net/yaxpeax-arch](https://git.iximeow.net/yaxpeax-arch). `yaxpeax-arch` is also mirrored on GitHub at [https://www.github.com/iximeow/yaxpeax-arch](https://www.github.com/iximeow/yaxpeax-arch). 
yaxpeax-arch-0.2.7/docs/0001-AnnotatingDecoder.md000064400000000000000000000157410072674642500175040ustar 00000000000000## `DescriptionSink` most architectures' machine code packs interesting meanings into specific bit fields, and one of the more important tasks of the yaxpeax decoders is to unpack these into opcodes, operands, and other instruction data for later use. in the worst case, some architectures - typically interpreted bytecodes - do less bit-packing and simply map bytes to instructions. the yaxpeax decoders' primary role is to handle this unpacking into user code-friendly structs. i want decoders to be able to report the meaning of bitfields too, so user code can mark up bit streams. implementing this capability should (borderline-"must") not regress performance for decoders that do not use it. as a constraint, this is surprisingly restrictive! a. it rules out a parameter to [`Decoder::decode_into`](https://docs.rs/yaxpeax-arch/0.2.5/yaxpeax_arch/trait.Decoder.html#tymethod.decode_into): an ignored or unused parameter can still change how `decode_into` inlines. b. it rules out extra state on `Decoder` impls: writing to an unread `Vec` is still extra work at decode time. decoders other than x86 are less performance-sensitive, so **light** regressions in performance may be tolerable. i would also like to: c. not require decoders implement this to participate in code analysis [`yaxpeax-core`](https://github.com/iximeow/yaxpeax-core/) provides. d. re-use existing decode logic -- requiring myself and other decoder authors to write everything twice would be miserable. the point `c` suggests not adding this capability to existing traits. 
taken together, these constraints point towards a _new_ trait that _could_ be implemented as an independent copy of decode logic, like: ```rust trait AnnotatingDecoder { fn decode_with_annotation< T: Reader, >(&mut self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>; } ``` but for implementations, it's easiest to tack this onto an existing `Arch`'s `InstDecoder`. point `b` means no new state, so wherever details about a span of bits are recorded, it should be an additional `&mut` parameter. then, if that parameter is an impl of some `Sink` trait, `yaxpeax_arch` can provide a no-op implementation of the `Sink` and let call sites be eliminated for non-annotating decodes. taken together, this ends up adding three traits: ```rust pub trait DescriptionSink { fn record(&mut self, start: u32, end: u32, description: Descriptor); } pub trait FieldDescription { fn id(&self) -> u32; } pub trait AnnotatingDecoder { type FieldDescription: FieldDescription + Clone + Display + PartialEq; fn decode_with_annotation< T: Reader, S: DescriptionSink >(&self, inst: &mut A::Instruction, words: &mut T, sink: &mut S) -> Result<(), A::DecodeError>; } ``` where `FieldDescription` lets callers that operate generically over spans do *something* with them. implementations can use `id` to tag descriptions that should be ordered together, regardless of the actual order the decoder reports them in. for some architectures, fields parsed later in decoding may influence the understanding of earlier fields, so reporting spans in `id`-order up front is an unreasonable burden. consider an x86 instruction, `660f6ec0` - the leading `66` is an operand-size override, but only after reading `0f6e` is it known that that prefix is changing the operands from `mm`/`dword` registers to `xmm`/`qword` registers. in fact this is only known _after_ reporting the opcode of `0f6e`, too. `start` and `end` are bit offsets where a `description` applies. 
`description`s can overlap in part, or in full. exact bit order is known only by the architecture being decoded; is the order `0-7,8-15,16-23,24-31`, `7-0,15-8,23-16,31-24`, or something else? i'm not sure trying to encode that in `yaxpeax-arch` traits is useful right now. `start` and `end` are `u32` because in my professional opinion, `u16` is cursed, `u8` isn't large enough, and `u32` is the next smallest size. `id()` returns a `u32` because i never want to think of `id` space constraints; even if `id` encoded a `major.minor`-style pair of ordering components, the most constrained layout would be `u16.u16` for at most 65536 values in major or minor. that's a big instruction. ### implementation i've added WIP support for span reporting to `msp430`, `ia64`, and `x86` decoders. i extended `yaxpeax-dis` to [make pretty lines](https://twitter.com/iximeow/status/1423930207614889984). more could be said about that; `id`-order is expected to be, roughly, the order an instruction is decoded. some instruction sets keep the "first" bits as the low-order bits, some others use the higher bits first. so respecting `id`-order necessarily means some instruction sets will have fields "backwards" and make lines extra confusing. decoders probably ought to indicate boundaries for significant parts of decoding, lest large instructions [like itanium](https://twitter.com/iximeow/status/1424092536071618561) be a nebulous mess. maybe `FieldDescription` could have an `is_separator()` to know when an element (and its bit range) indicates the end of part of an instruction? for the most part, things work great. `yaxpeax-x86` had a minor performance regression. tracking it down wasn't too bad: the first one was because `sink` is a fifth argument for a non-inlined function. at this point most ABIs start spilling to memory. so an unused `sink` caused an extra stack write. this was a measurable overhead. 
the second regression was again pretty simple looking at `disas-bench` builds: ```sh diff \ ` # a typical non-formatting build, from cratesio yaxpeax-x86 1.0.4 ` \ <(objdump -d bench-yaxpeax-no-fmt | grep -o ' .*long_mode.*>:') ` # a non-formatting build, from the local patch of yaxpeax-x86 with annotation reported to a no-op sink ` \ <(objdump -d bench-yaxpeax-no-fmt-no-annotation | grep -o ' .*long_mode.*>:') ``` the entire diff output: ```diff > <_ZN11yaxpeax_x869long_mode8read_sib17hdc339ef7a182098aE>: ``` indeed, [`read_sib`](https://github.com/iximeow/yaxpeax-x86/blob/4371ed02ac30cb56ec4ddbf60c87e85c183d860b/src/long_mode/mod.rs#L5769-L5770) is not written as `inline(always)`, so it's possible this might not get inlined sometimes. since the only difference to `read_sib` is an extra parameter, for which all calls are no-ops that ignore arguments, i'm surprised to see the change, anyway. adding `#[inline(always)]` to `read_sib` returned `yaxpeax-x86` to "same-as-before" decode throughput. in the process, i found a slight optimization for `read_sib` that removed a few extra branches from the function. the scrutiny was good after all. ### conclusion in summary, it works. it doesn't slow down callers that don't need spans of information. decoders can implement it optionally and at their leisure, without being ineligible for analysis-oriented libraries. this is almost certainly going to be in `yaxpeax-arch 0.2.6` with implementations trickling into decoders whenever it seems like fun. 
yaxpeax-arch-0.2.7/rust-toolchain000064400000000000000000000000070072674642500151470ustar 000000000000001.54.0 yaxpeax-arch-0.2.7/src/address/mod.rs000064400000000000000000000301170072674642500156170ustar 00000000000000use core::hash::Hash; use core::fmt; use core::ops::{Add, Sub, AddAssign, SubAssign}; use num_traits::identities; use num_traits::{Bounded, WrappingAdd, WrappingSub, CheckedAdd, Zero, One}; #[cfg(feature="use-serde")] use serde::{Deserialize, Serialize}; #[cfg(feature="use-serde")] pub trait AddressDiffAmount: Copy + Clone + PartialEq + PartialOrd + Eq + Ord + identities::Zero + identities::One + Serialize + for<'de> Deserialize<'de> {} #[cfg(not(feature="use-serde"))] pub trait AddressDiffAmount: Copy + Clone + PartialEq + PartialOrd + Eq + Ord + identities::Zero + identities::One {} impl AddressDiffAmount for u64 {} impl AddressDiffAmount for u32 {} impl AddressDiffAmount for u16 {} impl AddressDiffAmount for usize {} /// a struct describing the difference between some pair of `A: Address`. this is primarily useful /// in describing the size of an instruction, or the relative offset of a branch. /// /// for any address type `A`, the following must hold: /// ```rust /// use yaxpeax_arch::AddressBase; /// fn diff_check(left: A, right: A) { /// let diff = left.diff(&right); /// if let Some(offset) = diff { /// assert_eq!(left.wrapping_offset(offset), right); /// } /// } /// ``` /// /// which is to say, `yaxpeax` assumes associativity holds when `diff` yields a `Some`. #[cfg(feature="use-serde")] #[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Deserialize)] pub struct AddressDiff { // the AddressDiffAmount trait fools `Deserialize`'s proc macro, so we have to explicitly write // the bound serde should use. #[serde(bound(deserialize = "T::Diff: AddressDiffAmount"))] amount: T::Diff, } /// a struct describing the difference between some pair of `A: Address`. 
this is primarily useful /// in describing the size of an instruction, or the relative offset of a branch. /// /// for any address type `A`, the following must hold: /// ```rust /// use yaxpeax_arch::AddressBase; /// fn diff_check(left: A, right: A) { /// let diff = left.diff(&right); /// if let Some(offset) = diff { /// assert_eq!(left.wrapping_offset(offset), right); /// } /// } /// ``` /// /// which is to say, `yaxpeax` assumes associativity holds when `diff` yields a `Some`. #[cfg(not(feature="use-serde"))] #[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)] pub struct AddressDiff { amount: T::Diff, } impl AddressDiff { pub fn from_const(amount: T::Diff) -> Self { AddressDiff { amount } } pub fn to_const(&self) -> T::Diff { self.amount } } impl fmt::Debug for AddressDiff where T::Diff: fmt::Debug { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "AddressDiff({:?})", self.amount) } } impl AddressDiff { pub fn one() -> Self { AddressDiff { amount: ::Diff::one(), } } pub fn zero() -> Self { AddressDiff { amount: ::Diff::zero(), } } } impl Sub> for u16 { type Output = Self; fn sub(self, other: AddressDiff) -> Self::Output { self - other.amount } } impl Sub> for u32 { type Output = Self; fn sub(self, other: AddressDiff) -> Self::Output { self - other.amount } } impl Sub> for u64 { type Output = Self; fn sub(self, other: AddressDiff) -> Self::Output { self - other.amount } } impl Sub> for usize { type Output = Self; fn sub(self, other: AddressDiff) -> Self::Output { self - other.amount } } impl Add> for u16 { type Output = Self; fn add(self, other: AddressDiff) -> Self::Output { self + other.amount } } impl Add> for u32 { type Output = Self; fn add(self, other: AddressDiff) -> Self::Output { self + other.amount } } impl Add> for u64 { type Output = Self; fn add(self, other: AddressDiff) -> Self::Output { self + other.amount } } impl Add> for usize { type Output = Self; fn add(self, other: AddressDiff) -> Self::Output { self + other.amount } } impl 
SubAssign> for u16 { fn sub_assign(&mut self, other: AddressDiff) { *self -= other.amount; } } impl SubAssign> for u32 { fn sub_assign(&mut self, other: AddressDiff) { *self -= other.amount; } } impl SubAssign> for u64 { fn sub_assign(&mut self, other: AddressDiff) { *self -= other.amount; } } impl SubAssign> for usize { fn sub_assign(&mut self, other: AddressDiff) { *self -= other.amount; } } impl AddAssign> for u16 { fn add_assign(&mut self, other: AddressDiff) { *self += other.amount; } } impl AddAssign> for u32 { fn add_assign(&mut self, other: AddressDiff) { *self += other.amount; } } impl AddAssign> for u64 { fn add_assign(&mut self, other: AddressDiff) { *self += other.amount; } } impl AddAssign> for usize { fn add_assign(&mut self, other: AddressDiff) { *self += other.amount; } } pub trait AddressBase where Self: AddressDisplay + Copy + Clone + Sized + Hash + Ord + Eq + PartialEq + Bounded + Add, Output=Self> + Sub, Output=Self> + AddAssign> + SubAssign> + identities::Zero + Hash { type Diff: AddressDiffAmount; fn to_linear(&self) -> usize; /// compute the `AddressDiff` between `self` and `other`. /// /// may return `None` if the two addresses aren't comparable. for example, if a pair of /// addresses are a data-space address and code-space address, there may be no scalar that can /// describe the difference between them. 
fn diff(&self, other: &Self) -> Option>; /* { Some(AddressDiff { amount: self.wrapping_sub(other) }) } */ fn wrapping_offset(&self, other: AddressDiff) -> Self; /* { self.wrapping_add(&other.amount) } */ fn checked_offset(&self, other: AddressDiff) -> Option; /* { self.checked_add(&other.amount) } */ } #[cfg(all(feature="use-serde", feature="address-parse"))] pub trait Address where Self: AddressBase + Serialize + for<'de> Deserialize<'de> + AddrParse { } #[cfg(all(feature="use-serde", not(feature="address-parse")))] pub trait Address where Self: AddressBase + Serialize + for<'de> Deserialize<'de> { } #[cfg(all(not(feature="use-serde"), feature="address-parse"))] pub trait Address where Self: AddressBase + AddrParse { } #[cfg(all(not(feature="use-serde"), not(feature="address-parse")))] pub trait Address where Self: AddressBase { } impl AddressBase for u16 { type Diff = Self; fn to_linear(&self) -> usize { *self as usize } fn diff(&self, other: &Self) -> Option> { Some(AddressDiff { amount: self.wrapping_sub(other) }) } fn wrapping_offset(&self, other: AddressDiff) -> Self { self.wrapping_add(&other.amount) } fn checked_offset(&self, other: AddressDiff) -> Option { self.checked_add(&other.amount) } } impl Address for u16 {} impl AddressBase for u32 { type Diff = Self; fn to_linear(&self) -> usize { *self as usize } fn diff(&self, other: &Self) -> Option> { Some(AddressDiff { amount: self.wrapping_sub(other) }) } fn wrapping_offset(&self, other: AddressDiff) -> Self { self.wrapping_add(&other.amount) } fn checked_offset(&self, other: AddressDiff) -> Option { self.checked_add(&other.amount) } } impl Address for u32 {} impl AddressBase for u64 { type Diff = Self; fn to_linear(&self) -> usize { *self as usize } fn diff(&self, other: &Self) -> Option> { Some(AddressDiff { amount: self.wrapping_sub(other) }) } fn wrapping_offset(&self, other: AddressDiff) -> Self { self.wrapping_add(&other.amount) } fn checked_offset(&self, other: AddressDiff) -> Option { 
self.checked_add(&other.amount) } } impl Address for u64 {} impl AddressBase for usize { type Diff = Self; fn to_linear(&self) -> usize { *self } fn diff(&self, other: &Self) -> Option> { Some(AddressDiff { amount: self.wrapping_sub(other) }) } fn wrapping_offset(&self, other: AddressDiff) -> Self { self.wrapping_add(&other.amount) } fn checked_offset(&self, other: AddressDiff) -> Option { self.checked_add(&other.amount) } } impl Address for usize {} pub trait AddressDisplay { type Show: fmt::Display; fn show(&self) -> Self::Show; } impl AddressDisplay for usize { type Show = AddressDisplayUsize; fn show(&self) -> AddressDisplayUsize { AddressDisplayUsize(*self) } } #[repr(transparent)] pub struct AddressDisplayUsize(usize); impl fmt::Display for AddressDisplayUsize { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:#x}", self.0) } } impl AddressDisplay for u64 { type Show = AddressDisplayU64; fn show(&self) -> AddressDisplayU64 { AddressDisplayU64(*self) } } #[repr(transparent)] pub struct AddressDisplayU64(u64); impl fmt::Display for AddressDisplayU64 { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:#x}", self.0) } } impl AddressDisplay for u32 { type Show = AddressDisplayU32; fn show(&self) -> AddressDisplayU32 { AddressDisplayU32(*self) } } #[repr(transparent)] pub struct AddressDisplayU32(u32); impl fmt::Display for AddressDisplayU32 { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:#x}", self.0) } } impl AddressDisplay for u16 { type Show = AddressDisplayU16; fn show(&self) -> AddressDisplayU16 { AddressDisplayU16(*self) } } #[repr(transparent)] pub struct AddressDisplayU16(u16); impl fmt::Display for AddressDisplayU16 { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:#x}", self.0) } } /* * TODO: this should be FromStr. 
* that would require newtyping address primitives, though * * this is not out of the question, BUT is way more work than * i want to put in right now * * this is one of those "clean it up later" situations */ #[cfg(feature="address-parse")] use core::str::FromStr; #[cfg(feature="address-parse")] pub trait AddrParse: Sized { type Err; fn parse_from(s: &str) -> Result; } #[cfg(feature="address-parse")] impl AddrParse for usize { type Err = core::num::ParseIntError; fn parse_from(s: &str) -> Result { if s.starts_with("0x") { usize::from_str_radix(&s[2..], 16) } else { usize::from_str(s) } } } #[cfg(feature="address-parse")] impl AddrParse for u64 { type Err = core::num::ParseIntError; fn parse_from(s: &str) -> Result { if s.starts_with("0x") { u64::from_str_radix(&s[2..], 16) } else { u64::from_str(s) } } } #[cfg(feature="address-parse")] impl AddrParse for u32 { type Err = core::num::ParseIntError; fn parse_from(s: &str) -> Result { if s.starts_with("0x") { u32::from_str_radix(&s[2..], 16) } else { u32::from_str(s) } } } #[cfg(feature="address-parse")] impl AddrParse for u16 { type Err = core::num::ParseIntError; fn parse_from(s: &str) -> Result { if s.starts_with("0x") { u16::from_str_radix(&s[2..], 16) } else { u16::from_str(s) } } } yaxpeax-arch-0.2.7/src/annotation/mod.rs000064400000000000000000000142400072674642500163430ustar 00000000000000//! traits (and convenient impls) for decoders that also produce descriptions of parsed bit fields. //! //! the design of this API is discussed in [`yaxpeax-arch` //! documentation](https://github.com/iximeow/yaxpeax-arch/blob/no-gods-no-/docs/0001-AnnotatingDecoder.md#descriptionsink). //! //! ## usage //! //! [`AnnotatingDecoder::decode_with_annotation`] decodes an instruction much like //! [`crate::Decoder::decode_into`], but also reports descriptions of bit fields to a provided //! [`DescriptionSink`]. [`VecSink`] is likely the `DescriptionSink` of interest to retain fields; //! 
decoders are not required to make any guarantees about the order of descriptions, either by the //! description's associated [`FieldDescription::id`], or with respect to the bits a //! `FieldDescription` is reported against. fields may be described by multiple `FieldDescription` //! with matching `id` and `desc` -- this is to describe data in an instruction where //! non-contiguous bits are taken together for a single detail. for these cases, the various //! `FieldDescription` must compare equal, and users of `yaxpeax-arch` can rely on this equivalence //! for grouping bit ranges. //! //! in a generic setting, there isn't much to do with a `FieldDescription` other than display it. a //! typical use might look something like: //! ``` //! fn show_field_descriptions(decoder: A::Decoder, buf: &[u8]) //! where //! A::Decoder: AnnotatingDecoder, //! A::Instruction: fmt::Display, for<'data> U8Reader<'data>: Reader, //! { //! let mut inst = A::Instruction::default(); //! let mut reader = U8Reader::new(buf); //! let mut sink: VecSink<>::FieldDescription> = VecSink::new(); //! //! decoder.decode_with_annotation(&mut inst, &mut reader, &mut sink).unwrap(); //! //! println!("decoded instruction {}", inst); //! for (start, end, desc) in sink.records.iter() { //! println!(" bits [{}, {}]: {}", start, end, desc); //! } //! } //! ``` //! //! note that the range `[start, end]` for a reported span is _inclusive_. the `end`-th bit of //! an instruction's bit stream is described by the description. //! //! ## implementation guidance //! //! the typical implementation pattern is that an architecture's `Decoder` implements [`crate::Decoder`] //! _and_ [`AnnotatingDecoder`], then callers are free to choose which style of decoding they want. //! [`NullSink`] has a blanket impl of [`DescriptionSink`] for all possible descriptions, and //! discards reported field descriptions. `decode_with_annotation` with annotations reported to a //! 
`NullSink` must be functionally identical to a call to `Decoder::decode_into`. //! //! the important points: //! //! * `AnnotatingDecoder` is an **optional** implementation for decoders. //! * `FieldDescription` in general is oriented towards human-directed output, but implementations //! can be as precise as they want. //! * since bit/byte order varies from architecture to architecture, a field's `start` and `end` //! are defined with some ordering from the corresponding decoder crate. crates should describe the //! bit ordering they select, and where possible, the bit ordering they describe should match //! relevant ISA manuals. //! * `FieldDescription` that return true for [`FieldDescription::is_separator`] are an exception //! to bit span inclusivity: for these descriptions, the bit range should be `[b, b]` where `b` is //! the last bit before the boundary being delimited. unlike other descriptions, `is_separator` //! descriptions describe the space between bits `b` and `b+1`. //! * if a description is to cover multiple bit fields, the reported `FieldDescription` must //! be identical on `id` and `desc` for all involved bit fields. use crate::{Arch, Reader}; use core::fmt::Display; /// implementors of `DescriptionSink` receive descriptions of an instruction's disassembly process /// and relevant offsets in the bitstream being decoded. descriptions are architecture-specific, and /// architectures are expected to be able to turn the bit-level `start` and `end` values into a /// meaningful description of bits in the original instruction stream. pub trait DescriptionSink { /// inform this `DescriptionSink` of a `description` that was informed by bits `start` to /// `end` from the start of an instruction's decoding. `start` and `end` are only relative to the /// instruction being decoded when this `DescriptionSink` was provided, so they will have no /// relation to the position in an underlying data stream used for past or future instructions. 
fn record(&mut self, start: u32, end: u32, description: Descriptor); } pub struct NullSink; impl DescriptionSink for NullSink { fn record(&mut self, _start: u32, _end: u32, _description: T) { } } #[cfg(feature = "std")] pub struct VecSink { pub records: std::vec::Vec<(u32, u32, T)> } #[cfg(feature = "std")] impl VecSink { pub fn new() -> Self { VecSink { records: std::vec::Vec::new() } } } #[cfg(feature = "std")] impl DescriptionSink for VecSink { fn record(&mut self, start: u32, end: u32, description: T) { self.records.push((start, end, description)); } } pub trait FieldDescription { fn id(&self) -> u32; fn is_separator(&self) -> bool; } /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s, with the /// decoder able to report descriptions of bits or fields in the instruction to a sink implementing /// [`DescriptionSink`]. the sink may be [`NullSink`] to discard provided data. decoding with a /// `NullSink` should behave identically to `Decoder::decode_into`. implementors are recommended to /// implement `Decoder::decode_into` as a call to `AnnotatingDecoder::decode_with_annotation` if /// implementing both traits. 
pub trait AnnotatingDecoder<A: Arch + ?Sized> {
    /// the architecture-defined description type this decoder reports to sinks.
    type FieldDescription: FieldDescription + Clone + Display + PartialEq;

    /// decode one instruction from `words` into `inst`, reporting field descriptions to `sink`.
    fn decode_with_annotation<
        T: Reader<A::Address, A::Word>,
        S: DescriptionSink<Self::FieldDescription>
    >(&self, inst: &mut A::Instruction, words: &mut T, sink: &mut S) -> Result<(), A::DecodeError>;
}
yaxpeax-arch-0.2.7/src/color.rs000064400000000000000000000310760072674642500145360ustar 00000000000000
use core::fmt::{self, Display, Formatter};
#[cfg(feature="colors")]
use crossterm::style;

/// a displayable value, optionally paired with the color it should be rendered in.
#[cfg(feature="colors")]
pub enum Colored<T: Display> {
    Color(T, style::Color),
    Just(T)
}

#[cfg(feature="colors")]
impl <T: Display> Display for Colored<T> {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        match self {
            Colored::Color(t, before) => {
                write!(fmt, "{}", style::style(t).with(*before))
            },
            Colored::Just(t) => {
                write!(fmt, "{}", t)
            }
        }
    }
}

/// a displayable value; without the `colors` feature there is no colored variant.
#[cfg(not(feature="colors"))]
pub enum Colored<T: Display> {
    Just(T)
}

#[cfg(not(feature="colors"))]
impl <T: Display> Display for Colored<T> {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        match self {
            Colored::Just(t) => {
                write!(fmt, "{}", t)
            }
        }
    }
}

/// a color scheme: each method wraps `t` in the [`Colored`] appropriate for that kind of token.
pub trait YaxColors {
    fn arithmetic_op<T: Display>(&self, t: T) -> Colored<T>;
    fn stack_op<T: Display>(&self, t: T) -> Colored<T>;
    fn nop_op<T: Display>(&self, t: T) -> Colored<T>;
    fn stop_op<T: Display>(&self, t: T) -> Colored<T>;
    fn control_flow_op<T: Display>(&self, t: T) -> Colored<T>;
    fn data_op<T: Display>(&self, t: T) -> Colored<T>;
    fn comparison_op<T: Display>(&self, t: T) -> Colored<T>;
    fn invalid_op<T: Display>(&self, t: T) -> Colored<T>;
    fn platform_op<T: Display>(&self, t: T) -> Colored<T>;
    fn misc_op<T: Display>(&self, t: T) -> Colored<T>;

    fn register<T: Display>(&self, t: T) -> Colored<T>;
    fn program_counter<T: Display>(&self, t: T) -> Colored<T>;
    fn number<T: Display>(&self, t: T) -> Colored<T>;
    fn zero<T: Display>(&self, t: T) -> Colored<T>;
    fn one<T: Display>(&self, t: T) -> Colored<T>;
    fn minus_one<T: Display>(&self, t: T) -> Colored<T>;
    fn address<T: Display>(&self, t: T) -> Colored<T>;
    fn symbol<T: Display>(&self, t: T) -> Colored<T>;
    fn function<T: Display>(&self, t: T) -> Colored<T>;
}

/// a [`YaxColors`] that never applies a color.
pub struct NoColors;

impl YaxColors for NoColors {
    fn arithmetic_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn stack_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn nop_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn stop_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn control_flow_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn data_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn comparison_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn invalid_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn platform_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn misc_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn register<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn program_counter<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn number<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn zero<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn one<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn minus_one<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn address<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn symbol<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
    fn function<T: Display>(&self, t: T) -> Colored<T> { Colored::Just(t) }
}

/// write `self` to `out`, colored according to `colors`.
pub trait Colorize<T: fmt::Write, Y: YaxColors + ?Sized> {
    fn colorize(&self, colors: &Y, out: &mut T) -> fmt::Result;
}

#[cfg(feature="colors")]
pub use termion_color::ColorSettings;

#[cfg(feature="colors")]
mod termion_color {
    use core::fmt::Display;

    use crossterm::style;

    // only the `use-serde`-gated impl below needs `Serialize`; gate the import the same way so
    // `colors` builds without `use-serde`.
    #[cfg(feature="use-serde")]
    use serde::Serialize;

    use crate::color::{Colored, YaxColors};

    #[cfg(feature="use-serde")]
    impl Serialize for ColorSettings {
        fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            use serde::ser::SerializeStruct;
            // serialized as a struct with zero fields: the individual colors are not written.
            let s = serializer.serialize_struct("ColorSettings", 0)?;
            s.end()
        }
    }

    /// one color per kind of token [`YaxColors`] distinguishes.
    pub struct ColorSettings {
        arithmetic: style::Color,
        stack: style::Color,
        nop: style::Color,
        stop: style::Color,
        control: style::Color,
        data: style::Color,
        comparison: style::Color,
        invalid: style::Color,
        platform: style::Color,
        misc: style::Color,

        register: style::Color,
        program_counter: style::Color,

        number: style::Color,
        zero: style::Color,
        one: style::Color,
        minus_one: style::Color,

        function: style::Color,
        symbol: style::Color,
        address: style::Color,
    }

    impl Default for ColorSettings {
        fn default() -> ColorSettings {
            ColorSettings {
                arithmetic: style::Color::Yellow,
                stack: style::Color::DarkMagenta,
                nop: style::Color::DarkBlue,
                stop: style::Color::Red,
                control: style::Color::DarkGreen,
                data: style::Color::Magenta,
                comparison: style::Color::DarkYellow,
                invalid: style::Color::DarkRed,
                platform: style::Color::DarkCyan,
                misc: style::Color::Cyan,

                register: style::Color::DarkCyan,
                program_counter: style::Color::DarkRed,

                number: style::Color::White,
                zero: style::Color::White,
                one: style::Color::White,
                minus_one: style::Color::White,

                function: style::Color::Green,
                symbol: style::Color::Green,
                address: style::Color::DarkGreen,
            }
        }
    }

    impl YaxColors for ColorSettings {
        fn arithmetic_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.arithmetic) }
        fn stack_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.stack) }
        fn nop_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.nop) }
        fn stop_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.stop) }
        fn control_flow_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.control) }
        fn data_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.data) }
        fn comparison_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.comparison) }
        fn invalid_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.invalid) }
        fn misc_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.misc) }
        fn platform_op<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.platform) }
        fn register<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.register) }
        fn program_counter<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.program_counter) }
        fn number<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.number) }
        fn zero<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.zero) }
        fn one<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.one) }
        fn minus_one<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.minus_one) }
        fn address<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.address) }
        fn symbol<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.symbol) }
        fn function<T: Display>(&self, t: T) -> Colored<T> { Colored::Color(t, self.function) }
    }

    // `Some(settings)` colors with those settings; `None` applies no color.
    impl <'a> YaxColors for Option<&'a ColorSettings> {
        fn arithmetic_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.arithmetic_op(t), None => Colored::Just(t) }
        }
        fn stack_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.stack_op(t), None => Colored::Just(t) }
        }
        fn nop_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.nop_op(t), None => Colored::Just(t) }
        }
        fn stop_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.stop_op(t), None => Colored::Just(t) }
        }
        fn control_flow_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.control_flow_op(t), None => Colored::Just(t) }
        }
        fn data_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.data_op(t), None => Colored::Just(t) }
        }
        fn comparison_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.comparison_op(t), None => Colored::Just(t) }
        }
        fn invalid_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.invalid_op(t), None => Colored::Just(t) }
        }
        fn misc_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.misc_op(t), None => Colored::Just(t) }
        }
        fn platform_op<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.platform_op(t), None => Colored::Just(t) }
        }
        fn register<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.register(t), None => Colored::Just(t) }
        }
        fn program_counter<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.program_counter(t), None => Colored::Just(t) }
        }
        fn number<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.number(t), None => Colored::Just(t) }
        }
        fn zero<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.zero(t), None => Colored::Just(t) }
        }
        fn one<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.one(t), None => Colored::Just(t) }
        }
        fn minus_one<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.minus_one(t), None => Colored::Just(t) }
        }
        fn address<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.address(t), None => Colored::Just(t) }
        }
        fn symbol<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.symbol(t), None => Colored::Just(t) }
        }
        fn function<T: Display>(&self, t: T) -> Colored<T> {
            match self { Some(colors) => colors.function(t), None => Colored::Just(t) }
        }
    }
}

/*
 * can this be a derivable trait or something?
 */
/*
impl Display for T {
    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
        self.colorize(None, fmt)
    }
}
*/

/*
 * and make this auto-derive from a ShowContextual impl?
 */
/*
impl Colorize for T where T: ShowContextual {
    fn colorize(&self, colors: Option<&ColorSettings>, fmt: &mut Formatter) -> fmt::Result {
        self.contextualize(colors, None, fmt)
    }
}
*/
yaxpeax-arch-0.2.7/src/display.rs000064400000000000000000000067570072674642500150750ustar 00000000000000
use crate::YaxColors;

use core::fmt;
use core::num::Wrapping;
use core::ops::Neg;

/// how [`format_number_i32`] should render its argument.
pub enum NumberStyleHint {
    Signed,
    HexSigned,
    SignedWithSign,
    HexSignedWithSign,
    SignedWithSignSplit,
    HexSignedWithSignSplit,
    Unsigned,
    HexUnsigned,
    UnsignedWithSign,
    HexUnsignedWithSign
}

/// write `i` to `f` in the style selected by `hint`.
///
/// the `*Unsigned*` hints reinterpret `i` as a `u32`. the `*Split` hints write the sign, a space,
/// and then the magnitude. the four non-split `*WithSign` hints write the number without
/// consulting `colors`.
pub fn format_number_i32<W: fmt::Write, Y: YaxColors>(colors: &Y, f: &mut W, i: i32, hint: NumberStyleHint) -> fmt::Result {
    match hint {
        NumberStyleHint::Signed => {
            write!(f, "{}", colors.number(i))
        },
        NumberStyleHint::HexSigned => {
            write!(f, "{}", colors.number(signed_i32_hex(i)))
        },
        NumberStyleHint::Unsigned => {
            write!(f, "{}", colors.number(i as u32))
        },
        NumberStyleHint::HexUnsigned => {
            write!(f, "{}", colors.number(u32_hex(i as u32)))
        },
        NumberStyleHint::SignedWithSignSplit => {
            if i == core::i32::MIN {
                // `-i32::MIN` is not representable as an `i32`, so its magnitude (2^31) is
                // written out literally.
                write!(f, "- {}", colors.number("2147483648"))
            } else if i < 0 {
                write!(f, "- {}", colors.number(-Wrapping(i)))
            } else {
                write!(f, "+ {}", colors.number(i))
            }
        }
        NumberStyleHint::HexSignedWithSignSplit => {
            if i == core::i32::MIN {
                // as above: the magnitude of `i32::MIN` is 2^31 == 0x80000000.
                write!(f, "- {}", colors.number("0x80000000"))
            } else if i < 0 {
                write!(f, "- {}", colors.number(u32_hex((-Wrapping(i)).0 as u32)))
            } else {
                write!(f, "+ {}", colors.number(u32_hex(i as u32)))
            }
        },
        NumberStyleHint::HexSignedWithSign => {
            write!(f, "{}", signed_i32_hex(i))
        },
        NumberStyleHint::SignedWithSign => {
            write!(f, "{:+}", i)
        }
        NumberStyleHint::HexUnsignedWithSign => {
            write!(f, "{:+#x}", i as u32)
        },
        NumberStyleHint::UnsignedWithSign => {
            write!(f, "{:+}", i as u32)
        }
    }
}

/// displays `value` as `0x`-prefixed hex. when `negative` is set, a leading `-` is written
/// followed by the hex of the wrapping negation of `value`.
pub struct SignedHexDisplay<T> {
    value: T,
    negative: bool
}

impl<T: fmt::LowerHex + Neg + Copy> fmt::Display for SignedHexDisplay<T> where Wrapping<T>: Neg, <Wrapping<T> as Neg>::Output: fmt::LowerHex {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if self.negative {
            write!(f, "-{:#x}", -Wrapping(self.value))
        } else {
            write!(f, "{:#x}", self.value)
        }
    }
}

pub fn u8_hex(value: u8) -> SignedHexDisplay<i8> {
    SignedHexDisplay {
        value: value as i8,
        negative: false,
    }
}

pub fn signed_i8_hex(imm: i8) -> SignedHexDisplay<i8> {
    SignedHexDisplay {
        value: imm,
        negative: imm < 0,
    }
}

pub fn u16_hex(value: u16) -> SignedHexDisplay<i16> {
    SignedHexDisplay {
        value: value as i16,
        negative: false,
    }
}

pub fn signed_i16_hex(imm: i16) -> SignedHexDisplay<i16> {
    SignedHexDisplay {
        value: imm,
        negative: imm < 0,
    }
}

pub fn u32_hex(value: u32) -> SignedHexDisplay<i32> {
    SignedHexDisplay {
        value: value as i32,
        negative: false,
    }
}

pub fn signed_i32_hex(imm: i32) -> SignedHexDisplay<i32> {
    SignedHexDisplay {
        value: imm,
        negative: imm < 0,
    }
}

pub fn u64_hex(value: u64) -> SignedHexDisplay<i64> {
    SignedHexDisplay {
        value: value as i64,
        negative: false,
    }
}

pub fn signed_i64_hex(imm: i64) -> SignedHexDisplay<i64> {
    SignedHexDisplay {
        value: imm,
        negative: imm < 0,
    }
}
yaxpeax-arch-0.2.7/src/lib.rs000064400000000000000000000233030072674642500141600ustar 00000000000000
#![no_std]
#![doc = include_str!("../README.md")]

use core::fmt::{self, Debug, Display};
use core::hash::Hash;

#[cfg(feature="use-serde")]
#[macro_use]
extern crate serde_derive;
#[cfg(feature="use-serde")]
use serde::{Serialize, Deserialize};

mod address;
pub use address::{Address, AddressBase, AddressDiff, AddressDiffAmount, AddressDisplay};
pub use address::{AddressDisplayUsize, AddressDisplayU64, AddressDisplayU32, AddressDisplayU16};
#[cfg(feature="address-parse")]
pub use address::AddrParse;

pub mod annotation;

mod color;
pub use color::{Colorize, NoColors, YaxColors};

#[cfg(feature="colors")]
pub use color::ColorSettings;

pub mod display;

mod reader;
pub use reader::{Reader, ReaderBuilder, ReadError, U8Reader, U16le, U16be, U32le, U32be, U64le, U64be};

/// the minimum set of errors a `yaxpeax-arch` disassembler may produce.
///
/// it is permissible for an implementor of `DecodeError` to have items that return `false` for
/// all these functions; decoders are permitted to error in ways that `yaxpeax-arch` does not know
/// about.
pub trait DecodeError: PartialEq + Display + Debug + Send + Sync + 'static {
    /// did the decoder fail because it reached the end of input?
    fn data_exhausted(&self) -> bool;
    /// did the decoder error because the instruction's opcode is invalid?
    ///
    /// this may not be a sensical question for some instruction sets - `bad_opcode` should
    /// generally indicate an issue with the instruction itself. this is in contrast to one
    /// specific operand being invalid for the instruction, or some other issue to do with decoding
    /// data beyond the top-level instruction. the "opcode"/"operand" distinction is often fuzzy
    /// and left as best-effort for decoder implementors.
    fn bad_opcode(&self) -> bool;
    /// did the decoder error because an operand of the instruction to decode is invalid?
    ///
    /// similar to [`DecodeError::bad_opcode`], this is a subjective distinction and best-effort on
    /// the part of implementors.
    fn bad_operand(&self) -> bool;
    /// a human-friendly description of this decode error.
    fn description(&self) -> &'static str;
}

/// a minimal enum implementing `DecodeError`. this is intended to be enough for a low effort,
/// low-fidelity error taxonomy, without boilerplate of a `DecodeError` implementation.
#[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum StandardDecodeError { ExhaustedInput, InvalidOpcode, InvalidOperand, } /// a slightly less minimal enum `DecodeError`. similar to `StandardDecodeError`, this is an /// anti-boilerplate measure. it additionally provides `IncompleteDecoder`, making it suitable to /// represent error kinds for decoders that are ... not yet complete. #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum StandardPartialDecoderError { ExhaustedInput, InvalidOpcode, InvalidOperand, IncompleteDecoder, } #[cfg(feature = "std")] extern crate std; #[cfg(feature = "std")] impl std::error::Error for StandardDecodeError { fn description(&self) -> &str { ::description(self) } } #[cfg(feature = "std")] impl std::error::Error for StandardPartialDecoderError { fn description(&self) -> &str { ::description(self) } } impl fmt::Display for StandardDecodeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(self.description()) } } impl fmt::Display for StandardPartialDecoderError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str(self.description()) } } impl DecodeError for StandardDecodeError { fn data_exhausted(&self) -> bool { *self == StandardDecodeError::ExhaustedInput } fn bad_opcode(&self) -> bool { *self == StandardDecodeError::InvalidOpcode } fn bad_operand(&self) -> bool { *self == StandardDecodeError::InvalidOperand } fn description(&self) -> &'static str { match self { StandardDecodeError::ExhaustedInput => "exhausted input", StandardDecodeError::InvalidOpcode => "invalid opcode", StandardDecodeError::InvalidOperand => "invalid operand", } } } impl DecodeError for StandardPartialDecoderError { fn data_exhausted(&self) -> bool { *self == StandardPartialDecoderError::ExhaustedInput } fn bad_opcode(&self) -> bool { *self == StandardPartialDecoderError::InvalidOpcode } fn bad_operand(&self) -> bool { *self == StandardPartialDecoderError::InvalidOperand } fn description(&self) -> &'static str { match self 
{ StandardPartialDecoderError::ExhaustedInput => "exhausted input", StandardPartialDecoderError::InvalidOpcode => "invalid opcode", StandardPartialDecoderError::InvalidOperand => "invalid operand", StandardPartialDecoderError::IncompleteDecoder => "incomplete decoder", } } } #[derive(Copy, Clone)] struct NoDescription {} impl fmt::Display for NoDescription { fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result { Ok(()) } } /// an interface to decode [`Arch::Instruction`] words from a reader of [`Arch::Word`]s. errors are /// the architecture-defined [`DecodeError`] implemention. pub trait Decoder { /// decode one instruction for this architecture from the [`crate::Reader`] of this /// architecture's `Word`. fn decode>(&self, words: &mut T) -> Result { let mut inst = A::Instruction::default(); self.decode_into(&mut inst, words).map(|_: ()| inst) } /// decode one instruction for this architecture from the [`crate::Reader`] of this /// architecture's `Word`, writing into the provided `inst`. /// /// SAFETY: /// /// while `inst` MUST be left in a state that does not violate Rust's safety guarantees, /// implementors are NOT obligated to leave `inst` in a semantically meaningful state if /// decoding fails. if `decode_into` returns an error, callers may find contradictory and /// useless information in `inst`, as well as *stale data* from whatever was passed in. 
fn decode_into>(&self, inst: &mut A::Instruction, words: &mut T) -> Result<(), A::DecodeError>; } #[cfg(feature = "use-serde")] pub trait AddressBounds: Address + Debug + Hash + PartialEq + Eq + Serialize + for<'de> Deserialize<'de> {} #[cfg(not(feature = "use-serde"))] pub trait AddressBounds: Address + Debug + Hash + PartialEq + Eq {} #[cfg(feature = "use-serde")] impl AddressBounds for T where T: Address + Debug + Hash + PartialEq + Eq + Serialize + for<'de> Deserialize<'de> {} #[cfg(not(feature = "use-serde"))] impl AddressBounds for T where T: Address + Debug + Hash + PartialEq + Eq {} #[cfg(feature = "std")] /// this is not a particularly interesting trait. it just exists to add a `std::error::Error` /// bound onto `DecodeError` for `std` builds. pub trait DecodeErrorBounds: std::error::Error + DecodeError {} #[cfg(feature = "std")] impl DecodeErrorBounds for T {} #[cfg(not(feature = "std"))] /// this is not a particularly interesting trait. it just exists to add a `std::error::Error` /// bound onto `DecodeError` for `std` builds. pub trait DecodeErrorBounds: DecodeError {} #[cfg(not(feature = "std"))] impl DecodeErrorBounds for T {} /// a collection of associated type parameters that constitute the definitions for an instruction /// set. `Arch` provides an `Instruction` and its associated `Operand`s, which is guaranteed to be /// decodable by this `Arch::Decoder`. `Arch::Decoder` can always be constructed with a `Default` /// implementation, and decodes from a `Reader`. /// /// `Arch` is suitable as the foundational trait to implement more complex logic on top of; for /// example, it would be entirely expected to have a /// ```text /// pub fn emulate>( /// reader: &mut Reader, /// emu: &mut E /// ) -> Result; /// ``` /// /// in some library built on top of `yaxpeax-arch`. 
pub trait Arch { type Word: Debug + Display + PartialEq + Eq; type Address: AddressBounds; type Instruction: Instruction + LengthedInstruction> + Debug + Default + Sized; type DecodeError: DecodeErrorBounds + Debug + Display; type Decoder: Decoder + Default; type Operand; } /// instructions have lengths, and minimum possible sizes for advancing a decoder on error. /// /// unfortunately, this means calling `x.len()` for some `Arch::Instruction` requires importing /// this trait. sorry. pub trait LengthedInstruction { type Unit; /// the length, in terms of `Unit`, of this instruction. because `Unit` will be a diff of an /// architecture's `Address` type, this almost always is a number of bytes. implementations /// should indicate if this is ever not the case. fn len(&self) -> Self::Unit; /// the length, in terms of `Unit`, of the shortest possible instruction in a given /// architecture.. because `Unit` will be a diff of an architecture's `Address` type, this /// almost always is a number of bytes. implementations should indicate if this is ever not the /// case. fn min_size() -> Self::Unit; } pub trait Instruction { fn well_defined(&self) -> bool; } pub trait ShowContextual { fn contextualize(&self, colors: &Y, address: Addr, context: Option<&Ctx>, out: &mut T) -> fmt::Result; } /* impl > ShowContextual for U { fn contextualize(&self, colors: Option<&ColorSettings>, context: Option<&C>, out: &mut T) -> fmt::Result { self.colorize(colors, out) } } */ yaxpeax-arch-0.2.7/src/reader.rs000064400000000000000000000256220072674642500146620ustar 00000000000000use crate::StandardDecodeError; impl From for StandardDecodeError { fn from(_: ReadError) -> StandardDecodeError { StandardDecodeError::ExhaustedInput } } #[derive(Debug, PartialEq, Eq, Copy, Clone)] pub enum ReadError { ExhaustedInput, IOError(&'static str), } /// a trait defining how `Item`-sized words are read at `Address`-positioned offsets into some /// stream of data. 
for *most* uses, [`crate::U8Reader`] probably is sufficient. when /// reading from data sources that aren't `&[u8]`, `Address` isn't a multiple of `u8`, or `Item` /// isn't a multiple of 8 bits, `U8Reader` won't be sufficient. pub trait Reader { fn next(&mut self) -> Result; /// read `buf`-many items from this reader in bulk. if `Reader` cannot read `buf`-many items, /// return `ReadError::ExhaustedInput`. fn next_n(&mut self, buf: &mut [Item]) -> Result<(), ReadError>; /// mark the current position as where to measure `offset` against. fn mark(&mut self); /// the difference, in `Address`, between the current `Reader` position and its last `mark`. /// when created, a `Reader`'s initial position is `mark`ed, so creating a `Reader` and /// immediately calling `offset()` must return `Address::zero()`. fn offset(&mut self) -> Address; /// the difference, in `Address`, between the current `Reader` position and the initial offset /// when constructed. fn total_offset(&mut self) -> Address; } /// a trait defining how to build a `Reader` from some data source (`Self`). /// definitions of `ReaderBuilder` are provided for `U8Reader` on `Address` and `Word` types that /// `yaxpeax_arch` provides - external decoder implementations should also provide `ReaderBuilder` /// impls if they use custom `Reader` types. pub trait ReaderBuilder where Self: Sized { type Result: Reader; /// construct a reader from `data` beginning at `addr` from its beginning. fn read_at(data: Self, addr: Address) -> Self::Result; /// construct a reader from `data` beginning at the start of `data`. fn read_from(data: Self) -> Self::Result { Self::read_at(data, Address::zero()) } } /// a struct for `Reader` impls that can operate on units of `u8`. 
pub struct U8Reader<'a> { start: *const u8, data: *const u8, end: *const u8, mark: *const u8, _lifetime: core::marker::PhantomData<&'a [u8]>, } impl<'a> U8Reader<'a> { pub fn new(data: &'a [u8]) -> U8Reader<'a> { // WHY: either on <64b systems we panic on `data.len() > isize::MAX`, or we compute end // without `offset` (which would be UB for such huge slices) #[cfg(not(target_pointer_width = "64"))] let end = data.as_ptr().wrapping_add(data.len()); // SAFETY: the slice was valid, so data + data.len() does not overflow. at the moment, // there aren't 64-bit systems with 63 bits of virtual address space, so it's not possible // to have a slice length larger than 64-bit isize::MAX. #[cfg(target_pointer_width = "64")] let end = unsafe { data.as_ptr().offset(data.len() as isize) }; U8Reader { start: data.as_ptr(), data: data.as_ptr(), end, mark: data.as_ptr(), _lifetime: core::marker::PhantomData, } } } /* a `std::io::Read`-friendly `Reader` would take some thought. this was an old impl, and now would * require something like * ``` * pub struct IoReader<'io, T: std::io::Read> { * io: &io mut T, * count: u64, * start: u64, * } * ``` */ /* #[cfg(feature = "std")] impl Reader for T { fn next(&mut self) -> Result { let mut buf = [0u8]; match self.read(&mut buf) { Ok(0) => { Err(ReadError::ExhaustedInput) } Ok(1) => { Ok(buf[0]) } Err(_) => { Err(ReadError::IOError("error")) } } } } */ macro_rules! word_wrapper { ($name:ident, $underlying:ident) => { #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Copy, Clone)] pub struct $name(pub $underlying); impl core::fmt::Display for $name { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "{}", self.0) } } } } word_wrapper!(U16le, u16); word_wrapper!(U16be, u16); word_wrapper!(U32le, u32); word_wrapper!(U32be, u32); word_wrapper!(U64le, u64); word_wrapper!(U64be, u64); macro_rules! 
u8reader_reader_impl { ($addr_size:ident, $word:ident, $word_from_slice:expr, $words_from_slice:expr) => { impl Reader<$addr_size, $word> for U8Reader<'_> { #[inline] fn next(&mut self) -> Result<$word, ReadError> { let data_size = self.end as usize - self.data as usize; if core::mem::size_of::<$word>() > data_size { return Err(ReadError::ExhaustedInput); } // `word_from_slice` knows that we have bounds-checked that `word`-many bytes are // available. let word = $word_from_slice(self.data); unsafe { self.data = self.data.offset(core::mem::size_of::<$word>() as isize); } Ok(word) } #[inline] fn next_n(&mut self, buf: &mut [$word]) -> Result<(), ReadError> { let data_size = self.end as usize - self.data as usize; let words_size_bytes = buf.len() * core::mem::size_of::<$word>(); if words_size_bytes > data_size { return Err(ReadError::ExhaustedInput); } // `word_from_slice` knows that we have bounds-checked that `word`-many bytes are // available. $words_from_slice(self.data, buf); unsafe { self.data = self.data.offset(words_size_bytes as isize); } Ok(()) } #[inline] fn mark(&mut self) { self.mark = self.data; } #[inline] fn offset(&mut self) -> $addr_size { (self.data as usize - self.mark as usize) as $addr_size / (core::mem::size_of::<$word>() as $addr_size) } #[inline] fn total_offset(&mut self) -> $addr_size { (self.data as usize - self.start as usize) as $addr_size / (core::mem::size_of::<$word>() as $addr_size) } } impl<'data> ReaderBuilder<$addr_size, $word> for &'data [u8] { type Result = U8Reader<'data>; fn read_at(data: Self, addr: $addr_size) -> Self::Result { U8Reader::new(&data[(addr as usize)..]) } } } } macro_rules! 
u8reader_each_addr_size { ($word:ident, $word_from_slice:expr, $words_from_slice:expr) => { u8reader_reader_impl!(u64, $word, $word_from_slice, $words_from_slice); u8reader_reader_impl!(u32, $word, $word_from_slice, $words_from_slice); u8reader_reader_impl!(u16, $word, $word_from_slice, $words_from_slice); } } u8reader_each_addr_size!(u8, |ptr: *const u8| { unsafe { core::ptr::read(ptr) } }, |ptr: *const u8, buf: &mut [u8]| { unsafe { core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr(), buf.len()) } } ); u8reader_each_addr_size!(U16le, |ptr: *const u8| { let mut word = [0u8; 2]; unsafe { core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len()); } U16le(u16::from_le_bytes(word)) }, |ptr: *const u8, buf: &mut [U16le]| { // `U16le` are layout-identical to u16, so we can just copy into buf unsafe { core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::()) } } ); u8reader_each_addr_size!(U32le, |ptr: *const u8| { let mut word = [0u8; 4]; unsafe { core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len()); } U32le(u32::from_le_bytes(word)) }, |ptr: *const u8, buf: &mut [U32le]| { // `U32le` are layout-identical to u32, so we can just copy into buf unsafe { core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::()) } } ); u8reader_each_addr_size!(U64le, |ptr: *const u8| { let mut word = [0u8; 8]; unsafe { core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len()); } U64le(u64::from_le_bytes(word)) }, |ptr: *const u8, buf: &mut [U64le]| { // `U64le` are layout-identical to u64, so we can just copy into buf unsafe { core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::()) } } ); u8reader_each_addr_size!(U16be, |ptr: *const u8| { let mut word = [0u8; 2]; unsafe { core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len()); } U16be(u16::from_be_bytes(word)) }, |ptr: *const u8, buf: &mut [U16be]| { // `U16be` are 
layout-identical to u16, so we can just copy into buf unsafe { core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::()) } // but now we have to bswap all the words for i in 0..buf.len() { buf[i] = U16be(buf[i].0.swap_bytes()); } } ); u8reader_each_addr_size!(U32be, |ptr: *const u8| { let mut word = [0u8; 4]; unsafe { core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len()); } U32be(u32::from_be_bytes(word)) }, |ptr: *const u8, buf: &mut [U32be]| { // `U32be` are layout-identical to u32, so we can just copy into buf unsafe { core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::()) } // but now we have to bswap all the words for i in 0..buf.len() { buf[i] = U32be(buf[i].0.swap_bytes()); } } ); u8reader_each_addr_size!(U64be, |ptr: *const u8| { let mut word = [0u8; 8]; unsafe { core::ptr::copy_nonoverlapping(ptr, word.as_mut_ptr(), word.len()); } U64be(u64::from_be_bytes(word)) }, |ptr: *const u8, buf: &mut [U64be]| { // `U64be` are layout-identical to u64, so we can just copy into buf unsafe { core::ptr::copy_nonoverlapping(ptr, buf.as_mut_ptr() as *mut u8, buf.len() * core::mem::size_of::()) } // but now we have to bswap all the words for i in 0..buf.len() { buf[i] = U64be(buf[i].0.swap_bytes()); } } ); yaxpeax-arch-0.2.7/tests/lib.rs000064400000000000000000000042000072674642500145260ustar 00000000000000use yaxpeax_arch::AddressBase; mod reader; #[test] fn test_u16() { for l in 0..100 { for r in 0..=core::u16::MAX { assert_eq!(r.wrapping_offset(l.diff(&r).expect("u16 addresses always have valid diffs")), l); } } } #[test] fn generic_error_can_bail() { use yaxpeax_arch::{Arch, Decoder, Reader}; #[allow(dead_code)] fn decode>>(data: U, decoder: &A::Decoder) -> anyhow::Result<()> { let mut reader = data.into(); decoder.decode(&mut reader)?; Ok(()) } } #[test] fn error_can_bail() { use yaxpeax_arch::{Arch, AddressDiff, Decoder, Reader, LengthedInstruction, Instruction, 
StandardDecodeError, U8Reader}; struct TestIsa {} #[derive(Debug, Default)] struct TestInst {} impl Arch for TestIsa { type Word = u8; type Address = u64; type Instruction = TestInst; type Decoder = TestIsaDecoder; type DecodeError = StandardDecodeError; type Operand = (); } impl Instruction for TestInst { fn well_defined(&self) -> bool { true } } impl LengthedInstruction for TestInst { type Unit = AddressDiff; fn len(&self) -> Self::Unit { AddressDiff::from_const(1) } fn min_size() -> Self::Unit { AddressDiff::from_const(1) } } struct TestIsaDecoder {} impl Default for TestIsaDecoder { fn default() -> Self { TestIsaDecoder {} } } impl Decoder for TestIsaDecoder { fn decode_into>(&self, _inst: &mut TestInst, _words: &mut T) -> Result<(), StandardDecodeError> { Err(StandardDecodeError::ExhaustedInput) } } #[derive(Debug, PartialEq, thiserror::Error)] pub enum Error { #[error("decode error")] TestDecode(#[from] StandardDecodeError), } fn exercise_eq() -> Result<(), Error> { let mut reader = U8Reader::new(&[]); TestIsaDecoder::default().decode(&mut reader)?; Ok(()) } assert_eq!(exercise_eq(), Err(Error::TestDecode(StandardDecodeError::ExhaustedInput))); } yaxpeax-arch-0.2.7/tests/reader.rs000064400000000000000000000013730072674642500152320ustar 00000000000000use yaxpeax_arch::{Reader, U8Reader, U16le, U32le}; #[test] fn reader_offset_is_words_not_bytes() { fn test_u16>(reader: &mut T) { reader.mark(); assert_eq!(reader.offset(), 0); reader.next().unwrap(); assert_eq!(reader.offset(), 1); reader.mark(); reader.next().unwrap(); assert_eq!(reader.offset(), 1); assert_eq!(reader.total_offset(), 2); } fn test_u32>(reader: &mut T) { reader.mark(); assert_eq!(reader.offset(), 0); reader.next().unwrap(); assert_eq!(reader.offset(), 1); } test_u16(&mut U8Reader::new(&[0x01, 0x02, 0x03, 0x04])); test_u32(&mut U8Reader::new(&[0x01, 0x02, 0x03, 0x04])); }