"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --- LLVM Exceptions to the Apache 2.0 License ---- As an exception, if, as a result of your compiling your source code, portions of this Software are embedded into an Object form of such source code, you may redistribute such embedded portions in such Object form without complying with the conditions of Sections 4(a), 4(b) and 4(d) of the License. In addition, if you combine or link compiled forms of this Software with software that is licensed under the GPLv2 ("Combined Software") and if a court of competent jurisdiction determines that the patent provision (Section 3), the indemnity provision (Section 9) or other Section of the License conflicts with the conditions of the GPLv2, you may retroactively and prospectively choose to deem waived or otherwise exclude such Section(s) of the License, but only in their entirety and only with respect to the Combined Software. 
## regalloc2: another register allocator

This is a register allocator that started life as, and is about 50%
still, a port of IonMonkey's backtracking register allocator to Rust.
In many regards, it has been generalized, optimized, and improved
since the initial port. In addition, it contains substantial amounts
of testing infrastructure (fuzzing harnesses and checkers) that does
not exist in the original IonMonkey allocator.

See the [design overview](doc/DESIGN.md) for (much!) more detail on
how the allocator works.

## License

This crate is licensed under the Apache 2.0 License with LLVM
Exception. This license text can be found in the file `LICENSE`.

Parts of the code are derived from regalloc.rs: in particular,
`src/checker.rs` and `src/domtree.rs`. This crate has the same license
as regalloc.rs, so the license on these files does not differ.

# regalloc2 Design Overview

This document describes the basic architecture of the regalloc2
register allocator. It describes the externally-visible interface
(input CFG, instructions, operands, with their invariants; meaning of
various parts of the output); core data structures; and the
allocation pipeline, or series of algorithms that compute an
allocation. It ends with a description of future work and
expectations, as well as an appendix that notes design influences and
similarities to the IonMonkey backtracking allocator.

# API, Input IR and Invariants

The toplevel API to regalloc2 consists of a single entry point `run()`
that takes a register environment, which specifies all physical
registers, and the input program. The function returns either an
error or an `Output` struct that provides allocations for each
operand and a vector of additional instructions (moves, loads,
stores) to insert.

## Register Environment

The allocator takes a `MachineEnv` which specifies, for each of the
two register classes `Int` and `Float`, a vector of `PReg`s by
index. A `PReg` is nothing more than the class and index within the
class; the allocator does not need to know anything more.

The `MachineEnv` provides a vector of preferred and non-preferred
physical registers per class. Any register not in either vector will
not be allocated. Usually, registers that do not need to be saved in
the prologue if used (i.e., caller-save registers) are given in the
"preferred" vector.

The environment also provides exactly one scratch register per class.
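To make the register environment concrete, here is a small sketch of the kind of per-class description the allocator consumes. The type and field names are illustrative simplifications (not the crate's actual `MachineEnv` definition), and the role of the scratch register is explained in the paragraph that follows the sketch.

```rust
// Illustrative sketch only: simplified stand-ins for the register
// environment described above, not regalloc2's actual types.
#![allow(dead_code)] // the demo only exercises the Int class

#[derive(Clone, Copy, Debug)]
enum RegClass {
    Int,
    Float,
}

/// A physical register is nothing more than a class plus an index
/// within that class.
#[derive(Clone, Copy, Debug)]
struct PhysReg {
    class: RegClass,
    index: u8,
}

/// Per-class register environment: preferred registers are tried first
/// (typically caller-save, since they need no prologue save), then
/// non-preferred ones; the scratch register is reserved for breaking
/// cycles when resolving parallel moves.
struct ClassEnv {
    preferred: Vec<PhysReg>,
    non_preferred: Vec<PhysReg>,
    scratch: PhysReg,
}

fn main() {
    let int_env = ClassEnv {
        preferred: vec![
            PhysReg { class: RegClass::Int, index: 0 },
            PhysReg { class: RegClass::Int, index: 1 },
        ],
        non_preferred: vec![
            PhysReg { class: RegClass::Int, index: 2 },
            PhysReg { class: RegClass::Int, index: 3 },
        ],
        // Not listed in either allocatable vector.
        scratch: PhysReg { class: RegClass::Int, index: 4 },
    };
    println!(
        "{} allocatable int registers, scratch = {:?}",
        int_env.preferred.len() + int_env.non_preferred.len(),
        int_env.scratch
    );
}
```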
This register must not be in the preferred or non-preferred vectors, and is used whenever a set of moves that need to occur logically in parallel have a cycle (for a simple example, consider a swap `r0, r1 := r1, r0`). With some more work, we could potentially remove the need for the scratch register by requiring support for an additional edit type from the client ("swap"), but we have not pursued this. ## CFG and Instructions The allocator operates on an input program that is in a standard CFG representation: the function body is a sequence of basic blocks, and each block has a sequence of instructions and zero or more successors. The allocator also requires the client to provide predecessors for each block, and these must be consistent with the successors. Instructions are opaque to the allocator except for a few important bits: (1) `is_ret` (is a return instruction); (2) `is_branch` (is a branch instruction); and (3) a vector of Operands, covered below. Every block must end in a return or branch. Both instructions and blocks are named by indices in contiguous index spaces. A block's instructions must be a contiguous range of instruction indices, and block i's first instruction must come immediately after block i-1's last instruction. The CFG must have *no critical edges*. A critical edge is an edge from block A to block B such that A has more than one successor *and* B has more than one predecessor. For this definition, the entry block has an implicit predecessor, and any block that ends in a return has an implicit successor. Note that there are *no* requirements related to the ordering of blocks, and there is no requirement that the control flow be reducible. Some *heuristics* used by the allocator will perform better if the code is reducible and ordered in reverse postorder (RPO), however: in particular, (1) this interacts better with the contiguous-range-of-instruction-indices live range representation that we use, and (2) the "approximate loop depth" metric will actually be exact if both these conditions are met. ## Operands and VRegs Every instruction operates on values by way of `Operand`s. An operand consists of the following fields: - VReg, or virtual register. *Every* operand mentions a virtual register, even if it is constrained to a single physical register in practice. This is because we track liveranges uniformly by vreg. - Policy, or "constraint". Every reference to a vreg can apply some constraint to the vreg at that point in the program. Valid policies are: - Any location; - Any register of the vreg's class; - Any stack slot; - A particular fixed physical register; or - For a def (output), a *reuse* of an input register. - The "kind" of reference to this vreg: Def, Use, Mod. A def (definition) writes to the vreg, and disregards any possible earlier value. A mod (modify) reads the current value then writes a new one. A use simply reads the vreg's value. - The position: before or after the instruction. - Note that to have a def (output) register available in a way that does not conflict with inputs, the def should be placed at the "before" position. Similarly, to have a use (input) register available in a way that does not conflict with outputs, the use should be placed at the "after" position. VRegs, or virtual registers, are specified by an index and a register class (Float or Int). The classes are not given separately; they are encoded on every mention of the vreg. (In a sense, the class is an extra index bit, or part of the register name.) 
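As an illustration of the operand shape just described, the following sketch models an operand as a plain (unpacked) struct of vreg, constraint, kind, and position, and builds the operands for a hypothetical three-register add. These are simplified stand-in types chosen for the example, not the crate's actual bit-packed `Operand`.

```rust
// Illustrative sketch only: an unpacked model of the operand fields
// described above (the real representation is bit-packed).
#![allow(dead_code)] // not every variant is exercised by the demo

#[derive(Clone, Copy, Debug)]
enum RegClass { Int, Float }

#[derive(Clone, Copy, Debug)]
struct VirtReg {
    index: u32,
    class: RegClass,
}

#[derive(Clone, Copy, Debug)]
enum Constraint {
    Any,           // any register or stack slot
    AnyReg,        // any register of the vreg's class
    Stack,         // any stack slot
    FixedReg(u8),  // a particular physical register (by index)
    Reuse(usize),  // def that reuses the allocation of input i
}

#[derive(Clone, Copy, Debug)]
enum Kind { Def, Use, Mod }

#[derive(Clone, Copy, Debug)]
enum Pos { Before, After }

#[derive(Clone, Copy, Debug)]
struct Op {
    vreg: VirtReg,
    constraint: Constraint,
    kind: Kind,
    pos: Pos,
}

fn main() {
    // A hypothetical `v2 = add v0, v1`: per the positioning rule above,
    // the def is placed at Before so its register does not overlap any
    // input, and the uses sit at the default positions.
    let v = |i| VirtReg { index: i, class: RegClass::Int };
    let ops = [
        Op { vreg: v(2), constraint: Constraint::AnyReg, kind: Kind::Def, pos: Pos::Before },
        Op { vreg: v(0), constraint: Constraint::AnyReg, kind: Kind::Use, pos: Pos::After },
        Op { vreg: v(1), constraint: Constraint::AnyReg, kind: Kind::Use, pos: Pos::After },
    ];
    for op in &ops {
        println!("{:?}", op);
    }
}
```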
The input function trait does require the client to provide the exact vreg count, however. Implementation note: both vregs and operands are bit-packed into u32s. This is essential for memory-efficiency. As a result of the operand bit-packing in particular (including the policy constraints!), the allocator supports up to 2^21 (2M) vregs per function, and 2^6 (64) physical registers per class. Later we will also see a limit of 2^20 (1M) instructions per function. These limits are considered sufficient for the anticipated use-cases (e.g., compiling Wasm, which also has function-size implementation limits); for larger functions, it is likely better to use a simpler register allocator in any case. ## Reuses and Two-Address ISAs Some instruction sets primarily have instructions that name only two registers for a binary operator, rather than three: both registers are inputs, and the result is placed in one of the registers, clobbering its original value. The most well-known modern example is x86. It is thus imperative that we support this pattern well in the register allocator. This instruction-set design is somewhat at odds with an SSA representation, where a value cannot be redefined. Thus, the allocator supports a useful fiction of sorts: the instruction can be described as if it has three register mentions -- two inputs and a separate output -- and neither input will be clobbered. The output, however, is special: its register-placement policy is "reuse input i" (where i == 0 or 1). The allocator guarantees that the register assignment for that input and the output will be the same, so the instruction can use that register as its "modifies" operand. If the input is needed again later, the allocator will take care of the necessary copying. We will see below how the allocator makes this work by doing some preprocessing so that the core allocation algorithms do not need to worry about this constraint. ## SSA regalloc2 takes an SSA IR as input, where the usual definitions apply: every vreg is defined exactly once, and every vreg use is dominated by its one def. (Using blockparams means that we do not need additional conditions for phi-nodes.) ## Block Parameters Every block can have *block parameters*, and a branch to a block with block parameters must provide values for those parameters via operands. When a branch has more than one successor, it provides separate operands for each possible successor. These block parameters are equivalent to phi-nodes; we chose this representation because they are in many ways a more consistent representation of SSA. To see why we believe block parameters are a slightly nicer design choice than use of phi nodes, consider: phis are special pseudoinstructions that must come first in a block, are all defined in parallel, and whose uses occur on the edge of a particular predecessor. All of these facts complicate any analysis that scans instructions and reasons about uses and defs. It is much closer to the truth to actually put those uses *in* the predecessor, on the branch, and put all the defs at the top of the block as a separate kind of def. The tradeoff is that a vreg's def now has two possibilities -- ordinary instruction def or blockparam def -- but this is fairly reasonable to handle. ## Output The allocator produces two main data structures as output: an array of `Allocation`s and a sequence of edits. Some other data, such as stackmap slot info, is also provided. ### Allocations The allocator provides an array of `Allocation` values, one per `Operand`. 
Each `Allocation` has a kind and an index. The kind may indicate that this is a physical register or a stack slot, and the index gives the respective register or slot. All allocations will conform to the constraints given, and will faithfully preserve the dataflow of the input program. ### Inserted Moves In order to implement the necessary movement of data between allocations, the allocator needs to insert moves at various program points. The vector of inserted moves contains tuples that name a program point and an "edit". The edit is either a move, from one `Allocation` to another, or else a kind of metadata used by the checker to know which VReg is live in a given allocation at any particular time. The latter sort of edit can be ignored by a backend that is just interested in generating machine code. Note that the allocator will never generate a move from one stackslot directly to another, by design. Instead, if it needs to do so, it will make use of the scratch register. (Sometimes such a move occurs when the scratch register is already holding a value, e.g. to resolve a cycle of moves; in this case, it will allocate another spillslot and spill the original scratch value around the move.) Thus, the single "edit" type can become either a register-to-register move, a load from a stackslot into a register, or a store from a register into a stackslot. # Data Structures We now review the data structures that regalloc2 uses to track its state. ## Program-Derived Alloc-Invariant Data There are a number of data structures that are computed in a deterministic way from the input program and then subsequently used only as read-only data during the core allocation procedure. ### Livein/Liveout Bitsets The livein and liveout bitsets (`liveins` and `liveouts` on the `Env`) are allocated one per basic block and record, per block, which vregs are live entering and leaving that block. They are computed using a standard backward iterative dataflow analysis and are exact; they do not over-approximate (this turns out to be important for performance, and is also necessary for correctness in the case of stackmaps). ### Blockparam Vectors: Source-Side and Dest-Side The initialization stage scans the input program and produces two vectors that represent blockparam flows from branches to destination blocks: `blockparam_ins` and `blockparam_outs`. These two vectors are the first instance we will see of a recurring pattern: the vectors contain tuples that are carefully ordered in a way such that their sort-order is meaningful. "Build a vector lazily then sort" is a common idiom: it batches the O(n log n) cost into one operation that the stdlib has aggressively optimized, it provides dense storage, and it allows for a scan in a certain order that often lines up with a scan over the program. In this particular case, we will build vectors of (vreg, block) points that are meaningful either at the start or end of a block, so that later, when we scan over a particular vreg's allocations in block order, we can generate another vector of allocations. One side (the "outs") also contains enough information that it can line up with the other side (the "ins") in a later sort. To make this work, `blockparam_ins` contains a vector of (to-vreg, to-block, from-block) tuples, and has an entry for every blockparam of every block. Note that we can compute this without actually observing from-blocks; we only need to iterate over `block_preds` at any given block. 
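The sketch below illustrates the "build a vector lazily, then sort" idiom on hypothetical blockparam tuples, including the matching "out" records described in the next paragraph: after one sort per side, corresponding entries line up and a single linear scan can pair them. The tuple layouts and sort keys here are assumptions chosen for the illustration, not the crate's exact ones.

```rust
// Illustrative sketch of the "build lazily, then sort" idiom used for
// the blockparam vectors. Tuple layouts are simplified for this example.

fn main() {
    // (to_vreg, to_block, from_block): one entry per blockparam per
    // predecessor, discovered in whatever order the scan visits blocks.
    let mut ins: Vec<(u32, u32, u32)> = vec![
        (7, 3, 1),
        (5, 2, 0),
        (7, 3, 2),
        (5, 2, 1),
    ];

    // (from_vreg, from_block, to_block, to_vreg): one entry per
    // parameter on every branch, discovered while scanning branches.
    let mut outs: Vec<(u32, u32, u32, u32)> = vec![
        (9, 2, 3, 7),
        (4, 0, 2, 5),
        (8, 1, 3, 7),
        (6, 1, 2, 5),
    ];

    // One O(n log n) sort per side, keyed so that corresponding "in"
    // and "out" entries end up in the same relative order.
    ins.sort_by_key(|&(to_vreg, to_block, from_block)| (to_block, to_vreg, from_block));
    outs.sort_by_key(|&(_, from_block, to_block, to_vreg)| (to_block, to_vreg, from_block));

    // A single linear scan can now pair each "in" with its "out".
    for (i, o) in ins.iter().zip(outs.iter()) {
        let (to_vreg, to_block, from_block) = *i;
        let (from_vreg, ofrom, oto, ovreg) = *o;
        assert_eq!((to_block, to_vreg, from_block), (oto, ovreg, ofrom));
        println!(
            "edge {}->{}: move v{} (source side) into blockparam v{} (dest side)",
            from_block, to_block, from_vreg, to_vreg
        );
    }
}
```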
Then, `blockparam_outs` contains a vector of (from-vreg, from-block, to-block, to-vreg), and has an entry for every parameter on every branch that ends a block. There is exactly one "out" tuple for every "in" tuple. As mentioned above, we will later scan over both to generate moves. ## Core Allocation State: Ranges, Uses, Bundles, VRegs, PRegs We now come to the core data structures: live-ranges, bundles, virtual registers and their state, and physical registers and their state. First we must define a `ProgPoint` precisely: a `ProgPoint` is an instruction index and a `Before` or `After` suffix. We pack the before/after suffix into the LSB of a `u32`, so a `ProgPoint` can be incremented and compared as a simple integer. A live-range is a contiguous range of program points (half-open, i.e. including `from` and excluding `to`) for which a particular vreg is live with a value. A live-range contains a vector of uses. Each use contains four parts: the Operand word (directly copied, so there is no need to dereference it); the ProgPoint at which the use occurs; the operand slot on that instruction, if any, that the operand comes from, and the use's 'weight". (It's possible to have "ghost uses" that do not derive from any slot on the instruction.) These four parts are packed into three `u32`s: the slot can fit in 8 bits, and the weight in 16. The live-range carries its program-point range, uses, vreg index, bundle index (see below), and some metadata: spill weight and flags. The spill weight is the sum of weights of each use. The flags set currently carries one flag only: whether the live-range starts at a Def-kind operand. (This is equivalent to whether the range consumes a value at its start or not.) Uses are owned only by live-ranges and have no separate identity, but live-ranges live in a toplevel array and are known by `LiveRangeIndex` values throughout the allocator. New live-ranges can be created (e.g. during splitting); old ones are not cleaned up, but rather, all state is bulk-freed at the end. Live-ranges are aggregated into "bundles". A bundle is a collection of ranges that does not overlap. Each bundle carries: a vector (inline SmallVec) of (range, live-range index) tuples, an allocation (starts as "none"), a "spillset" (more below), and some metadata, including a spill weight (sum of ranges' weights), a priority (sum of ranges' lengths), and three property flags: "minimal", "contains fixed constraints", "contains stack constraints". VRegs also contain their vectors of live-ranges, in the same form as a bundle does (inline SmallVec that has inline (from, to) range bounds and range indices). There are two important overlap invariants: (i) no liveranges within a bundle overlap, and (ii) no liveranges within a vreg overlap. These are extremely important and we rely on them implicitly in many places. The live-range vectors in bundles and vregs, and use-vectors in ranges, have various sorting invariants as well. These invariants differ according to the phase of the allocator's computation. First, during live-range construction, live-ranges are placed into vregs in reverse order (because the computation is a reverse scan) and uses into ranges in reverse order; these are sorted into forward order at the end of live-range computation. When bundles are first constructed, their range vectors are sorted, and they remain so for the rest of allocation, as we need for interference testing. 
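Returning to the `ProgPoint` encoding described at the start of this section, here is a minimal sketch of packing an instruction index and a before/after flag into a single `u32` so that program points can be compared and incremented as plain integers. It is an illustrative re-implementation, not the crate's code.

```rust
// Illustrative sketch: an instruction index and a Before/After flag
// packed into a single u32, so ordering and increment are plain
// integer operations.

#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Point(u32);

impl Point {
    fn before(inst: u32) -> Point {
        Point(inst << 1)
    }
    fn after(inst: u32) -> Point {
        Point((inst << 1) | 1)
    }
    fn inst(self) -> u32 {
        self.0 >> 1
    }
    fn is_after(self) -> bool {
        self.0 & 1 == 1
    }
    /// The next program point: Before(i) -> After(i) -> Before(i + 1).
    fn next(self) -> Point {
        Point(self.0 + 1)
    }
}

fn main() {
    let p = Point::before(10);
    assert!(p < Point::after(10));
    assert!(Point::after(10) < Point::before(11));
    assert_eq!(p.next(), Point::after(10));
    assert_eq!(p.next().next().inst(), 11);
    assert!(!p.is_after());
    println!("program-point orderings hold");
}
```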
However, as ranges are created and split, sortedness of vreg ranges is *not* maintained; they are sorted once more, in bulk, when allocation is done and we start to resolve moves. Finally, we have physical registers. The main data associated with each is the allocation map. This map is a standard BTree, indexed by ranges (`from` and `to` ProgPoints) and yielding a LiveRange for each location range. The ranges have a custom comparison operator defined that compares equal for any overlap. This comparison operator allows us to determine whether a range is free, i.e. has no overlap with a particular range, in one probe -- the btree will not contain a match. However, it makes iteration over *all* overlapping ranges somewhat tricky to get right. Notably, Rust's BTreeMap does not guarantee that the lookup result will be the *first* equal key, if multiple keys are equal to the probe key. Thus, when we want to enumerate all overlapping ranges, we probe with a range that consists of the single program point *before* the start of the actual query range, using the API that returns an iterator over a range in the BTree, and then iterate through the resulting iterator to gather all overlapping ranges (which will be contiguous). ## Spill Bundles It is worth describing "spill bundles" separately. Every spillset (see below; a group of bundles that originated from one bundle) optionally points to a single bundle that we designate the "spill bundle" for that spillset. Contrary to the name, this bundle is not unconditionally spilled. Rather, one can see it as a sort of fallback: it is where liveranges go when we give up on processing them via the normal backtracking loop, and will only process them once more in the "second-chance" stage. This fallback behavior implies that the spill bundle must always be able to accept a spillslot allocation, i.e., it cannot require a register. This invariant is what allows spill bundles to be processed in a different way, after backtracking has completed. The spill bundle acquires liveranges in two ways. First, as we split bundles, we will trim the split pieces in certain ways so that some liveranges are immediately placed in the spill bundle. Intuitively, the "empty" regions that just carry a value, but do not satisfy any operands, should be in the spill bundle: it is better to have a single consistent location for the value than to move it between lots of different split pieces without using it, as moves carry a cost. Second, the spill bundle acquires the liveranges of a bundle that has no requirement to be in a register when that bundle is processed, but only if the spill bundle already exists. In other words, we won't create a second-chance spill bundle just for a liverange with an "Any" use; but if it was already forced into existence by splitting and trimming, then we might as well use it. Note that unlike other bundles, a spill bundle's liverange vector remains unsorted until we do the second-chance allocation. This allows quick appends of more liveranges. ## Allocation Queue The allocation queue is simply a priority queue (built with a binary max-heap) of (prio, bundle-index) tuples. ## Spillsets and Spillslots Every bundle contains a reference to a spillset. Spillsets are used to assign spillslots near the end of allocation, but before then, they are also a convenient place to store information that is common among *all bundles* that share the spillset. 
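Stepping back to the per-preg allocation map, the "overlapping keys compare equal" trick can be sketched as follows. This is an illustrative reconstruction under simplified assumptions (half-open `u32` ranges, a plain `u32` payload), not the crate's actual key type.

```rust
use std::cmp::Ordering;
use std::collections::BTreeMap;

// Illustrative sketch: a half-open range key whose comparison treats any
// two overlapping ranges as equal, so a single BTreeMap probe answers
// "does anything overlap this range?". This deliberately bends Ord's
// usual contract; it is workable here only because the map itself never
// stores two overlapping keys.
#[derive(Clone, Copy, Debug)]
struct RangeKey {
    from: u32,
    to: u32, // exclusive
}

impl Ord for RangeKey {
    fn cmp(&self, other: &Self) -> Ordering {
        if self.to <= other.from {
            Ordering::Less
        } else if other.to <= self.from {
            Ordering::Greater
        } else {
            Ordering::Equal // the ranges overlap
        }
    }
}

impl PartialOrd for RangeKey {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl PartialEq for RangeKey {
    fn eq(&self, other: &Self) -> bool {
        self.cmp(other) == Ordering::Equal
    }
}

impl Eq for RangeKey {}

fn main() {
    // A toy allocation map for one physical register: range -> vreg index.
    let mut alloc_map: BTreeMap<RangeKey, u32> = BTreeMap::new();
    alloc_map.insert(RangeKey { from: 0, to: 10 }, 1);
    alloc_map.insert(RangeKey { from: 20, to: 30 }, 2);

    // One probe tells us whether a candidate range conflicts at all.
    assert!(alloc_map.contains_key(&RangeKey { from: 8, to: 12 })); // overlaps [0, 10)
    assert!(!alloc_map.contains_key(&RangeKey { from: 10, to: 20 })); // fits in the gap

    println!("single-probe conflict checks behave as described");
}
```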
In particular, spillsets are initially assigned 1-to-1 to bundles
after all bundle-merging is complete; so spillsets represent in some
sense the "original bundles", and as splitting commences, the smaller
bundle-pieces continue to refer to their original spillsets.

We stash some useful information on the spillset because of this: a
register hint, used to create some "stickiness" between pieces of an
original bundle that are assigned separately after splitting; the
spill bundle; the common register class of all vregs in this bundle;
the vregs whose liveranges are contained in this bundle; and then
some information actually used if this is spilled to the stack
(`required` indicates actual stack use; `size` is the spillslot
count; `slot` is the actual stack slot).

Spill *sets* are later allocated to spill *slots*. Multiple spillsets
can be assigned to one spillslot; the only constraint is that
spillsets assigned to a spillslot must not overlap. When we look up
the allocation for a bundle, if the bundle is not given a specific
allocation (its `alloc` field is `Allocation::none()`), this means it
is spilled, and we traverse to the spillset then spillslot.

## Other: Fixups, Stats, Debug Annotations

There are a few fixup vectors that we will cover in more detail
later. Of particular note is the "multi-fixed-reg fixup vector": this
handles instructions that constrain the same input vreg to multiple,
different, fixed registers for different operands at the same program
point. The only way to satisfy such a set of constraints is to
decouple all but one of the inputs (make them no longer refer to the
vreg) and then later insert copies from the first fixed use of the
vreg to the other fixed regs.

The `Env` also carries a statistics structure with counters that are
incremented, which can be useful for evaluating the effects of
changes; and a "debug annotations" hashmap from program point to
arbitrary strings that is filled out with various useful diagnostic
information if enabled, so that an annotated view of the program with
its liveranges, bundle assignments, inserted moves, merge and split
decisions, etc. can be viewed.

# Allocation Pipeline

We now describe the pipeline that computes register allocations.

## Live-range Construction

The first step in performing allocation is to analyze the input
program to understand its dataflow: that is, the ranges during which
virtual registers must be assigned to physical registers. Computing
these ranges is what allows us to do better than a trivial "every
vreg lives in a different location, always" allocation.

We compute precise liveness first using an iterative dataflow
algorithm with BitVecs. (See below for our sparse chunked BitVec
description.) This produces the `liveins` and `liveouts` vectors of
BitVecs per block.

We then perform a single pass over blocks in reverse order, and scan
instructions in each block in reverse order. Why reverse order? We
must see instructions within a block in reverse to properly compute
liveness (a value is live backward from a use to a def). Because we
want to keep liveranges in-order as we build them, to enable
coalescing, we visit blocks in reverse order as well, so overall this
is simply a scan over the whole instruction index space in reverse
order.

For each block, we perform a scan with the following state:

- A liveness bitvec, initialized at the start from `liveouts`.
- A vector of live-range indices, with one entry per vreg, initially
  "invalid" (this vector is allocated once and reused at each block).
- In-progress vector of live-range indices per vreg in the vreg
  state, in *reverse* order (we will reverse it when we're done).

A vreg is live at the current point in the scan if its bit is set in
the bitvec; its entry in the vreg-to-liverange vec may be stale, but
if the bit is not set, we ignore it.

We initially create a liverange for all vregs that are live out of
the block, spanning the whole block. We will trim this below if it is
locally def'd and does not pass through the block.

For each instruction, we process its effects on the scan state:

- For all clobbers (which logically happen at the end of the
  instruction), add a single-program-point liverange to each
  clobbered preg.
- For each program point [after, before], for each operand at this
  point(\*):
  - if a def:
    - if not currently live, this is a dead def; create an empty LR.
    - set the start of the LR for this vreg to this point.
    - set as dead.
  - if a use:
    - create LR if not live, with start at beginning of block.

(\*) an instruction operand's effective point is adjusted in a few
cases. If the instruction is a branch, its uses (which are
blockparams) are extended to the "after" point. If there is a reused
input, all *other* inputs are extended to "after": this ensures
proper interference (as we explain more below).

We then treat blockparams as defs at the end of the scan (beginning
of the block), and create the "ins" tuples. (The uses for the other
side of the edge are already handled as normal uses on a branch
instruction.)

### Handling Reused Inputs

Reused inputs are also handled a bit specially. We have already
described how we essentially translate the idiom so that the output's
allocation is used for input and output, and there is a move just
before the instruction that copies the actual input (which will not
be clobbered) to the output. Together with an attempt to merge the
bundles for the two, to elide the move if possible, this works
perfectly well as long as we ignore all of the other inputs.

But we can't do that: we have to ensure that other inputs'
allocations are correct too. Note that using the output's allocation
as the input is actually potentially incorrect if the output is at
the After point and the input is at the Before: the output might
share a register with one of the *other* (normal, non-reused) inputs
if that input's vreg were dead afterward. This will mean that we
clobber the other input.

So, to get the interference right, we *extend* all other (non-reused)
inputs of an instruction with a reused input to the After point. This
ensures that the other inputs are *not* clobbered by the slightly
premature use of the output register. The source has a link to a
comment in IonMonkey that implies that it uses a similar solution to
this problem, though it's not entirely clear.

(This odd dance, like many of the others above and below, is "written
in fuzzbug failures", so to speak. It's not entirely obvious until
one sees the corner case where it's necessary!)

## Bundle Merging

Once we have built the liverange vectors for every vreg, we can
reverse these vectors (recall, they were built in strict reverse
order) and initially assign one bundle per (non-pinned) vreg. We then
try to merge bundles together as long as we find pairs of bundles
that do not overlap and that (heuristically) make sense to merge.
Note that this is the only point in the allocation pipeline where
bundles get larger.
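A sketch of the overlap test at the heart of bundle merging follows: because each side's ranges are sorted and non-overlapping, a single lockstep walk over both vectors decides mergeability. The `(from, to)` pairs here are simplified stand-ins for the real liverange entries.

```rust
// Illustrative sketch: decide whether two bundles can merge by walking
// their sorted, non-overlapping (from, to) range vectors in lockstep.
// Half-open ranges: [from, to).

fn bundles_overlap(a: &[(u32, u32)], b: &[(u32, u32)]) -> bool {
    let (mut i, mut j) = (0, 0);
    while i < a.len() && j < b.len() {
        let (af, at) = a[i];
        let (bf, bt) = b[j];
        if at <= bf {
            // a[i] ends before b[j] starts: advance the earlier range.
            i += 1;
        } else if bt <= af {
            j += 1;
        } else {
            return true; // the ranges intersect
        }
    }
    false
}

fn main() {
    let bundle_a = [(0, 10), (20, 25)];
    let bundle_b = [(10, 20), (30, 40)];
    let bundle_c = [(22, 35)];

    // a and b interleave without touching, so they could be merged.
    assert!(!bundles_overlap(&bundle_a, &bundle_b));
    // c overlaps a's second range, so merging would be rejected.
    assert!(bundles_overlap(&bundle_a, &bundle_c));
    println!("overlap checks pass");
}
```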
We initially merge as large as we dare (but not too large, because then we'll just cause lots of conflicts and splitting later), and then try out assignments, backtrack via eviction, and split continuously to chip away at the problem until we have a working set of allocation assignments. We attempt to merge two kinds of bundle pairs: reused-input to corresponding output; and across blockparam assignments. To merge two bundles, we traverse over both their sorted liverange vectors at once, checking for overlaps. Note that we can do this without pointer-chasing to the liverange data; the (from, to) range is in the liverange vector itself. We also check whether the merged bundle would have conflicting requirements (see below for more on requirements). We do a coarse check first, checking 1-bit flags that indicate whether either bundle has any fixed-reg constraints or stack-only constraints. If so, we need to do a detailed check by actually computing merged requirements on both sides, merging, and checking for Conflict (the lattice bottom value). If no conflict, we merge. A performance note: merging is extremely performance-sensitive, and it turns out that a mergesort-like merge of the liverange vectors is too expensive, partly because it requires allocating a separate result vector (in-place merge in mergesort is infamously complex). Instead, we simply append one vector onto the end of the other and invoke Rust's builtin sort. We could special-case "one bundle is completely before the other", but we currently don't do that (performance idea!). Once all bundles are merged as far as they will go, we compute cached bundle properties (priorities and weights) and enqueue them on the priority queue for allocation. ## Recurring: Bundle Property Computation The core allocation loop is a recurring iteration of the following: we take the highest-priority bundle from the allocation queue; we compute its requirements; we try to find it a register according to those requirements; if no fit, we either evict some other bundle(s) from their allocations and try again, or we split the bundle and put the parts back on the queue. We record all the information we need to make the evict-or-split decision (and where to split) *during* the physical register allocation-map scans, so we don't need to go back again to compute that. Termination is nontrivial to see, because of eviction. How do we guarantee we don't get into an infinite loop where two bundles fight over a register forever? In fact, this can easily happen if there is a bug; we fixed many fuzzbugs like this, and we have a check for "infinite loop" based on an upper bound on iterations. But if the allocator is correct, it should never happen. Termination is guaranteed because (i) bundles always get smaller, (ii) eviction only occurs when a bundle is *strictly* higher weight (not higher-or-equal), and (iii) once a bundle gets down to its "minimal" size, it has an extremely high weight that is guaranteed to evict any non-minimal bundle. A minimal bundle is one that covers only one instruction. As long as the input program does not have impossible constraints that require more than one vreg to exist in one preg, an allocation problem of all minimal bundles will always have a solution. ## Bundle Processing Let's now talk about what happens when we take a bundle off the allocation queue. The three basic outcomes are: allocate; split and requeue; or evict and try again immediately (and eventually allocate or split/requeue). 
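Here is a compact, illustrative skeleton of that per-bundle decision. Everything in it -- the single register, the probe, the split heuristic -- is a toy stand-in chosen only to show the shape of the allocate / evict / split-and-requeue loop, not the real data structures or heuristics.

```rust
// Illustrative skeleton only: the allocate / evict / split-and-requeue
// decision described above, simulated with one register and toy bundles.

use std::collections::BinaryHeap;

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
struct Bundle {
    prio: u32,   // sum of liverange lengths; largest processed first
    weight: u32, // importance of its uses (divided by prio in reality)
    id: u32,
}

fn main() {
    let mut queue: BinaryHeap<Bundle> = BinaryHeap::new();
    queue.push(Bundle { prio: 8, weight: 3, id: 0 });
    queue.push(Bundle { prio: 4, weight: 9, id: 1 });

    let mut reg: Option<Bundle> = None; // the single physical register
    let mut spilled: Vec<Bundle> = Vec::new();

    while let Some(b) = queue.pop() {
        match reg {
            // Probe found the register free: allocate.
            None => reg = Some(b),
            // Conflict: evict only if we are *strictly* heavier.
            Some(occupant) if b.weight > occupant.weight => {
                queue.push(occupant); // evicted bundle goes back on the queue
                reg = Some(b);
            }
            // Otherwise split (here: in half) and requeue both pieces,
            // as long as the bundle is not already minimal (prio == 1).
            Some(_) if b.prio > 1 => {
                let first = Bundle { prio: b.prio / 2, ..b };
                let rest = Bundle { prio: b.prio - b.prio / 2, id: b.id + 100, ..b };
                queue.push(first);
                queue.push(rest);
            }
            // Minimal and still losing: fall through to the spill path.
            Some(_) => spilled.push(b),
        }
    }
    println!("in register: {:?}", reg);
    println!("spilled or second-chance: {:?}", spilled);
}
```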
### Properties: Weight, Priority, and Requirements

To process a bundle, we have to compute a few properties. In fact we
will have already computed a few of these beforehand, but we describe
them all here.

- Priority: a bundle's priority determines the order in which it is
  considered for allocation. RA2 defines it as the sum of the lengths
  (in instruction index space) of each liverange. This causes the
  allocator to consider larger bundles first, when the allocation
  maps are generally more free; they can always be evicted and split
  later.
- Weight: a bundle's weight indicates how important (in terms of
  runtime) its uses/register mentions are. In an approximate sense,
  inner loop bodies create higher-weight uses. Fixed register
  constraints add some weight, and defs add some weight. Finally,
  weight is divided by priority, so a very large bundle that happens
  to have a few important uses does not uniformly exert its weight
  across its entire range. This has the effect of causing bundles to
  be more important (more likely to evict others) the more they are
  split.
- Requirement: a bundle's requirement is a value in a lattice that we
  have defined, where top is "Unknown" and bottom is "Conflict".
  Between these two, we have: any register (of a class); any
  stackslot (of a class); a particular register. "Any register" can
  degrade to "a particular register", but any other pair of different
  requirements meets to Conflict. Requirements are derived from the
  operand constraints for all uses in all liveranges in a bundle, and
  then merged with the lattice meet-function.

The lattice is as follows (diagram simplified to remove multiple
classes and multiple fixed registers which parameterize nodes; any
two differently-parameterized values are unordered with respect to
each other):

```plain
           Any(rc)
          /       \
 FixedReg(reg)   FixedStack(reg)
          \       /
          Conflict
```

Once we have the Requirement for a bundle, we can decide what to do.

### No-Register-Required Cases

If the requirement indicates that no register is needed (`Unknown` or
`Any`, i.e. a register or stack slot would be OK), *and* if the spill
bundle already exists for this bundle's spillset, then we move all
the liveranges over to the spill bundle, as described above.

If the requirement indicates a conflict, we immediately split and
requeue the split pieces. This split is performed at the point at
which the conflict is first introduced, i.e. just before the first
use whose requirement, when merged into the requirement for all prior
uses combined, goes to `Conflict`. In this way, we always guarantee
forward progress. Note also that a bundle can reach this stage with a
conflicting requirement only if the original liverange had
conflicting uses (e.g., a liverange from a def in a register to a use
on stack, or a liverange between two different fixed-reg-constrained
operands); our bundle merging logic explicitly avoids merging two
bundles if it would create a conflict.

### Allocation-Map Probing

If we did not immediately dispose of the bundle as described above,
then we *can* use a register (either `Any`, which accepts a register
as one of several options, or `Reg`, which must have one, or `Fixed`,
which must have a particular one).

We determine which physical registers' allocation maps we will probe,
and in what order. If a particular fixed register is required, we
probe only that register. Otherwise, we probe all registers in the
required class.

The order in which we probe, if we are not constrained to a single
register, is carefully chosen.
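Returning to the requirement lattice above, the meet ("merge") function can be sketched over a simplified requirement type as follows. It encodes only the rules stated above (Unknown is the identity, "any register" can degrade to a particular register of the same class, any other differing pair conflicts) and is an illustrative reconstruction, not the crate's implementation.

```rust
// Illustrative sketch of the requirement lattice's meet function,
// simplified to registers of one parameterized class.

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Class { Int, Float }

#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Req {
    Unknown,          // top: no constraining uses seen yet
    Any(Class),       // any register of this class
    Fixed(Class, u8), // one particular physical register
    Conflict,         // bottom: unsatisfiable
}

fn merge(a: Req, b: Req) -> Req {
    use Req::*;
    match (a, b) {
        (Unknown, x) | (x, Unknown) => x,
        (x, y) if x == y => x,
        // "Any register" can degrade to a particular register of the
        // same class...
        (Any(c), Fixed(fc, r)) | (Fixed(fc, r), Any(c)) if c == fc => Fixed(fc, r),
        // ...but any other pair of different requirements conflicts.
        _ => Conflict,
    }
}

fn main() {
    use Req::*;
    assert_eq!(merge(Unknown, Any(Class::Int)), Any(Class::Int));
    assert_eq!(merge(Any(Class::Int), Fixed(Class::Int, 3)), Fixed(Class::Int, 3));
    assert_eq!(merge(Fixed(Class::Int, 3), Fixed(Class::Int, 4)), Conflict);
    assert_eq!(merge(Any(Class::Int), Any(Class::Float)), Conflict);
    println!("lattice meets behave as described");
}
```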
First, if there is a hint register from the spillset (this is set by the last allocation into a register of any other bundle in this spillset), we probe that. Then, we probe all preferred registers; then all non-preferred registers. For each of the preferred and non-preferred register sequences, we probe in an *offset* manner: we start at some index partway through the sequence, determined by some heuristic number that is random and well-distributed. (In practice, we use the sum of the bundle index and the instruction index of the start of the first range in the bundle.) We then march through the sequence and wrap around, stopping before we hit our starting point again. The purpose of this offset is to distribute the contention and speed up the allocation process. In the common case where there are enough registers to hold values without spilling (for small functions), we are more likely to choose a free register right away if we throw the dart at random than if we start *every* probe at register 0, in order. This has a large allocation performance impact in practice. For each register in probe order, we probe the allocation map, and gather, simultaneously, several results: (i) whether the entire range is free; (ii) if not, the vector of all conflicting bundles, *and* the highest weight among those bundles; (iii) if not, the *first* conflict point. We do this by iterating over all liveranges in the preg's btree that overlap with each range in the current bundle. This iteration is somewhat subtle due to multiple "equal" keys (see above where we describe the use of the btree). It is also adaptive for performance reasons: it initially obtains an iterator into the btree corresponding to the start of the first range in the bundle, and concurrently iterates through both the btree and the bundle. However, if there is a large gap in the bundle, this might require skipping many irrelevant entries in the btree. So, if we skip too many entries (heuristically, 16, right now), we do another lookup from scratch in the btree for the start of the next range in the bundle. This balances between the two cases: dense bundle, where O(1) iteration through the btree is faster, and sparse bundle, where O(log n) lookup for each entry is better. ### Decision: Allocate, Evict, or Split First, the "allocate" case is easy: if, during our register probe loop, we find a physical register whose allocations do not overlap this bundle, then we allocate this register; done! If not, then we need to decide whether to evict some conflicting bundles and retry, or to split the current bundle into smaller pieces that may have better luck. A bit about our split strategy first: contrary to the IonMonkey allocator which inspired much of our design, we do *not* have a list of split strategies that split one bundle into many pieces at once. Instead, each iteration of the allocation loop splits at most *once*. This simplifies the splitting code greatly, but also turns out to be a nice heuristic: we split at the point that the bundle first encounters a conflict for a particular preg assignment, then we hint that preg for the first (pre-conflict) piece when we retry. In this way, we always make forward progress -- one piece of the bundle is always allocated -- and splits are informed by the actual situation at hand, rather than best guesses. 
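The offset probing order described just above can be sketched as an iterator that starts partway through a register sequence and wraps around once; the offset is the sum of the bundle index and the first range's starting instruction index, as mentioned. The surrounding types are simplified for illustration.

```rust
// Illustrative sketch: probe a register sequence starting at an offset
// derived from the bundle, wrapping around so every register is tried
// exactly once.

fn probe_order(regs: &[u8], bundle_index: usize, first_inst: usize) -> impl Iterator<Item = u8> + '_ {
    let n = regs.len();
    let start = (bundle_index + first_inst) % n.max(1);
    (0..n).map(move |i| regs[(start + i) % n])
}

fn main() {
    let preferred = [0u8, 1, 2, 3]; // e.g. caller-save registers
    let non_preferred = [4u8, 5];   // e.g. callee-save registers

    // A hypothetical bundle 7 whose first range starts at instruction 22.
    let order: Vec<u8> = probe_order(&preferred, 7, 22)
        .chain(probe_order(&non_preferred, 7, 22))
        .collect();

    // (7 + 22) % 4 == 1 and (7 + 22) % 2 == 1, so we probe registers
    // 1, 2, 3, 0 among the preferred set, then 5, 4.
    assert_eq!(order, vec![1, 2, 3, 0, 5, 4]);
    println!("probe order: {:?}", order);
}
```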
Also note that while this may appear at first to be a greedy algorithm, it still allows backtracking: the first half of the split bundle, which we *can* now assign to a preg, does not necessarily remain on that preg forever (it can still be evicted later). It is just a split that is known to make at least one part of the allocation problem solvable. To determine whether to split or evict, we track our best options: as we probe, we track the "lowest cost eviction option", which is a set of bundles and the maximum weight in that set of bundles. We also track the "lowest cost split option", which is the cost (more below), the point at which to split, and the register for this option. For each register we probe, if there is a conflict but none of the conflicts are fixed allocations, we receive a vector of bundles that conflicted, and also separately, the first conflicting program point. We update the lowest-cost eviction option if the cost (max weight) of the conflicting bundles is less than the current best. We update the lowest-cost split option if the cost is less as well, according to the following definition of cost: a split's cost is the cost of its move, as defined by the weight of a normal def operand at the split program point, plus the cost of all bundles beyond the split point (which will still be conflicts even after the split). If there is a conflict with a fixed allocation, then eviction is not an option, but we can still compute the candidate split point and cost in the same way as above. Finally, as an optimization, we pass in the current best cost to the btree probe inner loop; if, while probing, we have already exceeded the best cost, we stop early (this improves allocation time without affecting the result). Once we have the best cost for split and evict options, we split if (i) the bundle is not already a minimal bundle, and (ii) we've already evicted once in this toplevel iteration without success, or the weight of the current bundle is less than the eviction cost. We then requeue *both* resulting halves of the bundle with the preg that resulted in this option as the register hint. Otherwise, we evict all conflicting bundles and try again. Note that the split cost does not actually play into the above (split vs. evict) decision; it is only used to choose *which* split is best. This is equivalent to saying: we never evict if the current bundle is less important than the evicted bundles, even if the split is more expensive still. This is important for forward progress, and the case where the split would be even more expensive should be very very rare (it would have to come from a costly move in the middle of an inner loop). ### How to Split The actual split procedure is fairly simple. We are given a bundle and a split-point. We create a new bundle to take on the second half ("rest") of the original. We find the point in the liverange vector that corresponds to the split, and distribute appropriately. If the split-point lands in the middle of a liverange, then we split that liverange as well. In the case that a new liverange is created, we add the liverange to the corresponding vreg liverange vector as well. Note that, as described above, the vreg's liverange vector is unsorted while splitting is occurring (because we do not need to traverse it or do any lookups during this phase); so we just append. The splitting code also supports a "minimal split", in which it simply peels off the first use. 
This is used to ensure forward progress when a bundle has conflicting requirements within it (see above). #### Spill Bundle and Splitting Once a split occurs, however, it turns out that we can improve results by doing a little cleanup. Once we distribute a bundle's liveranges across two half-bundles, we postprocess by trimming a bit. In particular, if we see that the "loose ends" around the split point extend beyond uses, we will create and move ranges to a spill bundle. That is: if the last liverange in the first-half bundle extends beyond its last use, we trim that part off into an empty (no uses) liverange and place that liverange in the spill bundle. Likewise, if the first liverange in the second-half bundle starts before its first use, we trim that part off into an empty liverange and place it in the spill bundle. This is, empirically, an improvement: it reduces register contention and makes splitting more effective. The intuition is twofold: (i) it is better to put all of the "flow-through" parts of a vreg's liveness into one bundle that is never split, and can be spilled to the stack if needed, to avoid unnecessary moves; and (ii) if contention is high enough to cause splitting, it is more likely there will be an actual stack spill, and if this is the case, it is better to do the store just after the last use and reload just before the first use of the respective bundles. ## Second-Chance Allocation: Spilled Bundles Once the main allocation loop terminates, when all bundles have either been allocated or punted to the "spilled bundles" vector, we do second-chance allocation. This is a simpler loop that never evicts and never splits. Instead, each bundle gets one second chance, in which it can probe pregs and attempt to allocate. If it fails, it will actually live on the stack. This is correct because we are careful to only place bundles on the spilled-bundles vector that are *allowed* to live on the stack. Specifically, only the canonical spill bundles (which will contain only empty ranges) and other bundles that have an "any" or "unknown" requirement are placed here (but *not* "stack" requirements; those *must* be on the stack, so do not undergo second-chance allocation). At the end of this process, we have marked spillsets as required whenever at least one bundle in the spillset actually requires a stack slot. We can then allocate slots to the spillsets. ## Spillslot Allocation We must allocate space on the stack, denoted by an abstract index space, to each spillset that requires it, and for the liveranges in which it requires it. To facilitate this, we keep a btree per spillslot in the same way we do per preg. We will allocate spillsets to slots in a way that avoids interference. Note that we actually overapproximate the required ranges for each spillset in order to improve the behavior of a later phase (redundant move elimination). Specifically, when we allocate a slot for a spillset, we reserve that slot for *all* of the liveranges of *every* vreg that is assigned to that spillset (due to merging rules that initially merge one-vreg bundles into final merged bundles, there will be no overlaps here). In other words, we rule out interleaving of completely different values in the same slot, though bundle merging does mean that potentially many (non-interfering) vregs may share it. This provides the important property that if a vreg has been reloaded, but not modified, its spillslot *still contains the up-to-date value* (because the slot is reserved for all liveranges of the vreg). 
This enables us to avoid another store to the spillslot later if there is another spilled range. We perform probing in a way that is somewhat different than for registers, because the spillslot space is conceptually infinite. We can thus optimize for slightly better allocation performance by giving up and allocating a new slot at any time. For each size class, we keep a linked list of slots. When we need to allocate a spillset to a slot, we traverse down the list and try a fixed number of slots. If we find one that fits the spillset's ranges, we allocate, and we remove the slot from its current place in the list and append to the end. In this way, it is deprioritized from probing "for a while", which tends to reduce contention. This is a simple way to round-robin between slots. If we don't find one that fits after a fixed number of probes, we allocate a new slot. And with that, we have valid allocations for all vregs for all points that they are live! Now we just need to modify the program to reify these choices. ## Allocation Assignment The first step in reifying the allocation is to iterate through all mentions of a vreg and fill in the resulting `Allocation` array with the appropriate allocations. We do this by simply traversing liveranges per vreg, looking up the allocation by observing the bundle (and spillset if no specific allocation for the bundle), and for each use, filling in the slot according to the saved progpoint/slot info in the use data. ## Move Generation The more difficult half of the reification step is generating the *moves* that will put the values in the right spots. There are two sources of moves that we must generate. The first are moves between different ranges of the same vreg, as the split pieces of that vreg's original bundle may have been assigned to different locations. The second are moves that result from move semantics in the input program: assignments from blockparam args on branches to the target block's params. Moves are tricky to handle efficiently because they join two potentially very different locations in the program (in the case of control-flow-edge moves). In order to avoid the need for random lookups, which are a cache-locality nightmare even if we have O(log n) lookups, we instead take a scan-sort-scan approach. First, we scan over each vreg's liveranges, find the allocation for each, and for each move that comes *to* or *from* this liverange, generate a "half-move". The key idea is that we generate a record for each "side" of the move, and these records are keyed in a way that after a sort, the "from" and "to" ends will be consecutive. We can sort the vector of halfmoves once (this is expensive, but not as expensive as many separate pointer-chasing lookups), then scan it again to actually generate the move instructions. To enable the sort to work, half-moves are sorted by a key that is equivalent to the tuple (from-block, to-block, to-vreg, kind), where `kind` is "source" or "dest". For each key, the payload is an allocation. The fields in this tuple are carefully chosen: we know all of them at every location we generate a halfmove, without expensive lookups, and sorting by this key will make the source and all dests (there can be more than one) contiguous in the final order. Half-moves are generated for several situations. 
First, at the start of every block covered by a liverange, we can generate "dest" half-moves for blockparams, and at the end of every block covered by a liverange, we can generate "source" half-moves for blockparam args on branches. Incidentally, this is the reason that `blockparam_ins` and `blockparam_outs` are sorted tuple-vectors whose tuples begin with (vreg, block, ...): this is the order in which we do the toplevel scan over allocations. Second, at every block edge, if the vreg is live in any pred (at block-start) or succ (at block-end), we generate a half-move to transfer the vreg to its own location in the connected block. This completes the "edge-moves". We sort the half-move array and then have all of the alloc-to-alloc pairs on a given (from-block, to-block) edge. Next, when a live-range ends and another begins for the same vreg in the same block (i.e., a split in the middle of a block), we know both sides of the move immediately (because it is the same vreg and we can look up the adjacent allocation easily), and we can generate that move. Finally, we generate moves to fix up multi-fixed-reg-constraint situations, and make reused inputs work, as described earlier. ## Move Resolution During this whole discussion, we have described "generating moves", but we have not said what that meant. Note that in many cases, there are several moves at a particular program point that semantically happen *in parallel*. For example, if multiple vregs change allocations between two instructions, all of those moves happen as part of one parallel permutation. Similarly, blockparams have parallel-assignment semantics. We thus enqueue all the moves that we generate at program points and resolve them into sequences of sequential moves that can actually be lowered to move instructions in the machine code. First, a word on *move priorities*. There are different kinds of moves that are generated between instructions, and we have to ensure that some happen before others, i.e., *not* in parallel. For example, a vreg might change allocation (due to a split) before an instruction, then be copied to an output register for an output with a reused-input policy. The latter move must happen *after* the vreg has been moved into its location for this instruction. To enable this, we define "move priorities", which are a logical extension of program points (i.e., they are sub-points) that enable finer-grained ordering of moves. We currently have the following priorities: - In-edge moves, to place edge-moves before the first instruction in a block. - Regular, used for vreg movement between allocations. - Multi-fixed-reg, used for moves that handle the single-vreg-in-multiple-fixed-pregs constraint case. - Reused-input, used for implementing outputs with reused-input policies. - Out-edge moves, to place edge-moves after the last instruction (prior to the branch) in a block. Every move is statically given one of these priorities by the code that generates it. We collect moves with (prog-point, prio) keys, and we sort by those keys. We then have, for each such key, a set of moves that semantically happen in parallel. We then resolve those moves using a parallel-move resolver, as we now describe. ### Parallel-Move Resolver The fundamental issue that arises when resolving parallel moves to sequential moves is *overlap*: some of the moves may overwrite registers that other moves use as sources. We must carefully order moves so that this does not clobber values incorrectly. We first check if such overlap occurs. 
If it does not (this is actually the most common case), the sequence of parallel moves can be emitted as sequential moves directly. Done! Otherwise, we have to order the moves carefully. Furthermore, if there is a *cycle* anywhere among the moves, we will need a scratch register. (Consider, e.g., t0 := t1 and t1 := t0 in parallel: with only move instructions and no direct "exchange" instruction, we cannot reify this without a third register.) We first compute a mapping from each move instruction to the move instruction, if any, that it must precede. Note that there can be only one such move for a given move, because each destination can be written only once; so a move might be constrained only before the one move that overwrites its source. (This will be important in a bit!) Our task is now to find an ordering of moves that respects these dependencies. To do so, we perform a depth-first search on the graph induced by the dependencies, which will generate a sequence of sequential moves in reverse order. We keep a stack of moves; we start with any move that has not been visited yet; in each iteration, if the top-of-stack has no out-edge to another move (does not need to come before any others), then push it to a result vector, followed by all others on the stack (in popped order). If it does have an out-edge and the target is already visited and not on the stack anymore (so already emitted), likewise, emit this move and the rest on the stack. If it has an out-edge to a move not yet visited, push on the stack and continue. Otherwise, if out-edge to a move currently on the stack, we have found a cycle. In this case, we emit the moves on the stack with a modification: the first move writes to a scratch register, and we emit an additional move that moves from the scratch to the first move's dest. This breaks the cycle. The astute reader may notice that this sounds like a canonical application of Tarjan's algorithm for finding SCCs (strongly-connected components). Why don't we have the full complexity of that algorithm? In particular, *why* can we emit the cycle *right away* once we find it, rather than ensuring that we have gotten all of the SCC first? The answer is that because there is only *one* out-edge at most (a move can only require preceding *one* other move), all SCCs must be simple cycles. This means that once we have found a cycle, no other nodes (moves) can be part of the SCC, because every node's single out-edge is already accounted for. This is what allows us to avoid a fully general SCC algorithm. Once the vector of moves in-reverse has been constructed, we reverse it and return. Note that this "move resolver" is fuzzed separately with a simple symbolic move simulator (the `moves` fuzz-target). ### Stack-to-Stack Moves There is one potentially difficult situation that could arise from the move-resolution logic so far: if a vreg moves from one spillslot to another, this implies a memory-to-memory move, which most machine architectures cannot handle natively. It would be much nicer if we could ensure within the regalloc that this never occurs. This is in fact possible to do in a postprocessing step. We iterate through the sequential moves, tracking whether the scratch register is in use (has been written). 
When we see a stack-to-stack move: (i) if the scratch register is not in use, generate a stack-to-scratch move and scratch-to-stack move; otherwise, (ii) if the scratch register is in use, allocate an "extra spillslot" if one has not already been allocated, move the scratch reg to that, do the above stack-to-scratch / scratch-to-stack sequence, then reload the scratch reg from the extra spillslot. ## Redundant-Spill/Load Elimination As a final step before returning the vector of program edits to the client, we perform one optimization: redundant-spill/load elimination. To understand the need for this, consider what will occur when a vreg is (i) defined once, (ii) used many times, and (iii) spilled multiple times between some of the uses: with the design described above, we will move the value from the preg to the stack after every segment of uses, and then reload it when the next use occurs. However, only the first spill is actually needed; as we noted above, we allocate spillslots so that the slot that corresponded to the vreg at the first spill will always be reserved for that vreg as long as it is live. If no other defs or mods occur, the value in the slot can be reloaded, and need not be written back every time. This inefficiency is a result of our invariant that a vreg lives in exactly one place at a time, and these locations are joined by moves. This is a simple and effective design to use for most of the allocation pipeline, but falls flat here. It is especially inefficient when the unnecessary spill occurs in an inner loop. (E.g.: value defined at top of function is spilled, then used once in the middle of an inner loop body.) The opposite case can also sometimes occur, though it is rarer: a value is loaded into a register, spilled, and then reloaded into the same register. This can happen when hinting is successful at getting several segments of a vreg to use the same preg, but splitting has trimmed part of the liverange between uses and put it in the spill bundle, and the spill bundle did not get a reg. In order to resolve this inefficiency, we implement a general redundant-spill/load elimination pass (an even more general solution would be a full redundant-move elimination pass, but we focus on moves that are spills/loads to contain the complexity for now). This pass tracks, for every allocation (reg or spillslot), whether it is a copy of another allocation. This state is invalidated whenever either that allocation or the allocation of which it is a copy is overwritten. When we see a move instruction, if the destination is already a copy of the source, we elide the move. (There are some additional complexities to preserve checker metadata which we do not describe here.) Note that this could, in principle, be done as a fixpoint analysis over the CFG; it must be, if we try to preserve state across blocks. This is because a location is only a copy of another if that is true on every incoming edge. However, to avoid the cost and complexity of doing such an analysis, we instead take the much simpler approach of doing only an intra-block analysis. This turns out to be sufficient to remove most redundant moves, especially in the common case of a single use of an otherwise-spilled value. Note that there is an opportunity to do better: as we only accept SSA code we would know that a value could not be redefined once written. 
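
To make the intra-block tracking concrete, here is a small, self-contained sketch of a redundant-move eliminator of the kind described above. This is an illustration under assumptions, not the allocator's implementation: the `Loc` and `Edit` types, the `HashMap`-based state, and the treatment of every non-move write as a plain `Def` are simplifications, and the real pass also restricts itself to spill/load moves and preserves checker metadata.

```rust
use std::collections::HashMap;

/// An abstract location: a physical register or a spillslot.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Loc {
    Reg(u8),
    Slot(u32),
}

/// A simplified program edit: either a move between locations or an
/// instruction that writes a location with a brand-new value.
#[derive(Clone, Copy, Debug)]
enum Edit {
    Move { from: Loc, to: Loc },
    Def(Loc),
}

/// Intra-block pass: drop moves whose destination already holds a copy of
/// the source's value. `copy_of[l] = r` means location `l` currently holds
/// the same value as "root" location `r`. State starts empty at block entry.
fn eliminate_redundant_moves(edits: &[Edit]) -> Vec<Edit> {
    let mut copy_of: HashMap<Loc, Loc> = HashMap::new();
    let mut out = Vec::new();
    for &edit in edits {
        match edit {
            Edit::Move { from, to } => {
                // Resolve `from` to its root if it is itself a recorded copy.
                let root = *copy_of.get(&from).unwrap_or(&from);
                // If `to` already holds this value (it is the root itself, or
                // it is recorded as a copy of the root), elide the move.
                if to == root || copy_of.get(&to) == Some(&root) {
                    continue;
                }
                // Writing `to` invalidates anything recorded as a copy of it.
                copy_of.retain(|_, r| *r != to);
                copy_of.insert(to, root);
                out.push(edit);
            }
            Edit::Def(loc) => {
                // A fresh definition invalidates copies in both directions.
                copy_of.remove(&loc);
                copy_of.retain(|_, r| *r != loc);
                out.push(edit);
            }
        }
    }
    out
}

fn main() {
    use Loc::*;
    let edits = [
        Edit::Move { from: Reg(0), to: Slot(0) }, // spill a value
        Edit::Move { from: Slot(0), to: Reg(1) }, // reload it into r1
        Edit::Move { from: Reg(1), to: Slot(0) }, // re-spill, unmodified: elided
        Edit::Def(Reg(1)),                        // r1 overwritten with a new value
        Edit::Move { from: Reg(1), to: Slot(0) }, // now a genuine store: kept
    ];
    let out = eliminate_redundant_moves(&edits);
    assert_eq!(out.len(), 4);
}
```

In the example, the store of an unmodified, just-reloaded value back into the slot it came from is recognized as redundant and dropped, which matches the common single-use-of-an-otherwise-spilled-value case described above.
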
# Future Plans

## Better Split Heuristics

We have spent quite some effort trying to improve splitting behavior, and it is now generally decent, but more work could be done here, especially with regard to the interaction between splits and the loop nest.

# Appendix: Comparison to IonMonkey Allocator

There are a number of differences between the [IonMonkey allocator](https://searchfox.org/mozilla-central/source/js/src/jit/BacktrackingAllocator.cpp) and this one. While this allocator initially began as an attempt to clone IonMonkey's, it has drifted significantly as we optimized the design (especially after we built the regalloc.rs shim and had to adapt to its code style); it is easier at this point to name the similarities than the differences.

* The core abstractions of "liverange", "bundle", "vreg", "preg", and "operand" (with policies/constraints) are the same.

* The overall allocator pipeline is the same, and the top-level structure of each stage should look similar. Both allocators begin by computing liveranges, then merging bundles, then handling bundles and splitting/evicting as necessary, then doing second-chance allocation, then reifying the decisions.

* The cost functions are very similar, though the heuristics that make decisions based on them are not.

Several notable high-level differences are:

* There are [many different fuzz targets](fuzz/fuzz_targets/) that exercise the allocator, including a full symbolic checker (`ion_checker` target) based on the [symbolic checker in regalloc.rs](https://cfallin.org/blog/2021/03/15/cranelift-isel-3/) and, e.g., a targeted fuzzer for the parallel move-resolution algorithm (`moves`) and the SSA generator used for generating cases for the other fuzz targets (`ssagen`).

* The data-structure invariants are simplified. While the IonMonkey allocator allowed for LiveRanges and Bundles to overlap in certain cases, this allocator sticks to a strict invariant: ranges do not overlap in bundles, and bundles do not overlap. There are other examples too: e.g., the definition of minimal bundles is very simple and does not depend on scanning the code at all. In general, we should be able to state simple invariants and see by inspection (as well as fuzzing -- see above) that they hold.

* The data structures themselves are simplified. Where IonMonkey uses linked lists in many places, this allocator stores simple inline smallvecs of liveranges on bundles and vregs, and smallvecs of uses on liveranges. We also (i) find a way to construct liveranges in-order immediately, without any need for splicing, unlike IonMonkey, and (ii) relax sorting invariants where possible to allow for cheap append operations in many cases.

* The splitting heuristics are significantly reworked. Whereas IonMonkey has an all-at-once approach to splitting an entire bundle, and has a list of complex heuristics to choose where to split, this allocator does conflict-based splitting, and tries to decide whether to split or evict and which split to take based on cost heuristics.

* The liverange computation is exact, whereas IonMonkey approximates using a single-pass algorithm that makes vregs live across entire loop bodies. We have found that precise liveness improves allocation performance and generated code quality, even though the liveness itself is slightly more expensive to compute.

* Many of the algorithms in the IonMonkey allocator are built with helper functions that do linear scans.
These "small quadratic" loops are likely not a huge issue in practice, but nevertheless have the potential to become one in corner cases. As much as possible, all work in this allocator is done in linear scans.

* There are novel schemes for solving certain interesting design challenges. One example: in IonMonkey, liveranges are connected across blocks by, when reaching one end of a control-flow edge in a scan, doing a lookup of the allocation at the other end. This is in principle a linear lookup (so quadratic overall). We instead generate a vector of "half-moves", keyed on the edge and from/to vregs, with each holding one of the allocations. By sorting and then scanning this vector, we can generate all edge moves in one linear scan. There are a number of other examples of simplifications: for example, we handle multiple conflicting physical-register-constrained uses of a vreg in a single instruction by recording a copy to do in a side-table, then removing constraints for the core regalloc. Ion instead has to tweak its definition of minimal bundles and create two liveranges that overlap (!) to represent the two uses.

* Using block parameters rather than phi-nodes significantly simplifies handling of inter-block data movement. IonMonkey had to special-case phis in many ways because they are actually quite weird: their uses happen semantically in other blocks, and their defs happen in parallel at the top of the block. Block parameters naturally and explicitly represent these semantics in a direct way.

* The allocator supports irreducible control flow and arbitrary block ordering (its only CFG requirement is that critical edges are split).

* The allocator supports non-SSA code, and has native support for handling program moves specially.

# Appendix: Performance-Tuning Lessons

In the course of optimizing the allocator's performance, we found a number of general principles:

* We got substantial performance speedups from using vectors rather than linked lists everywhere. This is well-known, but nevertheless, it took some thought to work out how to avoid the need for any splicing, and it turns out that even when our design is slightly less efficient asymptotically (e.g., append-and-re-sort rather than linear-time merge of two sorted liverange lists when merging bundles), it is faster.

* We initially used a direct translation of IonMonkey's splay tree as an allocation map for each PReg. This turned out to be significantly (!) less efficient than Rust's built-in BTree data structures, for the usual cache-efficiency vs. pointer-chasing reasons.

* We initially used dense bitvecs, as IonMonkey does, for livein/liveout bits. It turned out that a chunked sparse design (see below) was much more efficient.

* Precise liveness significantly improves performance because it reduces the size of liveranges (i.e., interference), and probing registers with liveranges is the most significant hot inner loop. Paying a fraction of a percent runtime for the iterative dataflow algorithm to get precise bitsets is more than worth it.

* The randomized probing of registers was a huge win: as above, the probing is very expensive, and reducing the average number of probes it takes to find a free register is very important.

* In general, single-pass algorithms and design of data structures to enable them are important. For example, the half-move technique avoids the need to do any O(log n) search at all, and is relatively cache-efficient.
As another example, a side-effect of the precise liveness was that we could then process operands within blocks in actual instruction order (in reverse), which allowed us to simply append liveranges to in-progress vreg liverange vectors and then reverse at the end. The expensive part is a single pass; only the bitset computation is a fixpoint loop.

* Sorts are better than always-sorted data structures (like btrees): they amortize all the comparison and update cost to one phase, and this phase is much more cache-friendly than a bunch of spread-out updates.

* Take care of basic data structures and their operator definitions! We initially used the auto-derived comparator on ProgPoint, and let ProgPoint be a normal struct (with a u32 inst index and a Before/After enum). The comparator for this, used in many sorting inner loops, was a compound thing with conditionals. Instead, pack them in a u32 and do a simple compare (and save half the memory as well). Likewise, the half-move key is a single value packed in a u64; this is far more efficient than the tuple comparator on a 4-tuple, and the half-move sort (which can be a few percent or more of total allocation time) became multiple times cheaper.

# Appendix: Data Structure: Chunked Sparse BitVec

We use a "chunked sparse bitvec" to store liveness information, which is just a set of VReg indices. The design is fairly simple: the toplevel is a HashMap from "chunk" to a `u64`, and each `u64` represents 64 contiguous indices. The intuition is that while the vreg sets are likely sparse overall, they will probably be dense within small regions of the index space. For example, in the Nth block in a function, the values that flow from block N-1 will largely be almost-contiguous vreg indices, if vregs are allocated in sequence down the function body. Or, at least, they will be some local vregs together with a few defined at the top of the function; two separate chunks will cover that.

We tried a number of other designs as well. Initially we used a simple dense bitvec, but this was prohibitively expensive: O(n^2) space when the real need is closer to O(n) (i.e., a classic sparse matrix). We also tried a hybrid scheme that kept a vector of indices when small and used either a bitvec or a hashset when large. This did not perform as well because (i) it was less memory-efficient (the chunking helps with this) and (ii) insertions are more expensive when they always require a full hashset/hashmap insert.

# Appendix: Fuzzing

We have five fuzz targets: `ssagen`, `domtree`, `moves`, `ion`, and `ion_checker`.

## SSAGen

The SSAGen target tests our SSA generator, which generates cases for the full allocator fuzz targets. The SSA generator is careful to always generate a valid CFG, with split critical edges, and valid SSA, so that we never have to throw out a test input before we reach the allocator itself. (An alternative fuzzing approach randomly generates programs and then throws out those that do not meet certain conditions before using them as legitimate testcases; this is much simpler, but less efficient.)

To generate a valid CFG, with no unreachable blocks and with no critical edges, the generator glues together units of either one or three blocks (A->B, A->C), forming either a straight-through section or a conditional. These are glued together into a "spine", and the conditionals (the "C" block), where they exist, are then linked to a random target block chosen among the main blocks of these one- or three-block units.
The targets are chosen either randomly, for potentially irreducible CFGs, or in a way that ensures proper nesting of loop backedges, if a structured CFG is requested. SSA is generated by first choosing which vregs will be defined in each block, and which will be defined as blockparams vs. instruction defs. Instructions are then generated, with operands chosen among the "available" vregs: those defined so far in the current block and all of those in any other block that dominates this one. The SSAGen fuzz target runs the above code generator against an SSA validator, and thus ensures that it will only generate valid SSA code. ## Domtree The `domtree` fuzz target computes dominance using the algorithm that we use elsewhere in our CFG analysis, and then walks a randomly-generated path through the CFG. It checks that the dominance definition ("a dom b if any path from entry to b must pass through a") is consistent with this particular randomly-chosen path. ## Moves The `moves` fuzz target tests the parallel move resolver. It generates a random sequence of parallel moves, careful to ensure that each destination is written only once. It then runs the parallel move resolver, and then *abstractly interprets* the resulting sequential series of moves, thus determining which inputs flow to which outputs. This must match the original set of parallel moves. ## Ion and Ion-checker The `ion` fuzz target runs the allocator over test programs generated by SSAGen. It does not validate the output; it only tests that the allocator runs to completion and does not panic. This was used mainly during development, and is now less useful than the checker-based target. The `ion_checker` fuzz target runs the allocator's result through a symbolic checker, which is adapted from the one developed for regalloc.rs (see [this blog post](https://cfallin.org/blog/2021/01/22/cranelift-isel-2/) for more details). This is the most useful fuzz target in the fuzzing suite, and has found many bugs in development. regalloc2-0.10.2/doc/TODO000066400000000000000000000021661467034227200147150ustar00rootroot00000000000000# Features - Large-input support (> 1M vregs, > 1M blocks) - Two operand impls: u64-based and u32-based. Always accept u64-based `Operand` publicly (do not expose this in interface). - Trait to generalize over them and support both internally (parameterize the whole allocator impl) - On data-structure init, choose one or the other based on max vreg index - Update halfmove keys: u128 rather than u64 - Support allocation of register pairs (or overlapping registers generally) - Rematerialization - Stack-location constraints that place operands in user-defined stack locations (distinct from SpillSlots) (e.g., stack args) # Performance - Investigate better register hinting - Investigate more principled cost functions and split locations, especially around loop nests - Investigate ways to improve bundle-merging; e.g., merge moves before other types of connections - Add limited inter-block redundant-move elimination: propagate across splits but not joins. 
- Optimize allocations (some reports of 5-7% of time spent in allocator) # Cleanup - Remove support for non-SSA code once no longer necessaryregalloc2-0.10.2/fuzz/000077500000000000000000000000001467034227200144515ustar00rootroot00000000000000regalloc2-0.10.2/fuzz/.gitignore000066400000000000000000000000301467034227200164320ustar00rootroot00000000000000target corpus artifacts regalloc2-0.10.2/fuzz/Cargo.toml000066400000000000000000000017761467034227200164140ustar00rootroot00000000000000[package] name = "regalloc2-fuzz" version = "0.0.0" authors = ["Chris Fallin "] license = "MPL-2.0 AND Apache-2.0 WITH LLVM-exception" publish = false edition = "2018" [package.metadata] cargo-fuzz = true [dependencies] regalloc2 = { path = "../", features = ["fuzzing"] } log = { version = "0.4.8", default-features = false } env_logger = "0.8.3" libfuzzer-sys = "0.4.2" # Prevent this from interfering with workspaces [workspace] members = ["."] [[bin]] name = "domtree" path = "fuzz_targets/domtree.rs" test = false doc = false [[bin]] name = "ssagen" path = "fuzz_targets/ssagen.rs" test = false doc = false [[bin]] name = "ion" path = "fuzz_targets/ion.rs" test = false doc = false [[bin]] name = "moves" path = "fuzz_targets/moves.rs" test = false doc = false [[bin]] name = "ion_checker" path = "fuzz_targets/ion_checker.rs" test = false doc = false # Enable debug assertions and overflow checks when fuzzing [profile.release] debug = true debug-assertions = true overflow-checks = true regalloc2-0.10.2/fuzz/fuzz_targets/000077500000000000000000000000001467034227200172005ustar00rootroot00000000000000regalloc2-0.10.2/fuzz/fuzz_targets/domtree.rs000066400000000000000000000074261467034227200212160ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ #![no_main] use regalloc2::fuzzing::arbitrary::{Arbitrary, Result, Unstructured}; use regalloc2::fuzzing::{domtree, fuzz_target, postorder}; use regalloc2::Block; use std::collections::HashSet; #[derive(Clone, Debug)] struct CFG { num_blocks: usize, preds: Vec>, succs: Vec>, } impl Arbitrary<'_> for CFG { fn arbitrary(u: &mut Unstructured) -> Result { let num_blocks = u.int_in_range(1..=1000)?; let mut succs = vec![]; for _ in 0..num_blocks { let mut block_succs = vec![]; for _ in 0..u.int_in_range(0..=5)? { block_succs.push(Block::new(u.int_in_range(0..=(num_blocks - 1))?)); } succs.push(block_succs); } let mut preds = vec![]; for _ in 0..num_blocks { preds.push(vec![]); } for from in 0..num_blocks { for succ in &succs[from] { preds[succ.index()].push(Block::new(from)); } } Ok(CFG { num_blocks, preds, succs, }) } } #[derive(Clone, Debug)] struct Path { blocks: Vec, } impl Path { fn choose_from_cfg(cfg: &CFG, u: &mut Unstructured) -> Result { let succs = u.int_in_range(0..=(2 * cfg.num_blocks))?; let mut block = Block::new(0); let mut blocks = vec![]; blocks.push(block); for _ in 0..succs { if cfg.succs[block.index()].is_empty() { break; } block = *u.choose(&cfg.succs[block.index()])?; blocks.push(block); } Ok(Path { blocks }) } } fn check_idom_violations(idom: &[Block], path: &Path) { // "a dom b" means that any path from the entry block through the CFG that // contains a and b will contain a before b. // // To test this, for any given block b_i, we have the set S of b_0 .. b_{i-1}, // and we walk up the domtree from b_i to get all blocks that dominate b_i; // each such block must appear in S. 
(Otherwise, we have a counterexample // for which dominance says it should appear in the path prefix, but it does // not.) let mut visited = HashSet::new(); visited.insert(Block::new(0)); for block in &path.blocks { let mut parent = idom[block.index()]; let mut domset = HashSet::new(); domset.insert(*block); while parent.is_valid() { assert!(visited.contains(&parent)); domset.insert(parent); let next = idom[parent.index()]; parent = next; } // Check that `dominates()` returns true for every block in domset, // and false for every other block. for domblock in 0..idom.len() { let domblock = Block::new(domblock); assert_eq!( domset.contains(&domblock), domtree::dominates(idom, domblock, *block) ); } visited.insert(*block); } } #[derive(Clone, Debug)] struct TestCase { cfg: CFG, path: Path, } impl Arbitrary<'_> for TestCase { fn arbitrary(u: &mut Unstructured) -> Result { let cfg = CFG::arbitrary(u)?; let path = Path::choose_from_cfg(&cfg, u)?; Ok(TestCase { cfg, path }) } } fuzz_target!(|testcase: TestCase| { let postord = postorder::calculate(testcase.cfg.num_blocks, Block::new(0), |block| { &testcase.cfg.succs[block.index()] }); let idom = domtree::calculate( testcase.cfg.num_blocks, |block| &testcase.cfg.preds[block.index()], &postord[..], Block::new(0), ); check_idom_violations(&idom[..], &testcase.path); }); regalloc2-0.10.2/fuzz/fuzz_targets/ion.rs000066400000000000000000000007331467034227200203360ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ #![no_main] use regalloc2::fuzzing::func::Func; use regalloc2::fuzzing::fuzz_target; fuzz_target!(|func: Func| { let _ = env_logger::try_init(); log::trace!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); let _out = regalloc2::fuzzing::ion::run(&func, &env, false, false).expect("regalloc did not succeed"); }); regalloc2-0.10.2/fuzz/fuzz_targets/ion_checker.rs000066400000000000000000000024011467034227200220140ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ #![no_main] use regalloc2::fuzzing::arbitrary::{Arbitrary, Result, Unstructured}; use regalloc2::fuzzing::checker::Checker; use regalloc2::fuzzing::func::{Func, Options}; use regalloc2::fuzzing::fuzz_target; #[derive(Clone, Debug)] struct TestCase { func: Func, } impl Arbitrary<'_> for TestCase { fn arbitrary(u: &mut Unstructured) -> Result { Ok(TestCase { func: Func::arbitrary_with_options( u, &Options { reused_inputs: true, fixed_regs: true, fixed_nonallocatable: true, clobbers: true, reftypes: true, }, )?, }) } } fuzz_target!(|testcase: TestCase| { let func = testcase.func; let _ = env_logger::try_init(); log::trace!("func:\n{:?}", func); let env = regalloc2::fuzzing::func::machine_env(); let out = regalloc2::fuzzing::ion::run(&func, &env, true, false).expect("regalloc did not succeed"); let mut checker = Checker::new(&func, &env); checker.prepare(&out); checker.run().expect("checker failed"); }); regalloc2-0.10.2/fuzz/fuzz_targets/moves.rs000066400000000000000000000110301467034227200206720ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. 
*/ #![no_main] use regalloc2::fuzzing::arbitrary::{Arbitrary, Result, Unstructured}; use regalloc2::fuzzing::fuzz_target; use regalloc2::fuzzing::moves::{MoveAndScratchResolver, ParallelMoves}; use regalloc2::{Allocation, PReg, RegClass, SpillSlot}; use std::collections::{HashMap, HashSet}; fn is_stack_alloc(alloc: Allocation) -> bool { // Treat registers 20..=29 as fixed stack slots. if let Some(reg) = alloc.as_reg() { reg.index() > 20 } else { alloc.is_stack() } } #[derive(Clone, Debug)] struct TestCase { moves: Vec<(Allocation, Allocation)>, available_pregs: Vec, } impl Arbitrary<'_> for TestCase { fn arbitrary(u: &mut Unstructured) -> Result { let mut ret = TestCase { moves: vec![], available_pregs: vec![], }; let mut written = HashSet::new(); // An arbitrary sequence of moves between registers 0 to 29 // inclusive. while bool::arbitrary(u)? { let src = if bool::arbitrary(u)? { let reg = u.int_in_range(0..=29)?; Allocation::reg(PReg::new(reg, RegClass::Int)) } else { let slot = u.int_in_range(0..=31)?; Allocation::stack(SpillSlot::new(slot)) }; let dst = if bool::arbitrary(u)? { let reg = u.int_in_range(0..=29)?; Allocation::reg(PReg::new(reg, RegClass::Int)) } else { let slot = u.int_in_range(0..=31)?; Allocation::stack(SpillSlot::new(slot)) }; // Stop if we are going to write a reg more than once: // that creates an invalid parallel move set. if written.contains(&dst) { break; } written.insert(dst); ret.moves.push((src, dst)); } // We might have some unallocated registers free for scratch // space... for i in 0..u.int_in_range(0..=2)? { let reg = PReg::new(30 + i, RegClass::Int); ret.available_pregs.push(Allocation::reg(reg)); } Ok(ret) } } fuzz_target!(|testcase: TestCase| { let _ = env_logger::try_init(); let mut par = ParallelMoves::new(); for &(src, dst) in &testcase.moves { par.add(src, dst, ()); } let moves = par.resolve(); log::trace!("raw resolved moves: {:?}", moves); // Resolve uses of scratch reg and stack-to-stack moves with the // scratch resolver. let mut avail = testcase.available_pregs.clone(); let find_free_reg = || avail.pop(); let mut next_slot = 32; let get_stackslot = || { let slot = next_slot; next_slot += 1; Allocation::stack(SpillSlot::new(slot)) }; let preferred_victim = PReg::new(0, RegClass::Int); let scratch_resolver = MoveAndScratchResolver { find_free_reg, get_stackslot, is_stack_alloc, borrowed_scratch_reg: preferred_victim, }; let moves = scratch_resolver.compute(moves); log::trace!("resolved moves: {:?}", moves); // Compute the final source reg for each dest reg in the original // parallel-move set. let mut final_src_per_dest: HashMap = HashMap::new(); for &(src, dst) in &testcase.moves { final_src_per_dest.insert(dst, src); } log::trace!("expected final state: {:?}", final_src_per_dest); // Simulate the sequence of moves. let mut locations: HashMap = HashMap::new(); for (src, dst, _) in moves { let data = locations.get(&src).cloned().unwrap_or(src); locations.insert(dst, data); } log::trace!("simulated final state: {:?}", locations); // Assert that the expected register-moves occurred. for (reg, data) in locations { if let Some(&expected_data) = final_src_per_dest.get(®) { assert_eq!(expected_data, data); } else { if data != reg { // If not just the original value, then this location // has been modified, but it was not part of the // original parallel move. It must have been an // available preg or a scratch stackslot. 
assert!( testcase.available_pregs.contains(®) || (reg.is_stack() && reg.as_stack().unwrap().index() >= 32) ); } } } }); regalloc2-0.10.2/fuzz/fuzz_targets/ssagen.rs000066400000000000000000000020001467034227200210160ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ #![no_main] use regalloc2::fuzzing::arbitrary::{Arbitrary, Result, Unstructured}; use regalloc2::fuzzing::cfg::CFGInfo; use regalloc2::fuzzing::func::{Func, Options}; use regalloc2::fuzzing::fuzz_target; use regalloc2::ssa::validate_ssa; #[derive(Debug)] struct TestCase { f: Func, } impl Arbitrary<'_> for TestCase { fn arbitrary(u: &mut Unstructured) -> Result { Ok(TestCase { f: Func::arbitrary_with_options( u, &Options { reused_inputs: true, fixed_regs: true, fixed_nonallocatable: true, clobbers: true, reftypes: true, }, )?, }) } } fuzz_target!(|t: TestCase| { let cfginfo = CFGInfo::new(&t.f).expect("could not create CFG info"); validate_ssa(&t.f, &cfginfo).expect("invalid SSA"); }); regalloc2-0.10.2/fuzz/smoketest/000077500000000000000000000000001467034227200164675ustar00rootroot00000000000000regalloc2-0.10.2/fuzz/smoketest/ion_checker.bin000066400000000000000000000053331467034227200214360ustar00rootroot00000000000000ÿÿÿÿÿÿÿÿ1  æÿaæææææææææ:æææææææææææžƒ­­­­­í­­­­­­­­­­­­­­­­­­­­­­­í­­­­­­­­­­­­­­­­­­­­­­­­­­­­Áááá÷­­­­­­­­®­­­­­NÑNNNNNN! :zƒ£ÿC§)žÎžžfÿÿ|­­­­­­­­­­­­­­­­­­­­­­ ááááá­­­­­­­­­­­­­­­­­­­­­­­á×ááááááÁ÷­ÿÿÿÿÿÿ­­IIIIIIRRRRRR\­­­­­­­­áá­­­­­­­­­­­­­­­í­­­­­­­ááÁááá­÷­­­­­í­­­­­­­­­­­­­­­­­­­­­­­í­­­­­­­­­­­­­­­­­­­­­­­­­­­­Áááá÷­­­­­­­­®­­­­­NÑNNNNNN! :zƒ£ÿC§)žÎžžfÿÿ|+žÎ|||úžžžžžžžŸžž]žkžžžžžžžžžžžžžžžž(žžžžžžžžžžžžž[õÿÿ““““×üÿ¡ÿÞžžžžžžžpžžž(žžžžžžžžžžžžž[õÿÿ“““““WüÿŒÿÃ`'0¶¶¶!ÿÿÿÿÿ1 žžžž –žžžž•žžžž#žž™žžžžž žžžžžžžžžžžžžžžž0žžžžžžž.ÿÿÿÿÿâââââ Åïââââââââââââââ°aÿââââââââ&âââââ%žžžžžžžžžžÿÿÿ+žžžžZažž˜âââââââââââââââøžžžžž•žžžžŽžž™žžžžž žžžžžžžžžžžžžžžž0žžžžžžž.ÿÿÿÿÿâââââââââââââ Åïââââââââââââââ°aâââââââââââ+žÎ|||úžžžžžžžŸžž]žkžžžžžžžžžž žžžžž•žžžžŽžž™žžžžž žžžžžžžžžžžžžžžž0žžžžžžž.ÿÿÿÿÿâââââââââââââ Åïâââââââž[õÿÿ““““×üÿ¡ÿÞžžžžžžžpžžž(žžžžžžžžžžžžž[õÿÿ“““““WüÿŒÿÃ`'0¶¶¶!ÿÿÿÿÿ1 žžžž žžžžž•žžžž#žž™žžžžž žžžžžžžžžžžžžžžž0žžžžžžž.ÿÿÿÿÿâââââ Åïââââââââââââââ°aÿââââââââ&âââââ%žžžžžžžžžžÿÿÿ+žžžžZažž˜âââââââââââââââøžžžžž•žžžžŽžž™žžžžž žžžžžžžžžžžžžžžž0žžžžžžž.ÿÿÿÿÿââââââââââââò Åïââââââââââââ°aââââââââââââââ%žžžžžžžžžžÿÿÿ+žžžžžžbaagâââââââââââââââââøžž úûaažžžžô[ôôôôôôôkôôôôôôPôôôôôôôôôôôôžžžž[õÿÿ“““““×üÿŒÿÈ`'5¶­­­ ááááá­­­­­­­­­­­­­­­­­­­­­­­á×ááááááÁ÷­ÿÿÿÿÿÿ$­IIIIIIRRRRRR\­­­­­­­­áá­­­­­­­­­­­­­­­í­­­­­­­ááÁááá÷­­­­­­í­­­­­­­­­­­­­­­­­­­­­­­í­­­­­­­­­­­­­­­­­­­­­­­­­­­­Áááá÷­­­­­­­­®­­­­­NÑN­­­­á÷­­­NNNNN! 
:zƒ£ÿC§)žÎžžfÿÿ|+žÎ|||úžžžžžžžŸžž]žkžžžžžžžžžžžžžžžž(žžžžžžžÿÿÿÿÿÿÿžžžžžž[õÿÿ““““×üÿ¡ÿÞžžžžžžžpžžž(žžžžžžžžž¶¶!ÿÿÿÿÿ0 žžžž žžžžž•—žžžŽžž™žžžžž í žžžžžžžžžžžžžžžž0žžžžžžž.ÿÿÿÿÿâââââââââââžžžžžžÿÿÿ+žœžŽžž?*÷ÿÿÿÿ-_____.`;ìC§)Ξžžžƒ+ žžžžžžžžžžžžžžžž0žžžžžžž.ÿÿÿÿÿâââââââââââââ@ Åïââââââââžžžžžžzƒ£ÿC§)õ!âââââââââ°aââââââââââââââ%žžžžžžžžžžÿÿÿ+žžžžžžbaagâââââââââââââââââøžžúû¤žžžžžô[ôôôææææææææææææææææææéaaÿÿù((((`aaažž 'regalloc2-0.10.2/regalloc2-tool/000077500000000000000000000000001467034227200163005ustar00rootroot00000000000000regalloc2-0.10.2/regalloc2-tool/Cargo.toml000066400000000000000000000007761467034227200202420ustar00rootroot00000000000000[package] name = "regalloc2-tool" authors = [ "Chris Fallin ", "Mozilla SpiderMonkey Developers", ] version = "0.0.0" edition = "2021" publish = false license = "Apache-2.0 WITH LLVM-exception" description = "Tool for testing regalloc2" repository = "https://github.com/bytecodealliance/regalloc2" [dependencies] bincode = "1.3.3" clap = { version = "4.3.11", features = ["derive"] } pretty_env_logger = "0.5.0" regalloc2 = { path = "..", features = ["trace-log", "enable-serde"] } regalloc2-0.10.2/regalloc2-tool/src/000077500000000000000000000000001467034227200170675ustar00rootroot00000000000000regalloc2-0.10.2/regalloc2-tool/src/main.rs000066400000000000000000000056101467034227200203630ustar00rootroot00000000000000use std::path::PathBuf; use clap::Parser; use regalloc2::{ checker::Checker, serialize::SerializableFunction, Block, Edit, Function, InstOrEdit, Output, RegallocOptions, }; #[derive(Parser)] /// Tool for testing regalloc2. struct Args { /// Print the input function and the result of register allocation. #[clap(short = 'v')] verbose: bool, /// Input file containing a bincode-encoded SerializedFunction. input: PathBuf, } fn main() { pretty_env_logger::init(); let args = Args::parse(); let input = std::fs::read(&args.input).expect("could not read input file"); let function: SerializableFunction = bincode::deserialize(&input).expect("could not deserialize input file"); if args.verbose { println!("Input function: {function:?}"); } let options = RegallocOptions { verbose_log: true, validate_ssa: true, }; let output = match regalloc2::run(&function, function.machine_env(), &options) { Ok(output) => output, Err(e) => { panic!("Register allocation failed: {e:#?}"); } }; if args.verbose { print_output(&function, &output); } let mut checker = Checker::new(&function, function.machine_env()); checker.prepare(&output); if let Err(e) = checker.run() { panic!("Regsiter allocation checker failed: {e:#?}"); } } fn print_output(func: &SerializableFunction, output: &Output) { print!("Register allocation result: {{\n"); for i in 0..func.num_blocks() { let block = Block::new(i); let succs = func .block_succs(block) .iter() .map(|b| b.index()) .collect::>(); let preds = func .block_preds(block) .iter() .map(|b| b.index()) .collect::>(); print!(" block{}: # succs:{:?} preds:{:?}\n", i, succs, preds); for inst_or_edit in output.block_insts_and_edits(func, block) { match inst_or_edit { InstOrEdit::Inst(inst) => { let op = if func.is_ret(inst) { "ret" } else if func.is_branch(inst) { "branch" } else { "op" }; let ops: Vec<_> = func .inst_operands(inst) .iter() .zip(output.inst_allocs(inst)) .map(|(op, alloc)| format!("{op} => {alloc}")) .collect(); let ops = ops.join(", "); print!(" inst{}: {op} {ops}\n", inst.index(),); } InstOrEdit::Edit(Edit::Move { from, to }) => { print!(" edit: move {to} <- {from}\n"); } } } } print!("}}\n"); } 
regalloc2-0.10.2/src/000077500000000000000000000000001467034227200142425ustar00rootroot00000000000000regalloc2-0.10.2/src/cfg.rs000066400000000000000000000116721467034227200153560ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ //! Lightweight CFG analyses. use crate::{domtree, postorder, Block, Function, Inst, ProgPoint, RegAllocError}; use alloc::vec; use alloc::vec::Vec; use smallvec::{smallvec, SmallVec}; #[derive(Clone, Debug)] pub struct CFGInfo { /// Postorder traversal of blocks. pub postorder: Vec, /// Domtree parents, indexed by block. pub domtree: Vec, /// For each instruction, the block it belongs to. pub insn_block: Vec, /// For each block, the first instruction. pub block_entry: Vec, /// For each block, the last instruction. pub block_exit: Vec, /// For each block, what is the approximate loop depth? /// /// This measure is fully precise iff the input CFG is reducible /// and blocks are in RPO, so that loop backedges are precisely /// those whose block target indices are less than their source /// indices. Otherwise, it will be approximate, but should still /// be usable for heuristic purposes. pub approx_loop_depth: Vec, } impl CFGInfo { pub fn new(f: &F) -> Result { let postorder = postorder::calculate(f.num_blocks(), f.entry_block(), |block| { f.block_succs(block) }); let domtree = domtree::calculate( f.num_blocks(), |block| f.block_preds(block), &postorder[..], f.entry_block(), ); let mut insn_block = vec![Block::invalid(); f.num_insts()]; let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()]; let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()]; let mut backedge_in = vec![0; f.num_blocks()]; let mut backedge_out = vec![0; f.num_blocks()]; for block in 0..f.num_blocks() { let block = Block::new(block); for inst in f.block_insns(block).iter() { insn_block[inst.index()] = block; } block_entry[block.index()] = ProgPoint::before(f.block_insns(block).first()); block_exit[block.index()] = ProgPoint::after(f.block_insns(block).last()); // Check critical edge condition: if there is more than // one predecessor, each must have only one successor // (this block). let preds = f.block_preds(block).len() + if block == f.entry_block() { 1 } else { 0 }; if preds > 1 { for &pred in f.block_preds(block) { let succs = f.block_succs(pred).len(); if succs > 1 { return Err(RegAllocError::CritEdge(pred, block)); } } } // Check branch-arg condition: if any successors have more // than one predecessor (given above, there will only be // one such successor), then the last instruction of this // block (the branch) cannot have any args other than the // blockparams. 
let mut require_no_branch_args = false; for &succ in f.block_succs(block) { let preds = f.block_preds(succ).len() + if succ == f.entry_block() { 1 } else { 0 }; if preds > 1 { require_no_branch_args = true; break; } } if require_no_branch_args { let last = f.block_insns(block).last(); if !f.inst_operands(last).is_empty() { return Err(RegAllocError::DisallowedBranchArg(last)); } } for &succ in f.block_succs(block) { if succ.index() <= block.index() { backedge_in[succ.index()] += 1; backedge_out[block.index()] += 1; } } } let mut approx_loop_depth = vec![]; let mut backedge_stack: SmallVec<[usize; 4]> = smallvec![]; let mut cur_depth = 0; for block in 0..f.num_blocks() { if backedge_in[block] > 0 { cur_depth += 1; backedge_stack.push(backedge_in[block]); } approx_loop_depth.push(cur_depth); while backedge_stack.len() > 0 && backedge_out[block] > 0 { backedge_out[block] -= 1; *backedge_stack.last_mut().unwrap() -= 1; if *backedge_stack.last().unwrap() == 0 { cur_depth -= 1; backedge_stack.pop(); } } } Ok(CFGInfo { postorder, domtree, insn_block, block_entry, block_exit, approx_loop_depth, }) } pub fn dominates(&self, a: Block, b: Block) -> bool { domtree::dominates(&self.domtree[..], a, b) } } regalloc2-0.10.2/src/checker.rs000066400000000000000000001052641467034227200162240ustar00rootroot00000000000000/* * The following code is derived from `lib/src/checker.rs` in the * regalloc.rs project * (https://github.com/bytecodealliance/regalloc.rs). regalloc.rs is * also licensed under Apache-2.0 with the LLVM exception, as the rest * of regalloc2 is. */ //! Checker: verifies that spills/reloads/moves retain equivalent //! dataflow to original, VReg-based code. //! //! The basic idea is that we track symbolic values as they flow //! through spills and reloads. The symbolic values represent //! particular virtual registers in the original function body //! presented to the register allocator. Any instruction in the //! original function body (i.e., not added by the allocator) //! conceptually generates a symbolic value "Vn" when storing to (or //! modifying) a virtual register. //! //! A symbolic value is logically a *set of virtual registers*, //! representing all virtual registers equal to the value in the given //! storage slot at a given program point. This representation (as //! opposed to tracking just one virtual register) is necessary //! because the regalloc may implement moves in the source program //! (via move instructions or blockparam assignments on edges) in //! "intelligent" ways, taking advantage of values that are already in //! the right place, so we need to know *all* names for a value. //! //! These symbolic values are precise but partial: in other words, if //! a physical register is described as containing a virtual register //! at a program point, it must actually contain the value of this //! register (modulo any analysis bugs); but it may describe fewer //! virtual registers even in cases where one *could* statically prove //! that it contains a certain register, because the analysis is not //! perfectly path-sensitive or value-sensitive. However, all //! assignments *produced by our register allocator* should be //! analyzed fully precisely. (This last point is important and bears //! repeating: we only need to verify the programs that we produce, //! not arbitrary programs.) //! //! Operand constraints (fixed register, register, any) are also checked //! at each operand. //! //! ## Formal Definition //! //! The analysis lattice consists of the elements of ð’«(V), the //! 
powerset (set of all subsets) of V (the set of all virtual //! registers). The ⊤ (top) value in the lattice is V itself, and the //! ⊥ (bottom) value in the lattice is ∅ (the empty set). The lattice //! ordering relation is the subset relation: S ≤ U iff S ⊆ U. These //! definitions imply that the lattice meet-function (greatest lower //! bound) is set-intersection. //! //! (For efficiency, we represent ⊤ not by actually listing out all //! virtual registers, but by representing a special "top" value, but //! the semantics are the same.) //! //! The dataflow analysis state at each program point (each point //! before or after an instruction) is: //! //! - map of: Allocation -> lattice value //! //! And the transfer functions for instructions are (where `A` is the //! above map from allocated physical registers to lattice values): //! //! - `Edit::Move` inserted by RA: [ alloc_d := alloc_s ] //! //! A' = A[alloc_d → A[alloc_s]] //! //! - statement in pre-regalloc function [ V_i := op V_j, V_k, ... ] //! with allocated form [ A_i := op A_j, A_k, ... ] //! //! A' = { A_k → A[A_k] \ { V_i } for k ≠ i } ∪ //! { A_i -> { V_i } } //! //! In other words, a statement, even after allocation, generates //! a symbol that corresponds to its original virtual-register //! def. Simultaneously, that same virtual register symbol is removed //! from all other allocs: they no longer carry the current value. //! //! - Parallel moves or blockparam-assignments in original program //! [ V_d1 := V_s1, V_d2 := V_s2, ... ] //! //! A' = { A_k → subst(A[A_k]) for all k } //! where subst(S) removes symbols for overwritten virtual //! registers (V_d1 .. V_dn) and then adds V_di whenever //! V_si appeared prior to the removals. //! //! To check correctness, we first find the dataflow fixpoint with the //! above lattice and transfer/meet functions. Then, at each op, we //! examine the dataflow solution at the preceding program point, and //! check that the allocation for each op arg (input/use) contains the //! symbol corresponding to the original virtual register specified //! for this arg. #![allow(dead_code)] use crate::{ Allocation, AllocationKind, Block, Edit, Function, FxHashMap, FxHashSet, Inst, InstOrEdit, InstPosition, MachineEnv, Operand, OperandConstraint, OperandKind, OperandPos, Output, PReg, PRegSet, VReg, }; use alloc::vec::Vec; use alloc::{format, vec}; use core::default::Default; use core::hash::Hash; use core::result::Result; use smallvec::{smallvec, SmallVec}; /// A set of errors detected by the regalloc checker. #[derive(Clone, Debug)] pub struct CheckerErrors { errors: Vec, } /// A single error detected by the regalloc checker. 
#[derive(Clone, Debug)] pub enum CheckerError { MissingAllocation { inst: Inst, op: Operand, }, UnknownValueInAllocation { inst: Inst, op: Operand, alloc: Allocation, }, ConflictedValueInAllocation { inst: Inst, op: Operand, alloc: Allocation, }, IncorrectValuesInAllocation { inst: Inst, op: Operand, alloc: Allocation, actual: FxHashSet, }, ConstraintViolated { inst: Inst, op: Operand, alloc: Allocation, }, AllocationIsNotReg { inst: Inst, op: Operand, alloc: Allocation, }, AllocationIsNotFixedReg { inst: Inst, op: Operand, alloc: Allocation, }, AllocationIsNotReuse { inst: Inst, op: Operand, alloc: Allocation, expected_alloc: Allocation, }, AllocationIsNotStack { inst: Inst, op: Operand, alloc: Allocation, }, ConflictedValueInStackmap { inst: Inst, alloc: Allocation, }, NonRefValuesInStackmap { inst: Inst, alloc: Allocation, vregs: FxHashSet, }, StackToStackMove { into: Allocation, from: Allocation, }, } /// Abstract state for an allocation. /// /// Equivalent to a set of virtual register names, with the /// universe-set as top and empty set as bottom lattice element. The /// meet-function is thus set intersection. #[derive(Clone, Debug, PartialEq, Eq)] enum CheckerValue { /// The lattice top-value: this value could be equivalent to any /// vreg (i.e., the universe set). Universe, /// The set of VRegs that this value is equal to. VRegs(FxHashSet), } impl CheckerValue { fn vregs(&self) -> Option<&FxHashSet> { match self { CheckerValue::Universe => None, CheckerValue::VRegs(vregs) => Some(vregs), } } fn vregs_mut(&mut self) -> Option<&mut FxHashSet> { match self { CheckerValue::Universe => None, CheckerValue::VRegs(vregs) => Some(vregs), } } } impl Default for CheckerValue { fn default() -> CheckerValue { CheckerValue::Universe } } impl CheckerValue { /// Meet function of the abstract-interpretation value /// lattice. Returns a boolean value indicating whether `self` was /// changed. fn meet_with(&mut self, other: &CheckerValue) { match (self, other) { (_, CheckerValue::Universe) => { // Nothing. } (this @ CheckerValue::Universe, _) => { *this = other.clone(); } (CheckerValue::VRegs(my_vregs), CheckerValue::VRegs(other_vregs)) => { my_vregs.retain(|vreg| other_vregs.contains(vreg)); } } } fn from_reg(reg: VReg) -> CheckerValue { CheckerValue::VRegs(core::iter::once(reg).collect()) } fn remove_vreg(&mut self, reg: VReg) { match self { CheckerValue::Universe => { panic!("Cannot remove VReg from Universe set (we do not have the full list of vregs available"); } CheckerValue::VRegs(vregs) => { vregs.remove(®); } } } fn copy_vreg(&mut self, src: VReg, dst: VReg) { match self { CheckerValue::Universe => { // Nothing. } CheckerValue::VRegs(vregs) => { if vregs.contains(&src) { vregs.insert(dst); } } } } fn empty() -> CheckerValue { CheckerValue::VRegs(FxHashSet::default()) } } fn visit_all_vregs(f: &F, mut v: V) { for block in 0..f.num_blocks() { let block = Block::new(block); for inst in f.block_insns(block).iter() { for op in f.inst_operands(inst) { v(op.vreg()); } if f.is_branch(inst) { for succ_idx in 0..f.block_succs(block).len() { for ¶m in f.branch_blockparams(block, inst, succ_idx) { v(param); } } } } for &vreg in f.block_params(block) { v(vreg); } } } /// State that steps through program points as we scan over the instruction stream. 
#[derive(Clone, Debug, PartialEq, Eq)] enum CheckerState { Top, Allocations(FxHashMap), } impl CheckerState { fn get_value(&self, alloc: &Allocation) -> Option<&CheckerValue> { match self { CheckerState::Top => None, CheckerState::Allocations(allocs) => allocs.get(alloc), } } fn get_values_mut(&mut self) -> impl Iterator { match self { CheckerState::Top => panic!("Cannot get mutable values iterator on Top state"), CheckerState::Allocations(allocs) => allocs.values_mut(), } } fn get_mappings(&self) -> impl Iterator { match self { CheckerState::Top => panic!("Cannot get mappings iterator on Top state"), CheckerState::Allocations(allocs) => allocs.iter(), } } fn get_mappings_mut(&mut self) -> impl Iterator { match self { CheckerState::Top => panic!("Cannot get mutable mappings iterator on Top state"), CheckerState::Allocations(allocs) => allocs.iter_mut(), } } /// Transition from a "top" (undefined/unanalyzed) state to an empty set of allocations. fn become_defined(&mut self) { match self { CheckerState::Top => *self = CheckerState::Allocations(FxHashMap::default()), _ => {} } } fn set_value(&mut self, alloc: Allocation, value: CheckerValue) { match self { CheckerState::Top => { panic!("Cannot set value on Top state"); } CheckerState::Allocations(allocs) => { allocs.insert(alloc, value); } } } fn copy_vreg(&mut self, src: VReg, dst: VReg) { match self { CheckerState::Top => { // Nothing. } CheckerState::Allocations(allocs) => { for value in allocs.values_mut() { value.copy_vreg(src, dst); } } } } fn remove_value(&mut self, alloc: &Allocation) { match self { CheckerState::Top => { panic!("Cannot remove value on Top state"); } CheckerState::Allocations(allocs) => { allocs.remove(alloc); } } } fn initial() -> Self { CheckerState::Allocations(FxHashMap::default()) } } impl Default for CheckerState { fn default() -> CheckerState { CheckerState::Top } } impl core::fmt::Display for CheckerValue { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { match self { CheckerValue::Universe => { write!(f, "top") } CheckerValue::VRegs(vregs) => { write!(f, "{{ ")?; for vreg in vregs { write!(f, "{} ", vreg)?; } write!(f, "}}")?; Ok(()) } } } } /// Meet function for analysis value: meet individual values at /// matching allocations, and intersect keys (remove key-value pairs /// only on one side). Returns boolean flag indicating whether `into` /// changed. fn merge_map( into: &mut FxHashMap, from: &FxHashMap, ) { into.retain(|k, _| from.contains_key(k)); for (k, into_v) in into.iter_mut() { let from_v = from.get(k).unwrap(); into_v.meet_with(from_v); } } impl CheckerState { /// Create a new checker state. fn new() -> CheckerState { Default::default() } /// Merge this checker state with another at a CFG join-point. fn meet_with(&mut self, other: &CheckerState) { match (self, other) { (_, CheckerState::Top) => { // Nothing. 
} (this @ CheckerState::Top, _) => { *this = other.clone(); } ( CheckerState::Allocations(my_allocations), CheckerState::Allocations(other_allocations), ) => { merge_map(my_allocations, other_allocations); } } } fn check_val<'a, F: Function>( &self, inst: Inst, op: Operand, alloc: Allocation, val: &CheckerValue, allocs: &[Allocation], checker: &Checker<'a, F>, ) -> Result<(), CheckerError> { if alloc == Allocation::none() { return Err(CheckerError::MissingAllocation { inst, op }); } if op.kind() == OperandKind::Use && op.as_fixed_nonallocatable().is_none() { match val { CheckerValue::Universe => { return Err(CheckerError::UnknownValueInAllocation { inst, op, alloc }); } CheckerValue::VRegs(vregs) if !vregs.contains(&op.vreg()) => { return Err(CheckerError::IncorrectValuesInAllocation { inst, op, alloc, actual: vregs.clone(), }); } _ => {} } } self.check_constraint(inst, op, alloc, allocs, checker)?; Ok(()) } /// Check an instruction against this state. This must be called /// twice: once with `InstPosition::Before`, and once with /// `InstPosition::After` (after updating state with defs). fn check<'a, F: Function>( &self, pos: InstPosition, checkinst: &CheckerInst, checker: &Checker<'a, F>, ) -> Result<(), CheckerError> { let default_val = Default::default(); match checkinst { &CheckerInst::Op { inst, ref operands, ref allocs, .. } => { // Skip Use-checks at the After point if there are any // reused inputs: the Def which reuses the input // happens early. let has_reused_input = operands .iter() .any(|op| matches!(op.constraint(), OperandConstraint::Reuse(_))); if has_reused_input && pos == InstPosition::After { return Ok(()); } // For each operand, check (i) that the allocation // contains the expected vreg, and (ii) that it meets // the requirements of the OperandConstraint. for (op, alloc) in operands.iter().zip(allocs.iter()) { let is_here = match (op.pos(), pos) { (OperandPos::Early, InstPosition::Before) => true, (OperandPos::Late, InstPosition::After) => true, _ => false, }; if !is_here { continue; } let val = self.get_value(alloc).unwrap_or(&default_val); trace!( "checker: checkinst {:?}: op {:?}, alloc {:?}, checker value {:?}", checkinst, op, alloc, val ); self.check_val(inst, *op, *alloc, val, allocs, checker)?; } } &CheckerInst::Move { into, from } => { // Ensure that the allocator never returns stack-to-stack moves. let is_stack = |alloc: Allocation| { if let Some(reg) = alloc.as_reg() { checker.stack_pregs.contains(reg) } else { alloc.is_stack() } }; if is_stack(into) && is_stack(from) { return Err(CheckerError::StackToStackMove { into, from }); } } &CheckerInst::ParallelMove { .. } => { // This doesn't need verification; we just update // according to the move semantics in the step // function below. } } Ok(()) } /// Update according to instruction. fn update(&mut self, checkinst: &CheckerInst) { self.become_defined(); match checkinst { &CheckerInst::Move { into, from } => { // Value may not be present if this move is part of // the parallel move resolver's fallback sequence that // saves a victim register elsewhere. (In other words, // that sequence saves an undefined value and restores // it, so has no effect.) The checker needs to avoid // putting Universe lattice values into the map. if let Some(val) = self.get_value(&from).cloned() { trace!( "checker: checkinst {:?} updating: move {:?} -> {:?} val {:?}", checkinst, from, into, val ); self.set_value(into, val); } } &CheckerInst::ParallelMove { ref moves } => { // First, build map of actions for each vreg in an // alloc. 
If an alloc has a reg V_i before a parallel // move, then for each use of V_i as a source (V_i -> // V_j), we might add a new V_j wherever V_i appears; // and if V_i is used as a dest (at most once), then // it must be removed first from allocs' vreg sets. let mut additions: FxHashMap> = FxHashMap::default(); let mut deletions: FxHashSet = FxHashSet::default(); for &(dest, src) in moves { deletions.insert(dest); additions .entry(src) .or_insert_with(|| smallvec![]) .push(dest); } // Now process each allocation's set of vreg labels, // first deleting those labels that were updated by // this parallel move, then adding back labels // redefined by the move. for value in self.get_values_mut() { if let Some(vregs) = value.vregs_mut() { let mut insertions: SmallVec<[VReg; 2]> = smallvec![]; for &vreg in vregs.iter() { if let Some(additions) = additions.get(&vreg) { insertions.extend(additions.iter().cloned()); } } for &d in &deletions { vregs.remove(&d); } vregs.extend(insertions); } } } &CheckerInst::Op { ref operands, ref allocs, ref clobbers, .. } => { // For each def, (i) update alloc to reflect defined // vreg (and only that vreg), and (ii) update all // other allocs in the checker state by removing this // vreg, if defined (other defs are now stale). for (op, alloc) in operands.iter().zip(allocs.iter()) { if op.kind() != OperandKind::Def { continue; } self.remove_vreg(op.vreg()); self.set_value(*alloc, CheckerValue::from_reg(op.vreg())); } for clobber in clobbers { self.remove_value(&Allocation::reg(*clobber)); } } } } fn remove_vreg(&mut self, vreg: VReg) { for (_, value) in self.get_mappings_mut() { value.remove_vreg(vreg); } } fn check_constraint<'a, F: Function>( &self, inst: Inst, op: Operand, alloc: Allocation, allocs: &[Allocation], checker: &Checker<'a, F>, ) -> Result<(), CheckerError> { match op.constraint() { OperandConstraint::Any => {} OperandConstraint::Reg => { if let Some(preg) = alloc.as_reg() { // Reject pregs that represent a fixed stack slot. if !checker.machine_env.fixed_stack_slots.contains(&preg) { return Ok(()); } } return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); } OperandConstraint::FixedReg(preg) => { if alloc != Allocation::reg(preg) { return Err(CheckerError::AllocationIsNotFixedReg { inst, op, alloc }); } } OperandConstraint::Reuse(idx) => { if alloc.kind() != AllocationKind::Reg { return Err(CheckerError::AllocationIsNotReg { inst, op, alloc }); } if alloc != allocs[idx] { return Err(CheckerError::AllocationIsNotReuse { inst, op, alloc, expected_alloc: allocs[idx], }); } } } Ok(()) } } /// An instruction representation in the checker's BB summary. #[derive(Clone, Debug)] pub(crate) enum CheckerInst { /// A move between allocations (these could be registers or /// spillslots). Move { into: Allocation, from: Allocation }, /// A parallel move in the original program. Simultaneously moves /// from all source vregs to all corresponding dest vregs, /// permitting overlap in the src and dest sets and doing all /// reads before any writes. ParallelMove { /// Vector of (dest, src) moves. moves: Vec<(VReg, VReg)>, }, /// A regular instruction with fixed use and def slots. Contains /// both the original operands (as given to the regalloc) and the /// allocation results. 
Op { inst: Inst, operands: Vec, allocs: Vec, clobbers: Vec, }, } #[derive(Debug)] pub struct Checker<'a, F: Function> { f: &'a F, bb_in: FxHashMap, bb_insts: FxHashMap>, edge_insts: FxHashMap<(Block, Block), Vec>, machine_env: &'a MachineEnv, stack_pregs: PRegSet, } impl<'a, F: Function> Checker<'a, F> { /// Create a new checker for the given function, initializing CFG /// info immediately. The client should call the `add_*()` /// methods to add abstract instructions to each BB before /// invoking `run()` to check for errors. pub fn new(f: &'a F, machine_env: &'a MachineEnv) -> Checker<'a, F> { let mut bb_in = FxHashMap::default(); let mut bb_insts = FxHashMap::default(); let mut edge_insts = FxHashMap::default(); for block in 0..f.num_blocks() { let block = Block::new(block); bb_in.insert(block, Default::default()); bb_insts.insert(block, vec![]); for &succ in f.block_succs(block) { edge_insts.insert((block, succ), vec![]); } } bb_in.insert(f.entry_block(), CheckerState::default()); let mut stack_pregs = PRegSet::empty(); for &preg in &machine_env.fixed_stack_slots { stack_pregs.add(preg); } Checker { f, bb_in, bb_insts, edge_insts, machine_env, stack_pregs, } } /// Build the list of checker instructions based on the given func /// and allocation results. pub fn prepare(&mut self, out: &Output) { trace!("checker: out = {:?}", out); let mut last_inst = None; for block in 0..self.f.num_blocks() { let block = Block::new(block); for inst_or_edit in out.block_insts_and_edits(self.f, block) { match inst_or_edit { InstOrEdit::Inst(inst) => { debug_assert!(last_inst.is_none() || inst > last_inst.unwrap()); last_inst = Some(inst); self.handle_inst(block, inst, out); } InstOrEdit::Edit(edit) => self.handle_edit(block, edit), } } } } /// For each original instruction, create an `Op`. fn handle_inst(&mut self, block: Block, inst: Inst, out: &Output) { // Skip normal checks if this is a branch: the blockparams do // not exist in post-regalloc code, and the edge-moves have to // be inserted before the branch rather than after. if !self.f.is_branch(inst) { let operands: Vec<_> = self.f.inst_operands(inst).iter().cloned().collect(); let allocs: Vec<_> = out.inst_allocs(inst).iter().cloned().collect(); let clobbers: Vec<_> = self.f.inst_clobbers(inst).into_iter().collect(); let checkinst = CheckerInst::Op { inst, operands, allocs, clobbers, }; trace!("checker: adding inst {:?}", checkinst); self.bb_insts.get_mut(&block).unwrap().push(checkinst); } // Instead, if this is a branch, emit a ParallelMove on each // outgoing edge as necessary to handle blockparams. else { for (i, &succ) in self.f.block_succs(block).iter().enumerate() { let args = self.f.branch_blockparams(block, inst, i); let params = self.f.block_params(succ); assert_eq!( args.len(), params.len(), "block{} has succ block{}; gave {} args for {} params", block.index(), succ.index(), args.len(), params.len() ); if args.len() > 0 { let moves = params.iter().cloned().zip(args.iter().cloned()).collect(); self.edge_insts .get_mut(&(block, succ)) .unwrap() .push(CheckerInst::ParallelMove { moves }); } } } } fn handle_edit(&mut self, block: Block, edit: &Edit) { trace!("checker: adding edit {:?}", edit); match edit { &Edit::Move { from, to } => { self.bb_insts .get_mut(&block) .unwrap() .push(CheckerInst::Move { into: to, from }); } } } /// Perform the dataflow analysis to compute checker state at each BB entry. 
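    // The analysis below is a standard forward worklist fixpoint: every block
    // starts at ⊤ (`CheckerState::Top`), all blocks are seeded into the queue,
    // and a successor is re-queued whenever meeting the propagated state into
    // its entry state changes it. Because a meet only ever shrinks the
    // per-allocation vreg sets (or drops map keys entirely), the iteration
    // must terminate.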
fn analyze(&mut self) { let mut queue = Vec::new(); let mut queue_set = FxHashSet::default(); // Put every block in the queue to start with, to ensure // everything is visited even if the initial state remains // `Top` after preds update it. // // We add blocks in reverse order so that when we process // back-to-front below, we do our initial pass in input block // order, which is (usually) RPO order or at least a // reasonable visit order. for block in (0..self.f.num_blocks()).rev() { let block = Block::new(block); queue.push(block); queue_set.insert(block); } while let Some(block) = queue.pop() { queue_set.remove(&block); let mut state = self.bb_in.get(&block).cloned().unwrap(); trace!("analyze: block {} has state {:?}", block.index(), state); for inst in self.bb_insts.get(&block).unwrap() { state.update(inst); trace!("analyze: inst {:?} -> state {:?}", inst, state); } for &succ in self.f.block_succs(block) { let mut new_state = state.clone(); for edge_inst in self.edge_insts.get(&(block, succ)).unwrap() { new_state.update(edge_inst); trace!( "analyze: succ {:?}: inst {:?} -> state {:?}", succ, edge_inst, new_state ); } let cur_succ_in = self.bb_in.get(&succ).unwrap(); trace!( "meeting state {:?} for block {} with state {:?} for block {}", new_state, block.index(), cur_succ_in, succ.index() ); new_state.meet_with(cur_succ_in); let changed = &new_state != cur_succ_in; trace!(" -> {:?}, changed {}", new_state, changed); if changed { trace!( "analyze: block {} state changed from {:?} to {:?}; pushing onto queue", succ.index(), cur_succ_in, new_state ); self.bb_in.insert(succ, new_state); if queue_set.insert(succ) { queue.push(succ); } } } } } /// Using BB-start state computed by `analyze()`, step the checker state /// through each BB and check each instruction's register allocations /// for errors. fn find_errors(&self) -> Result<(), CheckerErrors> { let mut errors = vec![]; for (block, input) in &self.bb_in { let mut state = input.clone(); for inst in self.bb_insts.get(block).unwrap() { if let Err(e) = state.check(InstPosition::Before, inst, self) { trace!("Checker error: {:?}", e); errors.push(e); } state.update(inst); if let Err(e) = state.check(InstPosition::After, inst, self) { trace!("Checker error: {:?}", e); errors.push(e); } } } if errors.is_empty() { Ok(()) } else { Err(CheckerErrors { errors }) } } /// Find any errors, returning `Err(CheckerErrors)` with all errors found /// or `Ok(())` otherwise. pub fn run(mut self) -> Result<(), CheckerErrors> { self.analyze(); let result = self.find_errors(); trace!("=== CHECKER RESULT ==="); fn print_state(state: &CheckerState) { if !trace_enabled!() { return; } if let CheckerState::Allocations(allocs) = state { let mut s = vec![]; for (alloc, state) in allocs { s.push(format!("{} := {}", alloc, state)); } trace!(" {{ {} }}", s.join(", ")) } } for bb in 0..self.f.num_blocks() { let bb = Block::new(bb); trace!("block{}:", bb.index()); let insts = self.bb_insts.get(&bb).unwrap(); let mut state = self.bb_in.get(&bb).unwrap().clone(); print_state(&state); for inst in insts { match inst { &CheckerInst::Op { inst, ref operands, ref allocs, ref clobbers, } => { trace!( " inst{}: {:?} ({:?}) clobbers:{:?}", inst.index(), operands, allocs, clobbers ); } &CheckerInst::Move { from, into } => { trace!(" {} -> {}", from, into); } &CheckerInst::ParallelMove { .. 
} => { panic!("unexpected parallel_move in body (non-edge)") } } state.update(inst); print_state(&state); } for &succ in self.f.block_succs(bb) { trace!(" succ {:?}:", succ); let mut state = state.clone(); for edge_inst in self.edge_insts.get(&(bb, succ)).unwrap() { match edge_inst { &CheckerInst::ParallelMove { ref moves } => { let moves = moves .iter() .map(|(dest, src)| format!("{} -> {}", src, dest)) .collect::>(); trace!(" parallel_move {}", moves.join(", ")); } _ => panic!("unexpected edge_inst: not a parallel move"), } state.update(edge_inst); print_state(&state); } } } result } } regalloc2-0.10.2/src/domtree.rs000066400000000000000000000067461467034227200162640ustar00rootroot00000000000000/* * Derives from the dominator tree implementation in regalloc.rs, which is * licensed under the Apache Public License 2.0 with LLVM Exception. See: * https://github.com/bytecodealliance/regalloc.rs */ // This is an implementation of the algorithm described in // // A Simple, Fast Dominance Algorithm // Keith D. Cooper, Timothy J. Harvey, and Ken Kennedy // Department of Computer Science, Rice University, Houston, Texas, USA // TR-06-33870 // https://www.cs.rice.edu/~keith/EMBED/dom.pdf use alloc::vec; use alloc::vec::Vec; use crate::Block; // Helper fn merge_sets( idom: &[Block], // map from Block to Block block_to_rpo: &[Option], mut node1: Block, mut node2: Block, ) -> Block { while node1 != node2 { if node1.is_invalid() || node2.is_invalid() { return Block::invalid(); } let rpo1 = block_to_rpo[node1.index()].unwrap(); let rpo2 = block_to_rpo[node2.index()].unwrap(); if rpo1 > rpo2 { node1 = idom[node1.index()]; } else if rpo2 > rpo1 { node2 = idom[node2.index()]; } } debug_assert!(node1 == node2); node1 } pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>( num_blocks: usize, preds: PredFn, post_ord: &[Block], start: Block, ) -> Vec { // We have post_ord, which is the postorder sequence. // Compute maps from RPO to block number and vice-versa. let mut block_to_rpo = vec![None; num_blocks]; block_to_rpo.resize(num_blocks, None); for (i, rpo_block) in post_ord.iter().rev().enumerate() { block_to_rpo[rpo_block.index()] = Some(i as u32); } let mut idom = vec![Block::invalid(); num_blocks]; // The start node must have itself as a parent. idom[start.index()] = start; let mut changed = true; while changed { changed = false; // Consider blocks in reverse postorder. Skip any that are unreachable. for &node in post_ord.iter().rev() { let rponum = block_to_rpo[node.index()].unwrap(); let mut parent = Block::invalid(); for &pred in preds(node).iter() { let pred_rpo = match block_to_rpo[pred.index()] { Some(r) => r, None => { // Skip unreachable preds. continue; } }; if pred_rpo < rponum { parent = pred; break; } } if parent.is_valid() { for &pred in preds(node).iter() { if pred == parent { continue; } if idom[pred.index()].is_invalid() { continue; } parent = merge_sets(&idom, &block_to_rpo[..], parent, pred); } } if parent.is_valid() && parent != idom[node.index()] { idom[node.index()] = parent; changed = true; } } } // Now set the start node's dominator-tree parent to "invalid"; // this allows the loop in `dominates` to terminate. 
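    // (Concretely, `dominates` below walks `b` up the idom chain; once the
    // walk reaches the entry block its parent is the invalid sentinel, so the
    // loop returns `false` rather than spinning on the entry's self-parent.)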
idom[start.index()] = Block::invalid(); idom } pub fn dominates(idom: &[Block], a: Block, mut b: Block) -> bool { loop { if a == b { return true; } if b.is_invalid() { return false; } b = idom[b.index()]; } } regalloc2-0.10.2/src/fuzzing/000077500000000000000000000000001467034227200157365ustar00rootroot00000000000000regalloc2-0.10.2/src/fuzzing/func.rs000066400000000000000000000553241467034227200172500ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ use crate::{ domtree, postorder, Allocation, Block, Function, Inst, InstRange, MachineEnv, Operand, OperandConstraint, OperandKind, OperandPos, PReg, PRegSet, RegClass, VReg, }; use alloc::vec::Vec; use alloc::{format, vec}; use super::arbitrary::Result as ArbitraryResult; use super::arbitrary::{Arbitrary, Unstructured}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum InstOpcode { Op, Ret, Branch, } #[derive(Clone, Debug)] pub struct InstData { op: InstOpcode, operands: Vec, clobbers: Vec, } impl InstData { pub fn branch() -> InstData { InstData { op: InstOpcode::Branch, operands: vec![], clobbers: vec![], } } pub fn ret() -> InstData { InstData { op: InstOpcode::Ret, operands: vec![], clobbers: vec![], } } } #[derive(Clone)] pub struct Func { insts: Vec, blocks: Vec, block_preds: Vec>, block_succs: Vec>, block_params_in: Vec>, block_params_out: Vec>>, num_vregs: usize, reftype_vregs: Vec, debug_value_labels: Vec<(VReg, Inst, Inst, u32)>, } impl Function for Func { fn num_insts(&self) -> usize { self.insts.len() } fn num_blocks(&self) -> usize { self.blocks.len() } fn entry_block(&self) -> Block { debug_assert!(self.blocks.len() > 0); Block::new(0) } fn block_insns(&self, block: Block) -> InstRange { self.blocks[block.index()] } fn block_succs(&self, block: Block) -> &[Block] { &self.block_succs[block.index()][..] } fn block_preds(&self, block: Block) -> &[Block] { &self.block_preds[block.index()][..] } fn block_params(&self, block: Block) -> &[VReg] { &self.block_params_in[block.index()][..] } fn is_ret(&self, insn: Inst) -> bool { self.insts[insn.index()].op == InstOpcode::Ret } fn is_branch(&self, insn: Inst) -> bool { self.insts[insn.index()].op == InstOpcode::Branch } fn branch_blockparams(&self, block: Block, _: Inst, succ: usize) -> &[VReg] { &self.block_params_out[block.index()][succ][..] } fn debug_value_labels(&self) -> &[(VReg, Inst, Inst, u32)] { &self.debug_value_labels[..] } fn inst_operands(&self, insn: Inst) -> &[Operand] { &self.insts[insn.index()].operands[..] 
} fn inst_clobbers(&self, insn: Inst) -> PRegSet { let mut set = PRegSet::default(); for &preg in &self.insts[insn.index()].clobbers { set = set.with(preg); } set } fn num_vregs(&self) -> usize { self.num_vregs } fn spillslot_size(&self, regclass: RegClass) -> usize { match regclass { // Test the case where 2 classes share the same RegClass::Int => 1, RegClass::Float => 1, RegClass::Vector => 2, } } } struct FuncBuilder { postorder: Vec, idom: Vec, f: Func, insts_per_block: Vec>, } impl FuncBuilder { fn new() -> Self { FuncBuilder { postorder: vec![], idom: vec![], f: Func { block_preds: vec![], block_succs: vec![], block_params_in: vec![], block_params_out: vec![], insts: vec![], blocks: vec![], num_vregs: 0, reftype_vregs: vec![], debug_value_labels: vec![], }, insts_per_block: vec![], } } pub fn add_block(&mut self) -> Block { let b = Block::new(self.f.blocks.len()); self.f .blocks .push(InstRange::new(Inst::new(0), Inst::new(0))); self.f.block_preds.push(vec![]); self.f.block_succs.push(vec![]); self.f.block_params_in.push(vec![]); self.f.block_params_out.push(vec![]); self.insts_per_block.push(vec![]); b } pub fn add_inst(&mut self, block: Block, data: InstData) { self.insts_per_block[block.index()].push(data); } pub fn add_edge(&mut self, from: Block, to: Block) { self.f.block_succs[from.index()].push(to); self.f.block_preds[to.index()].push(from); } pub fn set_block_params_in(&mut self, block: Block, params: &[VReg]) { self.f.block_params_in[block.index()] = params.iter().cloned().collect(); } pub fn set_block_params_out(&mut self, block: Block, params: Vec>) { self.f.block_params_out[block.index()] = params; } fn compute_doms(&mut self) { self.postorder = postorder::calculate(self.f.blocks.len(), Block::new(0), |block| { &self.f.block_succs[block.index()][..] }); self.idom = domtree::calculate( self.f.blocks.len(), |block| &self.f.block_preds[block.index()][..], &self.postorder[..], Block::new(0), ); } fn finalize(mut self) -> Func { for (blocknum, blockrange) in self.f.blocks.iter_mut().enumerate() { let begin_inst = self.f.insts.len(); for inst in &self.insts_per_block[blocknum] { self.f.insts.push(inst.clone()); } let end_inst = self.f.insts.len(); *blockrange = InstRange::new(Inst::new(begin_inst), Inst::new(end_inst)); } self.f } } impl Arbitrary<'_> for RegClass { fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { Ok(*u.choose(&[RegClass::Int, RegClass::Float, RegClass::Vector])?) } } impl Arbitrary<'_> for OperandConstraint { fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { Ok(*u.choose(&[OperandConstraint::Any, OperandConstraint::Reg])?) } } fn choose_dominating_block( idom: &[Block], mut block: Block, allow_self: bool, u: &mut Unstructured, ) -> ArbitraryResult { debug_assert!(block.is_valid()); let orig_block = block; loop { if (allow_self || block != orig_block) && bool::arbitrary(u)? 
{ break; } if idom[block.index()].is_invalid() { break; } block = idom[block.index()]; } let block = if block != orig_block || allow_self { block } else { Block::invalid() }; Ok(block) } #[derive(Clone, Copy, Debug)] pub struct Options { pub reused_inputs: bool, pub fixed_regs: bool, pub fixed_nonallocatable: bool, pub clobbers: bool, pub reftypes: bool, } impl core::default::Default for Options { fn default() -> Self { Options { reused_inputs: false, fixed_regs: false, fixed_nonallocatable: false, clobbers: false, reftypes: false, } } } impl Arbitrary<'_> for Func { fn arbitrary(u: &mut Unstructured) -> ArbitraryResult { Func::arbitrary_with_options(u, &Options::default()) } } impl Func { pub fn arbitrary_with_options(u: &mut Unstructured, opts: &Options) -> ArbitraryResult { // General strategy: // 1. Create an arbitrary CFG. // 2. Create a list of vregs to define in each block. // 3. Define some of those vregs in each block as blockparams.f. // 4. Populate blocks with ops that define the rest of the vregs. // - For each use, choose an available vreg: either one // already defined (via blockparam or inst) in this block, // or one defined in a dominating block. let mut builder = FuncBuilder::new(); for _ in 0..u.int_in_range(1..=100)? { builder.add_block(); } let num_blocks = builder.f.blocks.len(); // Generate a CFG. Create a "spine" of either single blocks, // with links to the next; or fork patterns, with the left // fork linking to the next and the right fork in `out_blocks` // to be connected below. This creates an arbitrary CFG with // split critical edges, which is a property that we require // for the regalloc. let mut from = 0; let mut out_blocks = vec![]; let mut in_blocks = vec![]; while from < num_blocks { in_blocks.push(from); if num_blocks > 3 && from < num_blocks - 3 && bool::arbitrary(u)? { // To avoid critical edges, we use from+1 as an edge // block, and advance `from` an extra block; `from+2` // will be the next normal iteration. builder.add_edge(Block::new(from), Block::new(from + 1)); builder.add_edge(Block::new(from), Block::new(from + 2)); builder.add_edge(Block::new(from + 2), Block::new(from + 3)); out_blocks.push(from + 1); from += 2; } else if from < num_blocks - 1 { builder.add_edge(Block::new(from), Block::new(from + 1)); } from += 1; } for pred in out_blocks { let succ = *u.choose(&in_blocks[..])?; builder.add_edge(Block::new(pred), Block::new(succ)); } builder.compute_doms(); for block in 0..num_blocks { builder.f.block_preds[block].clear(); } for block in 0..num_blocks { for &succ in &builder.f.block_succs[block] { builder.f.block_preds[succ.index()].push(Block::new(block)); } } builder.compute_doms(); let mut vregs_by_block = vec![]; let mut vregs_by_block_to_be_defined = vec![]; let mut block_params = vec![vec![]; num_blocks]; for block in 0..num_blocks { let mut vregs = vec![]; for _ in 0..u.int_in_range(5..=15)? { let vreg = VReg::new(builder.f.num_vregs, RegClass::arbitrary(u)?); builder.f.num_vregs += 1; vregs.push(vreg); if opts.reftypes && bool::arbitrary(u)? { builder.f.reftype_vregs.push(vreg); } if bool::arbitrary(u)? 
{ let assumed_end_inst = 10 * num_blocks; let mut start = u.int_in_range::(0..=assumed_end_inst)?; for _ in 0..10 { if start >= assumed_end_inst { break; } let end = u.int_in_range::(start..=assumed_end_inst)?; let label = u.int_in_range::(0..=100)?; builder.f.debug_value_labels.push(( vreg, Inst::new(start), Inst::new(end), label, )); start = end; } } } vregs_by_block.push(vregs.clone()); let mut vregs_to_be_defined = vec![]; let mut max_block_params = u.int_in_range(0..=core::cmp::min(3, vregs.len() / 3))?; for &vreg in &vregs { if block > 0 && bool::arbitrary(u)? && max_block_params > 0 { block_params[block].push(vreg); max_block_params -= 1; } else { vregs_to_be_defined.push(vreg); } } vregs_to_be_defined.reverse(); vregs_by_block_to_be_defined.push(vregs_to_be_defined); builder.set_block_params_in(Block::new(block), &block_params[block][..]); } for block in 0..num_blocks { let mut avail = block_params[block].clone(); let mut remaining_nonlocal_uses = u.int_in_range(0..=3)?; while let Some(vreg) = vregs_by_block_to_be_defined[block].pop() { let def_constraint = OperandConstraint::arbitrary(u)?; let def_pos = if bool::arbitrary(u)? { OperandPos::Early } else { OperandPos::Late }; let mut operands = vec![Operand::new( vreg, def_constraint, OperandKind::Def, def_pos, )]; let mut allocations = vec![Allocation::none()]; for _ in 0..u.int_in_range(0..=3)? { let vreg = if avail.len() > 0 && (remaining_nonlocal_uses == 0 || bool::arbitrary(u)?) { *u.choose(&avail[..])? } else { let def_block = choose_dominating_block( &builder.idom[..], Block::new(block), /* allow_self = */ false, u, )?; if !def_block.is_valid() { // No vregs already defined, and no pred blocks that dominate us // (perhaps we are the entry block): just stop generating inputs. break; } remaining_nonlocal_uses -= 1; *u.choose(&vregs_by_block[def_block.index()])? }; let use_constraint = OperandConstraint::arbitrary(u)?; operands.push(Operand::new( vreg, use_constraint, OperandKind::Use, OperandPos::Early, )); allocations.push(Allocation::none()); } let mut clobbers: Vec = vec![]; if operands.len() > 1 && opts.reused_inputs && bool::arbitrary(u)? { // Make the def a reused input. let op = operands[0]; debug_assert_eq!(op.kind(), OperandKind::Def); let reused = u.int_in_range(1..=(operands.len() - 1))?; if op.class() == operands[reused].class() { operands[0] = Operand::new( op.vreg(), OperandConstraint::Reuse(reused), op.kind(), OperandPos::Late, ); // Make sure reused input is a Reg. let op = operands[reused]; operands[reused] = Operand::new( op.vreg(), OperandConstraint::Reg, op.kind(), OperandPos::Early, ); } } else if opts.fixed_regs && bool::arbitrary(u)? { let mut fixed_early = vec![]; let mut fixed_late = vec![]; for _ in 0..u.int_in_range(0..=operands.len() - 1)? { // Pick an operand and make it a fixed reg. let i = u.int_in_range(0..=(operands.len() - 1))?; let op = operands[i]; let fixed_reg = PReg::new(u.int_in_range(0..=62)?, op.class()); if op.kind() == OperandKind::Def && op.pos() == OperandPos::Early { // Early-defs with fixed constraints conflict with // any other fixed uses of the same preg. if fixed_late.contains(&fixed_reg) { break; } } if op.kind() == OperandKind::Use && op.pos() == OperandPos::Late { // Late-use with fixed constraints conflict with // any other fixed uses of the same preg. 
if fixed_early.contains(&fixed_reg) { break; } } let fixed_list = match op.pos() { OperandPos::Early => &mut fixed_early, OperandPos::Late => &mut fixed_late, }; if fixed_list.contains(&fixed_reg) { break; } fixed_list.push(fixed_reg); operands[i] = Operand::new( op.vreg(), OperandConstraint::FixedReg(fixed_reg), op.kind(), op.pos(), ); } } else if opts.clobbers && bool::arbitrary(u)? { for _ in 0..u.int_in_range(0..=5)? { let reg = u.int_in_range(0..=30)?; if clobbers.iter().any(|r| r.hw_enc() == reg) { break; } clobbers.push(PReg::new(reg, RegClass::arbitrary(u)?)); } } else if opts.fixed_nonallocatable && bool::arbitrary(u)? { operands.push(Operand::fixed_nonallocatable(PReg::new( 63, RegClass::arbitrary(u)?, ))); } builder.add_inst( Block::new(block), InstData { op: InstOpcode::Op, operands, clobbers, }, ); avail.push(vreg); } // Define the branch with blockparam args that must end // the block. if builder.f.block_succs[block].len() > 0 { let mut params = vec![]; for &succ in &builder.f.block_succs[block] { let mut args = vec![]; for i in 0..builder.f.block_params_in[succ.index()].len() { let dom_block = choose_dominating_block( &builder.idom[..], Block::new(block), false, u, )?; // Look for a vreg with a suitable class. If no // suitable vreg is available then we error out, which // causes the fuzzer to skip this function. let vregs = if dom_block.is_valid() && bool::arbitrary(u)? { &vregs_by_block[dom_block.index()][..] } else { &avail[..] }; let suitable_vregs: Vec<_> = vregs .iter() .filter(|vreg| { vreg.class() == builder.f.block_params_in[succ.index()][i].class() }) .copied() .collect(); let vreg = u.choose(&suitable_vregs)?; args.push(*vreg); } params.push(args); } builder.set_block_params_out(Block::new(block), params); builder.add_inst(Block::new(block), InstData::branch()); } else { builder.add_inst(Block::new(block), InstData::ret()); } } builder.f.debug_value_labels.sort_unstable(); Ok(builder.finalize()) } } impl core::fmt::Debug for Func { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "{{\n")?; for (i, blockrange) in self.blocks.iter().enumerate() { let succs = self.block_succs[i] .iter() .map(|b| b.index()) .collect::>(); let preds = self.block_preds[i] .iter() .map(|b| b.index()) .collect::>(); let params_in = self.block_params_in[i] .iter() .map(|v| format!("v{}", v.vreg())) .collect::>() .join(", "); let params_out = self.block_params_out[i] .iter() .enumerate() .map(|(succ_idx, vec)| { let succ = self.block_succs[i][succ_idx]; let params = vec .iter() .map(|v| format!("v{}", v.vreg())) .collect::>() .join(", "); format!("block{}({})", succ.index(), params) }) .collect::>() .join(", "); write!( f, " block{}({}): # succs:{:?} preds:{:?}\n", i, params_in, succs, preds )?; for inst in blockrange.iter() { write!( f, " inst{}: {:?} ops:{:?} clobber:{:?}\n", inst.index(), self.insts[inst.index()].op, self.insts[inst.index()].operands, self.insts[inst.index()].clobbers )?; if let InstOpcode::Branch = self.insts[inst.index()].op { write!(f, " params: {}\n", params_out)?; } } } write!(f, "}}\n")?; Ok(()) } } pub fn machine_env() -> MachineEnv { fn regs(r: core::ops::Range, c: RegClass) -> Vec { r.map(|i| PReg::new(i, c)).collect() } let preferred_regs_by_class: [Vec; 3] = [ regs(0..24, RegClass::Int), regs(0..24, RegClass::Float), regs(0..24, RegClass::Vector), ]; let non_preferred_regs_by_class: [Vec; 3] = [ regs(24..32, RegClass::Int), regs(24..32, RegClass::Float), regs(24..32, RegClass::Vector), ]; let scratch_by_class: [Option; 3] = [None, 
None, None]; let fixed_stack_slots = (32..63) .flat_map(|i| { [ PReg::new(i, RegClass::Int), PReg::new(i, RegClass::Float), PReg::new(i, RegClass::Vector), ] }) .collect(); // Register 63 is reserved for use as a fixed non-allocatable register. MachineEnv { preferred_regs_by_class, non_preferred_regs_by_class, scratch_by_class, fixed_stack_slots, } } regalloc2-0.10.2/src/fuzzing/mod.rs000066400000000000000000000010211467034227200170550ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ //! Utilities for fuzzing. pub mod func; // Re-exports for fuzz targets. pub mod domtree { pub use crate::domtree::*; } pub mod postorder { pub use crate::postorder::*; } pub mod moves { pub use crate::moves::*; } pub mod cfg { pub use crate::cfg::*; } pub mod ion { pub use crate::ion::*; } pub mod checker { pub use crate::checker::*; } pub use libfuzzer_sys::{arbitrary, fuzz_target}; regalloc2-0.10.2/src/index.rs000066400000000000000000000126671467034227200157330ustar00rootroot00000000000000#[macro_export] macro_rules! define_index { ($ix:ident, $storage:ident, $elem:ident) => { define_index!($ix); #[derive(Clone, Debug)] pub struct $storage { storage: Vec<$elem>, } impl $storage { #[inline(always)] pub fn with_capacity(n: usize) -> Self { Self { storage: Vec::with_capacity(n), } } #[inline(always)] pub fn len(&self) -> usize { self.storage.len() } #[inline(always)] pub fn iter(&self) -> impl Iterator { self.storage.iter() } #[inline(always)] pub fn iter_mut(&mut self) -> impl Iterator { self.storage.iter_mut() } #[inline(always)] pub fn push(&mut self, value: $elem) -> $ix { let idx = $ix(self.storage.len() as u32); self.storage.push(value); idx } } impl core::ops::Index<$ix> for $storage { type Output = $elem; #[inline(always)] fn index(&self, i: $ix) -> &Self::Output { &self.storage[i.index()] } } impl core::ops::IndexMut<$ix> for $storage { #[inline(always)] fn index_mut(&mut self, i: $ix) -> &mut Self::Output { &mut self.storage[i.index()] } } impl<'a> IntoIterator for &'a $storage { type Item = &'a $elem; type IntoIter = core::slice::Iter<'a, $elem>; #[inline(always)] fn into_iter(self) -> Self::IntoIter { self.storage.iter() } } impl<'a> IntoIterator for &'a mut $storage { type Item = &'a mut $elem; type IntoIter = core::slice::IterMut<'a, $elem>; #[inline(always)] fn into_iter(self) -> Self::IntoIter { self.storage.iter_mut() } } }; ($ix:ident) => { #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr( feature = "enable-serde", derive(::serde::Serialize, ::serde::Deserialize) )] pub struct $ix(pub u32); impl $ix { #[inline(always)] pub fn new(i: usize) -> Self { Self(i as u32) } #[inline(always)] pub fn index(self) -> usize { debug_assert!(self.is_valid()); self.0 as usize } #[inline(always)] pub fn invalid() -> Self { Self(u32::MAX) } #[inline(always)] pub fn is_invalid(self) -> bool { self == Self::invalid() } #[inline(always)] pub fn is_valid(self) -> bool { self != Self::invalid() } #[inline(always)] pub fn next(self) -> $ix { debug_assert!(self.is_valid()); Self(self.0 + 1) } #[inline(always)] pub fn prev(self) -> $ix { debug_assert!(self.is_valid()); Self(self.0 - 1) } #[inline(always)] pub fn raw_u32(self) -> u32 { self.0 } } impl crate::index::ContainerIndex for $ix {} }; } pub trait ContainerIndex: Clone + Copy + core::fmt::Debug + PartialEq + Eq {} pub trait ContainerComparator { type Ix: ContainerIndex; fn compare(&self, a: Self::Ix, b: Self::Ix) -> core::cmp::Ordering; } 
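// Illustrative sketch, not part of the original source: `define_index!`
// (defined above) generates the newtype indices such as `Inst` and `Block`
// used throughout the allocator. A hypothetical index declared the same way
// behaves as shown here.
#[cfg(test)]
mod define_index_example {
    define_index!(DummyIx);

    #[test]
    fn index_roundtrip_and_sentinel() {
        let ix = DummyIx::new(7);
        assert_eq!(ix.index(), 7);
        assert_eq!(ix.next().index(), 8);
        assert_eq!(ix.prev().index(), 6);
        assert!(DummyIx::invalid().is_invalid());
        assert!(ix.is_valid());
    }
}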
define_index!(Inst); define_index!(Block); #[derive(Clone, Copy, Debug)] #[cfg_attr( feature = "enable-serde", derive(::serde::Serialize, ::serde::Deserialize) )] pub struct InstRange(Inst, Inst); impl InstRange { #[inline(always)] pub fn new(from: Inst, to: Inst) -> Self { debug_assert!(from.index() <= to.index()); InstRange(from, to) } #[inline(always)] pub fn first(self) -> Inst { debug_assert!(self.len() > 0); self.0 } #[inline(always)] pub fn last(self) -> Inst { debug_assert!(self.len() > 0); self.1.prev() } #[inline(always)] pub fn rest(self) -> InstRange { debug_assert!(self.len() > 0); InstRange::new(self.0.next(), self.1) } #[inline(always)] pub fn len(self) -> usize { self.1.index() - self.0.index() } #[inline(always)] pub fn iter(self) -> impl DoubleEndedIterator { (self.0.index()..self.1.index()).map(|i| Inst::new(i)) } } #[cfg(test)] mod test { use alloc::vec; use alloc::vec::Vec; use super::*; #[test] fn test_inst_range() { let range = InstRange::new(Inst::new(0), Inst::new(0)); debug_assert_eq!(range.len(), 0); let range = InstRange::new(Inst::new(0), Inst::new(5)); debug_assert_eq!(range.first().index(), 0); debug_assert_eq!(range.last().index(), 4); debug_assert_eq!(range.len(), 5); debug_assert_eq!( range.iter().collect::>(), vec![ Inst::new(0), Inst::new(1), Inst::new(2), Inst::new(3), Inst::new(4) ] ); } } regalloc2-0.10.2/src/indexset.rs000066400000000000000000000237661467034227200164510ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ //! Index sets: sets of integers that represent indices into a space. use alloc::vec::Vec; use core::cell::Cell; use crate::FxHashMap; const SMALL_ELEMS: usize = 12; /// A hybrid large/small-mode sparse mapping from integer indices to /// elements. /// /// The trailing `(u32, u64)` elements in each variant is a one-item /// cache to allow fast access when streaming through. #[derive(Clone, Debug)] enum AdaptiveMap { Small { len: u32, keys: [u32; SMALL_ELEMS], values: [u64; SMALL_ELEMS], }, Large(FxHashMap), } const INVALID: u32 = 0xffff_ffff; impl AdaptiveMap { fn new() -> Self { Self::Small { len: 0, keys: [INVALID; SMALL_ELEMS], values: [0; SMALL_ELEMS], } } #[inline(always)] fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 { // Check whether the key is present and we are in small mode; // if no to both, we need to expand first. let small_mode_idx = match self { &mut Self::Small { len, ref mut keys, ref values, } => { // Perform this scan but do not return right away; // doing so runs into overlapping-borrow issues // because the current non-lexical lifetimes // implementation is not able to see that the `self` // mutable borrow on return is only on the // early-return path. if let Some(i) = keys[..len as usize].iter().position(|&k| k == key) { Some(i) } else if len != SMALL_ELEMS as u32 { debug_assert!(len < SMALL_ELEMS as u32); None } else if let Some(i) = values.iter().position(|&v| v == 0) { // If an existing value is zero, reuse that slot. keys[i] = key; Some(i) } else { *self = Self::Large(keys.iter().copied().zip(values.iter().copied()).collect()); None } } _ => None, }; match self { Self::Small { len, keys, values } => { // If we found the key already while checking whether // we need to expand above, use that index to return // early. if let Some(i) = small_mode_idx { return &mut values[i]; } // Otherwise, the key must not be present; add a new // entry. 
debug_assert!(*len < SMALL_ELEMS as u32); let idx = *len as usize; *len += 1; keys[idx] = key; values[idx] = 0; &mut values[idx] } Self::Large(map) => map.entry(key).or_insert(0), } } #[inline(always)] fn get_mut(&mut self, key: u32) -> Option<&mut u64> { match self { &mut Self::Small { len, ref keys, ref mut values, } => { for i in 0..len { if keys[i as usize] == key { return Some(&mut values[i as usize]); } } None } &mut Self::Large(ref mut map) => map.get_mut(&key), } } #[inline(always)] fn get(&self, key: u32) -> Option { match self { &Self::Small { len, ref keys, ref values, } => { for i in 0..len { if keys[i as usize] == key { let value = values[i as usize]; return Some(value); } } None } &Self::Large(ref map) => { let value = map.get(&key).cloned(); value } } } fn iter<'a>(&'a self) -> AdaptiveMapIter<'a> { match self { &Self::Small { len, ref keys, ref values, } => AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize]), &Self::Large(ref map) => AdaptiveMapIter::Large(map.iter()), } } fn is_empty(&self) -> bool { match self { AdaptiveMap::Small { values, .. } => values.iter().all(|&value| value == 0), AdaptiveMap::Large(m) => m.values().all(|&value| value == 0), } } } enum AdaptiveMapIter<'a> { Small(&'a [u32], &'a [u64]), Large(hashbrown::hash_map::Iter<'a, u32, u64>), } impl<'a> core::iter::Iterator for AdaptiveMapIter<'a> { type Item = (u32, u64); #[inline] fn next(&mut self) -> Option { match self { &mut Self::Small(ref mut keys, ref mut values) => { if keys.is_empty() { None } else { let (k, v) = ((*keys)[0], (*values)[0]); *keys = &(*keys)[1..]; *values = &(*values)[1..]; Some((k, v)) } } &mut Self::Large(ref mut it) => it.next().map(|(&k, &v)| (k, v)), } } } /// A conceptually infinite-length set of indices that allows union /// and efficient iteration over elements. 
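// Illustrative sketch, not part of the original test suite: `union_with` on
// the set defined just below reports whether the destination actually grew,
// which lets a caller drive a dataflow fixpoint to convergence.
#[cfg(test)]
mod union_example {
    use super::IndexSet;

    #[test]
    fn union_reports_changes() {
        let mut a = IndexSet::new();
        let mut b = IndexSet::new();
        a.set(3, true);
        b.set(200, true); // lands in a different 64-bit word than bit 3
        assert!(a.union_with(&b)); // bit 200 is new to `a`
        assert!(!a.union_with(&b)); // a second union adds nothing
        assert!(a.get(3) && a.get(200));
    }
}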
#[derive(Clone)] pub struct IndexSet { elems: AdaptiveMap, cache: Cell<(u32, u64)>, } const BITS_PER_WORD: usize = 64; impl IndexSet { pub fn new() -> Self { Self { elems: AdaptiveMap::new(), cache: Cell::new((INVALID, 0)), } } #[inline(always)] fn elem(&mut self, bit_index: usize) -> &mut u64 { let word_index = (bit_index / BITS_PER_WORD) as u32; if self.cache.get().0 == word_index { self.cache.set((INVALID, 0)); } self.elems.get_or_insert(word_index) } #[inline(always)] fn maybe_elem_mut(&mut self, bit_index: usize) -> Option<&mut u64> { let word_index = (bit_index / BITS_PER_WORD) as u32; if self.cache.get().0 == word_index { self.cache.set((INVALID, 0)); } self.elems.get_mut(word_index) } #[inline(always)] fn maybe_elem(&self, bit_index: usize) -> Option { let word_index = (bit_index / BITS_PER_WORD) as u32; if self.cache.get().0 == word_index { Some(self.cache.get().1) } else { self.elems.get(word_index) } } #[inline(always)] pub fn set(&mut self, idx: usize, val: bool) { let bit = idx % BITS_PER_WORD; if val { *self.elem(idx) |= 1 << bit; } else if let Some(word) = self.maybe_elem_mut(idx) { *word &= !(1 << bit); } } pub fn assign(&mut self, other: &Self) { self.elems = other.elems.clone(); self.cache = other.cache.clone(); } #[inline(always)] pub fn get(&self, idx: usize) -> bool { let bit = idx % BITS_PER_WORD; if let Some(word) = self.maybe_elem(idx) { (word & (1 << bit)) != 0 } else { false } } pub fn union_with(&mut self, other: &Self) -> bool { let mut changed = 0; for (word_idx, bits) in other.elems.iter() { if bits == 0 { continue; } let word_idx = word_idx as usize; let self_word = self.elem(word_idx * BITS_PER_WORD); changed |= bits & !*self_word; *self_word |= bits; } changed != 0 } pub fn iter<'a>(&'a self) -> impl Iterator + 'a { self.elems.iter().flat_map(|(word_idx, bits)| { let word_idx = word_idx as usize; SetBitsIter(bits).map(move |i| BITS_PER_WORD * word_idx + i) }) } /// Is the adaptive data structure in "small" mode? This is meant /// for testing assertions only. pub(crate) fn is_small(&self) -> bool { match &self.elems { &AdaptiveMap::Small { .. } => true, _ => false, } } /// Is the set empty? pub(crate) fn is_empty(&self) -> bool { self.elems.is_empty() } } pub struct SetBitsIter(u64); impl Iterator for SetBitsIter { type Item = usize; #[inline] fn next(&mut self) -> Option { // Build an `Option` so that on the nonzero path, // the compiler can optimize the trailing-zeroes operator // using that knowledge. core::num::NonZeroU64::new(self.0).map(|nz| { let bitidx = nz.trailing_zeros(); self.0 &= self.0 - 1; // clear highest set bit bitidx as usize }) } } impl core::fmt::Debug for IndexSet { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { let vals = self.iter().collect::>(); write!(f, "{:?}", vals) } } #[cfg(test)] mod test { use super::IndexSet; #[test] fn test_set_bits_iter() { let mut vec = IndexSet::new(); let mut sum = 0; for i in 0..1024 { if i % 17 == 0 { vec.set(i, true); sum += i; } } let mut checksum = 0; for bit in vec.iter() { debug_assert!(bit % 17 == 0); checksum += bit; } debug_assert_eq!(sum, checksum); } #[test] fn test_expand_remove_zero_elems() { let mut vec = IndexSet::new(); // Set 12 different words (this is the max small-mode size). for i in 0..12 { vec.set(64 * i, true); } // Now clear a bit, and set a bit in a different word. We // should still be in small mode. 
vec.set(64 * 5, false); vec.set(64 * 100, true); debug_assert!(vec.is_small()); } } regalloc2-0.10.2/src/ion/000077500000000000000000000000001467034227200150275ustar00rootroot00000000000000regalloc2-0.10.2/src/ion/data_structures.rs000066400000000000000000000565431467034227200206260ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Data structures for backtracking allocator. use super::liveranges::SpillWeight; use crate::cfg::CFGInfo; use crate::index::ContainerComparator; use crate::indexset::IndexSet; use crate::{ define_index, Allocation, Block, Edit, Function, FxHashSet, MachineEnv, Operand, PReg, ProgPoint, RegClass, VReg, }; use alloc::collections::BTreeMap; use alloc::string::String; use alloc::vec::Vec; use core::cmp::Ordering; use core::fmt::Debug; use smallvec::{smallvec, SmallVec}; /// A range from `from` (inclusive) to `to` (exclusive). #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct CodeRange { pub from: ProgPoint, pub to: ProgPoint, } impl CodeRange { #[inline(always)] pub fn is_empty(&self) -> bool { self.from >= self.to } #[inline(always)] pub fn contains(&self, other: &Self) -> bool { other.from >= self.from && other.to <= self.to } #[inline(always)] pub fn contains_point(&self, other: ProgPoint) -> bool { other >= self.from && other < self.to } #[inline(always)] pub fn overlaps(&self, other: &Self) -> bool { other.to > self.from && other.from < self.to } #[inline(always)] pub fn len(&self) -> usize { self.to.inst().index() - self.from.inst().index() } /// Returns the range covering just one program point. #[inline(always)] pub fn singleton(pos: ProgPoint) -> CodeRange { CodeRange { from: pos, to: pos.next(), } } /// Join two [CodeRange] values together, producing a [CodeRange] that includes both. #[inline(always)] pub fn join(&self, other: CodeRange) -> Self { CodeRange { from: self.from.min(other.from), to: self.to.max(other.to), } } } impl core::cmp::PartialOrd for CodeRange { #[inline(always)] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl core::cmp::Ord for CodeRange { #[inline(always)] fn cmp(&self, other: &Self) -> Ordering { if self.to <= other.from { Ordering::Less } else if self.from >= other.to { Ordering::Greater } else { Ordering::Equal } } } define_index!(LiveBundleIndex, LiveBundles, LiveBundle); define_index!(LiveRangeIndex, LiveRanges, LiveRange); define_index!(SpillSetIndex, SpillSets, SpillSet); define_index!(UseIndex); define_index!(VRegIndex, VRegs, VRegData); define_index!(PRegIndex); define_index!(SpillSlotIndex); /// Used to carry small sets of bundles, e.g. for conflict sets. 
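// Illustrative sketch, not part of the original tests: `CodeRange`'s `Ord`
// above deliberately treats any two overlapping ranges as `Equal`.
// `LiveRangeKey` below uses the same convention so that a `BTreeMap` keyed on
// ranges can answer "does anything overlap this range?" with a plain lookup.
#[cfg(test)]
mod code_range_example {
    use super::CodeRange;
    use crate::{Inst, ProgPoint};
    use core::cmp::Ordering;

    fn range(from: usize, to: usize) -> CodeRange {
        CodeRange {
            from: ProgPoint::before(Inst::new(from)),
            to: ProgPoint::before(Inst::new(to)),
        }
    }

    #[test]
    fn overlapping_ranges_compare_equal() {
        assert_eq!(range(0, 4).cmp(&range(2, 6)), Ordering::Equal);
        assert_eq!(range(0, 2).cmp(&range(2, 4)), Ordering::Less);
        assert!(range(0, 4).overlaps(&range(2, 6)));
        assert!(!range(0, 2).overlaps(&range(2, 4)));
    }
}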
pub type LiveBundleVec = SmallVec<[LiveBundleIndex; 4]>; #[derive(Clone, Copy, Debug)] pub struct LiveRangeListEntry { pub range: CodeRange, pub index: LiveRangeIndex, } pub type LiveRangeList = SmallVec<[LiveRangeListEntry; 4]>; pub type UseList = SmallVec<[Use; 4]>; #[derive(Clone, Debug)] pub struct LiveRange { pub range: CodeRange, pub vreg: VRegIndex, pub bundle: LiveBundleIndex, pub uses_spill_weight_and_flags: u32, pub uses: UseList, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(u32)] pub enum LiveRangeFlag { StartsAtDef = 1, } impl LiveRange { #[inline(always)] pub fn set_flag(&mut self, flag: LiveRangeFlag) { self.uses_spill_weight_and_flags |= (flag as u32) << 29; } #[inline(always)] pub fn clear_flag(&mut self, flag: LiveRangeFlag) { self.uses_spill_weight_and_flags &= !((flag as u32) << 29); } #[inline(always)] pub fn assign_flag(&mut self, flag: LiveRangeFlag, val: bool) { let bit = if val { (flag as u32) << 29 } else { 0 }; self.uses_spill_weight_and_flags &= 0xe000_0000; self.uses_spill_weight_and_flags |= bit; } #[inline(always)] pub fn has_flag(&self, flag: LiveRangeFlag) -> bool { self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0 } #[inline(always)] pub fn flag_word(&self) -> u32 { self.uses_spill_weight_and_flags & 0xe000_0000 } #[inline(always)] pub fn merge_flags(&mut self, flag_word: u32) { self.uses_spill_weight_and_flags |= flag_word; } #[inline(always)] pub fn uses_spill_weight(&self) -> SpillWeight { // NOTE: the spill weight is technically stored in 29 bits, but we ignore the sign bit as // we will always be dealing with positive values. Thus we mask out the top 3 bits to // ensure that the sign bit is clear, then shift left by only two. let bits = (self.uses_spill_weight_and_flags & 0x1fff_ffff) << 2; SpillWeight::from_f32(f32::from_bits(bits)) } #[inline(always)] pub fn set_uses_spill_weight(&mut self, weight: SpillWeight) { let weight_bits = (weight.to_f32().to_bits() >> 2) & 0x1fff_ffff; self.uses_spill_weight_and_flags = (self.uses_spill_weight_and_flags & 0xe000_0000) | weight_bits; } } #[derive(Clone, Copy, Debug)] pub struct Use { pub operand: Operand, pub pos: ProgPoint, pub slot: u8, pub weight: u16, } impl Use { #[inline(always)] pub fn new(operand: Operand, pos: ProgPoint, slot: u8) -> Self { Self { operand, pos, slot, // Weight is updated on insertion into LR. 
weight: 0, } } } #[derive(Clone, Debug)] pub struct LiveBundle { pub ranges: LiveRangeList, pub spillset: SpillSetIndex, pub allocation: Allocation, pub prio: u32, // recomputed after every bulk update pub spill_weight_and_props: u32, } pub const BUNDLE_MAX_SPILL_WEIGHT: u32 = (1 << 29) - 1; pub const MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT; pub const MINIMAL_BUNDLE_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT - 1; pub const BUNDLE_MAX_NORMAL_SPILL_WEIGHT: u32 = BUNDLE_MAX_SPILL_WEIGHT - 2; impl LiveBundle { #[inline(always)] pub fn set_cached_spill_weight_and_props( &mut self, spill_weight: u32, minimal: bool, fixed: bool, fixed_def: bool, ) { debug_assert!(spill_weight <= BUNDLE_MAX_SPILL_WEIGHT); self.spill_weight_and_props = spill_weight | (if minimal { 1 << 31 } else { 0 }) | (if fixed { 1 << 30 } else { 0 }) | (if fixed_def { 1 << 29 } else { 0 }); } #[inline(always)] pub fn cached_minimal(&self) -> bool { self.spill_weight_and_props & (1 << 31) != 0 } #[inline(always)] pub fn cached_fixed(&self) -> bool { self.spill_weight_and_props & (1 << 30) != 0 } #[inline(always)] pub fn cached_fixed_def(&self) -> bool { self.spill_weight_and_props & (1 << 29) != 0 } #[inline(always)] pub fn set_cached_fixed(&mut self) { self.spill_weight_and_props |= 1 << 30; } #[inline(always)] pub fn set_cached_fixed_def(&mut self) { self.spill_weight_and_props |= 1 << 29; } #[inline(always)] pub fn cached_spill_weight(&self) -> u32 { self.spill_weight_and_props & BUNDLE_MAX_SPILL_WEIGHT } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct BundleProperties { pub minimal: bool, pub fixed: bool, } /// Calculate the maximum `N` inline capacity for a `SmallVec<[T; N]>` we can /// have without bloating its size to be larger than a `Vec`. const fn no_bloat_capacity() -> usize { // `Vec` is three words: `(pointer, capacity, length)`. // // A `SmallVec<[T; N]>` replaces the first two members with the following: // // union { // Inline([T; N]), // Heap(pointer, capacity), // } // // So if `size_of([T; N]) == size_of(pointer) + size_of(capacity)` then we // get the maximum inline capacity without bloat. core::mem::size_of::() * 2 / core::mem::size_of::() } #[derive(Clone, Debug)] pub struct SpillSet { pub slot: SpillSlotIndex, pub reg_hint: PReg, pub class: RegClass, pub spill_bundle: LiveBundleIndex, pub required: bool, pub splits: u8, /// The aggregate [`CodeRange`] of all involved [`LiveRange`]s. The effect of this abstraction /// is that we attempt to allocate one spill slot for the extent of a bundle. For fragmented /// bundles with lots of open space this abstraction is pessimistic, but when bundles are small /// or dense this yields similar results to tracking individual live ranges. pub range: CodeRange, } pub(crate) const MAX_SPLITS_PER_SPILLSET: u8 = 2; #[derive(Clone, Debug)] pub struct VRegData { pub ranges: LiveRangeList, pub blockparam: Block, // We don't initially know the RegClass until we observe a use of the VReg. pub class: Option, } #[derive(Clone, Debug)] pub struct PRegData { pub allocations: LiveRangeSet, pub is_stack: bool, } #[derive(Clone, Debug)] pub struct MultiFixedRegFixup { pub pos: ProgPoint, pub from_slot: u8, pub to_slot: u8, pub level: FixedRegFixupLevel, pub to_preg: PRegIndex, pub vreg: VRegIndex, } #[derive(Clone, Debug, PartialEq, Eq)] pub enum FixedRegFixupLevel { /// A fixup copy for the initial fixed reg; must come first. Initial, /// A fixup copy from the first fixed reg to other fixed regs for /// the same vreg; must come second. 
Secondary, } /// The field order is significant: these are sorted so that a /// scan over vregs, then blocks in each range, can scan in /// order through this (sorted) list and add allocs to the /// half-move list. /// /// The fields in this struct are reversed in sort order so that the entire /// struct can be treated as a u128 for sorting purposes. #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(C)] pub struct BlockparamOut { pub to_vreg: VRegIndex, pub to_block: Block, pub from_block: Block, pub from_vreg: VRegIndex, } impl BlockparamOut { #[inline(always)] pub fn key(&self) -> u128 { u128_key( self.from_vreg.raw_u32(), self.from_block.raw_u32(), self.to_block.raw_u32(), self.to_vreg.raw_u32(), ) } } /// As above for `BlockparamIn`, field order is significant. /// /// The fields in this struct are reversed in sort order so that the entire /// struct can be treated as a u128 for sorting purposes. #[derive(Clone, Debug)] #[repr(C)] pub struct BlockparamIn { pub from_block: Block, pub to_block: Block, pub to_vreg: VRegIndex, } impl BlockparamIn { #[inline(always)] pub fn key(&self) -> u128 { u128_key( self.to_vreg.raw_u32(), self.to_block.raw_u32(), self.from_block.raw_u32(), 0, ) } } impl LiveRanges { pub fn add(&mut self, range: CodeRange) -> LiveRangeIndex { self.push(LiveRange { range, vreg: VRegIndex::invalid(), bundle: LiveBundleIndex::invalid(), uses_spill_weight_and_flags: 0, uses: smallvec![], }) } } impl LiveBundles { pub fn add(&mut self) -> LiveBundleIndex { self.push(LiveBundle { allocation: Allocation::none(), ranges: smallvec![], spillset: SpillSetIndex::invalid(), prio: 0, spill_weight_and_props: 0, }) } } impl VRegs { pub fn add(&mut self, reg: VReg, data: VRegData) -> VRegIndex { let idx = self.push(data); debug_assert_eq!(reg.vreg(), idx.index()); idx } } impl core::ops::Index for VRegs { type Output = VRegData; #[inline(always)] fn index(&self, idx: VReg) -> &Self::Output { &self.storage[idx.vreg()] } } impl core::ops::IndexMut for VRegs { #[inline(always)] fn index_mut(&mut self, idx: VReg) -> &mut Self::Output { &mut self.storage[idx.vreg()] } } #[derive(Clone, Debug)] pub struct Env<'a, F: Function> { pub func: &'a F, pub env: &'a MachineEnv, pub cfginfo: CFGInfo, pub liveins: Vec, pub liveouts: Vec, pub blockparam_outs: Vec, pub blockparam_ins: Vec, pub ranges: LiveRanges, pub bundles: LiveBundles, pub spillsets: SpillSets, pub vregs: VRegs, pub pregs: Vec, pub allocation_queue: PrioQueue, pub spilled_bundles: Vec, pub spillslots: Vec, pub slots_by_class: [SpillSlotList; 3], pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 3], pub preferred_victim_by_class: [PReg; 3], // When multiple fixed-register constraints are present on a // single VReg at a single program point (this can happen for, // e.g., call args that use the same value multiple times), we // remove all but one of the fixed-register constraints, make a // note here, and add a clobber with that PReg instread to keep // the register available. When we produce the final edit-list, we // will insert a copy from wherever the VReg's primary allocation // was to the approprate PReg. pub multi_fixed_reg_fixups: Vec, // Output: pub allocs: Vec, pub inst_alloc_offsets: Vec, pub num_spillslots: u32, pub debug_locations: Vec<(u32, ProgPoint, ProgPoint, Allocation)>, pub allocated_bundle_count: usize, pub stats: Stats, // For debug output only: a list of textual annotations at every // ProgPoint to insert into the final allocated program listing. 
pub debug_annotations: hashbrown::HashMap>, pub annotations_enabled: bool, // Cached allocation for `try_to_allocate_bundle_to_reg` to avoid allocating // a new HashSet on every call. pub conflict_set: FxHashSet, } impl<'a, F: Function> Env<'a, F> { /// Get the VReg (with bundled RegClass) from a vreg index. #[inline] pub fn vreg(&self, index: VRegIndex) -> VReg { let class = self.vregs[index] .class .expect("trying to get a VReg before observing its class"); VReg::new(index.index(), class) } /// Record the class of a VReg. We learn this only when we observe /// the VRegs in use. pub fn observe_vreg_class(&mut self, vreg: VReg) { let old_class = self.vregs[vreg].class.replace(vreg.class()); // We should never observe two different classes for two // mentions of a VReg in the source program. debug_assert!(old_class == None || old_class == Some(vreg.class())); } /// Is this vreg actually used in the source program? pub fn is_vreg_used(&self, index: VRegIndex) -> bool { self.vregs[index].class.is_some() } } #[derive(Clone, Debug)] pub struct SpillSetRanges { pub btree: BTreeMap, } impl SpillSetRanges { pub fn new() -> Self { Self { btree: BTreeMap::new(), } } } #[derive(Clone, Debug)] pub struct SpillSlotData { pub ranges: SpillSetRanges, pub slots: u32, pub alloc: Allocation, } #[derive(Clone, Debug)] pub struct SpillSlotList { pub slots: SmallVec<[SpillSlotIndex; 32]>, pub probe_start: usize, } impl SpillSlotList { pub fn new() -> Self { SpillSlotList { slots: smallvec![], probe_start: 0, } } /// Get the next spillslot index in probing order, wrapping around /// at the end of the slots list. pub(crate) fn next_index(&self, index: usize) -> usize { debug_assert!(index < self.slots.len()); if index == self.slots.len() - 1 { 0 } else { index + 1 } } } #[derive(Clone, Debug)] pub struct PrioQueue { pub heap: alloc::collections::BinaryHeap, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct PrioQueueEntry { pub prio: u32, pub bundle: LiveBundleIndex, pub reg_hint: PReg, } #[derive(Clone, Debug)] pub struct LiveRangeSet { pub btree: BTreeMap, } #[derive(Clone, Copy, Debug)] pub struct LiveRangeKey { pub from: u32, pub to: u32, } impl LiveRangeKey { #[inline(always)] pub fn from_range(range: &CodeRange) -> Self { Self { from: range.from.to_index(), to: range.to.to_index(), } } #[inline(always)] pub fn to_range(&self) -> CodeRange { CodeRange { from: ProgPoint::from_index(self.from), to: ProgPoint::from_index(self.to), } } } impl core::cmp::PartialEq for LiveRangeKey { #[inline(always)] fn eq(&self, other: &Self) -> bool { self.to > other.from && self.from < other.to } } impl core::cmp::Eq for LiveRangeKey {} impl core::cmp::PartialOrd for LiveRangeKey { #[inline(always)] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl core::cmp::Ord for LiveRangeKey { #[inline(always)] fn cmp(&self, other: &Self) -> core::cmp::Ordering { if self.to <= other.from { core::cmp::Ordering::Less } else if self.from >= other.to { core::cmp::Ordering::Greater } else { core::cmp::Ordering::Equal } } } pub struct PrioQueueComparator<'a> { pub prios: &'a [usize], } impl<'a> ContainerComparator for PrioQueueComparator<'a> { type Ix = LiveBundleIndex; fn compare(&self, a: Self::Ix, b: Self::Ix) -> core::cmp::Ordering { self.prios[a.index()].cmp(&self.prios[b.index()]) } } impl PrioQueue { pub fn new() -> Self { PrioQueue { heap: alloc::collections::BinaryHeap::new(), } } #[inline(always)] pub fn insert(&mut self, bundle: LiveBundleIndex, prio: usize, reg_hint: PReg) { 
self.heap.push(PrioQueueEntry { prio: prio as u32, bundle, reg_hint, }); } #[inline(always)] pub fn is_empty(self) -> bool { self.heap.is_empty() } #[inline(always)] pub fn pop(&mut self) -> Option<(LiveBundleIndex, PReg)> { self.heap.pop().map(|entry| (entry.bundle, entry.reg_hint)) } } impl LiveRangeSet { pub(crate) fn new() -> Self { Self { btree: BTreeMap::new(), } } } #[derive(Clone, Debug)] pub struct InsertedMove { pub pos_prio: PosWithPrio, pub from_alloc: Allocation, pub to_alloc: Allocation, pub to_vreg: VReg, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] pub enum InsertMovePrio { InEdgeMoves, Regular, MultiFixedRegInitial, MultiFixedRegSecondary, ReusedInput, OutEdgeMoves, } #[derive(Debug, Default)] pub struct InsertedMoves { pub moves: Vec, } impl InsertedMoves { pub fn push( &mut self, pos: ProgPoint, prio: InsertMovePrio, from_alloc: Allocation, to_alloc: Allocation, to_vreg: VReg, ) { trace!( "insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?} to_vreg {:?}", pos, prio, from_alloc, to_alloc, to_vreg ); if from_alloc == to_alloc { trace!(" -> skipping move with same source and dest"); return; } if let Some(from) = from_alloc.as_reg() { debug_assert_eq!(from.class(), to_vreg.class()); } if let Some(to) = to_alloc.as_reg() { debug_assert_eq!(to.class(), to_vreg.class()); } self.moves.push(InsertedMove { pos_prio: PosWithPrio { pos, prio: prio as u32, }, from_alloc, to_alloc, to_vreg, }); } } #[derive(Clone, Debug)] pub struct Edits { edits: Vec<(PosWithPrio, Edit)>, } impl Edits { #[inline(always)] pub fn with_capacity(n: usize) -> Self { Self { edits: Vec::with_capacity(n), } } #[inline(always)] pub fn len(&self) -> usize { self.edits.len() } #[inline(always)] pub fn iter(&self) -> impl Iterator { self.edits.iter() } #[inline(always)] pub fn into_edits(self) -> impl Iterator { self.edits.into_iter().map(|(pos, edit)| (pos.pos, edit)) } /// Sort edits by the combination of their program position and priority. This is a stable sort /// to preserve the order of the moves the parallel move resolver inserts. #[inline(always)] pub fn sort(&mut self) { self.edits.sort_by_key(|&(pos_prio, _)| pos_prio.key()); } pub fn add(&mut self, pos_prio: PosWithPrio, from: Allocation, to: Allocation) { if from != to { if from.is_reg() && to.is_reg() { debug_assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class()); } self.edits.push((pos_prio, Edit::Move { from, to })); } } } /// The fields in this struct are reversed in sort order so that the entire /// struct can be treated as a u64 for sorting purposes. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(C)] pub struct PosWithPrio { pub prio: u32, pub pos: ProgPoint, } impl PosWithPrio { #[inline] pub fn key(self) -> u64 { u64_key(self.pos.to_index(), self.prio) } } #[derive(Clone, Copy, Debug, Default)] #[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))] pub struct Stats { pub livein_blocks: usize, pub livein_iterations: usize, pub initial_liverange_count: usize, pub merged_bundle_count: usize, pub process_bundle_count: usize, pub process_bundle_reg_probes_fixed: usize, pub process_bundle_reg_success_fixed: usize, pub process_bundle_bounding_range_probe_start_any: usize, pub process_bundle_bounding_range_probes_any: usize, pub process_bundle_bounding_range_success_any: usize, pub process_bundle_reg_probe_start_any: usize, pub process_bundle_reg_probes_any: usize, pub process_bundle_reg_success_any: usize, pub evict_bundle_event: usize, pub evict_bundle_count: usize, pub splits: usize, pub splits_clobbers: usize, pub splits_hot: usize, pub splits_conflicts: usize, pub splits_defs: usize, pub splits_all: usize, pub final_liverange_count: usize, pub final_bundle_count: usize, pub spill_bundle_count: usize, pub spill_bundle_reg_probes: usize, pub spill_bundle_reg_success: usize, pub blockparam_ins_count: usize, pub blockparam_outs_count: usize, pub halfmoves_count: usize, pub edits_count: usize, } // Helper function for generating sorting keys. The order of arguments is from // the most significant field to the least significant one. // // These work best when the fields are stored in reverse order in memory so that // they can be loaded with a single u64 load on little-endian machines. #[inline(always)] pub fn u64_key(b: u32, a: u32) -> u64 { a as u64 | (b as u64) << 32 } #[inline(always)] pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 { a as u128 | (b as u128) << 32 | (c as u128) << 64 | (d as u128) << 96 } regalloc2-0.10.2/src/ion/dump.rs000066400000000000000000000111501467034227200163400ustar00rootroot00000000000000//! Debugging output. 
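// Illustrative sketch (hypothetical test, not part of the original source):
// the `u64_key`/`u128_key` helpers defined at the end of `data_structures.rs`
// above pack their arguments most-significant-first, so comparing the packed
// integers is equivalent to comparing the original field tuples
// lexicographically. A minimal check of that property:
#[cfg(test)]
mod key_packing_sketch {
    use super::data_structures::{u128_key, u64_key};

    #[test]
    fn packed_keys_sort_like_tuples() {
        // (1, 0) must sort after (0, u32::MAX) when the first argument is
        // the most significant field.
        assert!(u64_key(1, 0) > u64_key(0, u32::MAX));
        // Same idea for the four-field key used by the blockparam records.
        assert!(u128_key(0, 1, 0, 0) > u128_key(0, 0, u32::MAX, u32::MAX));
    }
}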
use alloc::string::ToString; use alloc::{format, vec}; use alloc::{string::String, vec::Vec}; use super::Env; use crate::{Block, Function, ProgPoint}; impl<'a, F: Function> Env<'a, F> { pub fn dump_state(&self) { trace!("Bundles:"); for (i, b) in self.bundles.iter().enumerate() { trace!( "bundle{}: spillset={:?} alloc={:?}", i, b.spillset, b.allocation ); for entry in &b.ranges { trace!( " * range {:?} -- {:?}: range{}", entry.range.from, entry.range.to, entry.index.index() ); } } trace!("VRegs:"); for (i, v) in self.vregs.iter().enumerate() { trace!("vreg{}:", i); for entry in &v.ranges { trace!( " * range {:?} -- {:?}: range{}", entry.range.from, entry.range.to, entry.index.index() ); } } trace!("Ranges:"); for (i, r) in self.ranges.iter().enumerate() { trace!( "range{}: range={:?} vreg={:?} bundle={:?} weight={:?}", i, r.range, r.vreg, r.bundle, r.uses_spill_weight(), ); for u in &r.uses { trace!(" * use at {:?} (slot {}): {:?}", u.pos, u.slot, u.operand); } } } pub fn annotate(&mut self, progpoint: ProgPoint, s: String) { if self.annotations_enabled { self.debug_annotations .entry(progpoint) .or_insert_with(|| vec![]) .push(s); } } pub fn dump_results(&self) { log::info!("=== REGALLOC RESULTS ==="); for block in 0..self.func.num_blocks() { let block = Block::new(block); log::info!( "block{}: [succs {:?} preds {:?}]", block.index(), self.func .block_succs(block) .iter() .map(|b| b.index()) .collect::>(), self.func .block_preds(block) .iter() .map(|b| b.index()) .collect::>() ); for inst in self.func.block_insns(block).iter() { for annotation in self .debug_annotations .get(&ProgPoint::before(inst)) .map(|v| &v[..]) .unwrap_or(&[]) { log::info!(" inst{}-pre: {}", inst.index(), annotation); } let ops = self .func .inst_operands(inst) .iter() .map(|op| format!("{}", op)) .collect::>(); let clobbers = self .func .inst_clobbers(inst) .into_iter() .map(|preg| format!("{}", preg)) .collect::>(); let allocs = (0..ops.len()) .map(|i| format!("{}", self.get_alloc(inst, i))) .collect::>(); let opname = if self.func.is_branch(inst) { "br" } else if self.func.is_ret(inst) { "ret" } else { "op" }; let args = ops .iter() .zip(allocs.iter()) .map(|(op, alloc)| format!("{} [{}]", op, alloc)) .collect::>(); let clobbers = if clobbers.is_empty() { "".to_string() } else { format!(" [clobber: {}]", clobbers.join(", ")) }; log::info!( " inst{}: {} {}{}", inst.index(), opname, args.join(", "), clobbers ); for annotation in self .debug_annotations .get(&ProgPoint::after(inst)) .map(|v| &v[..]) .unwrap_or(&[]) { log::info!(" inst{}-post: {}", inst.index(), annotation); } } } } } regalloc2-0.10.2/src/ion/liveranges.rs000066400000000000000000001156171467034227200175470ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Live-range computation. 
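// Illustrative sketch (hypothetical test, not part of the original source):
// the `SpillWeight` type defined later in this file packs an f32 weight into
// 16 bits by keeping only the high exponent and mantissa bits, so the round
// trip is lossy but order-preserving for the non-negative weights used here.
// A minimal check of that property:
#[cfg(test)]
mod spill_weight_packing_sketch {
    use super::SpillWeight;

    #[test]
    fn packing_preserves_order() {
        let light = SpillWeight::from_f32(1000.0);
        let heavy = SpillWeight::from_f32(6000.0);
        // Heavier weights must still compare as heavier after packing.
        assert!(heavy.to_bits() > light.to_bits());
        // Values with few significant mantissa bits survive the round trip
        // exactly.
        assert_eq!(SpillWeight::from_bits(light.to_bits()).to_f32(), 1000.0);
    }
}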
use super::{ CodeRange, Env, LiveRangeFlag, LiveRangeIndex, LiveRangeKey, LiveRangeListEntry, LiveRangeSet, PRegData, PRegIndex, RegClass, Use, VRegData, VRegIndex, }; use crate::indexset::IndexSet; use crate::ion::data_structures::{ BlockparamIn, BlockparamOut, FixedRegFixupLevel, MultiFixedRegFixup, }; use crate::{ Allocation, Block, Function, FxHashMap, FxHashSet, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, ProgPoint, RegAllocError, VReg, }; use alloc::collections::VecDeque; use alloc::vec; use alloc::vec::Vec; use slice_group_by::GroupByMut; use smallvec::{smallvec, SmallVec}; /// A spill weight computed for a certain Use. #[derive(Clone, Copy, Debug)] pub struct SpillWeight(f32); #[inline(always)] pub fn spill_weight_from_constraint( constraint: OperandConstraint, loop_depth: usize, is_def: bool, ) -> SpillWeight { // A bonus of 1000 for one loop level, 4000 for two loop levels, // 16000 for three loop levels, etc. Avoids exponentiation. let loop_depth = core::cmp::min(10, loop_depth); let hot_bonus: f32 = (0..loop_depth).fold(1000.0, |a, _| a * 4.0); let def_bonus: f32 = if is_def { 2000.0 } else { 0.0 }; let constraint_bonus: f32 = match constraint { OperandConstraint::Any => 1000.0, OperandConstraint::Reg | OperandConstraint::FixedReg(_) => 2000.0, _ => 0.0, }; SpillWeight(hot_bonus + def_bonus + constraint_bonus) } impl SpillWeight { /// Convert a floating-point weight to a u16 that can be compactly /// stored in a `Use`. We simply take the top 16 bits of the f32; this /// is equivalent to the bfloat16 format /// (https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). pub fn to_bits(self) -> u16 { (self.0.to_bits() >> 15) as u16 } /// Convert a value that was returned from /// `SpillWeight::to_bits()` back into a `SpillWeight`. Note that /// some precision may be lost when round-tripping from a spill /// weight to packed bits and back. pub fn from_bits(bits: u16) -> SpillWeight { let x = f32::from_bits((bits as u32) << 15); SpillWeight(x) } /// Get a zero spill weight. pub fn zero() -> SpillWeight { SpillWeight(0.0) } /// Convert to a raw floating-point value. pub fn to_f32(self) -> f32 { self.0 } /// Create a `SpillWeight` from a raw floating-point value. pub fn from_f32(x: f32) -> SpillWeight { SpillWeight(x) } pub fn to_int(self) -> u32 { self.0 as u32 } } impl core::ops::Add for SpillWeight { type Output = SpillWeight; fn add(self, other: SpillWeight) -> Self { SpillWeight(self.0 + other.0) } } impl<'a, F: Function> Env<'a, F> { pub fn create_pregs_and_vregs(&mut self) { // Create PRegs from the env. self.pregs.resize( PReg::NUM_INDEX, PRegData { allocations: LiveRangeSet::new(), is_stack: false, }, ); for &preg in &self.env.fixed_stack_slots { self.pregs[preg.index()].is_stack = true; } for class in 0..self.preferred_victim_by_class.len() { self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class] .last() .or(self.env.preferred_regs_by_class[class].last()) .cloned() .unwrap_or(PReg::invalid()); } // Create VRegs from the vreg count. for idx in 0..self.func.num_vregs() { // We'll fill in the real details when we see the def. self.vregs.add( VReg::new(idx, RegClass::Int), VRegData { ranges: smallvec![], blockparam: Block::invalid(), // We'll learn the RegClass as we scan the code. class: None, }, ); } // Create allocations too. 
for inst in 0..self.func.num_insts() { let start = self.allocs.len() as u32; self.inst_alloc_offsets.push(start); for _ in 0..self.func.inst_operands(Inst::new(inst)).len() { self.allocs.push(Allocation::none()); } } } /// Mark `range` as live for the given `vreg`. /// /// Returns the liverange that contains the given range. pub fn add_liverange_to_vreg( &mut self, vreg: VRegIndex, mut range: CodeRange, ) -> LiveRangeIndex { trace!("add_liverange_to_vreg: vreg {:?} range {:?}", vreg, range); // Invariant: as we are building liveness information, we // *always* process instructions bottom-to-top, and as a // consequence, new liveranges are always created before any // existing liveranges for a given vreg. We assert this here, // then use it to avoid an O(n) merge step (which would lead // to O(n^2) liveness construction cost overall). // // We store liveranges in reverse order in the `.ranges` // array, then reverse them at the end of // `compute_liveness()`. if !self.vregs[vreg].ranges.is_empty() { let last_range_index = self.vregs[vreg].ranges.last().unwrap().index; let last_range = self.ranges[last_range_index].range; if self.func.allow_multiple_vreg_defs() { if last_range.contains(&range) { // Special case (may occur when multiple defs of pinned // physical regs occur): if this new range overlaps the // existing range, return it. return last_range_index; } // If this range's end falls in the middle of the last // range, truncate it to be contiguous so we can merge // below. if range.to >= last_range.from && range.to <= last_range.to { range.to = last_range.from; } } debug_assert!( range.to <= last_range.from, "range {:?}, last_range {:?}", range, last_range ); } if self.vregs[vreg].ranges.is_empty() || range.to < self.ranges[self.vregs[vreg].ranges.last().unwrap().index] .range .from { // Is not contiguous with previously-added (immediately // following) range; create a new range. let lr = self.ranges.add(range); self.ranges[lr].vreg = vreg; self.vregs[vreg] .ranges .push(LiveRangeListEntry { range, index: lr }); lr } else { // Is contiguous with previously-added range; just extend // its range and return it. let lr = self.vregs[vreg].ranges.last().unwrap().index; debug_assert!(range.to == self.ranges[lr].range.from); self.ranges[lr].range.from = range.from; lr } } pub fn insert_use_into_liverange(&mut self, into: LiveRangeIndex, mut u: Use) { let operand = u.operand; let constraint = operand.constraint(); let block = self.cfginfo.insn_block[u.pos.inst().index()]; let loop_depth = self.cfginfo.approx_loop_depth[block.index()] as usize; let weight = spill_weight_from_constraint( constraint, loop_depth, operand.kind() != OperandKind::Use, ); u.weight = weight.to_bits(); trace!( "insert use {:?} into lr {:?} with weight {:?}", u, into, weight, ); // N.B.: we do *not* update `requirement` on the range, // because those will be computed during the multi-fixed-reg // fixup pass later (after all uses are inserted). self.ranges[into].uses.push(u); // Update stats. 
let range_weight = self.ranges[into].uses_spill_weight() + weight; self.ranges[into].set_uses_spill_weight(range_weight); trace!( " -> now range has weight {:?}", self.ranges[into].uses_spill_weight(), ); } pub fn find_vreg_liverange_for_pos( &self, vreg: VRegIndex, pos: ProgPoint, ) -> Option { for entry in &self.vregs[vreg].ranges { if entry.range.contains_point(pos) { return Some(entry.index); } } None } pub fn add_liverange_to_preg(&mut self, range: CodeRange, reg: PReg) { trace!("adding liverange to preg: {:?} to {}", range, reg); let preg_idx = PRegIndex::new(reg.index()); let res = self.pregs[preg_idx.index()] .allocations .btree .insert(LiveRangeKey::from_range(&range), LiveRangeIndex::invalid()); debug_assert!(res.is_none()); } pub fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool { self.liveins[block.index()].get(vreg.index()) } pub fn compute_liveness(&mut self) -> Result<(), RegAllocError> { // Create initial LiveIn and LiveOut bitsets. for _ in 0..self.func.num_blocks() { self.liveins.push(IndexSet::new()); self.liveouts.push(IndexSet::new()); } // Run a worklist algorithm to precisely compute liveins and // liveouts. let mut workqueue = VecDeque::new(); let mut workqueue_set = FxHashSet::default(); // Initialize workqueue with postorder traversal. for &block in &self.cfginfo.postorder[..] { workqueue.push_back(block); workqueue_set.insert(block); } while let Some(block) = workqueue.pop_front() { workqueue_set.remove(&block); let insns = self.func.block_insns(block); trace!("computing liveins for block{}", block.index()); self.stats.livein_iterations += 1; let mut live = self.liveouts[block.index()].clone(); trace!(" -> initial liveout set: {:?}", live); // Include outgoing blockparams in the initial live set. if self.func.is_branch(insns.last()) { for i in 0..self.func.block_succs(block).len() { for ¶m in self.func.branch_blockparams(block, insns.last(), i) { live.set(param.vreg(), true); self.observe_vreg_class(param); } } } for inst in insns.iter().rev() { for pos in &[OperandPos::Late, OperandPos::Early] { for op in self.func.inst_operands(inst) { if op.as_fixed_nonallocatable().is_some() { continue; } if op.pos() == *pos { let was_live = live.get(op.vreg().vreg()); trace!("op {:?} was_live = {}", op, was_live); match op.kind() { OperandKind::Use => { live.set(op.vreg().vreg(), true); } OperandKind::Def => { live.set(op.vreg().vreg(), false); } } self.observe_vreg_class(op.vreg()); } } } } for &blockparam in self.func.block_params(block) { live.set(blockparam.vreg(), false); self.observe_vreg_class(blockparam); } for &pred in self.func.block_preds(block) { if self.liveouts[pred.index()].union_with(&live) { if !workqueue_set.contains(&pred) { workqueue_set.insert(pred); workqueue.push_back(pred); } } } trace!("computed liveins at block{}: {:?}", block.index(), live); self.liveins[block.index()] = live; } // Check that there are no liveins to the entry block. if !self.liveins[self.func.entry_block().index()].is_empty() { trace!( "non-empty liveins to entry block: {:?}", self.liveins[self.func.entry_block().index()] ); return Err(RegAllocError::EntryLivein); } Ok(()) } pub fn build_liveranges(&mut self) { // Create Uses and Defs referring to VRegs, and place the Uses // in LiveRanges. // // We already computed precise liveouts and liveins for every // block above, so we don't need to run an iterative algorithm // here; instead, every block's computation is purely local, // from end to start. // Track current LiveRange for each vreg. 
// // Invariant: a stale range may be present here; ranges are // only valid if `live.get(vreg)` is true. let mut vreg_ranges: Vec = vec![LiveRangeIndex::invalid(); self.func.num_vregs()]; for i in (0..self.func.num_blocks()).rev() { let block = Block::new(i); let insns = self.func.block_insns(block); self.stats.livein_blocks += 1; // Init our local live-in set. let mut live = self.liveouts[block.index()].clone(); // If the last instruction is a branch (rather than // return), create blockparam_out entries. if self.func.is_branch(insns.last()) { for (i, &succ) in self.func.block_succs(block).iter().enumerate() { let blockparams_in = self.func.block_params(succ); let blockparams_out = self.func.branch_blockparams(block, insns.last(), i); for (&blockparam_in, &blockparam_out) in blockparams_in.iter().zip(blockparams_out) { let blockparam_out = VRegIndex::new(blockparam_out.vreg()); let blockparam_in = VRegIndex::new(blockparam_in.vreg()); self.blockparam_outs.push(BlockparamOut { to_vreg: blockparam_in, to_block: succ, from_block: block, from_vreg: blockparam_out, }); // Include outgoing blockparams in the initial live set. live.set(blockparam_out.index(), true); } } } // Initially, registers are assumed live for the whole block. for vreg in live.iter() { let range = CodeRange { from: self.cfginfo.block_entry[block.index()], to: self.cfginfo.block_exit[block.index()].next(), }; trace!( "vreg {:?} live at end of block --> create range {:?}", VRegIndex::new(vreg), range ); let lr = self.add_liverange_to_vreg(VRegIndex::new(vreg), range); vreg_ranges[vreg] = lr; } // Create vreg data for blockparams. for ¶m in self.func.block_params(block) { self.vregs[param].blockparam = block; } // For each instruction, in reverse order, process // operands and clobbers. for inst in insns.iter().rev() { // Mark clobbers with CodeRanges on PRegs. for clobber in self.func.inst_clobbers(inst) { // Clobber range is at After point only: an // instruction can still take an input in a reg // that it later clobbers. (In other words, the // clobber is like a normal def that never gets // used.) let range = CodeRange { from: ProgPoint::after(inst), to: ProgPoint::before(inst.next()), }; self.add_liverange_to_preg(range, clobber); } // Does the instruction have any input-reusing // outputs? This is important below to establish // proper interference wrt other inputs. We note the // *vreg* that is reused, not the index. let mut reused_input = None; for op in self.func.inst_operands(inst) { if let OperandConstraint::Reuse(i) = op.constraint() { debug_assert!(self.func.inst_operands(inst)[i] .as_fixed_nonallocatable() .is_none()); reused_input = Some(self.func.inst_operands(inst)[i].vreg()); break; } } // Preprocess defs and uses. Specifically, if there // are any fixed-reg-constrained defs at Late position // and fixed-reg-constrained uses at Early position // with the same preg, we need to (i) add a fixup move // for the use, (ii) rewrite the use to have an Any // constraint, and (ii) move the def to Early position // to reserve the register for the whole instruction. // // We don't touch any fixed-early-def or fixed-late-use // constraints: the only situation where the same physical // register can be used multiple times in the same // instruction is with an early-use and a late-def. Anything // else is a user error. 
let mut operand_rewrites: FxHashMap = FxHashMap::default(); let mut late_def_fixed: SmallVec<[PReg; 8]> = smallvec![]; for &operand in self.func.inst_operands(inst) { if let OperandConstraint::FixedReg(preg) = operand.constraint() { match (operand.pos(), operand.kind()) { (OperandPos::Late, OperandKind::Def) => { late_def_fixed.push(preg); } _ => {} } } } for (i, &operand) in self.func.inst_operands(inst).iter().enumerate() { if operand.as_fixed_nonallocatable().is_some() { continue; } if let OperandConstraint::FixedReg(preg) = operand.constraint() { match (operand.pos(), operand.kind()) { (OperandPos::Early, OperandKind::Use) if live.get(operand.vreg().vreg()) => { // If we have a use constraint at the // Early point for a fixed preg, and // this preg is also constrained with // a *separate* def at Late or is // clobbered, and *if* the vreg is // live downward, we have to use the // multi-fixed-reg mechanism for a // fixup and rewrite here without the // constraint. See #53. // // We adjust the def liverange and Use // to an "early" position to reserve // the register, it still must not be // used by some other vreg at the // use-site. // // Note that we handle multiple // conflicting constraints for the // same vreg in a separate pass (see // `fixup_multi_fixed_vregs` below). if late_def_fixed.contains(&preg) || self.func.inst_clobbers(inst).contains(preg) { trace!( concat!( "-> operand {:?} is fixed to preg {:?}, ", "is downward live, and there is also a ", "def or clobber at this preg" ), operand, preg ); let pos = ProgPoint::before(inst); self.multi_fixed_reg_fixups.push(MultiFixedRegFixup { pos, from_slot: i as u8, to_slot: i as u8, to_preg: PRegIndex::new(preg.index()), vreg: VRegIndex::new(operand.vreg().vreg()), level: FixedRegFixupLevel::Initial, }); // We need to insert a reservation // at the before-point to reserve // the reg for the use too. let range = CodeRange::singleton(pos); self.add_liverange_to_preg(range, preg); // Remove the fixed-preg // constraint from the Use. operand_rewrites.insert( i, Operand::new( operand.vreg(), OperandConstraint::Any, operand.kind(), operand.pos(), ), ); } } _ => {} } } } // Process defs and uses. for &cur_pos in &[InstPosition::After, InstPosition::Before] { for i in 0..self.func.inst_operands(inst).len() { // don't borrow `self` let operand = operand_rewrites .get(&i) .cloned() .unwrap_or(self.func.inst_operands(inst)[i]); let pos = match (operand.kind(), operand.pos()) { (OperandKind::Def, OperandPos::Early) => ProgPoint::before(inst), (OperandKind::Def, OperandPos::Late) => ProgPoint::after(inst), (OperandKind::Use, OperandPos::Late) => ProgPoint::after(inst), // If there are any reused inputs in this // instruction, and this is *not* the // reused vreg, force `pos` to // `After`. This ensures that we correctly // account for the interference between // the other inputs and the // input-that-is-reused/output. (OperandKind::Use, OperandPos::Early) if reused_input.is_some() && reused_input.unwrap() != operand.vreg() => { ProgPoint::after(inst) } (OperandKind::Use, OperandPos::Early) => ProgPoint::before(inst), }; if pos.pos() != cur_pos { continue; } trace!( "processing inst{} operand at {:?}: {:?}", inst.index(), pos, operand ); // If this is a "fixed non-allocatable // register" operand, set the alloc // immediately and then ignore the operand // hereafter. 
if let Some(preg) = operand.as_fixed_nonallocatable() { self.set_alloc(inst, i, Allocation::reg(preg)); continue; } match operand.kind() { OperandKind::Def => { trace!("Def of {} at {:?}", operand.vreg(), pos); // Get or create the LiveRange. let mut lr = vreg_ranges[operand.vreg().vreg()]; trace!(" -> has existing LR {:?}", lr); // If there was no liverange (dead def), create a trivial one. if !live.get(operand.vreg().vreg()) { let from = pos; // We want to we want to span // until Before of the next // inst. This ensures that early // defs used for temps on an // instruction are reserved across // the whole instruction. let to = ProgPoint::before(pos.inst().next()); lr = self.add_liverange_to_vreg( VRegIndex::new(operand.vreg().vreg()), CodeRange { from, to }, ); trace!(" -> invalid; created {:?}", lr); vreg_ranges[operand.vreg().vreg()] = lr; live.set(operand.vreg().vreg(), true); } // Create the use in the LiveRange. self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); // If def (not mod), this reg is now dead, // scanning backward; make it so. if operand.kind() == OperandKind::Def { // Trim the range for this vreg to start // at `pos` if it previously ended at the // start of this block (i.e. was not // merged into some larger LiveRange due // to out-of-order blocks). if self.ranges[lr].range.from == self.cfginfo.block_entry[block.index()] { trace!(" -> started at block start; trimming to {:?}", pos); self.ranges[lr].range.from = pos; } self.ranges[lr].set_flag(LiveRangeFlag::StartsAtDef); // Remove from live-set. live.set(operand.vreg().vreg(), false); vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid(); } } OperandKind::Use => { // Create/extend the LiveRange if it // doesn't already exist, and add the use // to the range. let mut lr = vreg_ranges[operand.vreg().vreg()]; if !live.get(operand.vreg().vreg()) { let range = CodeRange { from: self.cfginfo.block_entry[block.index()], to: pos.next(), }; lr = self.add_liverange_to_vreg( VRegIndex::new(operand.vreg().vreg()), range, ); vreg_ranges[operand.vreg().vreg()] = lr; } debug_assert!(lr.is_valid()); trace!("Use of {:?} at {:?} -> {:?}", operand, pos, lr,); self.insert_use_into_liverange(lr, Use::new(operand, pos, i as u8)); // Add to live-set. live.set(operand.vreg().vreg(), true); } } } } } // Block parameters define vregs at the very beginning of // the block. Remove their live vregs from the live set // here. for vreg in self.func.block_params(block) { if live.get(vreg.vreg()) { live.set(vreg.vreg(), false); } else { // Create trivial liverange if blockparam is dead. let start = self.cfginfo.block_entry[block.index()]; self.add_liverange_to_vreg( VRegIndex::new(vreg.vreg()), CodeRange { from: start, to: start.next(), }, ); } // add `blockparam_ins` entries. let vreg_idx = VRegIndex::new(vreg.vreg()); for &pred in self.func.block_preds(block) { self.blockparam_ins.push(BlockparamIn { to_vreg: vreg_idx, to_block: block, from_block: pred, }); } } } // Make ranges in each vreg and uses in each range appear in // sorted order. We built them in reverse order above, so this // is a simple reversal, *not* a full sort. // // The ordering invariant is always maintained for uses and // always for ranges in bundles (which are initialized later), // but not always for ranges in vregs; those are sorted only // when needed, here and then again at the end of allocation // when resolving moves. 
for vreg in &mut self.vregs { vreg.ranges.reverse(); let mut last = None; for entry in &mut vreg.ranges { // Ranges may have been truncated above at defs. We // need to update with the final range here. entry.range = self.ranges[entry.index].range; // Assert in-order and non-overlapping. debug_assert!(last.is_none() || last.unwrap() <= entry.range.from); last = Some(entry.range.to); } } for range in &mut self.ranges { range.uses.reverse(); debug_assert!(range.uses.windows(2).all(|win| win[0].pos <= win[1].pos)); } self.blockparam_ins.sort_unstable_by_key(|x| x.key()); self.blockparam_outs.sort_unstable_by_key(|x| x.key()); self.stats.initial_liverange_count = self.ranges.len(); self.stats.blockparam_ins_count = self.blockparam_ins.len(); self.stats.blockparam_outs_count = self.blockparam_outs.len(); } pub fn fixup_multi_fixed_vregs(&mut self) { // Do a fixed-reg cleanup pass: if there are any LiveRanges with // multiple uses at the same ProgPoint and there is // more than one FixedReg constraint at that ProgPoint, we // need to record all but one of them in a special fixup list // and handle them later; otherwise, bundle-splitting to // create minimal bundles becomes much more complex (we would // have to split the multiple uses at the same progpoint into // different bundles, which breaks invariants related to // disjoint ranges and bundles). let mut extra_clobbers: SmallVec<[(PReg, ProgPoint); 8]> = smallvec![]; for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); for range_idx in 0..self.vregs[vreg].ranges.len() { let entry = self.vregs[vreg].ranges[range_idx]; let range = entry.index; trace!("multi-fixed-reg cleanup: vreg {:?} range {:?}", vreg, range,); // Find groups of uses that occur in at the same program point. for uses in self.ranges[range].uses.linear_group_by_key_mut(|u| u.pos) { if uses.len() < 2 { continue; } // Search for conflicting constraints in the uses. let mut requires_reg = false; let mut num_fixed_reg = 0; let mut num_fixed_stack = 0; let mut first_reg_slot = None; let mut first_stack_slot = None; for u in uses.iter() { match u.operand.constraint() { OperandConstraint::Any => { first_reg_slot.get_or_insert(u.slot); first_stack_slot.get_or_insert(u.slot); } OperandConstraint::Reg | OperandConstraint::Reuse(_) => { first_reg_slot.get_or_insert(u.slot); requires_reg = true; } OperandConstraint::FixedReg(preg) => { if self.pregs[preg.index()].is_stack { num_fixed_stack += 1; first_stack_slot.get_or_insert(u.slot); } else { requires_reg = true; num_fixed_reg += 1; first_reg_slot.get_or_insert(u.slot); } } } } // Fast path if there are no conflicts. if num_fixed_reg + num_fixed_stack <= 1 && !(requires_reg && num_fixed_stack != 0) { continue; } // We pick one constraint (in order: FixedReg, Reg, FixedStack) // and then rewrite any incompatible constraints to Any. // This allows register allocation to succeed and we will // later insert moves to satisfy the rewritten constraints. let source_slot = if requires_reg { first_reg_slot.unwrap() } else { first_stack_slot.unwrap() }; let mut first_preg = None; for u in uses.iter_mut() { if let OperandConstraint::FixedReg(preg) = u.operand.constraint() { let vreg_idx = VRegIndex::new(u.operand.vreg().vreg()); let preg_idx = PRegIndex::new(preg.index()); trace!( "at pos {:?}, vreg {:?} has fixed constraint to preg {:?}", u.pos, vreg_idx, preg_idx ); // FixedStack is incompatible if there are any // Reg/FixedReg constraints. FixedReg is // incompatible if there already is a different // FixedReg constraint. 
If either condition is true, // we edit the constraint below; otherwise, we can // skip this edit. if !(requires_reg && self.pregs[preg.index()].is_stack) && *first_preg.get_or_insert(preg) == preg { continue; } trace!(" -> duplicate; switching to constraint Any"); self.multi_fixed_reg_fixups.push(MultiFixedRegFixup { pos: u.pos, from_slot: source_slot, to_slot: u.slot, to_preg: preg_idx, vreg: vreg_idx, level: FixedRegFixupLevel::Secondary, }); u.operand = Operand::new( u.operand.vreg(), OperandConstraint::Any, u.operand.kind(), u.operand.pos(), ); trace!(" -> extra clobber {} at inst{}", preg, u.pos.inst().index()); extra_clobbers.push((preg, u.pos)); } } } for (clobber, pos) in extra_clobbers.drain(..) { let range = CodeRange { from: pos, to: pos.next(), }; self.add_liverange_to_preg(range, clobber); } } } } } regalloc2-0.10.2/src/ion/merge.rs000066400000000000000000000333671467034227200165100ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Bundle merging. use super::{Env, LiveBundleIndex, SpillSet, SpillSlotIndex, VRegIndex}; use crate::{ ion::data_structures::{BlockparamOut, CodeRange}, Function, Inst, OperandConstraint, OperandKind, PReg, ProgPoint, }; use alloc::format; use smallvec::smallvec; impl<'a, F: Function> Env<'a, F> { pub fn merge_bundles(&mut self, from: LiveBundleIndex, to: LiveBundleIndex) -> bool { if from == to { // Merge bundle into self -- trivial merge. return true; } trace!( "merging from bundle{} to bundle{}", from.index(), to.index() ); // Both bundles must deal with the same RegClass. let from_rc = self.spillsets[self.bundles[from].spillset].class; let to_rc = self.spillsets[self.bundles[to].spillset].class; if from_rc != to_rc { trace!(" -> mismatching reg classes"); return false; } // If either bundle is already assigned (due to a pinned vreg), don't merge. if self.bundles[from].allocation.is_some() || self.bundles[to].allocation.is_some() { trace!("one of the bundles is already assigned (pinned)"); return false; } #[cfg(debug_assertions)] { // Sanity check: both bundles should contain only ranges with appropriate VReg classes. for entry in &self.bundles[from].ranges { let vreg = self.ranges[entry.index].vreg; debug_assert_eq!(from_rc, self.vreg(vreg).class()); } for entry in &self.bundles[to].ranges { let vreg = self.ranges[entry.index].vreg; debug_assert_eq!(to_rc, self.vreg(vreg).class()); } } // If a bundle has a fixed-reg def then we need to be careful to not // extend the bundle to include another use in the same instruction. // This could result in a minimal bundle that is impossible to split. // // This can only happen with an early use and a late def, so we round // the start of each range containing a fixed def up to the start of // its instruction to detect overlaps. let adjust_range_start = |bundle_idx, range: CodeRange| { if self.bundles[bundle_idx].cached_fixed_def() { ProgPoint::before(range.from.inst()) } else { range.from } }; // Check for overlap in LiveRanges and for conflicting // requirements. 
let ranges_from = &self.bundles[from].ranges[..]; let ranges_to = &self.bundles[to].ranges[..]; let mut idx_from = 0; let mut idx_to = 0; let mut range_count = 0; while idx_from < ranges_from.len() && idx_to < ranges_to.len() { range_count += 1; if range_count > 200 { trace!( "reached merge complexity (range_count = {}); exiting", range_count ); // Limit merge complexity. return false; } if adjust_range_start(from, ranges_from[idx_from].range) >= ranges_to[idx_to].range.to { idx_to += 1; } else if adjust_range_start(to, ranges_to[idx_to].range) >= ranges_from[idx_from].range.to { idx_from += 1; } else { // Overlap -- cannot merge. trace!( " -> overlap between {:?} and {:?}, exiting", ranges_from[idx_from].index, ranges_to[idx_to].index ); return false; } } // Check for a requirements conflict. if self.bundles[from].cached_fixed() || self.bundles[to].cached_fixed() { if self.merge_bundle_requirements(from, to).is_err() { trace!(" -> conflicting requirements; aborting merge"); return false; } } trace!(" -> committing to merge"); // If we reach here, then the bundles do not overlap -- merge // them! We do this with a merge-sort-like scan over both // lists, building a new range list and replacing the list on // `to` when we're done. if ranges_from.is_empty() { // `from` bundle is empty -- trivial merge. trace!(" -> from bundle{} is empty; trivial merge", from.index()); return true; } if ranges_to.is_empty() { // `to` bundle is empty -- just move the list over from // `from` and set `bundle` up-link on all ranges. trace!(" -> to bundle{} is empty; trivial merge", to.index()); let list = core::mem::replace(&mut self.bundles[from].ranges, smallvec![]); for entry in &list { self.ranges[entry.index].bundle = to; if self.annotations_enabled { self.annotate( entry.range.from, format!( " MERGE range{} v{} from bundle{} to bundle{}", entry.index.index(), self.ranges[entry.index].vreg.index(), from.index(), to.index(), ), ); } } self.bundles[to].ranges = list; if self.bundles[from].cached_fixed() { self.bundles[to].set_cached_fixed(); } if self.bundles[from].cached_fixed_def() { self.bundles[to].set_cached_fixed_def(); } return true; } trace!( "merging: ranges_from = {:?} ranges_to = {:?}", ranges_from, ranges_to ); // Two non-empty lists of LiveRanges: concatenate and // sort. This is faster than a mergesort-like merge into a new // list, empirically. let from_list = core::mem::replace(&mut self.bundles[from].ranges, smallvec![]); for entry in &from_list { self.ranges[entry.index].bundle = to; } self.bundles[to].ranges.extend_from_slice(&from_list[..]); self.bundles[to] .ranges .sort_unstable_by_key(|entry| entry.range.from); if self.annotations_enabled { trace!("merging: merged = {:?}", self.bundles[to].ranges); let mut last_range = None; for i in 0..self.bundles[to].ranges.len() { let entry = self.bundles[to].ranges[i]; if last_range.is_some() { debug_assert!(last_range.unwrap() < entry.range); } last_range = Some(entry.range); if self.ranges[entry.index].bundle == from { self.annotate( entry.range.from, format!( " MERGE range{} v{} from bundle{} to bundle{}", entry.index.index(), self.ranges[entry.index].vreg.index(), from.index(), to.index(), ), ); } trace!( " -> merged result for bundle{}: range{}", to.index(), entry.index.index(), ); } } if self.bundles[from].spillset != self.bundles[to].spillset { // Widen the range for the target spillset to include the one being merged in. 
let from_range = self.spillsets[self.bundles[from].spillset].range; let to_range = &mut self.spillsets[self.bundles[to].spillset].range; *to_range = to_range.join(from_range); } if self.bundles[from].cached_fixed() { self.bundles[to].set_cached_fixed(); } if self.bundles[from].cached_fixed_def() { self.bundles[to].set_cached_fixed_def(); } true } pub fn merge_vreg_bundles(&mut self) { // Create a bundle for every vreg, initially. trace!("merge_vreg_bundles: creating vreg bundles"); for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); if self.vregs[vreg].ranges.is_empty() { continue; } let bundle = self.bundles.add(); let mut range = self.vregs[vreg].ranges.first().unwrap().range; self.bundles[bundle].ranges = self.vregs[vreg].ranges.clone(); trace!("vreg v{} gets bundle{}", vreg.index(), bundle.index()); for entry in &self.bundles[bundle].ranges { trace!( " -> with LR range{}: {:?}", entry.index.index(), entry.range ); range = range.join(entry.range); self.ranges[entry.index].bundle = bundle; } let mut fixed = false; let mut fixed_def = false; for entry in &self.bundles[bundle].ranges { for u in &self.ranges[entry.index].uses { if let OperandConstraint::FixedReg(_) = u.operand.constraint() { fixed = true; if u.operand.kind() == OperandKind::Def { fixed_def = true; } } if fixed && fixed_def { break; } } } if fixed { self.bundles[bundle].set_cached_fixed(); } if fixed_def { self.bundles[bundle].set_cached_fixed_def(); } // Create a spillslot for this bundle. let reg = self.vreg(vreg); let ssidx = self.spillsets.push(SpillSet { slot: SpillSlotIndex::invalid(), required: false, class: reg.class(), reg_hint: PReg::invalid(), spill_bundle: LiveBundleIndex::invalid(), splits: 0, range, }); self.bundles[bundle].spillset = ssidx; } for inst in 0..self.func.num_insts() { let inst = Inst::new(inst); // Attempt to merge Reuse-constraint operand outputs with the // corresponding inputs. for op in self.func.inst_operands(inst) { if let OperandConstraint::Reuse(reuse_idx) = op.constraint() { let src_vreg = op.vreg(); let dst_vreg = self.func.inst_operands(inst)[reuse_idx].vreg(); trace!( "trying to merge reused-input def: src {} to dst {}", src_vreg, dst_vreg ); let src_bundle = self.ranges[self.vregs[src_vreg].ranges[0].index].bundle; debug_assert!(src_bundle.is_valid()); let dest_bundle = self.ranges[self.vregs[dst_vreg].ranges[0].index].bundle; debug_assert!(dest_bundle.is_valid()); self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle); } } } // Attempt to merge blockparams with their inputs. for i in 0..self.blockparam_outs.len() { let BlockparamOut { from_vreg, to_vreg, .. } = self.blockparam_outs[i]; trace!( "trying to merge blockparam v{} with input v{}", to_vreg.index(), from_vreg.index() ); let to_bundle = self.ranges[self.vregs[to_vreg].ranges[0].index].bundle; debug_assert!(to_bundle.is_valid()); let from_bundle = self.ranges[self.vregs[from_vreg].ranges[0].index].bundle; debug_assert!(from_bundle.is_valid()); trace!( " -> from bundle{} to bundle{}", from_bundle.index(), to_bundle.index() ); self.merge_bundles(from_bundle, to_bundle); } trace!("done merging bundles"); } pub fn compute_bundle_prio(&self, bundle: LiveBundleIndex) -> u32 { // The priority is simply the total "length" -- the number of // instructions covered by all LiveRanges. 
let mut total = 0; for entry in &self.bundles[bundle].ranges { total += entry.range.len() as u32; } total } pub fn queue_bundles(&mut self) { for bundle in 0..self.bundles.len() { trace!("enqueueing bundle{}", bundle); let bundle = LiveBundleIndex::new(bundle); if self.bundles[bundle].ranges.is_empty() { trace!(" -> no ranges; skipping"); continue; } let prio = self.compute_bundle_prio(bundle); trace!(" -> prio {}", prio); self.bundles[bundle].prio = prio; self.recompute_bundle_properties(bundle); self.allocation_queue .insert(bundle, prio as usize, PReg::invalid()); } self.stats.merged_bundle_count = self.allocation_queue.heap.len(); } } regalloc2-0.10.2/src/ion/mod.rs000066400000000000000000000101351467034227200161540ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Backtracking register allocator. See doc/DESIGN.md for details of //! its design. use crate::cfg::CFGInfo; use crate::ssa::validate_ssa; use crate::{Function, MachineEnv, Output, PReg, RegAllocError, RegClass}; use alloc::vec; use alloc::vec::Vec; pub(crate) mod data_structures; pub use data_structures::Stats; use data_structures::*; pub(crate) mod reg_traversal; use reg_traversal::*; pub(crate) mod requirement; use requirement::*; pub(crate) mod redundant_moves; use redundant_moves::*; pub(crate) mod liveranges; use liveranges::*; pub(crate) mod merge; pub(crate) mod process; use process::*; use smallvec::smallvec; pub(crate) mod dump; pub(crate) mod moves; pub(crate) mod spill; impl<'a, F: Function> Env<'a, F> { pub(crate) fn new( func: &'a F, env: &'a MachineEnv, cfginfo: CFGInfo, annotations_enabled: bool, ) -> Self { let n = func.num_insts(); Self { func, env, cfginfo, liveins: Vec::with_capacity(func.num_blocks()), liveouts: Vec::with_capacity(func.num_blocks()), blockparam_outs: vec![], blockparam_ins: vec![], bundles: LiveBundles::with_capacity(n), ranges: LiveRanges::with_capacity(4 * n), spillsets: SpillSets::with_capacity(n), vregs: VRegs::with_capacity(n), pregs: vec![], allocation_queue: PrioQueue::new(), spilled_bundles: vec![], spillslots: vec![], slots_by_class: [ SpillSlotList::new(), SpillSlotList::new(), SpillSlotList::new(), ], allocated_bundle_count: 0, extra_spillslots_by_class: [smallvec![], smallvec![], smallvec![]], preferred_victim_by_class: [PReg::invalid(), PReg::invalid(), PReg::invalid()], multi_fixed_reg_fixups: vec![], allocs: Vec::with_capacity(4 * n), inst_alloc_offsets: vec![], num_spillslots: 0, debug_locations: vec![], stats: Stats::default(), debug_annotations: hashbrown::HashMap::new(), annotations_enabled, conflict_set: Default::default(), } } pub(crate) fn init(&mut self) -> Result<(), RegAllocError> { self.create_pregs_and_vregs(); self.compute_liveness()?; self.build_liveranges(); self.fixup_multi_fixed_vregs(); self.merge_vreg_bundles(); self.queue_bundles(); if trace_enabled!() { self.dump_state(); } Ok(()) } pub(crate) fn run(&mut self) -> Result { self.process_bundles()?; self.try_allocating_regs_for_spilled_bundles(); self.allocate_spillslots(); let moves = self.apply_allocations_and_insert_moves(); let edits = self.resolve_inserted_moves(moves); Ok(edits) } } 
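// Illustrative sketch (hypothetical helper, not part of the original source):
// the `Output` returned by `run` below stores every operand allocation in one
// flat `allocs` vector, with `inst_alloc_offsets[i]` marking where
// instruction `i`'s allocations begin (mirroring `Env::get_alloc`). A caller
// could slice out a single instruction's allocations like this:
#[allow(dead_code)]
pub(crate) fn output_allocs_for_inst(output: &Output, inst: crate::Inst) -> &[crate::Allocation] {
    let start = output.inst_alloc_offsets[inst.index()] as usize;
    // Allocations run until the next instruction's offset, or to the end of
    // the vector for the last instruction.
    let end = output
        .inst_alloc_offsets
        .get(inst.index() + 1)
        .map(|&off| off as usize)
        .unwrap_or(output.allocs.len());
    &output.allocs[start..end]
}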
pub fn run( func: &F, mach_env: &MachineEnv, enable_annotations: bool, enable_ssa_checker: bool, ) -> Result { let cfginfo = CFGInfo::new(func)?; if enable_ssa_checker { validate_ssa(func, &cfginfo)?; } let mut env = Env::new(func, mach_env, cfginfo, enable_annotations); env.init()?; let edits = env.run()?; if enable_annotations { env.dump_results(); } Ok(Output { edits: edits.into_edits().collect(), allocs: env.allocs, inst_alloc_offsets: env.inst_alloc_offsets, num_spillslots: env.num_spillslots as usize, debug_locations: env.debug_locations, stats: env.stats, }) } regalloc2-0.10.2/src/ion/moves.rs000066400000000000000000001263241467034227200165360ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Move resolution. use super::{ Env, InsertMovePrio, InsertedMove, InsertedMoves, LiveRangeFlag, LiveRangeIndex, RedundantMoveEliminator, VRegIndex, }; use crate::ion::data_structures::{ u64_key, BlockparamIn, BlockparamOut, CodeRange, Edits, FixedRegFixupLevel, LiveRangeKey, LiveRangeListEntry, }; use crate::ion::reg_traversal::RegTraversalIter; use crate::moves::{MoveAndScratchResolver, ParallelMoves}; use crate::{ Allocation, Block, Edit, Function, FxHashMap, Inst, InstPosition, OperandConstraint, OperandKind, OperandPos, PReg, ProgPoint, RegClass, SpillSlot, }; use alloc::vec::Vec; use alloc::{format, vec}; use hashbrown::hash_map::Entry; use smallvec::{smallvec, SmallVec}; impl<'a, F: Function> Env<'a, F> { pub fn is_start_of_block(&self, pos: ProgPoint) -> bool { let block = self.cfginfo.insn_block[pos.inst().index()]; pos == self.cfginfo.block_entry[block.index()] } pub fn is_end_of_block(&self, pos: ProgPoint) -> bool { let block = self.cfginfo.insn_block[pos.inst().index()]; pos == self.cfginfo.block_exit[block.index()] } pub fn get_alloc(&self, inst: Inst, slot: usize) -> Allocation { let inst_allocs = &self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; inst_allocs[slot] } pub fn set_alloc(&mut self, inst: Inst, slot: usize, alloc: Allocation) { let inst_allocs = &mut self.allocs[self.inst_alloc_offsets[inst.index()] as usize..]; inst_allocs[slot] = alloc; } pub fn get_alloc_for_range(&self, range: LiveRangeIndex) -> Allocation { trace!("get_alloc_for_range: {:?}", range); let bundle = self.ranges[range].bundle; trace!(" -> bundle: {:?}", bundle); let bundledata = &self.bundles[bundle]; trace!(" -> allocation {:?}", bundledata.allocation); if bundledata.allocation != Allocation::none() { bundledata.allocation } else { trace!(" -> spillset {:?}", bundledata.spillset); trace!( " -> spill slot {:?}", self.spillsets[bundledata.spillset].slot ); self.spillslots[self.spillsets[bundledata.spillset].slot.index()].alloc } } pub fn apply_allocations_and_insert_moves(&mut self) -> InsertedMoves { trace!("apply_allocations_and_insert_moves"); trace!("blockparam_ins: {:?}", self.blockparam_ins); trace!("blockparam_outs: {:?}", self.blockparam_outs); let mut inserted_moves = InsertedMoves::default(); // Now that all splits are done, we can pay the cost once to // sort VReg range lists and update with the final ranges. 
for vreg in &mut self.vregs { for entry in &mut vreg.ranges { entry.range = self.ranges[entry.index].range; } vreg.ranges.sort_unstable_by_key(|entry| entry.range.from); } /// Buffered information about the previous liverange that was processed. struct PrevBuffer { prev: Option, prev_ins_idx: usize, buffered: Option, buffered_ins_idx: usize, } impl PrevBuffer { fn new(prev_ins_idx: usize) -> Self { Self { prev: None, prev_ins_idx, buffered: None, buffered_ins_idx: prev_ins_idx, } } /// Returns the previous `LiveRangeListEntry` when it's present. #[inline(always)] fn is_valid(&self) -> Option { self.prev } /// Fetch the current index into the `Env::blockparam_ins` vector. #[inline(always)] fn blockparam_ins_idx(&self) -> usize { self.prev_ins_idx } /// Record this index as the next index to use when the previous liverange buffer /// anvances. #[inline(always)] fn update_blockparam_ins_idx(&mut self, idx: usize) { self.buffered_ins_idx = idx; } /// As overlapping liveranges might start at the same program point, we buffer the /// previous liverange used when determining where to take the last value from for /// intra-block moves. The liveranges we process are buffered until we encounter one /// that starts at a later program point, indicating that it's now safe to advance the /// previous LR buffer. We accumulate the longest-lived liverange in the buffer as a /// heuristic for finding the most stable source of a value. /// /// We also buffer the index into the `Env::blockparam_ins` vector, as we may see /// multiple uses of a blockparam within a single instruction, and as such may need to /// generate multiple blockparam move destinations by re-traversing that section of the /// vector. #[inline(always)] fn advance(&mut self, current: LiveRangeListEntry) { // Advance the `prev` pointer to the `next` pointer, as long as the `next` pointer // does not start at the same time as the current LR we're processing. if self .buffered .map(|entry| entry.range.from < current.range.from) .unwrap_or(false) { self.prev = self.buffered; self.prev_ins_idx = self.buffered_ins_idx; } // Advance the `next` pointer to the currently processed LR, as long as it ends // later than the current `next`. if self .buffered .map(|entry| entry.range.to < current.range.to) .unwrap_or(true) { self.buffered = Some(current); } } } // Determine the ProgPoint where moves on this (from, to) // edge should go: // - If there is more than one in-edge to `to`, then // `from` must have only one out-edge; moves go at tail of // `from` just before last Branch/Ret. // - Otherwise, there must be at most one in-edge to `to`, // and moves go at start of `to`. #[inline(always)] fn choose_move_location<'a, F: Function>( env: &Env<'a, F>, from: Block, to: Block, ) -> (ProgPoint, InsertMovePrio) { let from_last_insn = env.func.block_insns(from).last(); let to_first_insn = env.func.block_insns(to).first(); let from_is_ret = env.func.is_ret(from_last_insn); let to_is_entry = env.func.entry_block() == to; let from_outs = env.func.block_succs(from).len() + if from_is_ret { 1 } else { 0 }; let to_ins = env.func.block_preds(to).len() + if to_is_entry { 1 } else { 0 }; if to_ins > 1 && from_outs <= 1 { ( // N.B.: though semantically the edge moves happen // after the branch, we must insert them before // the branch because otherwise, of course, they // would never execute. This is correct even in // the presence of branches that read register // inputs (e.g. 
conditional branches on some RISCs // that branch on reg zero/not-zero, or any // indirect branch), but for a very subtle reason: // all cases of such branches will (or should) // have multiple successors, and thus due to // critical-edge splitting, their successors will // have only the single predecessor, and we prefer // to insert at the head of the successor in that // case (rather than here). We make this a // requirement, in fact: the user of this library // shall not read registers in a branch // instruction of there is only one successor per // the given CFG information. ProgPoint::before(from_last_insn), InsertMovePrio::OutEdgeMoves, ) } else if to_ins <= 1 { ( ProgPoint::before(to_first_insn), InsertMovePrio::InEdgeMoves, ) } else { panic!( "Critical edge: can't insert moves between blocks {:?} and {:?}", from, to ); } } #[derive(PartialEq)] struct InterBlockDest { to: Block, from: Block, alloc: Allocation, } impl InterBlockDest { fn key(&self) -> u64 { u64_key(self.from.raw_u32(), self.to.raw_u32()) } } let mut inter_block_sources: FxHashMap = FxHashMap::default(); let mut inter_block_dests = Vec::with_capacity(self.func.num_blocks()); #[derive(Hash, Eq, PartialEq)] struct BlockparamSourceKey { bits: u64, } impl BlockparamSourceKey { fn new(from_block: Block, to_vreg: VRegIndex) -> Self { BlockparamSourceKey { bits: u64_key(from_block.raw_u32(), to_vreg.raw_u32()), } } } struct BlockparamDest { from_block: Block, to_block: Block, to_vreg: VRegIndex, alloc: Allocation, } impl BlockparamDest { fn key(&self) -> u64 { u64_key(self.to_block.raw_u32(), self.from_block.raw_u32()) } fn source(&self) -> BlockparamSourceKey { BlockparamSourceKey::new(self.from_block, self.to_vreg) } } let mut block_param_sources = FxHashMap::::with_capacity_and_hasher( 3 * self.func.num_insts(), Default::default(), ); let mut block_param_dests = Vec::with_capacity(3 * self.func.num_insts()); let debug_labels = self.func.debug_value_labels(); let mut reuse_input_insts = Vec::with_capacity(self.func.num_insts() / 2); let mut blockparam_in_idx = 0; let mut blockparam_out_idx = 0; for vreg in 0..self.vregs.len() { let vreg = VRegIndex::new(vreg); if !self.is_vreg_used(vreg) { continue; } inter_block_sources.clear(); // For each range in each vreg, insert moves or // half-moves. We also scan over `blockparam_ins` and // `blockparam_outs`, which are sorted by (block, vreg), // to fill in allocations. let mut prev = PrevBuffer::new(blockparam_in_idx); for range_idx in 0..self.vregs[vreg].ranges.len() { let entry = self.vregs[vreg].ranges[range_idx]; let alloc = self.get_alloc_for_range(entry.index); let range = entry.range; trace!( "apply_allocations: vreg {:?} LR {:?} with range {:?} has alloc {:?}", vreg, entry.index, range, alloc, ); debug_assert!(alloc != Allocation::none()); if self.annotations_enabled { self.annotate( range.from, format!( " <<< start v{} in {} (range{}) (bundle{})", vreg.index(), alloc, entry.index.index(), self.ranges[entry.index].bundle.raw_u32(), ), ); self.annotate( range.to, format!( " end v{} in {} (range{}) (bundle{}) >>>", vreg.index(), alloc, entry.index.index(), self.ranges[entry.index].bundle.raw_u32(), ), ); } prev.advance(entry); // Does this range follow immediately after a prior // range in the same block? If so, insert a move (if // the allocs differ). We do this directly rather than // with half-moves because we eagerly know both sides // already (and also, half-moves are specific to // inter-block transfers). 
// // Note that we do *not* do this if there is also a // def as the first use in the new range: it's // possible that an old liverange covers the Before // pos of an inst, a new liverange covers the After // pos, and the def also happens at After. In this // case we don't want to an insert a move after the // instruction copying the old liverange. // // Note also that we assert that the new range has to // start at the Before-point of an instruction; we // can't insert a move that logically happens just // before After (i.e. in the middle of a single // instruction). if let Some(prev) = prev.is_valid() { let prev_alloc = self.get_alloc_for_range(prev.index); debug_assert!(prev_alloc != Allocation::none()); if prev.range.to >= range.from && (prev.range.to > range.from || !self.is_start_of_block(range.from)) && !self.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef) { trace!( "prev LR {} abuts LR {} in same block; moving {} -> {} for v{}", prev.index.index(), entry.index.index(), prev_alloc, alloc, vreg.index() ); debug_assert_eq!(range.from.pos(), InstPosition::Before); inserted_moves.push( range.from, InsertMovePrio::Regular, prev_alloc, alloc, self.vreg(vreg), ); } } // Scan over blocks whose ends are covered by this // range. For each, for each successor that is not // already in this range (hence guaranteed to have the // same allocation) and if the vreg is live, add a // Source half-move. let mut block = self.cfginfo.insn_block[range.from.inst().index()]; while block.is_valid() && block.index() < self.func.num_blocks() { if range.to < self.cfginfo.block_exit[block.index()].next() { break; } trace!("examining block with end in range: block{}", block.index()); match inter_block_sources.entry(block) { // If the entry is already present in the map, we'll try to prefer a // register allocation. Entry::Occupied(mut entry) => { if !entry.get().is_reg() { entry.insert(alloc); } } Entry::Vacant(entry) => { entry.insert(alloc); } } // Scan forward in `blockparam_outs`, adding all // half-moves for outgoing values to blockparams // in succs. trace!( "scanning blockparam_outs for v{} block{}: blockparam_out_idx = {}", vreg.index(), block.index(), blockparam_out_idx, ); while blockparam_out_idx < self.blockparam_outs.len() { let BlockparamOut { from_vreg, from_block, to_block, to_vreg, } = self.blockparam_outs[blockparam_out_idx]; if (from_vreg, from_block) > (vreg, block) { break; } if (from_vreg, from_block) == (vreg, block) { trace!( " -> found: from v{} block{} to v{} block{}", from_vreg.index(), from_block.index(), to_vreg.index(), to_vreg.index() ); let key = BlockparamSourceKey::new(from_block, to_vreg); match block_param_sources.entry(key) { // As with inter-block moves, if the entry is already present we'll // try to prefer a register allocation. Entry::Occupied(mut entry) => { if !entry.get().is_reg() { entry.insert(alloc); } } Entry::Vacant(entry) => { entry.insert(alloc); } } if self.annotations_enabled { self.annotate( self.cfginfo.block_exit[block.index()], format!( "blockparam-out: block{} to block{}: v{} to v{} in {}", from_block.index(), to_block.index(), from_vreg.index(), to_vreg.index(), alloc ), ); } } blockparam_out_idx += 1; } block = block.next(); } // Scan over blocks whose beginnings are covered by // this range and for which the vreg is live at the // start of the block. For each, for each predecessor, // add a Dest half-move. 
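// Aside (illustrative sketch, not part of the allocator): the `key()`
// methods on the inter-block and blockparam records above are built with
// `u64_key`, which is defined elsewhere in this crate. A hypothetical
// stand-in, assuming the first argument lands in the high 32 bits so that
// the packed keys sort the pairs lexicographically:
#[allow(dead_code)]
fn pack_key_sketch(hi: u32, lo: u32) -> u64 {
    // Sorting by this key orders first by `hi`, then by `lo`, which is the
    // grouping the (from, to) and (block, vreg) sorts above rely on.
    ((hi as u64) << 32) | (lo as u64)
}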
let mut block = self.cfginfo.insn_block[range.from.inst().index()]; if self.cfginfo.block_entry[block.index()] < range.from { block = block.next(); } while block.is_valid() && block.index() < self.func.num_blocks() { if self.cfginfo.block_entry[block.index()] >= range.to { break; } // Add half-moves for blockparam inputs. trace!( "scanning blockparam_ins at vreg {} block {}: blockparam_in_idx = {}", vreg.index(), block.index(), prev.prev_ins_idx, ); let mut idx = prev.blockparam_ins_idx(); while idx < self.blockparam_ins.len() { let BlockparamIn { from_block, to_block, to_vreg, } = self.blockparam_ins[idx]; if (to_vreg, to_block) > (vreg, block) { break; } if (to_vreg, to_block) == (vreg, block) { block_param_dests.push(BlockparamDest { from_block, to_block, to_vreg, alloc, }); trace!( "match: blockparam_in: v{} in block{} from block{} into {}", to_vreg.index(), to_block.index(), from_block.index(), alloc, ); #[cfg(debug_assertions)] if self.annotations_enabled { self.annotate( self.cfginfo.block_entry[block.index()], format!( "blockparam-in: block{} to block{}:into v{} in {}", from_block.index(), to_block.index(), to_vreg.index(), alloc ), ); } } idx += 1; } prev.update_blockparam_ins_idx(idx); if !self.is_live_in(block, vreg) { block = block.next(); continue; } trace!( "scanning preds at vreg {} block {} for ends outside the range", vreg.index(), block.index() ); // Now find any preds whose ends are not in the // same range, and insert appropriate moves. for &pred in self.func.block_preds(block) { trace!( "pred block {} has exit {:?}", pred.index(), self.cfginfo.block_exit[pred.index()] ); if range.contains_point(self.cfginfo.block_exit[pred.index()]) { continue; } inter_block_dests.push(InterBlockDest { from: pred, to: block, alloc, }) } block = block.next(); } // Scan over def/uses and apply allocations. for use_idx in 0..self.ranges[entry.index].uses.len() { let usedata = self.ranges[entry.index].uses[use_idx]; trace!("applying to use: {:?}", usedata); debug_assert!(range.contains_point(usedata.pos)); let inst = usedata.pos.inst(); let slot = usedata.slot; let operand = usedata.operand; self.set_alloc(inst, slot as usize, alloc); if let OperandConstraint::Reuse(_) = operand.constraint() { reuse_input_insts.push(inst); } } // Scan debug-labels on this vreg that overlap with // this range, producing a debug-info output record // giving the allocation location for each label. if !debug_labels.is_empty() { // Do a binary search to find the start of any // labels for this vreg. Recall that we require // debug-label requests to be sorted by vreg as a // precondition (which we verified above). let start = debug_labels .binary_search_by(|&(label_vreg, _label_from, _label_to, _label)| { // Search for the point just before the first // tuple that could be for `vreg` overlapping // with `range`. Never return // `Ordering::Equal`; `binary_search_by` in // this case returns the index of the first // entry that is greater as an `Err`. if label_vreg.vreg() < vreg.index() { core::cmp::Ordering::Less } else { core::cmp::Ordering::Greater } }) .unwrap_err(); for &(label_vreg, label_from, label_to, label) in &debug_labels[start..] 
{ let label_from = ProgPoint::before(label_from); let label_to = ProgPoint::before(label_to); let label_range = CodeRange { from: label_from, to: label_to, }; if label_vreg.vreg() != vreg.index() { break; } if !range.overlaps(&label_range) { continue; } let from = core::cmp::max(label_from, range.from); let to = core::cmp::min(label_to, range.to); self.debug_locations.push((label, from, to, alloc)); } } } if !inter_block_dests.is_empty() { self.stats.halfmoves_count += inter_block_dests.len() * 2; inter_block_dests.sort_unstable_by_key(InterBlockDest::key); let vreg = self.vreg(vreg); trace!("processing inter-block moves for {}", vreg); for dest in inter_block_dests.drain(..) { let src = inter_block_sources[&dest.from]; trace!( " -> moving from {} to {} between {:?} and {:?}", src, dest.alloc, dest.from, dest.to ); let (pos, prio) = choose_move_location(self, dest.from, dest.to); inserted_moves.push(pos, prio, src, dest.alloc, vreg); } } blockparam_in_idx = prev.blockparam_ins_idx(); } if !block_param_dests.is_empty() { self.stats.halfmoves_count += block_param_sources.len(); self.stats.halfmoves_count += block_param_dests.len(); trace!("processing block-param moves"); for dest in block_param_dests { let src = dest.source(); let src_alloc = block_param_sources.get(&src).unwrap(); let (pos, prio) = choose_move_location(self, dest.from_block, dest.to_block); inserted_moves.push(pos, prio, *src_alloc, dest.alloc, self.vreg(dest.to_vreg)); } } // Handle multi-fixed-reg constraints by copying. for fixup in core::mem::replace(&mut self.multi_fixed_reg_fixups, vec![]) { let from_alloc = self.get_alloc(fixup.pos.inst(), fixup.from_slot as usize); let to_alloc = Allocation::reg(PReg::from_index(fixup.to_preg.index())); trace!( "multi-fixed-move constraint at {:?} from {} to {} for v{}", fixup.pos, from_alloc, to_alloc, fixup.vreg.index(), ); let prio = match fixup.level { FixedRegFixupLevel::Initial => InsertMovePrio::MultiFixedRegInitial, FixedRegFixupLevel::Secondary => InsertMovePrio::MultiFixedRegSecondary, }; inserted_moves.push(fixup.pos, prio, from_alloc, to_alloc, self.vreg(fixup.vreg)); self.set_alloc( fixup.pos.inst(), fixup.to_slot as usize, Allocation::reg(PReg::from_index(fixup.to_preg.index())), ); } // Handle outputs that reuse inputs: copy beforehand, then set // input's alloc to output's. // // Note that the output's allocation may not *actually* be // valid until InstPosition::After, but the reused input may // occur at InstPosition::Before. This may appear incorrect, // but we make it work by ensuring that all *other* inputs are // extended to InstPosition::After so that the def will not // interfere. (The liveness computation code does this -- we // do not require the user to do so.) // // One might ask: why not insist that input-reusing defs occur // at InstPosition::Before? this would be correct, but would // mean that the reused input and the reusing output // interfere, *guaranteeing* that every such case would // require a move. This is really bad on ISAs (like x86) where // reused inputs are ubiquitous. // // Another approach might be to put the def at Before, and // trim the reused input's liverange back to the previous // instruction's After. This is kind of OK until (i) a block // boundary occurs between the prior inst and this one, or // (ii) any moves/spills/reloads occur between the two // instructions. We really do need the input to be live at // this inst's Before. 
// // In principle what we really need is a "BeforeBefore" // program point, but we don't want to introduce that // everywhere and pay the cost of twice as many ProgPoints // throughout the allocator. // // Or we could introduce a separate move instruction -- this // is the approach that regalloc.rs takes with "mod" operands // -- but that is also costly. // // So we take this approach (invented by IonMonkey -- somewhat // hard to discern, though see [0] for a comment that makes // this slightly less unclear) to avoid interference between // the actual reused input and reusing output, ensure // interference (hence no incorrectness) between other inputs // and the reusing output, and not require a separate explicit // move instruction. // // [0] https://searchfox.org/mozilla-central/rev/3a798ef9252896fb389679f06dd3203169565af0/js/src/jit/shared/Lowering-shared-inl.h#108-110 for inst in reuse_input_insts { let mut input_reused: SmallVec<[usize; 4]> = smallvec![]; for output_idx in 0..self.func.inst_operands(inst).len() { let operand = self.func.inst_operands(inst)[output_idx]; if let OperandConstraint::Reuse(input_idx) = operand.constraint() { debug_assert!(!input_reused.contains(&input_idx)); debug_assert_eq!(operand.pos(), OperandPos::Late); input_reused.push(input_idx); let input_alloc = self.get_alloc(inst, input_idx); let output_alloc = self.get_alloc(inst, output_idx); trace!( "reuse-input inst {:?}: output {} has alloc {:?}, input {} has alloc {:?}", inst, output_idx, output_alloc, input_idx, input_alloc ); if input_alloc != output_alloc { #[cfg(debug_assertions)] if self.annotations_enabled { self.annotate( ProgPoint::before(inst), format!(" reuse-input-copy: {} -> {}", input_alloc, output_alloc), ); } let input_operand = self.func.inst_operands(inst)[input_idx]; inserted_moves.push( ProgPoint::before(inst), InsertMovePrio::ReusedInput, input_alloc, output_alloc, input_operand.vreg(), ); self.set_alloc(inst, input_idx, output_alloc); } } } } // Sort the debug-locations vector; we provide this // invariant to the client. self.debug_locations.sort_unstable(); inserted_moves } pub fn resolve_inserted_moves(&mut self, mut inserted_moves: InsertedMoves) -> Edits { // For each program point, gather all moves together. Then // resolve (see cases below). let mut i = 0; inserted_moves .moves .sort_unstable_by_key(|m| m.pos_prio.key()); // Redundant-move elimination state tracker. let mut redundant_moves = RedundantMoveEliminator::default(); fn redundant_move_process_side_effects<'a, F: Function>( this: &Env<'a, F>, redundant_moves: &mut RedundantMoveEliminator, from: ProgPoint, to: ProgPoint, ) { // If we cross a block boundary, clear and return. if this.cfginfo.insn_block[from.inst().index()] != this.cfginfo.insn_block[to.inst().index()] { redundant_moves.clear(); return; } let start_inst = if from.pos() == InstPosition::Before { from.inst() } else { from.inst().next() }; let end_inst = if to.pos() == InstPosition::Before { to.inst() } else { to.inst().next() }; for inst in start_inst.index()..end_inst.index() { let inst = Inst::new(inst); for (i, op) in this.func.inst_operands(inst).iter().enumerate() { match op.kind() { OperandKind::Def => { let alloc = this.get_alloc(inst, i); redundant_moves.clear_alloc(alloc); } _ => {} } } for reg in this.func.inst_clobbers(inst) { redundant_moves.clear_alloc(Allocation::reg(reg)); } // The dedicated scratch registers may be clobbered by any // instruction. 
for reg in this.env.scratch_by_class { if let Some(reg) = reg { redundant_moves.clear_alloc(Allocation::reg(reg)); } } } } let mut last_pos = ProgPoint::before(Inst::new(0)); let mut edits = Edits::with_capacity(self.func.num_insts()); while i < inserted_moves.moves.len() { let start = i; let pos_prio = inserted_moves.moves[i].pos_prio; while i < inserted_moves.moves.len() && inserted_moves.moves[i].pos_prio == pos_prio { i += 1; } let moves = &inserted_moves.moves[start..i]; redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos); last_pos = pos_prio.pos; // Gather all the moves in each RegClass separately. // These cannot interact, so it is safe to have separate // ParallelMove instances. They need to be separate because // moves between the classes are impossible. (We could // enhance ParallelMoves to understand register classes, but // this seems simpler.) let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; let mut vec_moves: SmallVec<[InsertedMove; 8]> = smallvec![]; for m in moves { match m.to_vreg.class() { RegClass::Int => { int_moves.push(m.clone()); } RegClass::Float => { float_moves.push(m.clone()); } RegClass::Vector => { vec_moves.push(m.clone()); } } } for &(regclass, moves) in &[ (RegClass::Int, &int_moves), (RegClass::Float, &float_moves), (RegClass::Vector, &vec_moves), ] { // All moves in `moves` semantically happen in // parallel. Let's resolve these to a sequence of moves // that can be done one at a time. let mut parallel_moves = ParallelMoves::new(); trace!( "parallel moves at pos {:?} prio {:?}", pos_prio.pos, pos_prio.prio ); for m in moves { trace!(" {} -> {}", m.from_alloc, m.to_alloc); parallel_moves.add(m.from_alloc, m.to_alloc, Some(m.to_vreg)); } let resolved = parallel_moves.resolve(); let mut scratch_iter = RegTraversalIter::new( self.env, regclass, PReg::invalid(), PReg::invalid(), 0, None, ); let mut dedicated_scratch = self.env.scratch_by_class[regclass as usize]; let key = LiveRangeKey::from_range(&CodeRange { from: pos_prio.pos, to: pos_prio.pos.next(), }); let find_free_reg = || { // Use the dedicated scratch register first if it is // available. if let Some(reg) = dedicated_scratch.take() { return Some(Allocation::reg(reg)); } while let Some(preg) = scratch_iter.next() { if !self.pregs[preg.index()] .allocations .btree .contains_key(&key) { let alloc = Allocation::reg(preg); if moves .iter() .any(|m| m.from_alloc == alloc || m.to_alloc == alloc) { // Skip pregs used by moves in this // parallel move set, even if not // marked used at progpoint: edge move // liveranges meet but don't overlap // so otherwise we may incorrectly // overwrite a source reg. continue; } return Some(alloc); } } None }; let mut stackslot_idx = 0; let get_stackslot = || { let idx = stackslot_idx; stackslot_idx += 1; // We can't borrow `self` as mutable, so we create // these placeholders then allocate the actual // slots if needed with `self.allocate_spillslot` // below. 
Allocation::stack(SpillSlot::new(SpillSlot::MAX - idx)) }; let is_stack_alloc = |alloc: Allocation| { if let Some(preg) = alloc.as_reg() { self.pregs[preg.index()].is_stack } else { alloc.is_stack() } }; let preferred_victim = self.preferred_victim_by_class[regclass as usize]; let scratch_resolver = MoveAndScratchResolver { find_free_reg, get_stackslot, is_stack_alloc, borrowed_scratch_reg: preferred_victim, }; let resolved = scratch_resolver.compute(resolved); let mut rewrites = FxHashMap::default(); for i in 0..stackslot_idx { if i >= self.extra_spillslots_by_class[regclass as usize].len() { let slot = self.allocate_spillslot(self.func.spillslot_size(regclass) as u32); self.extra_spillslots_by_class[regclass as usize].push(slot); } rewrites.insert( Allocation::stack(SpillSlot::new(SpillSlot::MAX - i)), self.extra_spillslots_by_class[regclass as usize][i], ); } for (src, dst, to_vreg) in resolved { let src = rewrites.get(&src).cloned().unwrap_or(src); let dst = rewrites.get(&dst).cloned().unwrap_or(dst); trace!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); let action = redundant_moves.process_move(src, dst, to_vreg); if !action.elide { edits.add(pos_prio, src, dst); } else { trace!(" -> redundant move elided"); } } } } // Ensure edits are in sorted ProgPoint order. N.B.: this must // be a stable sort! We have to keep the order produced by the // parallel-move resolver for all moves within a single sort // key. edits.sort(); self.stats.edits_count = edits.len(); // Add debug annotations. if self.annotations_enabled { for &(pos_prio, ref edit) in edits.iter() { match edit { &Edit::Move { from, to } => { self.annotate(pos_prio.pos, format!("move {} -> {}", from, to)); } } } } edits } } regalloc2-0.10.2/src/ion/process.rs000066400000000000000000001570171467034227200170660ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Main allocation loop that processes bundles. 
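// Illustrative sketch (hypothetical helper, not used by this module): the
// conflict probe in `try_to_allocate_bundle_to_reg` below co-iterates the
// bundle's sorted ranges with the preg's sorted allocation map, merge
// style, so a probe costs roughly one pass over both sequences instead of
// one map lookup per bundle range. The same idea on plain half-open
// `(from, to)` pairs:
#[allow(dead_code)]
fn any_overlap_sketch(mut a: &[(u32, u32)], mut b: &[(u32, u32)]) -> bool {
    while let (Some(&(a_from, a_to)), Some(&(b_from, b_to))) = (a.first(), b.first()) {
        if a_to <= b_from {
            // `a`'s head ends before `b`'s head starts; advance `a`.
            a = &a[1..];
        } else if b_to <= a_from {
            // `b`'s head ends before `a`'s head starts; advance `b`.
            b = &b[1..];
        } else {
            // The two heads overlap.
            return true;
        }
    }
    false
}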
use super::{ spill_weight_from_constraint, Env, LiveBundleIndex, LiveBundleVec, LiveRangeFlag, LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry, PRegIndex, RegTraversalIter, Requirement, SpillWeight, UseList, VRegIndex, }; use crate::{ ion::data_structures::{ CodeRange, BUNDLE_MAX_NORMAL_SPILL_WEIGHT, MAX_SPLITS_PER_SPILLSET, MINIMAL_BUNDLE_SPILL_WEIGHT, MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT, }, Allocation, Function, FxHashSet, Inst, InstPosition, OperandConstraint, OperandKind, PReg, ProgPoint, RegAllocError, }; use core::fmt::Debug; use smallvec::{smallvec, SmallVec}; #[derive(Clone, Debug, PartialEq, Eq)] pub enum AllocRegResult { Allocated(Allocation), Conflict(LiveBundleVec, ProgPoint), ConflictWithFixed(u32, ProgPoint), ConflictHighCost, } impl<'a, F: Function> Env<'a, F> { pub fn process_bundles(&mut self) -> Result<(), RegAllocError> { while let Some((bundle, reg_hint)) = self.allocation_queue.pop() { self.stats.process_bundle_count += 1; self.process_bundle(bundle, reg_hint)?; } self.stats.final_liverange_count = self.ranges.len(); self.stats.final_bundle_count = self.bundles.len(); self.stats.spill_bundle_count = self.spilled_bundles.len(); Ok(()) } pub fn try_to_allocate_bundle_to_reg( &mut self, bundle: LiveBundleIndex, reg: PRegIndex, // if the max bundle weight in the conflict set exceeds this // cost (if provided), just return // `AllocRegResult::ConflictHighCost`. max_allowable_cost: Option, ) -> AllocRegResult { trace!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg); let mut conflicts = smallvec![]; self.conflict_set.clear(); let mut max_conflict_weight = 0; // Traverse the BTreeMap in order by requesting the whole // range spanned by the bundle and iterating over that // concurrently with our ranges. Because our ranges are in // order, and the BTreeMap is as well, this allows us to have // an overall O(n log n) + O(b) complexity, where the PReg has // n current ranges and the bundle has b ranges, rather than // O(b * n log n) with the simple probe-for-each-bundle-range // approach. // // Note that the comparator function on a CodeRange tests for // *overlap*, so we are checking whether the BTree contains // any preg range that *overlaps* with range `range`, not // literally the range `range`. let bundle_ranges = &self.bundles[bundle].ranges; let from_key = LiveRangeKey::from_range(&CodeRange { from: bundle_ranges.first().unwrap().range.from, to: bundle_ranges.first().unwrap().range.from, }); let mut preg_range_iter = self.pregs[reg.index()] .allocations .btree .range(from_key..) .peekable(); trace!( "alloc map for {:?} in range {:?}..: {:?}", reg, from_key, self.pregs[reg.index()].allocations.btree ); let mut first_conflict: Option = None; 'ranges: for entry in bundle_ranges { trace!(" -> range LR {:?}: {:?}", entry.index, entry.range); let key = LiveRangeKey::from_range(&entry.range); let mut skips = 0; 'alloc: loop { trace!(" -> PReg range {:?}", preg_range_iter.peek()); // Advance our BTree traversal until it is >= this bundle // range (i.e., skip PReg allocations in the BTree that // are completely before this bundle range). if preg_range_iter.peek().is_some() && *preg_range_iter.peek().unwrap().0 < key { trace!( "Skipping PReg range {:?}", preg_range_iter.peek().unwrap().0 ); preg_range_iter.next(); skips += 1; if skips >= 16 { let from_pos = entry.range.from; let from_key = LiveRangeKey::from_range(&CodeRange { from: from_pos, to: from_pos, }); preg_range_iter = self.pregs[reg.index()] .allocations .btree .range(from_key..) 
.peekable(); skips = 0; } continue 'alloc; } skips = 0; // If there are no more PReg allocations, we're done! if preg_range_iter.peek().is_none() { trace!(" -> no more PReg allocations; so no conflict possible!"); break 'ranges; } // If the current PReg range is beyond this range, there is no conflict; continue. if *preg_range_iter.peek().unwrap().0 > key { trace!( " -> next PReg allocation is at {:?}; moving to next VReg range", preg_range_iter.peek().unwrap().0 ); break 'alloc; } // Otherwise, there is a conflict. let preg_key = *preg_range_iter.peek().unwrap().0; debug_assert_eq!(preg_key, key); // Assert that this range overlaps. let preg_range = preg_range_iter.next().unwrap().1; trace!(" -> btree contains range {:?} that overlaps", preg_range); if preg_range.is_valid() { trace!(" -> from vreg {:?}", self.ranges[*preg_range].vreg); // range from an allocated bundle: find the bundle and add to // conflicts list. let conflict_bundle = self.ranges[*preg_range].bundle; trace!(" -> conflict bundle {:?}", conflict_bundle); if self.conflict_set.insert(conflict_bundle) { conflicts.push(conflict_bundle); max_conflict_weight = core::cmp::max( max_conflict_weight, self.bundles[conflict_bundle].cached_spill_weight(), ); if max_allowable_cost.is_some() && max_conflict_weight > max_allowable_cost.unwrap() { trace!(" -> reached high cost, retrying early"); return AllocRegResult::ConflictHighCost; } } if first_conflict.is_none() { first_conflict = Some(ProgPoint::from_index(core::cmp::max( preg_key.from, key.from, ))); } } else { trace!(" -> conflict with fixed reservation"); // range from a direct use of the PReg (due to clobber). return AllocRegResult::ConflictWithFixed( max_conflict_weight, ProgPoint::from_index(preg_key.from), ); } } } if conflicts.len() > 0 { return AllocRegResult::Conflict(conflicts, first_conflict.unwrap()); } // We can allocate! Add our ranges to the preg's BTree. let preg = PReg::from_index(reg.index()); trace!(" -> bundle {:?} assigned to preg {:?}", bundle, preg); self.bundles[bundle].allocation = Allocation::reg(preg); for entry in &self.bundles[bundle].ranges { let key = LiveRangeKey::from_range(&entry.range); let res = self.pregs[reg.index()] .allocations .btree .insert(key, entry.index); // We disallow LR overlap within bundles, so this should never be possible. debug_assert!(res.is_none()); } AllocRegResult::Allocated(Allocation::reg(preg)) } pub fn evict_bundle(&mut self, bundle: LiveBundleIndex) { trace!( "evicting bundle {:?}: alloc {:?}", bundle, self.bundles[bundle].allocation ); let preg = match self.bundles[bundle].allocation.as_reg() { Some(preg) => preg, None => { trace!( " -> has no allocation! 
{:?}", self.bundles[bundle].allocation ); return; } }; let preg_idx = PRegIndex::new(preg.index()); self.bundles[bundle].allocation = Allocation::none(); for entry in &self.bundles[bundle].ranges { trace!(" -> removing LR {:?} from reg {:?}", entry.index, preg_idx); self.pregs[preg_idx.index()] .allocations .btree .remove(&LiveRangeKey::from_range(&entry.range)); } let prio = self.bundles[bundle].prio; trace!(" -> prio {}; back into queue", prio); self.allocation_queue .insert(bundle, prio as usize, PReg::invalid()); } pub fn bundle_spill_weight(&self, bundle: LiveBundleIndex) -> u32 { self.bundles[bundle].cached_spill_weight() } pub fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { trace!("maximum_spill_weight_in_bundle_set: {:?}", bundles); let m = bundles .iter() .map(|&b| { let w = self.bundles[b].cached_spill_weight(); trace!("bundle{}: {}", b.index(), w); w }) .max() .unwrap_or(0); trace!(" -> max: {}", m); m } pub fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { trace!("recompute bundle properties: bundle {:?}", bundle); let minimal; let mut fixed = false; let mut fixed_def = false; let bundledata = &self.bundles[bundle]; let first_range = bundledata.ranges[0].index; let first_range_data = &self.ranges[first_range]; self.bundles[bundle].prio = self.compute_bundle_prio(bundle); if first_range_data.vreg.is_invalid() { trace!(" -> no vreg; minimal and fixed"); minimal = true; fixed = true; } else { for u in &first_range_data.uses { trace!(" -> use: {:?}", u); if let OperandConstraint::FixedReg(_) = u.operand.constraint() { trace!(" -> fixed operand at {:?}: {:?}", u.pos, u.operand); fixed = true; if u.operand.kind() == OperandKind::Def { trace!(" -> is fixed def"); fixed_def = true; } break; } } // Minimal if the range covers only one instruction. Note // that it could cover just one ProgPoint, // i.e. X.Before..X.After, or two ProgPoints, // i.e. X.Before..X+1.Before. 
trace!(" -> first range has range {:?}", first_range_data.range); let bundle_start = self.bundles[bundle].ranges.first().unwrap().range.from; let bundle_end = self.bundles[bundle].ranges.last().unwrap().range.to; minimal = bundle_start.inst() == bundle_end.prev().inst(); trace!(" -> minimal: {}", minimal); } let spill_weight = if minimal { if fixed { trace!(" -> fixed and minimal"); MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT } else { trace!(" -> non-fixed and minimal"); MINIMAL_BUNDLE_SPILL_WEIGHT } } else { let mut total = SpillWeight::zero(); for entry in &self.bundles[bundle].ranges { let range_data = &self.ranges[entry.index]; trace!( " -> uses spill weight: +{:?}", range_data.uses_spill_weight() ); total = total + range_data.uses_spill_weight(); } if self.bundles[bundle].prio > 0 { let final_weight = (total.to_f32() as u32) / self.bundles[bundle].prio; trace!( " -> dividing by prio {}; final weight {}", self.bundles[bundle].prio, final_weight ); core::cmp::min(BUNDLE_MAX_NORMAL_SPILL_WEIGHT, final_weight) } else { 0 } }; self.bundles[bundle].set_cached_spill_weight_and_props( spill_weight, minimal, fixed, fixed_def, ); } pub fn minimal_bundle(&self, bundle: LiveBundleIndex) -> bool { self.bundles[bundle].cached_minimal() } pub fn recompute_range_properties(&mut self, range: LiveRangeIndex) { let rangedata = &mut self.ranges[range]; let mut w = SpillWeight::zero(); for u in &rangedata.uses { w = w + SpillWeight::from_bits(u.weight); trace!("range{}: use {:?}", range.index(), u); } rangedata.set_uses_spill_weight(w); if rangedata.uses.len() > 0 && rangedata.uses[0].operand.kind() == OperandKind::Def { // Note that we *set* the flag here, but we never *clear* // it: it may be set by a progmove as well (which does not // create an explicit use or def), and we want to preserve // that. We will never split or trim ranges in a way that // removes a def at the front and requires the flag to be // cleared. rangedata.set_flag(LiveRangeFlag::StartsAtDef); } } pub fn get_or_create_spill_bundle( &mut self, bundle: LiveBundleIndex, create_if_absent: bool, ) -> Option { let ssidx = self.bundles[bundle].spillset; let idx = self.spillsets[ssidx].spill_bundle; if idx.is_valid() { Some(idx) } else if create_if_absent { let idx = self.bundles.add(); self.spillsets[ssidx].spill_bundle = idx; self.bundles[idx].spillset = ssidx; self.spilled_bundles.push(idx); Some(idx) } else { None } } pub fn split_and_requeue_bundle( &mut self, bundle: LiveBundleIndex, mut split_at: ProgPoint, reg_hint: PReg, // Do we trim the parts around the split and put them in the // spill bundle? mut trim_ends_into_spill_bundle: bool, ) { self.stats.splits += 1; trace!( "split bundle {:?} at {:?} and requeue with reg hint (for first part) {:?}", bundle, split_at, reg_hint, ); // Split `bundle` at `split_at`, creating new LiveRanges and // bundles (and updating vregs' linked lists appropriately), // and enqueue the new bundles. let spillset = self.bundles[bundle].spillset; // Have we reached the maximum split count? If so, fall back // to a "minimal bundles and spill bundle" setup for this // bundle. See the doc-comment on // `split_into_minimal_bundles()` above for more. if self.spillsets[spillset].splits >= MAX_SPLITS_PER_SPILLSET { self.split_into_minimal_bundles(bundle, reg_hint); return; } self.spillsets[spillset].splits += 1; debug_assert!(!self.bundles[bundle].ranges.is_empty()); // Split point *at* start is OK; this means we peel off // exactly one use to create a minimal bundle. 
let bundle_start = self.bundles[bundle].ranges.first().unwrap().range.from; debug_assert!(split_at >= bundle_start); let bundle_end = self.bundles[bundle].ranges.last().unwrap().range.to; debug_assert!(split_at < bundle_end); // Is the split point *at* the start? If so, peel off the // first use: set the split point just after it, or just // before it if it comes after the start of the bundle. if split_at == bundle_start { // Find any uses; if none, just chop off one instruction. let mut first_use = None; 'outer: for entry in &self.bundles[bundle].ranges { for u in &self.ranges[entry.index].uses { first_use = Some(u.pos); break 'outer; } } trace!(" -> first use loc is {:?}", first_use); split_at = match first_use { Some(pos) => { if pos.inst() == bundle_start.inst() { ProgPoint::before(pos.inst().next()) } else { ProgPoint::before(pos.inst()) } } None => ProgPoint::before( self.bundles[bundle] .ranges .first() .unwrap() .range .from .inst() .next(), ), }; trace!( "split point is at bundle start; advancing to {:?}", split_at ); } else { // Don't split in the middle of an instruction -- this could // create impossible moves (we cannot insert a move between an // instruction's uses and defs). if split_at.pos() == InstPosition::After { split_at = split_at.next(); } if split_at >= bundle_end { split_at = split_at.prev().prev(); } } debug_assert!(split_at > bundle_start && split_at < bundle_end); // We need to find which LRs fall on each side of the split, // which LR we need to split down the middle, then update the // current bundle, create a new one, and (re)-queue both. trace!(" -> LRs: {:?}", self.bundles[bundle].ranges); let mut last_lr_in_old_bundle_idx = 0; // last LR-list index in old bundle let mut first_lr_in_new_bundle_idx = 0; // first LR-list index in new bundle for (i, entry) in self.bundles[bundle].ranges.iter().enumerate() { if split_at > entry.range.from { last_lr_in_old_bundle_idx = i; first_lr_in_new_bundle_idx = i; } if split_at < entry.range.to { first_lr_in_new_bundle_idx = i; // When the bundle contains a fixed constraint, we advance the split point to right // before the first instruction with a fixed use present. if self.bundles[bundle].cached_fixed() { for u in &self.ranges[entry.index].uses { if u.pos < split_at { continue; } if matches!(u.operand.constraint(), OperandConstraint::FixedReg { .. }) { split_at = ProgPoint::before(u.pos.inst()); if split_at > entry.range.from { last_lr_in_old_bundle_idx = i; } trace!(" -> advancing split point to {split_at:?}"); trim_ends_into_spill_bundle = false; break; } } } break; } } trace!( " -> last LR in old bundle: LR {:?}", self.bundles[bundle].ranges[last_lr_in_old_bundle_idx] ); trace!( " -> first LR in new bundle: LR {:?}", self.bundles[bundle].ranges[first_lr_in_new_bundle_idx] ); // Take the sublist of LRs that will go in the new bundle. let mut new_lr_list: LiveRangeList = self.bundles[bundle] .ranges .iter() .cloned() .skip(first_lr_in_new_bundle_idx) .collect(); self.bundles[bundle] .ranges .truncate(last_lr_in_old_bundle_idx + 1); self.bundles[bundle].ranges.shrink_to_fit(); // If the first entry in `new_lr_list` is a LR that is split // down the middle, replace it with a new LR and chop off the // end of the same LR in the original list. 
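// Illustrative sketch (hypothetical helper): chopping an LR's sorted use
// list at the split point is a partition at the first use whose position is
// >= `split_at`; everything before stays with the old LR, everything from
// that index on moves to the new LR. The code below does the same with
// `position(..)` plus `truncate(..)`.
#[allow(dead_code)]
fn split_uses_sketch(use_positions: &[u32], split_at: u32) -> (&[u32], &[u32]) {
    let idx = use_positions
        .iter()
        .position(|&p| p >= split_at)
        .unwrap_or(use_positions.len());
    use_positions.split_at(idx)
}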
if split_at > new_lr_list[0].range.from { debug_assert_eq!(last_lr_in_old_bundle_idx, first_lr_in_new_bundle_idx); let orig_lr = new_lr_list[0].index; let new_lr = self.ranges.add(CodeRange { from: split_at, to: new_lr_list[0].range.to, }); self.ranges[new_lr].vreg = self.ranges[orig_lr].vreg; trace!(" -> splitting LR {:?} into {:?}", orig_lr, new_lr); let first_use = self.ranges[orig_lr] .uses .iter() .position(|u| u.pos >= split_at) .unwrap_or(self.ranges[orig_lr].uses.len()); let rest_uses: UseList = self.ranges[orig_lr] .uses .iter() .cloned() .skip(first_use) .collect(); self.ranges[new_lr].uses = rest_uses; self.ranges[orig_lr].uses.truncate(first_use); self.ranges[orig_lr].uses.shrink_to_fit(); self.recompute_range_properties(orig_lr); self.recompute_range_properties(new_lr); new_lr_list[0].index = new_lr; new_lr_list[0].range = self.ranges[new_lr].range; self.ranges[orig_lr].range.to = split_at; self.bundles[bundle].ranges[last_lr_in_old_bundle_idx].range = self.ranges[orig_lr].range; // Perform a lazy split in the VReg data. We just // append the new LR and its range; we will sort by // start of range, and fix up range ends, once when we // iterate over the VReg's ranges after allocation // completes (this is the only time when order // matters). self.vregs[self.ranges[new_lr].vreg] .ranges .push(LiveRangeListEntry { range: self.ranges[new_lr].range, index: new_lr, }); } let new_bundle = self.bundles.add(); trace!(" -> creating new bundle {:?}", new_bundle); self.bundles[new_bundle].spillset = spillset; for entry in &new_lr_list { self.ranges[entry.index].bundle = new_bundle; } self.bundles[new_bundle].ranges = new_lr_list; if trim_ends_into_spill_bundle { // Finally, handle moving LRs to the spill bundle when // appropriate: If the first range in `new_bundle` or last // range in `bundle` has "empty space" beyond the first or // last use (respectively), trim it and put an empty LR into // the spill bundle. (We are careful to treat the "starts at // def" flag as an implicit first def even if no def-type Use // is present.) 
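// Illustrative sketch (hypothetical, program points numbered two per
// instruction): the trailing trim below keeps the old bundle only up to
// just after its last use's instruction; whatever remains of the final
// range past that point carries no uses and is handed to the spill bundle
// as an empty LR.
#[allow(dead_code)]
fn trailing_trim_sketch(last_use_inst: u32, range_end_point: u32) -> Option<(u32, u32)> {
    // Before-point of the instruction following the last use.
    let split_point = (last_use_inst + 1) * 2;
    if split_point < range_end_point {
        // The empty tail that can live in the spill bundle.
        Some((split_point, range_end_point))
    } else {
        None
    }
}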
while let Some(entry) = self.bundles[bundle].ranges.last().cloned() { let end = entry.range.to; let vreg = self.ranges[entry.index].vreg; let last_use = self.ranges[entry.index].uses.last().map(|u| u.pos); if last_use.is_none() { let spill = self .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) .unwrap(); trace!( " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", bundle, entry.index, spill ); self.bundles[spill].ranges.push(entry); self.bundles[bundle].ranges.pop(); self.ranges[entry.index].bundle = spill; continue; } let last_use = last_use.unwrap(); let split = ProgPoint::before(last_use.inst().next()); if split < end { let spill = self .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) .unwrap(); self.bundles[bundle].ranges.last_mut().unwrap().range.to = split; self.ranges[self.bundles[bundle].ranges.last().unwrap().index] .range .to = split; let range = CodeRange { from: split, to: end, }; let empty_lr = self.ranges.add(range); self.bundles[spill].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); self.ranges[empty_lr].bundle = spill; self.vregs[vreg].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); trace!( " -> bundle {:?} range {:?}: last use implies split point {:?}", bundle, entry.index, split ); trace!( " -> moving trailing empty region to new spill bundle {:?} with new LR {:?}", spill, empty_lr ); } break; } while let Some(entry) = self.bundles[new_bundle].ranges.first().cloned() { if self.ranges[entry.index].has_flag(LiveRangeFlag::StartsAtDef) { break; } let start = entry.range.from; let vreg = self.ranges[entry.index].vreg; let first_use = self.ranges[entry.index].uses.first().map(|u| u.pos); if first_use.is_none() { let spill = self .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) .unwrap(); trace!( " -> bundle {:?} range {:?}: no uses; moving to spill bundle {:?}", new_bundle, entry.index, spill ); self.bundles[spill].ranges.push(entry); self.bundles[new_bundle].ranges.drain(..1); self.ranges[entry.index].bundle = spill; continue; } let first_use = first_use.unwrap(); let split = ProgPoint::before(first_use.inst()); if split > start { let spill = self .get_or_create_spill_bundle(new_bundle, /* create_if_absent = */ true) .unwrap(); self.bundles[new_bundle] .ranges .first_mut() .unwrap() .range .from = split; self.ranges[self.bundles[new_bundle].ranges.first().unwrap().index] .range .from = split; let range = CodeRange { from: start, to: split, }; let empty_lr = self.ranges.add(range); self.bundles[spill].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); self.ranges[empty_lr].bundle = spill; self.vregs[vreg].ranges.push(LiveRangeListEntry { range, index: empty_lr, }); trace!( " -> bundle {:?} range {:?}: first use implies split point {:?}", bundle, entry.index, first_use, ); trace!( " -> moving leading empty region to new spill bundle {:?} with new LR {:?}", spill, empty_lr ); } break; } } if self.bundles[bundle].ranges.len() > 0 { self.recompute_bundle_properties(bundle); let prio = self.bundles[bundle].prio; self.allocation_queue .insert(bundle, prio as usize, reg_hint); } if self.bundles[new_bundle].ranges.len() > 0 { self.recompute_bundle_properties(new_bundle); let prio = self.bundles[new_bundle].prio; self.allocation_queue .insert(new_bundle, prio as usize, reg_hint); } } /// Splits the given bundle into minimal bundles per Use, falling /// back onto the spill bundle. This must work for any bundle no /// matter how many conflicts. 
/// /// This is meant to solve a quadratic-cost problem that exists /// with "normal" splitting as implemented above. With that /// procedure, , splitting a bundle produces two /// halves. Furthermore, it has cost linear in the length of the /// bundle, because the resulting half-bundles have their /// requirements recomputed with a new scan, and because we copy /// half the use-list over to the tail end sub-bundle. /// /// This works fine when a bundle has a handful of splits overall, /// but not when an input has a systematic pattern of conflicts /// that will require O(|bundle|) splits (e.g., every Use is /// constrained to a different fixed register than the last /// one). In such a case, we get quadratic behavior. /// /// This method implements a direct split into minimal bundles /// along the whole length of the bundle, putting the regions /// without uses in the spill bundle. We do this once the number /// of splits in an original bundle (tracked by spillset) reaches /// a pre-determined limit. /// /// This basically approximates what a non-splitting allocator /// would do: it "spills" the whole bundle to possibly a /// stackslot, or a second-chance register allocation at best, via /// the spill bundle; and then does minimal reservations of /// registers just at uses/defs and moves the "spilled" value /// into/out of them immediately. pub fn split_into_minimal_bundles(&mut self, bundle: LiveBundleIndex, reg_hint: PReg) { let mut removed_lrs: FxHashSet = FxHashSet::default(); let mut removed_lrs_vregs: FxHashSet = FxHashSet::default(); let mut new_lrs: SmallVec<[(VRegIndex, LiveRangeIndex); 16]> = smallvec![]; let mut new_bundles: SmallVec<[LiveBundleIndex; 16]> = smallvec![]; let spillset = self.bundles[bundle].spillset; let spill = self .get_or_create_spill_bundle(bundle, /* create_if_absent = */ true) .unwrap(); trace!( "Splitting bundle {:?} into minimal bundles with reg hint {}", bundle, reg_hint ); let mut last_lr: Option = None; let mut last_bundle: Option = None; let mut last_inst: Option = None; let mut last_vreg: Option = None; let mut spill_uses = UseList::new(); for entry in core::mem::take(&mut self.bundles[bundle].ranges) { let lr_from = entry.range.from; let lr_to = entry.range.to; let vreg = self.ranges[entry.index].vreg; removed_lrs.insert(entry.index); removed_lrs_vregs.insert(vreg); trace!(" -> removing old LR {:?} for vreg {:?}", entry.index, vreg); let mut spill_range = entry.range; let mut spill_starts_def = false; let mut last_live_pos = entry.range.from; for u in core::mem::take(&mut self.ranges[entry.index].uses) { trace!(" -> use {:?} (last_live_pos {:?})", u, last_live_pos); let is_def = u.operand.kind() == OperandKind::Def; // If this use has an `any` constraint, eagerly migrate it to the spill range. The // reasoning here is that in the second-chance allocation for the spill bundle, // any-constrained uses will be easy to satisfy. Solving those constraints earlier // could create unnecessary conflicts with existing bundles that need to fit in a // register, more strict requirements, so we delay them eagerly. if u.operand.constraint() == OperandConstraint::Any { trace!(" -> migrating this any-constrained use to the spill range"); spill_uses.push(u); // Remember if we're moving the def of this vreg into the spill range, so that // we can set the appropriate flags on it later. 
spill_starts_def = spill_starts_def || is_def; continue; } // If this is a def of the vreg the entry cares about, make sure that the spill // range starts right before the next instruction so that the value is available. if is_def { trace!(" -> moving the spill range forward by one"); spill_range.from = ProgPoint::before(u.pos.inst().next()); } // If we just created a LR for this inst at the last // pos, add this use to the same LR. if Some(u.pos.inst()) == last_inst && Some(vreg) == last_vreg { self.ranges[last_lr.unwrap()].uses.push(u); trace!(" -> appended to last LR {:?}", last_lr.unwrap()); continue; } // The minimal bundle runs through the whole inst // (up to the Before of the next inst), *unless* // the original LR was only over the Before (up to // the After) of this inst. let to = core::cmp::min(ProgPoint::before(u.pos.inst().next()), lr_to); // If the last bundle was at the same inst, add a new // LR to the same bundle; otherwise, create a LR and a // new bundle. if Some(u.pos.inst()) == last_inst { let cr = CodeRange { from: u.pos, to }; let lr = self.ranges.add(cr); new_lrs.push((vreg, lr)); self.ranges[lr].uses.push(u); self.ranges[lr].vreg = vreg; trace!( " -> created new LR {:?} but adding to existing bundle {:?}", lr, last_bundle.unwrap() ); // Edit the previous LR to end mid-inst. self.bundles[last_bundle.unwrap()] .ranges .last_mut() .unwrap() .range .to = u.pos; self.ranges[last_lr.unwrap()].range.to = u.pos; // Add this LR to the bundle. self.bundles[last_bundle.unwrap()] .ranges .push(LiveRangeListEntry { range: cr, index: lr, }); self.ranges[lr].bundle = last_bundle.unwrap(); last_live_pos = ProgPoint::before(u.pos.inst().next()); continue; } // Otherwise, create a new LR. let pos = ProgPoint::before(u.pos.inst()); let pos = core::cmp::max(lr_from, pos); let cr = CodeRange { from: pos, to }; let lr = self.ranges.add(cr); new_lrs.push((vreg, lr)); self.ranges[lr].uses.push(u); self.ranges[lr].vreg = vreg; // Create a new bundle that contains only this LR. let new_bundle = self.bundles.add(); self.ranges[lr].bundle = new_bundle; self.bundles[new_bundle].spillset = spillset; self.bundles[new_bundle].ranges.push(LiveRangeListEntry { range: cr, index: lr, }); new_bundles.push(new_bundle); // If this use was a Def, set the StartsAtDef flag for the new LR. if is_def { self.ranges[lr].set_flag(LiveRangeFlag::StartsAtDef); } trace!( " -> created new LR {:?} range {:?} with new bundle {:?} for this use", lr, cr, new_bundle ); last_live_pos = ProgPoint::before(u.pos.inst().next()); last_lr = Some(lr); last_bundle = Some(new_bundle); last_inst = Some(u.pos.inst()); last_vreg = Some(vreg); } if !spill_range.is_empty() { // Make one entry in the spill bundle that covers the whole range. // TODO: it might be worth tracking enough state to only create this LR when there is // open space in the original LR. let spill_lr = self.ranges.add(spill_range); self.ranges[spill_lr].vreg = vreg; self.ranges[spill_lr].bundle = spill; self.ranges[spill_lr].uses.extend(spill_uses.drain(..)); new_lrs.push((vreg, spill_lr)); if spill_starts_def { self.ranges[spill_lr].set_flag(LiveRangeFlag::StartsAtDef); } self.bundles[spill].ranges.push(LiveRangeListEntry { range: spill_range, index: spill_lr, }); self.ranges[spill_lr].bundle = spill; trace!( " -> added spill range {:?} in new LR {:?} in spill bundle {:?}", spill_range, spill_lr, spill ); } else { assert!(spill_uses.is_empty()); } } // Remove all of the removed LRs from respective vregs' lists. 
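// Illustrative sketch (hypothetical, instruction indices only, ignoring the
// detail that several uses in one instruction share a range): the split
// performed above boils down to "one tiny range per use, plus one long
// spill range covering the original extent". The per-use ranges are simply:
#[allow(dead_code)]
fn minimal_ranges_sketch(sorted_use_insts: &[u32]) -> SmallVec<[(u32, u32); 8]> {
    // Each use gets a range covering exactly its own instruction; the value
    // lives in the spill bundle (stack slot or second-chance register)
    // everywhere in between.
    sorted_use_insts.iter().map(|&i| (i, i + 1)).collect()
}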
for vreg in removed_lrs_vregs { self.vregs[vreg] .ranges .retain(|entry| !removed_lrs.contains(&entry.index)); } // Add the new LRs to their respective vreg lists. for (vreg, lr) in new_lrs { let range = self.ranges[lr].range; let entry = LiveRangeListEntry { range, index: lr }; self.vregs[vreg].ranges.push(entry); } // Recompute bundle properties for all new bundles and enqueue // them. for bundle in new_bundles { if self.bundles[bundle].ranges.len() > 0 { self.recompute_bundle_properties(bundle); let prio = self.bundles[bundle].prio; self.allocation_queue .insert(bundle, prio as usize, reg_hint); } } } pub fn process_bundle( &mut self, bundle: LiveBundleIndex, reg_hint: PReg, ) -> Result<(), RegAllocError> { let class = self.spillsets[self.bundles[bundle].spillset].class; // Grab a hint from either the queue or our spillset, if any. let mut hint_reg = if reg_hint != PReg::invalid() { reg_hint } else { self.spillsets[self.bundles[bundle].spillset].reg_hint }; if self.pregs[hint_reg.index()].is_stack { hint_reg = PReg::invalid(); } trace!("process_bundle: bundle {:?} hint {:?}", bundle, hint_reg,); let req = match self.compute_requirement(bundle) { Ok(req) => req, Err(conflict) => { trace!("conflict!: {:?}", conflict); // We have to split right away. We'll find a point to // split that would allow at least the first half of the // split to be conflict-free. debug_assert!( !self.minimal_bundle(bundle), "Minimal bundle with conflict!" ); self.split_and_requeue_bundle( bundle, /* split_at_point = */ conflict.suggested_split_point(), reg_hint, /* trim_ends_into_spill_bundle = */ conflict.should_trim_edges_around_split(), ); return Ok(()); } }; // If no requirement at all (because no uses), and *if* a // spill bundle is already present, then move the LRs over to // the spill bundle right away. match req { Requirement::Any => { if let Some(spill) = self.get_or_create_spill_bundle(bundle, /* create_if_absent = */ false) { let mut list = core::mem::replace(&mut self.bundles[bundle].ranges, smallvec![]); for entry in &list { self.ranges[entry.index].bundle = spill; } self.bundles[spill].ranges.extend(list.drain(..)); return Ok(()); } } _ => {} } // Try to allocate! let mut attempts = 0; loop { attempts += 1; trace!("attempt {}, req {:?}", attempts, req); debug_assert!(attempts < 100 * self.func.num_insts()); let fixed_preg = match req { Requirement::FixedReg(preg) | Requirement::FixedStack(preg) => Some(preg), Requirement::Register => None, Requirement::Any => { self.spilled_bundles.push(bundle); return Ok(()); } }; // Scan all pregs, or the one fixed preg, and attempt to allocate. let mut lowest_cost_evict_conflict_set: Option = None; let mut lowest_cost_evict_conflict_cost: Option = None; let mut lowest_cost_split_conflict_cost: Option = None; let mut lowest_cost_split_conflict_point = ProgPoint::before(Inst::new(0)); let mut lowest_cost_split_conflict_reg = PReg::invalid(); // Heuristic: start the scan for an available // register at an offset influenced both by our // location in the code and by the bundle we're // considering. This has the effect of spreading // demand more evenly across registers. 
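// Illustrative sketch (hypothetical): the scan offset below just rotates
// the starting point of the register probe, so bundles originating at
// different places in the code begin probing at different registers and
// demand on the commitment maps is spread more evenly.
#[allow(dead_code)]
fn rotated_probe_order_sketch(regs: &[u32], offset: usize) -> impl Iterator<Item = u32> + '_ {
    (0..regs.len()).map(move |i| regs[(i + offset) % regs.len()])
}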
let scan_offset = self.ranges[self.bundles[bundle].ranges[0].index] .range .from .inst() .index() + bundle.index(); self.stats.process_bundle_reg_probe_start_any += 1; for preg in RegTraversalIter::new( self.env, class, hint_reg, PReg::invalid(), scan_offset, fixed_preg, ) { self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); trace!("trying preg {:?}", preg_idx); let scan_limit_cost = match ( lowest_cost_evict_conflict_cost, lowest_cost_split_conflict_cost, ) { (Some(a), Some(b)) => Some(core::cmp::max(a, b)), _ => None, }; match self.try_to_allocate_bundle_to_reg(bundle, preg_idx, scan_limit_cost) { AllocRegResult::Allocated(alloc) => { self.stats.process_bundle_reg_success_any += 1; trace!(" -> allocated to any {:?}", preg_idx); self.spillsets[self.bundles[bundle].spillset].reg_hint = alloc.as_reg().unwrap(); return Ok(()); } AllocRegResult::Conflict(bundles, first_conflict_point) => { trace!( " -> conflict with bundles {:?}, first conflict at {:?}", bundles, first_conflict_point ); let conflict_cost = self.maximum_spill_weight_in_bundle_set(&bundles); if lowest_cost_evict_conflict_cost.is_none() || conflict_cost < lowest_cost_evict_conflict_cost.unwrap() { lowest_cost_evict_conflict_cost = Some(conflict_cost); lowest_cost_evict_conflict_set = Some(bundles); } let loop_depth = self.cfginfo.approx_loop_depth [self.cfginfo.insn_block[first_conflict_point.inst().index()].index()]; let move_cost = spill_weight_from_constraint( OperandConstraint::Reg, loop_depth as usize, /* is_def = */ true, ) .to_int(); if lowest_cost_split_conflict_cost.is_none() || (conflict_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() { lowest_cost_split_conflict_cost = Some(conflict_cost + move_cost); lowest_cost_split_conflict_point = first_conflict_point; lowest_cost_split_conflict_reg = preg; } } AllocRegResult::ConflictWithFixed(max_cost, point) => { trace!(" -> conflict with fixed alloc; cost of other bundles up to point is {}, conflict at {:?}", max_cost, point); let loop_depth = self.cfginfo.approx_loop_depth [self.cfginfo.insn_block[point.inst().index()].index()]; let move_cost = spill_weight_from_constraint( OperandConstraint::Reg, loop_depth as usize, /* is_def = */ true, ) .to_int(); if lowest_cost_split_conflict_cost.is_none() || (max_cost + move_cost) < lowest_cost_split_conflict_cost.unwrap() { lowest_cost_split_conflict_cost = Some(max_cost + move_cost); lowest_cost_split_conflict_point = point; lowest_cost_split_conflict_reg = preg; } } AllocRegResult::ConflictHighCost => { // Simply don't consider -- we already have // a lower-cost conflict bundle option // to evict. continue; } } } // Otherwise, we *require* a register, but didn't fit into // any with current bundle assignments. Hence, we will need // to either split or attempt to evict some bundles. trace!( " -> lowest cost evict: set {:?}, cost {:?}", lowest_cost_evict_conflict_set, lowest_cost_evict_conflict_cost, ); trace!( " -> lowest cost split: cost {:?}, point {:?}, reg {:?}", lowest_cost_split_conflict_cost, lowest_cost_split_conflict_point, lowest_cost_split_conflict_reg ); // If we reach here, we *must* have an option either to split or evict. 
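// Illustrative sketch (hypothetical, ignoring the minimal-bundle error
// path): the choice made below. Splitting wins when eviction is impossible,
// when we have already retried once, or when we are no costlier to move
// than the cheapest set we could evict; the "<=" is what breaks the
// livelock where two equal-weight bundles keep evicting each other.
#[allow(dead_code)]
fn prefer_split_sketch(attempts: usize, our_weight: u32, cheapest_evict: Option<u32>) -> bool {
    attempts >= 2
        || match cheapest_evict {
            None => true,
            Some(evict_cost) => our_weight <= evict_cost,
        }
}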
debug_assert!( lowest_cost_split_conflict_cost.is_some() || lowest_cost_evict_conflict_cost.is_some() ); let our_spill_weight = self.bundle_spill_weight(bundle); trace!(" -> our spill weight: {}", our_spill_weight); // We detect the "too-many-live-registers" case here and // return an error cleanly, rather than panicking, because // the regalloc.rs fuzzer depends on the register // allocator to correctly reject impossible-to-allocate // programs in order to discard invalid test cases. if self.minimal_bundle(bundle) && (attempts >= 2 || lowest_cost_evict_conflict_cost.is_none() || lowest_cost_evict_conflict_cost.unwrap() >= our_spill_weight) { if let Requirement::Register = req { // Check if this is a too-many-live-registers situation. let range = self.bundles[bundle].ranges[0].range; trace!("checking for too many live regs"); let mut min_bundles_assigned = 0; let mut fixed_assigned = 0; let mut total_regs = 0; for preg in self.env.preferred_regs_by_class[class as u8 as usize] .iter() .chain(self.env.non_preferred_regs_by_class[class as u8 as usize].iter()) { trace!(" -> PR {:?}", preg); let start = LiveRangeKey::from_range(&CodeRange { from: range.from.prev(), to: range.from.prev(), }); for (key, lr) in self.pregs[preg.index()].allocations.btree.range(start..) { let preg_range = key.to_range(); if preg_range.to <= range.from { continue; } if preg_range.from >= range.to { break; } if lr.is_valid() { if self.minimal_bundle(self.ranges[*lr].bundle) { trace!(" -> min bundle {:?}", lr); min_bundles_assigned += 1; } else { trace!(" -> non-min bundle {:?}", lr); } } else { trace!(" -> fixed bundle"); fixed_assigned += 1; } } total_regs += 1; } trace!( " -> total {}, fixed {}, min {}", total_regs, fixed_assigned, min_bundles_assigned ); if min_bundles_assigned + fixed_assigned >= total_regs { return Err(RegAllocError::TooManyLiveRegs); } } panic!("Could not allocate minimal bundle, but the allocation problem should be possible to solve"); } // If our bundle's weight is less than or equal to(*) the // evict cost, choose to split. Also pick splitting if // we're on our second or more attempt and we didn't // allocate. Also pick splitting if the conflict set is // empty, meaning a fixed conflict that can't be evicted. // // (*) the "equal to" part is very important: it prevents // an infinite loop where two bundles with equal spill // cost continually evict each other in an infinite // allocation loop. In such a case, the first bundle in // wins, and the other splits. // // Note that we don't split if the bundle is minimal. if !self.minimal_bundle(bundle) && (attempts >= 2 || lowest_cost_evict_conflict_cost.is_none() || our_spill_weight <= lowest_cost_evict_conflict_cost.unwrap()) { trace!( " -> deciding to split: our spill weight is {}", self.bundle_spill_weight(bundle) ); let bundle_start = self.bundles[bundle].ranges[0].range.from; let mut split_at_point = core::cmp::max(lowest_cost_split_conflict_point, bundle_start); let requeue_with_reg = lowest_cost_split_conflict_reg; // Adjust `split_at_point` if it is within a deeper loop // than the bundle start -- hoist it to just before the // first loop header it encounters. 
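// Illustrative sketch (hypothetical): hoisting a split point out of a loop.
// Given per-block approximate loop depths, walk the blocks between the
// bundle start and the proposed split and stop at the first block that is
// deeper than where the bundle starts; its entry becomes the split point.
#[allow(dead_code)]
fn hoist_split_block_sketch(depths: &[u32], start_block: usize, split_block: usize) -> usize {
    let start_depth = depths[start_block];
    for b in (start_block + 1)..=split_block {
        if depths[b] > start_depth {
            // Split at this block's entry instead of inside the loop.
            return b;
        }
    }
    split_block
}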
let bundle_start_depth = self.cfginfo.approx_loop_depth [self.cfginfo.insn_block[bundle_start.inst().index()].index()]; let split_at_depth = self.cfginfo.approx_loop_depth [self.cfginfo.insn_block[split_at_point.inst().index()].index()]; if split_at_depth > bundle_start_depth { for block in (self.cfginfo.insn_block[bundle_start.inst().index()].index() + 1) ..=self.cfginfo.insn_block[split_at_point.inst().index()].index() { if self.cfginfo.approx_loop_depth[block] > bundle_start_depth { split_at_point = self.cfginfo.block_entry[block]; break; } } } self.split_and_requeue_bundle( bundle, split_at_point, requeue_with_reg, /* should_trim = */ true, ); return Ok(()); } else { // Evict all bundles in `conflicting bundles` and try again. self.stats.evict_bundle_event += 1; for &bundle in &lowest_cost_evict_conflict_set.unwrap() { trace!(" -> evicting {:?}", bundle); self.evict_bundle(bundle); self.stats.evict_bundle_count += 1; } } } } } regalloc2-0.10.2/src/ion/redundant_moves.rs000066400000000000000000000101201467034227200205640ustar00rootroot00000000000000//! Redundant-move elimination. use crate::{Allocation, FxHashMap, VReg}; use smallvec::{smallvec, SmallVec}; #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum RedundantMoveState { Copy(Allocation, Option), Orig(VReg), None, } #[derive(Clone, Debug, Default)] pub struct RedundantMoveEliminator { allocs: FxHashMap, reverse_allocs: FxHashMap>, } #[derive(Copy, Clone, Debug)] pub struct RedundantMoveAction { pub elide: bool, } impl RedundantMoveEliminator { pub fn process_move( &mut self, from: Allocation, to: Allocation, to_vreg: Option, ) -> RedundantMoveAction { // Look up the src and dest. let from_state = self .allocs .get(&from) .map(|&p| p) .unwrap_or(RedundantMoveState::None); let to_state = self .allocs .get(&to) .map(|&p| p) .unwrap_or(RedundantMoveState::None); trace!( " -> redundant move tracker: from {} to {} to_vreg {:?}", from, to, to_vreg ); trace!( " -> from_state {:?} to_state {:?}", from_state, to_state ); if from == to && to_vreg.is_some() { self.clear_alloc(to); self.allocs .insert(to, RedundantMoveState::Orig(to_vreg.unwrap())); return RedundantMoveAction { elide: true }; } let src_vreg = match from_state { RedundantMoveState::Copy(_, opt_r) => opt_r, RedundantMoveState::Orig(r) => Some(r), _ => None, }; trace!(" -> src_vreg {:?}", src_vreg); let dst_vreg = to_vreg.or(src_vreg); trace!(" -> dst_vreg {:?}", dst_vreg); let existing_dst_vreg = match to_state { RedundantMoveState::Copy(_, opt_r) => opt_r, RedundantMoveState::Orig(r) => Some(r), _ => None, }; trace!(" -> existing_dst_vreg {:?}", existing_dst_vreg); let elide = match (from_state, to_state) { (_, RedundantMoveState::Copy(orig_alloc, _)) if orig_alloc == from => true, (RedundantMoveState::Copy(new_alloc, _), _) if new_alloc == to => true, _ => false, }; trace!(" -> elide {}", elide); // Invalidate all existing copies of `to` if `to` actually changed value. if !elide { self.clear_alloc(to); } // Set up forward and reverse mapping. Don't track stack-to-stack copies. 
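// Illustrative sketch (hypothetical u32 "allocation" ids): the state kept by
// this eliminator is a forward map "destination -> what it currently holds a
// copy of" plus a reverse map "source -> everything copied from it", so that
// clobbering a location can cheaply invalidate every copy made from it (see
// `clear_alloc` below).
#[allow(dead_code)]
fn invalidate_copies_sketch(
    forward: &mut FxHashMap<u32, u32>,
    reverse: &mut FxHashMap<u32, SmallVec<[u32; 4]>>,
    clobbered: u32,
) {
    if let Some(dests) = reverse.remove(&clobbered) {
        for dest in dests {
            // `dest` no longer holds a known copy of anything.
            forward.remove(&dest);
        }
    }
    // The clobbered location's own contents are unknown now too.
    forward.remove(&clobbered);
}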
if from.is_reg() || to.is_reg() { self.allocs .insert(to, RedundantMoveState::Copy(from, dst_vreg)); trace!( " -> create mapping {} -> {:?}", to, RedundantMoveState::Copy(from, dst_vreg) ); self.reverse_allocs .entry(from) .or_insert_with(|| smallvec![]) .push(to); } RedundantMoveAction { elide } } pub fn clear(&mut self) { trace!(" redundant move eliminator cleared"); self.allocs.clear(); self.reverse_allocs.clear(); } pub fn clear_alloc(&mut self, alloc: Allocation) { trace!(" redundant move eliminator: clear {:?}", alloc); if let Some(ref mut existing_copies) = self.reverse_allocs.get_mut(&alloc) { for to_inval in existing_copies.drain(..) { trace!(" -> clear existing copy: {:?}", to_inval); if let Some(val) = self.allocs.get_mut(&to_inval) { match val { RedundantMoveState::Copy(_, Some(vreg)) => { *val = RedundantMoveState::Orig(*vreg); } _ => *val = RedundantMoveState::None, } } self.allocs.remove(&to_inval); } } self.allocs.remove(&alloc); } } regalloc2-0.10.2/src/ion/reg_traversal.rs000066400000000000000000000074231467034227200202430ustar00rootroot00000000000000use crate::{MachineEnv, PReg, RegClass}; /// This iterator represents a traversal through all allocatable /// registers of a given class, in a certain order designed to /// minimize allocation contention. /// /// The order in which we try registers is somewhat complex: /// - First, if there is a hint, we try that. /// - Then, we try registers in a traversal order that is based on an /// "offset" (usually the bundle index) spreading pressure evenly /// among registers to reduce commitment-map contention. /// - Within that scan, we try registers in two groups: first, /// prferred registers; then, non-preferred registers. (In normal /// usage, these consist of caller-save and callee-save registers /// respectively, to minimize clobber-saves; but they need not.) 
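///
/// A sketch of the resulting order for a tiny environment (illustrative
/// only: this type is crate-internal, so the snippet below is not a
/// runnable doctest):
///
/// ```ignore
/// let p0 = PReg::new(0, RegClass::Int);
/// let p1 = PReg::new(1, RegClass::Int);
/// let p2 = PReg::new(2, RegClass::Int);
/// let env = MachineEnv {
///     preferred_regs_by_class: [vec![p0, p1], vec![], vec![]],
///     non_preferred_regs_by_class: [vec![p2], vec![], vec![]],
///     scratch_by_class: [None, None, None],
///     fixed_stack_slots: vec![],
/// };
/// // Hint first, then preferred regs (rotated by the offset, skipping the
/// // hint), then non-preferred regs.
/// let order: Vec<PReg> =
///     RegTraversalIter::new(&env, RegClass::Int, p1, PReg::invalid(), 0, None).collect();
/// assert_eq!(order, vec![p1, p0, p2]);
/// ```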
pub struct RegTraversalIter<'a> { env: &'a MachineEnv, class: usize, hints: [Option; 2], hint_idx: usize, pref_idx: usize, non_pref_idx: usize, offset_pref: usize, offset_non_pref: usize, is_fixed: bool, fixed: Option, } impl<'a> RegTraversalIter<'a> { pub fn new( env: &'a MachineEnv, class: RegClass, hint_reg: PReg, hint2_reg: PReg, offset: usize, fixed: Option, ) -> Self { let mut hint_reg = if hint_reg != PReg::invalid() { Some(hint_reg) } else { None }; let mut hint2_reg = if hint2_reg != PReg::invalid() { Some(hint2_reg) } else { None }; if hint_reg.is_none() { hint_reg = hint2_reg; hint2_reg = None; } let hints = [hint_reg, hint2_reg]; let class = class as u8 as usize; let offset_pref = if env.preferred_regs_by_class[class].len() > 0 { offset % env.preferred_regs_by_class[class].len() } else { 0 }; let offset_non_pref = if env.non_preferred_regs_by_class[class].len() > 0 { offset % env.non_preferred_regs_by_class[class].len() } else { 0 }; Self { env, class, hints, hint_idx: 0, pref_idx: 0, non_pref_idx: 0, offset_pref, offset_non_pref, is_fixed: fixed.is_some(), fixed, } } } impl<'a> core::iter::Iterator for RegTraversalIter<'a> { type Item = PReg; fn next(&mut self) -> Option { if self.is_fixed { let ret = self.fixed; self.fixed = None; return ret; } fn wrap(idx: usize, limit: usize) -> usize { if idx >= limit { idx - limit } else { idx } } if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { let h = self.hints[self.hint_idx]; self.hint_idx += 1; return h; } while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { let arr = &self.env.preferred_regs_by_class[self.class][..]; let r = arr[wrap(self.pref_idx + self.offset_pref, arr.len())]; self.pref_idx += 1; if Some(r) == self.hints[0] || Some(r) == self.hints[1] { continue; } return Some(r); } while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { let arr = &self.env.non_preferred_regs_by_class[self.class][..]; let r = arr[wrap(self.non_pref_idx + self.offset_non_pref, arr.len())]; self.non_pref_idx += 1; if Some(r) == self.hints[0] || Some(r) == self.hints[1] { continue; } return Some(r); } None } } regalloc2-0.10.2/src/ion/requirement.rs000066400000000000000000000141341467034227200177400ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Requirements computation. use super::{Env, LiveBundleIndex}; use crate::{Function, Inst, Operand, OperandConstraint, PReg, ProgPoint}; pub struct RequirementConflict; #[derive(Clone, Copy, Debug)] pub enum RequirementConflictAt { /// A transition from a stack-constrained to a reg-constrained /// segment. The suggested split point is late, to keep the /// intervening region with the stackslot (which is cheaper). StackToReg(ProgPoint), /// A transition from a reg-constraint to a stack-constrained /// segment. Mirror of above: the suggested split point is early /// (just after the last register use). RegToStack(ProgPoint), /// Any other transition. 
The suggested split point is late (just /// before the conflicting use), but the split will also trim the /// ends and create a split bundle, so the intervening region will /// not appear with either side. This is probably for the best /// when e.g. the two sides of the split are both constrained to /// different physical registers: the part in the middle should be /// constrained to neither. Other(ProgPoint), } impl RequirementConflictAt { #[inline(always)] pub fn should_trim_edges_around_split(self) -> bool { match self { RequirementConflictAt::RegToStack(..) | RequirementConflictAt::StackToReg(..) => false, RequirementConflictAt::Other(..) => true, } } #[inline(always)] pub fn suggested_split_point(self) -> ProgPoint { match self { RequirementConflictAt::RegToStack(pt) | RequirementConflictAt::StackToReg(pt) | RequirementConflictAt::Other(pt) => pt, } } } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Requirement { FixedReg(PReg), FixedStack(PReg), Register, Any, } impl Requirement { #[inline(always)] pub fn merge(self, other: Requirement) -> Result { match (self, other) { (other, Requirement::Any) | (Requirement::Any, other) => Ok(other), (Requirement::Register, Requirement::Register) => Ok(self), (Requirement::Register, Requirement::FixedReg(preg)) | (Requirement::FixedReg(preg), Requirement::Register) => { Ok(Requirement::FixedReg(preg)) } (Requirement::FixedReg(a), Requirement::FixedReg(b)) if a == b => Ok(self), (Requirement::FixedStack(a), Requirement::FixedStack(b)) if a == b => Ok(self), _ => Err(RequirementConflict), } } #[inline(always)] pub fn is_stack(self) -> bool { match self { Requirement::FixedStack(..) => true, Requirement::Register | Requirement::FixedReg(..) => false, Requirement::Any => false, } } #[inline(always)] pub fn is_reg(self) -> bool { match self { Requirement::Register | Requirement::FixedReg(..) => true, Requirement::FixedStack(..) => false, Requirement::Any => false, } } } impl<'a, F: Function> Env<'a, F> { #[inline(always)] pub fn requirement_from_operand(&self, op: Operand) -> Requirement { match op.constraint() { OperandConstraint::FixedReg(preg) => { if self.pregs[preg.index()].is_stack { Requirement::FixedStack(preg) } else { Requirement::FixedReg(preg) } } OperandConstraint::Reg | OperandConstraint::Reuse(_) => Requirement::Register, OperandConstraint::Any => Requirement::Any, } } pub fn compute_requirement( &self, bundle: LiveBundleIndex, ) -> Result { let mut req = Requirement::Any; let mut last_pos = ProgPoint::before(Inst::new(0)); trace!("compute_requirement: {:?}", bundle); let ranges = &self.bundles[bundle].ranges; for entry in ranges { trace!(" -> LR {:?}: {:?}", entry.index, entry.range); for u in &self.ranges[entry.index].uses { trace!(" -> use {:?}", u); let r = self.requirement_from_operand(u.operand); req = req.merge(r).map_err(|_| { trace!(" -> conflict"); if req.is_stack() && r.is_reg() { // Suggested split point just before the reg (i.e., late split). RequirementConflictAt::StackToReg(u.pos) } else if req.is_reg() && r.is_stack() { // Suggested split point just after the stack // (i.e., early split). Note that splitting // with a use *right* at the beginning is // interpreted by `split_and_requeue_bundle` // as splitting off the first use. 
RequirementConflictAt::RegToStack(last_pos) } else { RequirementConflictAt::Other(u.pos) } })?; last_pos = u.pos; trace!(" -> req {:?}", req); } } trace!(" -> final: {:?}", req); Ok(req) } pub fn merge_bundle_requirements( &self, a: LiveBundleIndex, b: LiveBundleIndex, ) -> Result { let req_a = self .compute_requirement(a) .map_err(|_| RequirementConflict)?; let req_b = self .compute_requirement(b) .map_err(|_| RequirementConflict)?; req_a.merge(req_b) } } regalloc2-0.10.2/src/ion/spill.rs000066400000000000000000000143301467034227200165210ustar00rootroot00000000000000/* * This file was initially derived from the files * `js/src/jit/BacktrackingAllocator.h` and * `js/src/jit/BacktrackingAllocator.cpp` in Mozilla Firefox, and was * originally licensed under the Mozilla Public License 2.0. We * subsequently relicensed it to Apache-2.0 WITH LLVM-exception (see * https://github.com/bytecodealliance/regalloc2/issues/7). * * Since the initial port, the design has been substantially evolved * and optimized. */ //! Spillslot allocation. use super::{ AllocRegResult, Env, LiveRangeKey, PReg, PRegIndex, RegTraversalIter, SpillSetIndex, SpillSlotData, SpillSlotIndex, }; use crate::{ion::data_structures::SpillSetRanges, Allocation, Function, SpillSlot}; impl<'a, F: Function> Env<'a, F> { pub fn try_allocating_regs_for_spilled_bundles(&mut self) { trace!("allocating regs for spilled bundles"); for i in 0..self.spilled_bundles.len() { let bundle = self.spilled_bundles[i]; // don't borrow self if self.bundles[bundle].ranges.is_empty() { continue; } let class = self.spillsets[self.bundles[bundle].spillset].class; let hint = self.spillsets[self.bundles[bundle].spillset].reg_hint; // This may be an empty-range bundle whose ranges are not // sorted; sort all range-lists again here. self.bundles[bundle] .ranges .sort_unstable_by_key(|entry| entry.range.from); let mut success = false; self.stats.spill_bundle_reg_probes += 1; for preg in RegTraversalIter::new(self.env, class, hint, PReg::invalid(), bundle.index(), None) { trace!("trying bundle {:?} to preg {:?}", bundle, preg); let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = self.try_to_allocate_bundle_to_reg(bundle, preg_idx, None) { self.stats.spill_bundle_reg_success += 1; success = true; break; } } if !success { trace!( "spilling bundle {:?}: marking spillset {:?} as required", bundle, self.bundles[bundle].spillset ); self.spillsets[self.bundles[bundle].spillset].required = true; } } } pub fn spillslot_can_fit_spillset( &mut self, spillslot: SpillSlotIndex, spillset: SpillSetIndex, ) -> bool { !self.spillslots[spillslot.index()] .ranges .btree .contains_key(&LiveRangeKey::from_range(&self.spillsets[spillset].range)) } pub fn allocate_spillset_to_spillslot( &mut self, spillset: SpillSetIndex, spillslot: SpillSlotIndex, ) { self.spillsets[spillset].slot = spillslot; let res = self.spillslots[spillslot.index()].ranges.btree.insert( LiveRangeKey::from_range(&self.spillsets[spillset].range), spillset, ); debug_assert!(res.is_none()); } pub fn allocate_spillslots(&mut self) { const MAX_ATTEMPTS: usize = 10; for spillset in 0..self.spillsets.len() { trace!("allocate spillslot: {}", spillset); let spillset = SpillSetIndex::new(spillset); if !self.spillsets[spillset].required { continue; } let class = self.spillsets[spillset].class as usize; // Try a few existing spillslots. 
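// `probe_start` acts as a per-class cursor: it is updated to the slot we
// last reused (or to a freshly created slot), so the next spillset resumes
// probing there rather than rescanning from the first slot each time.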
let mut i = self.slots_by_class[class].probe_start; let mut success = false; // Never probe the same element more than once: limit the // attempt count to the number of slots in existence. for _attempt in 0..core::cmp::min(self.slots_by_class[class].slots.len(), MAX_ATTEMPTS) { // Note: this indexing of `slots` is always valid // because either the `slots` list is empty and the // iteration limit above consequently means we don't // run this loop at all, or else `probe_start` is // in-bounds (because it is made so below when we add // a slot, and it always takes on the last index `i` // after this loop). let spillslot = self.slots_by_class[class].slots[i]; if self.spillslot_can_fit_spillset(spillslot, spillset) { self.allocate_spillset_to_spillslot(spillset, spillslot); success = true; self.slots_by_class[class].probe_start = i; break; } i = self.slots_by_class[class].next_index(i); } if !success { // Allocate a new spillslot. let spillslot = SpillSlotIndex::new(self.spillslots.len()); self.spillslots.push(SpillSlotData { ranges: SpillSetRanges::new(), alloc: Allocation::none(), slots: self.func.spillslot_size(self.spillsets[spillset].class) as u32, }); self.slots_by_class[class].slots.push(spillslot); self.slots_by_class[class].probe_start = self.slots_by_class[class].slots.len() - 1; self.allocate_spillset_to_spillslot(spillset, spillslot); } } // Assign actual slot indices to spillslots. for i in 0..self.spillslots.len() { self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].slots); } trace!("spillslot allocator done"); } pub fn allocate_spillslot(&mut self, size: u32) -> Allocation { let mut offset = self.num_spillslots; // Align up to `size`. debug_assert!(size.is_power_of_two()); offset = (offset + size - 1) & !(size - 1); let slot = if self.func.multi_spillslot_named_by_last_slot() { offset + size - 1 } else { offset }; offset += size; self.num_spillslots = offset; Allocation::stack(SpillSlot::new(slot as usize)) } } regalloc2-0.10.2/src/lib.rs000066400000000000000000001444021467034227200153630ustar00rootroot00000000000000/* * The following license applies to this file, which derives many * details (register and constraint definitions, for example) from the * files `BacktrackingAllocator.h`, `BacktrackingAllocator.cpp`, * `LIR.h`, and possibly definitions in other related files in * `js/src/jit/`: * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #![allow(dead_code)] #![no_std] #[cfg(feature = "std")] extern crate std; extern crate alloc; // Even when trace logging is disabled, the trace macro has a significant // performance cost so we disable it in release builds. macro_rules! trace { ($($tt:tt)*) => { if cfg!(feature = "trace-log") { ::log::trace!($($tt)*); } }; } macro_rules! 
trace_enabled { () => { cfg!(feature = "trace-log") && ::log::log_enabled!(::log::Level::Trace) }; } use core::hash::BuildHasherDefault; use rustc_hash::FxHasher; type FxHashMap = hashbrown::HashMap>; type FxHashSet = hashbrown::HashSet>; pub(crate) mod cfg; pub(crate) mod domtree; pub mod indexset; pub(crate) mod ion; pub(crate) mod moves; pub(crate) mod postorder; pub mod ssa; #[macro_use] mod index; use alloc::vec::Vec; pub use index::{Block, Inst, InstRange}; pub mod checker; #[cfg(feature = "fuzzing")] pub mod fuzzing; #[cfg(feature = "enable-serde")] pub mod serialize; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; /// Register classes. /// /// Every value has a "register class", which is like a type at the /// register-allocator level. Every register must belong to only one /// class; i.e., they are disjoint. /// /// For tight bit-packing throughout our data structures, we support /// only three classes, "int", "float" and "vector". Usually two will /// be enough on modern machines, as they have one class of general-purpose /// integer registers of machine width (e.g. 64 bits), and another /// class of float/vector registers used both for FP and for vector /// operations. Additionally for machines with totally separate vector /// registers a third class is provided. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum RegClass { Int = 0, Float = 1, Vector = 2, } /// A physical register. Contains a physical register number and a class. /// /// The `hw_enc` field contains the physical register number and is in /// a logically separate index space per class; in other words, Int /// register 0 is different than Float register 0. /// /// Because of bit-packed encodings throughout the implementation, /// `hw_enc` must fit in 6 bits, i.e., at most 64 registers per class. /// /// The value returned by `index()`, in contrast, is in a single index /// space shared by all classes, in order to enable uniform reasoning /// about physical registers. This is done by putting the class bit at /// the MSB, or equivalently, declaring that indices 0..=63 are the 64 /// integer registers and indices 64..=127 are the 64 float registers. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct PReg { bits: u8, } impl PReg { pub const MAX_BITS: usize = 6; pub const MAX: usize = (1 << Self::MAX_BITS) - 1; pub const NUM_INDEX: usize = 1 << (Self::MAX_BITS + 2); // including RegClass bits /// Create a new PReg. The `hw_enc` range is 6 bits. #[inline(always)] pub const fn new(hw_enc: usize, class: RegClass) -> Self { debug_assert!(hw_enc <= PReg::MAX); PReg { bits: ((class as u8) << Self::MAX_BITS) | (hw_enc as u8), } } /// The physical register number, as encoded by the ISA for the particular register class. #[inline(always)] pub const fn hw_enc(self) -> usize { self.bits as usize & Self::MAX } /// The register class. #[inline(always)] pub const fn class(self) -> RegClass { match (self.bits >> Self::MAX_BITS) & 0b11 { 0 => RegClass::Int, 1 => RegClass::Float, 2 => RegClass::Vector, _ => unreachable!(), } } /// Get an index into the (not necessarily contiguous) index space of /// all physical registers. Allows one to maintain an array of data for /// all PRegs and index it efficiently. 
#[inline(always)] pub const fn index(self) -> usize { self.bits as usize } /// Construct a PReg from the value returned from `.index()`. #[inline(always)] pub const fn from_index(index: usize) -> Self { PReg { bits: (index & (Self::NUM_INDEX - 1)) as u8, } } /// Return the "invalid PReg", which can be used to initialize /// data structures. #[inline(always)] pub const fn invalid() -> Self { PReg::new(Self::MAX, RegClass::Int) } } impl core::fmt::Debug for PReg { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!( f, "PReg(hw = {}, class = {:?}, index = {})", self.hw_enc(), self.class(), self.index() ) } } impl core::fmt::Display for PReg { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { let class = match self.class() { RegClass::Int => "i", RegClass::Float => "f", RegClass::Vector => "v", }; write!(f, "p{}{}", self.hw_enc(), class) } } /// A type for internal bit arrays. type Bits = u64; /// A physical register set. Used to represent clobbers /// efficiently. /// /// The set is `Copy` and is guaranteed to have constant, and small, /// size, as it is based on a bitset internally. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct PRegSet { bits: [Bits; Self::LEN], } impl PRegSet { /// The number of bits per element in the internal bit array. const BITS: usize = core::mem::size_of::() * 8; /// Length of the internal bit array. const LEN: usize = (PReg::NUM_INDEX + Self::BITS - 1) / Self::BITS; /// Create an empty set. pub const fn empty() -> Self { Self { bits: [0; Self::LEN], } } /// Splits the given register index into parts to access the internal bit array. const fn split_index(reg: PReg) -> (usize, usize) { let index = reg.index(); (index >> Self::BITS.ilog2(), index & (Self::BITS - 1)) } /// Returns whether the given register is part of the set. pub fn contains(&self, reg: PReg) -> bool { let (index, bit) = Self::split_index(reg); self.bits[index] & (1 << bit) != 0 } /// Add a physical register (PReg) to the set, returning the new value. pub const fn with(self, reg: PReg) -> Self { let (index, bit) = Self::split_index(reg); let mut out = self; out.bits[index] |= 1 << bit; out } /// Add a physical register (PReg) to the set. pub fn add(&mut self, reg: PReg) { let (index, bit) = Self::split_index(reg); self.bits[index] |= 1 << bit; } /// Remove a physical register (PReg) from the set. pub fn remove(&mut self, reg: PReg) { let (index, bit) = Self::split_index(reg); self.bits[index] &= !(1 << bit); } /// Add all of the registers in one set to this one, mutating in /// place. 
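///
/// For example (a minimal sketch of the set operations defined here):
///
/// ```
/// use regalloc2::{PReg, PRegSet, RegClass};
/// let p0 = PReg::new(0, RegClass::Int);
/// let p1 = PReg::new(1, RegClass::Float);
/// let mut a = PRegSet::empty().with(p0);
/// a.union_from(PRegSet::empty().with(p1));
/// assert!(a.contains(p0) && a.contains(p1));
/// ```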
pub fn union_from(&mut self, other: PRegSet) { for i in 0..self.bits.len() { self.bits[i] |= other.bits[i]; } } } impl IntoIterator for PRegSet { type Item = PReg; type IntoIter = PRegSetIter; fn into_iter(self) -> PRegSetIter { PRegSetIter { bits: self.bits, cur: 0, } } } pub struct PRegSetIter { bits: [Bits; PRegSet::LEN], cur: usize, } impl Iterator for PRegSetIter { type Item = PReg; fn next(&mut self) -> Option { loop { let bits = self.bits.get_mut(self.cur)?; if *bits != 0 { let bit = bits.trailing_zeros(); *bits &= !(1 << bit); let index = bit as usize + self.cur * PRegSet::BITS; return Some(PReg::from_index(index)); } self.cur += 1; } } } impl From<&MachineEnv> for PRegSet { fn from(env: &MachineEnv) -> Self { let mut res = Self::default(); for class in env.preferred_regs_by_class.iter() { for preg in class { res.add(*preg) } } for class in env.non_preferred_regs_by_class.iter() { for preg in class { res.add(*preg) } } res } } /// A virtual register. Contains a virtual register number and a /// class. /// /// A virtual register ("vreg") corresponds to an SSA value. All /// dataflow in the input program is specified via flow through a /// virtual register; even uses of specially-constrained locations, /// such as fixed physical registers, are done by using vregs, because /// we need the vreg's live range in order to track the use of that /// location. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct VReg { bits: u32, } impl VReg { pub const MAX_BITS: usize = 21; pub const MAX: usize = (1 << Self::MAX_BITS) - 1; #[inline(always)] pub const fn new(virt_reg: usize, class: RegClass) -> Self { debug_assert!(virt_reg <= VReg::MAX); VReg { bits: ((virt_reg as u32) << 2) | (class as u8 as u32), } } #[inline(always)] pub const fn vreg(self) -> usize { let vreg = (self.bits >> 2) as usize; vreg } #[inline(always)] pub const fn class(self) -> RegClass { match self.bits & 0b11 { 0 => RegClass::Int, 1 => RegClass::Float, 2 => RegClass::Vector, _ => unreachable!(), } } #[inline(always)] pub const fn invalid() -> Self { VReg::new(Self::MAX, RegClass::Int) } } impl core::fmt::Debug for VReg { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!( f, "VReg(vreg = {}, class = {:?})", self.vreg(), self.class() ) } } impl core::fmt::Display for VReg { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "v{}", self.vreg()) } } /// A spillslot is a space in the stackframe used by the allocator to /// temporarily store a value. /// /// The allocator is responsible for allocating indices in this space, /// and will specify how many spillslots have been used when the /// allocation is completed. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct SpillSlot { bits: u32, } impl SpillSlot { /// The maximum spillslot index. pub const MAX: usize = (1 << 24) - 1; /// Create a new SpillSlot. #[inline(always)] pub fn new(slot: usize) -> Self { debug_assert!(slot <= Self::MAX); SpillSlot { bits: slot as u32 } } /// Get the spillslot index for this spillslot. #[inline(always)] pub fn index(self) -> usize { (self.bits & 0x00ffffff) as usize } /// Get the spillslot `offset` slots away. #[inline(always)] pub fn plus(self, offset: usize) -> Self { SpillSlot::new(self.index() + offset) } /// Get the invalid spillslot, used for initializing data structures. 
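///
/// For example:
///
/// ```
/// use regalloc2::SpillSlot;
/// assert!(SpillSlot::invalid().is_invalid());
/// assert!(SpillSlot::new(0).is_valid());
/// ```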
#[inline(always)] pub fn invalid() -> Self { SpillSlot { bits: 0xffff_ffff } } /// Is this the invalid spillslot? #[inline(always)] pub fn is_invalid(self) -> bool { self == Self::invalid() } /// Is this a valid spillslot (not `SpillSlot::invalid()`)? #[inline(always)] pub fn is_valid(self) -> bool { self != Self::invalid() } } impl core::fmt::Display for SpillSlot { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "stack{}", self.index()) } } /// An `OperandConstraint` specifies where a vreg's value must be /// placed at a particular reference to that vreg via an /// `Operand`. The constraint may be loose -- "any register of a given /// class", for example -- or very specific, such as "this particular /// physical register". The allocator's result will always satisfy all /// given constraints; however, if the input has a combination of /// constraints that are impossible to satisfy, then allocation may /// fail or the allocator may panic (providing impossible constraints /// is usually a programming error in the client, rather than a /// function of bad input). #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum OperandConstraint { /// Any location is fine (register or stack slot). Any, /// Operand must be in a register. Register is read-only for Uses. Reg, /// Operand must be in a fixed register. FixedReg(PReg), /// On defs only: reuse a use's register. Reuse(usize), } impl core::fmt::Display for OperandConstraint { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { match self { Self::Any => write!(f, "any"), Self::Reg => write!(f, "reg"), Self::FixedReg(preg) => write!(f, "fixed({})", preg), Self::Reuse(idx) => write!(f, "reuse({})", idx), } } } /// The "kind" of the operand: whether it reads a vreg (Use) or writes /// a vreg (Def). #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum OperandKind { Def = 0, Use = 1, } /// The "position" of the operand: where it has its read/write /// effects. These are positions "in" the instruction, and "early" and /// "late" are relative to the instruction's main effect or /// computation. In other words, the allocator assumes that the /// instruction (i) performs all reads and writes of "early" operands, /// (ii) does its work, and (iii) performs all reads and writes of its /// "late" operands. /// /// A "write" (def) at "early" or a "read" (use) at "late" may be /// slightly nonsensical, given the above, if the read is necessary /// for the computation or the write is a result of it. A way to think /// of it is that the value (even if a result of execution) *could* /// have been read or written at the given location without causing /// any register-usage conflicts. In other words, these write-early or /// use-late operands ensure that the particular allocations are valid /// for longer than usual and that a register is not reused between /// the use (normally complete at "Early") and the def (normally /// starting at "Late"). See `Operand` for more. #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum OperandPos { Early = 0, Late = 1, } /// An `Operand` encodes everything about a mention of a register in /// an instruction: virtual register number, and any constraint that /// applies to the register at this program point. 
/// /// An Operand may be a use or def (this corresponds to `LUse` and /// `LAllocation` in Ion). /// /// Generally, regalloc2 considers operands to have their effects at /// one of two points that exist in an instruction: "Early" or /// "Late". All operands at a given program-point are assigned /// non-conflicting locations based on their constraints. Each operand /// has a "kind", one of use/def/mod, corresponding to /// read/write/read-write, respectively. /// /// Usually, an instruction's inputs will be "early uses" and outputs /// will be "late defs", though there are valid use-cases for other /// combinations too. For example, a single "instruction" seen by the /// regalloc that lowers into multiple machine instructions and reads /// some of its inputs after it starts to write outputs must either /// make those input(s) "late uses" or those output(s) "early defs" so /// that the conflict (overlap) is properly accounted for. See /// comments on the constructors below for more. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct Operand { /// Bit-pack into 32 bits. /// /// constraint:7 kind:1 pos:1 class:2 vreg:21 /// /// where `constraint` is an `OperandConstraint`, `kind` is an /// `OperandKind`, `pos` is an `OperandPos`, `class` is a /// `RegClass`, and `vreg` is a vreg index. /// /// The constraints are encoded as follows: /// - 1xxxxxx => FixedReg(preg) /// - 01xxxxx => Reuse(index) /// - 0000000 => Any /// - 0000001 => Reg /// - 0000010 => Stack /// - _ => Unused for now bits: u32, } impl Operand { /// Construct a new operand. #[inline(always)] pub fn new( vreg: VReg, constraint: OperandConstraint, kind: OperandKind, pos: OperandPos, ) -> Self { let constraint_field = match constraint { OperandConstraint::Any => 0, OperandConstraint::Reg => 1, OperandConstraint::FixedReg(preg) => { debug_assert_eq!(preg.class(), vreg.class()); 0b1000000 | preg.hw_enc() as u32 } OperandConstraint::Reuse(which) => { debug_assert!(which <= 31); 0b0100000 | which as u32 } }; let class_field = vreg.class() as u8 as u32; let pos_field = pos as u8 as u32; let kind_field = kind as u8 as u32; Operand { bits: vreg.vreg() as u32 | (class_field << 21) | (pos_field << 23) | (kind_field << 24) | (constraint_field << 25), } } /// Create an `Operand` that designates a use of a VReg that must /// be in a register, and that is used at the "before" point, /// i.e., can be overwritten by a result. #[inline(always)] pub fn reg_use(vreg: VReg) -> Self { Operand::new( vreg, OperandConstraint::Reg, OperandKind::Use, OperandPos::Early, ) } /// Create an `Operand` that designates a use of a VReg that must /// be in a register, and that is used up until the "after" point, /// i.e., must not conflict with any results. #[inline(always)] pub fn reg_use_at_end(vreg: VReg) -> Self { Operand::new( vreg, OperandConstraint::Reg, OperandKind::Use, OperandPos::Late, ) } /// Create an `Operand` that designates a definition of a VReg /// that must be in a register, and that occurs at the "after" /// point, i.e. may reuse a register that carried a use into this /// instruction. #[inline(always)] pub fn reg_def(vreg: VReg) -> Self { Operand::new( vreg, OperandConstraint::Reg, OperandKind::Def, OperandPos::Late, ) } /// Create an `Operand` that designates a definition of a VReg /// that must be in a register, and that occurs early at the /// "before" point, i.e., must not conflict with any input to the /// instruction. 
/// /// Note that the register allocator will ensure that such an /// early-def operand is live throughout the instruction, i.e., also /// at the after-point. Hence it will also avoid conflicts with all /// outputs to the instruction. As such, early defs are appropriate /// for use as "temporary registers" that an instruction can use /// throughout its execution separately from the inputs and outputs. #[inline(always)] pub fn reg_def_at_start(vreg: VReg) -> Self { Operand::new( vreg, OperandConstraint::Reg, OperandKind::Def, OperandPos::Early, ) } /// Create an `Operand` that designates a def (and use) of a /// temporary *within* the instruction. This register is assumed /// to be written by the instruction, and will not conflict with /// any input or output, but should not be used after the /// instruction completes. /// /// Note that within a single instruction, the dedicated scratch /// register (as specified in the `MachineEnv`) is also always /// available for use. The register allocator may use the register /// *between* instructions in order to implement certain sequences /// of moves, but will never hold a value live in the scratch /// register across an instruction. #[inline(always)] pub fn reg_temp(vreg: VReg) -> Self { // For now a temp is equivalent to a def-at-start operand, // which gives the desired semantics but does not enforce the // "not reused later" constraint. Operand::new( vreg, OperandConstraint::Reg, OperandKind::Def, OperandPos::Early, ) } /// Create an `Operand` that designates a def of a vreg that must /// reuse the register assigned to an input to the /// instruction. The input is identified by `idx` (is the `idx`th /// `Operand` for the instruction) and must be constraint to a /// register, i.e., be the result of `Operand::reg_use(vreg)`. #[inline(always)] pub fn reg_reuse_def(vreg: VReg, idx: usize) -> Self { Operand::new( vreg, OperandConstraint::Reuse(idx), OperandKind::Def, OperandPos::Late, ) } /// Create an `Operand` that designates a use of a vreg and /// ensures that it is placed in the given, fixed PReg at the /// use. It is guaranteed that the `Allocation` resulting for this /// operand will be `preg`. #[inline(always)] pub fn reg_fixed_use(vreg: VReg, preg: PReg) -> Self { Operand::new( vreg, OperandConstraint::FixedReg(preg), OperandKind::Use, OperandPos::Early, ) } /// Create an `Operand` that designates a def of a vreg and /// ensures that it is placed in the given, fixed PReg at the /// def. It is guaranteed that the `Allocation` resulting for this /// operand will be `preg`. #[inline(always)] pub fn reg_fixed_def(vreg: VReg, preg: PReg) -> Self { Operand::new( vreg, OperandConstraint::FixedReg(preg), OperandKind::Def, OperandPos::Late, ) } /// Same as `reg_fixed_use` but at `OperandPos::Late`. #[inline(always)] pub fn reg_fixed_use_at_end(vreg: VReg, preg: PReg) -> Self { Operand::new( vreg, OperandConstraint::FixedReg(preg), OperandKind::Use, OperandPos::Late, ) } /// Same as `reg_fixed_def` but at `OperandPos::Early`. #[inline(always)] pub fn reg_fixed_def_at_start(vreg: VReg, preg: PReg) -> Self { Operand::new( vreg, OperandConstraint::FixedReg(preg), OperandKind::Def, OperandPos::Early, ) } /// Create an `Operand` that designates a use of a vreg and places /// no constraints on its location (i.e., it can be allocated into /// either a register or on the stack). 
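///
/// For example (the accessors simply decode the constructed operand):
///
/// ```
/// use regalloc2::{Operand, OperandConstraint, OperandKind, OperandPos, RegClass, VReg};
/// let v = VReg::new(42, RegClass::Int);
/// let op = Operand::any_use(v);
/// assert_eq!(op.vreg(), v);
/// assert_eq!(op.kind(), OperandKind::Use);
/// assert_eq!(op.pos(), OperandPos::Early);
/// assert_eq!(op.constraint(), OperandConstraint::Any);
/// ```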
#[inline(always)] pub fn any_use(vreg: VReg) -> Self { Operand::new( vreg, OperandConstraint::Any, OperandKind::Use, OperandPos::Early, ) } /// Create an `Operand` that designates a def of a vreg and places /// no constraints on its location (i.e., it can be allocated into /// either a register or on the stack). #[inline(always)] pub fn any_def(vreg: VReg) -> Self { Operand::new( vreg, OperandConstraint::Any, OperandKind::Def, OperandPos::Late, ) } /// Create an `Operand` that always results in an assignment to the /// given fixed `preg`, *without* tracking liveranges in that /// `preg`. Must only be used for non-allocatable registers. #[inline(always)] pub fn fixed_nonallocatable(preg: PReg) -> Self { Operand::new( VReg::new(VReg::MAX, preg.class()), OperandConstraint::FixedReg(preg), OperandKind::Use, OperandPos::Early, ) } /// Get the virtual register designated by an operand. Every /// operand must name some virtual register, even if it constrains /// the operand to a fixed physical register as well; the vregs /// are used to track dataflow. #[inline(always)] pub fn vreg(self) -> VReg { let vreg_idx = ((self.bits as usize) & VReg::MAX) as usize; VReg::new(vreg_idx, self.class()) } /// Get the register class used by this operand. #[inline(always)] pub fn class(self) -> RegClass { let class_field = (self.bits >> 21) & 3; match class_field { 0 => RegClass::Int, 1 => RegClass::Float, 2 => RegClass::Vector, _ => unreachable!(), } } /// Get the "kind" of this operand: a definition (write) or a use /// (read). #[inline(always)] pub fn kind(self) -> OperandKind { let kind_field = (self.bits >> 24) & 1; match kind_field { 0 => OperandKind::Def, 1 => OperandKind::Use, _ => unreachable!(), } } /// Get the "position" of this operand, i.e., where its read /// and/or write occurs: either before the instruction executes, /// or after it does. Ordinarily, uses occur at "before" and defs /// at "after", though there are cases where this is not true. #[inline(always)] pub fn pos(self) -> OperandPos { let pos_field = (self.bits >> 23) & 1; match pos_field { 0 => OperandPos::Early, 1 => OperandPos::Late, _ => unreachable!(), } } /// Get the "constraint" of this operand, i.e., what requirements /// its allocation must fulfill. #[inline(always)] pub fn constraint(self) -> OperandConstraint { let constraint_field = ((self.bits >> 25) as usize) & 127; if constraint_field & 0b1000000 != 0 { OperandConstraint::FixedReg(PReg::new(constraint_field & 0b0111111, self.class())) } else if constraint_field & 0b0100000 != 0 { OperandConstraint::Reuse(constraint_field & 0b0011111) } else { match constraint_field { 0 => OperandConstraint::Any, 1 => OperandConstraint::Reg, _ => unreachable!(), } } } /// If this operand is for a fixed non-allocatable register (see /// [`Operand::fixed`]), then returns the physical register that it will /// be assigned to. #[inline(always)] pub fn as_fixed_nonallocatable(self) -> Option { match self.constraint() { OperandConstraint::FixedReg(preg) if self.vreg().vreg() == VReg::MAX => Some(preg), _ => None, } } /// Get the raw 32-bit encoding of this operand's fields. #[inline(always)] pub fn bits(self) -> u32 { self.bits } /// Construct an `Operand` from the raw 32-bit encoding returned /// from `bits()`. 
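///
/// For example, the encoding round-trips:
///
/// ```
/// use regalloc2::{Operand, RegClass, VReg};
/// let op = Operand::reg_use(VReg::new(7, RegClass::Float));
/// assert_eq!(Operand::from_bits(op.bits()), op);
/// ```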
#[inline(always)] pub fn from_bits(bits: u32) -> Self { debug_assert!(bits >> 29 <= 4); Operand { bits } } } impl core::fmt::Debug for Operand { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { core::fmt::Display::fmt(self, f) } } impl core::fmt::Display for Operand { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { if let Some(preg) = self.as_fixed_nonallocatable() { return write!(f, "Fixed: {preg}"); } match (self.kind(), self.pos()) { (OperandKind::Def, OperandPos::Late) | (OperandKind::Use, OperandPos::Early) => { write!(f, "{:?}", self.kind())?; } _ => { write!(f, "{:?}@{:?}", self.kind(), self.pos())?; } } write!( f, ": {}{} {}", self.vreg(), match self.class() { RegClass::Int => "i", RegClass::Float => "f", RegClass::Vector => "v", }, self.constraint() ) } } /// An Allocation represents the end result of regalloc for an /// Operand. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct Allocation { /// Bit-pack in 32 bits. /// /// kind:3 unused:1 index:28 bits: u32, } impl core::fmt::Debug for Allocation { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { core::fmt::Display::fmt(self, f) } } impl core::fmt::Display for Allocation { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { match self.kind() { AllocationKind::None => write!(f, "none"), AllocationKind::Reg => write!(f, "{}", self.as_reg().unwrap()), AllocationKind::Stack => write!(f, "{}", self.as_stack().unwrap()), } } } impl Allocation { /// Construct a new Allocation. #[inline(always)] pub(crate) fn new(kind: AllocationKind, index: usize) -> Self { debug_assert!(index < (1 << 28)); Self { bits: ((kind as u8 as u32) << 29) | (index as u32), } } /// Get the "none" allocation, which is distinct from the other /// possibilities and is used to initialize data structures. #[inline(always)] pub fn none() -> Allocation { Allocation::new(AllocationKind::None, 0) } /// Create an allocation into a register. #[inline(always)] pub fn reg(preg: PReg) -> Allocation { Allocation::new(AllocationKind::Reg, preg.index()) } /// Create an allocation into a spillslot. #[inline(always)] pub fn stack(slot: SpillSlot) -> Allocation { Allocation::new(AllocationKind::Stack, slot.bits as usize) } /// Get the allocation's "kind": none, register, or stack (spillslot). #[inline(always)] pub fn kind(self) -> AllocationKind { match (self.bits >> 29) & 7 { 0 => AllocationKind::None, 1 => AllocationKind::Reg, 2 => AllocationKind::Stack, _ => unreachable!(), } } /// Is the allocation "none"? #[inline(always)] pub fn is_none(self) -> bool { self.kind() == AllocationKind::None } /// Is the allocation not "none"? #[inline(always)] pub fn is_some(self) -> bool { self.kind() != AllocationKind::None } /// Is the allocation a register? #[inline(always)] pub fn is_reg(self) -> bool { self.kind() == AllocationKind::Reg } /// Is the allocation on the stack (a spillslot)? #[inline(always)] pub fn is_stack(self) -> bool { self.kind() == AllocationKind::Stack } /// Get the index of the spillslot or register. If register, this /// is an index that can be used by `PReg::from_index()`. #[inline(always)] pub fn index(self) -> usize { (self.bits & ((1 << 28) - 1)) as usize } /// Get the allocation as a physical register, if any. #[inline(always)] pub fn as_reg(self) -> Option { if self.kind() == AllocationKind::Reg { Some(PReg::from_index(self.index())) } else { None } } /// Get the allocation as a spillslot, if any. 
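///
/// For example:
///
/// ```
/// use regalloc2::{Allocation, PReg, RegClass, SpillSlot};
/// let slot = SpillSlot::new(8);
/// assert_eq!(Allocation::stack(slot).as_stack(), Some(slot));
/// assert_eq!(Allocation::reg(PReg::new(0, RegClass::Int)).as_stack(), None);
/// ```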
#[inline(always)] pub fn as_stack(self) -> Option { if self.kind() == AllocationKind::Stack { Some(SpillSlot { bits: self.index() as u32, }) } else { None } } /// Get the raw bits for the packed encoding of this allocation. #[inline(always)] pub fn bits(self) -> u32 { self.bits } /// Construct an allocation from its packed encoding. #[inline(always)] pub fn from_bits(bits: u32) -> Self { debug_assert!(bits >> 29 >= 5); Self { bits } } } /// An allocation is one of two "kinds" (or "none"): register or /// spillslot/stack. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u8)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum AllocationKind { None = 0, Reg = 1, Stack = 2, } /// A trait defined by the regalloc client to provide access to its /// machine-instruction / CFG representation. /// /// (This trait's design is inspired by, and derives heavily from, the /// trait of the same name in regalloc.rs.) pub trait Function { // ------------- // CFG traversal // ------------- /// How many instructions are there? fn num_insts(&self) -> usize; /// How many blocks are there? fn num_blocks(&self) -> usize; /// Get the index of the entry block. fn entry_block(&self) -> Block; /// Provide the range of instruction indices contained in each block. fn block_insns(&self, block: Block) -> InstRange; /// Get CFG successors for a given block. fn block_succs(&self, block: Block) -> &[Block]; /// Get the CFG predecessors for a given block. fn block_preds(&self, block: Block) -> &[Block]; /// Get the block parameters for a given block. fn block_params(&self, block: Block) -> &[VReg]; /// Determine whether an instruction is a return instruction. fn is_ret(&self, insn: Inst) -> bool; /// Determine whether an instruction is the end-of-block /// branch. fn is_branch(&self, insn: Inst) -> bool; /// If `insn` is a branch at the end of `block`, returns the /// outgoing blockparam arguments for the given successor. The /// number of arguments must match the number incoming blockparams /// for each respective successor block. fn branch_blockparams(&self, block: Block, insn: Inst, succ_idx: usize) -> &[VReg]; // -------------------------- // Instruction register slots // -------------------------- /// Get the Operands for an instruction. fn inst_operands(&self, insn: Inst) -> &[Operand]; /// Get the clobbers for an instruction; these are the registers /// that, after the instruction has executed, hold values that are /// arbitrary, separately from the usual outputs to the /// instruction. It is invalid to read a register that has been /// clobbered; the register allocator is free to assume that /// clobbered registers are filled with garbage and available for /// reuse. It will avoid storing any value in a clobbered register /// that must be live across the instruction. /// /// Another way of seeing this is that a clobber is equivalent to /// a "late def" of a fresh vreg that is not used anywhere else /// in the program, with a fixed-register constraint that places /// it in a given PReg chosen by the client prior to regalloc. /// /// Every register written by an instruction must either /// correspond to (be assigned to) an Operand of kind `Def`, or /// else must be a "clobber". 
/// /// This can be used to, for example, describe ABI-specified /// registers that are not preserved by a call instruction, or /// fixed physical registers written by an instruction but not /// used as a vreg output, or fixed physical registers used as /// temps within an instruction out of necessity. /// /// Note that it is legal for a register to be both a clobber and /// an actual def (via pinned vreg or via operand constrained to /// the reg). This is for convenience: e.g., a call instruction /// might have a constant clobber set determined by the ABI, but /// some of those clobbered registers are sometimes return /// value(s). fn inst_clobbers(&self, insn: Inst) -> PRegSet; /// Get the number of `VReg` in use in this function. fn num_vregs(&self) -> usize; /// Get the VRegs for which we should generate value-location /// metadata for debugging purposes. This can be used to generate /// e.g. DWARF with valid prgram-point ranges for each value /// expression in a way that is more efficient than a post-hoc /// analysis of the allocator's output. /// /// Each tuple is (vreg, inclusive_start, exclusive_end, /// label). In the `Output` there will be (label, inclusive_start, /// exclusive_end, alloc)` tuples. The ranges may not exactly /// match -- specifically, the returned metadata may cover only a /// subset of the requested ranges -- if the value is not live for /// the entire requested ranges. /// /// The instruction indices imply a program point just *before* /// the instruction. /// /// Precondition: we require this slice to be sorted by vreg. fn debug_value_labels(&self) -> &[(VReg, Inst, Inst, u32)] { &[] } // -------------- // Spills/reloads // -------------- /// How many logical spill slots does the given regclass require? E.g., on /// a 64-bit machine, spill slots may nominally be 64-bit words, but a /// 128-bit vector value will require two slots. The regalloc will always /// align on this size. /// /// (This trait method's design and doc text derives from /// regalloc.rs' trait of the same name.) fn spillslot_size(&self, regclass: RegClass) -> usize; /// When providing a spillslot number for a multi-slot spillslot, /// do we provide the first or the last? This is usually related /// to which direction the stack grows and different clients may /// have different preferences. fn multi_spillslot_named_by_last_slot(&self) -> bool { false } // ----------- // Misc config // ----------- /// Allow a single instruction to define a vreg multiple times. If /// allowed, the semantics are as if the definition occurs only /// once, and all defs will get the same alloc. This flexibility is /// meant to allow the embedder to more easily aggregate operands /// together in macro/pseudoinstructions, or e.g. add additional /// clobbered vregs without taking care to deduplicate. This may be /// particularly useful when referring to physical registers via /// pinned vregs. It is optional functionality because a strict mode /// (at most one def per vreg) is also useful for finding bugs in /// other applications. fn allow_multiple_vreg_defs(&self) -> bool { false } } /// A position before or after an instruction at which we can make an /// edit. /// /// Note that this differs from `OperandPos` in that the former /// describes specifically a constraint on an operand, while this /// describes a program point. 
`OperandPos` could grow more options in /// the future, for example if we decide that an "early write" or /// "late read" phase makes sense, while `InstPosition` will always /// describe these two insertion points. #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u8)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum InstPosition { Before = 0, After = 1, } /// A program point: a single point before or after a given instruction. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct ProgPoint { bits: u32, } impl core::fmt::Debug for ProgPoint { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!( f, "progpoint{}{}", self.inst().index(), match self.pos() { InstPosition::Before => "-pre", InstPosition::After => "-post", } ) } } impl ProgPoint { /// Create a new ProgPoint before or after the given instruction. #[inline(always)] pub fn new(inst: Inst, pos: InstPosition) -> Self { let bits = ((inst.0 as u32) << 1) | (pos as u8 as u32); Self { bits } } /// Create a new ProgPoint before the given instruction. #[inline(always)] pub fn before(inst: Inst) -> Self { Self::new(inst, InstPosition::Before) } /// Create a new ProgPoint after the given instruction. #[inline(always)] pub fn after(inst: Inst) -> Self { Self::new(inst, InstPosition::After) } /// Get the instruction that this ProgPoint is before or after. #[inline(always)] pub fn inst(self) -> Inst { // Cast to i32 to do an arithmetic right-shift, which will // preserve an `Inst::invalid()` (which is -1, or all-ones). Inst::new(((self.bits as i32) >> 1) as usize) } /// Get the "position" (Before or After) relative to the /// instruction. #[inline(always)] pub fn pos(self) -> InstPosition { match self.bits & 1 { 0 => InstPosition::Before, 1 => InstPosition::After, _ => unreachable!(), } } /// Get the "next" program point: for After, this is the Before of /// the next instruction, while for Before, this is After of the /// same instruction. #[inline(always)] pub fn next(self) -> ProgPoint { Self { bits: self.bits + 1, } } /// Get the "previous" program point, the inverse of `.next()` /// above. #[inline(always)] pub fn prev(self) -> ProgPoint { Self { bits: self.bits - 1, } } /// Convert to a raw encoding in 32 bits. #[inline(always)] pub fn to_index(self) -> u32 { self.bits } /// Construct from the raw 32-bit encoding. #[inline(always)] pub fn from_index(index: u32) -> Self { Self { bits: index } } } /// An instruction to insert into the program to perform some data movement. #[derive(Clone, Debug)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum Edit { /// Move one allocation to another. Each allocation may be a /// register or a stack slot (spillslot). However, stack-to-stack /// moves will never be generated. /// /// `Move` edits will be generated even if src and dst allocation /// are the same if the vreg changes; this allows proper metadata /// tracking even when moves are elided. Move { from: Allocation, to: Allocation }, } /// Wrapper around either an original instruction or an inserted edit. #[derive(Clone, Debug)] pub enum InstOrEdit<'a> { Inst(Inst), Edit(&'a Edit), } /// Iterator over the instructions and edits in a block. pub struct OutputIter<'a> { /// List of edits starting at the first for the current block. edits: &'a [(ProgPoint, Edit)], /// Remaining instructions in the current block. 
inst_range: InstRange, } impl<'a> Iterator for OutputIter<'a> { type Item = InstOrEdit<'a>; fn next(&mut self) -> Option> { // There can't be any edits after the last instruction in a block, so // we don't need to worry about that case. if self.inst_range.len() == 0 { return None; } // Return any edits that happen before the next instruction first. let next_inst = self.inst_range.first(); if let Some((edit, remaining_edits)) = self.edits.split_first() { if edit.0 <= ProgPoint::before(next_inst) { self.edits = remaining_edits; return Some(InstOrEdit::Edit(&edit.1)); } } self.inst_range = self.inst_range.rest(); Some(InstOrEdit::Inst(next_inst)) } } /// A machine environment tells the register allocator which registers /// are available to allocate and what register may be used as a /// scratch register for each class, and some other miscellaneous info /// as well. #[derive(Clone, Debug)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct MachineEnv { /// Preferred physical registers for each class. These are the /// registers that will be allocated first, if free. /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. pub preferred_regs_by_class: [Vec; 3], /// Non-preferred physical registers for each class. These are the /// registers that will be allocated if a preferred register is /// not available; using one of these is considered suboptimal, /// but still better than spilling. /// /// If an explicit scratch register is provided in `scratch_by_class` then /// it must not appear in this list. pub non_preferred_regs_by_class: [Vec; 3], /// Optional dedicated scratch register per class. This is needed to perform /// moves between registers when cyclic move patterns occur. The /// register should not be placed in either the preferred or /// non-preferred list (i.e., it is not otherwise allocatable). /// /// Note that the register allocator will freely use this register /// between instructions, but *within* the machine code generated /// by a single (regalloc-level) instruction, the client is free /// to use the scratch register. E.g., if one "instruction" causes /// the emission of two machine-code instructions, this lowering /// can use the scratch register between them. /// /// If a scratch register is not provided then the register allocator will /// automatically allocate one as needed, spilling a value to the stack if /// necessary. pub scratch_by_class: [Option; 3], /// Some `PReg`s can be designated as locations on the stack rather than /// actual registers. These can be used to tell the register allocator about /// pre-defined stack slots used for function arguments and return values. /// /// `PReg`s in this list cannot be used as an allocatable or scratch /// register. pub fixed_stack_slots: Vec, } /// The output of the register allocator. #[derive(Clone, Debug)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct Output { /// How many spillslots are needed in the frame? pub num_spillslots: usize, /// Edits (insertions or removals). Guaranteed to be sorted by /// program point. pub edits: Vec<(ProgPoint, Edit)>, /// Allocations for each operand. Mapping from instruction to /// allocations provided by `inst_alloc_offsets` below. pub allocs: Vec, /// Allocation offset in `allocs` for each instruction. 
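///
/// For instruction `i`, its allocations are
/// `allocs[inst_alloc_offsets[i] as usize .. inst_alloc_offsets[i + 1] as usize]`
/// (running to the end of `allocs` for the last instruction); see
/// `Output::inst_allocs` for a convenience accessor.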
pub inst_alloc_offsets: Vec, /// Debug info: a labeled value (as applied to vregs by /// `Function::debug_value_labels()` on the input side) is located /// in the given allocation from the first program point /// (inclusive) to the second (exclusive). Guaranteed to be sorted /// by label and program point, and the ranges are guaranteed to /// be disjoint. pub debug_locations: Vec<(u32, ProgPoint, ProgPoint, Allocation)>, /// Internal stats from the allocator. pub stats: ion::Stats, } impl Output { /// Get the allocations assigned to a given instruction. pub fn inst_allocs(&self, inst: Inst) -> &[Allocation] { let start = self.inst_alloc_offsets[inst.index()] as usize; let end = if inst.index() + 1 == self.inst_alloc_offsets.len() { self.allocs.len() } else { self.inst_alloc_offsets[inst.index() + 1] as usize }; &self.allocs[start..end] } /// Returns an iterator over the instructions and edits in a block, in /// order. pub fn block_insts_and_edits(&self, func: &impl Function, block: Block) -> OutputIter<'_> { let inst_range = func.block_insns(block); let edit_idx = self .edits .binary_search_by(|&(pos, _)| { // This predicate effectively searches for a point *just* before // the first ProgPoint. This never returns Ordering::Equal, but // binary_search_by returns the index of where it would have // been inserted in Err. if pos < ProgPoint::before(inst_range.first()) { core::cmp::Ordering::Less } else { core::cmp::Ordering::Greater } }) .unwrap_err(); let edits = &self.edits[edit_idx..]; OutputIter { inst_range, edits } } } /// An error that prevents allocation. #[derive(Clone, Debug)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum RegAllocError { /// Critical edge is not split between given blocks. CritEdge(Block, Block), /// Invalid SSA for given vreg at given inst: multiple defs or /// illegal use. `inst` may be `Inst::invalid()` if this concerns /// a block param. SSA(VReg, Inst), /// Invalid basic block: does not end in branch/ret, or contains a /// branch/ret in the middle. BB(Block), /// Invalid branch: operand count does not match sum of block /// params of successor blocks. Branch(Inst), /// A VReg is live-in on entry; this is not allowed. EntryLivein, /// A branch has non-blockparam arg(s) and at least one of the /// successor blocks has more than one predecessor, forcing /// edge-moves before this branch. This is disallowed because it /// places a use after the edge moves occur; insert an edge block /// to avoid the situation. DisallowedBranchArg(Inst), /// Too many pinned VRegs + Reg-constrained Operands are live at /// once, making allocation impossible. TooManyLiveRegs, } impl core::fmt::Display for RegAllocError { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "{:?}", self) } } #[cfg(feature = "std")] impl std::error::Error for RegAllocError {} /// Run the allocator. pub fn run( func: &F, env: &MachineEnv, options: &RegallocOptions, ) -> Result { ion::run(func, env, options.verbose_log, options.validate_ssa) } /// Options for allocation. #[derive(Clone, Copy, Debug, Default)] pub struct RegallocOptions { /// Add extra verbosity to debug logs. pub verbose_log: bool, /// Run the SSA validator before allocating registers. pub validate_ssa: bool, } regalloc2-0.10.2/src/moves.rs000066400000000000000000000446431467034227200157540ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. 
*/

use crate::{ion::data_structures::u64_key, Allocation, PReg};
use core::fmt::Debug;
use smallvec::{smallvec, SmallVec};

/// A list of moves to be performed in sequence, with auxiliary data
/// attached to each.
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;

/// A list of moves to be performed in sequence, like a
/// `MoveVec`, except that an unchosen scratch space may occur as
/// well, represented by `Allocation::none()`.
#[derive(Clone, Debug)]
pub enum MoveVecWithScratch<T> {
    /// No scratch was actually used.
    NoScratch(MoveVec<T>),
    /// A scratch space was used.
    Scratch(MoveVec<T>),
}

/// A `ParallelMoves` represents a list of alloc-to-alloc moves that
/// must happen in parallel -- i.e., all reads of sources semantically
/// happen before all writes of destinations, and destinations are
/// allowed to overwrite sources. It can compute a list of sequential
/// moves that will produce the equivalent data movement, possibly
/// using a scratch register if one is necessary.
pub struct ParallelMoves<T: Clone + Copy + Default> {
    parallel_moves: MoveVec<T>,
}

impl<T: Clone + Copy + Default> ParallelMoves<T> {
    pub fn new() -> Self {
        Self {
            parallel_moves: smallvec![],
        }
    }

    pub fn add(&mut self, from: Allocation, to: Allocation, t: T) {
        self.parallel_moves.push((from, to, t));
    }

    fn sources_overlap_dests(&self) -> bool {
        // Assumes `parallel_moves` has already been sorted by `dst`
        // in `resolve()` below. The O(n log n) cost of this loop is no
        // worse than the sort we already did.
        for &(src, _, _) in &self.parallel_moves {
            if self
                .parallel_moves
                .binary_search_by_key(&src, |&(_, dst, _)| dst)
                .is_ok()
            {
                return true;
            }
        }
        false
    }

    /// Resolve the parallel-moves problem to a sequence of separate
    /// moves, such that the combined effect of the sequential moves
    /// is as-if all of the moves added to this `ParallelMoves`
    /// resolver happened in parallel.
    ///
    /// Sometimes, if there is a cycle, a scratch register is
    /// necessary to allow the moves to occur sequentially. In this
    /// case, `Allocation::none()` is returned to represent the
    /// scratch register. The caller may choose to always hold a
    /// separate scratch register unused to allow this to be trivially
    /// rewritten; or may dynamically search for or create a free
    /// register as needed, if none are available.
    pub fn resolve(mut self) -> MoveVecWithScratch<T> {
        // Easy case: zero or one move. Just return our vec.
        if self.parallel_moves.len() <= 1 {
            return MoveVecWithScratch::NoScratch(self.parallel_moves);
        }

        // Sort moves so that we can efficiently test for presence.
        // For that purpose it doesn't matter whether we sort by
        // source or destination, but later we'll want them sorted
        // by destination.
        self.parallel_moves
            .sort_by_key(|&(src, dst, _)| u64_key(dst.bits(), src.bits()));

        // Duplicate moves cannot change the semantics of this
        // parallel move set, so remove them. This is cheap since we
        // just sorted the list.
        self.parallel_moves.dedup();

        // General case: some moves overwrite dests that other moves
        // read as sources. We'll use a general algorithm.
        //
        // *Important property*: because we expect that each register
        // has only one writer (otherwise the effect of the parallel
        // move is undefined), each move can only block one other move
        // (with its one source corresponding to the one writer of
        // that source). Thus, we *can only have simple cycles* (those
        // that are a ring of nodes, i.e., with only one path from a
        // node back to itself); there are no SCCs that are more
        // complex than that.
We leverage this fact below to avoid // having to do a full Tarjan SCC DFS (with lowest-index // computation, etc.): instead, as soon as we find a cycle, we // know we have the full cycle and we can do a cyclic move // sequence and continue. // Check that each destination has only one writer. if cfg!(debug_assertions) { let mut last_dst = None; for &(_, dst, _) in &self.parallel_moves { if last_dst.is_some() { debug_assert!(last_dst.unwrap() != dst); } last_dst = Some(dst); } } // Moving an allocation into itself is technically a cycle but // should have no effect, as long as there are no other writes // into that destination. self.parallel_moves.retain(|&mut (src, dst, _)| src != dst); // Do any dests overlap sources? If not, we can also just // return the list. if !self.sources_overlap_dests() { return MoveVecWithScratch::NoScratch(self.parallel_moves); } // Construct a mapping from move indices to moves they must // come before. Any given move must come before a move that // overwrites its destination; we have moves sorted by dest // above so we can efficiently find such a move, if any. const NONE: usize = usize::MAX; let must_come_before: SmallVec<[usize; 16]> = self .parallel_moves .iter() .map(|&(src, _, _)| { self.parallel_moves .binary_search_by_key(&src, |&(_, dst, _)| dst) .unwrap_or(NONE) }) .collect(); // Do a simple stack-based DFS and emit moves in postorder, // then reverse at the end for RPO. Unlike Tarjan's SCC // algorithm, we can emit a cycle as soon as we find one, as // noted above. #[derive(Clone, Copy, Debug, Eq, PartialEq)] enum State { /// Not on stack, not visited ToDo, /// On stack, not yet visited Pending, /// Visited Done, } let mut ret: MoveVec = smallvec![]; let mut stack: SmallVec<[usize; 16]> = smallvec![]; let mut state: SmallVec<[State; 16]> = smallvec![State::ToDo; self.parallel_moves.len()]; let mut scratch_used = false; while let Some(next) = state.iter().position(|&state| state == State::ToDo) { stack.push(next); state[next] = State::Pending; while let Some(&top) = stack.last() { debug_assert_eq!(state[top], State::Pending); let next = must_come_before[top]; if next == NONE || state[next] == State::Done { ret.push(self.parallel_moves[top]); state[top] = State::Done; stack.pop(); while let Some(top) = stack.pop() { ret.push(self.parallel_moves[top]); state[top] = State::Done; } } else if state[next] == State::ToDo { stack.push(next); state[next] = State::Pending; } else { // Found a cycle -- emit a cyclic-move sequence // for the cycle on the top of stack, then normal // moves below it. Recall that these moves will be // reversed in sequence, so from the original // parallel move set // // { B := A, C := B, A := B } // // we will generate something like: // // A := scratch // B := A // C := B // scratch := C // // which will become: // // scratch := C // C := B // B := A // A := scratch debug_assert_ne!(top, next); state[top] = State::Done; stack.pop(); let (scratch_src, dst, dst_t) = self.parallel_moves[top]; scratch_used = true; ret.push((Allocation::none(), dst, dst_t)); while let Some(move_idx) = stack.pop() { state[move_idx] = State::Done; ret.push(self.parallel_moves[move_idx]); if move_idx == next { break; } } ret.push((scratch_src, Allocation::none(), T::default())); } } } ret.reverse(); if scratch_used { MoveVecWithScratch::Scratch(ret) } else { MoveVecWithScratch::NoScratch(ret) } } } impl MoveVecWithScratch { /// Fills in the scratch space, if needed, with the given /// register/allocation and returns a final list of moves. 
The /// scratch register must not occur anywhere in the parallel-move /// problem given to the resolver that produced this /// `MoveVecWithScratch`. pub fn with_scratch(self, scratch: Allocation) -> MoveVec { match self { MoveVecWithScratch::NoScratch(moves) => moves, MoveVecWithScratch::Scratch(mut moves) => { for (src, dst, _) in &mut moves { debug_assert!( *src != scratch && *dst != scratch, "Scratch register should not also be an actual source or dest of moves" ); debug_assert!( !(src.is_none() && dst.is_none()), "Move resolution should not have produced a scratch-to-scratch move" ); if src.is_none() { *src = scratch; } if dst.is_none() { *dst = scratch; } } moves } } } /// Unwrap without a scratch register. pub fn without_scratch(self) -> Option> { match self { MoveVecWithScratch::NoScratch(moves) => Some(moves), MoveVecWithScratch::Scratch(..) => None, } } /// Do we need a scratch register? pub fn needs_scratch(&self) -> bool { match self { MoveVecWithScratch::NoScratch(..) => false, MoveVecWithScratch::Scratch(..) => true, } } } /// Final stage of move resolution: finding or using scratch /// registers, creating them if necessary by using stackslots, and /// ensuring that the final list of moves contains no stack-to-stack /// moves. /// /// The resolved list of moves may need one or two scratch registers, /// and maybe a stackslot, to ensure these conditions. Our general /// strategy is in two steps. /// /// First, we find a scratch register, so we only have to worry about /// a list of moves, all with real locations as src and dest. If we're /// lucky and there are any registers not allocated at this /// program-point, we can use a real register. Otherwise, we use an /// extra stackslot. This is fine, because at this step, /// stack-to-stack moves are OK. /// /// Then, we resolve stack-to-stack moves into stack-to-reg / /// reg-to-stack pairs. For this, we try to allocate a second free /// register. If unavailable, we create a new scratch stackslot to /// serve as a backup of one of the in-use registers, then borrow that /// register as the scratch register in the middle of stack-to-stack /// moves. pub struct MoveAndScratchResolver where GetReg: FnMut() -> Option, GetStackSlot: FnMut() -> Allocation, IsStackAlloc: Fn(Allocation) -> bool, { /// Closure that finds us a PReg at the current location. pub find_free_reg: GetReg, /// Closure that gets us a stackslot, if needed. pub get_stackslot: GetStackSlot, /// Closure to determine whether an `Allocation` refers to a stack slot. pub is_stack_alloc: IsStackAlloc, /// Use this register if no free register is available to use as a /// temporary in stack-to-stack moves. If we do use this register /// for that purpose, its value will be restored by the end of the /// move sequence. Provided by caller and statically chosen. This is /// a very last-ditch option, so static choice is OK. pub borrowed_scratch_reg: PReg, } impl MoveAndScratchResolver where GetReg: FnMut() -> Option, GetStackSlot: FnMut() -> Allocation, IsStackAlloc: Fn(Allocation) -> bool, { pub fn compute( mut self, moves: MoveVecWithScratch, ) -> MoveVec { let moves = if moves.needs_scratch() { // Now, find a scratch allocation in order to resolve cycles. let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)()); trace!("scratch resolver: scratch alloc {:?}", scratch); moves.with_scratch(scratch) } else { moves.without_scratch().unwrap() }; // Do we have any stack-to-stack moves? Fast return if not. 
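        // Caller-side sketch of the whole move-resolution pipeline; the
        // allocations `a0`/`a1`, the `free_int_regs` list, and the
        // `alloc_spillslot` helper are hypothetical client state, not
        // crate API:
        //
        //     let mut par = ParallelMoves::new();
        //     par.add(a0, a1, ());
        //     par.add(a1, a0, ()); // a swap, so resolve() needs a scratch
        //     let with_scratch = par.resolve();
        //     let resolver = MoveAndScratchResolver {
        //         find_free_reg: || free_int_regs.pop().map(Allocation::reg),
        //         get_stackslot: || alloc_spillslot(),
        //         is_stack_alloc: |a| a.is_stack(),
        //         borrowed_scratch_reg: PReg::new(0, RegClass::Int),
        //     };
        //     let final_moves = resolver.compute(with_scratch);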
let stack_to_stack = moves .iter() .any(|&(src, dst, _)| self.is_stack_to_stack_move(src, dst)); if !stack_to_stack { return moves; } // Allocate a scratch register for stack-to-stack move expansion. let (scratch_reg, save_slot) = if let Some(reg) = (self.find_free_reg)() { trace!( "scratch resolver: have free stack-to-stack scratch preg: {:?}", reg ); (reg, None) } else { let reg = Allocation::reg(self.borrowed_scratch_reg); // Stackslot into which we need to save the stack-to-stack // scratch reg before doing any stack-to-stack moves, if we stole // the reg. let save = (self.get_stackslot)(); trace!( "scratch resolver: stack-to-stack borrowing {:?} with save stackslot {:?}", reg, save ); (reg, Some(save)) }; // Mutually exclusive flags for whether either scratch_reg or // save_slot need to be restored from the other. Initially, // scratch_reg has a value we should preserve and save_slot // has garbage. let mut scratch_dirty = false; let mut save_dirty = true; let mut result = smallvec![]; for &(src, dst, data) in &moves { // Do we have a stack-to-stack move? If so, resolve. if self.is_stack_to_stack_move(src, dst) { trace!("scratch resolver: stack to stack: {:?} -> {:?}", src, dst); // If the selected scratch register is stolen from the // set of in-use registers, then we need to save the // current contents of the scratch register before using // it as a temporary. if let Some(save_slot) = save_slot { // However we may have already done so for an earlier // stack-to-stack move in which case we don't need // to do it again. if save_dirty { debug_assert!(!scratch_dirty); result.push((scratch_reg, save_slot, T::default())); save_dirty = false; } } // We can't move directly from one stack slot to another // on any architecture we care about, so stack-to-stack // moves must go via a scratch register. result.push((src, scratch_reg, data)); result.push((scratch_reg, dst, data)); scratch_dirty = true; } else { // This is not a stack-to-stack move, but we need to // make sure that the scratch register is in the correct // state if this move interacts with that register. if src == scratch_reg && scratch_dirty { // We're copying from the scratch register so if // it was stolen for a stack-to-stack move then we // need to make sure it has the correct contents, // not whatever was temporarily copied into it. If // we got scratch_reg from find_free_reg then it // had better not have been used as the source of // a move. So if we're here it's because we fell // back to the caller-provided last-resort scratch // register, and we must therefore have a save-slot // allocated too. debug_assert!(!save_dirty); let save_slot = save_slot.expect("move source should not be a free register"); result.push((save_slot, scratch_reg, T::default())); scratch_dirty = false; } if dst == scratch_reg { // We are writing something to the scratch register // so it doesn't matter what was there before. We // can avoid restoring it, but we will need to save // it again before the next stack-to-stack move. scratch_dirty = false; save_dirty = true; } result.push((src, dst, data)); } } // Now that all the stack-to-stack moves are done, restore the // scratch register if necessary. 
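        // A worked example of the expansion performed above: for a single
        // stack-to-stack move `stack0 -> stack1` with no free register, a
        // borrowed scratch register `r`, and save slot `save`, the emitted
        // sequence is:
        //
        //     r      -> save     (preserve r's live value)
        //     stack0 -> r
        //     r      -> stack1
        //     ...                (remaining moves)
        //     save   -> r        (restore r; done just below)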
if let Some(save_slot) = save_slot { if scratch_dirty { debug_assert!(!save_dirty); result.push((save_slot, scratch_reg, T::default())); } } trace!("scratch resolver: got {:?}", result); result } fn is_stack_to_stack_move(&self, src: Allocation, dst: Allocation) -> bool { (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst) } } regalloc2-0.10.2/src/postorder.rs000066400000000000000000000026741467034227200166420ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ //! Fast postorder computation. use crate::Block; use alloc::vec; use alloc::vec::Vec; use smallvec::{smallvec, SmallVec}; pub fn calculate<'a, SuccFn: Fn(Block) -> &'a [Block]>( num_blocks: usize, entry: Block, succ_blocks: SuccFn, ) -> Vec { let mut ret = vec![]; // State: visited-block map, and explicit DFS stack. let mut visited = vec![false; num_blocks]; struct State<'a> { block: Block, succs: &'a [Block], next_succ: usize, } let mut stack: SmallVec<[State; 64]> = smallvec![]; visited[entry.index()] = true; stack.push(State { block: entry, succs: succ_blocks(entry), next_succ: 0, }); while let Some(ref mut state) = stack.last_mut() { // Perform one action: push to new succ, skip an already-visited succ, or pop. if state.next_succ < state.succs.len() { let succ = state.succs[state.next_succ]; state.next_succ += 1; if !visited[succ.index()] { visited[succ.index()] = true; stack.push(State { block: succ, succs: succ_blocks(succ), next_succ: 0, }); } } else { ret.push(state.block); stack.pop(); } } ret } regalloc2-0.10.2/src/serialize.rs000066400000000000000000000224011467034227200165760ustar00rootroot00000000000000use core::fmt; use alloc::{format, string::ToString, vec::Vec}; use serde::{Deserialize, Serialize}; use crate::{Block, Function, Inst, InstRange, MachineEnv, Operand, PRegSet, RegClass, VReg}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] enum InstOpcode { Op, Ret, Branch, } impl fmt::Display for InstOpcode { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { InstOpcode::Op => f.write_str("op"), InstOpcode::Ret => f.write_str("ret"), InstOpcode::Branch => f.write_str("branch"), } } } #[derive(Clone, Debug, Serialize, Deserialize)] struct InstData { op: InstOpcode, operands: Vec, clobbers: PRegSet, } /// A wrapper around a `Function` and `MachineEnv` that can be serialized and /// deserialized. /// /// The serialized form of this structure is not stable: it is intended to be /// deserialized with the exact same version of regalloc2 as the one that it /// was created with. #[derive(Serialize, Deserialize)] pub struct SerializableFunction { machine_env: MachineEnv, entry_block: Block, insts: Vec, blocks: Vec, block_preds: Vec>, block_succs: Vec>, block_params_in: Vec>, block_params_out: Vec>>, num_vregs: usize, debug_value_labels: Vec<(VReg, Inst, Inst, u32)>, spillslot_size: Vec, multi_spillslot_named_by_last_slot: bool, allow_multiple_vreg_defs: bool, } impl SerializableFunction { /// Creates a new `SerializableFunction` from an arbitrary `Function` and /// `MachineEnv`. 
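    // An illustrative round-trip sketch: `my_func` is any client `Function`
    // implementation and `env` its `MachineEnv`; the use of `serde_json` is
    // an assumption for the example (any serde format works) and is not a
    // dependency of this crate:
    //
    //     let wrapped = SerializableFunction::new(&my_func, env.clone());
    //     let text = serde_json::to_string(&wrapped)?;
    //     let restored: SerializableFunction = serde_json::from_str(&text)?;
    //     let output = regalloc2::run(&restored, restored.machine_env(),
    //                                 &RegallocOptions::default())?;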
pub fn new(func: &impl Function, machine_env: MachineEnv) -> Self { Self { machine_env, entry_block: func.entry_block(), insts: (0..func.num_insts()) .map(|i| { let inst = Inst::new(i); let op = if func.is_ret(inst) { InstOpcode::Ret } else if func.is_branch(inst) { InstOpcode::Branch } else { InstOpcode::Op }; InstData { op, operands: func.inst_operands(inst).to_vec(), clobbers: func.inst_clobbers(inst), } }) .collect(), blocks: (0..func.num_blocks()) .map(|i| { let block = Block::new(i); func.block_insns(block) }) .collect(), block_preds: (0..func.num_blocks()) .map(|i| { let block = Block::new(i); func.block_preds(block).to_vec() }) .collect(), block_succs: (0..func.num_blocks()) .map(|i| { let block = Block::new(i); func.block_succs(block).to_vec() }) .collect(), block_params_in: (0..func.num_blocks()) .map(|i| { let block = Block::new(i); func.block_params(block).to_vec() }) .collect(), block_params_out: (0..func.num_blocks()) .map(|i| { let block = Block::new(i); let inst = func.block_insns(block).last(); (0..func.block_succs(block).len()) .map(|succ_idx| func.branch_blockparams(block, inst, succ_idx).to_vec()) .collect() }) .collect(), num_vregs: func.num_vregs(), debug_value_labels: func.debug_value_labels().to_vec(), spillslot_size: [ func.spillslot_size(RegClass::Int), func.spillslot_size(RegClass::Float), func.spillslot_size(RegClass::Vector), ] .to_vec(), multi_spillslot_named_by_last_slot: func.multi_spillslot_named_by_last_slot(), allow_multiple_vreg_defs: func.allow_multiple_vreg_defs(), } } /// Returns the `MachineEnv` associated with this function. pub fn machine_env(&self) -> &MachineEnv { &self.machine_env } } impl Function for SerializableFunction { fn num_insts(&self) -> usize { self.insts.len() } fn num_blocks(&self) -> usize { self.blocks.len() } fn entry_block(&self) -> Block { self.entry_block } fn block_insns(&self, block: Block) -> InstRange { self.blocks[block.index()] } fn block_succs(&self, block: Block) -> &[Block] { &self.block_succs[block.index()][..] } fn block_preds(&self, block: Block) -> &[Block] { &self.block_preds[block.index()][..] } fn block_params(&self, block: Block) -> &[VReg] { &self.block_params_in[block.index()][..] } fn is_ret(&self, insn: Inst) -> bool { self.insts[insn.index()].op == InstOpcode::Ret } fn is_branch(&self, insn: Inst) -> bool { self.insts[insn.index()].op == InstOpcode::Branch } fn branch_blockparams(&self, block: Block, _: Inst, succ: usize) -> &[VReg] { &self.block_params_out[block.index()][succ][..] } fn inst_operands(&self, insn: Inst) -> &[Operand] { &self.insts[insn.index()].operands[..] } fn inst_clobbers(&self, insn: Inst) -> PRegSet { self.insts[insn.index()].clobbers } fn num_vregs(&self) -> usize { self.num_vregs } fn debug_value_labels(&self) -> &[(VReg, Inst, Inst, u32)] { &self.debug_value_labels[..] 
} fn spillslot_size(&self, regclass: RegClass) -> usize { self.spillslot_size[regclass as usize] } fn multi_spillslot_named_by_last_slot(&self) -> bool { self.multi_spillslot_named_by_last_slot } fn allow_multiple_vreg_defs(&self) -> bool { self.allow_multiple_vreg_defs } } impl fmt::Debug for SerializableFunction { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{{\n")?; write!(f, " machine_env: {:#?}\n", self.machine_env())?; write!( f, " spillslot_size(Int): {}\n", self.spillslot_size(RegClass::Int) )?; write!( f, " spillslot_size(Float): {}\n", self.spillslot_size(RegClass::Float) )?; write!( f, " spillslot_size(Vector): {}\n", self.spillslot_size(RegClass::Vector) )?; write!( f, " multi_spillslot_named_by_last_slot: {}\n", self.multi_spillslot_named_by_last_slot() )?; write!( f, " allow_multiple_vreg_defs: {}\n", self.allow_multiple_vreg_defs() )?; for (i, blockrange) in self.blocks.iter().enumerate() { let succs = self.block_succs[i] .iter() .map(|b| b.index()) .collect::>(); let preds = self.block_preds[i] .iter() .map(|b| b.index()) .collect::>(); let params_in = self.block_params_in[i] .iter() .map(|v| format!("v{}", v.vreg())) .collect::>() .join(", "); let params_out = self.block_params_out[i] .iter() .enumerate() .map(|(succ_idx, vec)| { let succ = self.block_succs[i][succ_idx]; let params = vec .iter() .map(|v| format!("v{}", v.vreg())) .collect::>() .join(", "); format!("block{}({})", succ.index(), params) }) .collect::>() .join(", "); write!( f, " block{i}({params_in}): # succs:{succs:?} preds:{preds:?}\n", )?; for inst in blockrange.iter() { let ops: Vec<_> = self .inst_operands(inst) .iter() .map(|op| op.to_string()) .collect(); let ops = ops.join(", "); let clobbers = if self.inst_clobbers(inst) == PRegSet::empty() { format!("") } else { let clobbers: Vec<_> = self .inst_clobbers(inst) .into_iter() .map(|preg| format!("Clobber: {preg}")) .collect(); format!(", {}", clobbers.join(", ")) }; write!( f, " inst{}: {} {ops}{clobbers}\n", inst.index(), self.insts[inst.index()].op, )?; if let InstOpcode::Branch = self.insts[inst.index()].op { write!(f, " params: {}\n", params_out)?; } } } write!(f, "}}\n")?; Ok(()) } } regalloc2-0.10.2/src/ssa.rs000066400000000000000000000123341467034227200154010ustar00rootroot00000000000000/* * Released under the terms of the Apache 2.0 license with LLVM * exception. See `LICENSE` for details. */ //! SSA-related utilities. use alloc::vec; use hashbrown::HashSet; use crate::cfg::CFGInfo; use crate::{Block, Function, Inst, OperandKind, RegAllocError, VReg}; pub fn validate_ssa(f: &F, cfginfo: &CFGInfo) -> Result<(), RegAllocError> { // For every block param and inst def, check that this is the only def. let mut defined_in = vec![Block::invalid(); f.num_vregs()]; for block in 0..f.num_blocks() { let block = Block::new(block); let mut def = |vreg: VReg, inst| { if defined_in[vreg.vreg()].is_valid() { trace!("Multiple def constraints for {:?}", vreg); Err(RegAllocError::SSA(vreg, inst)) } else { defined_in[vreg.vreg()] = block; Ok(()) } }; for ¶m in f.block_params(block) { def(param, Inst::invalid())?; } for inst in f.block_insns(block).iter() { for operand in f.inst_operands(inst) { if let OperandKind::Def = operand.kind() { def(operand.vreg(), inst)?; } } } } // Walk the blocks in arbitrary order. Check, for every use, that // the def is either in the same block in an earlier inst, or is // defined (by inst or blockparam) in some other block that // dominates this one. 
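    // Example of what this check catches: in a diamond CFG
    // entry -> {a, b} -> join, a use in `join` of a vreg defined only in
    // `a` is invalid, because `a` does not dominate `join`; a def in
    // `entry` dominates every block and is fine. A caller-side sketch
    // (assuming `CFGInfo::new` as the constructor; enabling
    // `RegallocOptions::validate_ssa` makes `run` do this internally):
    //
    //     let cfginfo = CFGInfo::new(&func)?;
    //     validate_ssa(&func, &cfginfo)?;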
let mut local = HashSet::new(); for block in 0..f.num_blocks() { let block = Block::new(block); local.clear(); local.extend(f.block_params(block)); for iix in f.block_insns(block).iter() { let operands = f.inst_operands(iix); for operand in operands { // Fixed registers uses will likely not be SSA, but they also // won't receive assignments. if operand.as_fixed_nonallocatable().is_some() { continue; } match operand.kind() { OperandKind::Use => { let def_block = defined_in[operand.vreg().vreg()]; let okay = def_block.is_valid() && if def_block == block { local.contains(&operand.vreg()) } else { cfginfo.dominates(def_block, block) }; if !okay { trace!("Invalid use {:?}", operand.vreg()); return Err(RegAllocError::SSA(operand.vreg(), iix)); } } OperandKind::Def => { // Check all the uses in this instruction // first, before recording its defs below. } } } // In SSA form, an instruction can't use a VReg that it // also defines. So only record this instruction's defs // after its uses have been checked. for operand in operands { if let OperandKind::Def = operand.kind() { local.insert(operand.vreg()); } } } } // Check that the length of branch args matches the sum of the // number of blockparams in their succs, and that the end of every // block ends in this branch or in a ret, and that there are no // other branches or rets in the middle of the block. for block in 0..f.num_blocks() { let block = Block::new(block); let insns = f.block_insns(block); for insn in insns.iter() { if insn == insns.last() { if !(f.is_branch(insn) || f.is_ret(insn)) { trace!("block {:?} is not terminated by a branch or ret!", block); return Err(RegAllocError::BB(block)); } if f.is_branch(insn) { for (i, &succ) in f.block_succs(block).iter().enumerate() { let blockparams_in = f.block_params(succ); let blockparams_out = f.branch_blockparams(block, insn, i); if blockparams_in.len() != blockparams_out.len() { trace!( "Mismatch on block params, found {} expected {}", blockparams_out.len(), blockparams_in.len() ); return Err(RegAllocError::Branch(insn)); } } } } else { if f.is_branch(insn) || f.is_ret(insn) { trace!("Block terminator found in the middle of a block"); return Err(RegAllocError::BB(block)); } } } } // Check that the entry block has no block args: otherwise it is // undefined what their value would be. if f.block_params(f.entry_block()).len() > 0 { trace!("Entry block contains block args"); return Err(RegAllocError::BB(f.entry_block())); } Ok(()) }
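
// An illustrative end-to-end sketch, not part of the crate's API: run the
// allocator with SSA validation enabled (which exercises `validate_ssa`
// above), then walk each block's final instruction/edit stream. The loop
// body stands in for a hypothetical client's emission logic.
#[allow(dead_code)]
fn example_pipeline<F: Function>(
    func: &F,
    env: &crate::MachineEnv,
) -> Result<crate::Output, RegAllocError> {
    let opts = crate::RegallocOptions {
        validate_ssa: true,
        ..Default::default()
    };
    let output = crate::run(func, env, &opts)?;

    for block in 0..func.num_blocks() {
        let block = Block::new(block);
        for inst_or_edit in output.block_insts_and_edits(func, block) {
            match inst_or_edit {
                crate::InstOrEdit::Inst(inst) => {
                    // The final operand assignments for this instruction.
                    let _allocs = output.inst_allocs(inst);
                }
                crate::InstOrEdit::Edit(edit) => {
                    // An edit (a move between allocations) that must be
                    // emitted before the next instruction.
                    let _ = edit;
                }
            }
        }
    }
    Ok(output)
}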