Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
rust source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.88.0
rustc 1.9.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
Options
Source code
#![feature(core_intrinsics)] #![feature(pointer_is_aligned)] #![feature(strict_provenance)] pub unsafe fn a(ptr: *const (), align: usize) -> bool { unsafe { std::intrinsics::assume(align.is_power_of_two()); } // ptr.align_offset(align) == 0 align_offset(ptr, align) == 0 } pub unsafe fn a1(ptr: *const ()) -> bool { ptr.align_offset(1) == 0 } pub unsafe fn a2(ptr: *const ()) -> bool { ptr.align_offset(2) == 0 } pub unsafe fn a4(ptr: *const ()) -> bool { ptr.align_offset(4) == 0 } pub unsafe fn a64(ptr: *const ()) -> bool { ptr.align_offset(64) == 0 } unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize { // FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <= // 1, where the method versions of these operations are not inlined. use std::intrinsics::{ cttz_nonzero, exact_div, unchecked_rem, unchecked_shl, unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub, }; /// Calculate multiplicative modular inverse of `x` modulo `m`. /// /// This implementation is tailored for `align_offset` and has following preconditions: /// /// * `m` is a power-of-two; /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead) /// /// Implementation of this function shall not panic. Ever. #[inline] unsafe fn mod_inv(x: usize, m: usize) -> usize { /// Multiplicative modular inverse table modulo 2⁴ = 16. /// /// Note, that this table does not contain values where inverse does not exist (i.e., for /// `0⁻¹ mod 16`, `2⁻¹ mod 16`, etc.) const INV_TABLE_MOD_16: [u8; 8] = [1, 11, 13, 7, 9, 3, 5, 15]; /// Modulo for which the `INV_TABLE_MOD_16` is intended. const INV_TABLE_MOD: usize = 16; /// INV_TABLE_MOD² const INV_TABLE_MOD_SQUARED: usize = INV_TABLE_MOD * INV_TABLE_MOD; let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize; // SAFETY: `m` is required to be a power-of-two, hence non-zero. let m_minus_one = unsafe { unchecked_sub(m, 1) }; if m <= INV_TABLE_MOD { table_inverse & m_minus_one } else { // We iterate "up" using the following formula: // // $$ xy ≡ 1 (mod 2ⁿ) → xy (2 - xy) ≡ 1 (mod 2²ⁿ) $$ // // until 2²ⁿ ≥ m. Then we can reduce to our desired `m` by taking the result `mod m`. let mut inverse = table_inverse; let mut going_mod = INV_TABLE_MOD_SQUARED; loop { // y = y * (2 - xy) mod n // // Note, that we use wrapping operations here intentionally – the original formula // uses e.g., subtraction `mod n`. It is entirely fine to do them `mod // usize::MAX` instead, because we take the result `mod n` at the end // anyway. inverse = wrapping_mul(inverse, wrapping_sub(2usize, wrapping_mul(x, inverse))); if going_mod >= m { return inverse & m_minus_one; } going_mod = wrapping_mul(going_mod, going_mod); } } } let addr = p.addr(); let stride = std::mem::size_of::<T>(); // SAFETY: `a` is a power-of-two, therefore non-zero. let a_minus_one = unsafe { unchecked_sub(a, 1) }; if stride == 0 { // SPECIAL_CASE: handle 0-sized types. No matter how many times we step, the address will // stay the same, so no offset will be able to align the pointer unless it is already // aligned. This branch _will_ be optimized out as `stride` is known at compile-time. let p_mod_a = addr & a_minus_one; return if p_mod_a == 0 { 0 } else { usize::MAX }; } // SAFETY: `stride == 0` case has been handled by the special case above. let a_mod_stride = unsafe { unchecked_rem(a, stride) }; if a_mod_stride == 0 { // SPECIAL_CASE: In cases where the `a` is divisible by `stride`, byte offset to align a // pointer can be computed more simply through `-p (mod a)`. In the off-chance the byte // offset is not a multiple of `stride`, the input pointer was misaligned and no pointer // offset will be able to produce a `p` aligned to the specified `a`. // // The naive `-p (mod a)` equation inhibits LLVM's ability to select instructions // like `lea`. We compute `(round_up_to_next_alignment(p, a) - p)` instead. This // redistributes operations around the load-bearing, but pessimizing `and` instruction // sufficiently for LLVM to be able to utilize the various optimizations it knows about. // // LLVM handles the branch here particularly nicely. If this branch needs to be evaluated // at runtime, it will produce a mask `if addr_mod_stride == 0 { 0 } else { usize::MAX }` // in a branch-free way and then bitwise-OR it with whatever result the `-p mod a` // computation produces. // SAFETY: `stride == 0` case has been handled by the special case above. let addr_mod_stride = unsafe { unchecked_rem(addr, stride) }; return if addr_mod_stride == 0 { let aligned_address = wrapping_add(addr, a_minus_one) & wrapping_sub(0, a); let byte_offset = wrapping_sub(aligned_address, addr); // SAFETY: `stride` is non-zero. This is guaranteed to divide exactly as well, because // addr has been verified to be aligned to the original type’s alignment requirements. unsafe { exact_div(byte_offset, stride) } } else { usize::MAX }; } // GENERAL_CASE: From here on we’re handling the very general case where `addr` may be // misaligned, there isn’t an obvious relationship between `stride` and `a` that we can take an // advantage of, etc. This case produces machine code that isn’t particularly high quality, // compared to the special cases above. The code produced here is still within the realm of // miracles, given the situations this case has to deal with. // SAFETY: a is power-of-two hence non-zero. stride == 0 case is handled above. let gcdpow = unsafe { cttz_nonzero(stride).min(cttz_nonzero(a)) }; // SAFETY: gcdpow has an upper-bound that’s at most the number of bits in a usize. let gcd = unsafe { unchecked_shl(1usize, gcdpow) }; // SAFETY: gcd is always greater or equal to 1. if addr & unsafe { unchecked_sub(gcd, 1) } == 0 { // This branch solves for the following linear congruence equation: // // ` p + so = 0 mod a ` // // `p` here is the pointer value, `s` - stride of `T`, `o` offset in `T`s, and `a` - the // requested alignment. // // With `g = gcd(a, s)`, and the above condition asserting that `p` is also divisible by // `g`, we can denote `a' = a/g`, `s' = s/g`, `p' = p/g`, then this becomes equivalent to: // // ` p' + s'o = 0 mod a' ` // ` o = (a' - (p' mod a')) * (s'^-1 mod a') ` // // The first term is "the relative alignment of `p` to `a`" (divided by the `g`), the // second term is "how does incrementing `p` by `s` bytes change the relative alignment of // `p`" (again divided by `g`). Division by `g` is necessary to make the inverse well // formed if `a` and `s` are not co-prime. // // Furthermore, the result produced by this solution is not "minimal", so it is necessary // to take the result `o mod lcm(s, a)`. This `lcm(s, a)` is the same as `a'`. // SAFETY: `gcdpow` has an upper-bound not greater than the number of trailing 0-bits in // `a`. let a2 = unsafe { unchecked_shr(a, gcdpow) }; // SAFETY: `a2` is non-zero. Shifting `a` by `gcdpow` cannot shift out any of the set bits // in `a` (of which it has exactly one). let a2minus1 = unsafe { unchecked_sub(a2, 1) }; // SAFETY: `gcdpow` has an upper-bound not greater than the number of trailing 0-bits in // `a`. let s2 = unsafe { unchecked_shr(stride & a_minus_one, gcdpow) }; // SAFETY: `gcdpow` has an upper-bound not greater than the number of trailing 0-bits in // `a`. Furthermore, the subtraction cannot overflow, because `a2 = a >> gcdpow` will // always be strictly greater than `(p % a) >> gcdpow`. let minusp2 = unsafe { unchecked_sub(a2, unchecked_shr(addr & a_minus_one, gcdpow)) }; // SAFETY: `a2` is a power-of-two, as proven above. `s2` is strictly less than `a2` // because `(s % a) >> gcdpow` is strictly less than `a >> gcdpow`. return wrapping_mul(minusp2, unsafe { mod_inv(s2, a2) }) & a2minus1; } // Cannot be aligned at all. usize::MAX }
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
About the author
Statistics
Changelog
Version tree