Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
rust source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.88.0
rustc 1.9.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
Options
Source code
/// Computes how many `T`-sized elements the pointer `p` has to be advanced by so that the
/// resulting address is a multiple of `a`.
///
/// Returns `0` when `p` is already aligned and `usize::MAX` when no whole number of elements
/// can ever align it (zero-sized `T`, or `p` not divisible by `gcd(size_of::<T>(), a)`).
///
/// This version only uses stable integer operations (`trailing_zeros`, wrapping arithmetic and
/// masking), so it does not need the `core_intrinsics` feature gate.
///
/// # Safety
///
/// `a` must be a power of two. The result is meaningless for any other value.
unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
    /// Calculate multiplicative modular inverse of `x` modulo `m`.
    ///
    /// This implementation is tailored for `align_offset` and has following preconditions:
    ///
    /// * `m` is a power-of-two;
    /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
    /// * `x` is odd (even numbers have no inverse modulo a power of two).
    ///
    /// Implementation of this function shall not panic. Ever. It also always terminates, even
    /// for moduli close to the top of the `usize` range.
    #[inline]
    fn mod_inv(x: usize, m: usize) -> usize {
        /// Multiplicative modular inverse table modulo 2⁴ = 16.
        ///
        /// Entry `i` holds the inverse of the odd residue `2i + 1`; residues without an
        /// inverse (`0`, `2`, ...) are not stored.
        const INV_TABLE_MOD_16: [u8; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
        /// Modulo for which the `INV_TABLE_MOD_16` is intended.
        const INV_TABLE_MOD: usize = 16;

        let mut inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize;
        // Invariant: `x * inverse ≡ 1 (mod going_mod)`.
        let mut going_mod = INV_TABLE_MOD;
        // We iterate "up" using the following formula:
        //
        // $$ xy ≡ 1 (mod 2ⁿ) → x · y(2 - xy) ≡ 1 (mod 2²ⁿ) $$
        //
        // until the modulus the inverse is valid for reaches `m`.
        while going_mod < m {
            // y = y * (2 - xy)
            //
            // Wrapping operations are intentional: arithmetic modulo 2^BITS is compatible
            // with arithmetic modulo any smaller power of two, and the result is reduced
            // `mod m` at the end anyway.
            inverse = inverse.wrapping_mul(2usize.wrapping_sub(x.wrapping_mul(inverse)));
            going_mod = match going_mod.checked_mul(going_mod) {
                Some(squared) => squared,
                // The squared modulus does not fit in `usize`, i.e. the inverse is now exact
                // modulo 2^BITS, which covers every representable `m`. A wrapping square
                // would yield 0 here and the loop would never finish.
                None => break,
            };
        }
        inverse & m.wrapping_sub(1)
    }

    let stride = std::mem::size_of::<T>();
    let a_minus_one = a.wrapping_sub(1);
    let pmoda = (p as usize) & a_minus_one;

    if pmoda == 0 {
        // Already aligned. Yay!
        return 0;
    }
    if stride == 0 {
        // If the pointer is not aligned, and the element is zero-sized, then no amount of
        // elements will ever align the pointer.
        return usize::MAX;
    }
    if stride == 1 {
        // Byte-sized elements: the distance in bytes is the distance in elements.
        return a.wrapping_sub(pmoda);
    }

    let smoda = stride & a_minus_one;
    // Both operands are powers-of-two-or-multiples thereof, so the gcd of `stride` and `a` is
    // 2^min(trailing zeros). `stride` is non-zero here, hence `gcdpow < usize::BITS`.
    let gcdpow = stride.trailing_zeros().min(a.trailing_zeros());
    let gcd = 1usize << gcdpow;

    if (p as usize) & (gcd - 1) != 0 {
        // Cannot be aligned at all.
        return usize::MAX;
    }

    // This branch solves for the following linear congruence equation:
    //
    // $$ p + so ≡ 0 mod a $$
    //
    // $p$ here is the pointer value, $s$ – stride of `T`, $o$ offset in `T`s, and $a$ – the
    // requested alignment.
    //
    // g = gcd(a, s)
    // o = (a - (p mod a))/g * ((s/g)⁻¹ mod a)
    //
    // The first term is "the relative alignment of p to a", the second term is "how does
    // incrementing p by s bytes change the relative alignment of p". Division by `g` is
    // necessary to make this equation well formed if $a$ and $s$ are not co-prime.
    //
    // The product is not "minimal", so it is reduced modulo $a / g$. That modulus is a power
    // of two, so the reduction is a mask.
    let j = a.wrapping_sub(pmoda) >> gcdpow;
    let k = smoda >> gcdpow;
    let reduced_mask = (a >> gcdpow).wrapping_sub(1);
    j.wrapping_mul(mod_inv(k, a)) & reduced_mask
}

/// Three-byte element type (`u16` + `u8`, packed) used to exercise an odd stride.
// The lower-case name is kept because callers refer to the type by this identifier.
#[allow(non_camel_case_types)]
#[repr(packed)]
pub struct size3(u16, u8);

/// Offset (in `size3` elements) needed to bring `p` to an 8-byte boundary.
pub fn const_align_v0(p: *const size3) -> usize {
    // SAFETY: 8 is a power of two.
    unsafe { align_offset(p, 8) }
}

/// Offset (in `size3` elements) needed to bring `p` to an `align`-byte boundary.
///
/// `align` is expected to be a power of two.
pub fn variable_align_v0(p: *const size3, align: usize) -> usize {
    // SAFETY: callers are expected to pass a power-of-two `align`; no memory is accessed.
    unsafe { align_offset(p, align) }
}
rust source #2
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.88.0
rustc 1.9.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
Options
Source code
/// Computes how many `T`-sized elements the pointer `p` has to be advanced by so that the
/// resulting address is a multiple of `a`.
///
/// Returns `0` when `p` is already aligned and `usize::MAX` when no whole number of elements
/// can ever align it (zero-sized `T`, or `p` not divisible by `gcd(size_of::<T>(), a)`).
///
/// Only stable integer operations are used; the exponents are obtained with
/// `trailing_zeros()` and converted to `usize` explicitly, so no feature gate is required.
///
/// # Safety
///
/// `a` must be a power of two. The result is meaningless for any other value.
unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
    /// Calculate multiplicative modular inverse of `x` modulo `m`, where
    /// `m = 2^mpow` and `mask = m - 1`.
    ///
    /// This implementation is tailored for `align_offset` and has following preconditions:
    ///
    /// * The requested modulo `m` is a power-of-two, so `mpow` can be an argument;
    /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
    /// * `x` is odd (even numbers have no inverse modulo a power of two).
    ///
    /// It also leaves reducing the result modulo `m` to the caller, so the result may be
    /// larger than `m`.
    ///
    /// Implementation of this function shall not panic. Ever.
    #[inline]
    fn mod_pow_2_inv(x: usize, mpow: usize, mask: usize) -> usize {
        /// Multiplicative modular inverse table modulo 2⁴ = 16.
        ///
        /// Entry `i` holds the inverse of the odd residue `2i + 1`; residues without an
        /// inverse (`0`, `2`, ...) are not stored.
        const INV_TABLE_MOD_16: [u8; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
        /// `s` such that `INV_TABLE_MOD == 2^s`.
        const INV_TABLE_MOD_POW: usize = 4;
        /// Modulo for which the `INV_TABLE_MOD_16` is intended.
        const INV_TABLE_MOD: usize = 1 << INV_TABLE_MOD_POW;
        /// `s` such that `INV_TABLE_MOD == 2^(s/2)`, i.e. the exponent reached after the
        /// first refinement step.
        const INV_TABLE_MOD_POW_TIMES_2: usize = INV_TABLE_MOD_POW << 1;

        let table_inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize;
        if mpow <= INV_TABLE_MOD_POW {
            // The table already covers the requested modulus.
            return table_inverse & mask;
        }

        // We iterate "up" using the following formula:
        //
        // $$ xy ≡ 1 (mod 2ⁿ) → x · y(2 - xy) ≡ 1 (mod 2²ⁿ) $$
        //
        // Running $k$ iterations starting with a solution valid mod $2^s$ yields a solution
        // valid mod $2^{2^k s}$, so we stop as soon as $2^k s ≥ log2(m)$.
        let mut inverse = table_inverse;
        let mut going_modpow = INV_TABLE_MOD_POW_TIMES_2;
        loop {
            // y = y * (2 - xy)
            //
            // Wrapping operations are intentional: arithmetic modulo 2^BITS is compatible
            // with arithmetic modulo any smaller power of two, and the caller reduces the
            // result `mod m` afterwards.
            inverse = inverse.wrapping_mul(2usize.wrapping_sub(x.wrapping_mul(inverse)));
            if going_modpow >= mpow {
                return inverse;
            }
            going_modpow <<= 1;
        }
    }

    let stride = std::mem::size_of::<T>();
    let a_minus_one = a.wrapping_sub(1);
    let pmoda = (p as usize) & a_minus_one;

    if pmoda == 0 {
        // Already aligned. Yay!
        return 0;
    }
    if stride == 0 {
        // If the pointer is not aligned, and the element is zero-sized, then no amount of
        // elements will ever align the pointer.
        return usize::MAX;
    }
    if stride == 1 {
        // Byte-sized elements: the distance in bytes is the distance in elements.
        return a.wrapping_sub(pmoda);
    }

    let smoda = stride & a_minus_one;
    // `trailing_zeros` returns `u32`; the exponents are widened once here so that all later
    // arithmetic is done in `usize`. `stride` is non-zero, hence `gcdpow < usize::BITS`.
    let apow = a.trailing_zeros() as usize;
    let gcdpow = (stride.trailing_zeros() as usize).min(apow);
    let gcd = 1usize << gcdpow;

    if (p as usize) & gcd.wrapping_sub(1) != 0 {
        // Cannot be aligned at all.
        return usize::MAX;
    }

    // This branch solves for the following linear congruence equation:
    //
    // $$ p + so ≡ 0 mod a $$
    //
    // $p$ here is the pointer value, $s$ – stride of `T`, $o$ offset in `T`s, and $a$ – the
    // requested alignment.
    //
    // With $g = gcd(a, s)$, and the check above asserting that $p$ is also divisible by $g$,
    // we can denote $a' = a/g$, $s' = s/g$, $p' = p/g$, and this becomes equivalent to:
    //
    // $$ p' + s'o ≡ 0 mod a' $$
    // $$ o = (a' - (p' mod a')) * ((s')⁻¹ mod a') $$
    //
    // The first term is "the relative alignment of $p$ to $a$" (divided by $g$), the second
    // term is "how does incrementing $p$ by $s$ bytes change the relative alignment of $p$"
    // (again divided by $g$). Division by $g$ is necessary to make the inverse well formed
    // if $a$ and $s$ are not co-prime.
    //
    // The product is not "minimal", so it is reduced modulo $a'$ with a mask.
    let a2 = a >> gcdpow;
    let a2minus1 = a2.wrapping_sub(1);
    let s2 = smoda >> gcdpow;
    let minusp2 = a2.wrapping_sub(pmoda >> gcdpow);
    // `mod_pow_2_inv` may return a value outside of $a'$-s range, but it is fine to multiply
    // modulo 2^BITS here and take the result modulo $a'$ afterwards.
    let inverse = mod_pow_2_inv(s2, apow.wrapping_sub(gcdpow), a2minus1);
    minusp2.wrapping_mul(inverse) & a2minus1
}

/// Three-byte element type (`u16` + `u8`, packed) used to exercise an odd stride.
// The lower-case name is kept because callers refer to the type by this identifier.
#[allow(non_camel_case_types)]
#[repr(packed)]
pub struct size3(u16, u8);

/// Offset (in `size3` elements) needed to bring `p` to an 8-byte boundary.
pub fn const_align_v4(p: *const size3) -> usize {
    // SAFETY: 8 is a power of two.
    unsafe { align_offset(p, 8) }
}

/// Offset (in `size3` elements) needed to bring `p` to an `align`-byte boundary.
///
/// `align` is expected to be a power of two.
pub fn variable_align_v4(p: *const size3, align: usize) -> usize {
    // SAFETY: callers are expected to pass a power-of-two `align`; no memory is accessed.
    unsafe { align_offset(p, align) }
}
rust source #3
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.88.0
rustc 1.9.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
Options
Source code
/// Computes how many `T`-sized elements the pointer `p` has to be advanced by so that the
/// resulting address is a multiple of `a`.
///
/// Returns `0` when `p` is already aligned and `usize::MAX` when no whole number of elements
/// can ever align it (zero-sized `T`, or `p` not divisible by `gcd(size_of::<T>(), a)`).
///
/// Only stable integer operations are used (`trailing_zeros`, wrapping arithmetic and
/// masking), so no feature gate is required.
///
/// # Safety
///
/// `a` must be a power of two. The result is meaningless for any other value.
unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usize {
    /// Calculate multiplicative modular inverse of `x` modulo `m`.
    ///
    /// This implementation is tailored for `align_offset` and has following preconditions:
    ///
    /// * `m` is a power-of-two;
    /// * `x < m`; (if `x ≥ m`, pass in `x % m` instead)
    /// * `x` is odd (even numbers have no inverse modulo a power of two).
    ///
    /// Implementation of this function shall not panic. Ever. It also always terminates, even
    /// for moduli close to the top of the `usize` range.
    #[inline]
    fn mod_inv(x: usize, m: usize) -> usize {
        /// Multiplicative modular inverse table modulo 2⁴ = 16.
        ///
        /// Entry `i` holds the inverse of the odd residue `2i + 1`; residues without an
        /// inverse (`0`, `2`, ...) are not stored.
        const INV_TABLE_MOD_16: [u8; 8] = [1, 11, 13, 7, 9, 3, 5, 15];
        /// Modulo for which the `INV_TABLE_MOD_16` is intended.
        const INV_TABLE_MOD: usize = 16;

        let mut inverse = INV_TABLE_MOD_16[(x & (INV_TABLE_MOD - 1)) >> 1] as usize;
        // Invariant: `x * inverse ≡ 1 (mod going_mod)`.
        let mut going_mod = INV_TABLE_MOD;
        // We iterate "up" using the following formula:
        //
        // $$ xy ≡ 1 (mod 2ⁿ) → x · y(2 - xy) ≡ 1 (mod 2²ⁿ) $$
        //
        // until the modulus the inverse is valid for reaches `m`. Then we can reduce to our
        // desired `m` by taking the result `mod m`.
        while going_mod < m {
            // y = y * (2 - xy)
            //
            // Wrapping operations are intentional: arithmetic modulo 2^BITS is compatible
            // with arithmetic modulo any smaller power of two, and the result is reduced
            // `mod m` at the end anyway.
            inverse = inverse.wrapping_mul(2usize.wrapping_sub(x.wrapping_mul(inverse)));
            going_mod = match going_mod.checked_mul(going_mod) {
                Some(squared) => squared,
                // The squared modulus does not fit in `usize`, i.e. the inverse is now exact
                // modulo 2^BITS, which covers every representable `m`. A wrapping square
                // would yield 0 here and the loop would never finish.
                None => break,
            };
        }
        inverse & m.wrapping_sub(1)
    }

    let stride = std::mem::size_of::<T>();
    let a_minus_one = a.wrapping_sub(1);
    let pmoda = (p as usize) & a_minus_one;

    if pmoda == 0 {
        // Already aligned. Yay!
        return 0;
    }
    if stride == 0 {
        // If the pointer is not aligned, and the element is zero-sized, then no amount of
        // elements will ever align the pointer.
        return usize::MAX;
    }
    if stride == 1 {
        // Byte-sized elements: the distance in bytes is the distance in elements.
        return a.wrapping_sub(pmoda);
    }

    let smoda = stride & a_minus_one;
    // The gcd of `stride` and the power-of-two `a` is 2^min(trailing zeros). `stride` is
    // non-zero here, hence `gcdpow < usize::BITS`.
    let gcdpow = stride.trailing_zeros().min(a.trailing_zeros());
    let gcd = 1usize << gcdpow;

    if (p as usize) & gcd.wrapping_sub(1) != 0 {
        // Cannot be aligned at all.
        return usize::MAX;
    }

    // This branch solves for the following linear congruence equation:
    //
    // $$ p + so ≡ 0 mod a $$
    //
    // $p$ here is the pointer value, $s$ – stride of `T`, $o$ offset in `T`s, and $a$ – the
    // requested alignment.
    //
    // With $g = gcd(a, s)$, and the check above asserting that $p$ is also divisible by $g$,
    // we can denote $a' = a/g$, $s' = s/g$, $p' = p/g$, and this becomes equivalent to:
    //
    // $$ p' + s'o ≡ 0 mod a' $$
    // $$ o = (a' - (p' mod a')) * ((s')⁻¹ mod a') $$
    //
    // The first term is "the relative alignment of $p$ to $a$" (divided by $g$), the second
    // term is "how does incrementing $p$ by $s$ bytes change the relative alignment of $p$"
    // (again divided by $g$). Division by $g$ is necessary to make the inverse well formed
    // if $a$ and $s$ are not co-prime.
    //
    // The product is not "minimal", so it is reduced modulo $a'$ with a mask.
    let a2 = a >> gcdpow;
    let a2minus1 = a2.wrapping_sub(1);
    let s2 = smoda >> gcdpow;
    let minusp2 = a2.wrapping_sub(pmoda >> gcdpow);
    minusp2.wrapping_mul(mod_inv(s2, a2)) & a2minus1
}

/// Three-byte element type (`u16` + `u8`, packed) used to exercise an odd stride.
// The lower-case name is kept because callers refer to the type by this identifier.
#[allow(non_camel_case_types)]
#[repr(packed)]
pub struct size3(u16, u8);

/// Offset (in `size3` elements) needed to bring `p` to an 8-byte boundary.
pub fn const_align_v2(p: *const size3) -> usize {
    // SAFETY: 8 is a power of two.
    unsafe { align_offset(p, 8) }
}

/// Offset (in `size3` elements) needed to bring `p` to an `align`-byte boundary.
///
/// `align` is expected to be a power of two.
pub fn variable_align_v2(p: *const size3, align: usize) -> usize {
    // SAFETY: callers are expected to pass a power-of-two `align`; no memory is accessed.
    unsafe { align_offset(p, align) }
}
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
About the author
Statistics
Changelog
Version tree