Rust source #1
#![allow(unused_imports)]
#![feature(ptr_sub_ptr, core_intrinsics, maybe_uninit_slice, strict_provenance)]

use std::intrinsics;
use std::mem::{self, ManuallyDrop, MaybeUninit};
use std::ptr;
use std::cmp;

/// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
/// to `pivot`.
///
/// Returns the number of elements smaller than `pivot`.
///
/// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
/// This idea is presented in the [BlockQuicksort][pdf] paper.
///
/// [pdf]: https://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
#[cfg_attr(feature = "no_inline_sub_functions", inline(never))]
fn partition<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize {
    // Number of elements in a typical block.
    const BLOCK: usize = 128;

    // The partitioning algorithm repeats the following steps until completion:
    //
    // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
    // 2. Trace a block from the right side to identify elements smaller than the pivot.
    // 3. Exchange the identified elements between the left and right side.
    //
    // We keep the following variables for a block of elements:
    //
    // 1. `block` - Number of elements in the block.
    // 2. `start` - Start pointer into the `offsets` array.
    // 3. `end` - End pointer into the `offsets` array.
    // 4. `offsets` - Indices of out-of-order elements within the block.

    // The current block on the left side (from `l` to `l.add(block_l)`).
    let mut l = v.as_mut_ptr();
    let mut block_l = BLOCK;
    let mut start_l = ptr::null_mut();
    let mut end_l = ptr::null_mut();
    let mut offsets_l = [mem::MaybeUninit::<u8>::uninit(); BLOCK];

    // The current block on the right side (from `r.sub(block_r)` to `r`).
    // SAFETY: The documentation for .add() specifically mentions that
    // `vec.as_ptr().add(vec.len())` is always safe.
    let mut r = unsafe { l.add(v.len()) };
    let mut block_r = BLOCK;
    let mut start_r = ptr::null_mut();
    let mut end_r = ptr::null_mut();
    let mut offsets_r = [mem::MaybeUninit::<u8>::uninit(); BLOCK];

    // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
    // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.

    // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
    fn width<T>(l: *mut T, r: *mut T) -> usize {
        debug_assert!(r.addr() >= l.addr());

        // SAFETY: r >= l and not T::IS_ZST
        unsafe { intrinsics::ptr_offset_from_unsigned(r, l) }
    }

    loop {
        // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
        // some patch-up work in order to partition the remaining elements in between.
        let is_done = width(l, r) <= 2 * BLOCK;

        if is_done {
            // Number of remaining elements (still not compared to the pivot).
            let mut rem = width(l, r);
            if start_l < end_l || start_r < end_r {
                rem -= BLOCK;
            }

            // Adjust block sizes so that the left and right block don't overlap, but get perfectly
            // aligned to cover the whole remaining gap.
            if start_l < end_l {
                block_r = rem;
            } else if start_r < end_r {
                block_l = rem;
            } else {
                // There were the same number of elements to switch on both blocks during the last
                // iteration, so there are no remaining elements on either block. Cover the
                // remaining items with roughly equally-sized blocks.
                block_l = rem / 2;
                block_r = rem - block_l;
            }
            debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
            debug_assert!(width(l, r) == block_l + block_r);
        }

        if start_l == end_l {
            // Trace `block_l` elements from the left side.
            start_l = mem::MaybeUninit::slice_as_mut_ptr(&mut offsets_l);
            end_l = start_l;
            let mut elem = l;

            for i in 0..block_l {
                // SAFETY: The unsafety operations below involve the usage of `offset`.
                // According to the conditions required by the function, we satisfy them because:
                // 1. `offsets_l` is stack-allocated, and is thus considered a separate allocated object.
                // 2. The function `is_less` returns a `bool`.
                //    Casting a `bool` will never overflow `isize`.
                // 3. We have guaranteed that `block_l` will be `<= BLOCK`.
                //    Plus, `end_l` was initially set to the begin pointer of `offsets_l` which was declared on the stack.
                //    Thus, we know that even in the worst case (all invocations of `is_less` return false) we will only be at most 1 byte past the end.
                // Another unsafety operation here is dereferencing `elem`.
                // However, `elem` was initially the begin pointer to the slice which is always valid.
                unsafe {
                    // Branchless comparison.
                    *end_l = i as u8;
                    end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
                    elem = elem.offset(1);
                }
            }
        }

        if start_r == end_r {
            // Trace `block_r` elements from the right side.
            start_r = mem::MaybeUninit::slice_as_mut_ptr(&mut offsets_r);
            end_r = start_r;
            let mut elem = r;

            for i in 0..block_r {
                // SAFETY: The unsafety operations below involve the usage of `offset`.
                // According to the conditions required by the function, we satisfy them because:
                // 1. `offsets_r` is stack-allocated, and is thus considered a separate allocated object.
                // 2. The function `is_less` returns a `bool`.
                //    Casting a `bool` will never overflow `isize`.
                // 3. We have guaranteed that `block_r` will be `<= BLOCK`.
                //    Plus, `end_r` was initially set to the begin pointer of `offsets_r` which was declared on the stack.
                //    Thus, we know that even in the worst case (all invocations of `is_less` return true) we will only be at most 1 byte past the end.
                // Another unsafety operation here is dereferencing `elem`.
                // However, `elem` was initially `1 * sizeof(T)` past the end and we decrement it by `1 * sizeof(T)` before accessing it.
                // Plus, `block_r` was asserted to be less than `BLOCK` and `elem` will therefore at most be pointing to the beginning of the slice.
                unsafe {
                    // Branchless comparison.
                    elem = elem.offset(-1);
                    *end_r = i as u8;
                    end_r = end_r.offset(is_less(&*elem, pivot) as isize);
                }
            }
        }

        // Number of out-of-order elements to swap between the left and right side.
        let count = cmp::min(width(start_l, end_l), width(start_r, end_r));

        if count > 0 {
            macro_rules! left {
                () => {
                    l.offset(*start_l as isize)
                };
            }
            macro_rules! right {
                () => {
                    r.offset(-(*start_r as isize) - 1)
                };
            }

            // Instead of swapping one pair at a time, it is more efficient to perform a cyclic
            // permutation. This is not strictly equivalent to swapping, but produces a similar
            // result using fewer memory operations.
            //
            // SAFETY: The use of `ptr::read` is valid because there is at least one element in
            // both `offsets_l` and `offsets_r`, so `left!` is a valid pointer to read from.
            //
            // The uses of `left!` involve calls to `offset` on `l`, which points to the
            // beginning of `v`. All the offsets pointed-to by `start_l` are at most `block_l`, so
            // these `offset` calls are safe as all reads are within the block. The same argument
            // applies for the uses of `right!`.
            //
            // The calls to `start_l.offset` are valid because there are at most `count-1` of them,
            // plus the final one at the end of the unsafe block, where `count` is the minimum number
            // of collected offsets in `offsets_l` and `offsets_r`, so there is no risk of there not
            // being enough elements. The same reasoning applies to the calls to `start_r.offset`.
            //
            // The calls to `copy_nonoverlapping` are safe because `left!` and `right!` are guaranteed
            // not to overlap, and are valid because of the reasoning above.
            unsafe {
                let tmp = ptr::read(left!());
                ptr::copy_nonoverlapping(right!(), left!(), 1);

                for _ in 1..count {
                    start_l = start_l.offset(1);
                    ptr::copy_nonoverlapping(left!(), right!(), 1);
                    start_r = start_r.offset(1);
                    ptr::copy_nonoverlapping(right!(), left!(), 1);
                }

                ptr::copy_nonoverlapping(&tmp, right!(), 1);
                mem::forget(tmp);
                start_l = start_l.offset(1);
                start_r = start_r.offset(1);
            }
        }

        if start_l == end_l {
            // All out-of-order elements in the left block were moved. Move to the next block.
            //
            // block-width-guarantee
            // SAFETY: if `!is_done` then the slice width is guaranteed to be at least `2*BLOCK` wide. There
            // are at most `BLOCK` elements in `offsets_l` because of its size, so the `offset` operation is
            // safe. Otherwise, the debug assertions in the `is_done` case guarantee that
            // `width(l, r) == block_l + block_r`, namely, that the block sizes have been adjusted to account
            // for the smaller number of remaining elements.
            l = unsafe { l.offset(block_l as isize) };
        }

        if start_r == end_r {
            // All out-of-order elements in the right block were moved. Move to the previous block.
            //
            // SAFETY: Same argument as [block-width-guarantee]. Either this is a full block `2*BLOCK`-wide,
            // or `block_r` has been adjusted for the last handful of elements.
            r = unsafe { r.offset(-(block_r as isize)) };
        }

        if is_done {
            break;
        }
    }

    // All that remains now is at most one block (either the left or the right) with out-of-order
    // elements that need to be moved. Such remaining elements can be simply shifted to the end
    // within their block.

    if start_l < end_l {
        // The left block remains.
        // Move its remaining out-of-order elements to the far right.
        debug_assert_eq!(width(l, r), block_l);
        while start_l < end_l {
            // remaining-elements-safety
            // SAFETY: while the loop condition holds there are still elements in `offsets_l`, so it
            // is safe to point `end_l` to the previous element.
            //
            // The `ptr::swap` is safe if both its arguments are valid for reads and writes:
            //  - Per the debug assert above, the distance between `l` and `r` is `block_l`
            //    elements, so there can be at most `block_l` remaining offsets between `start_l`
            //    and `end_l`. This means `r` will be moved at most `block_l` steps back, which
            //    makes the `r.offset` calls valid (at that point `l == r`).
            //  - `offsets_l` contains valid offsets into `v` collected during the partitioning of
            //    the last block, so the `l.offset` calls are valid.
            unsafe {
                end_l = end_l.offset(-1);
                ptr::swap(l.offset(*end_l as isize), r.offset(-1));
                r = r.offset(-1);
            }
        }
        width(v.as_mut_ptr(), r)
    } else if start_r < end_r {
        // The right block remains.
        // Move its remaining out-of-order elements to the far left.
        debug_assert_eq!(width(l, r), block_r);
        while start_r < end_r {
            // SAFETY: See the reasoning in [remaining-elements-safety].
            unsafe {
                end_r = end_r.offset(-1);
                ptr::swap(l, r.offset(-(*end_r as isize) - 1));
                l = l.offset(1);
            }
        }
        width(v.as_mut_ptr(), l)
    } else {
        // Nothing else to do, we're done.
        width(v.as_mut_ptr(), l)
    }
}

type TestT = u64;

pub fn xx(v: &mut [TestT], pivot: &TestT) -> usize {
    partition(v, pivot, &mut |a, b| a.lt(b))
}
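For reference, a minimal driver along the following lines can exercise `xx` and check the contract stated in the doc comment: everything before the returned index is smaller than the pivot, and everything from the index onwards is greater than or equal to it. This is an illustrative sketch, not part of the original listing; the `main` function and its local names are assumptions, and it presumes a nightly toolchain because of the `#![feature(...)]` attributes above.

// Hypothetical usage sketch (not part of the original source).
fn main() {
    let mut data: Vec<TestT> = vec![9, 1, 8, 2, 7, 3, 6, 4, 5];
    let pivot: TestT = 5;

    let mid = xx(&mut data, &pivot);

    // Elements before `mid` are smaller than the pivot...
    assert!(data[..mid].iter().all(|x| *x < pivot));
    // ...and elements from `mid` onwards are greater than or equal to it.
    assert!(data[mid..].iter().all(|x| *x >= pivot));

    println!("partition point: {mid}, partitioned: {data:?}");
}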