Thanks for using Compiler Explorer
Sponsors
Jakt
C++
Ada
Algol68
Analysis
Android Java
Android Kotlin
Assembly
C
C3
Carbon
C with Coccinelle
C++ with Coccinelle
C++ (Circle)
CIRCT
Clean
CMake
CMakeScript
COBOL
C++ for OpenCL
MLIR
Cppx
Cppx-Blue
Cppx-Gold
Cpp2-cppfront
Crystal
C#
CUDA C++
D
Dart
Elixir
Erlang
Fortran
F#
GLSL
Go
Haskell
HLSL
Hook
Hylo
IL
ispc
Java
Julia
Kotlin
LLVM IR
LLVM MIR
Modula-2
Mojo
Nim
Numba
Nix
Objective-C
Objective-C++
OCaml
Odin
OpenCL C
Pascal
Pony
PTX
Python
Racket
Raku
Ruby
Rust
Sail
Snowball
Scala
Slang
Solidity
Spice
SPIR-V
Swift
LLVM TableGen
Toit
TypeScript Native
V
Vala
Visual Basic
Vyper
WASM
Zig
Javascript
GIMPLE
Ygen
sway
rust source #1
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.9.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
Options
Source code
use core::mem;

/// Appends `ch` to the end of `s`.
///
/// The character is encoded as UTF-8 straight into the string's spare
/// capacity, after which the length is bumped to cover the new bytes.
pub fn push(s: &mut String, ch: char) {
    let len = s.len();
    let ch_len = ch.len_utf8();
    s.reserve(ch_len);

    // SAFETY: Just reserved capacity for at least the length needed to encode `ch`,
    // and the bytes written are the UTF-8 encoding of a `char`, so the string is
    // still valid UTF-8 once its length covers them.
    unsafe {
        encode_utf8_raw_unchecked(ch as u32, s.as_mut_vec().spare_capacity_mut());
        s.as_mut_vec().set_len(len + ch_len);
    }
}

/// Tag bits of a continuation byte (`10xx_xxxx`).
const TAG_CONT: u8 = 0b1000_0000;
/// Tag bits of the leading byte of a two-byte sequence (`110x_xxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
/// Tag bits of the leading byte of a three-byte sequence (`1110_xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
/// Tag bits of the leading byte of a four-byte sequence (`1111_0xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
/// Exclusive upper bound of the code points that encode as one byte.
const MAX_ONE_B: u32 = 0x80;
/// Exclusive upper bound of the code points that encode as two bytes.
const MAX_TWO_B: u32 = 0x800;
/// Exclusive upper bound of the code points that encode as three bytes.
const MAX_THREE_B: u32 = 0x10000;

/// Returns the number of bytes (1 to 4) used to encode `code`.
#[inline]
const fn len_utf8(code: u32) -> usize {
    if code < MAX_ONE_B {
        1
    } else if code < MAX_TWO_B {
        2
    } else if code < MAX_THREE_B {
        3
    } else {
        4
    }
}

/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
/// and then returns the subslice of the buffer that contains the encoded character.
///
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
/// (Creating a `char` in the surrogate range is UB.)
/// The result is valid [generalized UTF-8] but not valid UTF-8.
///
/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
///
/// # Panics
///
/// Panics if the buffer is not large enough.
/// A buffer of length four is large enough to encode any `char`.
#[inline]
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
    let len = len_utf8(code);
    if dst.len() < len {
        panic!(
            "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
            len,
            code,
            dst.len(),
        );
    }

    // SAFETY: it's safe to transmute a slice of `T` to a slice of `MaybeUninit<T>`
    let dst = unsafe { &mut *(dst as *mut [u8] as *mut [mem::MaybeUninit<u8>]) };

    // SAFETY: `dst` is checked to have at least the length needed to encode the codepoint
    unsafe { encode_utf8_raw_unchecked(code, dst) }
}

/// Encodes a raw u32 value as UTF-8 into the provided possibly uninitialized byte buffer,
/// and then returns the subslice of the buffer that contains the encoded character.
///
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
/// (Creating a `char` in the surrogate range is UB.)
/// The result is valid [generalized UTF-8] but not valid UTF-8.
///
/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
///
/// # Safety
///
/// The behavior is undefined if the buffer is not large enough to hold the encoded codepoint.
/// A buffer of length four is large enough to encode any `char`.
///
/// For a safe version of this function, see the [`encode_utf8_raw`] function.
#[inline]
pub unsafe fn encode_utf8_raw_unchecked(code: u32, dst: &mut [mem::MaybeUninit<u8>]) -> &mut [u8] {
    let len = len_utf8(code);

    // Catches contract violations in debug builds; compiled out in release builds.
    debug_assert!(
        dst.len() >= len,
        "encode_utf8_raw_unchecked: buffer too small for the encoded codepoint",
    );

    // SAFETY: the caller must guarantee that `dst` is at least `len` bytes long
    unsafe {
        match len {
            1 => {
                dst.get_unchecked_mut(0).write(code as u8);
            }
            2 => {
                dst.get_unchecked_mut(0)
                    .write((code >> 6 & 0x1F) as u8 | TAG_TWO_B);
                dst.get_unchecked_mut(1)
                    .write((code & 0x3F) as u8 | TAG_CONT);
            }
            3 => {
                dst.get_unchecked_mut(0)
                    .write((code >> 12 & 0x0F) as u8 | TAG_THREE_B);
                dst.get_unchecked_mut(1)
                    .write((code >> 6 & 0x3F) as u8 | TAG_CONT);
                dst.get_unchecked_mut(2)
                    .write((code & 0x3F) as u8 | TAG_CONT);
            }
            4 => {
                dst.get_unchecked_mut(0)
                    .write((code >> 18 & 0x07) as u8 | TAG_FOUR_B);
                dst.get_unchecked_mut(1)
                    .write((code >> 12 & 0x3F) as u8 | TAG_CONT);
                dst.get_unchecked_mut(2)
                    .write((code >> 6 & 0x3F) as u8 | TAG_CONT);
                dst.get_unchecked_mut(3)
                    .write((code & 0x3F) as u8 | TAG_CONT);
            }
            // `len_utf8` only ever returns a value between 1 and 4.
            _ => unreachable!(),
        }
    }

    // SAFETY: data has been written to the first `len` bytes
    unsafe { &mut *(dst.get_unchecked_mut(..len) as *mut [mem::MaybeUninit<u8>] as *mut [u8]) }
}
rust source #2
Output
Compile to binary object
Link to binary
Execute the code
Intel asm syntax
Demangle identifiers
Verbose demangling
Filters
Unused labels
Library functions
Directives
Comments
Horizontal whitespace
Debug intrinsics
Compiler
mrustc (master)
rustc 1.0.0
rustc 1.1.0
rustc 1.10.0
rustc 1.11.0
rustc 1.12.0
rustc 1.13.0
rustc 1.14.0
rustc 1.15.1
rustc 1.16.0
rustc 1.17.0
rustc 1.18.0
rustc 1.19.0
rustc 1.2.0
rustc 1.20.0
rustc 1.21.0
rustc 1.22.0
rustc 1.23.0
rustc 1.24.0
rustc 1.25.0
rustc 1.26.0
rustc 1.27.0
rustc 1.27.1
rustc 1.28.0
rustc 1.29.0
rustc 1.3.0
rustc 1.30.0
rustc 1.31.0
rustc 1.32.0
rustc 1.33.0
rustc 1.34.0
rustc 1.35.0
rustc 1.36.0
rustc 1.37.0
rustc 1.38.0
rustc 1.39.0
rustc 1.4.0
rustc 1.40.0
rustc 1.41.0
rustc 1.42.0
rustc 1.43.0
rustc 1.44.0
rustc 1.45.0
rustc 1.45.2
rustc 1.46.0
rustc 1.47.0
rustc 1.48.0
rustc 1.49.0
rustc 1.5.0
rustc 1.50.0
rustc 1.51.0
rustc 1.52.0
rustc 1.53.0
rustc 1.54.0
rustc 1.55.0
rustc 1.56.0
rustc 1.57.0
rustc 1.58.0
rustc 1.59.0
rustc 1.6.0
rustc 1.60.0
rustc 1.61.0
rustc 1.62.0
rustc 1.63.0
rustc 1.64.0
rustc 1.65.0
rustc 1.66.0
rustc 1.67.0
rustc 1.68.0
rustc 1.69.0
rustc 1.7.0
rustc 1.70.0
rustc 1.71.0
rustc 1.72.0
rustc 1.73.0
rustc 1.74.0
rustc 1.75.0
rustc 1.76.0
rustc 1.77.0
rustc 1.78.0
rustc 1.79.0
rustc 1.8.0
rustc 1.80.0
rustc 1.81.0
rustc 1.82.0
rustc 1.83.0
rustc 1.84.0
rustc 1.85.0
rustc 1.86.0
rustc 1.87.0
rustc 1.9.0
rustc beta
rustc nightly
rustc-cg-gcc (master)
x86-64 GCCRS (GCC master)
x86-64 GCCRS (GCCRS master)
x86-64 GCCRS 14.1 (GCC assertions)
x86-64 GCCRS 14.1 (GCC)
x86-64 GCCRS 14.2 (GCC assertions)
x86-64 GCCRS 14.2 (GCC)
x86-64 GCCRS 14.3 (GCC assertions)
x86-64 GCCRS 14.3 (GCC)
x86-64 GCCRS 15.1 (GCC assertions)
x86-64 GCCRS 15.1 (GCC)
Options
Source code
use core::slice;

/// Appends `ch` to the end of `s`.
///
/// The character is encoded as UTF-8 straight into the string's spare
/// capacity, after which the length is bumped to cover the new bytes.
pub fn push(s: &mut String, ch: char) {
    let len = s.len();
    let ch_len = ch.len_utf8();
    s.reserve(ch_len);

    // SAFETY: Just reserved capacity for at least the length needed to encode `ch`,
    // and the bytes written are the UTF-8 encoding of a `char`, so the string is
    // still valid UTF-8 once its length covers them.
    unsafe {
        // Take the pointer from the backing `Vec`, not from the `&mut str` view:
        // a pointer derived from the `str` is only valid for the initialized
        // `len` bytes, whereas the write below lands in the spare capacity.
        let dst = s.as_mut_vec().as_mut_ptr().add(len);
        encode_utf8_raw_unchecked(ch as u32, dst);
        s.as_mut_vec().set_len(len + ch_len);
    }
}

/// Tag bits of a continuation byte (`10xx_xxxx`).
const TAG_CONT: u8 = 0b1000_0000;
/// Tag bits of the leading byte of a two-byte sequence (`110x_xxxx`).
const TAG_TWO_B: u8 = 0b1100_0000;
/// Tag bits of the leading byte of a three-byte sequence (`1110_xxxx`).
const TAG_THREE_B: u8 = 0b1110_0000;
/// Tag bits of the leading byte of a four-byte sequence (`1111_0xxx`).
const TAG_FOUR_B: u8 = 0b1111_0000;
/// Exclusive upper bound of the code points that encode as one byte.
const MAX_ONE_B: u32 = 0x80;
/// Exclusive upper bound of the code points that encode as two bytes.
const MAX_TWO_B: u32 = 0x800;
/// Exclusive upper bound of the code points that encode as three bytes.
const MAX_THREE_B: u32 = 0x10000;

/// Returns the number of bytes (1 to 4) used to encode `code`.
#[inline]
const fn len_utf8(code: u32) -> usize {
    match code {
        ..MAX_ONE_B => 1,
        ..MAX_TWO_B => 2,
        ..MAX_THREE_B => 3,
        _ => 4,
    }
}

/// Encodes a raw u32 value as UTF-8 into the provided byte buffer,
/// and then returns the subslice of the buffer that contains the encoded character.
///
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
/// (Creating a `char` in the surrogate range is UB.)
/// The result is valid [generalized UTF-8] but not valid UTF-8.
///
/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
///
/// # Panics
///
/// Panics if the buffer is not large enough.
/// A buffer of length four is large enough to encode any `char`.
#[inline]
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
    let len = len_utf8(code);
    if dst.len() < len {
        panic!(
            "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
            len,
            code,
            dst.len(),
        );
    }

    // SAFETY: `dst` is checked to be at least the length needed to encode the codepoint
    unsafe { encode_utf8_raw_unchecked(code, dst.as_mut_ptr()) };

    // SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer
    // and `len` has been tested to be within bounds.
    unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
}

/// Encodes a raw u32 value as UTF-8 to the provided destination buffer.
///
/// Unlike `char::encode_utf8`, this method also handles codepoints in the surrogate range.
/// (Creating a `char` in the surrogate range is UB.)
/// The result is valid [generalized UTF-8] but not valid UTF-8.
///
/// [generalized UTF-8]: https://simonsapin.github.io/wtf-8/#generalized-utf8
///
/// # Safety
///
/// The behavior is undefined if the buffer pointed to by `dst` is not
/// large enough to hold the encoded codepoint. A buffer of length four
/// is large enough to encode any `char`.
///
/// For a safe version of this function, see the [`encode_utf8_raw`] function.
#[inline]
pub const unsafe fn encode_utf8_raw_unchecked(code: u32, dst: *mut u8) {
    let len = len_utf8(code);

    // SAFETY: The caller must guarantee that the buffer pointed to by `dst`
    // is at least `len` bytes long.
    unsafe {
        match len {
            1 => {
                *dst = code as u8;
            }
            2 => {
                *dst = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
                *dst.add(1) = (code & 0x3F) as u8 | TAG_CONT;
            }
            3 => {
                *dst = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
                *dst.add(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
                *dst.add(2) = (code & 0x3F) as u8 | TAG_CONT;
            }
            4 => {
                *dst = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
                *dst.add(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
                *dst.add(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
                *dst.add(3) = (code & 0x3F) as u8 | TAG_CONT;
            }
            // SAFETY: `len_utf8` only ever returns a value between 1 and 4.
            _ => core::hint::unreachable_unchecked(),
        }
    }
}
Become a Patron
Sponsor on GitHub
Donate via PayPal
Source on GitHub
Mailing list
Installed libraries
Wiki
Report an issue
How it works
Contact the author
CE on Mastodon
CE on Bluesky
About the author
Statistics
Changelog
Version tree