core/
hint.rs

1#![stable(feature = "core_hint", since = "1.27.0")]
2
//! Hints to the compiler that affect how code should be emitted or optimized.
4//!
5//! Hints may be compile time or runtime.
6
7use crate::marker::Destruct;
8use crate::mem::MaybeUninit;
9use crate::{intrinsics, ub_checks};
10
11/// Informs the compiler that the site which is calling this function is not
12/// reachable, possibly enabling further optimizations.
13///
14/// # Safety
15///
16/// Reaching this function is *Undefined Behavior*.
17///
18/// As the compiler assumes that all forms of Undefined Behavior can never
19/// happen, it will eliminate all branches in the surrounding code that it can
20/// determine will invariably lead to a call to `unreachable_unchecked()`.
21///
22/// If the assumptions embedded in using this function turn out to be wrong -
23/// that is, if the site which is calling `unreachable_unchecked()` is actually
24/// reachable at runtime - the compiler may have generated nonsensical machine
25/// instructions for this situation, including in seemingly unrelated code,
26/// causing difficult-to-debug problems.
27///
28/// Use this function sparingly. Consider using the [`unreachable!`] macro,
29/// which may prevent some optimizations but will safely panic in case it is
30/// actually reached at runtime. Benchmark your code to find out if using
31/// `unreachable_unchecked()` comes with a performance benefit.
32///
33/// # Examples
34///
35/// `unreachable_unchecked()` can be used in situations where the compiler
36/// can't prove invariants that were previously established. Such situations
37/// have a higher chance of occurring if those invariants are upheld by
38/// external code that the compiler can't analyze.
39/// ```
40/// fn prepare_inputs(divisors: &mut Vec<u32>) {
41///     // Note to future-self when making changes: The invariant established
42///     // here is NOT checked in `do_computation()`; if this changes, you HAVE
43///     // to change `do_computation()`.
44///     divisors.retain(|divisor| *divisor != 0)
45/// }
46///
47/// /// # Safety
/// /// All elements of `divisors` must be non-zero.
49/// unsafe fn do_computation(i: u32, divisors: &[u32]) -> u32 {
50///     divisors.iter().fold(i, |acc, divisor| {
51///         // Convince the compiler that a division by zero can't happen here
52///         // and a check is not needed below.
53///         if *divisor == 0 {
54///             // Safety: `divisor` can't be zero because of `prepare_inputs`,
55///             // but the compiler does not know about this. We *promise*
56///             // that we always call `prepare_inputs`.
57///             unsafe { std::hint::unreachable_unchecked() }
58///         }
59///         // The compiler would normally introduce a check here that prevents
60///         // a division by zero. However, if `divisor` was zero, the branch
61///         // above would reach what we explicitly marked as unreachable.
62///         // The compiler concludes that `divisor` can't be zero at this point
63///         // and removes the - now proven useless - check.
64///         acc / divisor
65///     })
66/// }
67///
68/// let mut divisors = vec![2, 0, 4];
69/// prepare_inputs(&mut divisors);
70/// let result = unsafe {
71///     // Safety: prepare_inputs() guarantees that divisors is non-zero
72///     do_computation(100, &divisors)
73/// };
74/// assert_eq!(result, 12);
75///
76/// ```
77///
78/// While using `unreachable_unchecked()` is perfectly sound in the following
79/// example, as the compiler is able to prove that a division by zero is not
80/// possible, benchmarking reveals that `unreachable_unchecked()` provides
81/// no benefit over using [`unreachable!`], while the latter does not introduce
82/// the possibility of Undefined Behavior.
83///
84/// ```
85/// fn div_1(a: u32, b: u32) -> u32 {
86///     use std::hint::unreachable_unchecked;
87///
88///     // `b.saturating_add(1)` is always positive (not zero),
89///     // hence `checked_div` will never return `None`.
90///     // Therefore, the else branch is unreachable.
91///     a.checked_div(b.saturating_add(1))
92///         .unwrap_or_else(|| unsafe { unreachable_unchecked() })
93/// }
94///
95/// assert_eq!(div_1(7, 0), 7);
96/// assert_eq!(div_1(9, 1), 4);
97/// assert_eq!(div_1(11, u32::MAX), 0);
98/// ```
99#[inline]
100#[stable(feature = "unreachable", since = "1.27.0")]
101#[rustc_const_stable(feature = "const_unreachable_unchecked", since = "1.57.0")]
102#[track_caller]
103pub const unsafe fn unreachable_unchecked() -> ! {
104    ub_checks::assert_unsafe_precondition!(
105        check_language_ub,
106        "hint::unreachable_unchecked must never be reached",
107        () => false
108    );
109    // SAFETY: the safety contract for `intrinsics::unreachable` must
110    // be upheld by the caller.
111    unsafe { intrinsics::unreachable() }
112}
113
114/// Makes a *soundness* promise to the compiler that `cond` holds.
115///
116/// This may allow the optimizer to simplify things, but it might also make the generated code
117/// slower. Either way, calling it will most likely make compilation take longer.
118///
119/// You may know this from other places as
120/// [`llvm.assume`](https://llvm.org/docs/LangRef.html#llvm-assume-intrinsic) or, in C,
121/// [`__builtin_assume`](https://clang.llvm.org/docs/LanguageExtensions.html#builtin-assume).
122///
123/// This promotes a correctness requirement to a soundness requirement. Don't do that without
124/// very good reason.
125///
126/// # Usage
127///
128/// This is a situational tool for micro-optimization, and is allowed to do nothing. Any use
129/// should come with a repeatable benchmark to show the value, with the expectation to drop it
130/// later should the optimizer get smarter and no longer need it.
131///
132/// The more complicated the condition, the less likely this is to be useful. For example,
133/// `assert_unchecked(foo.is_sorted())` is a complex enough value that the compiler is unlikely
134/// to be able to take advantage of it.
135///
136/// There's also no need to `assert_unchecked` basic properties of things.  For example, the
137/// compiler already knows the range of `count_ones`, so there is no benefit to
138/// `let n = u32::count_ones(x); assert_unchecked(n <= u32::BITS);`.
139///
140/// `assert_unchecked` is logically equivalent to `if !cond { unreachable_unchecked(); }`. If
141/// ever you are tempted to write `assert_unchecked(false)`, you should instead use
142/// [`unreachable_unchecked()`] directly.
143///
144/// # Safety
145///
146/// `cond` must be `true`. It is immediate UB to call this with `false`.
147///
148/// # Example
149///
150/// ```
151/// use core::hint;
152///
153/// /// # Safety
154/// ///
155/// /// `p` must be nonnull and valid
156/// pub unsafe fn next_value(p: *const i32) -> i32 {
157///     // SAFETY: caller invariants guarantee that `p` is not null
158///     unsafe { hint::assert_unchecked(!p.is_null()) }
159///
160///     if p.is_null() {
161///         return -1;
162///     } else {
163///         // SAFETY: caller invariants guarantee that `p` is valid
164///         unsafe { *p + 1 }
165///     }
166/// }
167/// ```
168///
169/// Without the `assert_unchecked`, the above function produces the following with optimizations
170/// enabled:
171///
172/// ```asm
173/// next_value:
174///         test    rdi, rdi
175///         je      .LBB0_1
176///         mov     eax, dword ptr [rdi]
177///         inc     eax
178///         ret
179/// .LBB0_1:
180///         mov     eax, -1
181///         ret
182/// ```
183///
184/// Adding the assertion allows the optimizer to remove the extra check:
185///
186/// ```asm
187/// next_value:
188///         mov     eax, dword ptr [rdi]
189///         inc     eax
190///         ret
191/// ```
192///
193/// This example is quite unlike anything that would be used in the real world: it is redundant
194/// to put an assertion right next to code that checks the same thing, and dereferencing a
195/// pointer already has the builtin assumption that it is nonnull. However, it illustrates the
196/// kind of changes the optimizer can make even when the behavior is less obviously related.
197#[track_caller]
198#[inline(always)]
199#[doc(alias = "assume")]
200#[stable(feature = "hint_assert_unchecked", since = "1.81.0")]
201#[rustc_const_stable(feature = "hint_assert_unchecked", since = "1.81.0")]
202pub const unsafe fn assert_unchecked(cond: bool) {
203    // SAFETY: The caller promised `cond` is true.
204    unsafe {
205        ub_checks::assert_unsafe_precondition!(
206            check_language_ub,
207            "hint::assert_unchecked must never be called when the condition is false",
208            (cond: bool = cond) => cond,
209        );
210        crate::intrinsics::assume(cond);
211    }
212}
213
214/// Emits a machine instruction to signal the processor that it is running in
215/// a busy-wait spin-loop ("spin lock").
216///
217/// Upon receiving the spin-loop signal the processor can optimize its behavior by,
218/// for example, saving power or switching hyper-threads.
219///
220/// This function is different from [`thread::yield_now`] which directly
221/// yields to the system's scheduler, whereas `spin_loop` does not interact
222/// with the operating system.
223///
224/// A common use case for `spin_loop` is implementing bounded optimistic
225/// spinning in a CAS loop in synchronization primitives. To avoid problems
226/// like priority inversion, it is strongly recommended that the spin loop is
/// terminated after a finite number of iterations and an appropriate blocking
228/// syscall is made.
229///
230/// **Note**: On platforms that do not support receiving spin-loop hints this
231/// function does not do anything at all.
232///
233/// # Examples
234///
235/// ```ignore-wasm
236/// use std::sync::atomic::{AtomicBool, Ordering};
237/// use std::sync::Arc;
238/// use std::{hint, thread};
239///
240/// // A shared atomic value that threads will use to coordinate
241/// let live = Arc::new(AtomicBool::new(false));
242///
243/// // In a background thread we'll eventually set the value
244/// let bg_work = {
245///     let live = live.clone();
246///     thread::spawn(move || {
247///         // Do some work, then make the value live
248///         do_some_work();
249///         live.store(true, Ordering::Release);
250///     })
251/// };
252///
253/// // Back on our current thread, we wait for the value to be set
254/// while !live.load(Ordering::Acquire) {
255///     // The spin loop is a hint to the CPU that we're waiting, but probably
256///     // not for very long
257///     hint::spin_loop();
258/// }
259///
260/// // The value is now set
261/// # fn do_some_work() {}
262/// do_some_work();
263/// bg_work.join()?;
264/// # Ok::<(), Box<dyn core::any::Any + Send + 'static>>(())
265/// ```
266///
267/// [`thread::yield_now`]: ../../std/thread/fn.yield_now.html
268#[inline(always)]
269#[stable(feature = "renamed_spin_loop", since = "1.49.0")]
270pub fn spin_loop() {
271    crate::cfg_select! {
272        miri => {
273            unsafe extern "Rust" {
274                safe fn miri_spin_loop();
275            }
276
277            // Miri does support some of the intrinsics that are called below, but to guarantee
278            // consistent behavior across targets, this custom function is used.
279            miri_spin_loop();
280        }
281        target_arch = "x86" => {
282            // SAFETY: the `cfg` attr ensures that we only execute this on x86 targets.
283            crate::arch::x86::_mm_pause()
284        }
285        target_arch = "x86_64" => {
286            // SAFETY: the `cfg` attr ensures that we only execute this on x86_64 targets.
287            crate::arch::x86_64::_mm_pause()
288        }
289        target_arch = "riscv32" => crate::arch::riscv32::pause(),
290        target_arch = "riscv64" => crate::arch::riscv64::pause(),
291        any(target_arch = "aarch64", target_arch = "arm64ec") => {
292            // SAFETY: the `cfg` attr ensures that we only execute this on aarch64 targets.
293            unsafe { crate::arch::aarch64::__isb(crate::arch::aarch64::SY) }
294        }
295        all(
296            target_arch = "arm",
297            any(
298                all(target_feature = "v6k", not(target_feature = "thumb-mode")),
299                target_feature = "v6t2",
300                all(target_feature = "v6", target_feature = "mclass"),
301            )
302        ) => {
303            // SAFETY: the `cfg` attr ensures that we only execute this on arm
304            // targets with support for the this feature. On ARMv6 in Thumb
305            // mode, T2 is required (see Arm DDI0406C Section A8.8.427),
306            // otherwise ARMv6-M or ARMv6K is enough
307            unsafe { crate::arch::arm::__yield() }
308        }
309        target_arch = "loongarch32" => crate::arch::loongarch32::ibar::<0>(),
310        target_arch = "loongarch64" => crate::arch::loongarch64::ibar::<0>(),
311        _ => { /* do nothing */ }
312    }
313}
314
315/// An identity function that *__hints__* to the compiler to be maximally pessimistic about what
316/// `black_box` could do.
317///
318/// Unlike [`std::convert::identity`], a Rust compiler is encouraged to assume that `black_box` can
319/// use `dummy` in any possible valid way that Rust code is allowed to without introducing undefined
320/// behavior in the calling code. This property makes `black_box` useful for writing code in which
321/// certain optimizations are not desired, such as benchmarks.
322///
323/// <div class="warning">
324///
325/// Note however, that `black_box` is only (and can only be) provided on a "best-effort" basis. The
326/// extent to which it can block optimisations may vary depending upon the platform and code-gen
327/// backend used. Programs cannot rely on `black_box` for *correctness*, beyond it behaving as the
328/// identity function. As such, it **must not be relied upon to control critical program behavior.**
329/// This also means that this function does not offer any guarantees for cryptographic or security
330/// purposes.
331///
332/// This limitation is not specific to `black_box`; there is no mechanism in the entire Rust
333/// language that can provide the guarantees required for constant-time cryptography.
334/// (There is also no such mechanism in LLVM, so the same is true for every other LLVM-based compiler.)
335///
336/// </div>
337///
338/// [`std::convert::identity`]: crate::convert::identity
339///
340/// # When is this useful?
341///
342/// While not suitable in those mission-critical cases, `black_box`'s functionality can generally be
343/// relied upon for benchmarking, and should be used there. It will try to ensure that the
344/// compiler doesn't optimize away part of the intended test code based on context. For
345/// example:
346///
347/// ```
348/// fn contains(haystack: &[&str], needle: &str) -> bool {
349///     haystack.iter().any(|x| x == &needle)
350/// }
351///
352/// pub fn benchmark() {
353///     let haystack = vec!["abc", "def", "ghi", "jkl", "mno"];
354///     let needle = "ghi";
355///     for _ in 0..10 {
356///         contains(&haystack, needle);
357///     }
358/// }
359/// ```
360///
361/// The compiler could theoretically make optimizations like the following:
362///
363/// - The `needle` and `haystack` do not change, move the call to `contains` outside the loop and
364///   delete the loop
365/// - Inline `contains`
366/// - `needle` and `haystack` have values known at compile time, `contains` is always true. Remove
367///   the call and replace with `true`
368/// - Nothing is done with the result of `contains`: delete this function call entirely
369/// - `benchmark` now has no purpose: delete this function
370///
371/// It is not likely that all of the above happens, but the compiler is definitely able to make some
372/// optimizations that could result in a very inaccurate benchmark. This is where `black_box` comes
373/// in:
374///
375/// ```
376/// use std::hint::black_box;
377///
378/// // Same `contains` function.
379/// fn contains(haystack: &[&str], needle: &str) -> bool {
380///     haystack.iter().any(|x| x == &needle)
381/// }
382///
383/// pub fn benchmark() {
384///     let haystack = vec!["abc", "def", "ghi", "jkl", "mno"];
385///     let needle = "ghi";
386///     for _ in 0..10 {
387///         // Force the compiler to run `contains`, even though it is a pure function whose
388///         // results are unused.
389///         black_box(contains(
390///             // Prevent the compiler from making assumptions about the input.
391///             black_box(&haystack),
392///             black_box(needle),
393///         ));
394///     }
395/// }
396/// ```
397///
398/// This essentially tells the compiler to block optimizations across any calls to `black_box`. So,
399/// it now:
400///
401/// - Treats both arguments to `contains` as unpredictable: the body of `contains` can no longer be
402///   optimized based on argument values
403/// - Treats the call to `contains` and its result as volatile: the body of `benchmark` cannot
404///   optimize this away
405///
406/// This makes our benchmark much more realistic to how the function would actually be used, where
407/// arguments are usually not known at compile time and the result is used in some way.
408///
409/// # How to use this
410///
411/// In practice, `black_box` serves two purposes:
412///
413/// 1. It prevents the compiler from making optimizations related to the value returned by `black_box`
414/// 2. It forces the value passed to `black_box` to be calculated, even if the return value of `black_box` is unused
415///
416/// ```
417/// use std::hint::black_box;
418///
419/// let zero = 0;
420/// let five = 5;
421///
422/// // The compiler will see this and remove the `* five` call, because it knows that multiplying
423/// // any integer by 0 will result in 0.
424/// let c = zero * five;
425///
426/// // Adding `black_box` here disables the compiler's ability to reason about the first operand in the multiplication.
427/// // It is forced to assume that it can be any possible number, so it cannot remove the `* five`
428/// // operation.
429/// let c = black_box(zero) * five;
430/// ```
431///
432/// While most cases will not be as clear-cut as the above example, it still illustrates how
433/// `black_box` can be used. When benchmarking a function, you usually want to wrap its inputs in
434/// `black_box` so the compiler cannot make optimizations that would be unrealistic in real-life
435/// use.
436///
437/// ```
438/// use std::hint::black_box;
439///
440/// // This is a simple function that increments its input by 1. Note that it is pure, meaning it
441/// // has no side-effects. This function has no effect if its result is unused. (An example of a
442/// // function *with* side-effects is `println!()`.)
443/// fn increment(x: u8) -> u8 {
444///     x + 1
445/// }
446///
447/// // Here, we call `increment` but discard its result. The compiler, seeing this and knowing that
448/// // `increment` is pure, will eliminate this function call entirely. This may not be desired,
449/// // though, especially if we're trying to track how much time `increment` takes to execute.
450/// let _ = increment(black_box(5));
451///
452/// // Here, we force `increment` to be executed. This is because the compiler treats `black_box`
453/// // as if it has side-effects, and thus must compute its input.
454/// let _ = black_box(increment(black_box(5)));
455/// ```
456///
457/// There may be additional situations where you want to wrap the result of a function in
458/// `black_box` to force its execution. This is situational though, and may not have any effect
459/// (such as when the function returns a zero-sized type such as [`()` unit][unit]).
460///
461/// Note that `black_box` has no effect on how its input is treated, only its output. As such,
462/// expressions passed to `black_box` may still be optimized:
463///
464/// ```
465/// use std::hint::black_box;
466///
467/// // The compiler sees this...
468/// let y = black_box(5 * 10);
469///
470/// // ...as this. As such, it will likely simplify `5 * 10` to just `50`.
471/// let _0 = 5 * 10;
472/// let y = black_box(_0);
473/// ```
474///
475/// In the above example, the `5 * 10` expression is considered distinct from the `black_box` call,
476/// and thus is still optimized by the compiler. You can prevent this by moving the multiplication
477/// operation outside of `black_box`:
478///
479/// ```
480/// use std::hint::black_box;
481///
482/// // No assumptions can be made about either operand, so the multiplication is not optimized out.
483/// let y = black_box(5) * black_box(10);
484/// ```
485///
486/// During constant evaluation, `black_box` is treated as a no-op.
487#[inline]
488#[stable(feature = "bench_black_box", since = "1.66.0")]
489#[rustc_const_stable(feature = "const_black_box", since = "1.86.0")]
490pub const fn black_box<T>(dummy: T) -> T {
491    crate::intrinsics::black_box(dummy)
492}
493
494/// An identity function that causes an `unused_must_use` warning to be
495/// triggered if the given value is not used (returned, stored in a variable,
496/// etc) by the caller.
497///
498/// This is primarily intended for use in macro-generated code, in which a
499/// [`#[must_use]` attribute][must_use] either on a type or a function would not
500/// be convenient.
501///
502/// [must_use]: https://doc.rust-lang.org/reference/attributes/diagnostics.html#the-must_use-attribute
503///
504/// # Example
505///
506/// ```
507/// #![feature(hint_must_use)]
508///
509/// use core::fmt;
510///
511/// pub struct Error(/* ... */);
512///
513/// #[macro_export]
514/// macro_rules! make_error {
515///     ($($args:expr),*) => {
516///         core::hint::must_use({
517///             let error = make_error(core::format_args!($($args),*));
518///             error
519///         })
520///     };
521/// }
522///
523/// // Implementation detail of make_error! macro.
524/// #[doc(hidden)]
525/// pub fn make_error(args: fmt::Arguments<'_>) -> Error {
526///     Error(/* ... */)
527/// }
528///
529/// fn demo() -> Option<Error> {
530///     if true {
531///         // Oops, meant to write `return Some(make_error!("..."));`
532///         Some(make_error!("..."));
533///     }
534///     None
535/// }
536/// #
537/// # // Make rustdoc not wrap the whole snippet in fn main, so that $crate::make_error works
538/// # fn main() {}
539/// ```
540///
541/// In the above example, we'd like an `unused_must_use` lint to apply to the
542/// value created by `make_error!`. However, neither `#[must_use]` on a struct
543/// nor `#[must_use]` on a function is appropriate here, so the macro expands
544/// using `core::hint::must_use` instead.
545///
546/// - We wouldn't want `#[must_use]` on the `struct Error` because that would
547///   make the following unproblematic code trigger a warning:
548///
549///   ```
550///   # struct Error;
551///   #
552///   fn f(arg: &str) -> Result<(), Error>
553///   # { Ok(()) }
554///
555///   #[test]
556///   fn t() {
557///       // Assert that `f` returns error if passed an empty string.
558///       // A value of type `Error` is unused here but that's not a problem.
559///       f("").unwrap_err();
560///   }
561///   ```
562///
563/// - Using `#[must_use]` on `fn make_error` can't help because the return value
564///   *is* used, as the right-hand side of a `let` statement. The `let`
565///   statement looks useless but is in fact necessary for ensuring that
566///   temporaries within the `format_args` expansion are not kept alive past the
567///   creation of the `Error`, as keeping them alive past that point can cause
568///   autotrait issues in async code:
569///
570///   ```
571///   # #![feature(hint_must_use)]
572///   #
573///   # struct Error;
574///   #
575///   # macro_rules! make_error {
576///   #     ($($args:expr),*) => {
577///   #         core::hint::must_use({
578///   #             // If `let` isn't used, then `f()` produces a non-Send future.
579///   #             let error = make_error(core::format_args!($($args),*));
580///   #             error
581///   #         })
582///   #     };
583///   # }
584///   #
585///   # fn make_error(args: core::fmt::Arguments<'_>) -> Error {
586///   #     Error
587///   # }
588///   #
589///   async fn f() {
590///       // Using `let` inside the make_error expansion causes temporaries like
591///       // `unsync()` to drop at the semicolon of that `let` statement, which
592///       // is prior to the await point. They would otherwise stay around until
593///       // the semicolon on *this* statement, which is after the await point,
594///       // and the enclosing Future would not implement Send.
595///       log(make_error!("look: {:p}", unsync())).await;
596///   }
597///
598///   async fn log(error: Error) {/* ... */}
599///
600///   // Returns something without a Sync impl.
601///   fn unsync() -> *const () {
602///       0 as *const ()
603///   }
604///   #
605///   # fn test() {
606///   #     fn assert_send(_: impl Send) {}
607///   #     assert_send(f());
608///   # }
609///   ```
610#[unstable(feature = "hint_must_use", issue = "94745")]
611#[must_use] // <-- :)
612#[inline(always)]
613pub const fn must_use<T>(value: T) -> T {
614    value
615}
616
617/// Hints to the compiler that a branch condition is likely to be true.
618/// Returns the value passed to it.
619///
620/// It can be used with `if` or boolean `match` expressions.
621///
622/// When used outside of a branch condition, it may still influence a nearby branch, but
623/// probably will not have any effect.
624///
625/// It can also be applied to parts of expressions, such as `likely(a) && unlikely(b)`, or to
626/// compound expressions, such as `likely(a && b)`. When applied to compound expressions, it has
627/// the following effect:
628/// ```text
629///     likely(!a) => !unlikely(a)
630///     likely(a && b) => likely(a) && likely(b)
631///     likely(a || b) => a || likely(b)
632/// ```
633///
634/// See also the function [`cold_path()`] which may be more appropriate for idiomatic Rust code.
635///
636/// # Examples
637///
638/// ```
639/// #![feature(likely_unlikely)]
640/// use core::hint::likely;
641///
642/// fn foo(x: i32) {
643///     if likely(x > 0) {
644///         println!("this branch is likely to be taken");
645///     } else {
646///         println!("this branch is unlikely to be taken");
647///     }
648///
649///     match likely(x > 0) {
650///         true => println!("this branch is likely to be taken"),
651///         false => println!("this branch is unlikely to be taken"),
652///     }
653///
654///     // Use outside of a branch condition may still influence a nearby branch
655///     let cond = likely(x != 0);
656///     if cond {
657///         println!("this branch is likely to be taken");
658///     }
659/// }
660/// ```
661#[unstable(feature = "likely_unlikely", issue = "151619")]
662#[inline(always)]
663pub const fn likely(b: bool) -> bool {
664    crate::intrinsics::likely(b)
665}
666
667/// Hints to the compiler that a branch condition is unlikely to be true.
668/// Returns the value passed to it.
669///
670/// It can be used with `if` or boolean `match` expressions.
671///
672/// When used outside of a branch condition, it may still influence a nearby branch, but
673/// probably will not have any effect.
674///
675/// It can also be applied to parts of expressions, such as `likely(a) && unlikely(b)`, or to
676/// compound expressions, such as `unlikely(a && b)`. When applied to compound expressions, it has
677/// the following effect:
678/// ```text
679///     unlikely(!a) => !likely(a)
680///     unlikely(a && b) => a && unlikely(b)
681///     unlikely(a || b) => unlikely(a) || unlikely(b)
682/// ```
683///
684/// See also the function [`cold_path()`] which may be more appropriate for idiomatic Rust code.
685///
686/// # Examples
687///
688/// ```
689/// #![feature(likely_unlikely)]
690/// use core::hint::unlikely;
691///
692/// fn foo(x: i32) {
693///     if unlikely(x > 0) {
694///         println!("this branch is unlikely to be taken");
695///     } else {
696///         println!("this branch is likely to be taken");
697///     }
698///
699///     match unlikely(x > 0) {
700///         true => println!("this branch is unlikely to be taken"),
701///         false => println!("this branch is likely to be taken"),
702///     }
703///
704///     // Use outside of a branch condition may still influence a nearby branch
705///     let cond = unlikely(x != 0);
706///     if cond {
///         println!("this branch is unlikely to be taken");
708///     }
709/// }
710/// ```
711#[unstable(feature = "likely_unlikely", issue = "151619")]
712#[inline(always)]
713pub const fn unlikely(b: bool) -> bool {
714    crate::intrinsics::unlikely(b)
715}
716
/// Hints to the compiler that the given path is cold, i.e., unlikely to be taken. The compiler may
/// choose to optimize paths that are not cold at the expense of paths that are cold.
///
/// Note that like all hints, the exact effect on codegen is not guaranteed. Using `cold_path`
/// can actually *decrease* performance if the branch is taken more often than expected. It is
/// advisable to perform benchmarks to tell if this function is useful.
723///
724/// # Examples
725///
726/// ```
727/// use core::hint::cold_path;
728///
729/// fn foo(x: &[i32]) {
730///     if let Some(first) = x.get(0) {
731///         // this is the fast path
732///     } else {
733///         // this path is unlikely
734///         cold_path();
735///     }
736/// }
737///
738/// fn bar(x: i32) -> i32 {
739///     match x {
740///         1 => 10,
741///         2 => 100,
742///         3 => { cold_path(); 1000 }, // this branch is unlikely
743///         _ => { cold_path(); 10000 }, // this is also unlikely
744///     }
745/// }
746/// ```
747///
748/// This can also be used to implement `likely` and `unlikely` helpers to hint the condition rather
749/// than the branch:
750///
751/// ```
752/// use core::hint::cold_path;
753///
754/// #[inline(always)]
755/// pub const fn likely(b: bool) -> bool {
756///     if !b {
757///         cold_path();
758///     }
759///     b
760/// }
761///
762/// #[inline(always)]
763/// pub const fn unlikely(b: bool) -> bool {
764///     if b {
765///         cold_path();
766///     }
767///     b
768/// }
769///
770/// fn foo(x: i32) {
771///     if likely(x > 0) {
772///         println!("this branch is likely to be taken");
773///     } else {
774///         println!("this branch is unlikely to be taken");
775///     }
776/// }
777/// ```
778#[stable(feature = "cold_path", since = "1.95.0")]
779#[rustc_const_stable(feature = "cold_path", since = "1.95.0")]
780#[inline(always)]
781pub const fn cold_path() {
782    crate::intrinsics::cold_path()
783}
784
785/// Returns either `true_val` or `false_val` depending on the value of
786/// `condition`, with a hint to the compiler that `condition` is unlikely to be
787/// correctly predicted by a CPU’s branch predictor.
788///
789/// This method is functionally equivalent to
790/// ```ignore (this is just for illustrative purposes)
791/// fn select_unpredictable<T>(b: bool, true_val: T, false_val: T) -> T {
792///     if b { true_val } else { false_val }
793/// }
794/// ```
795/// but might generate different assembly. In particular, on platforms with
796/// a conditional move or select instruction (like `cmov` on x86 or `csel`
797/// on ARM) the optimizer might use these instructions to avoid branches,
798/// which can benefit performance if the branch predictor is struggling
799/// with predicting `condition`, such as in an implementation of binary
800/// search.
801///
802/// Note however that this lowering is not guaranteed (on any platform) and
803/// should not be relied upon when trying to write cryptographic constant-time
804/// code. Also be aware that this lowering might *decrease* performance if
805/// `condition` is well-predictable. It is advisable to perform benchmarks to
806/// tell if this function is useful.
807///
808/// # Examples
809///
810/// Distribute values evenly between two buckets:
811/// ```
812/// use std::hash::BuildHasher;
813/// use std::hint;
814///
815/// fn append<H: BuildHasher>(hasher: &H, v: i32, bucket_one: &mut Vec<i32>, bucket_two: &mut Vec<i32>) {
816///     let hash = hasher.hash_one(&v);
817///     let bucket = hint::select_unpredictable(hash % 2 == 0, bucket_one, bucket_two);
818///     bucket.push(v);
819/// }
820/// # let hasher = std::collections::hash_map::RandomState::new();
821/// # let mut bucket_one = Vec::new();
822/// # let mut bucket_two = Vec::new();
823/// # append(&hasher, 42, &mut bucket_one, &mut bucket_two);
824/// # assert_eq!(bucket_one.len() + bucket_two.len(), 1);
825/// ```
826#[inline(always)]
827#[stable(feature = "select_unpredictable", since = "1.88.0")]
828#[rustc_const_unstable(feature = "const_select_unpredictable", issue = "145938")]
829pub const fn select_unpredictable<T>(condition: bool, true_val: T, false_val: T) -> T
830where
831    T: [const] Destruct,
832{
833    // FIXME(https://github.com/rust-lang/unsafe-code-guidelines/issues/245):
834    // Change this to use ManuallyDrop instead.
835    let mut true_val = MaybeUninit::new(true_val);
836    let mut false_val = MaybeUninit::new(false_val);
837
838    struct DropOnPanic<T> {
839        // Invariant: valid pointer and points to an initialized value that is not further used,
840        // i.e. it can be dropped by this guard.
841        inner: *mut T,
842    }
843
844    impl<T> Drop for DropOnPanic<T> {
845        fn drop(&mut self) {
846            // SAFETY: Must be guaranteed on construction of local type `DropOnPanic`.
847            unsafe { self.inner.drop_in_place() }
848        }
849    }
850
851    let true_ptr = true_val.as_mut_ptr();
852    let false_ptr = false_val.as_mut_ptr();
853
854    // SAFETY: The value that is not selected is dropped, and the selected one
855    // is returned. This is necessary because the intrinsic doesn't drop the
856    // value that is  not selected.
857    unsafe {
858        // Extract the selected value first, ensure it is dropped as well if dropping the unselected
859        // value panics. We construct a temporary by-pointer guard around the selected value while
860        // dropping the unselected value. Arguments overlap here, so we can not use mutable
861        // reference for these arguments.
862        let guard = crate::intrinsics::select_unpredictable(condition, true_ptr, false_ptr);
863        let drop = crate::intrinsics::select_unpredictable(condition, false_ptr, true_ptr);
864
865        // SAFETY: both pointers are well-aligned and point to initialized values inside a
866        // `MaybeUninit` each. In both possible values for `condition` the pointer `guard` and
867        // `drop` do not alias (even though the two argument pairs we have selected from did alias
868        // each other).
869        let guard = DropOnPanic { inner: guard };
870        drop.drop_in_place();
871        crate::mem::forget(guard);
872
873        // Note that it is important to use the values here. Reading from the pointer we got makes
874        // LLVM forget the !unpredictable annotation sometimes (in tests, integer sized values in
875        // particular seemed to confuse it, also observed in llvm/llvm-project #82340).
876        crate::intrinsics::select_unpredictable(condition, true_val, false_val).assume_init()
877    }
878}
879
880/// The expected temporal locality of a memory prefetch operation.
881///
882/// Locality expresses how likely the prefetched data is to be reused soon,
883/// and therefore which level of cache it should be brought into.
884///
885/// The locality is just a hint, and may be ignored on some targets or by the hardware.
886///
887/// Used with functions like [`prefetch_read`] and [`prefetch_write`].
888///
889/// [`prefetch_read`]: crate::hint::prefetch_read
890/// [`prefetch_write`]: crate::hint::prefetch_write
891#[unstable(feature = "hint_prefetch", issue = "146941")]
892#[non_exhaustive]
893#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
894pub enum Locality {
895    /// Data is expected to be reused eventually.
896    ///
897    /// Typically prefetches into L3 cache (if the CPU supports it).
898    L3,
899    /// Data is expected to be reused in the near future.
900    ///
901    /// Typically prefetches into L2 cache.
902    L2,
903    /// Data is expected to be reused very soon.
904    ///
905    /// Typically prefetches into L1 cache.
906    L1,
907}
908
909impl Locality {
910    /// Convert to the constant that LLVM associates with a locality.
911    const fn to_llvm(self) -> i32 {
912        match self {
913            Self::L3 => 1,
914            Self::L2 => 2,
915            Self::L1 => 3,
916        }
917    }
918}
919
920/// Prefetch the cache line containing `ptr` for a future read.
921///
922/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
923/// soon after, but may also increase bandwidth usage or evict other cache lines.
924///
925/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
926///
927/// Passing a dangling or invalid pointer is permitted: the memory will not
928/// actually be dereferenced, and no faults are raised.
929///
930/// # Examples
931///
932/// ```
933/// #![feature(hint_prefetch)]
934/// use std::hint::{Locality, prefetch_read};
935/// use std::mem::size_of_val;
936///
937/// // Prefetch all of `slice` into the L1 cache.
938/// fn prefetch_slice<T>(slice: &[T]) {
939///     // On most systems the cache line size is 64 bytes.
940///     for offset in (0..size_of_val(slice)).step_by(64) {
941///         prefetch_read(slice.as_ptr().wrapping_add(offset), Locality::L1);
942///     }
943/// }
944/// ```
945#[inline(always)]
946#[unstable(feature = "hint_prefetch", issue = "146941")]
947pub const fn prefetch_read<T>(ptr: *const T, locality: Locality) {
948    match locality {
949        Locality::L3 => intrinsics::prefetch_read_data::<T, { Locality::L3.to_llvm() }>(ptr),
950        Locality::L2 => intrinsics::prefetch_read_data::<T, { Locality::L2.to_llvm() }>(ptr),
951        Locality::L1 => intrinsics::prefetch_read_data::<T, { Locality::L1.to_llvm() }>(ptr),
952    }
953}
954
955/// Prefetch the cache line containing `ptr` for a single future read, but attempt to avoid
956/// polluting the cache.
957///
958/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
959/// soon after, but may also increase bandwidth usage or evict other cache lines.
960///
961/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
962///
963/// Passing a dangling or invalid pointer is permitted: the memory will not
964/// actually be dereferenced, and no faults are raised.
965#[inline(always)]
966#[unstable(feature = "hint_prefetch", issue = "146941")]
967pub const fn prefetch_read_non_temporal<T>(ptr: *const T, locality: Locality) {
968    // The LLVM intrinsic does not currently support specifying the locality.
969    let _ = locality;
970    intrinsics::prefetch_read_data::<T, 0>(ptr)
971}
972
973/// Prefetch the cache line containing `ptr` for a future write.
974///
975/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
976/// soon after, but may also increase bandwidth usage or evict other cache lines.
977///
978/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
979///
980/// Passing a dangling or invalid pointer is permitted: the memory will not
981/// actually be dereferenced, and no faults are raised.
982#[inline(always)]
983#[unstable(feature = "hint_prefetch", issue = "146941")]
984pub const fn prefetch_write<T>(ptr: *mut T, locality: Locality) {
985    match locality {
986        Locality::L3 => intrinsics::prefetch_write_data::<T, { Locality::L3.to_llvm() }>(ptr),
987        Locality::L2 => intrinsics::prefetch_write_data::<T, { Locality::L2.to_llvm() }>(ptr),
988        Locality::L1 => intrinsics::prefetch_write_data::<T, { Locality::L1.to_llvm() }>(ptr),
989    }
990}
991
992/// Prefetch the cache line containing `ptr` for a single future write, but attempt to avoid
993/// polluting the cache.
994///
995/// A strategically placed prefetch can reduce cache miss latency if the data is accessed
996/// soon after, but may also increase bandwidth usage or evict other cache lines.
997///
998/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
999///
1000/// Passing a dangling or invalid pointer is permitted: the memory will not
1001/// actually be dereferenced, and no faults are raised.
1002#[inline(always)]
1003#[unstable(feature = "hint_prefetch", issue = "146941")]
1004pub const fn prefetch_write_non_temporal<T>(ptr: *const T, locality: Locality) {
1005    // The LLVM intrinsic does not currently support specifying the locality.
1006    let _ = locality;
1007    intrinsics::prefetch_write_data::<T, 0>(ptr)
1008}
1009
1010/// Prefetch the cache line containing `ptr` into the instruction cache for a future read.
1011///
1012/// A strategically placed prefetch can reduce cache miss latency if the instructions are
1013/// accessed soon after, but may also increase bandwidth usage or evict other cache lines.
1014///
1015/// A prefetch is a *hint*, and may be ignored on certain targets or by the hardware.
1016///
1017/// Passing a dangling or invalid pointer is permitted: the memory will not
1018/// actually be dereferenced, and no faults are raised.
1019#[inline(always)]
1020#[unstable(feature = "hint_prefetch", issue = "146941")]
1021pub const fn prefetch_read_instruction<T>(ptr: *const T, locality: Locality) {
1022    match locality {
1023        Locality::L3 => intrinsics::prefetch_read_instruction::<T, { Locality::L3.to_llvm() }>(ptr),
1024        Locality::L2 => intrinsics::prefetch_read_instruction::<T, { Locality::L2.to_llvm() }>(ptr),
1025        Locality::L1 => intrinsics::prefetch_read_instruction::<T, { Locality::L1.to_llvm() }>(ptr),
1026    }
1027}
core/hint.rs

core/
hint.rs