portable_atomic/imp/fallback/mod.rs

// SPDX-License-Identifier: Apache-2.0 OR MIT

/*
Fallback implementation using global locks.

The global locks are implemented as sequence locks (seqlocks).

This is based on the global locks in crossbeam-utils's `AtomicCell`,
but the seqlock is implemented in a way that does not depend on UB
(see the comments in the optimistic_read method in the atomic! macro for details).

Note that we cannot use a lock per atomic type, since the in-memory representation of the atomic
type and the value type must be the same.
*/

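// A rough sketch of the load path implemented below, in terms of the SeqLock
// methods actually used in this file (optimistic_read/validate_read/write/abort);
// `read_chunks()` is a stand-in for the chunk-wise atomic read defined in the
// atomic! macro, and the stamp representation is left to seq_lock.rs:
//
//     let lock = lock(address_of_value);
//     if let Some(stamp) = lock.optimistic_read() {
//         let val = read_chunks(); // relaxed per-chunk atomic loads, never a plain read
//         if lock.validate_read(stamp) {
//             return val; // no writer ran in between, so `val` is not torn
//         }
//     }
//     // Optimistic read failed or raced with a writer: take the write lock,
//     // read under it, and release it without bumping the stamp.
//     let guard = lock.write();
//     let val = read_chunks();
//     guard.abort();
//     val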
#![cfg_attr(
    any(
        all(
            target_arch = "x86_64",
            not(portable_atomic_no_outline_atomics),
            not(any(target_env = "sgx", miri)),
        ),
        all(
            target_arch = "powerpc64",
            feature = "fallback",
            not(portable_atomic_no_outline_atomics),
            any(
                all(
                    target_os = "linux",
                    any(
                        all(
                            target_env = "gnu",
                            any(target_endian = "little", not(target_feature = "crt-static")),
                        ),
                        all(
                            target_env = "musl",
                            any(not(target_feature = "crt-static"), feature = "std"),
                        ),
                        target_env = "ohos",
                        all(target_env = "uclibc", not(target_feature = "crt-static")),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "android",
                all(
                    target_os = "freebsd",
                    any(
                        target_endian = "little",
                        not(target_feature = "crt-static"),
                        portable_atomic_outline_atomics,
                    ),
                ),
                target_os = "openbsd",
                all(
                    target_os = "aix",
                    not(portable_atomic_pre_llvm_20),
                    portable_atomic_outline_atomics, // TODO(aix): currently disabled by default
                ),
            ),
            not(any(miri, portable_atomic_sanitize_thread)),
        ),
        all(
            target_arch = "riscv32",
            not(any(miri, portable_atomic_sanitize_thread)),
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(
                target_feature = "zacas",
                portable_atomic_target_feature = "zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "riscv64",
            not(any(miri, portable_atomic_sanitize_thread)),
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(
                target_feature = "zacas",
                portable_atomic_target_feature = "zacas",
                all(
                    feature = "fallback",
                    not(portable_atomic_no_outline_atomics),
                    any(target_os = "linux", target_os = "android"),
                ),
            ),
        ),
        all(
            target_arch = "arm",
            any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
            any(target_os = "linux", target_os = "android"),
            not(portable_atomic_no_outline_atomics),
        ),
    ),
    allow(dead_code)
)]

#[macro_use]
pub(crate) mod utils;

// Use a "wide" sequence lock if the pointer width is <= 32, to prevent its counter from
// wrapping around.
//
// On narrow architectures (pointer width <= 16), the counter is still <= 32 bits and may be
// vulnerable to wraparound. But that's mostly okay, since on such primitive hardware the
// counter will not be incremented that fast.
//
// Some 64-bit architectures have ABIs with a 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is available and fast,
// so use it to implement the normal sequence lock.
cfg_has_fast_atomic_64! {
    mod seq_lock;
}
cfg_no_fast_atomic_64! {
    #[path = "seq_lock_wide.rs"]
    mod seq_lock;
}
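// Whichever module is selected above, the rest of this file only relies on it
// exporting `SeqLock`, `SeqLockWriteGuard`, and the `Chunk`/`AtomicChunk` pair
// (see the `use self::seq_lock::...` imports below). As a rough feel for the
// wraparound concern mentioned above: a 32-bit stamp wraps after 2^32 increments,
// so a reader preempted across exactly a multiple of 2^32 increments could in
// principle validate a stale read; the "wide" variant sidesteps this by using a
// wider stamp on targets whose pointer width is <= 32.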

use core::{cell::UnsafeCell, mem, sync::atomic::Ordering};

use self::{
    seq_lock::{SeqLock, SeqLockWriteGuard},
    utils::CachePadded,
};
#[cfg(portable_atomic_no_strict_provenance)]
use crate::utils::ptr::PtrExt as _;

// Some 64-bit architectures have ABIs with a 32-bit pointer width (e.g., x86_64 X32 ABI,
// AArch64 ILP32 ABI, mips64 N32 ABI). On those targets, AtomicU64 is fast,
// so use it to reduce the number of chunks in the byte-wise atomic memcpy.
use self::seq_lock::{AtomicChunk, Chunk};
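// For example (assuming seq_lock.rs picks a 64-bit `Chunk` when fast 64-bit
// atomics are available, and a pointer-sized one otherwise): a u128 value is
// copied as 128 / 64 = 2 relaxed atomic chunk operations with a 64-bit `Chunk`,
// but would need 128 / 32 = 4 with a 32-bit one.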

// Adapted from https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L969-L1016.
#[inline]
#[must_use]
fn lock(addr: usize) -> &'static SeqLock {
    // The number of locks is a prime number because we want to make sure `addr % LEN` gets
    // dispersed across all locks.
    //
    // crossbeam-utils 0.8.7 uses 97 here but does not use CachePadded,
    // so the actual concurrency level will be smaller.
    const LEN: usize = 67;
    const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
    static LOCKS: [CachePadded<SeqLock>; LEN] = [
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L, L,
        L, L, L, L, L, L, L,
    ];

    // If the modulus is a constant number, the compiler will use crazy math to transform this into
    // a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
    &LOCKS[addr % LEN]
}
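// For illustration only (purely arithmetic, not part of the implementation):
// two 16-byte atomics placed 16 bytes apart, say at addresses 4096 and 4112,
// map to LOCKS[4096 % 67] == LOCKS[9] and LOCKS[4112 % 67] == LOCKS[25], so
// nearby values usually end up on different (cache-padded) locks.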

macro_rules! atomic {
    ($atomic_type:ident, $int_type:ident, $align:literal) => {
        #[repr(C, align($align))]
        pub(crate) struct $atomic_type {
            v: UnsafeCell<$int_type>,
        }

        impl $atomic_type {
            const LEN: usize = mem::size_of::<$int_type>() / mem::size_of::<Chunk>();

            #[inline]
            unsafe fn chunks(&self) -> &[AtomicChunk; Self::LEN] {
                static_assert!($atomic_type::LEN > 1);
                static_assert!(mem::size_of::<$int_type>() % mem::size_of::<Chunk>() == 0);

                // SAFETY: the caller must uphold the safety contract for `chunks`.
                unsafe { &*(self.v.get() as *const $int_type as *const [AtomicChunk; Self::LEN]) }
            }

            #[inline]
            fn optimistic_read(&self) -> $int_type {
                // Using `MaybeUninit<[usize; Self::LEN]>` here doesn't change codegen: https://godbolt.org/z/86f8s733M
                let mut dst: [Chunk; Self::LEN] = [0; Self::LEN];
                // SAFETY:
                // - There are no threads that perform non-atomic concurrent write operations.
                // - There is no writer that updates the value using atomic operations of different granularity.
                //
                // If the atomic operation were not used here, it would cause a data race
                // when `write` performs a concurrent write operation.
                // Such a data race is sometimes considered virtually unproblematic
                // in SeqLock implementations:
                //
                // - https://github.com/Amanieu/seqlock/issues/2
                // - https://github.com/crossbeam-rs/crossbeam/blob/crossbeam-utils-0.8.7/crossbeam-utils/src/atomic/atomic_cell.rs#L1111-L1116
                // - https://rust-lang.zulipchat.com/#narrow/stream/136281-t-lang.2Fwg-unsafe-code-guidelines/topic/avoiding.20UB.20due.20to.20races.20by.20discarding.20result.3F
                //
                // However, in our use case, the implementation that loads/stores the value as
                // chunks of usize is fast enough and sound, so we use that implementation.
                //
                // See also the atomic-memcpy crate, a generic implementation of this pattern:
                // https://github.com/taiki-e/atomic-memcpy
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    dst[i] = chunks[i].load(Ordering::Relaxed);
                }
                // SAFETY: integers are plain old data types so we can always transmute to them.
                unsafe { mem::transmute::<[Chunk; Self::LEN], $int_type>(dst) }
            }

            #[inline]
            fn read(&self, _guard: &SeqLockWriteGuard<'static>) -> $int_type {
                // This calls optimistic_read, which can return a torn value, but the resulting
                // value is guaranteed not to be torn because we hold the write lock.
                self.optimistic_read()
            }

            #[inline]
            fn write(&self, val: $int_type, _guard: &SeqLockWriteGuard<'static>) {
                // SAFETY: integers are plain old data types so we can always transmute them to arrays of integers.
                let val = unsafe { mem::transmute::<$int_type, [Chunk; Self::LEN]>(val) };
                // SAFETY:
                // - The guard guarantees that we hold the write lock.
                // - There are no threads that perform non-atomic concurrent read or write operations.
                //
                // See optimistic_read for the reason that atomic operations are used here.
                let chunks = unsafe { self.chunks() };
                for i in 0..Self::LEN {
                    chunks[i].store(val[i], Ordering::Relaxed);
                }
            }
        }

        // Send is implicitly implemented.
        // SAFETY: any data races are prevented by the lock and atomic operation.
        unsafe impl Sync for $atomic_type {}

        impl_default_no_fetch_ops!($atomic_type, $int_type);
        impl_default_bit_opts!($atomic_type, $int_type);
        impl $atomic_type {
            #[inline]
            pub(crate) const fn new(v: $int_type) -> Self {
                Self { v: UnsafeCell::new(v) }
            }

            #[inline]
            pub(crate) fn is_lock_free() -> bool {
                Self::IS_ALWAYS_LOCK_FREE
            }
            pub(crate) const IS_ALWAYS_LOCK_FREE: bool = false;

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn load(&self, order: Ordering) -> $int_type {
                crate::utils::assert_load_ordering(order);
                let lock = lock(self.v.get().addr());

                // Try doing an optimistic read first.
                if let Some(stamp) = lock.optimistic_read() {
                    let val = self.optimistic_read();

                    if lock.validate_read(stamp) {
                        return val;
                    }
                }

                // Grab a regular write lock so that writers don't starve this load.
                let guard = lock.write();
                let val = self.read(&guard);
                // The value hasn't been changed. Drop the guard without incrementing the stamp.
                guard.abort();
                val
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn store(&self, val: $int_type, order: Ordering) {
                crate::utils::assert_store_ordering(order);
                let guard = lock(self.v.get().addr()).write();
                self.write(val, &guard)
            }

            #[inline]
            pub(crate) fn swap(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(val, &guard);
                prev
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                crate::utils::assert_compare_exchange_ordering(success, failure);
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                if prev == current {
                    self.write(new, &guard);
                    Ok(prev)
                } else {
                    // The value hasn't been changed. Drop the guard without incrementing the stamp.
                    guard.abort();
                    Err(prev)
                }
            }

            #[inline]
            #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)]
            pub(crate) fn compare_exchange_weak(
                &self,
                current: $int_type,
                new: $int_type,
                success: Ordering,
                failure: Ordering,
            ) -> Result<$int_type, $int_type> {
                self.compare_exchange(current, new, success, failure)
            }

            #[inline]
            pub(crate) fn fetch_add(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_add(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_sub(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_sub(val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_and(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev & val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_nand(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(!(prev & val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_or(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev | val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_xor(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev ^ val, &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_max(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(core::cmp::max(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_min(&self, val: $int_type, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(core::cmp::min(prev, val), &guard);
                prev
            }

            #[inline]
            pub(crate) fn fetch_not(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(!prev, &guard);
                prev
            }
            #[inline]
            pub(crate) fn not(&self, order: Ordering) {
                self.fetch_not(order);
            }

            #[inline]
            pub(crate) fn fetch_neg(&self, _order: Ordering) -> $int_type {
                let guard = lock(self.v.get().addr()).write();
                let prev = self.read(&guard);
                self.write(prev.wrapping_neg(), &guard);
                prev
            }
            #[inline]
            pub(crate) fn neg(&self, order: Ordering) {
                self.fetch_neg(order);
            }

            #[inline]
            pub(crate) const fn as_ptr(&self) -> *mut $int_type {
                self.v.get()
            }
        }
    };
}

#[cfg_attr(
    portable_atomic_no_cfg_target_has_atomic,
    cfg(any(
        test,
        not(any(
            not(portable_atomic_no_atomic_64),
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
                any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
            ),
        ))
    ))
)]
#[cfg_attr(
    not(portable_atomic_no_cfg_target_has_atomic),
    cfg(any(
        test,
        not(any(
            target_has_atomic = "64",
            all(
                target_arch = "riscv32",
                not(any(miri, portable_atomic_sanitize_thread)),
                any(not(portable_atomic_no_asm), portable_atomic_unstable_asm),
                any(target_feature = "zacas", portable_atomic_target_feature = "zacas"),
            ),
        ))
    ))
)]
cfg_no_fast_atomic_64! {
    atomic!(AtomicI64, i64, 8);
    atomic!(AtomicU64, u64, 8);
}

atomic!(AtomicI128, i128, 16);
atomic!(AtomicU128, u128, 16);

#[cfg(test)]
mod tests {
    use super::*;

    cfg_no_fast_atomic_64! {
        test_atomic_int!(i64);
        test_atomic_int!(u64);
    }
    test_atomic_int!(i128);
    test_atomic_int!(u128);

    // load/store/swap implementation is not affected by signedness, so it is
    // enough to test only unsigned types.
    cfg_no_fast_atomic_64! {
        stress_test!(u64);
    }
    stress_test!(u128);
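
    // A minimal, single-threaded usage sketch of the lock-based AtomicU128 defined
    // in this file (not part of the upstream test suite); concurrent behavior is
    // exercised by test_atomic_int!/stress_test! above.
    #[test]
    fn fallback_u128_basic_ops_sketch() {
        let a = AtomicU128::new(1);
        // The fallback is never lock-free.
        assert!(!AtomicU128::is_lock_free());
        assert_eq!(a.load(Ordering::SeqCst), 1);
        a.store(2, Ordering::SeqCst);
        assert_eq!(a.swap(5, Ordering::SeqCst), 2);
        // Successful and failing compare_exchange both return the previous value.
        assert_eq!(a.compare_exchange(5, 10, Ordering::SeqCst, Ordering::SeqCst), Ok(5));
        assert_eq!(a.compare_exchange(5, 12, Ordering::SeqCst, Ordering::SeqCst), Err(10));
        assert_eq!(a.fetch_add(1, Ordering::SeqCst), 10);
        assert_eq!(a.fetch_not(Ordering::SeqCst), 11);
        assert_eq!(a.load(Ordering::SeqCst), !11u128);
    }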
}