| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| #ifndef BIONIC_ATOMIC_ARM_H |
| #define BIONIC_ATOMIC_ARM_H |
| |
| #include <machine/cpu-features.h> |
| |
/* Some of the hardware instructions used below are not available in Thumb-1
 * mode (they are if you build in ARM or Thumb-2 mode though). To solve this
 * problem, we use the same technique as libatomic_ops, which is to
 * temporarily switch to ARM, do the operation, then switch back to Thumb-1.
 *
 * This results in two 'bx' jumps, just like a normal function call, but
 * everything is kept inlined, which avoids loading or computing the
 * function's address and prevents a little I-cache thrashing too.
 *
 * However, it is highly recommended to avoid compiling any C library source
 * file that uses these functions in Thumb-1 mode.
| * |
| * Define three helper macros to implement this: |
| */ |
| #if defined(__thumb__) && !defined(__thumb2__) |
| # define __ATOMIC_SWITCH_TO_ARM \ |
| "adr r3, 5f\n" \ |
| "bx r3\n" \ |
| ".align\n" \ |
| ".arm\n" \ |
| "5:\n" |
| /* note: the leading \n below is intentional */ |
| # define __ATOMIC_SWITCH_TO_THUMB \ |
| "\n" \ |
| "adr r3, 6f\n" \ |
| "bx r3\n" \ |
".thumb\n" \
| "6:\n" |
| |
# define __ATOMIC_CLOBBERS "r3", /* list of clobbered registers */
| |
| /* Warn the user that ARM mode should really be preferred! */ |
| # warning Rebuilding this source file in ARM mode is highly recommended for performance!! |
| |
| #else |
| # define __ATOMIC_SWITCH_TO_ARM /* nothing */ |
| # define __ATOMIC_SWITCH_TO_THUMB /* nothing */ |
| # define __ATOMIC_CLOBBERS /* nothing */ |
| #endif |
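/* Illustrative sketch (not part of the original header): this is how the
 * three macros above are meant to be combined inside an inline assembly
 * statement, assuming the target supports LDREX. The function name
 * __example_ldrex is made up; the real users are the __bionic_* functions
 * further down.
 *
 *   __ATOMIC_INLINE__ int32_t __example_ldrex(volatile int32_t* ptr)
 *   {
 *       int32_t value;
 *       __asm__ __volatile__ (
 *           __ATOMIC_SWITCH_TO_ARM         // branch to ARM mode if building Thumb-1
 *           "ldrex %0, [%1]"               // exclusive load, not encodable in Thumb-1
 *           __ATOMIC_SWITCH_TO_THUMB       // branch back to Thumb-1 if needed
 *           : "=&r" (value)
 *           : "r" (ptr)
 *           : __ATOMIC_CLOBBERS "memory"); // r3 is clobbered by the mode switch
 *       return value;
 *   }
 */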
| |
| |
/* Define a full memory barrier; this is only needed if we build the
 * platform for a multi-core device. For the record, using a 'dmb'
 * instruction on a Nexus One device can take up to 180 ns even when
 * it is completely unnecessary on that device.
 *
 * NOTE: This is where the platform and NDK atomic headers are going
 * to diverge. With the NDK, we don't know if the generated code is
 * going to run on a single-core or multi-core device, so we need to
 * be cautious.
 *
 * Fortunately, we can use the kernel helper function that is mapped
 * at address 0xffff0fa0 in every user process, and that provides a
 * device-specific barrier operation: on single-core devices, the
 * helper returns immediately; on multi-core devices, it uses "dmb"
 * or some other means to perform a full memory barrier.
 *
 * There are three cases to consider for the platform:
 *
 * - multi-core ARMv7-A => use the 'dmb' hardware instruction
 * - multi-core ARMv6 => use the coprocessor
 * - single-core ARMv5TE/6/7 => do not use any hardware barrier
 */
| #if defined(ANDROID_SMP) && ANDROID_SMP == 1 |
| |
/* Sanity check: multi-core is only supported starting from ARMv6 */
| # if __ARM_ARCH__ < 6 |
| # error ANDROID_SMP should not be set to 1 for an ARM architecture less than 6 |
| # endif |
| |
| # ifdef __ARM_HAVE_DMB |
| /* For ARMv7-A, we can use the 'dmb' instruction directly */ |
| __ATOMIC_INLINE__ void |
| __bionic_memory_barrier(void) |
| { |
| /* Note: we always build in ARM or Thumb-2 on ARMv7-A, so don't |
| * bother with __ATOMIC_SWITCH_TO_ARM */ |
| __asm__ __volatile__ ( "dmb" : : : "memory" ); |
| } |
| # else /* !__ARM_HAVE_DMB */ |
/* Otherwise, i.e. for multi-core ARMv6, we need to use the coprocessor,
 * which requires the use of a general-purpose register and is slightly
 * less efficient.
 */
| __ATOMIC_INLINE__ void |
| __bionic_memory_barrier(void) |
| { |
| __asm__ __volatile__ ( |
__ATOMIC_SWITCH_TO_ARM
"mcr p15, 0, %0, c7, c10, 5"
__ATOMIC_SWITCH_TO_THUMB
| : : "r" (0) : __ATOMIC_CLOBBERS "memory"); |
| } |
| # endif /* !__ARM_HAVE_DMB */ |
| #else /* !ANDROID_SMP */ |
| __ATOMIC_INLINE__ void |
| __bionic_memory_barrier(void) |
| { |
| /* A simple compiler barrier */ |
| __asm__ __volatile__ ( "" : : : "memory" ); |
| } |
| #endif /* !ANDROID_SMP */ |
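/* Illustrative sketch only (the names kernel_dmb and __example_kernel_barrier
 * are made up, and this header does not actually use them): the NDK-style
 * fallback described above would call the kernel user helper mapped at
 * 0xffff0fa0, in the same way __kernel_cmpxchg below calls the helper at
 * 0xffff0fc0. The barrier helper takes no arguments and returns nothing.
 *
 *   typedef void (kernel_dmb)(void);
 *
 *   __ATOMIC_INLINE__ void __example_kernel_barrier(void)
 *   {
 *       (*(kernel_dmb *)0xffff0fa0)();
 *   }
 */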
| |
/* Compare-and-swap, without any explicit barriers. Note that this function
 * returns 0 on success and 1 on failure. The opposite convention is typically
| * used on other platforms. |
| * |
| * There are two cases to consider: |
| * |
| * - ARMv6+ => use LDREX/STREX instructions |
 * - < ARMv6 => use the kernel helper function mapped at 0xffff0fc0
| * |
| * LDREX/STREX are only available starting from ARMv6 |
| */ |
| #ifdef __ARM_HAVE_LDREX_STREX |
| __ATOMIC_INLINE__ int |
| __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) |
| { |
| int32_t prev, status; |
| do { |
| __asm__ __volatile__ ( |
| __ATOMIC_SWITCH_TO_ARM |
| "ldrex %0, [%3]\n" |
| "mov %1, #0\n" |
| "teq %0, %4\n" |
| #ifdef __thumb2__ |
| "it eq\n" |
| #endif |
| "strexeq %1, %5, [%3]" |
| __ATOMIC_SWITCH_TO_THUMB |
| : "=&r" (prev), "=&r" (status), "+m"(*ptr) |
| : "r" (ptr), "Ir" (old_value), "r" (new_value) |
| : __ATOMIC_CLOBBERS "cc"); |
| } while (__builtin_expect(status != 0, 0)); |
| return prev != old_value; |
| } |
#else /* !__ARM_HAVE_LDREX_STREX */
| |
| /* Use the handy kernel helper function mapped at 0xffff0fc0 */ |
| typedef int (kernel_cmpxchg)(int32_t, int32_t, volatile int32_t *); |
| |
| __ATOMIC_INLINE__ int |
| __kernel_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) |
| { |
| /* Note: the kernel function returns 0 on success too */ |
| return (*(kernel_cmpxchg *)0xffff0fc0)(old_value, new_value, ptr); |
| } |
| |
| __ATOMIC_INLINE__ int |
| __bionic_cmpxchg(int32_t old_value, int32_t new_value, volatile int32_t* ptr) |
| { |
| return __kernel_cmpxchg(old_value, new_value, ptr); |
| } |
| #endif /* !__ARM_HAVE_LDREX_STREX */ |
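/* Usage sketch (the helper name __example_atomic_add is made up): because
 * __bionic_cmpxchg returns 0 on success, a retry loop reads the current
 * value and loops while the CAS reports failure, much like the pre-ARMv6
 * fallbacks of __bionic_atomic_inc/dec further down.
 *
 *   __ATOMIC_INLINE__ int32_t __example_atomic_add(int32_t amount, volatile int32_t* ptr)
 *   {
 *       int32_t prev, status;
 *       do {
 *           prev = *ptr;
 *           status = __bionic_cmpxchg(prev, prev + amount, ptr);
 *       } while (__builtin_expect(status != 0, 0));
 *       return prev;   // old value, matching the other __bionic_* helpers
 *   }
 */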
| |
| /* Swap operation, without any explicit barriers. |
| * There are again two similar cases to consider: |
| * |
| * ARMv6+ => use LDREX/STREX |
| * < ARMv6 => use SWP instead. |
| */ |
| #ifdef __ARM_HAVE_LDREX_STREX |
| __ATOMIC_INLINE__ int32_t |
| __bionic_swap(int32_t new_value, volatile int32_t* ptr) |
| { |
| int32_t prev, status; |
| do { |
| __asm__ __volatile__ ( |
| __ATOMIC_SWITCH_TO_ARM |
| "ldrex %0, [%3]\n" |
| "strex %1, %4, [%3]" |
| __ATOMIC_SWITCH_TO_THUMB |
| : "=&r" (prev), "=&r" (status), "+m" (*ptr) |
| : "r" (ptr), "r" (new_value) |
| : __ATOMIC_CLOBBERS "cc"); |
| } while (__builtin_expect(status != 0, 0)); |
| return prev; |
| } |
| #else /* !__ARM_HAVE_LDREX_STREX */ |
__ATOMIC_INLINE__ int32_t
__bionic_swap(int32_t new_value, volatile int32_t* ptr)
{
int32_t prev;
/* NOTE: SWP only has an ARM encoding, so switch out of Thumb-1 if needed */
__asm__ __volatile__ (
__ATOMIC_SWITCH_TO_ARM
"swp %0, %2, [%3]"
__ATOMIC_SWITCH_TO_THUMB
: "=&r" (prev), "+m" (*ptr)
: "r" (new_value), "r" (ptr)
: __ATOMIC_CLOBBERS "cc");
return prev;
}
| #endif /* !__ARM_HAVE_LDREX_STREX */ |
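/* Usage sketch (the helper name __example_spin_lock is made up): a classic
 * use of an atomic swap is a test-and-set spinlock; swapping in 1 and
 * reading back 0 means the lock was free and is now held. Note that
 * __bionic_swap provides no barrier, so acquire semantics must be added
 * explicitly by the caller.
 *
 *   __ATOMIC_INLINE__ void __example_spin_lock(volatile int32_t* lock)
 *   {
 *       while (__bionic_swap(1, lock) != 0) {
 *           // spin until the previous value was 0
 *       }
 *       __bionic_memory_barrier();   // acquire barrier after taking the lock
 *   }
 */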
| |
| /* Atomic increment - without any barriers |
 * This returns the old value.
| */ |
| #ifdef __ARM_HAVE_LDREX_STREX |
| __ATOMIC_INLINE__ int32_t |
| __bionic_atomic_inc(volatile int32_t* ptr) |
| { |
| int32_t prev, tmp, status; |
| do { |
| __asm__ __volatile__ ( |
| __ATOMIC_SWITCH_TO_ARM |
| "ldrex %0, [%4]\n" |
| "add %1, %0, #1\n" |
| "strex %2, %1, [%4]" |
| __ATOMIC_SWITCH_TO_THUMB |
| : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr) |
| : "r" (ptr) |
| : __ATOMIC_CLOBBERS "cc"); |
| } while (__builtin_expect(status != 0, 0)); |
| return prev; |
| } |
| #else |
| __ATOMIC_INLINE__ int32_t |
| __bionic_atomic_inc(volatile int32_t* ptr) |
| { |
| int32_t prev, status; |
| do { |
| prev = *ptr; |
| status = __kernel_cmpxchg(prev, prev+1, ptr); |
| } while (__builtin_expect(status != 0, 0)); |
| return prev; |
| } |
| #endif |
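/* Usage sketch (the helper name __example_ref_acquire is made up): since
 * __bionic_atomic_inc returns the value held before the increment, the
 * count after this call is prev + 1.
 *
 *   __ATOMIC_INLINE__ int32_t __example_ref_acquire(volatile int32_t* refcount)
 *   {
 *       int32_t prev = __bionic_atomic_inc(refcount);
 *       return prev + 1;   // the reference count after this call
 *   }
 */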
| |
| /* Atomic decrement - without any barriers |
| * This returns the old value. |
| */ |
| #ifdef __ARM_HAVE_LDREX_STREX |
| __ATOMIC_INLINE__ int32_t |
| __bionic_atomic_dec(volatile int32_t* ptr) |
| { |
| int32_t prev, tmp, status; |
| do { |
| __asm__ __volatile__ ( |
| __ATOMIC_SWITCH_TO_ARM |
| "ldrex %0, [%4]\n" |
| "sub %1, %0, #1\n" |
| "strex %2, %1, [%4]" |
| __ATOMIC_SWITCH_TO_THUMB |
| : "=&r" (prev), "=&r" (tmp), "=&r" (status), "+m"(*ptr) |
| : "r" (ptr) |
| : __ATOMIC_CLOBBERS "cc"); |
| } while (__builtin_expect(status != 0, 0)); |
| return prev; |
| } |
| #else |
| __ATOMIC_INLINE__ int32_t |
| __bionic_atomic_dec(volatile int32_t* ptr) |
| { |
| int32_t prev, status; |
| do { |
| prev = *ptr; |
| status = __kernel_cmpxchg(prev, prev-1, ptr); |
| } while (__builtin_expect(status != 0, 0)); |
| return prev; |
| } |
| #endif |
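/* Usage sketch (the helper name __example_ref_release is made up): a typical
 * release pattern checks the value returned by __bionic_atomic_dec; if the
 * old value was 1, this call dropped the last reference. A real refcount
 * implementation would also need barriers, which these helpers deliberately
 * omit.
 *
 *   __ATOMIC_INLINE__ int __example_ref_release(volatile int32_t* refcount)
 *   {
 *       return __bionic_atomic_dec(refcount) == 1;   // non-zero if last reference
 *   }
 */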
| |
#endif /* BIONIC_ATOMIC_ARM_H */