[API-NEXT,v9,2/6] linux-gen: sched scalable: add arch files

Message ID 20170619191247.51385-3-brian.brooks@arm.com
State Superseded
Headers show
Series
  • A scalable software scheduler
Related show

Commit Message

Brian Brooks June 19, 2017, 7:12 p.m.
Signed-off-by: Brian Brooks <brian.brooks@arm.com>

Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

---
 configure.ac                                     |   1 +
 platform/linux-generic/Makefile.am               |   8 +
 platform/linux-generic/arch/arm/odp_atomic.h     | 210 +++++++++++++++++++
 platform/linux-generic/arch/arm/odp_cpu.h        |  65 ++++++
 platform/linux-generic/arch/arm/odp_cpu_idling.h |  51 +++++
 platform/linux-generic/arch/arm/odp_llsc.h       | 249 +++++++++++++++++++++++
 platform/linux-generic/arch/default/odp_cpu.h    |  41 ++++
 platform/linux-generic/arch/mips64/odp_cpu.h     |  41 ++++
 platform/linux-generic/arch/powerpc/odp_cpu.h    |  41 ++++
 platform/linux-generic/arch/x86/odp_cpu.h        |  41 ++++
 10 files changed, 748 insertions(+)
 create mode 100644 platform/linux-generic/arch/arm/odp_atomic.h
 create mode 100644 platform/linux-generic/arch/arm/odp_cpu.h
 create mode 100644 platform/linux-generic/arch/arm/odp_cpu_idling.h
 create mode 100644 platform/linux-generic/arch/arm/odp_llsc.h
 create mode 100644 platform/linux-generic/arch/default/odp_cpu.h
 create mode 100644 platform/linux-generic/arch/mips64/odp_cpu.h
 create mode 100644 platform/linux-generic/arch/powerpc/odp_cpu.h
 create mode 100644 platform/linux-generic/arch/x86/odp_cpu.h

-- 
2.13.1

Comments

Savolainen, Petri (Nokia - FI/Espoo) June 20, 2017, 1 p.m. | #1
> +#endif  /* PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H */

> diff --git a/platform/linux-generic/arch/arm/odp_llsc.h b/platform/linux-

> generic/arch/arm/odp_llsc.h

> new file mode 100644

> index 00000000..3ab5c909

> --- /dev/null

> +++ b/platform/linux-generic/arch/arm/odp_llsc.h

> @@ -0,0 +1,249 @@

> +/* Copyright (c) 2017, ARM Limited

> + * All rights reserved.

> + *

> + * SPDX-License-Identifier:	BSD-3-Clause

> + */

> +

> +#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H

> +#define PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H

> +

> +#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H

> +#error This file should not be included directly, please include

> odp_cpu.h

> +#endif

> +

> +#if __ARM_ARCH == 7 || (__ARM_ARCH == 8 && __ARM_64BIT_STATE == 0)

> +



> +

> +#if __ARM_ARCH == 8 && __ARM_64BIT_STATE == 1

> +


Build broken for ARMv6? There are so many #ifdefs that it's hard to tell which code path is built. Maybe it would make sense to explicitly document/report an error when building for a non-supported ARM target. E.g. the original raspberry pi is ARMv6, it's possible that someone is building odp-linux for that...

-Petri
Ola Liljedahl June 21, 2017, 4:26 p.m. | #2
On 20/06/2017, 15:00, "Savolainen, Petri (Nokia - FI/Espoo)"
<petri.savolainen@nokia.com> wrote:

>

>> +#endif  /* PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H */

>> diff --git a/platform/linux-generic/arch/arm/odp_llsc.h

>>b/platform/linux-

>> generic/arch/arm/odp_llsc.h

>> new file mode 100644

>> index 00000000..3ab5c909

>> --- /dev/null

>> +++ b/platform/linux-generic/arch/arm/odp_llsc.h

>> @@ -0,0 +1,249 @@

>> +/* Copyright (c) 2017, ARM Limited

>> + * All rights reserved.

>> + *

>> + * SPDX-License-Identifier:	BSD-3-Clause

>> + */

>> +

>> +#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H

>> +#define PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H

>> +

>> +#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H

>> +#error This file should not be included directly, please include

>> odp_cpu.h

>> +#endif

>> +

>> +#if __ARM_ARCH == 7 || (__ARM_ARCH == 8 && __ARM_64BIT_STATE == 0)

>> +

>

>

>> +

>> +#if __ARM_ARCH == 8 && __ARM_64BIT_STATE == 1

>> +

>

>Build broken for ARMv6? There are so many #ifdefs that it's hard to tell

>which code path is built.

GCC preprocessor symbols for the ARM architecture(s) are a mess (and
different with different compiler versions I think) but that's not our
fault.

> Maybe it would make sense to explicitly document/report an error when

>building for a non-supported ARM target. E.g. the original raspberry pi

>is ARMv6, it's possible that someone is building odp-linux for that...

Different versions of the ARM architecture are actually more or less
different architectures from an ODP perspective. Perhaps the architecture
recognition should be more specific and identify ARMv7a and ARMv8a (32-bit
and 64-bit) and treat everything else as "default". I think ODP should use
the default (aka generic) arch implementation when the architecture is not
recognised/supported.


>

>-Petri

>

>

Patch hide | download patch | download mbox

diff --git a/configure.ac b/configure.ac
index fe36ce16..6f7357bc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -226,6 +226,7 @@  AM_CONDITIONAL([user_guide], [test "x${user_guides}" = "xyes" ])
 AM_CONDITIONAL([HAVE_MSCGEN], [test "x${MSCGEN}" = "xmscgen"])
 AM_CONDITIONAL([helper_linux], [test x$helper_linux = xyes ])
 AM_CONDITIONAL([ARCH_IS_X86], [test "x${ARCH_DIR}" = "xx86"])
+AM_CONDITIONAL([ARCH_IS_ARM], [test "x${ARCH_DIR}" = "xarm"])
 
 ##########################################################################
 # Setup doxygen documentation
diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index 58c73767..4690a650 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -8,6 +8,7 @@  AM_CFLAGS +=  -I$(srcdir)/include
 AM_CFLAGS +=  -I$(top_srcdir)/include
 AM_CFLAGS +=  -I$(top_srcdir)/include/odp/arch/@ARCH_ABI@
 AM_CFLAGS +=  -I$(top_builddir)/include
+AM_CFLAGS +=  -I$(top_srcdir)/arch/@ARCH_DIR@
 AM_CFLAGS +=  -Iinclude
 AM_CFLAGS +=  -DSYSCONFDIR=\"@sysconfdir@\"
 AM_CFLAGS +=  -D_ODP_PKTIO_IPC
@@ -183,8 +184,15 @@  noinst_HEADERS = \
 		  ${srcdir}/include/protocols/ipsec.h \
 		  ${srcdir}/include/protocols/tcp.h \
 		  ${srcdir}/include/protocols/udp.h \
+		  ${srcdir}/arch/@ARCH_DIR@/odp_cpu.h \
 		  ${srcdir}/Makefile.inc
 
+if ARCH_IS_ARM
+noinst_HEADERS += ${srcdir}/arch/@ARCH_DIR@/odp_atomic.h \
+		  ${srcdir}/arch/@ARCH_DIR@/odp_cpu_idling.h \
+		  ${srcdir}/arch/@ARCH_DIR@/odp_llsc.h
+endif
+
 __LIB__libodp_linux_la_SOURCES = \
 			   _fdserver.c \
 			   _ishm.c \
diff --git a/platform/linux-generic/arch/arm/odp_atomic.h b/platform/linux-generic/arch/arm/odp_atomic.h
new file mode 100644
index 00000000..0ddd8a11
--- /dev/null
+++ b/platform/linux-generic/arch/arm/odp_atomic.h
@@ -0,0 +1,210 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:	BSD-3-Clause
+ */
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H
+#define PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+#error This file should not be included directly, please include odp_cpu.h
+#endif
+
+#ifdef CONFIG_DMBSTR
+
+/* Emulate store-release: explicit release barrier followed by a relaxed
+ * store. 'ro' selects a load-only barrier when the preceding accesses that
+ * must be ordered are reads only (see _odp_release_barrier in odp_cpu.h). */
+#define atomic_store_release(loc, val, ro)		\
+do {							\
+	_odp_release_barrier(ro);			\
+	__atomic_store_n(loc, val, __ATOMIC_RELAXED);   \
+} while (0)
+
+#else
+
+/* Use the compiler's native store-release; 'ro' hint is ignored */
+#define atomic_store_release(loc, val, ro) \
+	__atomic_store_n(loc, val, __ATOMIC_RELEASE)
+
+#endif  /* CONFIG_DMBSTR */
+
+#if __ARM_ARCH == 8
+
+/* Split a C11 memory order into the orderings needed by the load-exclusive
+ * (LL) and store-exclusive (SC) halves of an LL/SC sequence. */
+#define HAS_ACQ(mo) ((mo) != __ATOMIC_RELAXED && (mo) != __ATOMIC_RELEASE)
+#define HAS_RLS(mo) ((mo) == __ATOMIC_RELEASE || (mo) == __ATOMIC_ACQ_REL || \
+		     (mo) == __ATOMIC_SEQ_CST)
+
+#define LL_MO(mo) (HAS_ACQ((mo)) ? __ATOMIC_ACQUIRE : __ATOMIC_RELAXED)
+#define SC_MO(mo) (HAS_RLS((mo)) ? __ATOMIC_RELEASE : __ATOMIC_RELAXED)
+
+#ifndef __ARM_FEATURE_QRDMX /* Feature only available in v8.1a and beyond */
+/* Without v8.1a, 128-bit lock-free operations are built from the LDXP/STXP
+ * exclusives (lld()/scd() from odp_llsc.h). */
+static inline bool
+__lockfree_compare_exchange_16(register __int128 *var, __int128 *exp,
+			       register __int128 neu, bool weak, int mo_success,
+			       int mo_failure)
+{
+	(void)weak; /* Always do strong CAS or we can't perform atomic read */
+	/* Ignore memory ordering for failure, memory order for
+	 * success must be stronger or equal. */
+	(void)mo_failure;
+	register __int128 old;
+	register __int128 expected;
+	int ll_mo = LL_MO(mo_success);
+	int sc_mo = SC_MO(mo_success);
+
+	expected = *exp;
+	__asm__ volatile("" ::: "memory");
+	do {
+		/* Atomicity of LLD is not guaranteed */
+		old = lld(var, ll_mo);
+		/* Must write back neu or old to verify atomicity of LLD */
+	} while (odp_unlikely(scd(var, old == expected ? neu : old, sc_mo)));
+	*exp = old; /* Always update, atomically read value */
+	return old == expected;
+}
+
+static inline __int128 __lockfree_exchange_16(__int128 *var, __int128 neu,
+					      int mo)
+{
+	register __int128 old;
+	int ll_mo = LL_MO(mo);
+	int sc_mo = SC_MO(mo);
+
+	do {
+		/* Atomicity of LLD is not guaranteed */
+		old = lld(var, ll_mo);
+		/* Must successfully write back to verify atomicity of LLD */
+	} while (odp_unlikely(scd(var, neu, sc_mo)));
+	return old;
+}
+
+static inline __int128 __lockfree_fetch_and_16(__int128 *var, __int128 mask,
+					       int mo)
+{
+	register __int128 old;
+	int ll_mo = LL_MO(mo);
+	int sc_mo = SC_MO(mo);
+
+	do {
+		/* Atomicity of LLD is not guaranteed */
+		old = lld(var, ll_mo);
+		/* Must successfully write back to verify atomicity of LLD */
+	} while (odp_unlikely(scd(var, old & mask, sc_mo)));
+	return old;
+}
+
+static inline __int128 __lockfree_fetch_or_16(__int128 *var, __int128 mask,
+					      int mo)
+{
+	register __int128 old;
+	int ll_mo = LL_MO(mo);
+	int sc_mo = SC_MO(mo);
+
+	do {
+		/* Atomicity of LLD is not guaranteed */
+		old = lld(var, ll_mo);
+		/* Must successfully write back to verify atomicity of LLD */
+	} while (odp_unlikely(scd(var, old | mask, sc_mo)));
+	return old;
+}
+
+#else
+
+/* v8.1a and later: use the CASP instruction family directly; the a/l/al
+ * suffix selects acquire and/or release semantics. */
+static inline __int128 casp(__int128 *var, __int128 old, __int128 neu, int mo)
+{
+	if (mo == __ATOMIC_RELAXED) {
+		__asm__ volatile("casp %0, %H0, %1, %H1, [%2]"
+				 : "+r" (old)
+				 : "r" (neu), "r" (var)
+				 : "memory");
+	} else if (mo == __ATOMIC_ACQUIRE) {
+		__asm__ volatile("caspa %0, %H0, %1, %H1, [%2]"
+				 : "+r" (old)
+				 : "r" (neu), "r" (var)
+				 : "memory");
+	} else if (mo == __ATOMIC_ACQ_REL) {
+		__asm__ volatile("caspal %0, %H0, %1, %H1, [%2]"
+				 : "+r" (old)
+				 : "r" (neu), "r" (var)
+				 : "memory");
+	} else if (mo == __ATOMIC_RELEASE) {
+		__asm__ volatile("caspl %0, %H0, %1, %H1, [%2]"
+				 : "+r" (old)
+				 : "r" (neu), "r" (var)
+				 : "memory");
+	} else {
+		abort();
+	}
+	return old;
+}
+
+static inline bool
+__lockfree_compare_exchange_16(register __int128 *var, __int128 *exp,
+			       register __int128 neu, bool weak, int mo_success,
+			       int mo_failure)
+{
+	(void)weak;
+	(void)mo_failure;
+	__int128 old;
+	__int128 expected;
+
+	expected = *exp;
+	old = casp(var, expected, neu, mo_success);
+	*exp = old; /* Always update, atomically read value */
+	return old == expected;
+}
+
+static inline __int128 __lockfree_exchange_16(__int128 *var, __int128 neu,
+					      int mo)
+{
+	__int128 old;
+	__int128 expected;
+
+	do {
+		expected = *var;
+		old = casp(var, expected, neu, mo);
+	} while (old != expected);
+	return old;
+}
+
+static inline __int128 __lockfree_fetch_and_16(__int128 *var, __int128 mask,
+					       int mo)
+{
+	__int128 old;
+	__int128 expected;
+
+	do {
+		expected = *var;
+		old = casp(var, expected, expected & mask, mo);
+	} while (old != expected);
+	return old;
+}
+
+static inline __int128 __lockfree_fetch_or_16(__int128 *var, __int128 mask,
+					      int mo)
+{
+	__int128 old;
+	__int128 expected;
+
+	do {
+		expected = *var;
+		old = casp(var, expected, expected | mask, mo);
+	} while (old != expected);
+	return old;
+}
+
+#endif
+
+/* Atomic 128-bit load: a plain load may be torn, so confirm/repair it with
+ * a compare-exchange (works with either implementation above). */
+static inline __int128 __lockfree_load_16(__int128 *var, int mo)
+{
+	__int128 old = *var; /* Possibly torn read */
+
+	/* Do CAS to ensure atomicity
+	 * Either CAS succeeds (writing back the same value)
+	 * Or CAS fails and returns the old value (atomic read)
+	 */
+	(void)__lockfree_compare_exchange_16(var, &old, old, false, mo, mo);
+	return old;
+}
+
+#endif
+
+#endif  /* PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_ATOMIC_H */
diff --git a/platform/linux-generic/arch/arm/odp_cpu.h b/platform/linux-generic/arch/arm/odp_cpu.h
new file mode 100644
index 00000000..f2a6beb8
--- /dev/null
+++ b/platform/linux-generic/arch/arm/odp_cpu.h
@@ -0,0 +1,65 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:	BSD-3-Clause
+ */
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+#define PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+
+#if !defined(__ARM_ARCH)
+#error Use this file only when compiling for ARM architecture
+#endif
+
+#include <odp_debug_internal.h>
+
+/*
+ * Use LLD/SCD atomic primitives instead of lock-based code path in llqueue
+ * LLD/SCD is on ARM the fastest way to enqueue and dequeue elements from a
+ * linked list queue.
+ */
+#define CONFIG_LLDSCD
+
+/*
+ * Use DMB;STR instead of STRL on ARM
+ * On early ARMv8 implementations (e.g. Cortex-A57) this is noticeably more
+ * performant than using store-release.
+ * This also allows for load-only barriers (DMB ISHLD) which are much cheaper
+ * than a full barrier
+ */
+#define CONFIG_DMBSTR
+
+/*
+ * Use ARM event signalling mechanism
+ * Event signalling minimises spinning (busy waiting) which decreases
+ * cache coherency traffic when spinning on shared locations (thus faster and
+ * more scalable) and enables the CPU to enter a sleep state (lower power
+ * consumption).
+ */
+#define CONFIG_WFE
+
+/* Full data memory barrier.
+ * NOTE(review): no barrier option is given; this relies on the assembler
+ * defaulting to "dmb sy" — confirm it assembles on all supported targets. */
+static inline void dmb(void)
+{
+	__asm__ volatile("dmb" : : : "memory");
+}
+
+#if __ARM_ARCH == 8
+/* Only ARMv8 supports DMB ISHLD */
+/* A load only barrier is much cheaper than full barrier */
+/* Barrier placed before a relaxed store to give it release semantics;
+ * ro=true orders prior loads only, ro=false orders loads and stores. */
+#define _odp_release_barrier(ro) \
+do {							     \
+	if (ro)						     \
+		__asm__ volatile("dmb ishld" ::: "memory");  \
+	else						     \
+		__asm__ volatile("dmb ish" ::: "memory");    \
+} while (0)
+#else
+#define _odp_release_barrier(ro) \
+	__atomic_thread_fence(__ATOMIC_RELEASE)
+#endif
+
+#include "odp_llsc.h"
+#include "odp_atomic.h"
+#include "odp_cpu_idling.h"
+
+#endif  /* PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H */
diff --git a/platform/linux-generic/arch/arm/odp_cpu_idling.h b/platform/linux-generic/arch/arm/odp_cpu_idling.h
new file mode 100644
index 00000000..4a65befd
--- /dev/null
+++ b/platform/linux-generic/arch/arm/odp_cpu_idling.h
@@ -0,0 +1,51 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:	BSD-3-Clause
+ */
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H
+#define PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+#error This file should not be included directly, please include odp_cpu.h
+#endif
+
+/* Send event locally: primes the event register so that the first wfe()
+ * falls straight through (no-op without CONFIG_WFE). */
+static inline void sevl(void)
+{
+#ifdef CONFIG_WFE
+	__asm__ volatile("sevl" : : : );
+#endif
+}
+
+/* Wait for event; always returns 1 so it can be used in a loop condition.
+ * Without CONFIG_WFE this degenerates to plain spinning. */
+static inline int wfe(void)
+{
+#ifdef CONFIG_WFE
+	__asm__ volatile("wfe" : : : "memory");
+#endif
+	return 1;
+}
+
+static inline void doze(void)
+{
+#ifndef CONFIG_WFE
+	/* When using WFE do not stall the pipeline using other means */
+	odp_cpu_pause();
+#endif
+}
+
+/* monitorN(addr, mo): load *addr and, when CONFIG_WFE is set, arm the
+ * exclusive monitor (via a load-exclusive from odp_llsc.h) so a later wfe()
+ * wakes when the location is written; otherwise just an atomic load. */
+#ifdef CONFIG_WFE
+#if __ARM_ARCH == 8 && __ARM_64BIT_STATE == 1
+#define monitor128(addr, mo) lld((addr), (mo))
+#endif
+#define monitor64(addr, mo) ll64((addr), (mo))
+#define monitor32(addr, mo) ll32((addr), (mo))
+#define monitor8(addr, mo) ll8((addr), (mo))
+#else
+#define monitor128(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor64(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor32(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor8(addr, mo) __atomic_load_n((addr), (mo))
+#endif
+
+#endif  /* PLATFORM_LINUXGENERIC_ARCH_ARM_CPU_IDLING_H */
diff --git a/platform/linux-generic/arch/arm/odp_llsc.h b/platform/linux-generic/arch/arm/odp_llsc.h
new file mode 100644
index 00000000..3ab5c909
--- /dev/null
+++ b/platform/linux-generic/arch/arm/odp_llsc.h
@@ -0,0 +1,249 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:	BSD-3-Clause
+ */
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H
+#define PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H
+
+#ifndef PLATFORM_LINUXGENERIC_ARCH_ARM_ODP_CPU_H
+#error This file should not be included directly, please include odp_cpu.h
+#endif
+
+#if __ARM_ARCH == 7 || (__ARM_ARCH == 8 && __ARM_64BIT_STATE == 0)
+
+/* ARMv7 / 32-bit ARMv8: LDREX/STREX-based load-linked/store-conditional.
+ * Acquire/release ordering is provided with explicit DMB barriers. */
+static inline uint32_t ll8(uint8_t *var, int mm)
+{
+	uint8_t old;
+
+	__asm__ volatile("ldrexb %0, [%1]"
+			 : "=&r" (old)
+			 : "r" (var)
+			 : );
+	/* Barrier after an acquiring load */
+	if (mm == __ATOMIC_ACQUIRE)
+		dmb();
+	return old;
+}
+
+static inline uint32_t ll(uint32_t *var, int mm)
+{
+	uint32_t old;
+
+	__asm__ volatile("ldrex %0, [%1]"
+			 : "=&r" (old)
+			 : "r" (var)
+			 : );
+	/* Barrier after an acquiring load */
+	if (mm == __ATOMIC_ACQUIRE)
+		dmb();
+	return old;
+}
+
+#define ll32(a, b) ll((a), (b))
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t sc(uint32_t *var, uint32_t neu, int mm)
+{
+	uint32_t ret;
+
+	/* Barrier before a releasing store */
+	if (mm == __ATOMIC_RELEASE)
+		dmb();
+	__asm__ volatile("strex %0, %1, [%2]"
+			 : "=&r" (ret)
+			 : "r" (neu), "r" (var)
+			 : );
+	return ret;
+}
+
+#define sc32(a, b, c) sc((a), (b), (c))
+
+static inline uint64_t lld(uint64_t *var, int mm)
+{
+	uint64_t old;
+
+	__asm__ volatile("ldrexd %0, %H0, [%1]"
+			 : "=&r" (old)
+			 : "r" (var)
+			 : );
+	/* Barrier after an acquiring load */
+	if (mm == __ATOMIC_ACQUIRE)
+		dmb();
+	return old;
+}
+
+#define ll64(a, b) lld((a), (b))
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t scd(uint64_t *var, uint64_t neu, int mm)
+{
+	uint32_t ret;
+
+	/* Barrier before a releasing store */
+	if (mm == __ATOMIC_RELEASE)
+		dmb();
+	__asm__ volatile("strexd %0, %1, %H1, [%2]"
+			 : "=&r" (ret)
+			 : "r" (neu), "r" (var)
+			 : );
+	return ret;
+}
+
+#define sc64(a, b, c) scd((a), (b), (c))
+
+#endif
+
+#if __ARM_ARCH == 8 && __ARM_64BIT_STATE == 1
+/* 64-bit ARMv8: LDXR/STXR exclusives; the acquire/release variants (LDAXR,
+ * STLXR) provide the required ordering directly. */
+/* NOTE(review): returns uint16_t here but the AArch32 variant above returns
+ * uint32_t; callers should not rely on the exact return width. */
+static inline uint16_t ll8(uint8_t *var, int mm)
+{
+	uint16_t old;
+
+	if (mm == __ATOMIC_ACQUIRE)
+		__asm__ volatile("ldaxrb %w0, [%1]"
+				 : "=&r" (old)
+				 : "r" (var)
+				 : "memory");
+	else if (mm == __ATOMIC_RELAXED)
+		__asm__ volatile("ldxrb %w0, [%1]"
+				 : "=&r" (old)
+				 : "r" (var)
+				 : );
+	else
+		ODP_ABORT();
+	return old;
+}
+
+static inline uint32_t ll32(uint32_t *var, int mm)
+{
+	uint32_t old;
+
+	if (mm == __ATOMIC_ACQUIRE)
+		__asm__ volatile("ldaxr %w0, [%1]"
+				 : "=&r" (old)
+				 : "r" (var)
+				 : "memory");
+	else if (mm == __ATOMIC_RELAXED)
+		__asm__ volatile("ldxr %w0, [%1]"
+				 : "=&r" (old)
+				 : "r" (var)
+				 : );
+	else
+		ODP_ABORT();
+	return old;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm)
+{
+	uint32_t ret;
+
+	if (mm == __ATOMIC_RELEASE)
+		__asm__ volatile("stlxr %w0, %w1, [%2]"
+				 : "=&r" (ret)
+				 : "r" (neu), "r" (var)
+				 : "memory");
+	else if (mm == __ATOMIC_RELAXED)
+		__asm__ volatile("stxr %w0, %w1, [%2]"
+				 : "=&r" (ret)
+				 : "r" (neu), "r" (var)
+				 : );
+	else
+		ODP_ABORT();
+	return ret;
+}
+
+static inline uint64_t ll(uint64_t *var, int mm)
+{
+	uint64_t old;
+
+	if (mm == __ATOMIC_ACQUIRE)
+		__asm__ volatile("ldaxr %0, [%1]"
+				 : "=&r" (old)
+				 : "r" (var)
+				 : "memory");
+	else if (mm == __ATOMIC_RELAXED)
+		__asm__ volatile("ldxr %0, [%1]"
+				 : "=&r" (old)
+				 : "r" (var)
+				 : );
+	else
+		ODP_ABORT();
+	return old;
+}
+
+#define ll64(a, b) ll((a), (b))
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t sc(uint64_t *var, uint64_t neu, int mm)
+{
+	uint32_t ret;
+
+	if (mm == __ATOMIC_RELEASE)
+		__asm__ volatile("stlxr %w0, %1, [%2]"
+				 : "=&r" (ret)
+				 : "r" (neu), "r" (var)
+				 : "memory");
+	else if (mm == __ATOMIC_RELAXED)
+		__asm__ volatile("stxr %w0, %1, [%2]"
+				 : "=&r" (ret)
+				 : "r" (neu), "r" (var)
+				 : );
+	else
+		ODP_ABORT();
+	return ret;
+}
+
+#define sc64(a, b, c) sc((a), (b), (c))
+
+/* View a 128-bit value as a pair of 64-bit halves for the LDXP/STXP
+ * register pairs below. */
+union i128 {
+	__int128 i128;
+	int64_t  i64[2];
+};
+
+static inline __int128 lld(__int128 *var, int mm)
+{
+	union i128 old;
+
+	if (mm == __ATOMIC_ACQUIRE)
+		__asm__ volatile("ldaxp %0, %1, [%2]"
+				 : "=&r" (old.i64[0]), "=&r" (old.i64[1])
+				 : "r" (var)
+				 : "memory");
+	else if (mm == __ATOMIC_RELAXED)
+		__asm__ volatile("ldxp %0, %1, [%2]"
+				 : "=&r" (old.i64[0]), "=&r" (old.i64[1])
+				 : "r" (var)
+				 : );
+	else
+		ODP_ABORT();
+	return old.i128;
+}
+
+/* Return 0 on success, 1 on failure */
+static inline uint32_t scd(__int128 *var, __int128 neu, int mm)
+{
+	uint32_t ret;
+
+	if (mm == __ATOMIC_RELEASE)
+		__asm__ volatile("stlxp %w0, %1, %2, [%3]"
+				 : "=&r" (ret)
+				 : "r" (((union i128)neu).i64[0]),
+				   "r" (((union i128)neu).i64[1]),
+				   "r" (var)
+				 : "memory");
+	else if (mm == __ATOMIC_RELAXED)
+		__asm__ volatile("stxp %w0, %1, %2, [%3]"
+				 : "=&r" (ret)
+				 : "r" (((union i128)neu).i64[0]),
+				   "r" (((union i128)neu).i64[1]),
+				   "r" (var)
+				 : );
+	else
+		ODP_ABORT();
+	return ret;
+}
+#endif
+
+/* NOTE(review): if neither architecture test above matches (e.g. ARMv6),
+ * no ll/sc implementation is defined and the build fails later with hard to
+ * interpret errors; consider an explicit #error or falling back to the
+ * default arch implementation (raised in the review thread above). */
+
+#endif  /* PLATFORM_LINUXGENERIC_ARCH_ARM_LLSC_H */
diff --git a/platform/linux-generic/arch/default/odp_cpu.h b/platform/linux-generic/arch/default/odp_cpu.h
new file mode 100644
index 00000000..4e94dcd5
--- /dev/null
+++ b/platform/linux-generic/arch/default/odp_cpu.h
@@ -0,0 +1,41 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef ODP_DEFAULT_CPU_H_
+#define ODP_DEFAULT_CPU_H_
+
+/******************************************************************************
+ * Atomics
+ *****************************************************************************/
+
+/* Plain C11 store-release; the 'ro' (reads-only) hint is ignored here */
+#define atomic_store_release(loc, val, ro) \
+	__atomic_store_n(loc, val, __ATOMIC_RELEASE)
+
+/******************************************************************************
+ * Idle mgmt
+ *****************************************************************************/
+
+/* No event signalling mechanism: sevl() is a no-op and wfe() always
+ * reports an event, so waiters effectively busy-poll. */
+static inline void sevl(void)
+{
+	/* empty */
+}
+
+static inline int wfe(void)
+{
+	return 1;
+}
+
+/* No hardware monitor to arm: monitorN() is just an atomic load */
+#define monitor128(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor64(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor32(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor8(addr, mo) __atomic_load_n((addr), (mo))
+
+/* Relax the CPU between polls of a shared location */
+static inline void doze(void)
+{
+	odp_cpu_pause();
+}
+
+#endif
diff --git a/platform/linux-generic/arch/mips64/odp_cpu.h b/platform/linux-generic/arch/mips64/odp_cpu.h
new file mode 100644
index 00000000..e6846166
--- /dev/null
+++ b/platform/linux-generic/arch/mips64/odp_cpu.h
@@ -0,0 +1,41 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef ODP_MIPS64_CPU_H_
+#define ODP_MIPS64_CPU_H_
+
+/******************************************************************************
+ * Atomics
+ *****************************************************************************/
+
+/* Plain C11 store-release; the 'ro' (reads-only) hint is ignored here */
+#define atomic_store_release(loc, val, ro) \
+	__atomic_store_n(loc, val, __ATOMIC_RELEASE)
+
+/******************************************************************************
+ * Idle mgmt
+ *****************************************************************************/
+
+/* No event signalling mechanism: sevl() is a no-op and wfe() always
+ * reports an event, so waiters effectively busy-poll. */
+static inline void sevl(void)
+{
+	/* empty */
+}
+
+static inline int wfe(void)
+{
+	return 1;
+}
+
+/* No hardware monitor to arm: monitorN() is just an atomic load */
+#define monitor128(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor64(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor32(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor8(addr, mo) __atomic_load_n((addr), (mo))
+
+/* Relax the CPU between polls of a shared location */
+static inline void doze(void)
+{
+	odp_cpu_pause();
+}
+
+#endif
diff --git a/platform/linux-generic/arch/powerpc/odp_cpu.h b/platform/linux-generic/arch/powerpc/odp_cpu.h
new file mode 100644
index 00000000..584d7e50
--- /dev/null
+++ b/platform/linux-generic/arch/powerpc/odp_cpu.h
@@ -0,0 +1,41 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef ODP_POWERPC_CPU_H_
+#define ODP_POWERPC_CPU_H_
+
+/******************************************************************************
+ * Atomics
+ *****************************************************************************/
+
+/* Plain C11 store-release; the 'ro' (reads-only) hint is ignored here */
+#define atomic_store_release(loc, val, ro) \
+	__atomic_store_n(loc, val, __ATOMIC_RELEASE)
+
+/******************************************************************************
+ * Idle mgmt
+ *****************************************************************************/
+
+/* No event signalling mechanism: sevl() is a no-op and wfe() always
+ * reports an event, so waiters effectively busy-poll. */
+static inline void sevl(void)
+{
+	/* empty */
+}
+
+static inline int wfe(void)
+{
+	return 1;
+}
+
+/* No hardware monitor to arm: monitorN() is just an atomic load */
+#define monitor128(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor64(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor32(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor8(addr, mo) __atomic_load_n((addr), (mo))
+
+/* Relax the CPU between polls of a shared location */
+static inline void doze(void)
+{
+	odp_cpu_pause();
+}
+
+#endif
diff --git a/platform/linux-generic/arch/x86/odp_cpu.h b/platform/linux-generic/arch/x86/odp_cpu.h
new file mode 100644
index 00000000..b752c93f
--- /dev/null
+++ b/platform/linux-generic/arch/x86/odp_cpu.h
@@ -0,0 +1,41 @@ 
+/* Copyright (c) 2017, ARM Limited
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:     BSD-3-Clause
+ */
+
+#ifndef ODP_X86_CPU_H_
+#define ODP_X86_CPU_H_
+
+/******************************************************************************
+ * Atomics
+ *****************************************************************************/
+
+/* Plain C11 store-release; the 'ro' (reads-only) hint is ignored here */
+#define atomic_store_release(loc, val, ro) \
+	__atomic_store_n(loc, val, __ATOMIC_RELEASE)
+
+/******************************************************************************
+ * Idle mgmt
+ *****************************************************************************/
+
+/* No event signalling mechanism: sevl() is a no-op and wfe() always
+ * reports an event, so waiters effectively busy-poll. */
+static inline void sevl(void)
+{
+	/* empty */
+}
+
+static inline int wfe(void)
+{
+	return 1;
+}
+
+/* No hardware monitor to arm: monitorN() is just an atomic load */
+#define monitor128(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor64(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor32(addr, mo) __atomic_load_n((addr), (mo))
+#define monitor8(addr, mo) __atomic_load_n((addr), (mo))
+
+/* Relax the CPU between polls of a shared location */
+static inline void doze(void)
+{
+	odp_cpu_pause();
+}
+
+#endif