[API-NEXT,v9,4/6] linux-gen: sched scalable: add a concurrent queue

Message ID 20170619191247.51385-5-brian.brooks@arm.com
State Superseded
Headers show
Series
  • A scalable software scheduler
Related show

Commit Message

Brian Brooks June 19, 2017, 7:12 p.m.
Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

Reviewed-by: Brian Brooks <brian.brooks@arm.com>

---
 platform/linux-generic/Makefile.am           |   1 +
 platform/linux-generic/include/odp_llqueue.h | 309 +++++++++++++++++++++++++++
 2 files changed, 310 insertions(+)
 create mode 100644 platform/linux-generic/include/odp_llqueue.h

-- 
2.13.1

Comments

Savolainen, Petri (Nokia - FI/Espoo) June 20, 2017, 1:12 p.m. | #1
> +++ b/platform/linux-generic/include/odp_llqueue.h

> @@ -0,0 +1,309 @@

> +/* Copyright (c) 2017, ARM Limited.

> + * All rights reserved.

> + *

> + * SPDX-License-Identifier:        BSD-3-Clause

> + */

> +

> +#ifndef ODP_LLQUEUE_H_

> +#define ODP_LLQUEUE_H_

> +

> +#include <odp/api/cpu.h>

> +#include <odp/api/hints.h>

> +#include <odp/api/spinlock.h>

> +

> +#include <odp_config_internal.h>

> +#include <odp_debug_internal.h>

> +#include <odp_cpu.h>

> +

> +#include <stdint.h>

> +#include <stdlib.h>

> +

> +/************************************************************************

> ******

> + * Linked list queues

> +

> **************************************************************************

> ***/

> +

> +struct llqueue;

> +struct llnode;

> +

> +static struct llnode *llq_head(struct llqueue *llq);

> +static void llqueue_init(struct llqueue *llq);

> +static void llq_enqueue(struct llqueue *llq, struct llnode *node);

> +static struct llnode *llq_dequeue(struct llqueue *llq);

> +static odp_bool_t llq_dequeue_cond(struct llqueue *llq, struct llnode

> *exp);

> +static odp_bool_t llq_cond_rotate(struct llqueue *llq, struct llnode

> *node);

> +static odp_bool_t llq_on_queue(struct llnode *node);

> +

> +/************************************************************************

> ******

> + * The implementation(s)

> +

> **************************************************************************

> ***/

> +

> +#define SENTINEL ((void *)~(uintptr_t)0)

> +

> +#ifdef CONFIG_LLDSCD

> +/* Implement queue operations using double-word LL/SC */


> +

> +#else

> +/* Implement queue operations protected by a spin lock */

> +


There's a lot of ifdef'ed code in this file, basically two full parallel implementations. The first is built only for ARM and the second for the rest. Would there be a way to build both always ?

-Petri
Maxim Uvarov June 21, 2017, 2:06 p.m. | #2
On 06/19/17 22:12, Brian Brooks wrote:
> --- /dev/null

> +++ b/platform/linux-generic/include/odp_llqueue.h

> @@ -0,0 +1,309 @@

> +/* Copyright (c) 2017, ARM Limited.

> + * All rights reserved.

> + *

> + * SPDX-License-Identifier:        BSD-3-Clause

> + */

> +


Has to be Linaro copyright.

Maxim.
Honnappa Nagarahalli June 21, 2017, 2:27 p.m. | #3
Hi Maxim,
    This is a new file added by us. Hence the ARM copyright.
Thanks,
Honnappa

On 21 June 2017 at 09:06, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
> On 06/19/17 22:12, Brian Brooks wrote:

>>

>> --- /dev/null

>> +++ b/platform/linux-generic/include/odp_llqueue.h

>> @@ -0,0 +1,309 @@

>> +/* Copyright (c) 2017, ARM Limited.

>> + * All rights reserved.

>> + *

>> + * SPDX-License-Identifier:        BSD-3-Clause

>> + */

>> +

>

>

> Has to be Linaro copyright.

>

> Maxim.
Maxim Uvarov June 21, 2017, 3:08 p.m. | #4
On 06/19/17 22:12, Brian Brooks wrote:
> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

> Reviewed-by: Brian Brooks <brian.brooks@arm.com>

> ---

>   platform/linux-generic/Makefile.am           |   1 +

>   platform/linux-generic/include/odp_llqueue.h | 309 +++++++++++++++++++++++++++

>   2 files changed, 310 insertions(+)

>   create mode 100644 platform/linux-generic/include/odp_llqueue.h

>

> diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am

> index b869fd4b..90cc4ca6 100644

> --- a/platform/linux-generic/Makefile.am

> +++ b/platform/linux-generic/Makefile.am

> @@ -157,6 +157,7 @@ noinst_HEADERS = \

>   		  ${srcdir}/include/odp_errno_define.h \

>   		  ${srcdir}/include/odp_forward_typedefs_internal.h \

>   		  ${srcdir}/include/odp_internal.h \

> +		  ${srcdir}/include/odp_llqueue.h \

>   		  ${srcdir}/include/odp_name_table_internal.h \

>   		  ${srcdir}/include/odp_packet_internal.h \

>   		  ${srcdir}/include/odp_packet_io_internal.h \

> diff --git a/platform/linux-generic/include/odp_llqueue.h b/platform/linux-generic/include/odp_llqueue.h

> new file mode 100644

> index 00000000..758af490

> --- /dev/null

> +++ b/platform/linux-generic/include/odp_llqueue.h

> @@ -0,0 +1,309 @@

> +/* Copyright (c) 2017, ARM Limited.

> + * All rights reserved.

> + *

> + * SPDX-License-Identifier:        BSD-3-Clause

> + */

> +

> +#ifndef ODP_LLQUEUE_H_

> +#define ODP_LLQUEUE_H_

> +

> +#include <odp/api/cpu.h>

> +#include <odp/api/hints.h>

> +#include <odp/api/spinlock.h>

> +

> +#include <odp_config_internal.h>

> +#include <odp_debug_internal.h>

> +#include <odp_cpu.h>

> +

> +#include <stdint.h>

> +#include <stdlib.h>

> +

> +/******************************************************************************

> + * Linked list queues

> + *****************************************************************************/

> +

> +struct llqueue;

> +struct llnode;

> +

> +static struct llnode *llq_head(struct llqueue *llq);

> +static void llqueue_init(struct llqueue *llq);

> +static void llq_enqueue(struct llqueue *llq, struct llnode *node);

> +static struct llnode *llq_dequeue(struct llqueue *llq);

> +static odp_bool_t llq_dequeue_cond(struct llqueue *llq, struct llnode *exp);

> +static odp_bool_t llq_cond_rotate(struct llqueue *llq, struct llnode *node);

> +static odp_bool_t llq_on_queue(struct llnode *node);

> +

> +/******************************************************************************

> + * The implementation(s)

> + *****************************************************************************/

> +

> +#define SENTINEL ((void *)~(uintptr_t)0)

> +

> +#ifdef CONFIG_LLDSCD

> +/* Implement queue operations using double-word LL/SC */

> +

> +/* The scalar equivalent of a double pointer */

> +#if __SIZEOF_PTRDIFF_T__ == 4

> +typedef uint64_t dintptr_t;

> +#endif

> +#if __SIZEOF_PTRDIFF_T__ == 8

> +typedef __int128 dintptr_t;

> +#endif

> +

> +struct llnode {

> +	struct llnode *next;

> +};

> +

> +union llht {

> +	struct {

> +		struct llnode *head, *tail;

> +	} st;

> +	dintptr_t ui;


you use dintptr_t only once. It's here. No need for typedef. Remove 
ifdef above. Place it here and change to #else if. And code will be 2 
times smaller.

> +};

> +

> +struct llqueue {

> +	union llht u;

> +};


that looks not clear for me. 'struct llqueue' and 'union llht' is the 
same thing. After that you reference to it that as 'struct llqueue'  in 
function arguments and  'union llht' inside the function. Like bellow...

> +

> +static inline struct llnode *llq_head(struct llqueue *llq)

> +{

> +	return __atomic_load_n(&llq->u.st.head, __ATOMIC_RELAXED);

> +}

> +

> +static inline void llqueue_init(struct llqueue *llq)

> +{

> +	llq->u.st.head = NULL;

> +	llq->u.st.tail = NULL;

> +}

> +

> +static inline void llq_enqueue(struct llqueue *llq, struct llnode *node)

> +{

> +	union llht old, neu;

> +


here.

btw neu has to be new, right?

> +	ODP_ASSERT(node->next == NULL);

> +	node->next = SENTINEL;

> +	do {

> +		old.ui = lld(&llq->u.ui, __ATOMIC_RELAXED);

> +		neu.st.head = old.st.head == NULL ? node : old.st.head;

> +		neu.st.tail = node;

> +	} while (odp_unlikely(scd(&llq->u.ui, neu.ui, __ATOMIC_RELEASE)));

> +	if (old.st.tail != NULL) {

> +		/* List was not empty */

> +		ODP_ASSERT(old.st.tail->next == SENTINEL);

> +		old.st.tail->next = node;

> +	}

> +}

> +

> +static inline struct llnode *llq_dequeue(struct llqueue *llq)

> +{

> +	struct llnode *head;

> +	union llht old, neu;

> +

> +	/* llq_dequeue() may be used in a busy-waiting fashion

> +	 * Read head using plain load to avoid disturbing remote LL/SC

> +	 */

> +	head = __atomic_load_n(&llq->u.st.head, __ATOMIC_ACQUIRE);

> +	if (head == NULL)

> +		return NULL;

> +	/* Read head->next before LL to minimize cache miss latency

> +	 * in LL/SC below

> +	 */

> +	(void)__atomic_load_n(&head->next, __ATOMIC_RELAXED);

> +

> +	do {

> +		old.ui = lld(&llq->u.ui, __ATOMIC_RELAXED);

> +		if (odp_unlikely(old.st.head == NULL)) {

> +			/* Empty list */

> +			return NULL;

> +		} else if (odp_unlikely(old.st.head == old.st.tail)) {

> +			/* Single-element in list */

> +			neu.st.head = NULL;

> +			neu.st.tail = NULL;

> +		} else {

> +			/* Multi-element list, dequeue head */

> +			struct llnode *next;

> +			/* Wait until llq_enqueue() has written true next

> +			 * pointer

> +			 */

> +			while ((next = __atomic_load_n(&old.st.head->next,

> +						       __ATOMIC_RELAXED)) ==

> +				SENTINEL)

> +				odp_cpu_pause();

> +			neu.st.head = next;

> +			neu.st.tail = old.st.tail;

> +		}

> +	} while (odp_unlikely(scd(&llq->u.ui, neu.ui, __ATOMIC_RELAXED)));

> +	old.st.head->next = NULL;

> +	return old.st.head;

> +}

> +

> +static inline odp_bool_t llq_dequeue_cond(struct llqueue *llq,

> +					  struct llnode *exp)

> +{

> +	union llht old, neu;

> +

> +	do {

> +		old.ui = lld(&llq->u.ui, __ATOMIC_ACQUIRE);

> +		if (odp_unlikely(old.st.head == NULL || old.st.head != exp)) {

> +			/* Empty list or wrong head */

> +			return false;

> +		} else if (odp_unlikely(old.st.head == old.st.tail)) {

> +			/* Single-element in list */

> +			neu.st.head = NULL;

> +			neu.st.tail = NULL;

> +		} else {

> +			/* Multi-element list, dequeue head */

> +			struct llnode *next;

> +

> +			/* Wait until llq_enqueue() has written true next

> +			 * pointer */

> +			while ((next = __atomic_load_n(&old.st.head->next,

> +						       __ATOMIC_RELAXED)) ==

> +				SENTINEL)

> +				odp_cpu_pause();

> +

> +			neu.st.head = next;

> +			neu.st.tail = old.st.tail;

> +		}

> +	} while (odp_unlikely(scd(&llq->u.ui, neu.ui, __ATOMIC_RELAXED)));

> +	old.st.head->next = NULL;

> +	return true;

> +}

> +

> +/* If 'node' is a head of llq then move it to tail */

> +static inline odp_bool_t llq_cond_rotate(struct llqueue *llq,

> +					 struct llnode *node)

> +{

> +	/* Difficult to make this into a single atomic operation

> +	 * Instead use existing primitives.

> +	 */

> +	if (odp_likely(llq_dequeue_cond(llq, node))) {

> +		llq_enqueue(llq, node);

> +		return true;

> +	}

> +	return false;

> +}

> +

> +static inline odp_bool_t llq_on_queue(struct llnode *node)

> +{

> +	return node->next != NULL;

> +}

> +

> +#else



can chunk bellow be in separate file? I think it was on some earlier 
comments.

> +/* Implement queue operations protected by a spin lock */

> +

> +struct llnode {

> +	struct llnode *next;

> +};

> +

> +struct llqueue {

> +	struct llnode *head, *tail;

> +	odp_spinlock_t lock;

> +};

> +

> +static inline struct llnode *llq_head(struct llqueue *llq)

> +{

> +	return __atomic_load_n(&llq->head, __ATOMIC_RELAXED);

> +}

> +

> +static inline void llqueue_init(struct llqueue *llq)

> +{

> +	llq->head = NULL;

> +	llq->tail = NULL;

> +	odp_spinlock_init(&llq->lock);

> +}

> +

> +static inline void llq_enqueue(struct llqueue *llq, struct llnode *node)

> +{

> +	ODP_ASSERT(node->next == NULL);

> +	node->next = SENTINEL;

> +

> +	odp_spinlock_lock(&llq->lock);

> +	if (llq->head == NULL) {

> +		llq->head = node;

> +		llq->tail = node;

> +	} else {

> +		llq->tail->next = node;

> +		llq->tail = node;

> +	}

> +	odp_spinlock_unlock(&llq->lock);

> +}

> +

> +static inline struct llnode *llq_dequeue(struct llqueue *llq)

> +{

> +	struct llnode *head;

> +	struct llnode *node = NULL;

> +

> +	head = __atomic_load_n(&llq->head, __ATOMIC_RELAXED);

> +	if (head == NULL)

> +		return NULL;

> +

> +	odp_spinlock_lock(&llq->lock);

> +	if (llq->head != NULL) {

> +		node = llq->head;

> +		if (llq->head == llq->tail) {

> +			ODP_ASSERT(node->next == SENTINEL);

> +			llq->head = NULL;

> +			llq->tail = NULL;

> +		} else {

> +			ODP_ASSERT(node->next != SENTINEL);

> +			llq->head = node->next;

> +		}

> +		node->next = NULL;

> +	}

> +	odp_spinlock_unlock(&llq->lock);

> +	return node;

> +}

> +

> +static inline odp_bool_t llq_dequeue_cond(struct llqueue *llq,

> +					  struct llnode *node)

> +{

> +	odp_bool_t success = false;

> +

> +	odp_spinlock_lock(&llq->lock);

> +	if (odp_likely(llq->head != NULL && llq->head == node)) {

> +		success = true;

> +		if (llq->head == llq->tail) {

> +			ODP_ASSERT(node->next == SENTINEL);

> +			llq->head = NULL;

> +			llq->tail = NULL;

> +		} else {

> +			ODP_ASSERT(node->next != SENTINEL);

> +			llq->head = node->next;

> +		}

> +		node->next = NULL;

> +	}

> +	odp_spinlock_unlock(&llq->lock);

> +	return success;

> +}

> +

> +/* If 'node' is a head of llq then move it to tail */

> +static inline odp_bool_t llq_cond_rotate(struct llqueue *llq,

> +					 struct llnode *node)

> +{

> +	odp_bool_t success = false;

> +

> +	odp_spinlock_lock(&llq->lock);

> +	if (odp_likely(llq->head == node)) {

> +		success = true;

> +		if (llq->tail != node) {

> +			ODP_ASSERT(node->next != SENTINEL);

> +			llq->head = node->next;

> +			llq->tail->next = node;

> +			llq->tail = node;

> +			node->next = SENTINEL;

> +		}

> +		/* Else 'node' is only element on list => nothing to do */

> +	}

> +	odp_spinlock_unlock(&llq->lock);

> +	return success;

> +}

> +

> +static inline odp_bool_t llq_on_queue(struct llnode *node)

> +{

> +	return node->next != NULL;

> +}


that is common function, has to be out of ifdef.

Maxim.

> +

> +#endif

> +

> +#endif

>
Maxim Uvarov June 21, 2017, 3:10 p.m. | #5
On 06/21/17 17:27, Honnappa Nagarahalli wrote:
> Hi Maxim,

>      This is a new file added by us. Hence the ARM copyright.

> Thanks,

> Honnappa

>


Was this work done as Linaro member/assignee? If yes - it has to have 
Linaro copyright as I know.

Maxim.

> On 21 June 2017 at 09:06, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

>> On 06/19/17 22:12, Brian Brooks wrote:

>>>

>>> --- /dev/null

>>> +++ b/platform/linux-generic/include/odp_llqueue.h

>>> @@ -0,0 +1,309 @@

>>> +/* Copyright (c) 2017, ARM Limited.

>>> + * All rights reserved.

>>> + *

>>> + * SPDX-License-Identifier:        BSD-3-Clause

>>> + */

>>> +

>>

>>

>> Has to be Linaro copyright.

>>

>> Maxim.
Honnappa Nagarahalli June 21, 2017, 4:22 p.m. | #6
I think we have talked about this earlier. We have been directed from
our executives that we should use ARM copyright if a member has done
this work and Linaro/LNG/FF/Bill are in sync on this.

Thanks,
Honnappa

On 21 June 2017 at 10:10, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
> On 06/21/17 17:27, Honnappa Nagarahalli wrote:

>>

>> Hi Maxim,

>>      This is a new file added by us. Hence the ARM copyright.

>> Thanks,

>> Honnappa

>>

>

> Was this work done as Linaro member/assignee? If yes - it has to have Linaro

> copyright as I know.

>

> Maxim.

>

>

>> On 21 June 2017 at 09:06, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

>>>

>>> On 06/19/17 22:12, Brian Brooks wrote:

>>>>

>>>>

>>>> --- /dev/null

>>>> +++ b/platform/linux-generic/include/odp_llqueue.h

>>>> @@ -0,0 +1,309 @@

>>>> +/* Copyright (c) 2017, ARM Limited.

>>>> + * All rights reserved.

>>>> + *

>>>> + * SPDX-License-Identifier:        BSD-3-Clause

>>>> + */

>>>> +

>>>

>>>

>>>

>>> Has to be Linaro copyright.

>>>

>>> Maxim.

>

>
Ola Liljedahl June 21, 2017, 4:31 p.m. | #7
On 20/06/2017, 15:12, "Savolainen, Petri (Nokia - FI/Espoo)"
<petri.savolainen@nokia.com> wrote:

>> +++ b/platform/linux-generic/include/odp_llqueue.h

>> @@ -0,0 +1,309 @@

>> +/* Copyright (c) 2017, ARM Limited.

>> + * All rights reserved.

>> + *

>> + * SPDX-License-Identifier:        BSD-3-Clause

>> + */

>> +

>> +#ifndef ODP_LLQUEUE_H_

>> +#define ODP_LLQUEUE_H_

>> +

>> +#include <odp/api/cpu.h>

>> +#include <odp/api/hints.h>

>> +#include <odp/api/spinlock.h>

>> +

>> +#include <odp_config_internal.h>

>> +#include <odp_debug_internal.h>

>> +#include <odp_cpu.h>

>> +

>> +#include <stdint.h>

>> +#include <stdlib.h>

>> +

>> 

>>+/***********************************************************************

>>*

>> ******

>> + * Linked list queues

>> +

>> 

>>*************************************************************************

>>*

>> ***/

>> +

>> +struct llqueue;

>> +struct llnode;

>> +

>> +static struct llnode *llq_head(struct llqueue *llq);

>> +static void llqueue_init(struct llqueue *llq);

>> +static void llq_enqueue(struct llqueue *llq, struct llnode *node);

>> +static struct llnode *llq_dequeue(struct llqueue *llq);

>> +static odp_bool_t llq_dequeue_cond(struct llqueue *llq, struct llnode

>> *exp);

>> +static odp_bool_t llq_cond_rotate(struct llqueue *llq, struct llnode

>> *node);

>> +static odp_bool_t llq_on_queue(struct llnode *node);

>> +

>> 

>>+/***********************************************************************

>>*

>> ******

>> + * The implementation(s)

>> +

>> 

>>*************************************************************************

>>*

>> ***/

>> +

>> +#define SENTINEL ((void *)~(uintptr_t)0)

>> +

>> +#ifdef CONFIG_LLDSCD

>> +/* Implement queue operations using double-word LL/SC */

>

>> +

>> +#else

>> +/* Implement queue operations protected by a spin lock */

>> +

>

>There's a lot of ifdef'ed code in this file, basically two full parallel

>implementations.

This horse has been flogged before on the mailing list.

> The first is built only for ARM and the second for the rest. Would there

>be a way to build both always ?

For ARMv7a and ARMv8a, you could build both versions. You really want to
use the LL/SC version on these architectures.

For architectures without double-word LL/SC, only the lock-based version
can be built.

>

>-Petri

>
Bill Fischofer June 21, 2017, 5:37 p.m. | #8
It's perfectly fine to have an ARM copyright as the original
contributor, but it also needs a Linaro copyright to be incorporated
into ODP. See traffic_mngr.c for an example of this.

On Wed, Jun 21, 2017 at 9:27 AM, Honnappa Nagarahalli
<honnappa.nagarahalli@linaro.org> wrote:
> Hi Maxim,

>     This is a new file added by us. Hence the ARM copyright.

> Thanks,

> Honnappa

>

> On 21 June 2017 at 09:06, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

>> On 06/19/17 22:12, Brian Brooks wrote:

>>>

>>> --- /dev/null

>>> +++ b/platform/linux-generic/include/odp_llqueue.h

>>> @@ -0,0 +1,309 @@

>>> +/* Copyright (c) 2017, ARM Limited.

>>> + * All rights reserved.

>>> + *

>>> + * SPDX-License-Identifier:        BSD-3-Clause

>>> + */

>>> +

>>

>>

>> Has to be Linaro copyright.

>>

>> Maxim.
Honnappa Nagarahalli June 21, 2017, 6:57 p.m. | #9
I cannot make a decision on this topic. This is what I have been told
and I am also told that Linaro is in sync on this. I will initiate
some discussions on this internally. Bill, do you want to check in
Linaro on this?

I suggest we go ahead with the patch and agree to change it when we
have a decision.

On 21 June 2017 at 12:37, Bill Fischofer <bill.fischofer@linaro.org> wrote:
> It's perfectly fine to have an ARM copyright as the original

> contributor, but it also needs a Linaro copyright to be incorporated

> into ODP. See traffic_mngr.c for an example of this.

>

> On Wed, Jun 21, 2017 at 9:27 AM, Honnappa Nagarahalli

> <honnappa.nagarahalli@linaro.org> wrote:

>> Hi Maxim,

>>     This is a new file added by us. Hence the ARM copyright.

>> Thanks,

>> Honnappa

>>

>> On 21 June 2017 at 09:06, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

>>> On 06/19/17 22:12, Brian Brooks wrote:

>>>>

>>>> --- /dev/null

>>>> +++ b/platform/linux-generic/include/odp_llqueue.h

>>>> @@ -0,0 +1,309 @@

>>>> +/* Copyright (c) 2017, ARM Limited.

>>>> + * All rights reserved.

>>>> + *

>>>> + * SPDX-License-Identifier:        BSD-3-Clause

>>>> + */

>>>> +

>>>

>>>

>>> Has to be Linaro copyright.

>>>

>>> Maxim.
Maxim Uvarov June 21, 2017, 7:25 p.m. | #10
On 06/21/17 21:57, Honnappa Nagarahalli wrote:
> I cannot make a decision on this topic. This is what I have been told

> and I am also told that Linaro is in sync on this. I will initiate

> some discussions on this internally. Bill, do you want to check in

> Linaro on this?

> 


we will check this first.


> I suggest we go ahead with the patch and agree to change it when we

> have a decision.

> 


and we can not go ahead until everything is resolved.

Maxim.

> On 21 June 2017 at 12:37, Bill Fischofer <bill.fischofer@linaro.org> wrote:

>> It's perfectly fine to have an ARM copyright as the original

>> contributor, but it also needs a Linaro copyright to be incorporated

>> into ODP. See traffic_mngr.c for an example of this.

>>

>> On Wed, Jun 21, 2017 at 9:27 AM, Honnappa Nagarahalli

>> <honnappa.nagarahalli@linaro.org> wrote:

>>> Hi Maxim,

>>>     This is a new file added by us. Hence the ARM copyright.

>>> Thanks,

>>> Honnappa

>>>

>>> On 21 June 2017 at 09:06, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

>>>> On 06/19/17 22:12, Brian Brooks wrote:

>>>>>

>>>>> --- /dev/null

>>>>> +++ b/platform/linux-generic/include/odp_llqueue.h

>>>>> @@ -0,0 +1,309 @@

>>>>> +/* Copyright (c) 2017, ARM Limited.

>>>>> + * All rights reserved.

>>>>> + *

>>>>> + * SPDX-License-Identifier:        BSD-3-Clause

>>>>> + */

>>>>> +

>>>>

>>>>

>>>> Has to be Linaro copyright.

>>>>

>>>> Maxim.
Savolainen, Petri (Nokia - FI/Espoo) June 22, 2017, 9:37 a.m. | #11
> -----Original Message-----

> From: Ola Liljedahl [mailto:Ola.Liljedahl@arm.com]

> Sent: Wednesday, June 21, 2017 7:31 PM

> To: Savolainen, Petri (Nokia - FI/Espoo) <petri.savolainen@nokia.com>;

> Brian Brooks <Brian.Brooks@arm.com>; lng-odp@lists.linaro.org

> Cc: nd <nd@arm.com>

> Subject: Re: [lng-odp] [API-NEXT PATCH v9 4/6] linux-gen: sched scalable:

> add a concurrent queue

> 

> 

> 

> 

> 

> On 20/06/2017, 15:12, "Savolainen, Petri (Nokia - FI/Espoo)"

> <petri.savolainen@nokia.com> wrote:

> 

> >> +++ b/platform/linux-generic/include/odp_llqueue.h

> >> @@ -0,0 +1,309 @@

> >> +/* Copyright (c) 2017, ARM Limited.

> >> + * All rights reserved.

> >> + *

> >> + * SPDX-License-Identifier:        BSD-3-Clause

> >> + */

> >> +

> >> +#ifndef ODP_LLQUEUE_H_

> >> +#define ODP_LLQUEUE_H_

> >> +

> >> +#include <odp/api/cpu.h>

> >> +#include <odp/api/hints.h>

> >> +#include <odp/api/spinlock.h>

> >> +

> >> +#include <odp_config_internal.h>

> >> +#include <odp_debug_internal.h>

> >> +#include <odp_cpu.h>

> >> +

> >> +#include <stdint.h>

> >> +#include <stdlib.h>

> >> +

> >>

> >>+/**********************************************************************

> *

> >>*

> >> ******

> >> + * Linked list queues

> >> +

> >>

> >>************************************************************************

> *

> >>*

> >> ***/

> >> +

> >> +struct llqueue;

> >> +struct llnode;

> >> +

> >> +static struct llnode *llq_head(struct llqueue *llq);

> >> +static void llqueue_init(struct llqueue *llq);

> >> +static void llq_enqueue(struct llqueue *llq, struct llnode *node);

> >> +static struct llnode *llq_dequeue(struct llqueue *llq);

> >> +static odp_bool_t llq_dequeue_cond(struct llqueue *llq, struct llnode

> >> *exp);

> >> +static odp_bool_t llq_cond_rotate(struct llqueue *llq, struct llnode

> >> *node);

> >> +static odp_bool_t llq_on_queue(struct llnode *node);

> >> +

> >>

> >>+/**********************************************************************

> *

> >>*

> >> ******

> >> + * The implementation(s)

> >> +

> >>

> >>************************************************************************

> *

> >>*

> >> ***/

> >> +

> >> +#define SENTINEL ((void *)~(uintptr_t)0)

> >> +

> >> +#ifdef CONFIG_LLDSCD

> >> +/* Implement queue operations using double-word LL/SC */

> >

> >> +

> >> +#else

> >> +/* Implement queue operations protected by a spin lock */

> >> +

> >

> >There's a lot of ifdef'ed code in this file, basically two full parallel

> >implementations.

> This horse has been flogged before on the mailing list.


Nothing has changed in our ifdef policy. The less ifdef'ed code, the better. This patch set introduces about 60 new #ifdef/#if/#ifndefs (when header file guards are not calculated).


> 

> > The first is built only for ARM and the second for the rest. Would there

> >be a way to build both always ?

> For ARMv7a and ARMv8a, you could build both versions. You really want to

> use the LL/SC version on these architectures.

> 

> For architectures without double-word LL/SC, only the lock-based version

> can be built.



You could *compile* the lock version always. It's based on locks, not on arch specific instructions.

-Petri
Brian Brooks June 22, 2017, 4:46 p.m. | #12
> > > The first is built only for ARM and the second for the rest. Would there

> > >be a way to build both always ?

> > For ARMv7a and ARMv8a, you could build both versions. You really want to

> > use the LL/SC version on these architectures.

> > 

> > For architectures without double-word LL/SC, only the lock-based version

> > can be built.

> 

> 

> You could *compile* the lock version always. It's based on locks, not on arch specific instructions.


That would require an abstraction layer consisting of function pointers
pointing to one of the two implementations. On architectures without support
for LLD/SCD, there would only be one implementation.

This could make sense if... you were benchmarking *many* different concurrent
queue implementations and wanted to keep the benchmark code extremely succinct
and were willing to pay for function pointers. But that is not the case here.

This code is deliberately written to be static inline and conditionally compiled.

> -Petri

>
Bill Fischofer June 22, 2017, 6:45 p.m. | #13
On Thu, Jun 22, 2017 at 11:46 AM, Brian Brooks <brian.brooks@arm.com> wrote:
>> > > The first is built only for ARM and the second for the rest. Would there

>> > >be a way to build both always ?

>> > For ARMv7a and ARMv8a, you could build both versions. You really want to

>> > use the LL/SC version on these architectures.

>> >

>> > For architectures without double-word LL/SC, only the lock-based version

>> > can be built.

>>

>>

>> You could *compile* the lock version always. It's based on locks, not on arch specific instructions.

>

> That would require an abstraction layer consisting of function pointers

> pointing to one of the two implementations. On architectures without support

> for LLD/SCD, there would only be one implementation.

>

> This could make sense if... you were benchmarking *many* different concurrent

> queue implementations and wanted to keep the benchmark code extremely succinct

> and were willing to pay for function pointers. But that is not the case here.

>

> This code is deliberately written to be static inline and conditionally compiled.


We're going to need to think about how to make microarchitecture
distinctions like this dynamic since we want a single binary to be
distributed to all ARM architectures. I agree that such dynamism is
outside the scope of this series, but it's something we're going to
need as the cloud work progresses.

>

>> -Petri

>>
Ola Liljedahl June 22, 2017, 7:20 p.m. | #14
>

>

>> -----Original Message-----

>> From: Ola Liljedahl [mailto:Ola.Liljedahl@arm.com]

>> Sent: Wednesday, June 21, 2017 7:31 PM

>> To: Savolainen, Petri (Nokia - FI/Espoo) <petri.savolainen@nokia.com>;

>> Brian Brooks <Brian.Brooks@arm.com>; lng-odp@lists.linaro.org

>> Cc: nd <nd@arm.com>

>> Subject: Re: [lng-odp] [API-NEXT PATCH v9 4/6] linux-gen: sched

>>scalable:

>> add a concurrent queue

>> 

>> 

>> 

>> 

>> 

>> On 20/06/2017, 15:12, "Savolainen, Petri (Nokia - FI/Espoo)"

>> <petri.savolainen@nokia.com> wrote:

>> 

>> >> +++ b/platform/linux-generic/include/odp_llqueue.h

>> >> @@ -0,0 +1,309 @@

>> >> +/* Copyright (c) 2017, ARM Limited.

>> >> + * All rights reserved.

>> >> + *

>> >> + * SPDX-License-Identifier:        BSD-3-Clause

>> >> + */

>> >> +

>> >> +#ifndef ODP_LLQUEUE_H_

>> >> +#define ODP_LLQUEUE_H_

>> >> +

>> >> +#include <odp/api/cpu.h>

>> >> +#include <odp/api/hints.h>

>> >> +#include <odp/api/spinlock.h>

>> >> +

>> >> +#include <odp_config_internal.h>

>> >> +#include <odp_debug_internal.h>

>> >> +#include <odp_cpu.h>

>> >> +

>> >> +#include <stdint.h>

>> >> +#include <stdlib.h>

>> >> +

>> >>

>> 

>>>>+/*********************************************************************

>>>>*

>> *

>> >>*

>> >> ******

>> >> + * Linked list queues

>> >> +

>> >>

>> 

>>>>***********************************************************************

>>>>*

>> *

>> >>*

>> >> ***/

>> >> +

>> >> +struct llqueue;

>> >> +struct llnode;

>> >> +

>> >> +static struct llnode *llq_head(struct llqueue *llq);

>> >> +static void llqueue_init(struct llqueue *llq);

>> >> +static void llq_enqueue(struct llqueue *llq, struct llnode *node);

>> >> +static struct llnode *llq_dequeue(struct llqueue *llq);

>> >> +static odp_bool_t llq_dequeue_cond(struct llqueue *llq, struct

>>llnode

>> >> *exp);

>> >> +static odp_bool_t llq_cond_rotate(struct llqueue *llq, struct llnode

>> >> *node);

>> >> +static odp_bool_t llq_on_queue(struct llnode *node);

>> >> +

>> >>

>> 

>>>>+/*********************************************************************

>>>>*

>> *

>> >>*

>> >> ******

>> >> + * The implementation(s)

>> >> +

>> >>

>> 

>>>>***********************************************************************

>>>>*

>> *

>> >>*

>> >> ***/

>> >> +

>> >> +#define SENTINEL ((void *)~(uintptr_t)0)

>> >> +

>> >> +#ifdef CONFIG_LLDSCD

>> >> +/* Implement queue operations using double-word LL/SC */

>> >

>> >> +

>> >> +#else

>> >> +/* Implement queue operations protected by a spin lock */

>> >> +

>> >

>> >There's a lot of ifdef'ed code in this file, basically two full

>>parallel

>> >implementations.

>> This horse has been flogged before on the mailing list.

>

>Nothing has changed in our ifdef policy. The less ifdef'ed code, the

>better. This patch set introduces about 60 new #ifdef/#if/#ifndefs (when

>header file guards are not calculated).

Yes but we already had agreement on how to handle this one. Now you are
disagreeing with the maintainers?

Ideally, files/definitions (on an item-by-item basis) in the arch-specific
subdirectories should override the default implementations in the
arch/default directory. Then we wouldn┬╣t have to put duplicate
implementations in every arch subdirectory (and ODP linux-generic would
build for any architecture, also those not explicitly recognised).
Arch/default would have an llqueue.c with the lock-based code and arch/arm
would have an llqueue.c with the lld/scd implementation.

>

>

>> 

>> > The first is built only for ARM and the second for the rest. Would

>>there

>> >be a way to build both always ?

>> For ARMv7a and ARMv8a, you could build both versions. You really want to

>> use the LL/SC version on these architectures.

>> 

>> For architectures without double-word LL/SC, only the lock-based version

>> can be built.

>

>

>You could *compile* the lock version always. It's based on locks, not on

>arch specific instructions.

Yes we could always build the lock-based code but then we would need to
refer to these functions from some other function in order to avoid the
function-not-used error (but there would most likely be some code that
would not actually be called even if it is compiled). It can be done.

>

>-Petri

>

Patch hide | download patch | download mbox

diff --git a/platform/linux-generic/Makefile.am b/platform/linux-generic/Makefile.am
index b869fd4b..90cc4ca6 100644
--- a/platform/linux-generic/Makefile.am
+++ b/platform/linux-generic/Makefile.am
@@ -157,6 +157,7 @@  noinst_HEADERS = \
 		  ${srcdir}/include/odp_errno_define.h \
 		  ${srcdir}/include/odp_forward_typedefs_internal.h \
 		  ${srcdir}/include/odp_internal.h \
+		  ${srcdir}/include/odp_llqueue.h \
 		  ${srcdir}/include/odp_name_table_internal.h \
 		  ${srcdir}/include/odp_packet_internal.h \
 		  ${srcdir}/include/odp_packet_io_internal.h \
diff --git a/platform/linux-generic/include/odp_llqueue.h b/platform/linux-generic/include/odp_llqueue.h
new file mode 100644
index 00000000..758af490
--- /dev/null
+++ b/platform/linux-generic/include/odp_llqueue.h
@@ -0,0 +1,309 @@ 
+/* Copyright (c) 2017, ARM Limited.
+ * All rights reserved.
+ *
+ * SPDX-License-Identifier:        BSD-3-Clause
+ */
+
+#ifndef ODP_LLQUEUE_H_
+#define ODP_LLQUEUE_H_
+
+#include <odp/api/cpu.h>
+#include <odp/api/hints.h>
+#include <odp/api/spinlock.h>
+
+#include <odp_config_internal.h>
+#include <odp_debug_internal.h>
+#include <odp_cpu.h>
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/******************************************************************************
+ * Linked list queues
+ *****************************************************************************/
+
+struct llqueue;
+struct llnode;
+
+static struct llnode *llq_head(struct llqueue *llq);
+static void llqueue_init(struct llqueue *llq);
+static void llq_enqueue(struct llqueue *llq, struct llnode *node);
+static struct llnode *llq_dequeue(struct llqueue *llq);
+static odp_bool_t llq_dequeue_cond(struct llqueue *llq, struct llnode *exp);
+static odp_bool_t llq_cond_rotate(struct llqueue *llq, struct llnode *node);
+static odp_bool_t llq_on_queue(struct llnode *node);
+
+/******************************************************************************
+ * The implementation(s)
+ *****************************************************************************/
+
+#define SENTINEL ((void *)~(uintptr_t)0)
+
+#ifdef CONFIG_LLDSCD
+/* Implement queue operations using double-word LL/SC */
+
+/* The scalar equivalent of a double pointer */
+#if __SIZEOF_PTRDIFF_T__ == 4
+typedef uint64_t dintptr_t;
+#endif
+#if __SIZEOF_PTRDIFF_T__ == 8
+typedef __int128 dintptr_t;
+#endif
+
+struct llnode {
+	struct llnode *next;
+};
+
+union llht {
+	struct {
+		struct llnode *head, *tail;
+	} st;
+	dintptr_t ui;
+};
+
+struct llqueue {
+	union llht u;
+};
+
+static inline struct llnode *llq_head(struct llqueue *llq)
+{
+	return __atomic_load_n(&llq->u.st.head, __ATOMIC_RELAXED);
+}
+
+static inline void llqueue_init(struct llqueue *llq)
+{
+	llq->u.st.head = NULL;
+	llq->u.st.tail = NULL;
+}
+
+static inline void llq_enqueue(struct llqueue *llq, struct llnode *node)
+{
+	union llht old, neu;
+
+	ODP_ASSERT(node->next == NULL);
+	node->next = SENTINEL;
+	do {
+		old.ui = lld(&llq->u.ui, __ATOMIC_RELAXED);
+		neu.st.head = old.st.head == NULL ? node : old.st.head;
+		neu.st.tail = node;
+	} while (odp_unlikely(scd(&llq->u.ui, neu.ui, __ATOMIC_RELEASE)));
+	if (old.st.tail != NULL) {
+		/* List was not empty */
+		ODP_ASSERT(old.st.tail->next == SENTINEL);
+		old.st.tail->next = node;
+	}
+}
+
+static inline struct llnode *llq_dequeue(struct llqueue *llq)
+{
+	struct llnode *head;
+	union llht old, neu;
+
+	/* llq_dequeue() may be used in a busy-waiting fashion
+	 * Read head using plain load to avoid disturbing remote LL/SC
+	 */
+	head = __atomic_load_n(&llq->u.st.head, __ATOMIC_ACQUIRE);
+	if (head == NULL)
+		return NULL;
+	/* Read head->next before LL to minimize cache miss latency
+	 * in LL/SC below
+	 */
+	(void)__atomic_load_n(&head->next, __ATOMIC_RELAXED);
+
+	do {
+		old.ui = lld(&llq->u.ui, __ATOMIC_RELAXED);
+		if (odp_unlikely(old.st.head == NULL)) {
+			/* Empty list */
+			return NULL;
+		} else if (odp_unlikely(old.st.head == old.st.tail)) {
+			/* Single-element in list */
+			neu.st.head = NULL;
+			neu.st.tail = NULL;
+		} else {
+			/* Multi-element list, dequeue head */
+			struct llnode *next;
+			/* Wait until llq_enqueue() has written true next
+			 * pointer
+			 */
+			while ((next = __atomic_load_n(&old.st.head->next,
+						       __ATOMIC_RELAXED)) ==
+				SENTINEL)
+				odp_cpu_pause();
+			neu.st.head = next;
+			neu.st.tail = old.st.tail;
+		}
+	} while (odp_unlikely(scd(&llq->u.ui, neu.ui, __ATOMIC_RELAXED)));
+	old.st.head->next = NULL;
+	return old.st.head;
+}
+
+static inline odp_bool_t llq_dequeue_cond(struct llqueue *llq,
+					  struct llnode *exp)
+{
+	union llht old, neu;
+
+	do {
+		old.ui = lld(&llq->u.ui, __ATOMIC_ACQUIRE);
+		if (odp_unlikely(old.st.head == NULL || old.st.head != exp)) {
+			/* Empty list or wrong head */
+			return false;
+		} else if (odp_unlikely(old.st.head == old.st.tail)) {
+			/* Single-element in list */
+			neu.st.head = NULL;
+			neu.st.tail = NULL;
+		} else {
+			/* Multi-element list, dequeue head */
+			struct llnode *next;
+
+			/* Wait until llq_enqueue() has written true next
+			 * pointer */
+			while ((next = __atomic_load_n(&old.st.head->next,
+						       __ATOMIC_RELAXED)) ==
+				SENTINEL)
+				odp_cpu_pause();
+
+			neu.st.head = next;
+			neu.st.tail = old.st.tail;
+		}
+	} while (odp_unlikely(scd(&llq->u.ui, neu.ui, __ATOMIC_RELAXED)));
+	old.st.head->next = NULL;
+	return true;
+}
+
+/* If 'node' is a head of llq then move it to tail */
+static inline odp_bool_t llq_cond_rotate(struct llqueue *llq,
+					 struct llnode *node)
+{
+	/* Difficult to make this into a single atomic operation
+	 * Instead use existing primitives.
+	 */
+	if (odp_likely(llq_dequeue_cond(llq, node))) {
+		llq_enqueue(llq, node);
+		return true;
+	}
+	return false;
+}
+
+static inline odp_bool_t llq_on_queue(struct llnode *node)
+{
+	return node->next != NULL;
+}
+
+#else
+/* Implement queue operations protected by a spin lock */
+
+struct llnode {
+	struct llnode *next;
+};
+
+struct llqueue {
+	struct llnode *head, *tail;
+	odp_spinlock_t lock;
+};
+
+static inline struct llnode *llq_head(struct llqueue *llq)
+{
+	return __atomic_load_n(&llq->head, __ATOMIC_RELAXED);
+}
+
+static inline void llqueue_init(struct llqueue *llq)
+{
+	llq->head = NULL;
+	llq->tail = NULL;
+	odp_spinlock_init(&llq->lock);
+}
+
+static inline void llq_enqueue(struct llqueue *llq, struct llnode *node)
+{
+	ODP_ASSERT(node->next == NULL);
+	node->next = SENTINEL;
+
+	odp_spinlock_lock(&llq->lock);
+	if (llq->head == NULL) {
+		llq->head = node;
+		llq->tail = node;
+	} else {
+		llq->tail->next = node;
+		llq->tail = node;
+	}
+	odp_spinlock_unlock(&llq->lock);
+}
+
+static inline struct llnode *llq_dequeue(struct llqueue *llq)
+{
+	struct llnode *head;
+	struct llnode *node = NULL;
+
+	head = __atomic_load_n(&llq->head, __ATOMIC_RELAXED);
+	if (head == NULL)
+		return NULL;
+
+	odp_spinlock_lock(&llq->lock);
+	if (llq->head != NULL) {
+		node = llq->head;
+		if (llq->head == llq->tail) {
+			ODP_ASSERT(node->next == SENTINEL);
+			llq->head = NULL;
+			llq->tail = NULL;
+		} else {
+			ODP_ASSERT(node->next != SENTINEL);
+			llq->head = node->next;
+		}
+		node->next = NULL;
+	}
+	odp_spinlock_unlock(&llq->lock);
+	return node;
+}
+
+static inline odp_bool_t llq_dequeue_cond(struct llqueue *llq,
+					  struct llnode *node)
+{
+	odp_bool_t success = false;
+
+	odp_spinlock_lock(&llq->lock);
+	if (odp_likely(llq->head != NULL && llq->head == node)) {
+		success = true;
+		if (llq->head == llq->tail) {
+			ODP_ASSERT(node->next == SENTINEL);
+			llq->head = NULL;
+			llq->tail = NULL;
+		} else {
+			ODP_ASSERT(node->next != SENTINEL);
+			llq->head = node->next;
+		}
+		node->next = NULL;
+	}
+	odp_spinlock_unlock(&llq->lock);
+	return success;
+}
+
+/* If 'node' is a head of llq then move it to tail */
+static inline odp_bool_t llq_cond_rotate(struct llqueue *llq,
+					 struct llnode *node)
+{
+	odp_bool_t success = false;
+
+	odp_spinlock_lock(&llq->lock);
+	if (odp_likely(llq->head == node)) {
+		success = true;
+		if (llq->tail != node) {
+			ODP_ASSERT(node->next != SENTINEL);
+			llq->head = node->next;
+			llq->tail->next = node;
+			llq->tail = node;
+			node->next = SENTINEL;
+		}
+		/* Else 'node' is only element on list => nothing to do */
+	}
+	odp_spinlock_unlock(&llq->lock);
+	return success;
+}
+
+static inline odp_bool_t llq_on_queue(struct llnode *node)
+{
+	return node->next != NULL;
+}
+
+#endif
+
+#endif