/* ck_barrier_combining.c revision 343494 */
/*
 * Copyright 2011-2015 Samy Al Bahra.
 * Copyright 2011 David Joseph.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
27
28#include <ck_barrier.h>
29#include <ck_cc.h>
30#include <ck_pr.h>
31#include <ck_spinlock.h>
32
/*
 * FIFO queue of combining-tree groups. Used only during group
 * initialization to drive a level-order (breadth-first) traversal of
 * the tree when searching for a free child slot.
 */
struct ck_barrier_combining_queue {
	struct ck_barrier_combining_group *head;	/* Next group to visit. */
	struct ck_barrier_combining_group *tail;	/* Last group appended. */
};
37
38static struct ck_barrier_combining_group *
39ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue)
40{
41	struct ck_barrier_combining_group *front = NULL;
42
43	if (queue->head != NULL) {
44		front = queue->head;
45		queue->head = queue->head->next;
46	}
47
48	return front;
49}
50
51static void
52ck_barrier_combining_insert(struct ck_barrier_combining_group *parent,
53    struct ck_barrier_combining_group *tnode,
54    struct ck_barrier_combining_group **child)
55{
56
57	*child = tnode;
58	tnode->parent = parent;
59
60	/*
61	 * After inserting, we must increment the parent group's count for
62	 * number of threads expected to reach it; otherwise, the
63	 * barrier may end prematurely.
64	 */
65	parent->k++;
66	return;
67}
68
69/*
70 * This implementation of software combining tree barriers
71 * uses level order traversal to insert new thread groups
72 * into the barrier's tree. We use a queue to implement this
73 * traversal.
74 */
75static void
76ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue,
77    struct ck_barrier_combining_group *node_value)
78{
79
80	node_value->next = NULL;
81	if (queue->head == NULL) {
82		queue->head = queue->tail = node_value;
83		return;
84	}
85
86	queue->tail->next = node_value;
87	queue->tail = node_value;
88
89	return;
90}
91
92
93void
94ck_barrier_combining_group_init(struct ck_barrier_combining *root,
95    struct ck_barrier_combining_group *tnode,
96    unsigned int nthr)
97{
98	struct ck_barrier_combining_group *node;
99	struct ck_barrier_combining_queue queue;
100
101	queue.head = queue.tail = NULL;
102
103	tnode->k = nthr;
104	tnode->count = 0;
105	tnode->sense = 0;
106	tnode->left = tnode->right = NULL;
107
108	/*
109	 * Finds the first available node for linkage into the combining
110	 * tree. The use of a spinlock is excusable as this is a one-time
111	 * initialization cost.
112	 */
113	ck_spinlock_fas_lock(&root->mutex);
114	ck_barrier_combining_queue_enqueue(&queue, root->root);
115	while (queue.head != NULL) {
116		node = ck_barrier_combining_queue_dequeue(&queue);
117
118		/* If the left child is free, link the group there. */
119		if (node->left == NULL) {
120			ck_barrier_combining_insert(node, tnode, &node->left);
121			goto leave;
122		}
123
124		/* If the right child is free, link the group there. */
125		if (node->right == NULL) {
126			ck_barrier_combining_insert(node, tnode, &node->right);
127			goto leave;
128		}
129
130		/*
131		 * If unsuccessful, try inserting as a child of the children of the
132		 * current node.
133		 */
134		ck_barrier_combining_queue_enqueue(&queue, node->left);
135		ck_barrier_combining_queue_enqueue(&queue, node->right);
136	}
137
138leave:
139	ck_spinlock_fas_unlock(&root->mutex);
140	return;
141}
142
143void
144ck_barrier_combining_init(struct ck_barrier_combining *root,
145    struct ck_barrier_combining_group *init_root)
146{
147
148	init_root->k = 0;
149	init_root->count = 0;
150	init_root->sense = 0;
151	init_root->parent = init_root->left = init_root->right = NULL;
152	ck_spinlock_fas_init(&root->mutex);
153	root->root = init_root;
154	return;
155}
156
157static void
158ck_barrier_combining_aux(struct ck_barrier_combining *barrier,
159    struct ck_barrier_combining_group *tnode,
160    unsigned int sense)
161{
162
163	/*
164	 * If this is the last thread in the group, it moves on to the parent group.
165	 * Otherwise, it spins on this group's sense.
166	 */
167	if (ck_pr_faa_uint(&tnode->count, 1) == tnode->k - 1) {
168		/*
169		 * If we are and will be the last thread entering the barrier for the
170		 * current group then signal the parent group if one exists.
171		 */
172		if (tnode->parent != NULL)
173			ck_barrier_combining_aux(barrier, tnode->parent, sense);
174
175		/*
176		 * Once the thread returns from its parent(s), it reinitializes the group's
177		 * arrival count and signals other threads to continue by flipping the group
178		 * sense. Order of these operations is not important since we assume a static
179		 * number of threads are members of a barrier for the lifetime of the barrier.
180		 * Since count is explicitly reinitialized, it is guaranteed that at any point
181		 * tnode->count is equivalent to tnode->k if and only if that many threads
182		 * are at the barrier.
183		 */
184		ck_pr_store_uint(&tnode->count, 0);
185		ck_pr_fence_store();
186		ck_pr_store_uint(&tnode->sense, ~tnode->sense);
187	} else {
188		while (sense != ck_pr_load_uint(&tnode->sense))
189			ck_pr_stall();
190	}
191	ck_pr_fence_memory();
192
193	return;
194}
195
196void
197ck_barrier_combining(struct ck_barrier_combining *barrier,
198    struct ck_barrier_combining_group *tnode,
199    struct ck_barrier_combining_state *state)
200{
201
202	ck_barrier_combining_aux(barrier, tnode, state->sense);
203
204	/* Reverse the execution context's sense for the next barrier. */
205	state->sense = ~state->sense;
206	return;
207}
208