/*	$NetBSD: subr_pserialize.c,v 1.5.2.1 2013/02/08 19:32:07 riz Exp $	*/

/*-
 * Copyright (c) 2010, 2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Passive serialization.
 *
 * Implementation accurately matches the lapsed US patent 4809168, therefore
 * code is patent-free in the United States.  Your use of this code is at
 * your own risk.
 *
 * Note for NetBSD developers: all changes to this source file must be
 * approved by the <core>.
 */
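
/*
 * A minimal read-side usage sketch (hypothetical: the list head, entry
 * type and key below are illustrative, not part of this file).  Readers
 * enter a passive critical section, look the object up and use it; the
 * object stays valid until a subsequent pserialize_perform() completes:
 *
 *	int s;
 *
 *	s = pserialize_read_enter();
 *	TAILQ_FOREACH(ent, &frob_list, f_chain) {
 *		if (ent->f_key == key) {
 *			frob_use(ent);
 *			break;
 *		}
 *	}
 *	pserialize_read_exit(s);
 */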

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_pserialize.c,v 1.5.2.1 2013/02/08 19:32:07 riz Exp $");

#include <sys/param.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/evcnt.h>
#include <sys/kmem.h>
#include <sys/mutex.h>
#include <sys/pserialize.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/xcall.h>

struct pserialize {
	TAILQ_ENTRY(pserialize)	psz_chain;
	lwp_t *			psz_owner;
	kcpuset_t *		psz_target;
	kcpuset_t *		psz_pass;
};

static u_int			psz_work_todo	__cacheline_aligned;
static kmutex_t			psz_lock	__cacheline_aligned;
static struct evcnt		psz_ev_excl	__cacheline_aligned;

/*
 * As defined in "Method 1":
 *	q0: "0 MP checkpoints have occurred".
 *	q1: "1 MP checkpoint has occurred".
 *	q2: "2 MP checkpoints have occurred".
 */
static TAILQ_HEAD(, pserialize)	psz_queue0	__cacheline_aligned;
static TAILQ_HEAD(, pserialize)	psz_queue1	__cacheline_aligned;
static TAILQ_HEAD(, pserialize)	psz_queue2	__cacheline_aligned;

/*
 * pserialize_init:
 *
 *	Initialize passive serialization structures.
 */
void
pserialize_init(void)
{

	psz_work_todo = 0;
	TAILQ_INIT(&psz_queue0);
	TAILQ_INIT(&psz_queue1);
	TAILQ_INIT(&psz_queue2);
	mutex_init(&psz_lock, MUTEX_DEFAULT, IPL_SCHED);
	evcnt_attach_dynamic(&psz_ev_excl, EVCNT_TYPE_MISC, NULL,
	    "pserialize", "exclusive access");
}

/*
 * pserialize_create:
 *
 *	Create and initialize a passive serialization object.
 */
pserialize_t
pserialize_create(void)
{
	pserialize_t psz;

	psz = kmem_zalloc(sizeof(struct pserialize), KM_SLEEP);
	kcpuset_create(&psz->psz_target, true);
	kcpuset_create(&psz->psz_pass, true);
	psz->psz_owner = NULL;

	return psz;
}

/*
 * pserialize_destroy:
 *
 *	Destroy a passive serialization object.
 */
void
pserialize_destroy(pserialize_t psz)
{

	KASSERT(psz->psz_owner == NULL);

	kcpuset_destroy(psz->psz_target);
	kcpuset_destroy(psz->psz_pass);
	kmem_free(psz, sizeof(struct pserialize));
}

/*
 * pserialize_perform:
 *
 *	Perform the write side of passive serialization.  The calling
 *	thread holds an exclusive lock on the data object(s) being updated.
 *	We wait until every processor in the system has made at least two
 *	passes through cpu_switchto().  The wait is made with the caller's
 *	update lock held, but is short term.
 */
void
pserialize_perform(pserialize_t psz)
{
	uint64_t xc;

	KASSERT(!cpu_intr_p());
	KASSERT(!cpu_softintr_p());

	if (__predict_false(panicstr != NULL)) {
		return;
	}
	KASSERT(psz->psz_owner == NULL);
	KASSERT(ncpu > 0);

	/*
	 * Set up the object and put it onto the queue.  The lock
	 * activity here provides the necessary memory barrier to
	 * make the caller's data update completely visible to
	 * other processors.
	 */
	psz->psz_owner = curlwp;
	kcpuset_copy(psz->psz_target, kcpuset_running);
	kcpuset_zero(psz->psz_pass);

	mutex_spin_enter(&psz_lock);
	TAILQ_INSERT_TAIL(&psz_queue0, psz, psz_chain);
	psz_work_todo++;

	do {
		mutex_spin_exit(&psz_lock);

		/*
		 * Force some context switch activity on every CPU, as
		 * the system may not be busy.  Pause to avoid flooding.
		 */
		xc = xc_broadcast(XC_HIGHPRI, (xcfunc_t)nullop, NULL, NULL);
		xc_wait(xc);
		kpause("psrlz", false, 1, NULL);

		mutex_spin_enter(&psz_lock);
	} while (!kcpuset_iszero(psz->psz_target));

	psz_ev_excl.ev_count++;
	mutex_spin_exit(&psz_lock);

	psz->psz_owner = NULL;
}
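
/*
 * A minimal write-side usage sketch (hypothetical: the lock, list and
 * entry names are illustrative, not part of this file).  The updater
 * unlinks the object under its exclusive lock, waits for all passive
 * readers to drain via pserialize_perform(), and only then frees it:
 *
 *	mutex_enter(&frob_lock);
 *	TAILQ_REMOVE(&frob_list, ent, f_chain);
 *	pserialize_perform(frob_psz);
 *	mutex_exit(&frob_lock);
 *	kmem_free(ent, sizeof(*ent));
 */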

int
pserialize_read_enter(void)
{

	KASSERT(!cpu_intr_p());
	return splsoftserial();
}

void
pserialize_read_exit(int s)
{

	splx(s);
}

/*
 * pserialize_switchpoint:
 *
 *	Monitor system context switch activity.  Called from machine
 *	independent code after mi_switch() returns.
 */
void
pserialize_switchpoint(void)
{
	pserialize_t psz, next;
	cpuid_t cid;

	/*
	 * If no updates pending, bail out.  No need to lock in order to
	 * test psz_work_todo; the only ill effect of missing an update
	 * would be to delay LWPs waiting in pserialize_perform().  That
	 * will not happen because updates are on the queue before an
	 * xcall is generated (serialization) to tickle every CPU.
	 */
	if (__predict_true(psz_work_todo == 0)) {
		return;
	}
	mutex_spin_enter(&psz_lock);
	cid = cpu_index(curcpu());

	/*
	 * First, scan through the second queue and update each request;
	 * if it has passed all processors, transfer it to the third queue.
	 */
	for (psz = TAILQ_FIRST(&psz_queue1); psz != NULL; psz = next) {
		next = TAILQ_NEXT(psz, psz_chain);
		kcpuset_set(psz->psz_pass, cid);
		if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
			continue;
		}
		kcpuset_zero(psz->psz_pass);
		TAILQ_REMOVE(&psz_queue1, psz, psz_chain);
		TAILQ_INSERT_TAIL(&psz_queue2, psz, psz_chain);
	}
	/*
	 * Next, scan through the first queue and update each request;
	 * if it has passed all processors, move it to the second queue.
	 */
	for (psz = TAILQ_FIRST(&psz_queue0); psz != NULL; psz = next) {
		next = TAILQ_NEXT(psz, psz_chain);
		kcpuset_set(psz->psz_pass, cid);
		if (!kcpuset_match(psz->psz_pass, psz->psz_target)) {
			continue;
		}
		kcpuset_zero(psz->psz_pass);
		TAILQ_REMOVE(&psz_queue0, psz, psz_chain);
		TAILQ_INSERT_TAIL(&psz_queue1, psz, psz_chain);
	}
	/*
	 * Process the third queue: its entries have been seen twice on
	 * every processor; remove them from the queue and notify the
	 * updating thread.
	 */
	while ((psz = TAILQ_FIRST(&psz_queue2)) != NULL) {
		TAILQ_REMOVE(&psz_queue2, psz, psz_chain);
		kcpuset_zero(psz->psz_target);
		psz_work_todo--;
	}
	mutex_spin_exit(&psz_lock);
}