/*
 * Read-Copy Update mechanism for mutual exclusion (tree-based version)
 * Internal non-public definitions that provide either classic
 * or preemptable semantics.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright Red Hat, 2009
 * Copyright IBM Corporation, 2009
 *
 * Author: Ingo Molnar <mingo@elte.hu>
 *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
 */

#include <linux/delay.h>

/*
 * Check the RCU kernel configuration parameters and print informative
 * messages about anything out of the ordinary.  If you like #ifdef, you
 * will love this function.
 */
static void __init rcu_bootup_announce_oddness(void)
{
#ifdef CONFIG_RCU_TRACE
	printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
#endif
#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && \
	CONFIG_RCU_FANOUT != 32)
	printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
	       CONFIG_RCU_FANOUT);
#endif
#ifdef CONFIG_RCU_FANOUT_EXACT
	printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
#endif
#ifdef CONFIG_RCU_FAST_NO_HZ
	printk(KERN_INFO
	       "\tRCU dyntick-idle grace-period acceleration is enabled.\n");
#endif
#ifdef CONFIG_PROVE_RCU
	printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
#endif
#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
	printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
#endif
#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
	printk(KERN_INFO
	       "\tRCU-based detection of stalled CPUs is disabled.\n");
#endif
#ifndef CONFIG_RCU_CPU_STALL_VERBOSE
	printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
#endif
#if NUM_RCU_LVL_4 != 0
	printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
#endif
}

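/*
 * Illustrative example (added commentary, not part of the original file):
 * with, say, CONFIG_RCU_TRACE=y and CONFIG_RCU_FANOUT_EXACT=y, the
 * function above would emit boot-log lines such as:
 *
 *	RCU debugfs-based tracing is enabled.
 *	Hierarchical RCU autobalancing is disabled.
 *
 * immediately after the "Preemptable hierarchical RCU implementation."
 * (or "Hierarchical RCU implementation.") banner printed by the
 * rcu_bootup_announce() variants below.
 */
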
#ifdef CONFIG_TREE_PREEMPT_RCU

struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);

static int rcu_preempted_readers_exp(struct rcu_node *rnp);

/*
 * Tell them what RCU they are running.
 */
static void __init rcu_bootup_announce(void)
{
	printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

/*
 * Return the number of RCU-preempt batches processed thus far
 * for debug and statistics.
 */
long rcu_batches_completed_preempt(void)
{
	return rcu_preempt_state.completed;
}
EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);

/*
 * Return the number of RCU batches processed thus far for debug & stats.
 */
long rcu_batches_completed(void)
{
	return rcu_batches_completed_preempt();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

/*
 * Force a quiescent state for preemptible RCU.
 */
void rcu_force_quiescent_state(void)
{
	force_quiescent_state(&rcu_preempt_state, 0);
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);

/*
 * Record a preemptable-RCU quiescent state for the specified CPU.  Note
 * that this just means that the task currently running on the CPU is
 * not in a quiescent state.  There might be any number of tasks blocked
 * while in an RCU read-side critical section.
 *
 * Unlike the other rcu_*_qs() functions, callers to this function
 * must disable irqs in order to protect the assignment to
 * ->rcu_read_unlock_special.
 */
static void rcu_preempt_qs(int cpu)
{
	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);

	rdp->passed_quiesc_completed = rdp->gpnum - 1;
	barrier();
	rdp->passed_quiesc = 1;
	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
}

/*
 * We have entered the scheduler, and the current task might soon be
 * context-switched away from.  If this task is in an RCU read-side
 * critical section, we will no longer be able to rely on the CPU to
 * record that fact, so we enqueue the task on the appropriate entry
 * of the blocked_tasks[] array.  The task will dequeue itself when
 * it exits the outermost enclosing RCU read-side critical section.
 * Therefore, the current grace period cannot be permitted to complete
 * until the blocked_tasks[] entry indexed by the low-order bit of
 * rnp->gpnum empties.
 *
 * Caller must disable preemption.
 */
static void rcu_preempt_note_context_switch(int cpu)
{
	struct task_struct *t = current;
	unsigned long flags;
	int phase;
	struct rcu_data *rdp;
	struct rcu_node *rnp;

	if (t->rcu_read_lock_nesting &&
	    (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {

		/* Possibly blocking in an RCU read-side critical section. */
		rdp = rcu_preempt_state.rda[cpu];
		rnp = rdp->mynode;
		raw_spin_lock_irqsave(&rnp->lock, flags);
		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
		t->rcu_blocked_node = rnp;

		/*
		 * If this CPU has already checked in, then this task
		 * will hold up the next grace period rather than the
		 * current grace period.  Queue the task accordingly.
		 * If the task is queued for the current grace period
		 * (i.e., this CPU has not yet passed through a quiescent
		 * state for the current grace period), then as long
		 * as that task remains queued, the current grace period
		 * cannot end.
		 *
		 * But first, note that the current CPU must still be
		 * on line!
		 */
		WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
		WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
		phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
		list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
	}

	/*
	 * Either we were not in an RCU read-side critical section to
	 * begin with, or we have now recorded that critical section
	 * globally.  Either way, we can now note a quiescent state
	 * for this CPU.  Again, if we were in an RCU read-side critical
	 * section, and if that critical section was blocking the current
	 * grace period, then the fact that the task has been enqueued
	 * means that we continue to block the current grace period.
	 */
	local_irq_save(flags);
	rcu_preempt_qs(cpu);
	local_irq_restore(flags);
}

/*
 * Tree-preemptable RCU implementation for rcu_read_lock().
 * Just increment ->rcu_read_lock_nesting, shared state will be updated
 * if we block.
 */
void __rcu_read_lock(void)
{
	ACCESS_ONCE(current->rcu_read_lock_nesting)++;
	barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);

/*
 * Check for preempted RCU readers blocking the current grace period
 * for the specified rcu_node structure.  If the caller needs a reliable
 * answer, it must hold the rcu_node's ->lock.
 */
static int rcu_preempted_readers(struct rcu_node *rnp)
{
	int phase = rnp->gpnum & 0x1;

	return !list_empty(&rnp->blocked_tasks[phase]) ||
	       !list_empty(&rnp->blocked_tasks[phase + 2]);
}

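/*
 * Note (added commentary, not part of the original file): each rcu_node's
 * blocked_tasks[] has four lists.  Lists 0 and 1 are indexed by the
 * low-order bit of ->gpnum and hold readers queued against the current
 * and next normal grace period.  sync_rcu_preempt_exp_init() below
 * splices lists 0 and 1 onto lists 2 and 3, which therefore track the
 * readers that the current expedited grace period must wait for (see
 * rcu_preempted_readers_exp()).  Tasks on the expedited lists continue
 * to block the normal grace period as well, hence the phase + 2 check
 * in rcu_preempted_readers() above.
 */
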
/*
 * Record a quiescent state for all tasks that were previously queued
 * on the specified rcu_node structure and that were blocking the current
 * RCU grace period.  The caller must hold the specified rnp->lock with
 * irqs disabled, and this lock is released upon return, but irqs remain
 * disabled.
 */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
	__releases(rnp->lock)
{
	unsigned long mask;
	struct rcu_node *rnp_p;

	if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
		return;  /* Still need more quiescent states! */
	}

	rnp_p = rnp->parent;
	if (rnp_p == NULL) {
		/*
		 * Either there is only one rcu_node in the tree,
		 * or tasks were kicked up to root rcu_node due to
		 * CPUs going offline.
		 */
		rcu_report_qs_rsp(&rcu_preempt_state, flags);
		return;
	}

	/* Report up the rest of the hierarchy. */
	mask = rnp->grpmask;
	raw_spin_unlock(&rnp->lock);	/* irqs remain disabled. */
	raw_spin_lock(&rnp_p->lock);	/* irqs already disabled. */
	rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags);
}

/*
 * Handle special cases during rcu_read_unlock(), such as needing to
 * notify RCU core processing or task having blocked during the RCU
 * read-side critical section.
 */
static void rcu_read_unlock_special(struct task_struct *t)
{
	int empty;
	int empty_exp;
	unsigned long flags;
	struct rcu_node *rnp;
	int special;

	/* NMI handlers cannot block and cannot safely manipulate state. */
	if (in_nmi())
		return;

	local_irq_save(flags);

	/*
	 * If RCU core is waiting for this CPU to exit critical section,
	 * let it know that we have done so.
	 */
	special = t->rcu_read_unlock_special;
	if (special & RCU_READ_UNLOCK_NEED_QS) {
		rcu_preempt_qs(smp_processor_id());
	}

	/* Hardware IRQ handlers cannot block. */
	if (in_irq()) {
		local_irq_restore(flags);
		return;
	}

	/* Clean up if blocked during RCU read-side critical section. */
	if (special & RCU_READ_UNLOCK_BLOCKED) {
		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;

		/*
		 * Remove this task from the list it blocked on.  The
		 * task can migrate while we acquire the lock, but at
		 * most one time.  So at most two passes through loop.
		 */
		for (;;) {
			rnp = t->rcu_blocked_node;
			raw_spin_lock(&rnp->lock);  /* irqs already disabled. */
			if (rnp == t->rcu_blocked_node)
				break;
			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
		}
		empty = !rcu_preempted_readers(rnp);
		empty_exp = !rcu_preempted_readers_exp(rnp);
		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
		list_del_init(&t->rcu_node_entry);
		t->rcu_blocked_node = NULL;

		/*
		 * If this was the last task on the current list, and if
		 * we aren't waiting on any CPUs, report the quiescent state.
		 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
		 */
		if (empty)
			raw_spin_unlock_irqrestore(&rnp->lock, flags);
		else
			rcu_report_unblock_qs_rnp(rnp, flags);

		/*
		 * If this was the last task on the expedited lists,
		 * then we need to report up the rcu_node hierarchy.
		 */
		if (!empty_exp && !rcu_preempted_readers_exp(rnp))
			rcu_report_exp_rnp(&rcu_preempt_state, rnp);
	} else {
		local_irq_restore(flags);
	}
}

/*
 * Tree-preemptable RCU implementation for rcu_read_unlock().
 * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
 * invoke rcu_read_unlock_special() to clean up after a context switch
 * in an RCU read-side critical section and other special cases.
 */
void __rcu_read_unlock(void)
{
	struct task_struct *t = current;

	barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
	if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
	    unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
		rcu_read_unlock_special(t);
#ifdef CONFIG_PROVE_LOCKING
	WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0);
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);

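/*
 * Illustrative sketch (added commentary, not part of the original file):
 * __rcu_read_lock() and __rcu_read_unlock() back the rcu_read_lock() and
 * rcu_read_unlock() wrappers that readers use.  A typical reader looks
 * like the following, where "gp" and "do_something_with()" are
 * hypothetical names used only for illustration:
 *
 *	rcu_read_lock();
 *	p = rcu_dereference(gp);
 *	if (p)
 *		do_something_with(p);
 *	rcu_read_unlock();
 *
 * If the reader is preempted between rcu_read_lock() and
 * rcu_read_unlock(), rcu_preempt_note_context_switch() queues the task
 * on blocked_tasks[], and rcu_read_unlock_special() dequeues it again.
 */
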
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR

#ifdef CONFIG_RCU_CPU_STALL_VERBOSE

/*
 * Dump detailed information for all tasks blocking the current RCU
 * grace period on the specified rcu_node structure.
 */
static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
{
	unsigned long flags;
	struct list_head *lp;
	int phase;
	struct task_struct *t;

	if (rcu_preempted_readers(rnp)) {
		raw_spin_lock_irqsave(&rnp->lock, flags);
		phase = rnp->gpnum & 0x1;
		lp = &rnp->blocked_tasks[phase];
		list_for_each_entry(t, lp, rcu_node_entry)
			sched_show_task(t);
		raw_spin_unlock_irqrestore(&rnp->lock, flags);
	}
}

/*
 * Dump detailed information for all tasks blocking the current RCU
 * grace period.
 */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
	struct rcu_node *rnp = rcu_get_root(rsp);

	rcu_print_detail_task_stall_rnp(rnp);
	rcu_for_each_leaf_node(rsp, rnp)
		rcu_print_detail_task_stall_rnp(rnp);
}

#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */

static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}

#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */

/*
 * Scan the current list of tasks blocked within RCU read-side critical
 * sections, printing out the tid of each.
 */
static void rcu_print_task_stall(struct rcu_node *rnp)
{
	struct list_head *lp;
	int phase;
	struct task_struct *t;

	if (rcu_preempted_readers(rnp)) {
		phase = rnp->gpnum & 0x1;
		lp = &rnp->blocked_tasks[phase];
		list_for_each_entry(t, lp, rcu_node_entry)
			printk(" P%d", t->pid);
	}
}

#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*
 * Check that the list of blocked tasks for the newly completed grace
 * period is in fact empty.  It is a serious bug to complete a grace
 * period that still has RCU readers blocked!  This function must be
 * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
 * must be held by the caller.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rcu_preempted_readers(rnp));
	WARN_ON_ONCE(rnp->qsmask);
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Handle tasklist migration for case in which all CPUs covered by the
 * specified rcu_node have gone offline.  Move them up to the root
 * rcu_node.  The reason for not just moving them to the immediate
 * parent is to remove the need for rcu_read_unlock_special() to
 * make more than two attempts to acquire the target rcu_node's lock.
 * Returns true if there were tasks blocking the current RCU grace
 * period.
 *
 * Returns 1 if there was previously a task blocking the current grace
 * period on the specified rcu_node structure.
 *
 * The caller must hold rnp->lock with irqs disabled.
 */
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
				     struct rcu_node *rnp,
				     struct rcu_data *rdp)
{
	int i;
	struct list_head *lp;
	struct list_head *lp_root;
	int retval = 0;
	struct rcu_node *rnp_root = rcu_get_root(rsp);
	struct task_struct *tp;

	if (rnp == rnp_root) {
		WARN_ONCE(1, "Last CPU thought to be offlined?");
		return 0;  /* Shouldn't happen: at least one CPU online. */
	}
	WARN_ON_ONCE(rnp != rdp->mynode &&
		     (!list_empty(&rnp->blocked_tasks[0]) ||
		      !list_empty(&rnp->blocked_tasks[1]) ||
		      !list_empty(&rnp->blocked_tasks[2]) ||
		      !list_empty(&rnp->blocked_tasks[3])));

	/*
	 * Move tasks up to root rcu_node.  Rely on the fact that the
	 * root rcu_node can be at most one ahead of the rest of the
	 * rcu_nodes in terms of gp_num value.  This fact allows us to
	 * move the blocked_tasks[] array directly, element by element.
	 */
	if (rcu_preempted_readers(rnp))
		retval |= RCU_OFL_TASKS_NORM_GP;
	if (rcu_preempted_readers_exp(rnp))
		retval |= RCU_OFL_TASKS_EXP_GP;
	for (i = 0; i < 4; i++) {
		lp = &rnp->blocked_tasks[i];
		lp_root = &rnp_root->blocked_tasks[i];
		while (!list_empty(lp)) {
			tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
			raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
			list_del(&tp->rcu_node_entry);
			tp->rcu_blocked_node = rnp_root;
			list_add(&tp->rcu_node_entry, lp_root);
			raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */
		}
	}
	return retval;
}

/*
 * Do CPU-offline processing for preemptable RCU.
 */
static void rcu_preempt_offline_cpu(int cpu)
{
	__rcu_offline_cpu(cpu, &rcu_preempt_state);
}

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

/*
 * Check for a quiescent state from the current CPU.  When a task blocks,
 * the task is recorded in the corresponding CPU's rcu_node structure,
 * which is checked elsewhere.
 *
 * Caller must disable hard irqs.
 */
static void rcu_preempt_check_callbacks(int cpu)
{
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting == 0) {
		rcu_preempt_qs(cpu);
		return;
	}
	if (per_cpu(rcu_preempt_data, cpu).qs_pending)
		t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
}

/*
 * Process callbacks for preemptable RCU.
 */
static void rcu_preempt_process_callbacks(void)
{
	__rcu_process_callbacks(&rcu_preempt_state,
				&__get_cpu_var(rcu_preempt_data));
}

/*
 * Queue a preemptable-RCU callback for invocation after a grace period.
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	__call_rcu(head, func, &rcu_preempt_state);
}
EXPORT_SYMBOL_GPL(call_rcu);

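/*
 * Illustrative sketch (added commentary, not part of the original file):
 * callers typically embed an rcu_head in their own structure and recover
 * the enclosing object with container_of() in the callback.  The names
 * below (struct foo, foo_rcu_free, foo_delete) are hypothetical:
 *
 *	struct foo {
 *		struct list_head list;
 *		struct rcu_head rcu;
 *	};
 *
 *	static void foo_rcu_free(struct rcu_head *head)
 *	{
 *		kfree(container_of(head, struct foo, rcu));
 *	}
 *
 *	static void foo_delete(struct foo *p)
 *	{
 *		list_del_rcu(&p->list);
 *		call_rcu(&p->rcu, foo_rcu_free);
 *	}
 */
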
/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void synchronize_rcu(void)
{
	struct rcu_synchronize rcu;

	if (!rcu_scheduler_active)
		return;

	init_rcu_head_on_stack(&rcu.head);
	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
	destroy_rcu_head_on_stack(&rcu.head);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);

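/*
 * Illustrative sketch (added commentary, not part of the original file):
 * a typical updater publishes a new version of an RCU-protected pointer,
 * waits for a grace period, and then frees the old version.  Here "gp",
 * "old", and "new" are hypothetical:
 *
 *	old = gp;
 *	rcu_assign_pointer(gp, new);
 *	synchronize_rcu();	(waits out pre-existing readers)
 *	kfree(old);
 *
 * call_rcu() above is the asynchronous counterpart for contexts that
 * cannot block.
 */
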
static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
static long sync_rcu_preempt_exp_count;
static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);

/*
 * Return non-zero if there are any tasks in RCU read-side critical
 * sections blocking the current preemptible-RCU expedited grace period.
 * If there is no preemptible-RCU expedited grace period currently in
 * progress, returns zero unconditionally.
 */
static int rcu_preempted_readers_exp(struct rcu_node *rnp)
{
	return !list_empty(&rnp->blocked_tasks[2]) ||
	       !list_empty(&rnp->blocked_tasks[3]);
}

/*
 * return non-zero if there is no RCU expedited grace period in progress
 * for the specified rcu_node structure, in other words, if all CPUs and
 * tasks covered by the specified rcu_node structure have done their bit
 * for the current expedited grace period.  Works only for preemptible
 * RCU -- other RCU implementation use other means.
 *
 * Caller must hold sync_rcu_preempt_exp_mutex.
 */
static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
	return !rcu_preempted_readers_exp(rnp) &&
	       ACCESS_ONCE(rnp->expmask) == 0;
}

/*
 * Report the exit from RCU read-side critical section for the last task
 * that queued itself during or before the current expedited preemptible-RCU
 * grace period.  This event is reported either to the rcu_node structure on
 * which the task was queued or to one of that rcu_node structure's ancestors,
 * recursively up the tree.  (Calm down, calm down, we do the recursion
 * iteratively!)
 *
 * Caller must hold sync_rcu_preempt_exp_mutex.
 */
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
{
	unsigned long flags;
	unsigned long mask;

	raw_spin_lock_irqsave(&rnp->lock, flags);
	for (;;) {
		if (!sync_rcu_preempt_exp_done(rnp))
			break;
		if (rnp->parent == NULL) {
			wake_up(&sync_rcu_preempt_exp_wq);
			break;
		}
		mask = rnp->grpmask;
		raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
		rnp = rnp->parent;
		raw_spin_lock(&rnp->lock); /* irqs already disabled */
		rnp->expmask &= ~mask;
	}
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}

/*
 * Snapshot the tasks blocking the newly started preemptible-RCU expedited
 * grace period for the specified rcu_node structure.  If there are no such
 * tasks, report it up the rcu_node hierarchy.
 *
 * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
 */
static void
sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
{
	int must_wait;

	raw_spin_lock(&rnp->lock); /* irqs already disabled */
	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
	must_wait = rcu_preempted_readers_exp(rnp);
	raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
	if (!must_wait)
		rcu_report_exp_rnp(rsp, rnp);
}

/*
 * Wait for an rcu-preempt grace period, but expedite it.  The basic idea
 * is to invoke synchronize_sched_expedited() to push all the tasks to
 * the ->blocked_tasks[] lists, move all entries from the first set of
 * ->blocked_tasks[] lists to the second set, and finally wait for this
 * second set to drain.
 */
void synchronize_rcu_expedited(void)
{
	unsigned long flags;
	struct rcu_node *rnp;
	struct rcu_state *rsp = &rcu_preempt_state;
	long snap;
	int trycount = 0;

	smp_mb(); /* Caller's modifications seen first by other CPUs. */
	snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
	smp_mb(); /* Above access cannot bleed into critical section. */

	/*
	 * Acquire lock, falling back to synchronize_rcu() if too many
	 * lock-acquisition failures.  Of course, if someone does the
	 * expedited grace period for us, just leave.
	 */
	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
		if (trycount++ < 10)
			udelay(trycount * num_online_cpus());
		else {
			synchronize_rcu();
			return;
		}
		if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
			goto mb_ret; /* Others did our work for us. */
	}
	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
		goto unlock_mb_ret; /* Others did our work for us. */

	/* force all RCU readers onto blocked_tasks[]. */
	synchronize_sched_expedited();

	raw_spin_lock_irqsave(&rsp->onofflock, flags);

	/* Initialize ->expmask for all non-leaf rcu_node structures. */
	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
		rnp->expmask = rnp->qsmaskinit;
		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
	}

	/* Snapshot current state of ->blocked_tasks[] lists. */
	rcu_for_each_leaf_node(rsp, rnp)
		sync_rcu_preempt_exp_init(rsp, rnp);
	if (NUM_RCU_NODES > 1)
		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));

	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);

	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
	rnp = rcu_get_root(rsp);
	wait_event(sync_rcu_preempt_exp_wq,
		   sync_rcu_preempt_exp_done(rnp));

	/* Clean up and exit. */
	smp_mb(); /* ensure expedited GP seen before counter increment. */
	ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
unlock_mb_ret:
	mutex_unlock(&sync_rcu_preempt_exp_mutex);
mb_ret:
	smp_mb(); /* ensure subsequent action seen after grace period. */
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

/*
 * Check to see if there is any immediate preemptable-RCU-related work
 * to be done.
 */
static int rcu_preempt_pending(int cpu)
{
	return __rcu_pending(&rcu_preempt_state,
			     &per_cpu(rcu_preempt_data, cpu));
}

/*
 * Does preemptable RCU need the CPU to stay out of dynticks mode?
 */
static int rcu_preempt_needs_cpu(int cpu)
{
	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
}

/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 */
void rcu_barrier(void)
{
	_rcu_barrier(&rcu_preempt_state, call_rcu);
}
EXPORT_SYMBOL_GPL(rcu_barrier);

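/*
 * Usage note (added commentary, not part of the original file): a common
 * pattern is for a module's exit path to stop posting new call_rcu()
 * callbacks and then invoke rcu_barrier(), so that every callback already
 * queued has run before the module's text and data are freed.
 */
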
/*
 * Initialize preemptable RCU's per-CPU data.
 */
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
	rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
}

/*
 * Move preemptable RCU's callbacks to ->orphan_cbs_list.
 */
static void rcu_preempt_send_cbs_to_orphanage(void)
{
	rcu_send_cbs_to_orphanage(&rcu_preempt_state);
}

/*
 * Initialize preemptable RCU's state structures.
 */
static void __init __rcu_init_preempt(void)
{
	RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
}

/*
 * Check for a task exiting while in a preemptable-RCU read-side
 * critical section, clean up if so.  No need to issue warnings,
 * as debug_check_no_locks_held() already does this if lockdep
 * is enabled.
 */
void exit_rcu(void)
{
	struct task_struct *t = current;

	if (t->rcu_read_lock_nesting == 0)
		return;
	t->rcu_read_lock_nesting = 1;
	rcu_read_unlock();
}

#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */

/*
 * Tell them what RCU they are running.
 */
static void __init rcu_bootup_announce(void)
{
	printk(KERN_INFO "Hierarchical RCU implementation.\n");
	rcu_bootup_announce_oddness();
}

/*
 * Return the number of RCU batches processed thus far for debug & stats.
 */
long rcu_batches_completed(void)
{
	return rcu_batches_completed_sched();
}
EXPORT_SYMBOL_GPL(rcu_batches_completed);

/*
 * Force a quiescent state for RCU, which, because there is no preemptible
 * RCU, becomes the same as rcu-sched.
 */
void rcu_force_quiescent_state(void)
{
	rcu_sched_force_quiescent_state();
}
EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);

/*
 * Because preemptable RCU does not exist, we never have to check for
 * CPUs being in quiescent states.
 */
static void rcu_preempt_note_context_switch(int cpu)
{
}

/*
 * Because preemptable RCU does not exist, there are never any preempted
 * RCU readers.
 */
static int rcu_preempted_readers(struct rcu_node *rnp)
{
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU

/* Because preemptible RCU does not exist, no quieting of tasks. */
static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
{
	raw_spin_unlock_irqrestore(&rnp->lock, flags);
}

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

#ifdef CONFIG_RCU_CPU_STALL_DETECTOR

/*
 * Because preemptable RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.
 */
static void rcu_print_detail_task_stall(struct rcu_state *rsp)
{
}

/*
 * Because preemptable RCU does not exist, we never have to check for
 * tasks blocked within RCU read-side critical sections.
 */
static void rcu_print_task_stall(struct rcu_node *rnp)
{
}

#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */

/*
 * Because there is no preemptable RCU, there can be no readers blocked,
 * so there is no need to check for blocked tasks.  So check only for
 * bogus qsmask values.
 */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
{
	WARN_ON_ONCE(rnp->qsmask);
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Because preemptable RCU does not exist, it never needs to migrate
 * tasks that were blocked within RCU read-side critical sections, and
 * such non-existent tasks cannot possibly have been blocking the current
 * grace period.
 */
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
				     struct rcu_node *rnp,
				     struct rcu_data *rdp)
{
	return 0;
}

/*
 * Because preemptable RCU does not exist, it never needs CPU-offline
 * processing.
 */
static void rcu_preempt_offline_cpu(int cpu)
{
}

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to check.
 */
static void rcu_preempt_check_callbacks(int cpu)
{
}

/*
 * Because preemptable RCU does not exist, it never has any callbacks
 * to process.
 */
static void rcu_preempt_process_callbacks(void)
{
}

/*
 * In classic RCU, call_rcu() is just call_rcu_sched().
 */
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
	call_rcu_sched(head, func);
}
EXPORT_SYMBOL_GPL(call_rcu);

/*
 * Wait for an rcu-preempt grace period, but make it happen quickly.
 * But because preemptable RCU does not exist, map to rcu-sched.
 */
void synchronize_rcu_expedited(void)
{
	synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Because preemptable RCU does not exist, there is never any need to
 * report on tasks preempted in RCU read-side critical sections during
 * expedited RCU grace periods.
 */
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
{
	return;
}

#endif /* #ifdef CONFIG_HOTPLUG_CPU */

/*
 * Because preemptable RCU does not exist, it never has any work to do.
 */
static int rcu_preempt_pending(int cpu)
{
	return 0;
}

/*
 * Because preemptable RCU does not exist, it never needs any CPU.
 */
static int rcu_preempt_needs_cpu(int cpu)
{
	return 0;
}

/*
 * Because preemptable RCU does not exist, rcu_barrier() is just
 * another name for rcu_barrier_sched().
 */
void rcu_barrier(void)
{
	rcu_barrier_sched();
}
EXPORT_SYMBOL_GPL(rcu_barrier);

/*
 * Because preemptable RCU does not exist, there is no per-CPU
 * data to initialize.
 */
static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
{
}

/*
 * Because there is no preemptable RCU, there are no callbacks to move.
 */
static void rcu_preempt_send_cbs_to_orphanage(void)
{
}

/*
 * Because preemptable RCU does not exist, it need not be initialized.
 */
static void __init __rcu_init_preempt(void)
{
}

#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */

#if !defined(CONFIG_RCU_FAST_NO_HZ)

/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * Because we have preemptible RCU, just check whether this CPU needs
 * any flavor of RCU.  Do not chew up lots of CPU cycles with preemption
 * disabled in a most-likely vain attempt to cause RCU not to need this CPU.
 */
int rcu_needs_cpu(int cpu)
{
	return rcu_needs_cpu_quick_check(cpu);
}

/*
 * Check to see if we need to continue a callback-flush operations to
 * allow the last CPU to enter dyntick-idle mode.  But fast dyntick-idle
 * entry is not configured, so we never do need to.
 */
static void rcu_needs_cpu_flush(void)
{
}

#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */

#define RCU_NEEDS_CPU_FLUSHES 5
static DEFINE_PER_CPU(int, rcu_dyntick_drain);
static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);

/*
 * Check to see if any future RCU-related work will need to be done
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.  This function is part of the RCU implementation; it is -not-
 * an exported member of the RCU API.
 *
 * Because we are not supporting preemptible RCU, attempt to accelerate
 * any current grace periods so that RCU no longer needs this CPU, but
 * only if all other CPUs are already in dynticks-idle mode.  This will
 * allow the CPU cores to be powered down immediately, as opposed to after
 * waiting many milliseconds for grace periods to elapse.
 *
 * Because it is not legal to invoke rcu_process_callbacks() with irqs
 * disabled, we do one pass of force_quiescent_state(), then do a
 * raise_softirq() to cause rcu_process_callbacks() to be invoked later.
 * The per-cpu rcu_dyntick_drain variable controls the sequencing.
 */
int rcu_needs_cpu(int cpu)
{
	int c = 0;
	int snap;
	int snap_nmi;
	int thatcpu;

	/* Check for being in the holdoff period. */
	if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies)
		return rcu_needs_cpu_quick_check(cpu);

	/* Don't bother unless we are the last non-dyntick-idle CPU. */
	for_each_online_cpu(thatcpu) {
		if (thatcpu == cpu)
			continue;
		snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
		snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
		smp_mb(); /* Order sampling of snap with end of grace period. */
		if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
			per_cpu(rcu_dyntick_drain, cpu) = 0;
			per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
			return rcu_needs_cpu_quick_check(cpu);
		}
	}

	/* Check and update the rcu_dyntick_drain sequencing. */
	if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
		/* First time through, initialize the counter. */
		per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES;
	} else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) {
		/* We have hit the limit, so time to give up. */
		per_cpu(rcu_dyntick_holdoff, cpu) = jiffies;
		return rcu_needs_cpu_quick_check(cpu);
	}

	/* Do one step pushing remaining RCU callbacks through. */
	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
		rcu_sched_qs(cpu);
		force_quiescent_state(&rcu_sched_state, 0);
		c = c || per_cpu(rcu_sched_data, cpu).nxtlist;
	}
	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
		rcu_bh_qs(cpu);
		force_quiescent_state(&rcu_bh_state, 0);
		c = c || per_cpu(rcu_bh_data, cpu).nxtlist;
	}

	/* If RCU callbacks are still pending, RCU still needs this CPU. */
	if (c)
		raise_softirq(RCU_SOFTIRQ);
	return c;
}

/*
 * Check to see if we need to continue a callback-flush operations to
 * allow the last CPU to enter dyntick-idle mode.
 */
static void rcu_needs_cpu_flush(void)
{
	int cpu = smp_processor_id();
	unsigned long flags;

	if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
		return;
	local_irq_save(flags);
	(void)rcu_needs_cpu(cpu);
	local_irq_restore(flags);
}

#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */