/*
 * linux/net/sunrpc/svc.c
 *
 * High-level RPC service routines
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 *
 * Multiple thread pools and NUMAisation
 * Copyright (c) 2006 Silicon Graphics, Inc.
 * by Greg Banks <gnb@melbourne.sgi.com>
 */

#include <linux/linkage.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/slab.h>

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/bc_xprt.h>

#define RPCDBG_FACILITY	RPCDBG_SVCDSP

static void svc_unregister(const struct svc_serv *serv);

#define svc_serv_is_pooled(serv)    ((serv)->sv_function)

/*
 * Mode for mapping cpus to pools.
 */
enum {
	SVC_POOL_AUTO = -1,	/* choose one of the others */
	SVC_POOL_GLOBAL,	/* no mapping, just a single global pool
				 * (legacy & UP mode) */
	SVC_POOL_PERCPU,	/* one pool per cpu */
	SVC_POOL_PERNODE	/* one pool per numa node */
};
#define SVC_POOL_DEFAULT	SVC_POOL_GLOBAL

/*
 * Structure for mapping cpus to pools and vice versa.
 * Set up once during sunrpc initialisation.
 */
static struct svc_pool_map {
	int count;			/* How many svc_servs use us */
	int mode;			/* Note: int not enum to avoid
					 * warnings about "enumeration value
					 * not handled in switch" */
	unsigned int npools;
	unsigned int *pool_to;		/* maps pool id to cpu or node */
	unsigned int *to_pool;		/* maps cpu or node to pool id */
} svc_pool_map = {
	.count = 0,
	.mode = SVC_POOL_DEFAULT
};
static DEFINE_MUTEX(svc_pool_map_mutex);	/* protects svc_pool_map.count only */
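/*
 * Parse a value written to the "pool_mode" module parameter.
 * Changing the mode is refused with -EBUSY while any svc_serv
 * holds a reference on the map, since the cpu/node-to-pool
 * mapping cannot change under a live service.
 */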
static int
param_set_pool_mode(const char *val, struct kernel_param *kp)
{
	int *ip = (int *)kp->arg;
	struct svc_pool_map *m = &svc_pool_map;
	int err;

	mutex_lock(&svc_pool_map_mutex);

	err = -EBUSY;
	if (m->count)
		goto out;

	err = 0;
	if (!strncmp(val, "auto", 4))
		*ip = SVC_POOL_AUTO;
	else if (!strncmp(val, "global", 6))
		*ip = SVC_POOL_GLOBAL;
	else if (!strncmp(val, "percpu", 6))
		*ip = SVC_POOL_PERCPU;
	else if (!strncmp(val, "pernode", 7))
		*ip = SVC_POOL_PERNODE;
	else
		err = -EINVAL;

out:
	mutex_unlock(&svc_pool_map_mutex);
	return err;
}
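/*
 * Format the current pool mode for reads of the "pool_mode"
 * module parameter.
 */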
static int
param_get_pool_mode(char *buf, struct kernel_param *kp)
{
	int *ip = (int *)kp->arg;

	switch (*ip) {
	case SVC_POOL_AUTO:
		return strlcpy(buf, "auto", 20);
	case SVC_POOL_GLOBAL:
		return strlcpy(buf, "global", 20);
	case SVC_POOL_PERCPU:
		return strlcpy(buf, "percpu", 20);
	case SVC_POOL_PERNODE:
		return strlcpy(buf, "pernode", 20);
	default:
		return sprintf(buf, "%d", *ip);
	}
}

module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
		 &svc_pool_map.mode, 0644);
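/*
 * With the 0644 permission above, the mode is also tunable at run
 * time, e.g. (assuming sysfs is mounted in the usual place):
 *
 *	echo pernode > /sys/module/sunrpc/parameters/pool_mode
 *
 * which succeeds only while no pooled service is running.
 */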

/*
 * Detect the best pool mapping mode heuristically,
 * according to the machine's topology.
 */
static int
svc_pool_map_choose_mode(void)
{
	unsigned int node;

	if (nr_online_nodes > 1) {
		/*
		 * Actually have multiple NUMA nodes,
		 * so split pools on NUMA node boundaries
		 */
		return SVC_POOL_PERNODE;
	}

	node = first_online_node;
	if (nr_cpus_node(node) > 2) {
		/*
		 * Non-trivial SMP, or CONFIG_NUMA on
		 * non-NUMA hardware, e.g. with a generic
		 * x86_64 kernel on Xeons.  In this case we
		 * want to divide the pools on cpu boundaries.
		 */
		return SVC_POOL_PERCPU;
	}

	/* default: one global pool */
	return SVC_POOL_GLOBAL;
}
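/*
 * Worked example of the heuristic above: a two-node NUMA machine
 * gets one pool per node; a single-node box with more than two
 * cpus gets one pool per cpu; anything smaller keeps the single
 * global pool.
 */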

/*
 * Allocate the to_pool[] and pool_to[] arrays.
 * Returns 0 on success or an errno.
 */
static int
svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
{
	m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
	if (!m->to_pool)
		goto fail;
	m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
	if (!m->pool_to)
		goto fail_free;

	return 0;

fail_free:
	kfree(m->to_pool);
fail:
	return -ENOMEM;
}

/*
 * Initialise the pool map for SVC_POOL_PERCPU mode.
 * Returns number of pools or <0 on error.
 */
static int
svc_pool_map_init_percpu(struct svc_pool_map *m)
{
	unsigned int maxpools = nr_cpu_ids;
	unsigned int pidx = 0;
	unsigned int cpu;
	int err;

	err = svc_pool_map_alloc_arrays(m, maxpools);
	if (err)
		return err;

	for_each_online_cpu(cpu) {
		BUG_ON(pidx > maxpools);
		m->to_pool[cpu] = pidx;
		m->pool_to[pidx] = cpu;
		pidx++;
	}
	/* cpus brought online later all get mapped to pool0, sorry */

	return pidx;
}


/*
 * Initialise the pool map for SVC_POOL_PERNODE mode.
 * Returns number of pools or <0 on error.
 */
static int
svc_pool_map_init_pernode(struct svc_pool_map *m)
{
	unsigned int maxpools = nr_node_ids;
	unsigned int pidx = 0;
	unsigned int node;
	int err;

	err = svc_pool_map_alloc_arrays(m, maxpools);
	if (err)
		return err;

	for_each_node_with_cpus(node) {
		/* some architectures (e.g. SN2) have cpuless nodes */
		BUG_ON(pidx > maxpools);
		m->to_pool[node] = pidx;
		m->pool_to[pidx] = node;
		pidx++;
	}
	/* nodes brought online later all get mapped to pool0, sorry */

	return pidx;
}


/*
 * Add a reference to the global map of cpus to pools (and
 * vice versa).  Initialise the map if we're the first user.
 * Returns the number of pools.
 */
static unsigned int
svc_pool_map_get(void)
{
	struct svc_pool_map *m = &svc_pool_map;
	int npools = -1;

	mutex_lock(&svc_pool_map_mutex);

	if (m->count++) {
		mutex_unlock(&svc_pool_map_mutex);
		return m->npools;
	}

	if (m->mode == SVC_POOL_AUTO)
		m->mode = svc_pool_map_choose_mode();

	switch (m->mode) {
	case SVC_POOL_PERCPU:
		npools = svc_pool_map_init_percpu(m);
		break;
	case SVC_POOL_PERNODE:
		npools = svc_pool_map_init_pernode(m);
		break;
	}

	if (npools < 0) {
		/* default, or memory allocation failure */
		npools = 1;
		m->mode = SVC_POOL_GLOBAL;
	}
	m->npools = npools;

	mutex_unlock(&svc_pool_map_mutex);
	return m->npools;
}


/*
 * Drop a reference to the global map of cpus to pools.
 * When the last reference is dropped, the map data is
 * freed; this allows the sysadmin to change the pool
 * mode using the pool_mode module option without
 * rebooting or re-loading sunrpc.ko.
 */
static void
svc_pool_map_put(void)
{
	struct svc_pool_map *m = &svc_pool_map;

	mutex_lock(&svc_pool_map_mutex);

	if (!--m->count) {
		m->mode = SVC_POOL_DEFAULT;
		kfree(m->to_pool);
		kfree(m->pool_to);
		m->npools = 0;
	}

	mutex_unlock(&svc_pool_map_mutex);
}


/*
 * Set the given thread's cpus_allowed mask so that it
 * will only run on cpus in the given pool.
 */
static inline void
svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
{
	struct svc_pool_map *m = &svc_pool_map;
	unsigned int node = m->pool_to[pidx];

	/*
	 * The caller checks for sv_nrpools > 1, which
	 * implies that we've been initialized.
	 */
	BUG_ON(m->count == 0);

	switch (m->mode) {
	case SVC_POOL_PERCPU:
	{
		set_cpus_allowed_ptr(task, cpumask_of(node));
		break;
	}
	case SVC_POOL_PERNODE:
	{
		set_cpus_allowed_ptr(task, cpumask_of_node(node));
		break;
	}
	}
}

/*
 * Use the mapping mode to choose a pool for a given CPU.
 * Used when enqueueing an incoming RPC.  Always returns
 * a non-NULL pool pointer.
 */
struct svc_pool *
svc_pool_for_cpu(struct svc_serv *serv, int cpu)
{
	struct svc_pool_map *m = &svc_pool_map;
	unsigned int pidx = 0;

	/*
	 * The map may be uninitialised in a pure client, e.g. when
	 * lockd is brought up, so silently treat that case the
	 * same as SVC_POOL_GLOBAL.
	 */
	if (svc_serv_is_pooled(serv)) {
		switch (m->mode) {
		case SVC_POOL_PERCPU:
			pidx = m->to_pool[cpu];
			break;
		case SVC_POOL_PERNODE:
			pidx = m->to_pool[cpu_to_node(cpu)];
			break;
		}
	}
	return &serv->sv_pools[pidx % serv->sv_nrpools];
}
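/*
 * For example, in SVC_POOL_PERCPU mode an RPC arriving on cpu 2 is
 * queued to pool 2, while in SVC_POOL_PERNODE mode it is queued to
 * the pool of cpu 2's NUMA node.  The modulo above is just a safety
 * net in case the map disagrees with the server's pool count.
 */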

/*
 * Create an RPC service
 */
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
	     void (*shutdown)(struct svc_serv *serv))
{
	struct svc_serv	*serv;
	unsigned int vers;
	unsigned int xdrsize;
	unsigned int i;

	if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
		return NULL;
	serv->sv_name      = prog->pg_name;
	serv->sv_program   = prog;
	serv->sv_nrthreads = 1;
	serv->sv_stats     = prog->pg_stats;
	if (bufsize > RPCSVC_MAXPAYLOAD)
		bufsize = RPCSVC_MAXPAYLOAD;
	serv->sv_max_payload = bufsize ? bufsize : 4096;
	serv->sv_max_mesg  = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE);
	serv->sv_shutdown  = shutdown;
	xdrsize = 0;
	while (prog) {
		prog->pg_lovers = prog->pg_nvers-1;
		for (vers = 0; vers < prog->pg_nvers; vers++)
			if (prog->pg_vers[vers]) {
				prog->pg_hivers = vers;
				if (prog->pg_lovers > vers)
					prog->pg_lovers = vers;
				if (prog->pg_vers[vers]->vs_xdrsize > xdrsize)
					xdrsize = prog->pg_vers[vers]->vs_xdrsize;
			}
		prog = prog->pg_next;
	}
	serv->sv_xdrsize   = xdrsize;
	INIT_LIST_HEAD(&serv->sv_tempsocks);
	INIT_LIST_HEAD(&serv->sv_permsocks);
	init_timer(&serv->sv_temptimer);
	spin_lock_init(&serv->sv_lock);

	serv->sv_nrpools = npools;
	serv->sv_pools =
		kcalloc(serv->sv_nrpools, sizeof(struct svc_pool),
			GFP_KERNEL);
	if (!serv->sv_pools) {
		kfree(serv);
		return NULL;
	}

	for (i = 0; i < serv->sv_nrpools; i++) {
		struct svc_pool *pool = &serv->sv_pools[i];

		dprintk("svc: initialising pool %u for %s\n",
				i, serv->sv_name);

		pool->sp_id = i;
		INIT_LIST_HEAD(&pool->sp_threads);
		INIT_LIST_HEAD(&pool->sp_sockets);
		INIT_LIST_HEAD(&pool->sp_all_threads);
		spin_lock_init(&pool->sp_lock);
	}

	/* Remove any stale portmap registrations */
	svc_unregister(serv);

	return serv;
}

struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
	   void (*shutdown)(struct svc_serv *serv))
{
	return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL_GPL(svc_create);

struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
		  void (*shutdown)(struct svc_serv *serv),
		  svc_thread_fn func, struct module *mod)
{
	struct svc_serv *serv;
	unsigned int npools = svc_pool_map_get();

	serv = __svc_create(prog, bufsize, npools, shutdown);

	if (serv != NULL) {
		serv->sv_function = func;
		serv->sv_module = mod;
	}

	return serv;
}
EXPORT_SYMBOL_GPL(svc_create_pooled);
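/*
 * Illustrative only (the names are hypothetical, not an in-tree
 * caller): a pooled service would typically be set up as
 *
 *	serv = svc_create_pooled(&my_program, bufsize, my_shutdown,
 *				 my_thread_fn, THIS_MODULE);
 *
 * where my_thread_fn is the function each service kthread runs and
 * the module reference pins the caller while threads are created.
 */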

/*
 * Destroy an RPC service.  Should be called with appropriate locking
 * to protect sv_nrthreads, sv_permsocks and sv_tempsocks.
 */
void
svc_destroy(struct svc_serv *serv)
{
	dprintk("svc: svc_destroy(%s, %d)\n",
				serv->sv_program->pg_name,
				serv->sv_nrthreads);

	if (serv->sv_nrthreads) {
		if (--(serv->sv_nrthreads) != 0) {
			svc_sock_update_bufs(serv);
			return;
		}
	} else
		printk("svc_destroy: no threads for serv=%p!\n", serv);

	del_timer_sync(&serv->sv_temptimer);

	svc_close_all(&serv->sv_tempsocks);

	if (serv->sv_shutdown)
		serv->sv_shutdown(serv);

	svc_close_all(&serv->sv_permsocks);

	BUG_ON(!list_empty(&serv->sv_permsocks));
	BUG_ON(!list_empty(&serv->sv_tempsocks));

	cache_clean_deferred(serv);

	if (svc_serv_is_pooled(serv))
		svc_pool_map_put();

#if defined(CONFIG_NFS_V4_1)
	svc_sock_destroy(serv->bc_xprt);
#endif /* CONFIG_NFS_V4_1 */

	svc_unregister(serv);
	kfree(serv->sv_pools);
	kfree(serv);
}
EXPORT_SYMBOL_GPL(svc_destroy);

/*
 * Allocate an RPC server's buffer space.
 * We allocate pages and place them in rq_argpages.
 */
static int
svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
{
	unsigned int pages, arghi;

	/* bc_xprt uses fore channel allocated buffers */
	if (svc_is_backchannel(rqstp))
		return 1;

	pages = size / PAGE_SIZE + 1; /* one extra page, as the same
				       * array holds both the request
				       * and the reply and we assume one
				       * of them fits in a single page
				       */
	arghi = 0;
	BUG_ON(pages > RPCSVC_MAXPAGES);
	while (pages) {
		struct page *p = alloc_page(GFP_KERNEL);
		if (!p)
			break;
		rqstp->rq_pages[arghi++] = p;
		pages--;
	}
	return pages == 0;
}

/*
 * Release an RPC server buffer
 */
static void
svc_release_buffer(struct svc_rqst *rqstp)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++)
		if (rqstp->rq_pages[i])
			put_page(rqstp->rq_pages[i]);
}
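/*
 * Allocate and initialise an svc_rqst for a new thread in the given
 * pool: XDR scratch space sized by sv_xdrsize, plus the page buffer
 * set up by svc_init_buffer().  On failure, svc_exit_thread() unwinds
 * whatever has been allocated, including the sv_nrthreads reference
 * taken here.
 */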
struct svc_rqst *
svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool)
{
	struct svc_rqst	*rqstp;

	rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
	if (!rqstp)
		goto out_enomem;

	init_waitqueue_head(&rqstp->rq_wait);

	serv->sv_nrthreads++;
	spin_lock_bh(&pool->sp_lock);
	pool->sp_nrthreads++;
	list_add(&rqstp->rq_all, &pool->sp_all_threads);
	spin_unlock_bh(&pool->sp_lock);
	rqstp->rq_server = serv;
	rqstp->rq_pool = pool;

	rqstp->rq_argp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
	if (!rqstp->rq_argp)
		goto out_thread;

	rqstp->rq_resp = kmalloc(serv->sv_xdrsize, GFP_KERNEL);
	if (!rqstp->rq_resp)
		goto out_thread;

	if (!svc_init_buffer(rqstp, serv->sv_max_mesg))
		goto out_thread;

	return rqstp;
out_thread:
	svc_exit_thread(rqstp);
out_enomem:
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL_GPL(svc_prepare_thread);

/*
 * Choose a pool in which to create a new thread, for svc_set_num_threads
 */
static inline struct svc_pool *
choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
	if (pool != NULL)
		return pool;

	return &serv->sv_pools[(*state)++ % serv->sv_nrpools];
}

/*
 * Choose a thread to kill, for svc_set_num_threads
 */
static inline struct task_struct *
choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
{
	unsigned int i;
	struct task_struct *task = NULL;

	if (pool != NULL) {
		spin_lock_bh(&pool->sp_lock);
	} else {
		/* choose a pool in round-robin fashion */
		for (i = 0; i < serv->sv_nrpools; i++) {
			pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
			spin_lock_bh(&pool->sp_lock);
			if (!list_empty(&pool->sp_all_threads))
				goto found_pool;
			spin_unlock_bh(&pool->sp_lock);
		}
		return NULL;
	}

found_pool:
	if (!list_empty(&pool->sp_all_threads)) {
		struct svc_rqst *rqstp;

		/*
		 * Remove from the pool->sp_all_threads list
		 * so we don't try to kill it again.
		 */
		rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
		list_del_init(&rqstp->rq_all);
		task = rqstp->rq_task;
	}
	spin_unlock_bh(&pool->sp_lock);

	return task;
}
/*
 * Create or destroy threads until the service has exactly the given
 * number.  If `pool' is non-NULL, this applies only to threads in
 * that pool; otherwise new threads are round-robined between all
 * pools.  Must be called with a svc_get() reference and the BKL or
 * another lock to protect access to svc_serv fields.
 *
 * Destroying threads relies on the service threads filling in
 * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
 * has been created using svc_create_pooled().
 *
 * Based on code that used to be in nfsd_svc() but tweaked
 * to be pool-aware.
 */
int
svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
{
	struct svc_rqst	*rqstp;
	struct task_struct *task;
	struct svc_pool *chosen_pool;
	int error = 0;
	unsigned int state = serv->sv_nrthreads-1;

	if (pool == NULL) {
		/* The -1 assumes caller has done a svc_get() */
		nrservs -= (serv->sv_nrthreads-1);
	} else {
		spin_lock_bh(&pool->sp_lock);
		nrservs -= pool->sp_nrthreads;
		spin_unlock_bh(&pool->sp_lock);
	}

	/* create new threads */
	while (nrservs > 0) {
		nrservs--;
		chosen_pool = choose_pool(serv, pool, &state);

		rqstp = svc_prepare_thread(serv, chosen_pool);
		if (IS_ERR(rqstp)) {
			error = PTR_ERR(rqstp);
			break;
		}

		__module_get(serv->sv_module);
		task = kthread_create(serv->sv_function, rqstp, serv->sv_name);
		if (IS_ERR(task)) {
			error = PTR_ERR(task);
			module_put(serv->sv_module);
			svc_exit_thread(rqstp);
			break;
		}

		rqstp->rq_task = task;
		if (serv->sv_nrpools > 1)
			svc_pool_map_set_cpumask(task, chosen_pool->sp_id);

		svc_sock_update_bufs(serv);
		wake_up_process(task);
	}
	/* destroy old threads */
	while (nrservs < 0 &&
	       (task = choose_victim(serv, pool, &state)) != NULL) {
		send_sig(SIGINT, task, 1);
		nrservs++;
	}

	return error;
}
EXPORT_SYMBOL_GPL(svc_set_num_threads);
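/*
 * A hypothetical caller growing a service to eight threads across
 * all pools (it must already hold a svc_get() reference):
 *
 *	error = svc_set_num_threads(serv, NULL, 8);
 *
 * Passing a specific pool instead confines both thread creation and
 * the SIGINT-based destruction above to that pool.
 */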

/*
 * Called from a server thread as it's exiting. Caller must hold the BKL or
 * the "service mutex", whichever is appropriate for the service.
 */
void
svc_exit_thread(struct svc_rqst *rqstp)
{
	struct svc_serv	*serv = rqstp->rq_server;
	struct svc_pool	*pool = rqstp->rq_pool;

	svc_release_buffer(rqstp);
	kfree(rqstp->rq_resp);
	kfree(rqstp->rq_argp);
	kfree(rqstp->rq_auth_data);

	spin_lock_bh(&pool->sp_lock);
	pool->sp_nrthreads--;
	list_del(&rqstp->rq_all);
	spin_unlock_bh(&pool->sp_lock);

	kfree(rqstp);

	/* Release the server */
	if (serv)
		svc_destroy(serv);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);

/*
 * Register an "inet" protocol family netid with the local
 * rpcbind daemon via an rpcbind v4 SET request.
 *
 * No netconfig infrastructure is available in the kernel, so
 * we map IP_ protocol numbers to netids by hand.
 *
 * Returns zero on success; a negative errno value is returned
 * if any error occurs.
 */
static int __svc_rpcb_register4(const u32 program, const u32 version,
				const unsigned short protocol,
				const unsigned short port)
{
	const struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
		.sin_port		= htons(port),
	};
	const char *netid;
	int error;

	switch (protocol) {
	case IPPROTO_UDP:
		netid = RPCBIND_NETID_UDP;
		break;
	case IPPROTO_TCP:
		netid = RPCBIND_NETID_TCP;
		break;
	default:
		return -ENOPROTOOPT;
	}

	error = rpcb_v4_register(program, version,
					(const struct sockaddr *)&sin, netid);

	/*
	 * User space didn't support rpcbind v4, so retry this
	 * registration request with the legacy rpcbind v2 protocol.
	 */
	if (error == -EPROTONOSUPPORT)
		error = rpcb_register(program, version, protocol, port);

	return error;
}

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
 * Register an "inet6" protocol family netid with the local
 * rpcbind daemon via an rpcbind v4 SET request.
 *
 * No netconfig infrastructure is available in the kernel, so
 * we map IP_ protocol numbers to netids by hand.
 *
 * Returns zero on success; a negative errno value is returned
 * if any error occurs.
 */
static int __svc_rpcb_register6(const u32 program, const u32 version,
				const unsigned short protocol,
				const unsigned short port)
{
	const struct sockaddr_in6 sin6 = {
		.sin6_family		= AF_INET6,
		.sin6_addr		= IN6ADDR_ANY_INIT,
		.sin6_port		= htons(port),
	};
	const char *netid;
	int error;

	switch (protocol) {
	case IPPROTO_UDP:
		netid = RPCBIND_NETID_UDP6;
		break;
	case IPPROTO_TCP:
		netid = RPCBIND_NETID_TCP6;
		break;
	default:
		return -ENOPROTOOPT;
	}

	error = rpcb_v4_register(program, version,
					(const struct sockaddr *)&sin6, netid);

	/*
	 * User space didn't support rpcbind version 4, so we won't
	 * use a PF_INET6 listener.
	 */
	if (error == -EPROTONOSUPPORT)
		error = -EAFNOSUPPORT;

	return error;
}
#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */

/*
 * Register a kernel RPC service via rpcbind version 4.
 *
 * Returns zero on success; a negative errno value is returned
 * if any error occurs.
 */
static int __svc_register(const char *progname,
			  const u32 program, const u32 version,
			  const int family,
			  const unsigned short protocol,
			  const unsigned short port)
{
	int error = -EAFNOSUPPORT;

	switch (family) {
	case PF_INET:
		error = __svc_rpcb_register4(program, version,
						protocol, port);
		break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
	case PF_INET6:
		error = __svc_rpcb_register6(program, version,
						protocol, port);
#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
	}

	if (error < 0)
		printk(KERN_WARNING "svc: failed to register %sv%u RPC "
			"service (errno %d).\n", progname, version, -error);
	return error;
}

/**
 * svc_register - register an RPC service with the local portmapper
 * @serv: svc_serv struct for the service to register
 * @family: protocol family of service's listener socket
 * @proto: transport protocol number to advertise
 * @port: port to advertise
 *
 * Service is registered for any address in the passed-in protocol family
 */
int svc_register(const struct svc_serv *serv, const int family,
		 const unsigned short proto, const unsigned short port)
{
	struct svc_program	*progp;
	unsigned int		i;
	int			error = 0;

	BUG_ON(proto == 0 && port == 0);

	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
		for (i = 0; i < progp->pg_nvers; i++) {
			if (progp->pg_vers[i] == NULL)
				continue;

			dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n",
					progp->pg_name,
					i,
					proto == IPPROTO_UDP ? "udp" : "tcp",
					port,
					family,
					progp->pg_vers[i]->vs_hidden ?
						" (but not telling portmap)" : "");

			if (progp->pg_vers[i]->vs_hidden)
				continue;

			error = __svc_register(progp->pg_name, progp->pg_prog,
						i, family, proto, port);
			if (error < 0)
				break;
		}
	}

	return error;
}

/*
 * If user space is running rpcbind, it should take the v4 UNSET
 * and clear everything for this [program, version].  If user space
 * is running portmap, it will reject the v4 UNSET, but won't have
 * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
 * in this case to clear all existing entries for [program, version].
 */
static void __svc_unregister(const u32 program, const u32 version,
			     const char *progname)
{
	int error;

	error = rpcb_v4_register(program, version, NULL, "");

	/*
	 * User space didn't support rpcbind v4, so retry this
	 * request with the legacy rpcbind v2 protocol.
	 */
	if (error == -EPROTONOSUPPORT)
		error = rpcb_register(program, version, 0, 0);

	dprintk("svc: %s(%sv%u), error %d\n",
			__func__, progname, version, error);
}

/*
 * All netids, bind addresses and ports registered for [program, version]
 * are removed from the local rpcbind database (if the service is not
 * hidden) to make way for a new instance of the service.
 *
 * The result of unregistration is reported via dprintk for those who want
 * verification of the result, but is otherwise not important.
 */
static void svc_unregister(const struct svc_serv *serv)
{
	struct svc_program *progp;
	unsigned long flags;
	unsigned int i;

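	/*
	 * The rpcbind calls below are interruptible, and this can run
	 * from a thread that already has a signal pending (e.g. during
	 * service shutdown); clear TIF_SIGPENDING so unregistration is
	 * not aborted, and recalculate the pending state afterwards.
	 */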
	clear_thread_flag(TIF_SIGPENDING);

	for (progp = serv->sv_program; progp; progp = progp->pg_next) {
		for (i = 0; i < progp->pg_nvers; i++) {
			if (progp->pg_vers[i] == NULL)
				continue;
			if (progp->pg_vers[i]->vs_hidden)
				continue;

			__svc_unregister(progp->pg_prog, i, progp->pg_name);
		}
	}

	spin_lock_irqsave(&current->sighand->siglock, flags);
	recalc_sigpending();
	spin_unlock_irqrestore(&current->sighand->siglock, flags);
}

/*
 * Printk the given error with the address of the client that caused it.
 */
static int
__attribute__ ((format (printf, 2, 3)))
svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
{
	va_list args;
	int	r;
	char	buf[RPC_MAX_ADDRBUFLEN];

	if (!net_ratelimit())
		return 0;

	printk(KERN_WARNING "svc: %s: ",
		svc_print_addr(rqstp, buf, sizeof(buf)));

	va_start(args, fmt);
	r = vprintk(fmt, args);
	va_end(args);

	return r;
}

/*
 * Common routine for processing the RPC request.
 */
static int
svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
{
	struct svc_program	*progp;
	struct svc_version	*versp = NULL;	/* compiler food */
	struct svc_procedure	*procp = NULL;
	struct svc_serv		*serv = rqstp->rq_server;
	kxdrproc_t		xdr;
	__be32			*statp;
	u32			prog, vers, proc;
	__be32			auth_stat, rpc_stat;
	int			auth_res;
	__be32			*reply_statp;

	rpc_stat = rpc_success;

	if (argv->iov_len < 6*4)
		goto err_short_len;

	/* Will be turned off only in gss privacy case: */
	rqstp->rq_splice_ok = 1;
	/* Will be turned off only when NFSv4 Sessions are used */
	rqstp->rq_usedeferral = 1;

	/* Setup reply header */
	rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);

	svc_putu32(resv, rqstp->rq_xid);

	vers = svc_getnl(argv);

	/* First words of reply: */
	svc_putnl(resv, 1);		/* REPLY */

	if (vers != 2)		/* RPC version number */
		goto err_bad_rpc;

	/* Save position in case we later decide to reject: */
	reply_statp = resv->iov_base + resv->iov_len;

	svc_putnl(resv, 0);		/* ACCEPT */

	rqstp->rq_prog = prog = svc_getnl(argv);	/* program number */
	rqstp->rq_vers = vers = svc_getnl(argv);	/* version number */
	rqstp->rq_proc = proc = svc_getnl(argv);	/* procedure number */

	for (progp = serv->sv_program; progp; progp = progp->pg_next)
		if (prog == progp->pg_prog)
			break;

	/*
	 * Decode auth data, and add verifier to reply buffer.
	 * We do this before anything else in order to get a decent
	 * auth verifier.
	 */
	auth_res = svc_authenticate(rqstp, &auth_stat);
	/* Also give the program a chance to reject this call: */
	if (auth_res == SVC_OK && progp) {
		auth_stat = rpc_autherr_badcred;
		auth_res = progp->pg_authenticate(rqstp);
	}
	switch (auth_res) {
	case SVC_OK:
		break;
	case SVC_GARBAGE:
		goto err_garbage;
	case SVC_SYSERR:
		rpc_stat = rpc_system_err;
		goto err_bad;
	case SVC_DENIED:
		goto err_bad_auth;
	case SVC_DROP:
		goto dropit;
	case SVC_COMPLETE:
		goto sendit;
	}

	if (progp == NULL)
		goto err_bad_prog;

	if (vers >= progp->pg_nvers ||
	    !(versp = progp->pg_vers[vers]))
		goto err_bad_vers;

	procp = versp->vs_proc + proc;
	if (proc >= versp->vs_nproc || !procp->pc_func)
		goto err_bad_proc;
	rqstp->rq_procinfo = procp;

	/* Syntactic check complete */
	serv->sv_stats->rpccnt++;

	/* Build the reply header. */
	statp = resv->iov_base + resv->iov_len;
	svc_putnl(resv, RPC_SUCCESS);

	/* Bump per-procedure stats counter */
	procp->pc_count++;

	/* Initialize storage for argp and resp */
	memset(rqstp->rq_argp, 0, procp->pc_argsize);
	memset(rqstp->rq_resp, 0, procp->pc_ressize);

	/* un-reserve some of the out-queue now that we have a
	 * better idea of reply size
	 */
	if (procp->pc_xdrressize)
		svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);

	/* Call the function that processes the request. */
	if (!versp->vs_dispatch) {
		/* Decode arguments */
		xdr = procp->pc_decode;
		if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
			goto err_garbage;

		*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);

		/* Encode reply */
		if (*statp == rpc_drop_reply) {
			if (procp->pc_release)
				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
			goto dropit;
		}
		if (*statp == rpc_success &&
		    (xdr = procp->pc_encode) &&
		    !xdr(rqstp, resv->iov_base + resv->iov_len, rqstp->rq_resp)) {
			dprintk("svc: failed to encode reply\n");
			/* serv->sv_stats->rpcsystemerr++; */
			*statp = rpc_system_err;
		}
	} else {
		dprintk("svc: calling dispatcher\n");
		if (!versp->vs_dispatch(rqstp, statp)) {
			/* Release reply info */
			if (procp->pc_release)
				procp->pc_release(rqstp, NULL, rqstp->rq_resp);
			goto dropit;
		}
	}

	/* Check RPC status result */
	if (*statp != rpc_success)
		resv->iov_len = ((void *)statp) - resv->iov_base + 4;

	/* Release reply info */
	if (procp->pc_release)
		procp->pc_release(rqstp, NULL, rqstp->rq_resp);

	if (procp->pc_encode == NULL)
		goto dropit;

 sendit:
	if (svc_authorise(rqstp))
		goto dropit;
	return 1;		/* Caller can now send it */

 dropit:
	svc_authorise(rqstp);	/* doesn't hurt to call this twice */
	dprintk("svc: svc_process dropit\n");
	svc_drop(rqstp);
	return 0;

err_short_len:
	svc_printk(rqstp, "short len %Zd, dropping request\n",
			argv->iov_len);

	goto dropit;			/* drop request */

err_bad_rpc:
	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, 1);	/* REJECT */
	svc_putnl(resv, 0);	/* RPC_MISMATCH */
	svc_putnl(resv, 2);	/* Only RPCv2 supported */
	svc_putnl(resv, 2);
	goto sendit;

err_bad_auth:
	dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
	serv->sv_stats->rpcbadauth++;
	/* Restore write pointer to location of accept status: */
	xdr_ressize_check(rqstp, reply_statp);
	svc_putnl(resv, 1);	/* REJECT */
	svc_putnl(resv, 1);	/* AUTH_ERROR */
	svc_putnl(resv, ntohl(auth_stat));	/* status */
	goto sendit;

err_bad_prog:
	dprintk("svc: unknown program %d\n", prog);
	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, RPC_PROG_UNAVAIL);
	goto sendit;

err_bad_vers:
	svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
		       vers, prog, progp->pg_name);

	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, RPC_PROG_MISMATCH);
	svc_putnl(resv, progp->pg_lovers);
	svc_putnl(resv, progp->pg_hivers);
	goto sendit;

err_bad_proc:
	svc_printk(rqstp, "unknown procedure (%d)\n", proc);

	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, RPC_PROC_UNAVAIL);
	goto sendit;

err_garbage:
	svc_printk(rqstp, "failed to decode args\n");

	rpc_stat = rpc_garbage_args;
err_bad:
	serv->sv_stats->rpcbadfmt++;
	svc_putnl(resv, ntohl(rpc_stat));
	goto sendit;
}
EXPORT_SYMBOL_GPL(svc_process);

/*
 * Process the RPC request.
 */
int
svc_process(struct svc_rqst *rqstp)
{
	struct kvec		*argv = &rqstp->rq_arg.head[0];
	struct kvec		*resv = &rqstp->rq_res.head[0];
	struct svc_serv		*serv = rqstp->rq_server;
	u32			dir;
	int			error;

	/*
	 * Setup response xdr_buf.
	 * Initially it has just one page
	 */
	rqstp->rq_resused = 1;
	resv->iov_base = page_address(rqstp->rq_respages[0]);
	resv->iov_len = 0;
	rqstp->rq_res.pages = rqstp->rq_respages + 1;
	rqstp->rq_res.len = 0;
	rqstp->rq_res.page_base = 0;
	rqstp->rq_res.page_len = 0;
	rqstp->rq_res.buflen = PAGE_SIZE;
	rqstp->rq_res.tail[0].iov_base = NULL;
	rqstp->rq_res.tail[0].iov_len = 0;

	rqstp->rq_xid = svc_getu32(argv);

	dir = svc_getnl(argv);
	if (dir != 0) {
		/* direction != CALL */
		svc_printk(rqstp, "bad direction %d, dropping request\n", dir);
		serv->sv_stats->rpcbadfmt++;
		svc_drop(rqstp);
		return 0;
	}

	error = svc_process_common(rqstp, argv, resv);
	if (error <= 0)
		return error;

	return svc_send(rqstp);
}
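/*
 * Service threads typically loop over svc_recv() to wait for and
 * decode a request and then call svc_process(), which dispatches it
 * and, on success, transmits the reply itself via svc_send().
 */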

#if defined(CONFIG_NFS_V4_1)
/*
 * Process a backchannel RPC request that arrived over an existing
 * outbound connection
 */
int
bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
	       struct svc_rqst *rqstp)
{
	struct kvec	*argv = &rqstp->rq_arg.head[0];
	struct kvec	*resv = &rqstp->rq_res.head[0];
	int		error;

	/* Build the svc_rqst used by the common processing routine */
	rqstp->rq_xprt = serv->bc_xprt;
	rqstp->rq_xid = req->rq_xid;
	rqstp->rq_prot = req->rq_xprt->prot;
	rqstp->rq_server = serv;

	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));

	/* reset result send buffer "put" position */
	resv->iov_len = 0;

	if (rqstp->rq_prot != IPPROTO_TCP) {
		printk(KERN_ERR "No support for Non-TCP transports!\n");
		BUG();
	}

	/*
	 * Skip the next two words because they've already been
	 * processed in the transport
	 */
	svc_getu32(argv);	/* XID */
	svc_getnl(argv);	/* CALLDIR */

	error = svc_process_common(rqstp, argv, resv);
	if (error <= 0)
		return error;

	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
	return bc_send(req);
}
EXPORT_SYMBOL(bc_svc_process);
#endif /* CONFIG_NFS_V4_1 */

/*
 * Return (transport-specific) limit on the rpc payload.
 */
u32 svc_max_payload(const struct svc_rqst *rqstp)
{
	u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload;

	if (rqstp->rq_server->sv_max_payload < max)
		max = rqstp->rq_server->sv_max_payload;
	return max;
}
EXPORT_SYMBOL_GPL(svc_max_payload);