1/*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37#include <sys/cdefs.h>
38#include "ipoib.h"
39#include <sys/eventhandler.h>
40
41#include <linux/module.h>
42
43#include <linux/slab.h>
44#include <linux/kernel.h>
45#include <linux/vmalloc.h>
46
47#include <linux/if_vlan.h>
48
49#include <net/infiniband.h>
50
51#include <rdma/ib_addr.h>
52#include <rdma/ib_cache.h>
53
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");

/* Ring sizes for the send/receive queues; read-only module tunables. */
int ipoib_sendq_size = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size = IPOIB_RX_RING_SIZE;

module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Debug tracing level; writable at runtime (mode 0644). */
int ipoib_debug_level = 1;

module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif
72
/* Snapshot iterator over a device's path tree (debug support). */
struct ipoib_path_iter {
	struct ipoib_dev_priv *priv;
	struct ipoib_path  path;	/* copy of the current entry */
};

/*
 * Template broadcast hardware address (IPv4 all-hosts MGID).  Bytes 8-9
 * are overwritten with the port's P_Key in ipoib_add_port().
 */
static const u8 ipv4_bcast_addr[] = {
	0x00, 0xff, 0xff, 0xff,
	0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
};

/* Workqueue shared by all IPoIB instances for deferred tasks. */
struct workqueue_struct *ipoib_workqueue;

struct ib_sa_client ipoib_sa_client;

static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static if_t ipoib_get_net_dev_by_params(
		struct ib_device *dev, u8 port, u16 pkey,
		const union ib_gid *gid, const struct sockaddr *addr,
		void *client_data);
static void ipoib_start(if_t dev);
static int ipoib_ioctl(if_t ifp, u_long command, caddr_t data);

/* Unit-number allocator used to name the ibN interfaces. */
static struct unrhdr *ipoib_unrhdr;
98
/* Create the unit-number allocator early (before KLDs load). */
static void
ipoib_unrhdr_init(void *arg)
{

	/* Units 0..65535; NULL mutex means the unr code self-locks. */
	ipoib_unrhdr = new_unrhdr(0, 65535, NULL);
}
SYSINIT(ipoib_unrhdr_init, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_init, NULL);
106
107static void
108ipoib_unrhdr_uninit(void *arg)
109{
110
111	if (ipoib_unrhdr != NULL) {
112		struct unrhdr *hdr;
113
114		hdr = ipoib_unrhdr;
115		ipoib_unrhdr = NULL;
116
117		delete_unrhdr(hdr);
118	}
119}
120SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL);
121
/* Registration with the IB core: device add/remove/lookup callbacks. */
static struct ib_client ipoib_client = {
	.name   = "ipoib",
	.add    = ipoib_add_one,
	.remove = ipoib_remove_one,
	.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};
128
/*
 * Administratively bring the interface up: set ADMIN_UP, open and start
 * the IB device, then recursively open child (sub-)interfaces.
 * Returns 0 on success or when the open is deferred, -EINVAL on failure.
 */
int
ipoib_open(struct ipoib_dev_priv *priv)
{
	if_t dev = priv->dev;

	ipoib_dbg(priv, "bringing up interface\n");

	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	/* P_Key not ready yet: presumably completed later by the P_Key
	 * poll task — report success for now.  TODO confirm. */
	if (ipoib_pkey_dev_delay_open(priv))
		return 0;

	if (ipoib_ib_dev_open(priv))
		goto err_disable;

	if (ipoib_ib_dev_up(priv))
		goto err_stop;

	/* Only a parent interface walks its children. */
	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring up any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			if ((if_getdrvflags(cpriv->dev) & IFF_DRV_RUNNING) == 0)
				ipoib_open(cpriv);
		mutex_unlock(&priv->vlan_mutex);
	}
	if_setdrvflagbits(dev, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	return 0;

err_stop:
	ipoib_ib_dev_stop(priv, 1);

err_disable:
	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	return -EINVAL;
}
169
170static void
171ipoib_init(void *arg)
172{
173	if_t dev;
174	struct ipoib_dev_priv *priv;
175
176	priv = arg;
177	dev = priv->dev;
178	if ((if_getdrvflags(dev) & IFF_DRV_RUNNING) == 0)
179		ipoib_open(priv);
180	queue_work(ipoib_workqueue, &priv->flush_light);
181}
182
183
/*
 * Administratively bring the interface down: clear ADMIN_UP and the
 * driver running flags, stop the IB device, and recursively stop any
 * child (sub-)interfaces.  Always returns 0.
 */
static int
ipoib_stop(struct ipoib_dev_priv *priv)
{
	if_t dev = priv->dev;

	ipoib_dbg(priv, "stopping interface\n");

	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	if_setdrvflagbits(dev, 0, IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	ipoib_ib_dev_down(priv, 0);
	ipoib_ib_dev_stop(priv, 0);

	/* Only a parent interface walks its children. */
	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring down any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			if ((if_getdrvflags(cpriv->dev) & IFF_DRV_RUNNING) != 0)
				ipoib_stop(cpriv);
		mutex_unlock(&priv->vlan_mutex);
	}

	return 0;
}
211
212static int
213ipoib_propagate_ifnet_mtu(struct ipoib_dev_priv *priv, int new_mtu,
214    bool propagate)
215{
216	if_t ifp;
217	struct ifreq ifr;
218	int error;
219
220	ifp = priv->dev;
221	if (if_getmtu(ifp) == new_mtu)
222		return (0);
223	if (propagate) {
224		strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
225		ifr.ifr_mtu = new_mtu;
226		CURVNET_SET(if_getvnet(ifp));
227		error = ifhwioctl(SIOCSIFMTU, ifp, (caddr_t)&ifr, curthread);
228		CURVNET_RESTORE();
229	} else {
230		if_setmtu(ifp, new_mtu);
231		error = 0;
232	}
233	return (error);
234}
235
/*
 * Validate and apply an MTU change.  In connected mode the limit is the
 * CM maximum; in datagram mode it is the UD maximum, and the effective
 * ifnet MTU is clamped to the multicast MTU.  Returns 0 or -EINVAL.
 */
int
ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate)
{
	int error, prev_admin_mtu;

	/* dev->if_mtu > 2K ==> connected mode */
	if (ipoib_cm_admin_enabled(priv)) {
		if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)))
			return -EINVAL;

		if (new_mtu > priv->mcast_mtu)
			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
				   priv->mcast_mtu);

		return (ipoib_propagate_ifnet_mtu(priv, new_mtu, propagate));
	}

	if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
		return -EINVAL;

	/* Remember the admin MTU so it can be restored on failure. */
	prev_admin_mtu = priv->admin_mtu;
	priv->admin_mtu = new_mtu;
	error = ipoib_propagate_ifnet_mtu(priv, min(priv->mcast_mtu,
	    priv->admin_mtu), propagate);
	if (error == 0) {
		/* check for MTU change to avoid infinite loop */
		if (prev_admin_mtu != new_mtu)
			queue_work(ipoib_workqueue, &priv->flush_light);
	} else
		priv->admin_mtu = prev_admin_mtu;
	return (error);
}
268
/*
 * Interface ioctl handler.  Waits out device initialization
 * (priv->gone == 2) and rejects requests on a detached device
 * (priv->gone == 1).  Handles flag changes, multicast list updates,
 * address assignment, lladdr retrieval, and MTU changes.
 */
static int
ipoib_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct ipoib_dev_priv *priv = if_getsoftc(ifp);
	struct ifaddr *ifa = (struct ifaddr *) data;
	struct ifreq *ifr = (struct ifreq *) data;
	int error = 0;

	/* check if detaching */
	if (priv == NULL)
		return (ENXIO);
	/* wait for device to become ready, if any */
	while (priv->gone == 2)
		pause("W", 1);
	/* check for device gone */
	if (priv->gone != 0)
		return (ENXIO);

	switch (command) {
	case SIOCSIFFLAGS:
		/* Reconcile the admin UP flag with the running state. */
		if (if_getflags(ifp) & IFF_UP) {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
				error = -ipoib_open(priv);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
				ipoib_stop(priv);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/* Multicast filter changes are applied asynchronously. */
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
			queue_work(ipoib_workqueue, &priv->restart_task);
		break;
	case SIOCSIFADDR:
		if_setflagbits(ifp, IFF_UP, 0);

		switch (ifa->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			if_init(ifp, if_getsoftc(ifp));	/* before arpwhohas */
			arp_ifinit(ifp, ifa);
			break;
#endif
		default:
			if_init(ifp, if_getsoftc(ifp));
			break;
		}
		break;

	case SIOCGIFADDR:
			/* Return the 20-byte InfiniBand link-level address. */
			bcopy(if_getlladdr(ifp), &ifr->ifr_addr.sa_data[0],
                            INFINIBAND_ALEN);
		break;

	case SIOCSIFMTU:
		/*
		 * Set the interface MTU.
		 */
		error = -ipoib_change_mtu(priv, ifr->ifr_mtu, false);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
334
335
336static struct ipoib_path *
337__path_find(struct ipoib_dev_priv *priv, void *gid)
338{
339	struct rb_node *n = priv->path_tree.rb_node;
340	struct ipoib_path *path;
341	int ret;
342
343	while (n) {
344		path = rb_entry(n, struct ipoib_path, rb_node);
345
346		ret = memcmp(gid, path->pathrec.dgid.raw,
347			     sizeof (union ib_gid));
348
349		if (ret < 0)
350			n = n->rb_left;
351		else if (ret > 0)
352			n = n->rb_right;
353		else
354			return path;
355	}
356
357	return NULL;
358}
359
360static int
361__path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path)
362{
363	struct rb_node **n = &priv->path_tree.rb_node;
364	struct rb_node *pn = NULL;
365	struct ipoib_path *tpath;
366	int ret;
367
368	while (*n) {
369		pn = *n;
370		tpath = rb_entry(pn, struct ipoib_path, rb_node);
371
372		ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
373			     sizeof (union ib_gid));
374		if (ret < 0)
375			n = &pn->rb_left;
376		else if (ret > 0)
377			n = &pn->rb_right;
378		else
379			return -EEXIST;
380	}
381
382	rb_link_node(&path->rb_node, pn, n);
383	rb_insert_color(&path->rb_node, &priv->path_tree);
384
385	list_add_tail(&path->list, &priv->path_list);
386
387	return 0;
388}
389
/*
 * Release a path entry: drop any mbufs queued on it, release the
 * address handle and the CM transmit context, then free the memory.
 */
void
ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{

	/* Free any packets still waiting for path resolution. */
	_IF_DRAIN(&path->queue);

	if (path->ah)
		ipoib_put_ah(path->ah);
	if (ipoib_cm_get(path))
		ipoib_cm_destroy_tx(ipoib_cm_get(path));

	kfree(path);
}
403
404#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
405
406struct ipoib_path_iter *
407ipoib_path_iter_init(struct ipoib_dev_priv *priv)
408{
409	struct ipoib_path_iter *iter;
410
411	iter = kmalloc(sizeof *iter, GFP_KERNEL);
412	if (!iter)
413		return NULL;
414
415	iter->priv = priv;
416	memset(iter->path.pathrec.dgid.raw, 0, 16);
417
418	if (ipoib_path_iter_next(iter)) {
419		kfree(iter);
420		return NULL;
421	}
422
423	return iter;
424}
425
/*
 * Advance the iterator: snapshot the first tree entry whose GID is
 * strictly greater than the one currently stored in the iterator.
 * Returns 0 on success, 1 when iteration is exhausted.
 */
int
ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
	struct ipoib_dev_priv *priv = iter->priv;
	struct rb_node *n;
	struct ipoib_path *path;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	n = rb_first(&priv->path_tree);

	/* Linear in-order scan for the next larger GID; copies the whole
	 * path struct so it stays valid after the lock is dropped. */
	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
			   sizeof (union ib_gid)) < 0) {
			iter->path = *path;
			ret = 0;
			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}
455
/* Copy the iterator's current path snapshot into *path. */
void
ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path)
{
	*path = iter->path;
}
461
462#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
463
464void
465ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv)
466{
467	struct ipoib_path *path, *tp;
468
469	spin_lock_irq(&priv->lock);
470
471	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
472		ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n",
473			be16_to_cpu(path->pathrec.dlid),
474			path->pathrec.dgid.raw, ":");
475		path->valid =  0;
476	}
477
478	spin_unlock_irq(&priv->lock);
479}
480
/*
 * Remove and free all cached paths.  Outstanding SA queries are
 * cancelled; the lock is dropped around wait_for_completion() because
 * the query completion handler takes priv->lock itself.
 */
void
ipoib_flush_paths(struct ipoib_dev_priv *priv)
{
	struct ipoib_path *path, *tp;
	LIST_HEAD(remove_list);
	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);

	/* Detach everything onto a private list in one shot. */
	list_splice_init(&priv->path_list, &remove_list);

	list_for_each_entry(path, &remove_list, list)
		rb_erase(&path->rb_node, &priv->path_tree);

	list_for_each_entry_safe(path, tp, &remove_list, list) {
		if (path->query)
			ib_sa_cancel_query(path->query_id, path->query);
		/* Drop the lock: path_rec_completion() needs it to run
		 * and signal path->done. */
		spin_unlock_irqrestore(&priv->lock, flags);
		wait_for_completion(&path->done);
		ipoib_path_free(priv, path);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
}
506
/*
 * SA path-record query completion.  On success, build an address
 * handle, swap it into the path, and retransmit the mbufs that queued
 * up while resolution was pending.  Runs partially under priv->lock;
 * the requeue happens after the lock is dropped.
 */
static void
path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr)
{
	struct ipoib_path *path = path_ptr;
	struct ipoib_dev_priv *priv = path->priv;
	if_t dev = priv->dev;
	struct ipoib_ah *ah = NULL;
	struct ipoib_ah *old_ah = NULL;
	struct epoch_tracker et;
	struct ifqueue mbqueue;
	struct mbuf *mb;
	unsigned long flags;

	if (!status)
		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n",
			  be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":");
	else
		ipoib_dbg(priv, "PathRec status %d for GID %16D\n",
			  status, path->pathrec.dgid.raw, ":");

	/* Local staging queue for packets to retransmit lock-free. */
	bzero(&mbqueue, sizeof(mbqueue));

	if (!status) {
		struct ib_ah_attr av;

		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
			ah = ipoib_create_ah(priv, priv->pd, &av);
	}

	spin_lock_irqsave(&priv->lock, flags);

	if (ah) {
		path->pathrec = *pathrec;

		/* Keep the old AH for release after the lock is dropped. */
		old_ah   = path->ah;
		path->ah = ah;

		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
			  ah, be16_to_cpu(pathrec->dlid), pathrec->sl);

		/* Move the pending packets to the local queue. */
		for (;;) {
			_IF_DEQUEUE(&path->queue, mb);
			if (mb == NULL)
				break;
			_IF_ENQUEUE(&mbqueue, mb);
		}

#ifdef CONFIG_INFINIBAND_IPOIB_CM
		if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path))
			ipoib_cm_set(path, ipoib_cm_create_tx(priv, path));
#endif

		path->valid = 1;
	}

	path->query = NULL;
	complete(&path->done);

	spin_unlock_irqrestore(&priv->lock, flags);

	if (old_ah)
		ipoib_put_ah(old_ah);

	/* Retransmit everything that was waiting for this path. */
	NET_EPOCH_ENTER(et);
	for (;;) {
		_IF_DEQUEUE(&mbqueue, mb);
		if (mb == NULL)
			break;
		mb->m_pkthdr.rcvif = dev;
		if (if_transmit(dev, mb))
			ipoib_warn(priv, "dev_queue_xmit failed "
				   "to requeue packet\n");
	}
	NET_EPOCH_EXIT(et);
}
582
/*
 * Allocate and pre-fill a path entry for the destination hardware
 * address.  Requires the broadcast group to be joined (for the traffic
 * class).  Returns NULL on failure.  Called in atomic context.
 */
static struct ipoib_path *
path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr)
{
	struct ipoib_path *path;

	if (!priv->broadcast)
		return NULL;

	path = kzalloc(sizeof *path, GFP_ATOMIC);
	if (!path)
		return NULL;

	path->priv = priv;

	/* NOTE(review): kzalloc already zero-fills, so this bzero of the
	 * embedded queue is redundant but harmless. */
	bzero(&path->queue, sizeof(path->queue));

#ifdef CONFIG_INFINIBAND_IPOIB_CM
	memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN);
#endif
	/* The destination GID lives at offset 4 of the 20-byte hwaddr. */
	memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid));
	path->pathrec.sgid	    = priv->local_gid;
	path->pathrec.pkey	    = cpu_to_be16(priv->pkey);
	path->pathrec.numb_path     = 1;
	path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;

	return path;
}
610
/*
 * Kick off an asynchronous SA path-record query for the path.  The MTU
 * selector requests a path MTU strictly greater than the current ifnet
 * MTU's IB equivalent, falling back to a wildcard for unusual sizes.
 * Returns 0 on success or the negative query id on failure.
 */
static int
path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{
	if_t dev = priv->dev;

	ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
	struct ib_sa_path_rec p_rec;

	p_rec = path->pathrec;
	p_rec.mtu_selector = IB_SA_GT;

	/* Map the rounded-up frame size to the IB MTU one step below it,
	 * since the selector asks for "greater than". */
	switch (roundup_pow_of_two(if_getmtu(dev) + IPOIB_ENCAP_LEN)) {
	case 512:
		p_rec.mtu = IB_MTU_256;
		break;
	case 1024:
		p_rec.mtu = IB_MTU_512;
		break;
	case 2048:
		p_rec.mtu = IB_MTU_1024;
		break;
	case 4096:
		p_rec.mtu = IB_MTU_2048;
		break;
	default:
		/* Wildcard everything */
		comp_mask = 0;
		p_rec.mtu = 0;
		p_rec.mtu_selector = 0;
	}

	ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n",
		  p_rec.dgid.raw, ":",
		  comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);

	init_completion(&path->done);

	path->query_id =
		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
				   &p_rec, comp_mask		|
				   IB_SA_PATH_REC_DGID		|
				   IB_SA_PATH_REC_SGID		|
				   IB_SA_PATH_REC_NUMB_PATH	|
				   IB_SA_PATH_REC_TRAFFIC_CLASS |
				   IB_SA_PATH_REC_PKEY,
				   1000, GFP_ATOMIC,
				   path_rec_completion,
				   path, &path->query);
	if (path->query_id < 0) {
		ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
		path->query = NULL;
		/* Signal waiters (e.g. ipoib_flush_paths) immediately. */
		complete(&path->done);
		return path->query_id;
	}

	return 0;
}
668
/*
 * Transmit a unicast mbuf.  If no valid path exists yet, queue the
 * packet on the path (bounded by IPOIB_MAX_PATH_REC_QUEUE) and start a
 * path-record lookup; otherwise send via CM or the UD address handle.
 * Caller holds priv->lock (see ipoib_start_locked).
 */
static void
ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh)
{
	struct ipoib_path *path;

	/* GID starts at byte 4 of the 20-byte hardware address. */
	path = __path_find(priv, eh->hwaddr + 4);
	if (!path || !path->valid) {
		int new_path = 0;

		if (!path) {
			path = path_rec_create(priv, eh->hwaddr);
			new_path = 1;
		}
		if (path) {
			if (_IF_QLEN(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE)
				_IF_ENQUEUE(&path->queue, mb);
			else {
				if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
				m_freem(mb);
			}

			/* On lookup failure a freshly created path is freed
			 * (which also drains the queued mbuf); an existing
			 * path is kept for a later retry. */
			if (!path->query && path_rec_start(priv, path)) {
				if (new_path)
					ipoib_path_free(priv, path);
				return;
			} else
				__path_add(priv, path);
		} else {
			if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
			m_freem(mb);
		}

		return;
	}

	if (ipoib_cm_get(path) && ipoib_cm_up(path)) {
		ipoib_cm_send(priv, mb, ipoib_cm_get(path));
	} else if (path->ah) {
		ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr));
	} else if ((path->query || !path_rec_start(priv, path)) &&
		    path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) {
		_IF_ENQUEUE(&path->queue, mb);
	} else {
		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
		m_freem(mb);
	}
}
716
717static int
718ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb)
719{
720	struct ipoib_header *eh;
721
722	eh = mtod(mb, struct ipoib_header *);
723	if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
724		/* Add in the P_Key for multicast*/
725		eh->hwaddr[8] = (priv->pkey >> 8) & 0xff;
726		eh->hwaddr[9] = priv->pkey & 0xff;
727
728		ipoib_mcast_send(priv, eh->hwaddr + 4, mb);
729	} else
730		ipoib_unicast_send(mb, priv, eh);
731
732	return 0;
733}
734
735void
736ipoib_start_locked(if_t dev, struct ipoib_dev_priv *priv)
737{
738	struct mbuf *mb;
739
740	assert_spin_locked(&priv->lock);
741
742	while (!if_sendq_empty(dev) &&
743	    (if_getdrvflags(dev) & IFF_DRV_OACTIVE) == 0) {
744		mb = if_dequeue(dev);
745		if (mb == NULL)
746			break;
747		infiniband_bpf_mtap(dev, mb);
748		ipoib_send_one(priv, mb);
749	}
750}
751
752static void
753_ipoib_start(if_t dev, struct ipoib_dev_priv *priv)
754{
755
756	if ((if_getdrvflags(dev) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
757	    IFF_DRV_RUNNING)
758		return;
759
760	spin_lock(&priv->lock);
761	ipoib_start_locked(dev, priv);
762	spin_unlock(&priv->lock);
763}
764
/* if_start handler: look up the softc and transmit. */
static void
ipoib_start(if_t dev)
{
	_ipoib_start(dev, if_getsoftc(dev));
}
770
/*
 * if_start handler for VLAN sub-interfaces.  The softc comes from the
 * VLAN cookie; if it is not set (interface being torn down or not yet
 * wired up), drop everything in the send queue and count errors.
 */
static void
ipoib_vlan_start(if_t dev)
{
	struct ipoib_dev_priv *priv;
	struct mbuf *mb;

	priv = VLAN_COOKIE(dev);
	if (priv != NULL)
		return _ipoib_start(dev, priv);
	/* No cookie: drain and drop. */
	while (!if_sendq_empty(dev)) {
		mb = if_dequeue(dev);
		if (mb == NULL)
			break;
		m_freem(mb);
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
	}
}
788
789int
790ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port)
791{
792
793	/* Allocate RX/TX "rings" to hold queued mbs */
794	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
795				GFP_KERNEL);
796	if (!priv->rx_ring) {
797		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
798		       ca->name, ipoib_recvq_size);
799		goto out;
800	}
801
802	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL);
803	if (!priv->tx_ring) {
804		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
805		       ca->name, ipoib_sendq_size);
806		goto out_rx_ring_cleanup;
807	}
808	memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);
809
810	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
811
812	if (ipoib_ib_dev_init(priv, ca, port))
813		goto out_tx_ring_cleanup;
814
815	return 0;
816
817out_tx_ring_cleanup:
818	kfree(priv->tx_ring);
819
820out_rx_ring_cleanup:
821	kfree(priv->rx_ring);
822
823out:
824	return -ENOMEM;
825}
826
827static void
828ipoib_ifdetach(struct ipoib_dev_priv *priv)
829{
830	if_t dev;
831
832	dev = priv->dev;
833	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
834		priv->gone = 1;
835		infiniband_ifdetach(dev);
836	}
837}
838
/*
 * Final teardown of a softc.  Parent interfaces free the ifnet and
 * return their unit number; VLAN sub-interfaces only clear the VLAN
 * cookie (the ifnet is owned by the VLAN layer).  Frees priv last.
 */
static void
ipoib_detach(struct ipoib_dev_priv *priv)
{
	if_t dev;

	dev = priv->dev;
	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		if_free(dev);
		free_unr(ipoib_unrhdr, priv->unit);
	} else
		VLAN_SETCOOKIE(priv->dev, NULL);

	free(priv, M_TEMP);
}
853
/*
 * Release the device's IB resources and rings, recursively cleaning up
 * (and detaching) all child interfaces first.
 */
void
ipoib_dev_cleanup(struct ipoib_dev_priv *priv)
{
	struct ipoib_dev_priv *cpriv, *tcpriv;

	/* Delete any child interfaces first */
	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
		ipoib_ifdetach(cpriv);
		ipoib_dev_cleanup(cpriv);
		ipoib_detach(cpriv);
	}

	ipoib_ib_dev_cleanup(priv);

	kfree(priv->rx_ring);
	kfree(priv->tx_ring);

	/* Clear the pointers so a repeated cleanup is harmless. */
	priv->rx_ring = NULL;
	priv->tx_ring = NULL;
}
874
/*
 * Allocate and initialize a zeroed softc: locks, list heads, deferred
 * work items, and the template broadcast address.  M_WAITOK, so this
 * never returns NULL.
 */
static struct ipoib_dev_priv *
ipoib_priv_alloc(void)
{
	struct ipoib_dev_priv *priv;

	priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
	spin_lock_init(&priv->lock);
	spin_lock_init(&priv->drain_lock);
	mutex_init(&priv->vlan_mutex);
	INIT_LIST_HEAD(&priv->path_list);
	INIT_LIST_HEAD(&priv->child_intfs);
	INIT_LIST_HEAD(&priv->dead_ahs);
	INIT_LIST_HEAD(&priv->multicast_list);
	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
	INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
	/* P_Key bytes are patched into this copy in ipoib_add_port(). */
	memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN);

	return (priv);
}
900
901struct ipoib_dev_priv *
902ipoib_intf_alloc(const char *name, struct ib_device *hca)
903{
904	struct ipoib_dev_priv *priv;
905	if_t dev;
906
907	priv = ipoib_priv_alloc();
908	dev = priv->dev = if_alloc(IFT_INFINIBAND);
909	if (!dev) {
910		free(priv, M_TEMP);
911		return NULL;
912	}
913	if_setsoftc(dev, priv);
914	priv->gone = 2; /* initializing */
915	priv->unit = alloc_unr(ipoib_unrhdr);
916	if (priv->unit == -1) {
917		if_free(dev);
918		free(priv, M_TEMP);
919		return NULL;
920	}
921	if_initname(dev, name, priv->unit);
922	if_setflags(dev, IFF_BROADCAST | IFF_MULTICAST);
923	if ((hca->attrs.device_cap_flags & IB_DEVICE_KNOWSEPOCH) == 0)
924		if_setflagbits(dev, IFF_NEEDSEPOCH, 0);
925
926	infiniband_ifattach(priv->dev, NULL, priv->broadcastaddr);
927
928	if_setinitfn(dev, ipoib_init);
929	if_setioctlfn(dev, ipoib_ioctl);
930	if_setstartfn(dev, ipoib_start);
931
932	if_setsendqlen(dev, ipoib_sendq_size * 2);
933
934	priv->dev = dev;
935	if_link_state_change(priv->dev, LINK_STATE_DOWN);
936
937	return if_getsoftc(dev);
938}
939
/*
 * Derive ifnet capabilities from the HCA's device capability flags.
 * Checksum offload is only advertised in datagram (non-CM) builds.
 * Always returns 0.
 */
int
ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
	struct ib_device_attr *device_attr = &hca->attrs;

	priv->hca_caps = device_attr->device_cap_flags;

	/* Start from a clean slate. */
	if_sethwassist(priv->dev, 0);
	if_setcapabilities(priv->dev, 0);

#ifndef CONFIG_INFINIBAND_IPOIB_CM
	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
		if_sethwassist(priv->dev, CSUM_IP | CSUM_TCP | CSUM_UDP);
		if_setcapabilities(priv->dev, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM);
	}

#if 0
	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) {
		priv->dev->if_capabilities |= IFCAP_TSO4;
		priv->dev->if_hwassist |= CSUM_TSO;
	}
#endif
#endif
	if_setcapabilitiesbit(priv->dev,
	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE, 0);
	if_setcapenable(priv->dev, if_getcapabilities(priv->dev));

	return 0;
}
970
971
/*
 * Create and fully initialize one IPoIB interface for an HCA port:
 * query port attributes, P_Key, and GID; build the link-level address;
 * set up rings and IB resources; and register the event handler.
 * Returns the ifnet on success or ERR_PTR(-errno), unwinding any
 * partial setup through the goto-cleanup chain.
 */
static if_t
ipoib_add_port(const char *format, struct ib_device *hca, u8 port)
{
	struct ipoib_dev_priv *priv;
	struct ib_port_attr attr;
	int result = -ENOMEM;

	priv = ipoib_intf_alloc(format, hca);
	if (!priv)
		goto alloc_mem_failed;

	if (!ib_query_port(hca, port, &attr))
		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
	else {
		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
		       hca->name, port);
		goto device_init_failed;
	}

	/* MTU will be reset when mcast join happens */
	if_setmtu(priv->dev, IPOIB_UD_MTU(priv->max_ib_mtu));
	priv->mcast_mtu = priv->admin_mtu = if_getmtu(priv->dev);

	result = ib_query_pkey(hca, port, 0, &priv->pkey);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}

	if (ipoib_set_dev_features(priv, hca))
		goto device_init_failed;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
	 */
	priv->pkey |= 0x8000;

	/* Patch the P_Key into the broadcast hardware address. */
	priv->broadcastaddr[8] = priv->pkey >> 8;
	priv->broadcastaddr[9] = priv->pkey & 0xff;

	result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}
	/* Link-level address = 4 bytes QPN prefix + 16-byte port GID. */
	memcpy(if_getlladdr(priv->dev) + 4, priv->local_gid.raw, sizeof(union ib_gid));

	result = ipoib_dev_init(priv, hca, port);
	if (result < 0) {
		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}
	if (ipoib_cm_admin_enabled(priv))
		if_setmtu(priv->dev, IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)));

	INIT_IB_EVENT_HANDLER(&priv->event_handler,
			      priv->ca, ipoib_event);
	result = ib_register_event_handler(&priv->event_handler);
	if (result < 0) {
		printk(KERN_WARNING "%s: ib_register_event_handler failed for "
		       "port %d (ret = %d)\n",
		       hca->name, port, result);
		goto event_failed;
	}
	if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port);

	priv->gone = 0;	/* ready */

	return priv->dev;

event_failed:
	ipoib_dev_cleanup(priv);

device_init_failed:
	ipoib_ifdetach(priv);
	ipoib_detach(priv);

alloc_mem_failed:
	return ERR_PTR(result);
}
1056
/*
 * IB client "add" callback: create an IPoIB interface for each
 * InfiniBand link-layer port of the device and stash the list of
 * created interfaces as the client data.
 */
static void
ipoib_add_one(struct ib_device *device)
{
	struct list_head *dev_list;
	if_t dev;
	struct ipoib_dev_priv *priv;
	int s, e, p;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
	if (!dev_list)
		return;

	INIT_LIST_HEAD(dev_list);

	/* Switches expose a single port 0; HCAs use ports 1..N. */
	if (device->node_type == RDMA_NODE_IB_SWITCH) {
		s = 0;
		e = 0;
	} else {
		s = 1;
		e = device->phys_port_cnt;
	}

	for (p = s; p <= e; ++p) {
		if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND)
			continue;
		dev = ipoib_add_port("ib", device, p);
		if (!IS_ERR(dev)) {
			priv = if_getsoftc(dev);
			list_add_tail(&priv->list, dev_list);
		}
	}

	ib_set_client_data(device, &ipoib_client, dev_list);
}
1094
/*
 * IB client "remove" callback: tear down every interface created by
 * ipoib_add_one() for this device and free the tracking list.
 */
static void
ipoib_remove_one(struct ib_device *device, void *client_data)
{
	struct ipoib_dev_priv *priv, *tmp;
	struct list_head *dev_list = client_data;

	if (!dev_list)
		return;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	list_for_each_entry_safe(priv, tmp, dev_list, list) {
		if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND)
			continue;

		/* Detach from the stack before stopping the hardware. */
		ipoib_ifdetach(priv);
		ipoib_stop(priv);

		ib_unregister_event_handler(&priv->event_handler);

		/* Let any queued flush/restart work finish first. */
		flush_workqueue(ipoib_workqueue);

		ipoib_dev_cleanup(priv);
		ipoib_detach(priv);
	}

	kfree(dev_list);
}
1124
1125static u_int
1126ipoib_match_dev_addr_cb(void *arg, struct ifaddr *ifa, u_int count)
1127{
1128	struct sockaddr *addr = arg;
1129
1130	/* If a match is already found, skip this. */
1131	if (count > 0)
1132		return (0);
1133
1134	if (ifa->ifa_addr->sa_len != addr->sa_len)
1135		return (0);
1136
1137	if (memcmp(ifa->ifa_addr, addr, addr->sa_len) == 0)
1138		return (1);
1139
1140	return (0);
1141}
1142
1143static int
1144ipoib_match_dev_addr(const struct sockaddr *addr, if_t dev)
1145{
1146	struct epoch_tracker et;
1147	int retval = 0;
1148
1149	NET_EPOCH_ENTER(et);
1150	retval = if_foreach_addr_type(dev, addr->sa_family,
1151	    ipoib_match_dev_addr_cb, __DECONST(void *, addr));
1152	NET_EPOCH_EXIT(et);
1153
1154	return (retval);
1155}
1156
1157/*
1158 * ipoib_match_gid_pkey_addr - returns the number of IPoIB netdevs on
1159 * top a given ipoib device matching a pkey_index and address, if one
1160 * exists.
1161 *
1162 * @found_net_dev: contains a matching net_device if the return value
1163 * >= 1, with a reference held.
1164 */
/*
 * ipoib_match_gid_pkey_addr - returns the number of IPoIB netdevs on
 * top a given ipoib device matching a pkey_index and address, if one
 * exists.
 *
 * @found_net_dev: contains a matching net_device if the return value
 * >= 1, with a reference held.
 */
static int
ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
    const union ib_gid *gid, u16 pkey_index, const struct sockaddr *addr,
    if_t *found_net_dev)
{
	struct ipoib_dev_priv *child_priv;
	int matches = 0;

	/* A NULL gid or NULL addr acts as a wildcard for that field. */
	if (priv->pkey_index == pkey_index &&
	    (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) {
		if (addr == NULL || ipoib_match_dev_addr(addr, priv->dev) != 0) {
			if (*found_net_dev == NULL) {
				if_t net_dev;

				/* Report the parent when this is a child. */
				if (priv->parent != NULL)
					net_dev = priv->parent;
				else
					net_dev = priv->dev;
				*found_net_dev = net_dev;
				dev_hold(net_dev);
			}
			matches++;
		}
	}

	/* Check child interfaces */
	mutex_lock(&priv->vlan_mutex);
	list_for_each_entry(child_priv, &priv->child_intfs, list) {
		matches += ipoib_match_gid_pkey_addr(child_priv, gid,
		    pkey_index, addr, found_net_dev);
		/* More than one match is already ambiguous: stop early. */
		if (matches > 1)
			break;
	}
	mutex_unlock(&priv->vlan_mutex);

	return matches;
}
1202
1203/*
1204 * __ipoib_get_net_dev_by_params - returns the number of matching
1205 * net_devs found (between 0 and 2). Also return the matching
1206 * net_device in the @net_dev parameter, holding a reference to the
1207 * net_device, if the number of matches >= 1
1208 */
1209static int
1210__ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
1211    u16 pkey_index, const union ib_gid *gid,
1212    const struct sockaddr *addr, if_t *net_dev)
1213{
1214	struct ipoib_dev_priv *priv;
1215	int matches = 0;
1216
1217	*net_dev = NULL;
1218
1219	list_for_each_entry(priv, dev_list, list) {
1220		if (priv->port != port)
1221			continue;
1222
1223		matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index,
1224		    addr, net_dev);
1225
1226		if (matches > 1)
1227			break;
1228	}
1229
1230	return matches;
1231}
1232
/*
 * ib_client callback: resolve an (ib_device, port, pkey, gid, addr)
 * tuple to the unique IPoIB net device it identifies, or NULL when no
 * match exists.  On success a reference is held on the returned
 * interface; the caller is responsible for dropping it.
 */
static if_t
ipoib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey,
    const union ib_gid *gid, const struct sockaddr *addr, void *client_data)
{
	if_t net_dev;
	struct list_head *dev_list = client_data;	/* per-ib_device priv list */
	u16 pkey_index;
	int matches;
	int ret;

	/* IPoIB only runs on InfiniBand ports. */
	if (!rdma_protocol_ib(dev, port))
		return NULL;

	/* Translate the pkey value into this port's pkey table index. */
	ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index);
	if (ret)
		return NULL;

	if (!dev_list)
		return NULL;

	/* See if we can find a unique device matching the L2 parameters */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
						gid, NULL, &net_dev);

	switch (matches) {
	case 0:
		return NULL;
	case 1:
		return net_dev;
	}

	/* Multiple L2 matches: drop the held reference and retry with L3. */
	dev_put(net_dev);

	/* Couldn't find a unique device with L2 parameters only. Use L3
	 * address to uniquely match the net device */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
						gid, addr, &net_dev);
	switch (matches) {
	case 0:
		return NULL;
	default:
		dev_warn_ratelimited(&dev->dev,
				     "duplicate IP address detected\n");
		/* Fall through */
	case 1:
		return net_dev;
	}
}
1281
/*
 * vlan_config event handler: create an IPoIB child (sub-)interface
 * when a VLAN is configured on an InfiniBand parent.  The VLAN tag
 * becomes the IB partition key (pkey) with the full-membership bit
 * set.
 */
static void
ipoib_config_vlan(void *arg, if_t ifp, uint16_t vtag)
{
	struct ipoib_dev_priv *parent;
	struct ipoib_dev_priv *priv;
	struct epoch_tracker et;
	if_t dev;
	uint16_t pkey;
	int error;

	if (if_gettype(ifp) != IFT_INFINIBAND)
		return;
	/* Look up the vlan interface for this tag under the net epoch. */
	NET_EPOCH_ENTER(et);
	dev = VLAN_DEVAT(ifp, vtag);
	NET_EPOCH_EXIT(et);
	if (dev == NULL)
		return;
	priv = NULL;
	error = 0;
	parent = if_getsoftc(ifp);
	/* We only support 15 bits of pkey. */
	if (vtag & 0x8000)
		return;
	pkey = vtag | 0x8000;	/* Set full membership bit. */
	/* The parent's own pkey cannot double as a child pkey. */
	if (pkey == parent->pkey)
		return;
	/* Check for dups */
	mutex_lock(&parent->vlan_mutex);
	list_for_each_entry(priv, &parent->child_intfs, list) {
		if (priv->pkey == pkey) {
			/* NULL so the error path does not free a list entry. */
			priv = NULL;
			error = EBUSY;
			goto out;
		}
	}
	/* Build the child softc, inheriting limits from the parent. */
	priv = ipoib_priv_alloc();
	priv->dev = dev;
	priv->max_ib_mtu = parent->max_ib_mtu;
	priv->mcast_mtu = priv->admin_mtu = if_getmtu(parent->dev);
	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
	error = ipoib_set_dev_features(priv, parent->ca);
	if (error)
		goto out;
	priv->pkey = pkey;
	/* Bytes 8-9 of the broadcast MGID carry the pkey (big endian). */
	priv->broadcastaddr[8] = pkey >> 8;
	priv->broadcastaddr[9] = pkey & 0xff;
	if_setbroadcastaddr(dev, priv->broadcastaddr);
	error = ipoib_dev_init(priv, parent->ca, parent->port);
	if (error)
		goto out;
	priv->parent = parent->dev;
	list_add_tail(&priv->list, &parent->child_intfs);
	/* Hook the vlan ifnet up to IPoIB transmit and framing. */
	VLAN_SETCOOKIE(dev, priv);
	if_setstartfn(dev, ipoib_vlan_start);
	if_setdrvflagbits(dev, 0, IFF_DRV_RUNNING);
	if_setifheaderlen(dev, IPOIB_HEADER_LEN);
	/* Bring the child up immediately if the parent is running. */
	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
		ipoib_open(priv);
	mutex_unlock(&parent->vlan_mutex);
	return;
out:
	mutex_unlock(&parent->vlan_mutex);
	if (priv)
		free(priv, M_TEMP);
	if (error)
		ipoib_warn(parent,
		    "failed to initialize subinterface: device %s, port %d vtag 0x%X",
		    parent->ca->name, parent->port, vtag);
	return;
}
1352
1353static void
1354ipoib_unconfig_vlan(void *arg, if_t ifp, uint16_t vtag)
1355{
1356	struct ipoib_dev_priv *parent;
1357	struct ipoib_dev_priv *priv;
1358	struct epoch_tracker et;
1359	if_t dev;
1360	uint16_t pkey;
1361
1362	if (if_gettype(ifp) != IFT_INFINIBAND)
1363		return;
1364
1365	NET_EPOCH_ENTER(et);
1366	dev = VLAN_DEVAT(ifp, vtag);
1367	NET_EPOCH_EXIT(et);
1368	if (dev)
1369		VLAN_SETCOOKIE(dev, NULL);
1370	pkey = vtag | 0x8000;
1371	parent = if_getsoftc(ifp);
1372	mutex_lock(&parent->vlan_mutex);
1373	list_for_each_entry(priv, &parent->child_intfs, list) {
1374		if (priv->pkey == pkey) {
1375			ipoib_dev_cleanup(priv);
1376			list_del(&priv->list);
1377			break;
1378		}
1379	}
1380	mutex_unlock(&parent->vlan_mutex);
1381}
1382
/* Tags for the vlan_config/vlan_unconfig event handler registrations. */
eventhandler_tag ipoib_vlan_attach;
eventhandler_tag ipoib_vlan_detach;
1385
1386static int __init
1387ipoib_init_module(void)
1388{
1389	int ret;
1390
1391	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
1392	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
1393	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
1394
1395	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
1396	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
1397	ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
1398						     IPOIB_MIN_QUEUE_SIZE));
1399#ifdef CONFIG_INFINIBAND_IPOIB_CM
1400	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1401#endif
1402
1403	ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1404		ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST);
1405	ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1406		ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST);
1407
1408	/*
1409	 * We create our own workqueue mainly because we want to be
1410	 * able to flush it when devices are being removed.  We can't
1411	 * use schedule_work()/flush_scheduled_work() because both
1412	 * unregister_netdev() and linkwatch_event take the rtnl lock,
1413	 * so flush_scheduled_work() can deadlock during device
1414	 * removal.
1415	 */
1416	ipoib_workqueue = create_singlethread_workqueue("ipoib");
1417	if (!ipoib_workqueue) {
1418		ret = -ENOMEM;
1419		goto err_fs;
1420	}
1421
1422	ib_sa_register_client(&ipoib_sa_client);
1423
1424	ret = ib_register_client(&ipoib_client);
1425	if (ret)
1426		goto err_sa;
1427
1428	return 0;
1429
1430err_sa:
1431	ib_sa_unregister_client(&ipoib_sa_client);
1432	destroy_workqueue(ipoib_workqueue);
1433
1434err_fs:
1435	return ret;
1436}
1437
/*
 * Module unload: undo everything done in ipoib_init_module(), in
 * reverse order of initialization.
 */
static void __exit
ipoib_cleanup_module(void)
{

	/* Stop reacting to VLAN creation/destruction first. */
	EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach);
	EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach);
	ib_unregister_client(&ipoib_client);
	ib_sa_unregister_client(&ipoib_sa_client);
	destroy_workqueue(ipoib_workqueue);
}
/* Run load/unload at SI_ORDER_FIFTH, after the IB core is up. */
module_init_order(ipoib_init_module, SI_ORDER_FIFTH);
module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH);
1450
/*
 * Module event handler stub; no per-event processing is needed since
 * the real work happens in module_init_order()/module_exit_order().
 */
static int
ipoib_evhand(module_t mod, int event, void *arg)
{
	return (0);
}
1456
/* Module glue so the kernel linker can track the ipoib module. */
static moduledata_t ipoib_mod = {
	.name = "ipoib",
	.evhand = ipoib_evhand,
};

DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY);
/* Runtime dependencies: IB core, InfiniBand ifnet support, linuxkpi. */
MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1);
MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);
1466