/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "ipoib.h"

#include <linux/module.h>

#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>

#include <linux/if_vlan.h>

#include <net/infiniband.h>

#include <rdma/ib_cache.h>

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
MODULE_LICENSE("Dual BSD/GPL");

int ipoib_sendq_size = IPOIB_TX_RING_SIZE;
int ipoib_recvq_size = IPOIB_RX_RING_SIZE;

module_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
module_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
int ipoib_debug_level = 1;

module_param_named(debug_level, ipoib_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif

struct ipoib_path_iter {
	struct ipoib_dev_priv *priv;
	struct ipoib_path  path;
};

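/*
 * IPoIB link-layer addresses are INFINIBAND_ALEN (20) bytes: a 4-byte
 * pseudoheader carrying the remote QPN, followed by the 16-byte GID.
 * This is the IPv4 broadcast address from RFC 4391: the broadcast QPN
 * (0x00ffffff) plus the all-broadcast MGID.  Bytes 8-9 (the P_Key
 * field of the MGID) are left zero here and patched with the real
 * P_Key when a port or child interface is set up below.
 */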
static const u8 ipv4_bcast_addr[] = {
	0x00, 0xff, 0xff, 0xff,
	0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
};

struct workqueue_struct *ipoib_workqueue;

struct ib_sa_client ipoib_sa_client;

static void ipoib_add_one(struct ib_device *device);
static void ipoib_remove_one(struct ib_device *device, void *client_data);
static struct net_device *ipoib_get_net_dev_by_params(
		struct ib_device *dev, u8 port, u16 pkey,
		const union ib_gid *gid, const struct sockaddr *addr,
		void *client_data);
static void ipoib_start(struct ifnet *dev);
static int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);

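/*
 * Unit numbers for the ibN ifnets come from a private unrhdr
 * allocator so that units can be recycled as interfaces detach.  The
 * allocator is created before module load (SI_SUB_KLD - 1) and torn
 * down after unload, bracketing the driver's lifetime.
 */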
static struct unrhdr *ipoib_unrhdr;

static void
ipoib_unrhdr_init(void *arg)
{

	ipoib_unrhdr = new_unrhdr(0, 65535, NULL);
}
SYSINIT(ipoib_unrhdr_init, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_init, NULL);

static void
ipoib_unrhdr_uninit(void *arg)
{

	if (ipoib_unrhdr != NULL) {
		struct unrhdr *hdr;

		hdr = ipoib_unrhdr;
		ipoib_unrhdr = NULL;

		delete_unrhdr(hdr);
	}
}
SYSUNINIT(ipoib_unrhdr_uninit, SI_SUB_KLD - 1, SI_ORDER_ANY, ipoib_unrhdr_uninit, NULL);

static struct ib_client ipoib_client = {
	.name   = "ipoib",
	.add    = ipoib_add_one,
	.remove = ipoib_remove_one,
	.get_net_dev_by_params = ipoib_get_net_dev_by_params,
};

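/*
 * Administratively bring the interface up: set the admin-up flag,
 * open the IB transport (ipoib_ib_dev_open), start multicast joins
 * (ipoib_ib_dev_up), then recursively open any P_Key child
 * interfaces.  If the P_Key is not yet present on the port, the open
 * is deferred and completes later from the P_Key poll task.
 */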
int
ipoib_open(struct ipoib_dev_priv *priv)
{
	struct ifnet *dev = priv->dev;

	ipoib_dbg(priv, "bringing up interface\n");

	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	if (ipoib_pkey_dev_delay_open(priv))
		return 0;

	if (ipoib_ib_dev_open(priv))
		goto err_disable;

	if (ipoib_ib_dev_up(priv))
		goto err_stop;

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring up any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
				ipoib_open(cpriv);
		mutex_unlock(&priv->vlan_mutex);
	}
	dev->if_drv_flags |= IFF_DRV_RUNNING;
	dev->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;

err_stop:
	ipoib_ib_dev_stop(priv, 1);

err_disable:
	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	return -EINVAL;
}

static void
ipoib_init(void *arg)
{
	struct ifnet *dev;
	struct ipoib_dev_priv *priv;

	priv = arg;
	dev = priv->dev;
	if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
		ipoib_open(priv);
	queue_work(ipoib_workqueue, &priv->flush_light);
}

static int
ipoib_stop(struct ipoib_dev_priv *priv)
{
	struct ifnet *dev = priv->dev;

	ipoib_dbg(priv, "stopping interface\n");

	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);

	dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	ipoib_ib_dev_down(priv, 0);
	ipoib_ib_dev_stop(priv, 0);

	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		struct ipoib_dev_priv *cpriv;

		/* Bring down any child interfaces too */
		mutex_lock(&priv->vlan_mutex);
		list_for_each_entry(cpriv, &priv->child_intfs, list)
			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0)
				ipoib_stop(cpriv);
		mutex_unlock(&priv->vlan_mutex);
	}

	return 0;
}

static int
ipoib_propagate_ifnet_mtu(struct ipoib_dev_priv *priv, int new_mtu,
    bool propagate)
{
	struct ifnet *ifp;
	struct ifreq ifr;
	int error;

	ifp = priv->dev;
	if (ifp->if_mtu == new_mtu)
		return (0);
	if (propagate) {
		strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ);
		ifr.ifr_mtu = new_mtu;
		CURVNET_SET(ifp->if_vnet);
		error = ifhwioctl(SIOCSIFMTU, ifp, (caddr_t)&ifr, curthread);
		CURVNET_RESTORE();
	} else {
		ifp->if_mtu = new_mtu;
		error = 0;
	}
	return (error);
}

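/*
 * Validate and apply a new MTU.  In connected mode the limit is the
 * CM payload size, IPOIB_CM_MTU() of the largest CM MTU; in datagram
 * mode it is the port's IB MTU less the IPoIB encapsulation header.
 * As a worked example, assuming IPOIB_ENCAP_LEN is 4 as defined in
 * ipoib.h, a 2048-byte IB path MTU allows an ifnet MTU of at most
 * 2044 in datagram mode.  The MTU actually installed is also clamped
 * to the broadcast group's MTU so multicast keeps working.
 */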
int
ipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu, bool propagate)
{
	int error, prev_admin_mtu;

	/* dev->if_mtu > 2K ==> connected mode */
	if (ipoib_cm_admin_enabled(priv)) {
		if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)))
			return -EINVAL;

		if (new_mtu > priv->mcast_mtu)
			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
				   priv->mcast_mtu);

		return (ipoib_propagate_ifnet_mtu(priv, new_mtu, propagate));
	}

	if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
		return -EINVAL;

	prev_admin_mtu = priv->admin_mtu;
	priv->admin_mtu = new_mtu;
	error = ipoib_propagate_ifnet_mtu(priv, min(priv->mcast_mtu,
	    priv->admin_mtu), propagate);
	if (error == 0) {
		/* check for MTU change to avoid infinite loop */
		if (prev_admin_mtu != new_mtu)
			queue_work(ipoib_workqueue, &priv->flush_light);
	} else
		priv->admin_mtu = prev_admin_mtu;
	return (error);
}

static int
ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct ipoib_dev_priv *priv = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *) data;
	struct ifreq *ifr = (struct ifreq *) data;
	int error = 0;

	/* check if detaching */
	if (priv == NULL || priv->gone != 0)
		return (ENXIO);

	switch (command) {
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
				error = -ipoib_open(priv);
		} else
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				ipoib_stop(priv);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			queue_work(ipoib_workqueue, &priv->restart_task);
		break;
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;

		switch (ifa->ifa_addr->sa_family) {
#ifdef INET
		case AF_INET:
			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
			arp_ifinit(ifp, ifa);
			break;
#endif
		default:
			ifp->if_init(ifp->if_softc);
			break;
		}
		break;

	case SIOCGIFADDR:
		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
		    INFINIBAND_ALEN);
		break;

	case SIOCSIFMTU:
		/*
		 * Set the interface MTU.
		 */
		error = -ipoib_change_mtu(priv, ifr->ifr_mtu, false);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

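/*
 * Unicast path records are cached in a red-black tree keyed by the
 * 16-byte destination GID (plain memcmp order), with a parallel list
 * used for bulk operations such as marking and flushing.  Lookups
 * and inserts run under priv->lock.
 */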
static struct ipoib_path *
__path_find(struct ipoib_dev_priv *priv, void *gid)
{
	struct rb_node *n = priv->path_tree.rb_node;
	struct ipoib_path *path;
	int ret;

	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		ret = memcmp(gid, path->pathrec.dgid.raw,
			     sizeof (union ib_gid));

		if (ret < 0)
			n = n->rb_left;
		else if (ret > 0)
			n = n->rb_right;
		else
			return path;
	}

	return NULL;
}

static int
__path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{
	struct rb_node **n = &priv->path_tree.rb_node;
	struct rb_node *pn = NULL;
	struct ipoib_path *tpath;
	int ret;

	while (*n) {
		pn = *n;
		tpath = rb_entry(pn, struct ipoib_path, rb_node);

		ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
			     sizeof (union ib_gid));
		if (ret < 0)
			n = &pn->rb_left;
		else if (ret > 0)
			n = &pn->rb_right;
		else
			return -EEXIST;
	}

	rb_link_node(&path->rb_node, pn, n);
	rb_insert_color(&path->rb_node, &priv->path_tree);

	list_add_tail(&path->list, &priv->path_list);

	return 0;
}

void
ipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{

	_IF_DRAIN(&path->queue);

	if (path->ah)
		ipoib_put_ah(path->ah);
	if (ipoib_cm_get(path))
		ipoib_cm_destroy_tx(ipoib_cm_get(path));

	kfree(path);
}

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

struct ipoib_path_iter *
ipoib_path_iter_init(struct ipoib_dev_priv *priv)
{
	struct ipoib_path_iter *iter;

	iter = kmalloc(sizeof *iter, GFP_KERNEL);
	if (!iter)
		return NULL;

	iter->priv = priv;
	memset(iter->path.pathrec.dgid.raw, 0, 16);

	if (ipoib_path_iter_next(iter)) {
		kfree(iter);
		return NULL;
	}

	return iter;
}

int
ipoib_path_iter_next(struct ipoib_path_iter *iter)
{
	struct ipoib_dev_priv *priv = iter->priv;
	struct rb_node *n;
	struct ipoib_path *path;
	int ret = 1;

	spin_lock_irq(&priv->lock);

	n = rb_first(&priv->path_tree);

	while (n) {
		path = rb_entry(n, struct ipoib_path, rb_node);

		if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
			   sizeof (union ib_gid)) < 0) {
			iter->path = *path;
			ret = 0;
			break;
		}

		n = rb_next(n);
	}

	spin_unlock_irq(&priv->lock);

	return ret;
}

void
ipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path)
{
	*path = iter->path;
}

#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */

void
ipoib_mark_paths_invalid(struct ipoib_dev_priv *priv)
{
	struct ipoib_path *path, *tp;

	spin_lock_irq(&priv->lock);

	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
		ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n",
			be16_to_cpu(path->pathrec.dlid),
			path->pathrec.dgid.raw, ":");
		path->valid = 0;
	}

	spin_unlock_irq(&priv->lock);
}

void
ipoib_flush_paths(struct ipoib_dev_priv *priv)
{
	struct ipoib_path *path, *tp;
	LIST_HEAD(remove_list);
	unsigned long flags;

	spin_lock_irqsave(&priv->lock, flags);

	list_splice_init(&priv->path_list, &remove_list);

	list_for_each_entry(path, &remove_list, list)
		rb_erase(&path->rb_node, &priv->path_tree);

	list_for_each_entry_safe(path, tp, &remove_list, list) {
		if (path->query)
			ib_sa_cancel_query(path->query_id, path->query);
		spin_unlock_irqrestore(&priv->lock, flags);
		wait_for_completion(&path->done);
		ipoib_path_free(priv, path);
		spin_lock_irqsave(&priv->lock, flags);
	}

	spin_unlock_irqrestore(&priv->lock, flags);
}

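/*
 * Completion handler for an SA path record query.  On success it
 * builds an address handle from the returned path, swaps it into the
 * path entry under the lock, and retransmits any mbufs that were
 * queued on the path while the lookup was in flight.  The old
 * address handle, if any, is released outside the lock.
 */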
static void
path_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr)
{
	struct ipoib_path *path = path_ptr;
	struct ipoib_dev_priv *priv = path->priv;
	struct ifnet *dev = priv->dev;
	struct ipoib_ah *ah = NULL;
	struct ipoib_ah *old_ah = NULL;
	struct ifqueue mbqueue;
	struct mbuf *mb;
	unsigned long flags;

	if (!status)
		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n",
			  be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":");
	else
		ipoib_dbg(priv, "PathRec status %d for GID %16D\n",
			  status, path->pathrec.dgid.raw, ":");

	bzero(&mbqueue, sizeof(mbqueue));

	if (!status) {
		struct ib_ah_attr av;

		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
			ah = ipoib_create_ah(priv, priv->pd, &av);
	}

	spin_lock_irqsave(&priv->lock, flags);

	if (ah) {
		path->pathrec = *pathrec;

		old_ah   = path->ah;
		path->ah = ah;

		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
			  ah, be16_to_cpu(pathrec->dlid), pathrec->sl);

		for (;;) {
			_IF_DEQUEUE(&path->queue, mb);
			if (mb == NULL)
				break;
			_IF_ENQUEUE(&mbqueue, mb);
		}

#ifdef CONFIG_INFINIBAND_IPOIB_CM
		if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path))
			ipoib_cm_set(path, ipoib_cm_create_tx(priv, path));
#endif

		path->valid = 1;
	}

	path->query = NULL;
	complete(&path->done);

	spin_unlock_irqrestore(&priv->lock, flags);

	if (old_ah)
		ipoib_put_ah(old_ah);

	for (;;) {
		_IF_DEQUEUE(&mbqueue, mb);
		if (mb == NULL)
			break;
		mb->m_pkthdr.rcvif = dev;
		if (dev->if_transmit(dev, mb))
			ipoib_warn(priv, "if_transmit failed "
				   "to requeue packet\n");
	}
}

static struct ipoib_path *
path_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr)
{
	struct ipoib_path *path;

	if (!priv->broadcast)
		return NULL;

	path = kzalloc(sizeof *path, GFP_ATOMIC);
	if (!path)
		return NULL;

	path->priv = priv;

	bzero(&path->queue, sizeof(path->queue));

#ifdef CONFIG_INFINIBAND_IPOIB_CM
	memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN);
#endif
	memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid));
	path->pathrec.sgid	    = priv->local_gid;
	path->pathrec.pkey	    = cpu_to_be16(priv->pkey);
	path->pathrec.numb_path     = 1;
	path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;

	return path;
}

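/*
 * Kick off an SA path record query for a cached path.  The MTU
 * selector below asks the SA for paths whose MTU is strictly greater
 * (IB_SA_GT) than the next-smaller IB MTU, so the returned path can
 * carry the current ifnet MTU plus the IPoIB encapsulation header;
 * an MTU that does not round up to a known IB MTU enum wildcards the
 * component instead.
 */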
static int
path_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path)
{
	struct ifnet *dev = priv->dev;

	ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
	struct ib_sa_path_rec p_rec;

	p_rec = path->pathrec;
	p_rec.mtu_selector = IB_SA_GT;

	switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) {
	case 512:
		p_rec.mtu = IB_MTU_256;
		break;
	case 1024:
		p_rec.mtu = IB_MTU_512;
		break;
	case 2048:
		p_rec.mtu = IB_MTU_1024;
		break;
	case 4096:
		p_rec.mtu = IB_MTU_2048;
		break;
	default:
		/* Wildcard everything */
		comp_mask = 0;
		p_rec.mtu = 0;
		p_rec.mtu_selector = 0;
	}

	ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n",
		  p_rec.dgid.raw, ":",
		  comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);

	init_completion(&path->done);

	path->query_id =
		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
				   &p_rec, comp_mask		|
				   IB_SA_PATH_REC_DGID		|
				   IB_SA_PATH_REC_SGID		|
				   IB_SA_PATH_REC_NUMB_PATH	|
				   IB_SA_PATH_REC_TRAFFIC_CLASS |
				   IB_SA_PATH_REC_PKEY,
				   1000, GFP_ATOMIC,
				   path_rec_completion,
				   path, &path->query);
	if (path->query_id < 0) {
		ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
		path->query = NULL;
		complete(&path->done);
		return path->query_id;
	}

	return 0;
}

static void
ipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh)
{
	struct ipoib_path *path;

	path = __path_find(priv, eh->hwaddr + 4);
	if (!path || !path->valid) {
		int new_path = 0;

		if (!path) {
			path = path_rec_create(priv, eh->hwaddr);
			new_path = 1;
		}
		if (path) {
			if (_IF_QLEN(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE)
				_IF_ENQUEUE(&path->queue, mb);
			else {
				if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
				m_freem(mb);
			}

			if (!path->query && path_rec_start(priv, path)) {
				if (new_path)
					ipoib_path_free(priv, path);
				return;
			} else
				__path_add(priv, path);
		} else {
			if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
			m_freem(mb);
		}

		return;
	}

	if (ipoib_cm_get(path) && ipoib_cm_up(path)) {
		ipoib_cm_send(priv, mb, ipoib_cm_get(path));
	} else if (path->ah) {
		ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr));
	} else if ((path->query || !path_rec_start(priv, path)) &&
		    path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) {
		_IF_ENQUEUE(&path->queue, mb);
	} else {
		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, 1);
		m_freem(mb);
	}
}

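/*
 * Dispatch one outbound mbuf.  The destination link-layer address
 * selects the route: multicast frames have the interface P_Key
 * written into bytes 8-9 of the destination MGID (the P_Key field of
 * the IPoIB multicast address format) before the multicast send,
 * while unicast frames go through the path record cache above.
 */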
static int
ipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb)
{
	struct ipoib_header *eh;

	eh = mtod(mb, struct ipoib_header *);
	if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
		/* Add in the P_Key for multicast */
		eh->hwaddr[8] = (priv->pkey >> 8) & 0xff;
		eh->hwaddr[9] = priv->pkey & 0xff;

		ipoib_mcast_send(priv, eh->hwaddr + 4, mb);
	} else
		ipoib_unicast_send(mb, priv, eh);

	return 0;
}

void
ipoib_start_locked(struct ifnet *dev, struct ipoib_dev_priv *priv)
{
	struct mbuf *mb;

	assert_spin_locked(&priv->lock);

	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) &&
	    (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
		if (mb == NULL)
			break;
		INFINIBAND_BPF_MTAP(dev, mb);
		ipoib_send_one(priv, mb);
	}
}

static void
_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
{

	if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;

	spin_lock(&priv->lock);
	ipoib_start_locked(dev, priv);
	spin_unlock(&priv->lock);
}

static void
ipoib_start(struct ifnet *dev)
{
	_ipoib_start(dev, dev->if_softc);
}

static void
ipoib_vlan_start(struct ifnet *dev)
{
	struct ipoib_dev_priv *priv;
	struct mbuf *mb;

	priv = VLAN_COOKIE(dev);
	if (priv != NULL)
		return _ipoib_start(dev, priv);
	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) {
		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
		if (mb == NULL)
			break;
		m_freem(mb);
		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
	}
}

int
ipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port)
{

	/* Allocate RX/TX "rings" to hold queued mbs */
	priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
				GFP_KERNEL);
	if (!priv->rx_ring) {
		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
		       ca->name, ipoib_recvq_size);
		goto out;
	}

	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL);
	if (!priv->tx_ring) {
		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
		       ca->name, ipoib_sendq_size);
		goto out_rx_ring_cleanup;
	}

	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */

	if (ipoib_ib_dev_init(priv, ca, port))
		goto out_tx_ring_cleanup;

	return 0;

out_tx_ring_cleanup:
	kfree(priv->tx_ring);

out_rx_ring_cleanup:
	kfree(priv->rx_ring);

out:
	return -ENOMEM;
}

static void
ipoib_detach(struct ipoib_dev_priv *priv)
{
	struct ifnet *dev;

	dev = priv->dev;
	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
		priv->gone = 1;
		infiniband_ifdetach(dev);
		if_free(dev);
		free_unr(ipoib_unrhdr, priv->unit);
	} else
		VLAN_SETCOOKIE(priv->dev, NULL);

	free(priv, M_TEMP);
}

void
ipoib_dev_cleanup(struct ipoib_dev_priv *priv)
{
	struct ipoib_dev_priv *cpriv, *tcpriv;

	/* Delete any child interfaces first */
	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
		ipoib_dev_cleanup(cpriv);
		ipoib_detach(cpriv);
	}

	ipoib_ib_dev_cleanup(priv);

	kfree(priv->rx_ring);
	kfree(priv->tx_ring);

	priv->rx_ring = NULL;
	priv->tx_ring = NULL;
}

static struct ipoib_dev_priv *
ipoib_priv_alloc(void)
{
	struct ipoib_dev_priv *priv;

	priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
	spin_lock_init(&priv->lock);
	spin_lock_init(&priv->drain_lock);
	mutex_init(&priv->vlan_mutex);
	INIT_LIST_HEAD(&priv->path_list);
	INIT_LIST_HEAD(&priv->child_intfs);
	INIT_LIST_HEAD(&priv->dead_ahs);
	INIT_LIST_HEAD(&priv->multicast_list);
	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
	INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
	INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
	INIT_WORK(&priv->flush_normal, ipoib_ib_dev_flush_normal);
	INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
	memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN);

	return (priv);
}

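/*
 * Allocate the softc and ifnet for a new IPoIB parent interface,
 * wire up the ifnet methods, and attach it to the InfiniBand
 * framework.  The interface starts link-down; carrier is asserted
 * later, once the broadcast group join completes.
 */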
struct ipoib_dev_priv *
ipoib_intf_alloc(const char *name)
{
	struct ipoib_dev_priv *priv;
	struct ifnet *dev;

	priv = ipoib_priv_alloc();
	dev = priv->dev = if_alloc(IFT_INFINIBAND);
	if (!dev) {
		free(priv, M_TEMP);
		return NULL;
	}
	dev->if_softc = priv;
	priv->unit = alloc_unr(ipoib_unrhdr);
	if (priv->unit == -1) {
		if_free(dev);
		free(priv, M_TEMP);
		return NULL;
	}
	if_initname(dev, name, priv->unit);
	dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;

	infiniband_ifattach(dev, NULL, priv->broadcastaddr);

	dev->if_init = ipoib_init;
	dev->if_ioctl = ipoib_ioctl;
	dev->if_start = ipoib_start;

	dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;

	priv->dev = dev;
	if_link_state_change(dev, LINK_STATE_DOWN);

	return dev->if_softc;
}

int
ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
	struct ib_device_attr *device_attr = &hca->attrs;

	priv->hca_caps = device_attr->device_cap_flags;

	priv->dev->if_hwassist = 0;
	priv->dev->if_capabilities = 0;

#ifndef CONFIG_INFINIBAND_IPOIB_CM
	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
		priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;
		priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
	}

#if 0
	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) {
		priv->dev->if_capabilities |= IFCAP_TSO4;
		priv->dev->if_hwassist |= CSUM_TSO;
	}
#endif
#endif
	priv->dev->if_capabilities |=
	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
	priv->dev->if_capenable = priv->dev->if_capabilities;

	return 0;
}

static struct ifnet *
ipoib_add_port(const char *format, struct ib_device *hca, u8 port)
{
	struct ipoib_dev_priv *priv;
	struct ib_port_attr attr;
	int result = -ENOMEM;

	priv = ipoib_intf_alloc(format);
	if (!priv)
		goto alloc_mem_failed;

	if (!ib_query_port(hca, port, &attr))
		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
	else {
		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
		       hca->name, port);
		goto device_init_failed;
	}

	/* MTU will be reset when mcast join happens */
	priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
	priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu;

	result = ib_query_pkey(hca, port, 0, &priv->pkey);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}

	if (ipoib_set_dev_features(priv, hca))
		goto device_init_failed;

	/*
	 * Set the full membership bit, so that we join the right
	 * broadcast group, etc.
	 */
	priv->pkey |= 0x8000;

	priv->broadcastaddr[8] = priv->pkey >> 8;
	priv->broadcastaddr[9] = priv->pkey & 0xff;

	result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
	if (result) {
		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}
	memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));

	result = ipoib_dev_init(priv, hca, port);
	if (result < 0) {
		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
		       hca->name, port, result);
		goto device_init_failed;
	}
	if (ipoib_cm_admin_enabled(priv))
		priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv));

	INIT_IB_EVENT_HANDLER(&priv->event_handler,
			      priv->ca, ipoib_event);
	result = ib_register_event_handler(&priv->event_handler);
	if (result < 0) {
		printk(KERN_WARNING "%s: ib_register_event_handler failed for "
		       "port %d (ret = %d)\n",
		       hca->name, port, result);
		goto event_failed;
	}
	if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port);

	return priv->dev;

event_failed:
	ipoib_dev_cleanup(priv);

device_init_failed:
	ipoib_detach(priv);

alloc_mem_failed:
	return ERR_PTR(result);
}

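/*
 * ib_client add callback: create one IPoIB interface per InfiniBand
 * port of the newly registered device.  Switches expose only the
 * management port, numbered 0, while channel adapters number their
 * physical ports from 1, hence the two port ranges below.
 */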
static void
ipoib_add_one(struct ib_device *device)
{
	struct list_head *dev_list;
	struct ifnet *dev;
	struct ipoib_dev_priv *priv;
	int s, e, p;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
	if (!dev_list)
		return;

	INIT_LIST_HEAD(dev_list);

	if (device->node_type == RDMA_NODE_IB_SWITCH) {
		s = 0;
		e = 0;
	} else {
		s = 1;
		e = device->phys_port_cnt;
	}

	for (p = s; p <= e; ++p) {
		if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND)
			continue;
		dev = ipoib_add_port("ib", device, p);
		if (!IS_ERR(dev)) {
			priv = dev->if_softc;
			list_add_tail(&priv->list, dev_list);
		}
	}

	ib_set_client_data(device, &ipoib_client, dev_list);
}

static void
ipoib_remove_one(struct ib_device *device, void *client_data)
{
	struct ipoib_dev_priv *priv, *tmp;
	struct list_head *dev_list = client_data;

	if (!dev_list)
		return;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	list_for_each_entry_safe(priv, tmp, dev_list, list) {
		if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND)
			continue;

		ipoib_stop(priv);

		ib_unregister_event_handler(&priv->event_handler);

		/* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */

		flush_workqueue(ipoib_workqueue);

		ipoib_dev_cleanup(priv);
		ipoib_detach(priv);
	}

	kfree(dev_list);
}

static int
ipoib_match_dev_addr(const struct sockaddr *addr, struct net_device *dev)
{
	struct ifaddr *ifa;
	int retval = 0;

	IF_ADDR_RLOCK(dev);
	CK_STAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) {
		if (ifa->ifa_addr == NULL ||
		    ifa->ifa_addr->sa_family != addr->sa_family ||
		    ifa->ifa_addr->sa_len != addr->sa_len) {
			continue;
		}
		if (memcmp(ifa->ifa_addr, addr, addr->sa_len) == 0) {
			retval = 1;
			break;
		}
	}
	IF_ADDR_RUNLOCK(dev);

	return (retval);
}

/*
 * ipoib_match_gid_pkey_addr - returns the number of IPoIB netdevs on
 * top of a given IPoIB device that match a pkey_index and address,
 * if any exist.
 *
 * @found_net_dev: contains a matching net_device if the return value
 * >= 1, with a reference held.
 */
static int
ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
    const union ib_gid *gid, u16 pkey_index, const struct sockaddr *addr,
    struct net_device **found_net_dev)
{
	struct ipoib_dev_priv *child_priv;
	int matches = 0;

	if (priv->pkey_index == pkey_index &&
	    (!gid || !memcmp(gid, &priv->local_gid, sizeof(*gid)))) {
		if (addr == NULL || ipoib_match_dev_addr(addr, priv->dev) != 0) {
			if (*found_net_dev == NULL) {
				struct net_device *net_dev;

				if (priv->parent != NULL)
					net_dev = priv->parent;
				else
					net_dev = priv->dev;
				*found_net_dev = net_dev;
				dev_hold(net_dev);
			}
			matches++;
		}
	}

	/* Check child interfaces */
	mutex_lock(&priv->vlan_mutex);
	list_for_each_entry(child_priv, &priv->child_intfs, list) {
		matches += ipoib_match_gid_pkey_addr(child_priv, gid,
		    pkey_index, addr, found_net_dev);
		if (matches > 1)
			break;
	}
	mutex_unlock(&priv->vlan_mutex);

	return matches;
}

/*
 * __ipoib_get_net_dev_by_params - returns the number of matching
 * net_devs found (between 0 and 2).  Also returns the matching
 * net_device in the @net_dev parameter, with a reference held, if
 * the number of matches is >= 1.
 */
static int
__ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
    u16 pkey_index, const union ib_gid *gid,
    const struct sockaddr *addr, struct net_device **net_dev)
{
	struct ipoib_dev_priv *priv;
	int matches = 0;

	*net_dev = NULL;

	list_for_each_entry(priv, dev_list, list) {
		if (priv->port != port)
			continue;

		matches += ipoib_match_gid_pkey_addr(priv, gid, pkey_index,
		    addr, net_dev);

		if (matches > 1)
			break;
	}

	return matches;
}

static struct net_device *
ipoib_get_net_dev_by_params(struct ib_device *dev, u8 port, u16 pkey,
    const union ib_gid *gid, const struct sockaddr *addr, void *client_data)
{
	struct net_device *net_dev;
	struct list_head *dev_list = client_data;
	u16 pkey_index;
	int matches;
	int ret;

	if (!rdma_protocol_ib(dev, port))
		return NULL;

	ret = ib_find_cached_pkey(dev, port, pkey, &pkey_index);
	if (ret)
		return NULL;

	if (!dev_list)
		return NULL;

	/* See if we can find a unique device matching the L2 parameters */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
						gid, NULL, &net_dev);

	switch (matches) {
	case 0:
		return NULL;
	case 1:
		return net_dev;
	}

	dev_put(net_dev);

	/* Couldn't find a unique device with L2 parameters only. Use L3
	 * address to uniquely match the net device */
	matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
						gid, addr, &net_dev);
	switch (matches) {
	case 0:
		return NULL;
	default:
		dev_warn_ratelimited(&dev->dev,
				     "duplicate IP address detected\n");
		/* Fall through */
	case 1:
		return net_dev;
	}
}

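/*
 * FreeBSD maps IPoIB child (P_Key) interfaces onto 802.1Q vlan
 * events: creating a vlan on an ibN parent spawns a subinterface
 * whose P_Key is the vlan tag with the full-membership bit (0x8000)
 * set.  Only 15 bits of P_Key can be expressed this way, and a tag
 * that collides with the parent's own P_Key or with an existing
 * child is rejected.
 */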
static void
ipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct ipoib_dev_priv *parent;
	struct ipoib_dev_priv *priv;
	struct ifnet *dev;
	uint16_t pkey;
	int error;

	if (ifp->if_type != IFT_INFINIBAND)
		return;
	dev = VLAN_DEVAT(ifp, vtag);
	if (dev == NULL)
		return;
	priv = NULL;
	error = 0;
	parent = ifp->if_softc;
	/* We only support 15 bits of pkey. */
	if (vtag & 0x8000)
		return;
	pkey = vtag | 0x8000;	/* Set full membership bit. */
	if (pkey == parent->pkey)
		return;
	/* Check for dups */
	mutex_lock(&parent->vlan_mutex);
	list_for_each_entry(priv, &parent->child_intfs, list) {
		if (priv->pkey == pkey) {
			priv = NULL;
			error = EBUSY;
			goto out;
		}
	}
	priv = ipoib_priv_alloc();
	priv->dev = dev;
	priv->max_ib_mtu = parent->max_ib_mtu;
	priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu;
	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
	error = ipoib_set_dev_features(priv, parent->ca);
	if (error)
		goto out;
	priv->pkey = pkey;
	priv->broadcastaddr[8] = pkey >> 8;
	priv->broadcastaddr[9] = pkey & 0xff;
	dev->if_broadcastaddr = priv->broadcastaddr;
	error = ipoib_dev_init(priv, parent->ca, parent->port);
	if (error)
		goto out;
	priv->parent = parent->dev;
	list_add_tail(&priv->list, &parent->child_intfs);
	VLAN_SETCOOKIE(dev, priv);
	dev->if_start = ipoib_vlan_start;
	dev->if_drv_flags &= ~IFF_DRV_RUNNING;
	dev->if_hdrlen = IPOIB_HEADER_LEN;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		ipoib_open(priv);
	mutex_unlock(&parent->vlan_mutex);
	return;
out:
	mutex_unlock(&parent->vlan_mutex);
	if (priv)
		free(priv, M_TEMP);
	if (error)
		ipoib_warn(parent,
		    "failed to initialize subinterface: device %s, port %d vtag 0x%X",
		    parent->ca->name, parent->port, vtag);
	return;
}

static void
ipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
{
	struct ipoib_dev_priv *parent;
	struct ipoib_dev_priv *priv;
	struct ifnet *dev;
	uint16_t pkey;

	if (ifp->if_type != IFT_INFINIBAND)
		return;

	dev = VLAN_DEVAT(ifp, vtag);
	if (dev)
		VLAN_SETCOOKIE(dev, NULL);
	pkey = vtag | 0x8000;
	parent = ifp->if_softc;
	mutex_lock(&parent->vlan_mutex);
	list_for_each_entry(priv, &parent->child_intfs, list) {
		if (priv->pkey == pkey) {
			ipoib_dev_cleanup(priv);
			list_del(&priv->list);
			break;
		}
	}
	mutex_unlock(&parent->vlan_mutex);
}

eventhandler_tag ipoib_vlan_attach;
eventhandler_tag ipoib_vlan_detach;

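/*
 * Module load: clamp the ring sizes to powers of two within the
 * driver's limits, hook the vlan config events, create the driver
 * workqueue, and register with the SA and ib_core client frameworks.
 * A sketch of the clamping, assuming IPOIB_MIN_QUEUE_SIZE = 2 and
 * IPOIB_MAX_QUEUE_SIZE = 8192 as in ipoib.h: a recv_queue_size
 * tunable of 3000 becomes roundup_pow_of_two(3000) = 4096, which
 * already lies within [2, 8192] and is used as-is.
 */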
static int __init
ipoib_init_module(void)
{
	int ret;

	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);

	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
	ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
						     IPOIB_MIN_QUEUE_SIZE));
#ifdef CONFIG_INFINIBAND_IPOIB_CM
	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
#endif

	ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST);
	ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST);

	/*
	 * We create our own workqueue mainly because we want to be
	 * able to flush it when devices are being removed.  We can't
	 * use schedule_work()/flush_scheduled_work() because both
	 * unregister_netdev() and linkwatch_event take the rtnl lock,
	 * so flush_scheduled_work() can deadlock during device
	 * removal.
	 */
	ipoib_workqueue = create_singlethread_workqueue("ipoib");
	if (!ipoib_workqueue) {
		ret = -ENOMEM;
		goto err_fs;
	}

	ib_sa_register_client(&ipoib_sa_client);

	ret = ib_register_client(&ipoib_client);
	if (ret)
		goto err_sa;

	return 0;

err_sa:
	ib_sa_unregister_client(&ipoib_sa_client);
	destroy_workqueue(ipoib_workqueue);

err_fs:
	return ret;
}

static void __exit
ipoib_cleanup_module(void)
{

	EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach);
	EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach);
	ib_unregister_client(&ipoib_client);
	ib_sa_unregister_client(&ipoib_sa_client);
	destroy_workqueue(ipoib_workqueue);
}
module_init_order(ipoib_init_module, SI_ORDER_FIFTH);
module_exit_order(ipoib_cleanup_module, SI_ORDER_FIFTH);

static int
ipoib_evhand(module_t mod, int event, void *arg)
{
	return (0);
}

static moduledata_t ipoib_mod = {
	.name = "ipoib",
	.evhand = ipoib_evhand,
};

DECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_LAST, SI_ORDER_ANY);
MODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
MODULE_DEPEND(ipoib, if_infiniband, 1, 1, 1);
MODULE_DEPEND(ipoib, linuxkpi, 1, 1, 1);