1219820Sjeff/*
2219820Sjeff * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3219820Sjeff * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4219820Sjeff * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
5219820Sjeff *
6219820Sjeff * This software is available to you under a choice of one of two
7219820Sjeff * licenses.  You may choose to be licensed under the terms of the GNU
8219820Sjeff * General Public License (GPL) Version 2, available from the file
9219820Sjeff * COPYING in the main directory of this source tree, or the
10219820Sjeff * OpenIB.org BSD license below:
11219820Sjeff *
12219820Sjeff *     Redistribution and use in source and binary forms, with or
13219820Sjeff *     without modification, are permitted provided that the following
14219820Sjeff *     conditions are met:
15219820Sjeff *
16219820Sjeff *      - Redistributions of source code must retain the above
17219820Sjeff *        copyright notice, this list of conditions and the following
18219820Sjeff *        disclaimer.
19219820Sjeff *
20219820Sjeff *      - Redistributions in binary form must reproduce the above
21219820Sjeff *        copyright notice, this list of conditions and the following
22219820Sjeff *        disclaimer in the documentation and/or other materials
23219820Sjeff *        provided with the distribution.
24219820Sjeff *
25219820Sjeff * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26219820Sjeff * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27219820Sjeff * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28219820Sjeff * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29219820Sjeff * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30219820Sjeff * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31219820Sjeff * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32219820Sjeff * SOFTWARE.
33219820Sjeff */
34219820Sjeff
35219820Sjeff#include "ipoib.h"
36219820Sjeff
37219820Sjeffstatic	int ipoib_resolvemulti(struct ifnet *, struct sockaddr **,
38219820Sjeff		struct sockaddr *);
39219820Sjeff
40219820Sjeff
41219820Sjeff#include <linux/module.h>
42219820Sjeff
43219820Sjeff#include <linux/slab.h>
44219820Sjeff#include <linux/kernel.h>
45219820Sjeff#include <linux/vmalloc.h>
46219820Sjeff
47219820Sjeff#include <linux/if_arp.h>	/* For ARPHRD_xxx */
48219820Sjeff#include <linux/if_vlan.h>
49219820Sjeff#include <net/ip.h>
50219820Sjeff#include <net/ipv6.h>
51219820Sjeff
52219820SjeffMODULE_AUTHOR("Roland Dreier");
53219820SjeffMODULE_DESCRIPTION("IP-over-InfiniBand net driver");
54219820SjeffMODULE_LICENSE("Dual BSD/GPL");
55219820Sjeff
56219820Sjeffint ipoib_sendq_size = IPOIB_TX_RING_SIZE;
57219820Sjeffint ipoib_recvq_size = IPOIB_RX_RING_SIZE;
58219820Sjeff
59219820Sjeffmodule_param_named(send_queue_size, ipoib_sendq_size, int, 0444);
60219820SjeffMODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
61219820Sjeffmodule_param_named(recv_queue_size, ipoib_recvq_size, int, 0444);
62219820SjeffMODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
63219820Sjeff
64219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
65219820Sjeffint ipoib_debug_level = 1;
66219820Sjeff
67219820Sjeffmodule_param_named(debug_level, ipoib_debug_level, int, 0644);
68219820SjeffMODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
69219820Sjeff#endif
70219820Sjeff
71219820Sjeffstruct ipoib_path_iter {
72219820Sjeff	struct ipoib_dev_priv *priv;
73219820Sjeff	struct ipoib_path  path;
74219820Sjeff};
75219820Sjeff
76219820Sjeffstatic const u8 ipv4_bcast_addr[] = {
77219820Sjeff	0x00, 0xff, 0xff, 0xff,
78219820Sjeff	0xff, 0x12, 0x40, 0x1b,	0x00, 0x00, 0x00, 0x00,
79219820Sjeff	0x00, 0x00, 0x00, 0x00,	0xff, 0xff, 0xff, 0xff
80219820Sjeff};
81219820Sjeff
82219820Sjeffstruct workqueue_struct *ipoib_workqueue;
83219820Sjeff
84219820Sjeffstruct ib_sa_client ipoib_sa_client;
85219820Sjeff
86219820Sjeffstatic void ipoib_add_one(struct ib_device *device);
87219820Sjeffstatic void ipoib_remove_one(struct ib_device *device);
88219820Sjeffstatic void ipoib_start(struct ifnet *dev);
89219820Sjeffstatic int ipoib_output(struct ifnet *ifp, struct mbuf *m,
90249976Sglebius	    const struct sockaddr *dst, struct route *ro);
91219820Sjeffstatic int ipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
92219820Sjeffstatic void ipoib_input(struct ifnet *ifp, struct mbuf *m);
93219820Sjeff
/*
 * Pass an mbuf that still carries its ipoib_header to BPF, but only
 * when a listener is attached to the interface.
 */
#define	IPOIB_MTAP(_ifp, _m)						\
	do {								\
		if (bpf_peers_present((_ifp)->if_bpf)) {		\
			M_ASSERTVALID(_m);				\
			ipoib_mtap_mb((_ifp), (_m));			\
		}							\
	} while (0)
101219820Sjeff
102219820Sjeff/*
103219820Sjeff * This is for clients that have an ipoib_header in the mbuf.
104219820Sjeff */
105219820Sjeffstatic void
106219820Sjeffipoib_mtap_mb(struct ifnet *ifp, struct mbuf *mb)
107219820Sjeff{
108219820Sjeff	struct ipoib_header *ih;
109219820Sjeff	struct ether_header eh;
110219820Sjeff
111219820Sjeff	ih = mtod(mb, struct ipoib_header *);
112219820Sjeff	eh.ether_type = ih->proto;
113219820Sjeff	bcopy(ih->hwaddr, &eh.ether_dhost, ETHER_ADDR_LEN);
114219820Sjeff	bzero(&eh.ether_shost, ETHER_ADDR_LEN);
115219820Sjeff	mb->m_data += sizeof(struct ipoib_header);
116219820Sjeff	mb->m_len -= sizeof(struct ipoib_header);
117219820Sjeff	bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
118219820Sjeff	mb->m_data -= sizeof(struct ipoib_header);
119219820Sjeff	mb->m_len += sizeof(struct ipoib_header);
120219820Sjeff}
121219820Sjeff
122219820Sjeffvoid
123219820Sjeffipoib_mtap_proto(struct ifnet *ifp, struct mbuf *mb, uint16_t proto)
124219820Sjeff{
125219820Sjeff	struct ether_header eh;
126219820Sjeff
127219820Sjeff	eh.ether_type = proto;
128219820Sjeff	bzero(&eh.ether_shost, ETHER_ADDR_LEN);
129219820Sjeff	bzero(&eh.ether_dhost, ETHER_ADDR_LEN);
130219820Sjeff	bpf_mtap2(ifp->if_bpf, &eh, sizeof(eh), mb);
131219820Sjeff}
132219820Sjeff
133219820Sjeffstatic struct ib_client ipoib_client = {
134219820Sjeff	.name   = "ipoib",
135219820Sjeff	.add    = ipoib_add_one,
136219820Sjeff	.remove = ipoib_remove_one
137219820Sjeff};
138219820Sjeff
139219820Sjeffint
140219820Sjeffipoib_open(struct ipoib_dev_priv *priv)
141219820Sjeff{
142219820Sjeff	struct ifnet *dev = priv->dev;
143219820Sjeff
144219820Sjeff	ipoib_dbg(priv, "bringing up interface\n");
145219820Sjeff
146219820Sjeff	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
147219820Sjeff
148219820Sjeff	if (ipoib_pkey_dev_delay_open(priv))
149219820Sjeff		return 0;
150219820Sjeff
151219820Sjeff	if (ipoib_ib_dev_open(priv))
152219820Sjeff		goto err_disable;
153219820Sjeff
154219820Sjeff	if (ipoib_ib_dev_up(priv))
155219820Sjeff		goto err_stop;
156219820Sjeff
157219820Sjeff	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
158219820Sjeff		struct ipoib_dev_priv *cpriv;
159219820Sjeff
160219820Sjeff		/* Bring up any child interfaces too */
161219820Sjeff		mutex_lock(&priv->vlan_mutex);
162219820Sjeff		list_for_each_entry(cpriv, &priv->child_intfs, list)
163219820Sjeff			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
164219820Sjeff				ipoib_open(cpriv);
165219820Sjeff		mutex_unlock(&priv->vlan_mutex);
166219820Sjeff	}
167219820Sjeff	dev->if_drv_flags |= IFF_DRV_RUNNING;
168219820Sjeff	dev->if_drv_flags &= ~IFF_DRV_OACTIVE;
169219820Sjeff
170219820Sjeff	return 0;
171219820Sjeff
172219820Sjefferr_stop:
173219820Sjeff	ipoib_ib_dev_stop(priv, 1);
174219820Sjeff
175219820Sjefferr_disable:
176219820Sjeff	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
177219820Sjeff
178219820Sjeff	return -EINVAL;
179219820Sjeff}
180219820Sjeff
181219820Sjeffstatic void
182219820Sjeffipoib_init(void *arg)
183219820Sjeff{
184219820Sjeff	struct ifnet *dev;
185219820Sjeff	struct ipoib_dev_priv *priv;
186219820Sjeff
187219820Sjeff	priv = arg;
188219820Sjeff	dev = priv->dev;
189219820Sjeff	if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0)
190219820Sjeff		ipoib_open(priv);
191219820Sjeff	queue_work(ipoib_workqueue, &priv->flush_light);
192219820Sjeff}
193219820Sjeff
194219820Sjeff
195219820Sjeffstatic int
196219820Sjeffipoib_stop(struct ipoib_dev_priv *priv)
197219820Sjeff{
198219820Sjeff	struct ifnet *dev = priv->dev;
199219820Sjeff
200219820Sjeff	ipoib_dbg(priv, "stopping interface\n");
201219820Sjeff
202219820Sjeff	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
203219820Sjeff
204219820Sjeff	dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
205219820Sjeff
206219820Sjeff	ipoib_ib_dev_down(priv, 0);
207219820Sjeff	ipoib_ib_dev_stop(priv, 0);
208219820Sjeff
209219820Sjeff	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
210219820Sjeff		struct ipoib_dev_priv *cpriv;
211219820Sjeff
212219820Sjeff		/* Bring down any child interfaces too */
213219820Sjeff		mutex_lock(&priv->vlan_mutex);
214219820Sjeff		list_for_each_entry(cpriv, &priv->child_intfs, list)
215219820Sjeff			if ((cpriv->dev->if_drv_flags & IFF_DRV_RUNNING) != 0)
216219820Sjeff				ipoib_stop(cpriv);
217219820Sjeff		mutex_unlock(&priv->vlan_mutex);
218219820Sjeff	}
219219820Sjeff
220219820Sjeff	return 0;
221219820Sjeff}
222219820Sjeff
223219820Sjeffint
224219820Sjeffipoib_change_mtu(struct ipoib_dev_priv *priv, int new_mtu)
225219820Sjeff{
226219820Sjeff	struct ifnet *dev = priv->dev;
227219820Sjeff
228219820Sjeff	/* dev->if_mtu > 2K ==> connected mode */
229219820Sjeff	if (ipoib_cm_admin_enabled(priv)) {
230219820Sjeff		if (new_mtu > IPOIB_CM_MTU(ipoib_cm_max_mtu(priv)))
231219820Sjeff			return -EINVAL;
232219820Sjeff
233219820Sjeff		if (new_mtu > priv->mcast_mtu)
234219820Sjeff			ipoib_warn(priv, "mtu > %d will cause multicast packet drops.\n",
235219820Sjeff				   priv->mcast_mtu);
236219820Sjeff
237219820Sjeff		dev->if_mtu = new_mtu;
238219820Sjeff		return 0;
239219820Sjeff	}
240219820Sjeff
241219820Sjeff	if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
242219820Sjeff		return -EINVAL;
243219820Sjeff
244219820Sjeff	priv->admin_mtu = new_mtu;
245219820Sjeff
246219820Sjeff	dev->if_mtu = min(priv->mcast_mtu, priv->admin_mtu);
247219820Sjeff
248219820Sjeff	queue_work(ipoib_workqueue, &priv->flush_light);
249219820Sjeff
250219820Sjeff	return 0;
251219820Sjeff}
252219820Sjeff
253219820Sjeffstatic int
254219820Sjeffipoib_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
255219820Sjeff{
256219820Sjeff	struct ipoib_dev_priv *priv = ifp->if_softc;
257219820Sjeff	struct ifaddr *ifa = (struct ifaddr *) data;
258219820Sjeff	struct ifreq *ifr = (struct ifreq *) data;
259219820Sjeff	int error = 0;
260219820Sjeff
261297648Shselasky	/* check if detaching */
262297648Shselasky	if (priv == NULL || priv->gone != 0)
263297648Shselasky		return (ENXIO);
264297648Shselasky
265219820Sjeff	switch (command) {
266219820Sjeff	case SIOCSIFFLAGS:
267219820Sjeff		if (ifp->if_flags & IFF_UP) {
268219820Sjeff			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
269219820Sjeff				error = -ipoib_open(priv);
270219820Sjeff		} else
271219820Sjeff			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
272219820Sjeff				ipoib_stop(priv);
273219820Sjeff		break;
274219820Sjeff	case SIOCADDMULTI:
275219820Sjeff	case SIOCDELMULTI:
276219820Sjeff		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
277219820Sjeff			queue_work(ipoib_workqueue, &priv->restart_task);
278219820Sjeff		break;
279219820Sjeff	case SIOCSIFADDR:
280219820Sjeff		ifp->if_flags |= IFF_UP;
281219820Sjeff
282219820Sjeff		switch (ifa->ifa_addr->sa_family) {
283219820Sjeff#ifdef INET
284219820Sjeff		case AF_INET:
285219820Sjeff			ifp->if_init(ifp->if_softc);	/* before arpwhohas */
286219820Sjeff			arp_ifinit(ifp, ifa);
287219820Sjeff			break;
288219820Sjeff#endif
289219820Sjeff		default:
290219820Sjeff			ifp->if_init(ifp->if_softc);
291219820Sjeff			break;
292219820Sjeff		}
293219820Sjeff		break;
294219820Sjeff
295219820Sjeff	case SIOCGIFADDR:
296332160Sbrooks		bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0],
297332160Sbrooks		    INFINIBAND_ALEN);
298219820Sjeff		break;
299219820Sjeff
300219820Sjeff	case SIOCSIFMTU:
301219820Sjeff		/*
302219820Sjeff		 * Set the interface MTU.
303219820Sjeff		 */
304219820Sjeff		error = -ipoib_change_mtu(priv, ifr->ifr_mtu);
305219820Sjeff		break;
306219820Sjeff	default:
307219820Sjeff		error = EINVAL;
308219820Sjeff		break;
309219820Sjeff	}
310219820Sjeff	return (error);
311219820Sjeff}
312219820Sjeff
313219820Sjeff
314219820Sjeffstatic struct ipoib_path *
315219820Sjeff__path_find(struct ipoib_dev_priv *priv, void *gid)
316219820Sjeff{
317219820Sjeff	struct rb_node *n = priv->path_tree.rb_node;
318219820Sjeff	struct ipoib_path *path;
319219820Sjeff	int ret;
320219820Sjeff
321219820Sjeff	while (n) {
322219820Sjeff		path = rb_entry(n, struct ipoib_path, rb_node);
323219820Sjeff
324219820Sjeff		ret = memcmp(gid, path->pathrec.dgid.raw,
325219820Sjeff			     sizeof (union ib_gid));
326219820Sjeff
327219820Sjeff		if (ret < 0)
328219820Sjeff			n = n->rb_left;
329219820Sjeff		else if (ret > 0)
330219820Sjeff			n = n->rb_right;
331219820Sjeff		else
332219820Sjeff			return path;
333219820Sjeff	}
334219820Sjeff
335219820Sjeff	return NULL;
336219820Sjeff}
337219820Sjeff
338219820Sjeffstatic int
339219820Sjeff__path_add(struct ipoib_dev_priv *priv, struct ipoib_path *path)
340219820Sjeff{
341219820Sjeff	struct rb_node **n = &priv->path_tree.rb_node;
342219820Sjeff	struct rb_node *pn = NULL;
343219820Sjeff	struct ipoib_path *tpath;
344219820Sjeff	int ret;
345219820Sjeff
346219820Sjeff	while (*n) {
347219820Sjeff		pn = *n;
348219820Sjeff		tpath = rb_entry(pn, struct ipoib_path, rb_node);
349219820Sjeff
350219820Sjeff		ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
351219820Sjeff			     sizeof (union ib_gid));
352219820Sjeff		if (ret < 0)
353219820Sjeff			n = &pn->rb_left;
354219820Sjeff		else if (ret > 0)
355219820Sjeff			n = &pn->rb_right;
356219820Sjeff		else
357219820Sjeff			return -EEXIST;
358219820Sjeff	}
359219820Sjeff
360219820Sjeff	rb_link_node(&path->rb_node, pn, n);
361219820Sjeff	rb_insert_color(&path->rb_node, &priv->path_tree);
362219820Sjeff
363219820Sjeff	list_add_tail(&path->list, &priv->path_list);
364219820Sjeff
365219820Sjeff	return 0;
366219820Sjeff}
367219820Sjeff
368219820Sjeffvoid
369219820Sjeffipoib_path_free(struct ipoib_dev_priv *priv, struct ipoib_path *path)
370219820Sjeff{
371219820Sjeff
372219820Sjeff	_IF_DRAIN(&path->queue);
373219820Sjeff
374219820Sjeff	if (path->ah)
375219820Sjeff		ipoib_put_ah(path->ah);
376219820Sjeff	if (ipoib_cm_get(path))
377219820Sjeff		ipoib_cm_destroy_tx(ipoib_cm_get(path));
378219820Sjeff
379219820Sjeff	kfree(path);
380219820Sjeff}
381219820Sjeff
382219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
383219820Sjeff
384219820Sjeffstruct ipoib_path_iter *
385219820Sjeffipoib_path_iter_init(struct ipoib_dev_priv *priv)
386219820Sjeff{
387219820Sjeff	struct ipoib_path_iter *iter;
388219820Sjeff
389219820Sjeff	iter = kmalloc(sizeof *iter, GFP_KERNEL);
390219820Sjeff	if (!iter)
391219820Sjeff		return NULL;
392219820Sjeff
393219820Sjeff	iter->priv = priv;
394219820Sjeff	memset(iter->path.pathrec.dgid.raw, 0, 16);
395219820Sjeff
396219820Sjeff	if (ipoib_path_iter_next(iter)) {
397219820Sjeff		kfree(iter);
398219820Sjeff		return NULL;
399219820Sjeff	}
400219820Sjeff
401219820Sjeff	return iter;
402219820Sjeff}
403219820Sjeff
404219820Sjeffint
405219820Sjeffipoib_path_iter_next(struct ipoib_path_iter *iter)
406219820Sjeff{
407219820Sjeff	struct ipoib_dev_priv *priv = iter->priv;
408219820Sjeff	struct rb_node *n;
409219820Sjeff	struct ipoib_path *path;
410219820Sjeff	int ret = 1;
411219820Sjeff
412219820Sjeff	spin_lock_irq(&priv->lock);
413219820Sjeff
414219820Sjeff	n = rb_first(&priv->path_tree);
415219820Sjeff
416219820Sjeff	while (n) {
417219820Sjeff		path = rb_entry(n, struct ipoib_path, rb_node);
418219820Sjeff
419219820Sjeff		if (memcmp(iter->path.pathrec.dgid.raw, path->pathrec.dgid.raw,
420219820Sjeff			   sizeof (union ib_gid)) < 0) {
421219820Sjeff			iter->path = *path;
422219820Sjeff			ret = 0;
423219820Sjeff			break;
424219820Sjeff		}
425219820Sjeff
426219820Sjeff		n = rb_next(n);
427219820Sjeff	}
428219820Sjeff
429219820Sjeff	spin_unlock_irq(&priv->lock);
430219820Sjeff
431219820Sjeff	return ret;
432219820Sjeff}
433219820Sjeff
434219820Sjeffvoid
435219820Sjeffipoib_path_iter_read(struct ipoib_path_iter *iter, struct ipoib_path *path)
436219820Sjeff{
437219820Sjeff	*path = iter->path;
438219820Sjeff}
439219820Sjeff
440219820Sjeff#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
441219820Sjeff
442219820Sjeffvoid
443219820Sjeffipoib_mark_paths_invalid(struct ipoib_dev_priv *priv)
444219820Sjeff{
445219820Sjeff	struct ipoib_path *path, *tp;
446219820Sjeff
447219820Sjeff	spin_lock_irq(&priv->lock);
448219820Sjeff
449219820Sjeff	list_for_each_entry_safe(path, tp, &priv->path_list, list) {
450219820Sjeff		ipoib_dbg(priv, "mark path LID 0x%04x GID %16D invalid\n",
451219820Sjeff			be16_to_cpu(path->pathrec.dlid),
452219820Sjeff			path->pathrec.dgid.raw, ":");
453219820Sjeff		path->valid =  0;
454219820Sjeff	}
455219820Sjeff
456219820Sjeff	spin_unlock_irq(&priv->lock);
457219820Sjeff}
458219820Sjeff
459219820Sjeffvoid
460219820Sjeffipoib_flush_paths(struct ipoib_dev_priv *priv)
461219820Sjeff{
462219820Sjeff	struct ipoib_path *path, *tp;
463219820Sjeff	LIST_HEAD(remove_list);
464219820Sjeff	unsigned long flags;
465219820Sjeff
466219820Sjeff	spin_lock_irqsave(&priv->lock, flags);
467219820Sjeff
468219820Sjeff	list_splice_init(&priv->path_list, &remove_list);
469219820Sjeff
470219820Sjeff	list_for_each_entry(path, &remove_list, list)
471219820Sjeff		rb_erase(&path->rb_node, &priv->path_tree);
472219820Sjeff
473219820Sjeff	list_for_each_entry_safe(path, tp, &remove_list, list) {
474219820Sjeff		if (path->query)
475219820Sjeff			ib_sa_cancel_query(path->query_id, path->query);
476219820Sjeff		spin_unlock_irqrestore(&priv->lock, flags);
477219820Sjeff		wait_for_completion(&path->done);
478219820Sjeff		ipoib_path_free(priv, path);
479219820Sjeff		spin_lock_irqsave(&priv->lock, flags);
480219820Sjeff	}
481219820Sjeff
482219820Sjeff	spin_unlock_irqrestore(&priv->lock, flags);
483219820Sjeff}
484219820Sjeff
485219820Sjeffstatic void
486219820Sjeffpath_rec_completion(int status, struct ib_sa_path_rec *pathrec, void *path_ptr)
487219820Sjeff{
488219820Sjeff	struct ipoib_path *path = path_ptr;
489219820Sjeff	struct ipoib_dev_priv *priv = path->priv;
490219820Sjeff	struct ifnet *dev = priv->dev;
491219820Sjeff	struct ipoib_ah *ah = NULL;
492219820Sjeff	struct ipoib_ah *old_ah = NULL;
493219820Sjeff	struct ifqueue mbqueue;
494219820Sjeff	struct mbuf *mb;
495219820Sjeff	unsigned long flags;
496219820Sjeff
497219820Sjeff	if (!status)
498219820Sjeff		ipoib_dbg(priv, "PathRec LID 0x%04x for GID %16D\n",
499219820Sjeff			  be16_to_cpu(pathrec->dlid), pathrec->dgid.raw, ":");
500219820Sjeff	else
501219820Sjeff		ipoib_dbg(priv, "PathRec status %d for GID %16D\n",
502219820Sjeff			  status, path->pathrec.dgid.raw, ":");
503219820Sjeff
504219820Sjeff	bzero(&mbqueue, sizeof(mbqueue));
505219820Sjeff
506219820Sjeff	if (!status) {
507219820Sjeff		struct ib_ah_attr av;
508219820Sjeff
509219820Sjeff		if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av))
510219820Sjeff			ah = ipoib_create_ah(priv, priv->pd, &av);
511219820Sjeff	}
512219820Sjeff
513219820Sjeff	spin_lock_irqsave(&priv->lock, flags);
514219820Sjeff
515219820Sjeff	if (ah) {
516219820Sjeff		path->pathrec = *pathrec;
517219820Sjeff
518219820Sjeff		old_ah   = path->ah;
519219820Sjeff		path->ah = ah;
520219820Sjeff
521219820Sjeff		ipoib_dbg(priv, "created address handle %p for LID 0x%04x, SL %d\n",
522219820Sjeff			  ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
523219820Sjeff
524219820Sjeff		for (;;) {
525219820Sjeff			_IF_DEQUEUE(&path->queue, mb);
526219820Sjeff			if (mb == NULL)
527219820Sjeff				break;
528219820Sjeff			_IF_ENQUEUE(&mbqueue, mb);
529219820Sjeff		}
530219820Sjeff
531219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM
532219820Sjeff		if (ipoib_cm_enabled(priv, path->hwaddr) && !ipoib_cm_get(path))
533219820Sjeff			ipoib_cm_set(path, ipoib_cm_create_tx(priv, path));
534219820Sjeff#endif
535219820Sjeff
536219820Sjeff		path->valid = 1;
537219820Sjeff	}
538219820Sjeff
539219820Sjeff	path->query = NULL;
540219820Sjeff	complete(&path->done);
541219820Sjeff
542219820Sjeff	spin_unlock_irqrestore(&priv->lock, flags);
543219820Sjeff
544219820Sjeff	if (old_ah)
545219820Sjeff		ipoib_put_ah(old_ah);
546219820Sjeff
547219820Sjeff	for (;;) {
548219820Sjeff		_IF_DEQUEUE(&mbqueue, mb);
549219820Sjeff		if (mb == NULL)
550219820Sjeff			break;
551219820Sjeff		mb->m_pkthdr.rcvif = dev;
552219820Sjeff		if (dev->if_transmit(dev, mb))
553219820Sjeff			ipoib_warn(priv, "dev_queue_xmit failed "
554219820Sjeff				   "to requeue packet\n");
555219820Sjeff	}
556219820Sjeff}
557219820Sjeff
558219820Sjeffstatic struct ipoib_path *
559219820Sjeffpath_rec_create(struct ipoib_dev_priv *priv, uint8_t *hwaddr)
560219820Sjeff{
561219820Sjeff	struct ipoib_path *path;
562219820Sjeff
563219820Sjeff	if (!priv->broadcast)
564219820Sjeff		return NULL;
565219820Sjeff
566219820Sjeff	path = kzalloc(sizeof *path, GFP_ATOMIC);
567219820Sjeff	if (!path)
568219820Sjeff		return NULL;
569219820Sjeff
570219820Sjeff	path->priv = priv;
571219820Sjeff
572219820Sjeff	bzero(&path->queue, sizeof(path->queue));
573219820Sjeff
574219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM
575219820Sjeff	memcpy(&path->hwaddr, hwaddr, INFINIBAND_ALEN);
576219820Sjeff#endif
577219820Sjeff	memcpy(path->pathrec.dgid.raw, &hwaddr[4], sizeof (union ib_gid));
578219820Sjeff	path->pathrec.sgid	    = priv->local_gid;
579219820Sjeff	path->pathrec.pkey	    = cpu_to_be16(priv->pkey);
580219820Sjeff	path->pathrec.numb_path     = 1;
581219820Sjeff	path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
582219820Sjeff
583219820Sjeff	return path;
584219820Sjeff}
585219820Sjeff
586219820Sjeffstatic int
587219820Sjeffpath_rec_start(struct ipoib_dev_priv *priv, struct ipoib_path *path)
588219820Sjeff{
589219820Sjeff	struct ifnet *dev = priv->dev;
590219820Sjeff
591219820Sjeff	ib_sa_comp_mask comp_mask = IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
592219820Sjeff	struct ib_sa_path_rec p_rec;
593219820Sjeff
594219820Sjeff	p_rec = path->pathrec;
595219820Sjeff	p_rec.mtu_selector = IB_SA_GT;
596219820Sjeff
597219820Sjeff	switch (roundup_pow_of_two(dev->if_mtu + IPOIB_ENCAP_LEN)) {
598219820Sjeff	case 512:
599219820Sjeff		p_rec.mtu = IB_MTU_256;
600219820Sjeff		break;
601219820Sjeff	case 1024:
602219820Sjeff		p_rec.mtu = IB_MTU_512;
603219820Sjeff		break;
604219820Sjeff	case 2048:
605219820Sjeff		p_rec.mtu = IB_MTU_1024;
606219820Sjeff		break;
607219820Sjeff	case 4096:
608219820Sjeff		p_rec.mtu = IB_MTU_2048;
609219820Sjeff		break;
610219820Sjeff	default:
611219820Sjeff		/* Wildcard everything */
612219820Sjeff		comp_mask = 0;
613219820Sjeff		p_rec.mtu = 0;
614219820Sjeff		p_rec.mtu_selector = 0;
615219820Sjeff	}
616219820Sjeff
617219820Sjeff	ipoib_dbg(priv, "Start path record lookup for %16D MTU > %d\n",
618219820Sjeff		  p_rec.dgid.raw, ":",
619219820Sjeff		  comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);
620219820Sjeff
621219820Sjeff	init_completion(&path->done);
622219820Sjeff
623219820Sjeff	path->query_id =
624219820Sjeff		ib_sa_path_rec_get(&ipoib_sa_client, priv->ca, priv->port,
625219820Sjeff				   &p_rec, comp_mask		|
626219820Sjeff				   IB_SA_PATH_REC_DGID		|
627219820Sjeff				   IB_SA_PATH_REC_SGID		|
628219820Sjeff				   IB_SA_PATH_REC_NUMB_PATH	|
629219820Sjeff				   IB_SA_PATH_REC_TRAFFIC_CLASS |
630219820Sjeff				   IB_SA_PATH_REC_PKEY,
631219820Sjeff				   1000, GFP_ATOMIC,
632219820Sjeff				   path_rec_completion,
633219820Sjeff				   path, &path->query);
634219820Sjeff	if (path->query_id < 0) {
635219820Sjeff		ipoib_warn(priv, "ib_sa_path_rec_get failed: %d\n", path->query_id);
636219820Sjeff		path->query = NULL;
637219820Sjeff		complete(&path->done);
638219820Sjeff		return path->query_id;
639219820Sjeff	}
640219820Sjeff
641219820Sjeff	return 0;
642219820Sjeff}
643219820Sjeff
644219820Sjeffstatic void
645219820Sjeffipoib_unicast_send(struct mbuf *mb, struct ipoib_dev_priv *priv, struct ipoib_header *eh)
646219820Sjeff{
647219820Sjeff	struct ipoib_path *path;
648219820Sjeff
649219820Sjeff	path = __path_find(priv, eh->hwaddr + 4);
650219820Sjeff	if (!path || !path->valid) {
651219820Sjeff		int new_path = 0;
652219820Sjeff
653219820Sjeff		if (!path) {
654219820Sjeff			path = path_rec_create(priv, eh->hwaddr);
655219820Sjeff			new_path = 1;
656219820Sjeff		}
657219820Sjeff		if (path) {
658219820Sjeff			_IF_ENQUEUE(&path->queue, mb);
659219820Sjeff			if (!path->query && path_rec_start(priv, path)) {
660219820Sjeff				if (new_path)
661219820Sjeff					ipoib_path_free(priv, path);
662219820Sjeff				return;
663219820Sjeff			} else
664219820Sjeff				__path_add(priv, path);
665219820Sjeff		} else {
666219820Sjeff			++priv->dev->if_oerrors;
667219820Sjeff			m_freem(mb);
668219820Sjeff		}
669219820Sjeff
670219820Sjeff		return;
671219820Sjeff	}
672219820Sjeff
673219820Sjeff	if (ipoib_cm_get(path) && ipoib_cm_up(path)) {
674219820Sjeff		ipoib_cm_send(priv, mb, ipoib_cm_get(path));
675219820Sjeff	} else if (path->ah) {
676219820Sjeff		ipoib_send(priv, mb, path->ah, IPOIB_QPN(eh->hwaddr));
677219820Sjeff	} else if ((path->query || !path_rec_start(priv, path)) &&
678219820Sjeff		    path->queue.ifq_len < IPOIB_MAX_PATH_REC_QUEUE) {
679219820Sjeff		_IF_ENQUEUE(&path->queue, mb);
680219820Sjeff	} else {
681219820Sjeff		++priv->dev->if_oerrors;
682219820Sjeff		m_freem(mb);
683219820Sjeff	}
684219820Sjeff}
685219820Sjeff
686219820Sjeffstatic int
687219820Sjeffipoib_send_one(struct ipoib_dev_priv *priv, struct mbuf *mb)
688219820Sjeff{
689219820Sjeff	struct ipoib_header *eh;
690219820Sjeff
691219820Sjeff	eh = mtod(mb, struct ipoib_header *);
692219820Sjeff	if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
693219820Sjeff		/* Add in the P_Key for multicast*/
694219820Sjeff		eh->hwaddr[8] = (priv->pkey >> 8) & 0xff;
695219820Sjeff		eh->hwaddr[9] = priv->pkey & 0xff;
696219820Sjeff
697219820Sjeff		ipoib_mcast_send(priv, eh->hwaddr + 4, mb);
698219820Sjeff	} else
699219820Sjeff		ipoib_unicast_send(mb, priv, eh);
700219820Sjeff
701219820Sjeff	return 0;
702219820Sjeff}
703219820Sjeff
704219820Sjeff
705219820Sjeffstatic void
706219820Sjeff_ipoib_start(struct ifnet *dev, struct ipoib_dev_priv *priv)
707219820Sjeff{
708219820Sjeff	struct mbuf *mb;
709219820Sjeff
710219820Sjeff	if ((dev->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
711219820Sjeff	    IFF_DRV_RUNNING)
712219820Sjeff		return;
713219820Sjeff
714219820Sjeff	spin_lock(&priv->lock);
715219820Sjeff	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd) &&
716219820Sjeff	    (dev->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
717219820Sjeff		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
718219820Sjeff		if (mb == NULL)
719219820Sjeff			break;
720219820Sjeff		IPOIB_MTAP(dev, mb);
721219820Sjeff		ipoib_send_one(priv, mb);
722219820Sjeff	}
723219820Sjeff	spin_unlock(&priv->lock);
724219820Sjeff}
725219820Sjeff
726219820Sjeffstatic void
727219820Sjeffipoib_start(struct ifnet *dev)
728219820Sjeff{
729219820Sjeff	_ipoib_start(dev, dev->if_softc);
730219820Sjeff}
731219820Sjeff
732219820Sjeffstatic void
733219820Sjeffipoib_vlan_start(struct ifnet *dev)
734219820Sjeff{
735219820Sjeff	struct ipoib_dev_priv *priv;
736219820Sjeff	struct mbuf *mb;
737219820Sjeff
738219820Sjeff	priv = VLAN_COOKIE(dev);
739219820Sjeff	if (priv != NULL)
740219820Sjeff		return _ipoib_start(dev, priv);
741219820Sjeff	while (!IFQ_DRV_IS_EMPTY(&dev->if_snd)) {
742219820Sjeff		IFQ_DRV_DEQUEUE(&dev->if_snd, mb);
743219820Sjeff		if (mb == NULL)
744219820Sjeff			break;
745219820Sjeff		m_freem(mb);
746219820Sjeff		dev->if_oerrors++;
747219820Sjeff	}
748219820Sjeff}
749219820Sjeff
750219820Sjeffint
751219820Sjeffipoib_dev_init(struct ipoib_dev_priv *priv, struct ib_device *ca, int port)
752219820Sjeff{
753219820Sjeff
754219820Sjeff	/* Allocate RX/TX "rings" to hold queued mbs */
755219820Sjeff	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
756219820Sjeff				GFP_KERNEL);
757219820Sjeff	if (!priv->rx_ring) {
758219820Sjeff		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
759219820Sjeff		       ca->name, ipoib_recvq_size);
760219820Sjeff		goto out;
761219820Sjeff	}
762219820Sjeff
763219820Sjeff	priv->tx_ring = kzalloc(ipoib_sendq_size * sizeof *priv->tx_ring, GFP_KERNEL);
764219820Sjeff	if (!priv->tx_ring) {
765219820Sjeff		printk(KERN_WARNING "%s: failed to allocate TX ring (%d entries)\n",
766219820Sjeff		       ca->name, ipoib_sendq_size);
767219820Sjeff		goto out_rx_ring_cleanup;
768219820Sjeff	}
769219820Sjeff	memset(priv->tx_ring, 0, ipoib_sendq_size * sizeof *priv->tx_ring);
770219820Sjeff
771219820Sjeff	/* priv->tx_head, tx_tail & tx_outstanding are already 0 */
772219820Sjeff
773219820Sjeff	if (ipoib_ib_dev_init(priv, ca, port))
774219820Sjeff		goto out_tx_ring_cleanup;
775219820Sjeff
776219820Sjeff	return 0;
777219820Sjeff
778219820Sjeffout_tx_ring_cleanup:
779219820Sjeff	kfree(priv->tx_ring);
780219820Sjeff
781219820Sjeffout_rx_ring_cleanup:
782219820Sjeff	kfree(priv->rx_ring);
783219820Sjeff
784219820Sjeffout:
785219820Sjeff	return -ENOMEM;
786219820Sjeff}
787219820Sjeff
788219820Sjeffstatic void
789219820Sjeffipoib_detach(struct ipoib_dev_priv *priv)
790219820Sjeff{
791219820Sjeff	struct ifnet *dev;
792219820Sjeff
793219820Sjeff	dev = priv->dev;
794219820Sjeff	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
795297648Shselasky		priv->gone = 1;
796219820Sjeff		bpfdetach(dev);
797219820Sjeff		if_detach(dev);
798219820Sjeff		if_free(dev);
799219820Sjeff	} else
800219820Sjeff		VLAN_SETCOOKIE(priv->dev, NULL);
801219820Sjeff
802219820Sjeff	free(priv, M_TEMP);
803219820Sjeff}
804219820Sjeff
805219820Sjeffvoid
806219820Sjeffipoib_dev_cleanup(struct ipoib_dev_priv *priv)
807219820Sjeff{
808219820Sjeff	struct ipoib_dev_priv *cpriv, *tcpriv;
809219820Sjeff
810219820Sjeff	/* Delete any child interfaces first */
811219820Sjeff	list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
812219820Sjeff		ipoib_dev_cleanup(cpriv);
813219820Sjeff		ipoib_detach(cpriv);
814219820Sjeff	}
815219820Sjeff
816219820Sjeff	ipoib_ib_dev_cleanup(priv);
817219820Sjeff
818219820Sjeff	kfree(priv->rx_ring);
819219820Sjeff	kfree(priv->tx_ring);
820219820Sjeff
821219820Sjeff	priv->rx_ring = NULL;
822219820Sjeff	priv->tx_ring = NULL;
823219820Sjeff}
824219820Sjeff
825219820Sjeffstatic volatile int ipoib_unit;
826219820Sjeff
827219820Sjeffstatic struct ipoib_dev_priv *
828219820Sjeffipoib_priv_alloc(void)
829219820Sjeff{
830219820Sjeff	struct ipoib_dev_priv *priv;
831219820Sjeff
832219820Sjeff	priv = malloc(sizeof(struct ipoib_dev_priv), M_TEMP, M_ZERO|M_WAITOK);
833219820Sjeff	spin_lock_init(&priv->lock);
834219820Sjeff	mutex_init(&priv->vlan_mutex);
835219820Sjeff	INIT_LIST_HEAD(&priv->path_list);
836219820Sjeff	INIT_LIST_HEAD(&priv->child_intfs);
837219820Sjeff	INIT_LIST_HEAD(&priv->dead_ahs);
838219820Sjeff	INIT_LIST_HEAD(&priv->multicast_list);
839219820Sjeff	INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
840219820Sjeff	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
841219820Sjeff	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
842219820Sjeff	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
843219820Sjeff	INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
844219820Sjeff	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
845219820Sjeff	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
846219820Sjeff	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
847219820Sjeff	memcpy(priv->broadcastaddr, ipv4_bcast_addr, INFINIBAND_ALEN);
848219820Sjeff
849219820Sjeff	return (priv);
850219820Sjeff}
851219820Sjeff
852219820Sjeffstruct ipoib_dev_priv *
853219820Sjeffipoib_intf_alloc(const char *name)
854219820Sjeff{
855219820Sjeff	struct ipoib_dev_priv *priv;
856219820Sjeff	struct sockaddr_dl *sdl;
857219820Sjeff	struct ifnet *dev;
858219820Sjeff
859219820Sjeff	priv = ipoib_priv_alloc();
860219820Sjeff	dev = priv->dev = if_alloc(IFT_INFINIBAND);
861219820Sjeff	if (!dev) {
862219820Sjeff		free(priv, M_TEMP);
863219820Sjeff		return NULL;
864219820Sjeff	}
865219820Sjeff	dev->if_softc = priv;
866219820Sjeff	if_initname(dev, name, atomic_fetchadd_int(&ipoib_unit, 1));
867219820Sjeff	dev->if_flags = IFF_BROADCAST | IFF_MULTICAST;
868219820Sjeff	dev->if_addrlen = INFINIBAND_ALEN;
869219820Sjeff	dev->if_hdrlen = IPOIB_HEADER_LEN;
870219820Sjeff	if_attach(dev);
871219820Sjeff	dev->if_init = ipoib_init;
872219820Sjeff	dev->if_ioctl = ipoib_ioctl;
873219820Sjeff	dev->if_start = ipoib_start;
874219820Sjeff	dev->if_output = ipoib_output;
875219820Sjeff	dev->if_input = ipoib_input;
876219820Sjeff	dev->if_resolvemulti = ipoib_resolvemulti;
877241696Sjhb	if_initbaudrate(dev, IF_Gbps(10));
878219820Sjeff	dev->if_broadcastaddr = priv->broadcastaddr;
879219820Sjeff	dev->if_snd.ifq_maxlen = ipoib_sendq_size * 2;
880219820Sjeff	sdl = (struct sockaddr_dl *)dev->if_addr->ifa_addr;
881219820Sjeff	sdl->sdl_type = IFT_INFINIBAND;
882219820Sjeff	sdl->sdl_alen = dev->if_addrlen;
883219820Sjeff	priv->dev = dev;
884219820Sjeff	if_link_state_change(dev, LINK_STATE_DOWN);
885219820Sjeff	bpfattach(dev, DLT_EN10MB, ETHER_HDR_LEN);
886219820Sjeff
887219820Sjeff	return dev->if_softc;
888219820Sjeff}
889219820Sjeff
890219820Sjeffint
891219820Sjeffipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
892219820Sjeff{
893219820Sjeff	struct ib_device_attr *device_attr;
894219820Sjeff	int result = -ENOMEM;
895219820Sjeff
896219820Sjeff	device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
897219820Sjeff	if (!device_attr) {
898219820Sjeff		printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
899219820Sjeff		       hca->name, sizeof *device_attr);
900219820Sjeff		return result;
901219820Sjeff	}
902219820Sjeff
903219820Sjeff	result = ib_query_device(hca, device_attr);
904219820Sjeff	if (result) {
905219820Sjeff		printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
906219820Sjeff		       hca->name, result);
907219820Sjeff		kfree(device_attr);
908219820Sjeff		return result;
909219820Sjeff	}
910219820Sjeff	priv->hca_caps = device_attr->device_cap_flags;
911219820Sjeff
912219820Sjeff	kfree(device_attr);
913219820Sjeff
914219820Sjeff	priv->dev->if_hwassist = 0;
915219820Sjeff	priv->dev->if_capabilities = 0;
916219820Sjeff
917219820Sjeff#ifndef CONFIG_INFINIBAND_IPOIB_CM
918219820Sjeff	if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
919219820Sjeff		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
920219820Sjeff		priv->dev->if_hwassist = CSUM_IP | CSUM_TCP | CSUM_UDP;
921219820Sjeff		priv->dev->if_capabilities = IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM;
922219820Sjeff	}
923219820Sjeff
924219820Sjeff#if 0
925220555Sbz	if (priv->dev->features & NETIF_F_SG && priv->hca_caps & IB_DEVICE_UD_TSO) {
926220555Sbz		priv->dev->if_capabilities |= IFCAP_TSO4;
927220555Sbz		priv->dev->if_hwassist |= CSUM_TSO;
928220555Sbz	}
929219820Sjeff#endif
930219820Sjeff#endif
931219820Sjeff	priv->dev->if_capabilities |=
932219820Sjeff	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_LINKSTATE;
933219820Sjeff	priv->dev->if_capenable = priv->dev->if_capabilities;
934219820Sjeff
935219820Sjeff	return 0;
936219820Sjeff}
937219820Sjeff
938219820Sjeff
939219820Sjeffstatic struct ifnet *
940219820Sjeffipoib_add_port(const char *format, struct ib_device *hca, u8 port)
941219820Sjeff{
942219820Sjeff	struct ipoib_dev_priv *priv;
943219820Sjeff	struct ib_port_attr attr;
944219820Sjeff	int result = -ENOMEM;
945219820Sjeff
946219820Sjeff	priv = ipoib_intf_alloc(format);
947219820Sjeff	if (!priv)
948219820Sjeff		goto alloc_mem_failed;
949219820Sjeff
950219820Sjeff	if (!ib_query_port(hca, port, &attr))
951219820Sjeff		priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
952219820Sjeff	else {
953219820Sjeff		printk(KERN_WARNING "%s: ib_query_port %d failed\n",
954219820Sjeff		       hca->name, port);
955219820Sjeff		goto device_init_failed;
956219820Sjeff	}
957219820Sjeff
958219820Sjeff	/* MTU will be reset when mcast join happens */
959219820Sjeff	priv->dev->if_mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
960219820Sjeff	priv->mcast_mtu = priv->admin_mtu = priv->dev->if_mtu;
961219820Sjeff
962219820Sjeff	result = ib_query_pkey(hca, port, 0, &priv->pkey);
963219820Sjeff	if (result) {
964219820Sjeff		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
965219820Sjeff		       hca->name, port, result);
966219820Sjeff		goto device_init_failed;
967219820Sjeff	}
968219820Sjeff
969219820Sjeff	if (ipoib_set_dev_features(priv, hca))
970219820Sjeff		goto device_init_failed;
971219820Sjeff
972219820Sjeff	/*
973219820Sjeff	 * Set the full membership bit, so that we join the right
974219820Sjeff	 * broadcast group, etc.
975219820Sjeff	 */
976219820Sjeff	priv->pkey |= 0x8000;
977219820Sjeff
978219820Sjeff	priv->broadcastaddr[8] = priv->pkey >> 8;
979219820Sjeff	priv->broadcastaddr[9] = priv->pkey & 0xff;
980219820Sjeff
981219820Sjeff	result = ib_query_gid(hca, port, 0, &priv->local_gid);
982219820Sjeff	if (result) {
983219820Sjeff		printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
984219820Sjeff		       hca->name, port, result);
985219820Sjeff		goto device_init_failed;
986219820Sjeff	}
987219820Sjeff	memcpy(IF_LLADDR(priv->dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
988219820Sjeff
989219820Sjeff	result = ipoib_dev_init(priv, hca, port);
990219820Sjeff	if (result < 0) {
991219820Sjeff		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
992219820Sjeff		       hca->name, port, result);
993219820Sjeff		goto device_init_failed;
994219820Sjeff	}
995219820Sjeff	if (ipoib_cm_admin_enabled(priv))
996219820Sjeff		priv->dev->if_mtu = IPOIB_CM_MTU(ipoib_cm_max_mtu(priv));
997219820Sjeff
998219820Sjeff	INIT_IB_EVENT_HANDLER(&priv->event_handler,
999219820Sjeff			      priv->ca, ipoib_event);
1000219820Sjeff	result = ib_register_event_handler(&priv->event_handler);
1001219820Sjeff	if (result < 0) {
1002219820Sjeff		printk(KERN_WARNING "%s: ib_register_event_handler failed for "
1003219820Sjeff		       "port %d (ret = %d)\n",
1004219820Sjeff		       hca->name, port, result);
1005219820Sjeff		goto event_failed;
1006219820Sjeff	}
1007219820Sjeff	if_printf(priv->dev, "Attached to %s port %d\n", hca->name, port);
1008219820Sjeff
1009219820Sjeff	return priv->dev;
1010219820Sjeff
1011219820Sjeffevent_failed:
1012219820Sjeff	ipoib_dev_cleanup(priv);
1013219820Sjeff
1014219820Sjeffdevice_init_failed:
1015219820Sjeff	ipoib_detach(priv);
1016219820Sjeff
1017219820Sjeffalloc_mem_failed:
1018219820Sjeff	return ERR_PTR(result);
1019219820Sjeff}
1020219820Sjeff
1021219820Sjeffstatic void
1022219820Sjeffipoib_add_one(struct ib_device *device)
1023219820Sjeff{
1024219820Sjeff	struct list_head *dev_list;
1025219820Sjeff	struct ifnet *dev;
1026219820Sjeff	struct ipoib_dev_priv *priv;
1027219820Sjeff	int s, e, p;
1028219820Sjeff
1029219820Sjeff	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
1030219820Sjeff		return;
1031219820Sjeff
1032219820Sjeff	dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
1033219820Sjeff	if (!dev_list)
1034219820Sjeff		return;
1035219820Sjeff
1036219820Sjeff	INIT_LIST_HEAD(dev_list);
1037219820Sjeff
1038219820Sjeff	if (device->node_type == RDMA_NODE_IB_SWITCH) {
1039219820Sjeff		s = 0;
1040219820Sjeff		e = 0;
1041219820Sjeff	} else {
1042219820Sjeff		s = 1;
1043219820Sjeff		e = device->phys_port_cnt;
1044219820Sjeff	}
1045219820Sjeff
1046219820Sjeff	for (p = s; p <= e; ++p) {
1047219820Sjeff		if (rdma_port_get_link_layer(device, p) != IB_LINK_LAYER_INFINIBAND)
1048219820Sjeff			continue;
1049219820Sjeff		dev = ipoib_add_port("ib", device, p);
1050219820Sjeff		if (!IS_ERR(dev)) {
1051219820Sjeff			priv = dev->if_softc;
1052219820Sjeff			list_add_tail(&priv->list, dev_list);
1053219820Sjeff		}
1054219820Sjeff	}
1055219820Sjeff
1056219820Sjeff	ib_set_client_data(device, &ipoib_client, dev_list);
1057219820Sjeff}
1058219820Sjeff
1059219820Sjeffstatic void
1060219820Sjeffipoib_remove_one(struct ib_device *device)
1061219820Sjeff{
1062219820Sjeff	struct ipoib_dev_priv *priv, *tmp;
1063219820Sjeff	struct list_head *dev_list;
1064219820Sjeff
1065219820Sjeff	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
1066219820Sjeff		return;
1067219820Sjeff
1068219820Sjeff	dev_list = ib_get_client_data(device, &ipoib_client);
1069219820Sjeff
1070219820Sjeff	list_for_each_entry_safe(priv, tmp, dev_list, list) {
1071219820Sjeff		if (rdma_port_get_link_layer(device, priv->port) != IB_LINK_LAYER_INFINIBAND)
1072219820Sjeff			continue;
1073219820Sjeff
1074254576Sjhb		ipoib_stop(priv);
1075254576Sjhb
1076219820Sjeff		ib_unregister_event_handler(&priv->event_handler);
1077219820Sjeff
1078219820Sjeff		/* dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); */
1079219820Sjeff
1080219820Sjeff		flush_workqueue(ipoib_workqueue);
1081219820Sjeff
1082219820Sjeff		ipoib_dev_cleanup(priv);
1083219820Sjeff		ipoib_detach(priv);
1084219820Sjeff	}
1085219820Sjeff
1086219820Sjeff	kfree(dev_list);
1087219820Sjeff}
1088219820Sjeff
1089219820Sjeffstatic void
1090219820Sjeffipoib_config_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
1091219820Sjeff{
1092219820Sjeff	struct ipoib_dev_priv *parent;
1093219820Sjeff	struct ipoib_dev_priv *priv;
1094219820Sjeff	struct ifnet *dev;
1095219820Sjeff	uint16_t pkey;
1096219820Sjeff	int error;
1097219820Sjeff
1098219820Sjeff	if (ifp->if_type != IFT_INFINIBAND)
1099219820Sjeff		return;
1100219820Sjeff	dev = VLAN_DEVAT(ifp, vtag);
1101219820Sjeff	if (dev == NULL)
1102219820Sjeff		return;
1103219820Sjeff	priv = NULL;
1104219820Sjeff	error = 0;
1105219820Sjeff	parent = ifp->if_softc;
1106219820Sjeff	/* We only support 15 bits of pkey. */
1107219820Sjeff	if (vtag & 0x8000)
1108219820Sjeff		return;
1109219820Sjeff	pkey = vtag | 0x8000;	/* Set full membership bit. */
1110219820Sjeff	if (pkey == parent->pkey)
1111219820Sjeff		return;
1112219820Sjeff	/* Check for dups */
1113219820Sjeff	mutex_lock(&parent->vlan_mutex);
1114219820Sjeff	list_for_each_entry(priv, &parent->child_intfs, list) {
1115219820Sjeff		if (priv->pkey == pkey) {
1116219820Sjeff			priv = NULL;
1117219820Sjeff			error = EBUSY;
1118219820Sjeff			goto out;
1119219820Sjeff		}
1120219820Sjeff	}
1121219820Sjeff	priv = ipoib_priv_alloc();
1122219820Sjeff	priv->dev = dev;
1123219820Sjeff	priv->max_ib_mtu = parent->max_ib_mtu;
1124219820Sjeff	priv->mcast_mtu = priv->admin_mtu = parent->dev->if_mtu;
1125219820Sjeff	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
1126219820Sjeff	error = ipoib_set_dev_features(priv, parent->ca);
1127219820Sjeff	if (error)
1128219820Sjeff		goto out;
1129219820Sjeff	priv->pkey = pkey;
1130219820Sjeff	priv->broadcastaddr[8] = pkey >> 8;
1131219820Sjeff	priv->broadcastaddr[9] = pkey & 0xff;
1132219820Sjeff	dev->if_broadcastaddr = priv->broadcastaddr;
1133219820Sjeff	error = ipoib_dev_init(priv, parent->ca, parent->port);
1134219820Sjeff	if (error)
1135219820Sjeff		goto out;
1136219820Sjeff	priv->parent = parent->dev;
1137219820Sjeff	list_add_tail(&priv->list, &parent->child_intfs);
1138219820Sjeff	VLAN_SETCOOKIE(dev, priv);
1139219820Sjeff	dev->if_start = ipoib_vlan_start;
1140219820Sjeff	dev->if_drv_flags &= ~IFF_DRV_RUNNING;
1141219820Sjeff	dev->if_hdrlen = IPOIB_HEADER_LEN;
1142219820Sjeff	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1143219820Sjeff		ipoib_open(priv);
1144219820Sjeff	mutex_unlock(&parent->vlan_mutex);
1145219820Sjeff	return;
1146219820Sjeffout:
1147219820Sjeff	mutex_unlock(&parent->vlan_mutex);
1148219820Sjeff	if (priv)
1149219820Sjeff		free(priv, M_TEMP);
1150219820Sjeff	if (error)
1151219820Sjeff		ipoib_warn(parent,
1152219820Sjeff		    "failed to initialize subinterface: device %s, port %d vtag 0x%X",
1153219820Sjeff		    parent->ca->name, parent->port, vtag);
1154219820Sjeff	return;
1155219820Sjeff}
1156219820Sjeff
1157219820Sjeffstatic void
1158219820Sjeffipoib_unconfig_vlan(void *arg, struct ifnet *ifp, u_int16_t vtag)
1159219820Sjeff{
1160219820Sjeff	struct ipoib_dev_priv *parent;
1161219820Sjeff	struct ipoib_dev_priv *priv;
1162219820Sjeff	struct ifnet *dev;
1163219820Sjeff	uint16_t pkey;
1164219820Sjeff
1165219820Sjeff	if (ifp->if_type != IFT_INFINIBAND)
1166219820Sjeff		return;
1167219820Sjeff
1168219820Sjeff	dev = VLAN_DEVAT(ifp, vtag);
1169219820Sjeff	if (dev)
1170219820Sjeff		VLAN_SETCOOKIE(dev, NULL);
1171219820Sjeff	pkey = vtag | 0x8000;
1172219820Sjeff	parent = ifp->if_softc;
1173219820Sjeff	mutex_lock(&parent->vlan_mutex);
1174219820Sjeff	list_for_each_entry(priv, &parent->child_intfs, list) {
1175219820Sjeff		if (priv->pkey == pkey) {
1176219820Sjeff			ipoib_dev_cleanup(priv);
1177219820Sjeff			list_del(&priv->list);
1178219820Sjeff			break;
1179219820Sjeff		}
1180219820Sjeff	}
1181219820Sjeff	mutex_unlock(&parent->vlan_mutex);
1182219820Sjeff}
1183219820Sjeff
/*
 * Tags returned by EVENTHANDLER_REGISTER() for the vlan_config and
 * vlan_unconfig hooks.  They are stored by ipoib_init_module() and used
 * by ipoib_cleanup_module() to deregister the handlers.
 */
1184219820Sjeffeventhandler_tag ipoib_vlan_attach;
1185219820Sjeffeventhandler_tag ipoib_vlan_detach;
1186219820Sjeff
1187219820Sjeffstatic int __init
1188219820Sjeffipoib_init_module(void)
1189219820Sjeff{
1190219820Sjeff	int ret;
1191219820Sjeff
1192219820Sjeff	ipoib_recvq_size = roundup_pow_of_two(ipoib_recvq_size);
1193219820Sjeff	ipoib_recvq_size = min(ipoib_recvq_size, IPOIB_MAX_QUEUE_SIZE);
1194219820Sjeff	ipoib_recvq_size = max(ipoib_recvq_size, IPOIB_MIN_QUEUE_SIZE);
1195219820Sjeff
1196219820Sjeff	ipoib_sendq_size = roundup_pow_of_two(ipoib_sendq_size);
1197219820Sjeff	ipoib_sendq_size = min(ipoib_sendq_size, IPOIB_MAX_QUEUE_SIZE);
1198219820Sjeff	ipoib_sendq_size = max(ipoib_sendq_size, max(2 * MAX_SEND_CQE,
1199219820Sjeff						     IPOIB_MIN_QUEUE_SIZE));
1200219820Sjeff#ifdef CONFIG_INFINIBAND_IPOIB_CM
1201219820Sjeff	ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
1202219820Sjeff#endif
1203219820Sjeff
1204219820Sjeff	ipoib_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1205219820Sjeff		ipoib_config_vlan, NULL, EVENTHANDLER_PRI_FIRST);
1206219820Sjeff	ipoib_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1207219820Sjeff		ipoib_unconfig_vlan, NULL, EVENTHANDLER_PRI_FIRST);
1208219820Sjeff
1209219820Sjeff	/*
1210219820Sjeff	 * We create our own workqueue mainly because we want to be
1211219820Sjeff	 * able to flush it when devices are being removed.  We can't
1212219820Sjeff	 * use schedule_work()/flush_scheduled_work() because both
1213219820Sjeff	 * unregister_netdev() and linkwatch_event take the rtnl lock,
1214219820Sjeff	 * so flush_scheduled_work() can deadlock during device
1215219820Sjeff	 * removal.
1216219820Sjeff	 */
1217219820Sjeff	ipoib_workqueue = create_singlethread_workqueue("ipoib");
1218219820Sjeff	if (!ipoib_workqueue) {
1219219820Sjeff		ret = -ENOMEM;
1220219820Sjeff		goto err_fs;
1221219820Sjeff	}
1222219820Sjeff
1223219820Sjeff	ib_sa_register_client(&ipoib_sa_client);
1224219820Sjeff
1225219820Sjeff	ret = ib_register_client(&ipoib_client);
1226219820Sjeff	if (ret)
1227219820Sjeff		goto err_sa;
1228219820Sjeff
1229219820Sjeff	return 0;
1230219820Sjeff
1231219820Sjefferr_sa:
1232219820Sjeff	ib_sa_unregister_client(&ipoib_sa_client);
1233219820Sjeff	destroy_workqueue(ipoib_workqueue);
1234219820Sjeff
1235219820Sjefferr_fs:
1236219820Sjeff	return ret;
1237219820Sjeff}
1238219820Sjeff
1239219820Sjeffstatic void __exit
1240219820Sjeffipoib_cleanup_module(void)
1241219820Sjeff{
1242219820Sjeff
1243219820Sjeff	EVENTHANDLER_DEREGISTER(vlan_config, ipoib_vlan_attach);
1244219820Sjeff	EVENTHANDLER_DEREGISTER(vlan_unconfig, ipoib_vlan_detach);
1245219820Sjeff	ib_unregister_client(&ipoib_client);
1246219820Sjeff	ib_sa_unregister_client(&ipoib_sa_client);
1247219820Sjeff	destroy_workqueue(ipoib_workqueue);
1248219820Sjeff}
1249219820Sjeff
1250219820Sjeff/*
1251219820Sjeff * Infiniband output routine.
1252219820Sjeff */
1253219820Sjeffstatic int
1254219820Sjeffipoib_output(struct ifnet *ifp, struct mbuf *m,
1255249976Sglebius	const struct sockaddr *dst, struct route *ro)
1256219820Sjeff{
1257219820Sjeff	u_char edst[INFINIBAND_ALEN];
1258219820Sjeff	struct llentry *lle = NULL;
1259219820Sjeff	struct rtentry *rt0 = NULL;
1260219820Sjeff	struct ipoib_header *eh;
1261219820Sjeff	int error = 0;
1262219820Sjeff	short type;
1263219820Sjeff
1264219820Sjeff	if (ro != NULL) {
1265219820Sjeff		if (!(m->m_flags & (M_BCAST | M_MCAST)))
1266219820Sjeff			lle = ro->ro_lle;
1267219820Sjeff		rt0 = ro->ro_rt;
1268219820Sjeff	}
1269219820Sjeff#ifdef MAC
1270219820Sjeff	error = mac_ifnet_check_transmit(ifp, m);
1271219820Sjeff	if (error)
1272219820Sjeff		goto bad;
1273219820Sjeff#endif
1274219820Sjeff
1275219820Sjeff	M_PROFILE(m);
1276219820Sjeff	if (ifp->if_flags & IFF_MONITOR) {
1277219820Sjeff		error = ENETDOWN;
1278219820Sjeff		goto bad;
1279219820Sjeff	}
1280219820Sjeff	if (!((ifp->if_flags & IFF_UP) &&
1281219820Sjeff	    (ifp->if_drv_flags & IFF_DRV_RUNNING))) {
1282219820Sjeff		error = ENETDOWN;
1283219820Sjeff		goto bad;
1284219820Sjeff	}
1285219820Sjeff
1286219820Sjeff	switch (dst->sa_family) {
1287219820Sjeff#ifdef INET
1288219820Sjeff	case AF_INET:
1289219820Sjeff		if (lle != NULL && (lle->la_flags & LLE_VALID))
1290219820Sjeff			memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
1291219820Sjeff		else if (m->m_flags & M_MCAST)
1292219820Sjeff			ip_ib_mc_map(((struct sockaddr_in *)dst)->sin_addr.s_addr, ifp->if_broadcastaddr, edst);
1293219820Sjeff		else
1294219820Sjeff			error = arpresolve(ifp, rt0, m, dst, edst, &lle);
1295219820Sjeff		if (error)
1296219820Sjeff			return (error == EWOULDBLOCK ? 0 : error);
1297219820Sjeff		type = htons(ETHERTYPE_IP);
1298219820Sjeff		break;
1299219820Sjeff	case AF_ARP:
1300219820Sjeff	{
1301219820Sjeff		struct arphdr *ah;
1302219820Sjeff		ah = mtod(m, struct arphdr *);
1303219820Sjeff		ah->ar_hrd = htons(ARPHRD_INFINIBAND);
1304219820Sjeff
1305219820Sjeff		switch(ntohs(ah->ar_op)) {
1306219820Sjeff		case ARPOP_REVREQUEST:
1307219820Sjeff		case ARPOP_REVREPLY:
1308219820Sjeff			type = htons(ETHERTYPE_REVARP);
1309219820Sjeff			break;
1310219820Sjeff		case ARPOP_REQUEST:
1311219820Sjeff		case ARPOP_REPLY:
1312219820Sjeff		default:
1313219820Sjeff			type = htons(ETHERTYPE_ARP);
1314219820Sjeff			break;
1315219820Sjeff		}
1316219820Sjeff
1317219820Sjeff		if (m->m_flags & M_BCAST)
1318219820Sjeff			bcopy(ifp->if_broadcastaddr, edst, INFINIBAND_ALEN);
1319219820Sjeff		else
1320219820Sjeff			bcopy(ar_tha(ah), edst, INFINIBAND_ALEN);
1321219820Sjeff
1322219820Sjeff	}
1323219820Sjeff	break;
1324219820Sjeff#endif
1325219820Sjeff#ifdef INET6
1326219820Sjeff	case AF_INET6:
1327219820Sjeff		if (lle != NULL && (lle->la_flags & LLE_VALID))
1328219820Sjeff			memcpy(edst, &lle->ll_addr.mac8, sizeof(edst));
1329219820Sjeff		else if (m->m_flags & M_MCAST)
1330219820Sjeff			ipv6_ib_mc_map(&((struct sockaddr_in6 *)dst)->sin6_addr, ifp->if_broadcastaddr, edst);
1331219820Sjeff		else
1332219820Sjeff			error = nd6_storelladdr(ifp, m, dst, (u_char *)edst, &lle);
1333219820Sjeff		if (error)
1334219820Sjeff			return error;
1335219820Sjeff		type = htons(ETHERTYPE_IPV6);
1336219820Sjeff		break;
1337219820Sjeff#endif
1338219820Sjeff
1339219820Sjeff	default:
1340219820Sjeff		if_printf(ifp, "can't handle af%d\n", dst->sa_family);
1341219820Sjeff		error = EAFNOSUPPORT;
1342219820Sjeff		goto bad;
1343219820Sjeff	}
1344219820Sjeff
1345219820Sjeff	/*
1346219820Sjeff	 * Add local net header.  If no space in first mbuf,
1347219820Sjeff	 * allocate another.
1348219820Sjeff	 */
1349243882Sglebius	M_PREPEND(m, IPOIB_HEADER_LEN, M_NOWAIT);
1350219820Sjeff	if (m == NULL) {
1351219820Sjeff		error = ENOBUFS;
1352219820Sjeff		goto bad;
1353219820Sjeff	}
1354219820Sjeff	eh = mtod(m, struct ipoib_header *);
1355219820Sjeff	(void)memcpy(&eh->proto, &type, sizeof(eh->proto));
1356219820Sjeff	(void)memcpy(&eh->hwaddr, edst, sizeof (edst));
1357219820Sjeff
1358219820Sjeff	/*
1359219820Sjeff	 * Queue message on interface, update output statistics if
1360219820Sjeff	 * successful, and start output if interface not yet active.
1361219820Sjeff	 */
1362219820Sjeff	return ((ifp->if_transmit)(ifp, m));
1363219820Sjeffbad:
1364219820Sjeff	if (m != NULL)
1365219820Sjeff		m_freem(m);
1366219820Sjeff	return (error);
1367219820Sjeff}
1368219820Sjeff
1369219820Sjeff/*
1370219820Sjeff * Upper layer processing for a received Infiniband packet.
1371219820Sjeff */
1372219820Sjeffvoid
1373219820Sjeffipoib_demux(struct ifnet *ifp, struct mbuf *m, u_short proto)
1374219820Sjeff{
1375219820Sjeff	int isr;
1376219820Sjeff
1377219820Sjeff#ifdef MAC
1378219820Sjeff	/*
1379219820Sjeff	 * Tag the mbuf with an appropriate MAC label before any other
1380219820Sjeff	 * consumers can get to it.
1381219820Sjeff	 */
1382219820Sjeff	mac_ifnet_create_mbuf(ifp, m);
1383219820Sjeff#endif
1384219820Sjeff	/* Allow monitor mode to claim this frame, after stats are updated. */
1385219820Sjeff	if (ifp->if_flags & IFF_MONITOR) {
1386219820Sjeff		if_printf(ifp, "discard frame at IFF_MONITOR\n");
1387219820Sjeff		m_freem(m);
1388219820Sjeff		return;
1389219820Sjeff	}
1390219820Sjeff	/*
1391219820Sjeff	 * Dispatch frame to upper layer.
1392219820Sjeff	 */
1393219820Sjeff	switch (proto) {
1394219820Sjeff#ifdef INET
1395219820Sjeff	case ETHERTYPE_IP:
1396219820Sjeff		isr = NETISR_IP;
1397219820Sjeff		break;
1398219820Sjeff
1399219820Sjeff	case ETHERTYPE_ARP:
1400219820Sjeff		if (ifp->if_flags & IFF_NOARP) {
1401219820Sjeff			/* Discard packet if ARP is disabled on interface */
1402219820Sjeff			m_freem(m);
1403219820Sjeff			return;
1404219820Sjeff		}
1405219820Sjeff		isr = NETISR_ARP;
1406219820Sjeff		break;
1407219820Sjeff#endif
1408219820Sjeff#ifdef INET6
1409219820Sjeff	case ETHERTYPE_IPV6:
1410219820Sjeff		isr = NETISR_IPV6;
1411219820Sjeff		break;
1412219820Sjeff#endif
1413219820Sjeff	default:
1414219820Sjeff		goto discard;
1415219820Sjeff	}
1416219820Sjeff	netisr_dispatch(isr, m);
1417219820Sjeff	return;
1418219820Sjeff
1419219820Sjeffdiscard:
1420219820Sjeff	m_freem(m);
1421219820Sjeff}
1422219820Sjeff
1423219820Sjeff/*
1424219820Sjeff * Process a received Infiniband packet.
1425219820Sjeff */
1426219820Sjeffstatic void
1427219820Sjeffipoib_input(struct ifnet *ifp, struct mbuf *m)
1428219820Sjeff{
1429219820Sjeff	struct ipoib_header *eh;
1430219820Sjeff
1431219820Sjeff	if ((ifp->if_flags & IFF_UP) == 0) {
1432219820Sjeff		m_freem(m);
1433219820Sjeff		return;
1434219820Sjeff	}
1435219820Sjeff	CURVNET_SET_QUIET(ifp->if_vnet);
1436219820Sjeff
1437219820Sjeff	/* Let BPF have it before we strip the header. */
1438219820Sjeff	IPOIB_MTAP(ifp, m);
1439219820Sjeff	eh = mtod(m, struct ipoib_header *);
1440219820Sjeff	/*
1441219820Sjeff	 * Reset layer specific mbuf flags to avoid confusing upper layers.
1442219820Sjeff	 * Strip off Infiniband header.
1443219820Sjeff	 */
1444219820Sjeff	m->m_flags &= ~M_VLANTAG;
1445254523Sandre	m_clrprotoflags(m);
1446219820Sjeff	m_adj(m, IPOIB_HEADER_LEN);
1447219820Sjeff
1448219820Sjeff	if (IPOIB_IS_MULTICAST(eh->hwaddr)) {
1449219820Sjeff		if (memcmp(eh->hwaddr, ifp->if_broadcastaddr,
1450219820Sjeff		    ifp->if_addrlen) == 0)
1451219820Sjeff			m->m_flags |= M_BCAST;
1452219820Sjeff		else
1453219820Sjeff			m->m_flags |= M_MCAST;
1454219820Sjeff		ifp->if_imcasts++;
1455219820Sjeff	}
1456219820Sjeff
1457219820Sjeff	ipoib_demux(ifp, m, ntohs(eh->proto));
1458219820Sjeff	CURVNET_RESTORE();
1459219820Sjeff}
1460219820Sjeff
1461219820Sjeffstatic int
1462219820Sjeffipoib_resolvemulti(struct ifnet *ifp, struct sockaddr **llsa,
1463219820Sjeff	struct sockaddr *sa)
1464219820Sjeff{
1465219820Sjeff	struct sockaddr_dl *sdl;
1466219820Sjeff#ifdef INET
1467219820Sjeff	struct sockaddr_in *sin;
1468219820Sjeff#endif
1469219820Sjeff#ifdef INET6
1470219820Sjeff	struct sockaddr_in6 *sin6;
1471219820Sjeff#endif
1472219820Sjeff	u_char *e_addr;
1473219820Sjeff
1474219820Sjeff	switch(sa->sa_family) {
1475219820Sjeff	case AF_LINK:
1476219820Sjeff		/*
1477219820Sjeff		 * No mapping needed. Just check that it's a valid MC address.
1478219820Sjeff		 */
1479219820Sjeff		sdl = (struct sockaddr_dl *)sa;
1480219820Sjeff		e_addr = LLADDR(sdl);
1481219820Sjeff		if (!IPOIB_IS_MULTICAST(e_addr))
1482219820Sjeff			return EADDRNOTAVAIL;
1483219820Sjeff		*llsa = 0;
1484219820Sjeff		return 0;
1485219820Sjeff
1486219820Sjeff#ifdef INET
1487219820Sjeff	case AF_INET:
1488219820Sjeff		sin = (struct sockaddr_in *)sa;
1489219820Sjeff		if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
1490219820Sjeff			return EADDRNOTAVAIL;
1491219820Sjeff		sdl = malloc(sizeof *sdl, M_IFMADDR,
1492219820Sjeff		       M_NOWAIT|M_ZERO);
1493219820Sjeff		if (sdl == NULL)
1494219820Sjeff			return ENOMEM;
1495219820Sjeff		sdl->sdl_len = sizeof *sdl;
1496219820Sjeff		sdl->sdl_family = AF_LINK;
1497219820Sjeff		sdl->sdl_index = ifp->if_index;
1498219820Sjeff		sdl->sdl_type = IFT_INFINIBAND;
1499219820Sjeff		sdl->sdl_alen = INFINIBAND_ALEN;
1500219820Sjeff		e_addr = LLADDR(sdl);
1501219820Sjeff		ip_ib_mc_map(sin->sin_addr.s_addr, ifp->if_broadcastaddr,
1502219820Sjeff		    e_addr);
1503219820Sjeff		*llsa = (struct sockaddr *)sdl;
1504219820Sjeff		return 0;
1505219820Sjeff#endif
1506219820Sjeff#ifdef INET6
1507219820Sjeff	case AF_INET6:
1508219820Sjeff		sin6 = (struct sockaddr_in6 *)sa;
1509219820Sjeff		/*
1510219820Sjeff		 * An IP6 address of 0 means listen to all
1511219820Sjeff		 * of the multicast address used for IP6.
1512219820Sjeff		 * This has no meaning in ipoib.
1513219820Sjeff		 */
1514219820Sjeff		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
1515219820Sjeff			return EADDRNOTAVAIL;
1516219820Sjeff		if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1517219820Sjeff			return EADDRNOTAVAIL;
1518219820Sjeff		sdl = malloc(sizeof *sdl, M_IFMADDR,
1519219820Sjeff		       M_NOWAIT|M_ZERO);
1520219820Sjeff		if (sdl == NULL)
1521219820Sjeff			return (ENOMEM);
1522219820Sjeff		sdl->sdl_len = sizeof *sdl;
1523219820Sjeff		sdl->sdl_family = AF_LINK;
1524219820Sjeff		sdl->sdl_index = ifp->if_index;
1525219820Sjeff		sdl->sdl_type = IFT_INFINIBAND;
1526219820Sjeff		sdl->sdl_alen = INFINIBAND_ALEN;
1527219820Sjeff		e_addr = LLADDR(sdl);
1528219820Sjeff		ipv6_ib_mc_map(&sin6->sin6_addr, ifp->if_broadcastaddr, e_addr);
1529219820Sjeff		*llsa = (struct sockaddr *)sdl;
1530219820Sjeff		return 0;
1531219820Sjeff#endif
1532219820Sjeff
1533219820Sjeff	default:
1534219820Sjeff		return EAFNOSUPPORT;
1535219820Sjeff	}
1536219820Sjeff}
1537219820Sjeff
/* Register the module's init and exit routines. */
1538219820Sjeffmodule_init(ipoib_init_module);
1539219820Sjeffmodule_exit(ipoib_cleanup_module);
1540255932Salfred
1541255932Salfred#undef MODULE_VERSION
1542255932Salfred#include <sys/module.h>
/*
 * Module event handler: accepts every event and does no work itself.
 * Returns 0 unconditionally.
 */
1543255932Salfredstatic int
1544255932Salfredipoib_evhand(module_t mod, int event, void *arg)
1545255932Salfred{
1546358933Shselasky	return (0);
1547255932Salfred}
1548255932Salfred
/* Module description: the name and the event handler above. */
1549255932Salfredstatic moduledata_t ipoib_mod = {
1550358933Shselasky	.name = "ipoib",
1551358933Shselasky	.evhand = ipoib_evhand,
1552255932Salfred};
1553255932Salfred
/* Declare the module and its dependency on ibcore. */
1554255932SalfredDECLARE_MODULE(ipoib, ipoib_mod, SI_SUB_SMP, SI_ORDER_ANY);
1555255932SalfredMODULE_DEPEND(ipoib, ibcore, 1, 1, 1);
1556255932Salfred
1557